├── directionalvi ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── metrics.cpython-37.pyc │ │ ├── rescale.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── show_pickle.cpython-37.pyc │ │ └── synthetic_dataset.cpython-37.pyc │ ├── show_pickle.py │ ├── test │ │ ├── test_load_data.py │ │ └── test_synthetic_functions.py │ ├── test_synthetic.py │ ├── count_params.py │ ├── metrics.py │ ├── rescale.py │ ├── synthetic_dataset.py │ ├── test load helens.ipynb │ ├── .ipynb_checkpoints │ │ └── test load helens-checkpoint.ipynb │ ├── csv_dataset.py │ └── load_data.py ├── README.md ├── RBFKernelDirectionalGrad.py ├── traditional_vi.py ├── grad_svgp.py └── GradVariationalStrategy.py ├── experiments ├── bunny │ ├── ImplicitBunny │ │ ├── bunny.mat │ │ ├── readObj.m │ │ └── bunny.m │ └── bunny.sub ├── GNN_bo │ ├── GCN │ │ ├── run.sh │ │ ├── models2.py │ │ ├── models.py │ │ ├── utils.py │ │ └── train.py │ ├── gcn_sgd.sub │ ├── turbo_utils.py │ ├── gcn.sub │ ├── gp.py │ ├── gcn_sgd.py │ └── plot_traj.py ├── README.md ├── synthetic1 │ ├── write_dataset.py │ ├── compute_optimal_subspace.py │ ├── plot_nll.py │ ├── run_exp.py │ ├── ExactGradGP.py │ └── synthetic1.py ├── rover │ ├── optimize_rover_gd.py │ ├── random_search.py │ ├── finite_difference.py │ ├── plot_rover.py │ ├── gradient_descent.py │ ├── turbo_utils.py │ ├── rover.py │ ├── plot_traj_new.py │ ├── plot_traj.py │ ├── exact_gp.py │ ├── run_exp.py │ └── test_turbo.py ├── synthetic │ ├── exp_setup.sh │ └── exp_run.sh ├── uci_dfree │ ├── plot_nll.py │ ├── run_exp.py │ └── test.py └── stellarator_regression │ ├── plot_nll.py │ ├── run_exp.py │ └── plot_stellarator.py ├── tests ├── testfun.py ├── README.md ├── test_traditional_vi.py ├── test_grad_svgp.py ├── test_dfree_dsvgp.py └── test_dsvgp.py ├── README.md ├── .gitignore └── graphite_environment.yml /directionalvi/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/bunny/ImplicitBunny/bunny.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/experiments/bunny/ImplicitBunny/bunny.mat -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/rescale.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/rescale.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/show_pickle.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/show_pickle.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/synthetic_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/synthetic_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/show_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import sys 3 | 4 | filename = sys.argv[1] 5 | 6 | # load the test point 7 | with open(filename, "rb") as f: 8 | d= pickle.load(f) 9 | for item in d.items(): 10 | print(item) 11 | -------------------------------------------------------------------------------- /tests/testfun.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def f(x, deriv=True): 5 | # f(x) = sin(2pi(x**2+y**2)), df/dx = cos(2pi(x**2+y**2))4pi*x 6 | fx = torch.sin(2*np.pi*torch.sum(x**2,dim=1)) 7 | gx = 4*np.pi*( torch.cos(2*np.pi*torch.sum(x**2,dim=1)) * x.T).T 8 | fx = fx.reshape(len(x),1) 9 | if deriv: 10 | return torch.cat([fx,gx],1) 11 | else: 12 | return fx.squeeze(axis=1) -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/run.sh: -------------------------------------------------------------------------------- 1 | dataset="citeseer" 2 | expid="10" 3 | python train.py --fastmode --seed 1212 --epochs 1000\ 4 | --lr 0.0025 --weight_decay 1e-4 --hidden 32\ 5 | --dropout 0.2 --dataset ${dataset}\ 6 | --watch_model True --train_percent 0.036\ 7 | --expid ${expid} --lr_sched "none"\ 8 | 2>&1 | tee ../runlogs/a.out_${dataset}_${expid} 9 | -------------------------------------------------------------------------------- /directionalvi/utils/test/test_load_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append("../") 4 | from load_data import * 5 | 6 | args = {} 7 | args["n_train"] = 12000 8 | args["n_test"] = 2040 9 | args["seed"] = 3 10 | 11 | #cwd = os.getcwd() 12 | #print(cwd) 13 | #print("hi") 14 | train, test, dim = load_helens("../../../data/MtSH.mat", **args) 15 | print(len(train)) 16 | print(train[0]) 17 | print(len(test)) 18 | print(dim) -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | # Experiments 2 | 3 | - `GNN_bo` contains the GCN experiment. 4 | - `bunny` contains the bunny experiment. 5 | - `rover` contains the rover experiment. 6 | - `stellarator_regression` contains the stellarator regression experiment. 7 | - `synthetic` can run all synthetic experiments except sin-5 8 | - `synthetic1` can run the sin-5 experiment. 9 | - `uci_dfree` contains the code to run DSVGP on the UCI datasets without derivative information. 
10 | -------------------------------------------------------------------------------- /directionalvi/utils/test_synthetic.py: -------------------------------------------------------------------------------- 1 | from botorch.test_functions.base import BaseTestProblem 2 | from botorch.test_functions.synthetic import Branin, SixHumpCamel, StyblinskiTang, Hartmann, SyntheticTestFunction 3 | from torch import Tensor 4 | import torch 5 | from synthetic_functions import Hart_with_deriv 6 | 7 | x= Hart_with_deriv() 8 | 9 | #t = torch.tensor([[.1, .25, .2, .6, .1, .1], [.2, .1, .3, .1, .2, .4], [.1, .2, .3, .1, .2, .4]]) 10 | t = torch.tensor([0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573]).reshape(1, 6) 11 | res = x.evaluate_true_with_deriv(t) 12 | print(res.shape) 13 | print(res) -------------------------------------------------------------------------------- /experiments/synthetic1/write_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle 3 | import numpy as np 4 | 5 | # objective 6 | def testf(x, deriv=True): 7 | # f(x) = sin(2pi(x**2+y**2)), df/dx = cos(2pi(x**2+y**2))4pi*x 8 | fx = torch.sin(2*np.pi*torch.sum(x**2,dim=1)) 9 | gx = 4*np.pi*( torch.cos(2*np.pi*torch.sum(x**2,dim=1)) * x.T).T 10 | fx = fx.reshape(len(x),1) 11 | if deriv: 12 | return torch.cat([fx,gx],1) 13 | else: 14 | return fx.squeeze(axis=1) 15 | 16 | n = 20000 17 | dim = 12 18 | X = torch.rand(n,dim) 19 | Y = testf(X) 20 | d ={} 21 | d['X'] = X 22 | d['Y'] = Y 23 | name = f"./synthetic1_dataset_{n}_points_{dim}_dim.pickle" 24 | pickle.dump(d,open(name,"wb")) 25 | -------------------------------------------------------------------------------- /experiments/rover/optimize_rover_gd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rover import * 3 | from gradient_descent import gradient_descent 4 | import pickle 5 | 6 | # generate a starting point 7 | dim = 200 8 | x0 = np.random.uniform(-5,5,size=dim) 9 | max_iter = 1000 10 | gtol = 1e-7 11 | # optimize 12 | def noisy_rover(u): 13 | return rover_obj(u) + 1e1*np.random.randn() 14 | xopt,X = gradient_descent(noisy_rover,rover_grad,x0,max_iter=max_iter,gtol=gtol) 15 | fX = np.array([rover_obj(x) for x in X]) 16 | print(fX[-1]) 17 | # save data 18 | d = {} 19 | d['X'] = X 20 | d['fX'] = fX 21 | d['mode'] = "GD" 22 | outfilename = f"./output/data_rover_GD_{max_iter}_iter.pickle" 23 | pickle.dump(d,open(outfilename,"wb")) 24 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Basic tests to show usage and functionality of methods 3 | To run the code in this directory use `python3 filename`. For instance, to train a Variational GP with directional derivatives such as DSVGP2 run `python3 test_dsvgp.py`. Make sure that you have compatible version of gpytorch or are inside the supplied conda environment. 4 | 5 | - `test_dsvgp.py` can be used to test a variational GP with directional derivatives, i.e. DSVGP or DPPGPR 6 | - `test_dfree_dsvgp.py` can be used to train DSVGP on a data set that has no derivative information. 7 | - `test_grad_svgp.py` runs a multi-output stochastic variational GP with full derivative information. 8 | - `test_traditional_vi.py` runs SVGP or PPGPR. 
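For orientation, the models these tests exercise all follow GPyTorch's approximate-GP pattern. The snippet below is a minimal, plain-GPyTorch SVGP sketch — it uses only stock GPyTorch classes (not this repo's directional-derivative models), and the data sizes, inducing count, and training settings are made-up illustrations:

```python
import math
import torch
import gpytorch

class ToySVGP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        # free-form Gaussian q(u) over the inducing values
        var_dist = gpytorch.variational.CholeskyVariationalDistribution(inducing_points.size(0))
        var_strat = gpytorch.variational.VariationalStrategy(
            self, inducing_points, var_dist, learn_inducing_locations=True)
        super().__init__(var_strat)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))

# toy data: f(x) = sin(2*pi*(x1^2 + x2^2)), the same test function as testfun.py (values only)
X = torch.rand(1000, 2)
y = torch.sin(2 * math.pi * (X ** 2).sum(dim=1))

model = ToySVGP(inducing_points=torch.rand(50, 2))
likelihood = gpytorch.likelihoods.GaussianLikelihood()
# VariationalELBO gives SVGP; swapping in gpytorch.mlls.PredictiveLogLikelihood gives the PPGPR objective
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=X.size(0))
optimizer = torch.optim.Adam(list(model.parameters()) + list(likelihood.parameters()), lr=0.01)

model.train(); likelihood.train()
for _ in range(200):
    optimizer.zero_grad()
    loss = -mll(model(X), y)
    loss.backward()
    optimizer.step()
```

The scripts in this directory follow broadly the same train/evaluate pattern, with the kernel and variational strategy replaced by the directional-derivative versions from `directionalvi/`.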
9 | 10 | -------------------------------------------------------------------------------- /directionalvi/utils/count_params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def count_params(model, likelihood): 4 | # count number of parameters to learn 5 | param_total_dim = 0 6 | print("All parameters to learn:") 7 | for name, param in model.named_parameters(): 8 | print(" ", name) 9 | print(" ", param.data.shape) 10 | if param.requires_grad: 11 | param_total_dim += np.prod(param.data.shape) 12 | for name, param in likelihood.named_parameters(): 13 | print(" ", name) 14 | print(" ", param.data.shape) 15 | if param.requires_grad: 16 | param_total_dim += np.prod(param.data.shape) 17 | 18 | print("Total number of parameters: ", param_total_dim) 19 | return param_total_dim -------------------------------------------------------------------------------- /directionalvi/utils/metrics.py: -------------------------------------------------------------------------------- 1 | def MSE(Y,Z): 2 | """Compute the MSE (mean squared error). 3 | Y: torch tensor, function values 4 | Z: torch tensor, predicted function values 5 | """ 6 | return ((Y-Z)**2).mean() 7 | 8 | def MAE(Y,Z): 9 | """Compute the MAE (mean absolute error). 10 | Y: torch tensor, function values 11 | Z: torch tensor, predicted function values 12 | """ 13 | return ((Y-Z).abs()).mean() 14 | 15 | def RMSE(Y, Z): 16 | """Compute the RMSE (root mean squared error). 17 | Y: torch tensor, function values 18 | Z: torch tensor, predicted function values 19 | """ 20 | return ((Y-Z)**2).mean().sqrt() 21 | 22 | def SMAE(Y, Z): 23 | """Compute the SMAE (MAE normalized by the mean magnitude of Y). 24 | Y: torch tensor, function values 25 | Z: torch tensor, predicted function values 26 | """ 27 | return ((Y-Z).abs()).mean() / Y.abs().mean() -------------------------------------------------------------------------------- /experiments/rover/random_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rover import rover_obj 3 | import sys 4 | 5 | 6 | 7 | if __name__ == '__main__': 8 | 9 | dim = 200 10 | max_evals = 2000 11 | lb = -5 * np.ones(dim) 12 | ub = 5 * np.ones(dim) 13 | batch_size = 5 14 | num_epochs = 30 15 | 16 | from datetime import datetime 17 | now = datetime.now() 18 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 19 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 20 | np.random.seed(seed) 21 | 22 | X = np.random.uniform(lb,ub,(max_evals,dim)) 23 | fX = [rover_obj(x) for x in X] 24 | 25 | d ={} 26 | d['X'] = X 27 | d['fX'] = fX 28 | d['mode'] = "Random Search" 29 | outfilename = f"./output/data_rover_Random_Search_{max_evals}_evals_{barcode}.pickle" 30 | import pickle 31 | pickle.dump(d,open(outfilename,"wb")) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GP-Derivatives-Variational-Inference 2 | 3 | This repo contains code for the NeurIPS paper, Scaling Gaussian Processes with Derivatives Using Variational Inference. 4 | 5 | All of our code leverages the GPyTorch framework for efficient computations and GPU acceleration. Much of the functionality of this code base is, or soon will be, built into GPyTorch. 6 | 7 | The `graphite_environment.yml` can be used to create a compatible conda environment.
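(A typical way to do this is `conda env create -f graphite_environment.yml`, then `conda activate` the environment it defines.)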
8 | 9 | The directory structure is as follows: 10 | - `directionalvi` contains the main methods used in the paper. 11 | - `tests` contains basic tests showing how to use the methods. 12 | - `experiments` contains code for the experiments run in the paper, including the graph convolutional network, stellarator regression, rover, bunny, and UCI experiments. For experimental data, please contact the authors. 13 | 14 | For a basic introduction on how to use the methods from the paper, visit the `tests` directory. 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | gpytorch_tutorials 2 | notebooks/*.ipynb_checkpoints 3 | __pycache__ 4 | slurm_output 5 | experiments/.ipynb_checkpoints 6 | experiments/synthetic/wandb 7 | experiments/synthetic/postprocess/ 8 | *.ipynb 9 | experiments/*.sub 10 | experiments/*-copy.py 11 | experiments/synthetic/*.exp* 12 | experiments/synthetic/*.sub 13 | experiments/bunny/wandb 14 | experiments/bunny/results 15 | experiments/lenet_bo 16 | directionalvi/directional_vi-fixed_inducing.py 17 | experiments/GNN_bo/results* 18 | experiments/GNN_bo/runlogs* 19 | experiments/GNN_bo/plots 20 | experiments/GNN_bo/GCN/data 21 | experiments/synthetic/logs/ 22 | experiments/GNN_bo/*.sh 23 | experiments/GNN_bo/*.sub 24 | experiments/GNN_bo/gcn_turbo_test.py 25 | experiments/synthetic/ablation 26 | experiments/plot/synthetic/data 27 | experiments/plot/synthetic/exp_res 28 | experiments/plot/synthetic/plots 29 | experiments/synthetic/outputs 30 | experiments/rover/plots/ 31 | experiments/stellarator_regression/plots/ 32 | experiments/stellarator_regression/data/ 33 | experiments/rover/data/ 34 | experiments/synthetic/ExactGradGP-run.py 35 | -------------------------------------------------------------------------------- /experiments/rover/finite_difference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import multiprocessing as mp 3 | 4 | 5 | def fdiff_jac(f,x0,h=1e-6): 6 | """Compute the jacobian of f with 7 | central difference 8 | """ 9 | h2 = h/2.0 10 | dim = len(x0) 11 | Ep = x0 + h2*np.eye(dim) 12 | Fp = np.array([f(e) for e in Ep]) 13 | Em = x0 - h2*np.eye(dim) 14 | Fm = np.array([f(e) for e in Em]) 15 | jac = (Fp - Fm)/(h) 16 | return jac.T 17 | 18 | def fdiff_jac_mp(f,x0,h=1e-6,n_comp=1): 19 | """Compute the jacobian of f with 20 | central difference 21 | using multiprocessing for acceleration.
22 | """ 23 | h2 = h/2.0 24 | dim = len(x0) 25 | Ep = x0 + h2*np.eye(dim) 26 | Em = x0 - h2*np.eye(dim) 27 | with mp.Pool(n_comp) as p: 28 | Fp = np.array(p.map(f, Ep)) 29 | Fm = np.array(p.map(f, Em)) 30 | jac = (Fp - Fm)/(h) 31 | return jac.T 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(0) 35 | dim = 4 36 | A = np.random.randn(dim,dim) 37 | print(A) 38 | f = lambda x: A @ x 39 | x0 = np.random.randn(dim) 40 | t0 = time.time() 41 | print(fdiff_jac(f,x0)) 42 | -------------------------------------------------------------------------------- /directionalvi/utils/rescale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def to_unit_cube(x, lb, ub, g=None): 5 | """Project to [0, 1]^d from hypercube with bounds lb and ub""" 6 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 7 | xx = (x - lb) / (ub - lb) 8 | return xx 9 | 10 | 11 | def from_unit_cube(x, lb, ub, g=None): 12 | """Project from [0, 1]^d to hypercube with bounds lb and ub""" 13 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 14 | xx = x * (ub - lb) + lb 15 | return xx 16 | 17 | 18 | def normalize(y, **kwargs): 19 | ''' 20 | normalize function values and derivatives 21 | Input: torch tensor storing function values and derivatives 22 | ''' 23 | if kwargs["derivative"]: 24 | f = y[..., 0].reshape(len(y),1) 25 | g = y[..., 1:].reshape(len(y),-1) 26 | fcopy = np.array(f.flatten()) 27 | sigma = np.std(fcopy, ddof=1) 28 | f -= np.mean(fcopy) 29 | f /= sigma 30 | g /= sigma 31 | y = torch.cat([f, g], 1) 32 | else: 33 | fcopy = np.array(y.flatten()) 34 | sigma = np.std(fcopy) 35 | y -= np.mean(fcopy) 36 | y /= sigma 37 | 38 | -------------------------------------------------------------------------------- /experiments/synthetic/exp_setup.sh: -------------------------------------------------------------------------------- 1 | dataset=${1} 2 | variational_strategy=${2} 3 | variational_distribution=${3} 4 | n_train=${4} 5 | n_test=${5} 6 | num_inducing=${6} 7 | num_directions=${7} 8 | num_epochs=${8} 9 | batch_size=${9} 10 | model=${10} 11 | lr=${11} 12 | lr_ngd=${12} 13 | num_contour_quad=${13} 14 | watch_model=${14} 15 | exp_name=${15} 16 | seed=${16} 17 | lr_sched=${17} 18 | save_results=${18} 19 | mll_type=${19} 20 | gamma=${20} 21 | 22 | if [ ! 
-d "./logs" ] 23 | then 24 | mkdir ./logs 25 | fi 26 | 27 | python -u exp_script.py \ 28 | --dataset ${dataset} --variational_strategy ${variational_strategy}\ 29 | --variational_distribution ${variational_distribution} \ 30 | --n_train ${n_train} --n_test ${n_test}\ 31 | --num_inducing ${num_inducing} --num_directions ${num_directions}\ 32 | --num_epochs ${num_epochs} --batch_size ${batch_size} --model ${model}\ 33 | --lr ${lr} --lr_ngd ${lr_ngd} --num_contour_quad ${num_contour_quad}\ 34 | --watch_model ${watch_model} --exp_name ${exp_name} --seed ${seed}\ 35 | --lr_sched ${lr_sched} --save_results ${save_results} --mll_type ${mll_type}\ 36 | --gamma ${gamma}\ 37 | 2>&1 | tee logs/a.out_${dataset}_${model}_train${n_train}_test${n_test}_m${num_inducing}_p${num_directions}_epoch${num_epochs}_${variational_distribution}_${variational_strategy}_exp${expname}_${mll_type} -------------------------------------------------------------------------------- /directionalvi/utils/synthetic_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | from torch import is_tensor 3 | from torch.utils.data import Dataset 4 | from torch.quasirandom import SobolEngine 5 | from rescale import from_unit_cube 6 | 7 | class synthetic_dataset(Dataset): 8 | """A synthetic dataset that generates data when called from. 9 | """ 10 | def __init__(self, f,lb,ub,n_points,dim): 11 | """ 12 | Args: 13 | f (function handle): Returns a function value and gradient eval 14 | lb,ub (1D tensors): lower and upper bounds on domain of f 15 | n_points (int): number of data points 16 | """ 17 | self.f = f 18 | self.lb = lb 19 | self.ub = ub 20 | self.n_points = n_points 21 | self.dim = dim 22 | self.sobol = SobolEngine(dim,scramble=True) 23 | 24 | def __len__(self): 25 | return self.n_points 26 | 27 | def __getitem__(self, idx): 28 | if is_tensor(idx): 29 | idx = idx.tolist() 30 | # reset the sobol sequence 31 | self.sobol.reset() 32 | # fast forward to the desired index 33 | self.sobol.fast_forward(idx-1) 34 | # generate a point 35 | x = self.sobol.draw().flatten() 36 | # map from unit cube 37 | x = x * (self.ub - self.lb) + self.lb 38 | # evaluate it 39 | fx = self.f(x) 40 | # return a tuple of tensors 41 | sample = (x,fx) 42 | return sample 43 | -------------------------------------------------------------------------------- /experiments/rover/plot_rover.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.pylab as pl 3 | import seaborn as sns 4 | import pandas as pd 5 | import pickle 6 | import numpy as np 7 | import glob 8 | from rover import * 9 | 10 | # read the data 11 | data_files = glob.glob("./output/data*.pickle") 12 | colors = pl.cm.jet(np.linspace(0,1,len(data_files))) 13 | 14 | means = np.zeros((2,1000)) 15 | n_type = np.zeros(2) 16 | data = [] 17 | for ii in range(len(data_files)): 18 | ff = data_files[ii] 19 | # attributes 20 | attrib = {} 21 | # load 22 | d = pickle.load(open(ff, "rb")) 23 | if d['mode'] == 'Vanilla': 24 | label = "TuRBO" 25 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 26 | M = d['num_inducing']*(d['num_directions']+1) 27 | label = f"TuRBO-DPPGPR{d['num_directions']}" 28 | elif d['mode'] == "GD": 29 | label= d['mode'] 30 | uopt = d['X'][-1] 31 | x0 = np.array([5,20,0,0]) 32 | X = rover_dynamics(uopt,x0) 33 | plt.plot(X[:,0],X[:,1],linewidth=5,markersize=12,color=colors[ii],label=label) 34 | 35 | # plot the waypoints 36 | W = 
np.array([x0,[8,15,3,-4],[16,7,6,-4],[16,12,-6,-4],[0,0,0,0]]) 37 | plt.scatter(W[:,0],W[:,1],color='k',s=50,label='waypoints') 38 | # sns.set_style("whitegrid") 39 | # sns.set_context("paper", font_scale=1.5) 40 | plt.legend() 41 | plt.title("Rover Path") 42 | plt.ylabel("$x_2$") 43 | plt.xlabel("$x_1$") 44 | plt.show() 45 | 46 | -------------------------------------------------------------------------------- /experiments/synthetic1/compute_optimal_subspace.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse.linalg import svds 2 | import numpy as np 3 | import torch 4 | 5 | 6 | def compute_optimal_subspace_projection(G,X,k): 7 | """Compute the optimal k-dimensional representation of G. 8 | G: np array, 2d, rows are observations; X: np array, 2d, data to project onto the same subspace 9 | k: int, dimension of subspace 10 | return: 11 | G: scores of G on the k leading right singular directions 12 | X: X projected onto those directions 13 | V: the k leading right singular vectors (optimal directions) 14 | """ 15 | # center the data 16 | G = G - np.mean(G,axis=0) 17 | # compute the SVD of G (keep the k leading directions) 18 | U,S,VT = np.linalg.svd(G) 19 | # truncated score matrix 20 | #G = U[:,:k] @ np.diag(S[:k]) 21 | G = G @ (VT.T)[:,:k] 22 | # project X as well 23 | X = X @ (VT.T[:,:k]) 24 | print("Singular values", S) 25 | return G,X, (VT.T[:,:k]) 26 | 27 | if __name__ == "__main__": 28 | import pickle 29 | d = pickle.load(open("synthetic1_dataset_10000_points_5_dim.pickle", "rb")) 30 | X = d['X'].detach().numpy() 31 | Y = d['Y'].detach().numpy() 32 | n,dim = X.shape 33 | f = Y[:,0].reshape(n,1) 34 | G = Y[:,1:] 35 | # compute the reduced G and X 36 | k = 2 # reduced dimension 37 | G,X,V = compute_optimal_subspace_projection(G,X,k) 38 | # make a reduced dataset 39 | Y = np.hstack((f,G)) 40 | d = {} 41 | d['X'] = torch.tensor(X) 42 | d['Y'] = torch.tensor(Y) 43 | pickle.dump(d,open(f"synthetic1_dataset_10000_points_5_dim_grad_dimredux_{k}_directions.pickle","wb")) 44 | 45 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gcn_sgd.sub: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J basic # Job name 3 | #SBATCH -o ../../slurm_output/job_%j.out # Name of stdout output file(%j expands to jobId) 4 | #SBATCH -e ../../slurm_output/job_%j.err # Name of stderr output file(%j expands to jobId) 5 | #SBATCH --get-user-env # Tells sbatch to retrieve the users login environment 6 | #SBATCH -N 1 # Total number of nodes requested 7 | #SBATCH -n 16 # Total number of cores requested 8 | #SBATCH --mem=32G # Total amount of (real) memory requested (per node) 9 | #SBATCH -t 168:00:00 # Time limit (hh:mm:ss) 10 | #SBATCH --partition=default_partition # Request partition for resource 11 | ##SBATCH --exclude=marschner-compute01 # Request partition for resource 12 | #SBATCH --exclude=joachims-compute-01,sablab-gpu-11 13 | #SBATCH --gres=gpu:1 # Specify a list of generic consumable resources (per node) 14 | 15 | 16 | .
/home/xz584/anaconda3/etc/profile.d/conda.sh 17 | conda activate DSVGP2 18 | 19 | # exp setups 20 | dataset="PubMed" 21 | # watch_model=True 22 | exp_name=${1} 23 | seed=${2} 24 | turbo_max_evals=${3} 25 | # find runlogs in logs folder 26 | python3 -u gcn_sgd.py --dataset ${dataset} --exp_name ${exp_name} --seed ${seed} \ 27 | --turbo_max_evals ${turbo_max_evals} \ 28 | 2>&1 | tee runlogs/a.out_${dataset}_SGD_epoch${turbo_max_evals}_exp${exp_name} 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /experiments/rover/gradient_descent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def gradient_descent(Loss,grad,x0,max_iter=1000,gtol=1e-3): 4 | # inital guess 5 | x_k = np.copy(x0) 6 | # initialize step size 7 | mu_k = 1e-2 8 | # minimum step size 9 | mu_min = 1e-10 10 | # compute gradient 11 | g_k = grad(x_k) 12 | # compute function value 13 | f_k = Loss(x_k) 14 | 15 | # storage 16 | dim = len(x0) 17 | X = np.zeros((1,dim)) 18 | X[0] = np.copy(x0) 19 | 20 | # stop when gradient is flat (within tolerance) 21 | nn = 0 22 | while np.linalg.norm(g_k) > gtol and nn < max_iter: 23 | if nn%1 == 0: 24 | print(nn,f_k) 25 | # double the step size to counter backtracking 26 | mu_k = 2*mu_k; 27 | 28 | # compute step 29 | x_kp1 = x_k -mu_k*g_k; 30 | f_kp1 = Loss(x_kp1); 31 | 32 | # backtracking to find step size 33 | while f_kp1 >= f_k: 34 | # half our step size 35 | mu_k = mu_k /2 ; 36 | # take step 37 | x_kp1 = x_k -mu_k*g_k; 38 | # f_kp1 39 | f_kp1 = Loss(x_kp1); 40 | 41 | # break if mu is too small 42 | if mu_k <= mu_min: 43 | print('ERROR: mu too small.') 44 | return x_k,X 45 | 46 | # reset for next iteration 47 | x_k = np.copy(x_kp1) 48 | f_k = f_kp1; 49 | 50 | # compute gradient 51 | g_k = grad(x_k); 52 | 53 | # update iteration counter 54 | nn += 1 55 | X = np.copy(np.vstack((X,x_k))) 56 | 57 | return x_k,X 58 | 59 | 60 | if __name__ == '__main__': 61 | f = lambda x: x @ x 62 | g = lambda x: 2*x 63 | dim = 2 64 | x0 = 10*np.random.randn(dim) 65 | xopt,X = gradient_descent(f,g,x0,max_iter=200,gtol=1e-7) 66 | print(xopt) 67 | print(X) 68 | -------------------------------------------------------------------------------- /directionalvi/README.md: -------------------------------------------------------------------------------- 1 | ## Main Methods for Variational GP computions 2 | This directory contains the main components of the methods for a variational GP with directional derivatives, as well as GPs with derivatives. 3 | Much of this functionality is, or soon will be, incorporated into GPyTorch. 4 | 5 | The RBF directional derivative kernel is `RBFKernelDirectionalGrad.py`. 6 | 7 | The following files are the main scripts to initialize and run a method. To run an instance of DSVGP you would only need to import `directional_vi.py`. See the tests directory for usability. 8 | - `directional_vi.py` contains the methods for initializing and running a variational GP with directional derivatives. 9 | - `shared_directional_vi.py` contains the main methods for running a variational GP with directional derivatives with shared inducing directions. 10 | - `dfree_directional_vi.py` contains the main methods for running a variational GP with directional derivatives on a dataset that does not have any derivative information. 11 | - `traditional_vi.py` runs a standard SVGP. 12 | - `grad_svgp.py` runs a multi-output SVGP with full derivative information. 
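For reference, the directional-derivative quantities these components work with are the standard RBF-kernel identities (sketched here for context, not copied from the code): writing $k(x,x') = s^2 \exp\big(-\|x-x'\|^2/(2\ell^2)\big)$ for directions $u$ and $v$,

$$u^\top \nabla_x k(x,x') = -\frac{k(x,x')}{\ell^2}\, u^\top (x-x'), \qquad u^\top \nabla_x \nabla_{x'} k(x,x')\, v = \frac{k(x,x')}{\ell^2}\Big(u^\top v - \frac{\big(u^\top (x-x')\big)\big((x-x')^\top v\big)}{\ell^2}\Big).$$

Conditioning on a handful of directional derivatives per point, rather than the full $d$-dimensional gradient, is what keeps the inducing matrices small.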
13 | 14 | Variational Stategies are used for prediction in Variational GPs. 15 | - `DirectionalGradVariationalStrategy.py` is the workhorse variational strategy for GPs with directional derivatives. 16 | - `DFreeDirectionalGradVariationalStrategy.py` allows DSVGP and DPPGPR to train on data without derivative labels. 17 | - `SharedDirectionalGradVariationalStrategy.py` allows DSVGP and DPPGPR to use shared inducing directions. 18 | - `CiqDirectionalGradVariationalStrategy.py` allows DSVGP and DPPGPR to leverage contour integral quadrature. 19 | - `GradVariationalStrategy.py` is the variational strategy for a stochastic variational gaussian process with full derivative information. 20 | 21 | 22 | The `utils` directory contains useful helper functions. 23 | -------------------------------------------------------------------------------- /experiments/GNN_bo/turbo_utils.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import numpy as np 13 | 14 | 15 | def to_unit_cube(x, lb, ub): 16 | """Project to [0, 1]^d from hypercube with bounds lb and ub""" 17 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 18 | xx = (x - lb) / (ub - lb) 19 | return xx 20 | 21 | 22 | def from_unit_cube(x, lb, ub): 23 | """Project from [0, 1]^d to hypercube with bounds lb and ub""" 24 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 25 | xx = x * (ub - lb) + lb 26 | return xx 27 | 28 | 29 | def latin_hypercube(n_pts, dim): 30 | """Basic Latin hypercube implementation with center perturbation.""" 31 | X = np.zeros((n_pts, dim)) 32 | centers = (1.0 + 2.0 * np.arange(0.0, n_pts)) / float(2 * n_pts) 33 | for i in range(dim): # Shuffle the center locataions for each dimension. 34 | X[:, i] = centers[np.random.permutation(n_pts)] 35 | 36 | # Add some perturbations within each box 37 | pert = np.random.uniform(-1.0, 1.0, (n_pts, dim)) / float(2 * n_pts) 38 | X += pert 39 | return X 40 | 41 | -------------------------------------------------------------------------------- /experiments/rover/turbo_utils.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. 
# 10 | ############################################################################### 11 | 12 | import numpy as np 13 | 14 | 15 | def to_unit_cube(x, lb, ub): 16 | """Project to [0, 1]^d from hypercube with bounds lb and ub""" 17 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 18 | xx = (x - lb) / (ub - lb) 19 | return xx 20 | 21 | 22 | def from_unit_cube(x, lb, ub): 23 | """Project from [0, 1]^d to hypercube with bounds lb and ub""" 24 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 25 | xx = x * (ub - lb) + lb 26 | return xx 27 | 28 | 29 | def latin_hypercube(n_pts, dim): 30 | """Basic Latin hypercube implementation with center perturbation.""" 31 | X = np.zeros((n_pts, dim)) 32 | centers = (1.0 + 2.0 * np.arange(0.0, n_pts)) / float(2 * n_pts) 33 | for i in range(dim): # Shuffle the center locataions for each dimension. 34 | X[:, i] = centers[np.random.permutation(n_pts)] 35 | 36 | # Add some perturbations within each box 37 | pert = np.random.uniform(-1.0, 1.0, (n_pts, dim)) / float(2 * n_pts) 38 | X += pert 39 | return X 40 | 41 | -------------------------------------------------------------------------------- /experiments/rover/rover.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from finite_difference import fdiff_jac 3 | 4 | def rover_dynamics(u,x0): 5 | m = 5 # mass 6 | h = 0.1 #deltat 7 | T = 100 # number of steps 8 | eta = 1.0 # friction coeff 9 | 10 | # state, control 11 | dim_s = 4 12 | dim_c = 2 13 | 14 | # dynamics 15 | A = np.array([[1,0,h,0],[0,1,0,h],[0,0,(1-eta*h/m),0],[0,0,0,(1-eta*h/m)]]) 16 | B = np.array([[0,0],[0,0],[h/m,0],[0,h/m]]) 17 | 18 | # state control (time is a row) 19 | x = np.zeros((T,dim_s)) 20 | 21 | # reshape the control 22 | u = np.reshape(u,(T,dim_c)) 23 | 24 | # initial condition 25 | x[0] = x0 26 | 27 | # dynamics 28 | # x_{t+1} = Ax_t + Bu_t for t=0,...,T-1 29 | for t in range(0,T-1): 30 | x[t+1] = A @ x[t] + B @ u[t] 31 | return x 32 | 33 | def rover_obj(u): 34 | """ 35 | The rover problem: 36 | The goal is to learn a controller to drive a rover through four 37 | waypoints. 
38 | state: 4dim position, velocity 39 | control: 2dim x,y forces 40 | 41 | input: 42 | u: length 2T array, open-loop controller 43 | return: 44 | cost: float, cost associated with the controller 45 | """ 46 | assert len(u) == 200 47 | # initial condition 48 | x0 = np.array([5,20,0,0]) 49 | # compute dynamics 50 | x = rover_dynamics(u,x0) 51 | # waypoints 52 | W = np.array([[8,15,3,-4],[16,7,6,-4],[16,12,-6,-4],[0,0,0,0]]) 53 | way_times = (np.array([10,40,70,100]) - 1).astype(int) 54 | q1 = 1e0 # penalty on missing waypoint 55 | q2 = 1e-4 # penalty on control 56 | # compute cost 57 | cost = q1*np.sum((x[way_times] - W)**2) + q2*np.sum(u**2) 58 | 59 | return cost 60 | 61 | def rover_grad(u): 62 | assert len(u) == 200 63 | """finite difference gradient""" 64 | return fdiff_jac(rover_obj,u,h=1e-6) 65 | 66 | 67 | if __name__=="__main__": 68 | u = np.ones(200) 69 | print(rover_obj(u)) 70 | grad = rover_grad(u) 71 | u = u - 1e0*rover_grad(u) 72 | print(rover_obj(u)) 73 | u = u - 1e0*rover_grad(u) 74 | print(rover_obj(u)) 75 | -------------------------------------------------------------------------------- /experiments/rover/plot_traj_new.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import argparse 4 | import pickle 5 | from operator import itemgetter 6 | from argparse import Namespace 7 | import numpy as np 8 | import pandas as pd 9 | from matplotlib import pyplot as plt 10 | import matplotlib 11 | import pylab 12 | 13 | data = pickle.load(open("./data/rover_plot_data_p3.pickle", "rb")) 14 | 15 | FONTSIZE=20 16 | MARKERSIZE=20 17 | FIGURESUZE=(10,7) 18 | ALPHA=0.2 19 | LINEWIDTH=5 20 | PADDING=0.1 21 | 22 | style_dict = {"TuRBO": ["TuRBO", "dashed", '#ff7f0e'], 23 | "TuRBO-DPPGPR1": ["TuRBO-DPPGPR1", "solid", '#1f77b4'], 24 | "TuRBO-DPPGPR2": ["TuRBO-DPPGPR2", "solid", '#d62728'], 25 | "TuRBO-DPPGPR3": ["TuRBO-DPPGPR3", "solid", '#e377c2'], 26 | "BO-LCB": ["BO", "dashed", '#8c564b'], 27 | "Random Search": ["Random", "dotted", "#7f7f7f"] } 28 | 29 | N_method = len(data['labels']) 30 | 31 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=FIGURESUZE) 32 | for i in range(N_method): 33 | method = data['labels'][i] 34 | mean_data = data['means'][i] 35 | std_data = data['std'][i] 36 | ax.plot(mean_data, linewidth=LINEWIDTH, 37 | color=style_dict[method][2], 38 | label=style_dict[method][0], 39 | linestyle=style_dict[method][1]) 40 | ax.fill_between(range(len(mean_data)), 41 | mean_data+std_data, mean_data-std_data, 42 | color=style_dict[method][2], alpha=ALPHA) 43 | 44 | plt.ylim([200, 1200]) 45 | plt.yticks([300, 500, 700, 900, 1100], fontsize=FONTSIZE) 46 | plt.ylabel('Objective function value', fontsize=FONTSIZE) 47 | plt.xticks([0, 400, 800, 1200, 1600, 2000], fontsize=FONTSIZE) 48 | plt.xlabel("Number of evaluations", fontsize=FONTSIZE) 49 | plt.grid() 50 | plt.legend(fontsize=FONTSIZE-5) 51 | 52 | figurename = f"TuRBO_rover.pdf" 53 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 54 | fig.savefig(figurepath, bbox_inches='tight', pad_inches = PADDING) 55 | print("Figure saved:", figurepath) 56 | -------------------------------------------------------------------------------- /directionalvi/utils/test load helens.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from load_data import *" 10 | ] 11 | }, 12 | { 13 | 
"cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "args= {'seed': 123, 'n_train': 700, 'derivative': True}" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "res = load_helens(\"../../data/helens\", 0.5, **args)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 7, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "(,\n", 39 | " ,\n", 40 | " 2)" 41 | ] 42 | }, 43 | "execution_count": 7, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "res" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 12, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "700\n", 62 | "2700\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "print(len(res[0]))\n", 68 | "print(len(res[1]))" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.7.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 4 93 | } 94 | -------------------------------------------------------------------------------- /directionalvi/utils/.ipynb_checkpoints/test load helens-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from load_data import *" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "args= {'seed': 123, 'n_train': 700, 'derivative': True}" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "res = load_helens(\"../../data/helens\", 0.5, **args)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 7, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "(,\n", 39 | " ,\n", 40 | " 2)" 41 | ] 42 | }, 43 | "execution_count": 7, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "res" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 12, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "700\n", 62 | "2700\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "print(len(res[0]))\n", 68 | "print(len(res[1]))" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.7.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 4 93 | } 94 | -------------------------------------------------------------------------------- 
/experiments/uci_dfree/plot_nll.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import pickle 5 | import numpy as np 6 | import glob 7 | 8 | # read the data 9 | #data_files = glob.glob("./output/elevators/data*.pickle") 10 | #data_files = glob.glob("./output/kin40k/data*.pickle") 11 | #data_files = glob.glob("./output/energy/data*.pickle") 12 | #data_files = glob.glob("./output/protein/data*.pickle") 13 | data_files = glob.glob("./output/keggdirected/data*.pickle") 14 | 15 | data = [] 16 | for ff in data_files: 17 | # attributes 18 | attrib = {} 19 | # load 20 | d = pickle.load(open(ff, "rb")) 21 | attrib['mode']= d['mode'] 22 | attrib['ni'] = d['num_inducing'] 23 | if d['mode'] == 'SVGP': 24 | d['num_directions']= 0 25 | if d['mode'] == 'SVGP' and d['mll_type'] == "PLL": 26 | d['mode'] = 'PPGPR' 27 | elif d['mode'] == 'DSVGP' and d['mll_type'] == "PLL": 28 | d['mode'] = 'DPPGPR' 29 | attrib['nd'] = d['num_directions'] 30 | attrib['M'] = d['num_inducing']*(d['num_directions']+1) 31 | attrib['nll'] = d['test_nll'].item() 32 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 33 | attrib['test_time'] = d['test_time'] 34 | attrib['train_time'] = d['train_time'] 35 | # add an indicator attribute for plotting 36 | attrib['run'] = d['mode'] + str(d['num_directions']) 37 | data.append(attrib) 38 | 39 | # make a pandas df 40 | df = pd.DataFrame.from_dict(data,orient='columns') 41 | print(df) 42 | 43 | # compute the mean and standard error 44 | errs = df.groupby(['run','M']).sem() 45 | avgs = df.groupby(['run','M']).mean() 46 | print("\nMeans") 47 | print(avgs[['nll','rmse']]) 48 | print("\nStandard Errors") 49 | print(errs[['nll','rmse']]) 50 | 51 | # plot 52 | sns.set() 53 | sns.lineplot(x='M',y='nll',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=3,data=df) 54 | plt.title("NLL vs Inducing Matrix size") 55 | plt.ylabel("NLL") 56 | plt.xlabel("Inducing Matrix Size") 57 | plt.show() 58 | 59 | sns.lineplot(x='M',y='rmse',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=3,data=df) 60 | plt.title("RMSE vs Inducing Matrix size") 61 | plt.ylabel("RMSE") 62 | plt.xlabel("Inducing Matrix Size") 63 | plt.show() 64 | 65 | -------------------------------------------------------------------------------- /experiments/bunny/ImplicitBunny/readObj.m: -------------------------------------------------------------------------------- 1 | function obj = readObj(fname) 2 | % 3 | % obj = readObj(fname) 4 | % 5 | % This function parses wavefront object data 6 | % It reads the mesh vertices, texture coordinates, normal coordinates 7 | % and face definitions(grouped by number of vertices) in a .obj file 8 | % 9 | % 10 | % INPUT: fname - wavefront object file full path 11 | % 12 | % OUTPUT: obj.v - mesh vertices 13 | % : obj.vt - texture coordinates 14 | % : obj.vn - normal coordinates 15 | % : obj.f - face definition assuming faces are made of of 3 vertices 16 | % 17 | % Bernard Abayowa, Tec^Edge 18 | % 11/8/07 19 | 20 | % set up field types 21 | v = []; vt = []; vn = []; f.v = []; f.vt = []; f.vn = []; 22 | 23 | fid = fopen(fname); 24 | 25 | % parse .obj file 26 | while 1 27 | tline = fgetl(fid); 28 | if ~ischar(tline), break, end % exit at end of file 29 | ln = sscanf(tline,'%s',1); % line type 30 | %disp(ln) 31 | switch ln 32 | case 'v' % mesh vertexs 33 | v = [v; sscanf(tline(2:end),'%f')']; 34 | case 
'vt' % texture coordinate 35 | vt = [vt; sscanf(tline(3:end),'%f')']; 36 | case 'vn' % normal coordinate 37 | vn = [vn; sscanf(tline(3:end),'%f')']; 38 | case 'f' % face definition 39 | fv = []; fvt = []; fvn = []; 40 | str = textscan(tline(2:end),'%s'); str = str{1}; 41 | 42 | nf = length(findstr(str{1},'/')); % number of fields with this face vertices 43 | 44 | 45 | [tok str] = strtok(str,'//'); % vertex only 46 | for k = 1:length(tok) fv = [fv str2num(tok{k})]; end 47 | 48 | if (nf > 0) 49 | [tok str] = strtok(str,'//'); % add texture coordinates 50 | for k = 1:length(tok) fvt = [fvt str2num(tok{k})]; end 51 | end 52 | if (nf > 1) 53 | [tok str] = strtok(str,'//'); % add normal coordinates 54 | for k = 1:length(tok) fvn = [fvn str2num(tok{k})]; end 55 | end 56 | f.v = [f.v; fv]; f.vt = [f.vt; fvt]; f.vn = [f.vn; fvn]; 57 | end 58 | end 59 | fclose(fid); 60 | 61 | % set up matlab object 62 | obj.v = v; obj.vt = vt; obj.vn = vn; obj.f = f; 63 | -------------------------------------------------------------------------------- /experiments/bunny/bunny.sub: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J basic # Job name 3 | #SBATCH -o ../../slurm_output/job_%j.out # Name of stdout output file(%j expands to jobId) 4 | #SBATCH -e ../../slurm_output/job_%j.err # Name of stderr output file(%j expands to jobId) 5 | #SBATCH --get-user-env # Tells sbatch to retrieve the users login environment 6 | #SBATCH -N 1 # Total number of nodes requested 7 | #SBATCH -n 16 # Total number of cores requested 8 | #SBATCH --mem=32G # Total amount of (real) memory requested (per node) 9 | #SBATCH -t 168:00:00 # Time limit (hh:mm:ss) 10 | #SBATCH --partition=default_partition # Request partition for resource 11 | ##SBATCH --exclude=marschner-compute01 # Request partition for resource 12 | #SBATCH --exclude=joachims-compute-01,sablab-gpu-11 13 | #SBATCH --gres=gpu:1 # Specify a list of generic consumable resources (per node) 14 | 15 | 16 | . 
/home/xz584/anaconda3/etc/profile.d/conda.sh 17 | conda activate DSVGP 18 | dataset="real-bunny" 19 | 20 | # exp setups 21 | # fix some setups for this dataset 22 | batch_size=500 23 | watch_model=True 24 | # read other arguments from command line when sbatch this job 25 | model=DSVGP 26 | variational_strategy='CIQ' 27 | variational_distribution='standard' 28 | num_inducing=${1} 29 | num_directions=${2} 30 | num_epochs=${3} 31 | exp_name=${4} 32 | lr=${5} 33 | lr_ngd=0.1 34 | num_contour_quad=15 35 | seed=${6} 36 | lr_sched=${7} 37 | save_results=True 38 | mll_type='ELBO' 39 | gamma=${8} 40 | # compare different methods, comment out the chunk if not comparing with this method 41 | # find runlogs in logs folder 42 | python -u exp_bunny.py --dataset ${dataset} --variational_strategy ${variational_strategy} \ 43 | --variational_distribution ${variational_distribution} \ 44 | --num_inducing ${num_inducing} \ 45 | --num_directions ${num_directions} --num_epochs ${num_epochs} \ 46 | --batch_size ${batch_size} --model ${model} \ 47 | --lr ${lr} --lr_ngd ${lr_ngd} --num_contour_quad ${num_contour_quad} \ 48 | --watch_model ${watch_model} \ 49 | --exp_name ${exp_name} --seed ${seed} --lr_sched ${lr_sched} --save_results ${save_results} \ 50 | --mll_type ${mll_type} --gamma ${gamma} 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /experiments/stellarator_regression/plot_nll.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import pickle 5 | import numpy as np 6 | import glob 7 | 8 | # read the data 9 | data_files = glob.glob("./output/data_stell_regress_*.pickle") 10 | 11 | plt.figure(figsize=(10,10)) 12 | 13 | data = [] 14 | for ff in data_files: 15 | # attributes 16 | attrib = {} 17 | # load 18 | d = pickle.load(open(ff, "rb")) 19 | attrib['mode']= d['mode'] 20 | attrib['ni'] = d['num_inducing'] 21 | if d['mode'] == 'SVGP': 22 | d['num_directions']= 0 23 | attrib['nd'] = d['num_directions'] 24 | attrib['M'] = d['num_inducing']*(d['num_directions']+1) 25 | attrib['nll'] = d['test_nll'].item() 26 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 27 | attrib['test_time'] = d['test_time'] 28 | attrib['train_time'] = d['train_time'] 29 | if d['mode'] == 'SVGP' and d['mll_type'] == 'PLL': 30 | d['mode'] = "PPGPR" 31 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 32 | d['mode'] = "DPPGPR" 33 | elif d['mode'] == 'GradSVGP' and d['mll_type'] == 'PLL': 34 | d['mode'] = "GradPPGPR" 35 | elif d['mode'] == 'DSVGP-Shared' and d['mll_type'] == 'PLL': 36 | d['mode'] = "DPPGPR-Shared" 37 | if "D" in d['mode'] or "Grad" in d['mode']: 38 | attrib['run'] = d['mode'] + str(d['num_directions']) 39 | else: 40 | attrib['run'] = d['mode'] 41 | data.append(attrib) 42 | # make a pandas df 43 | df = pd.DataFrame.from_dict(data,orient='columns') 44 | #df = df[df['M'] > 400] 45 | print(df) 46 | pd.to_pickle(df,"./stellarator_plot_data.pickle") 47 | # compute means 48 | avgs = df.groupby(['run','M']).mean() 49 | print("\nMeans") 50 | print(avgs[['nll','rmse']]) 51 | 52 | # plot 53 | rc = {'figure.figsize':(10,5), 54 | 'axes.facecolor':'white', 55 | 'axes.grid' : True, 56 | 'grid.color': '.8', 57 | 'font.family':'Times New Roman', 58 | 'font.size' : 15} 59 | plt.rcParams.update(rc) 60 | #sns.set() 61 | #sns.set_style("whitegrid") 62 | #sns.set_context("paper", font_scale=2.0) 63 | 
sns.lineplot(x='M',y='nll',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 64 | #sns.lineplot(x='M',y='rmse',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 65 | plt.title("NLL vs Inducing Matrix size") 66 | plt.ylabel("NLL") 67 | plt.xlabel("Inducing Matrix Size") 68 | plt.legend(loc=1) 69 | plt.show() 70 | 71 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gcn.sub: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J basic # Job name 3 | #SBATCH -o ../../slurm_output/job_%j.out # Name of stdout output file(%j expands to jobId) 4 | #SBATCH -e ../../slurm_output/job_%j.err # Name of stderr output file(%j expands to jobId) 5 | #SBATCH --get-user-env # Tells sbatch to retrieve the users login environment 6 | #SBATCH -N 1 # Total number of nodes requested 7 | #SBATCH -n 16 # Total number of cores requested 8 | #SBATCH --mem=32G # Total amount of (real) memory requested (per node) 9 | #SBATCH -t 168:00:00 # Time limit (hh:mm:ss) 10 | #SBATCH --partition=default_partition # Request partition for resource 11 | ##SBATCH --exclude=marschner-compute01 # Request partition for resource 12 | #SBATCH --exclude=joachims-compute-01,sablab-gpu-11 13 | #SBATCH --gres=gpu:1 # Specify a list of generic consumable resources (per node) 14 | 15 | 16 | . /home/xz584/anaconda3/etc/profile.d/conda.sh 17 | conda activate DSVGP2 18 | 19 | # exp setups 20 | dataset="PubMed" 21 | batch_size=500 22 | # watch_model=True 23 | model=${11} 24 | variational_strategy='standard' 25 | variational_distribution='standard' 26 | num_inducing=${1} 27 | num_directions=${2} 28 | num_epochs=${3} 29 | exp_name=${4} 30 | lr=${5} 31 | lr_ngd=0.1 32 | num_contour_quad=15 33 | seed=${6} 34 | lr_sched=${7} 35 | mll_type=${12} 36 | gamma=${8} 37 | turbo_batch_size=${9} 38 | turbo_max_evals=${10} 39 | # find runlogs in logs folder 40 | python -u gcn_turbo.py --dataset ${dataset} --variational_strategy ${variational_strategy} \ 41 | --variational_distribution ${variational_distribution} \ 42 | --num_inducing ${num_inducing} \ 43 | --num_directions ${num_directions} --num_epochs ${num_epochs} \ 44 | --batch_size ${batch_size} --model ${model} \ 45 | --lr ${lr} --lr_ngd ${lr_ngd} --num_contour_quad ${num_contour_quad} \ 46 | --exp_name ${exp_name} --seed ${seed} --lr_sched ${lr_sched} \ 47 | --mll_type ${mll_type} --gamma ${gamma} \ 48 | --turbo_batch_size ${turbo_batch_size} --turbo_max_evals ${turbo_max_evals} \ 49 | 2>&1 | tee runlogs/a.out_${dataset}_${model}_m${num_inducing}_p${num_directions}_epoch${num_epochs}_turboN${turbo_max_evals}_turbo_bs${turbo_batch_size}_exp${exp_name} 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/synthetic/exp_run.sh: -------------------------------------------------------------------------------- 1 | dataset="synthetic-StyblinskiTang" # synthetic/real - dataset name 2 | # dataset="real-helens" 3 | 4 | # exp setups 5 | n_train=10000 6 | n_test=10000 7 | num_inducing=500 8 | num_directions=2 9 | num_epochs=1000 10 | batch_size=512 11 | lr=0.01 12 | lr_ngd=0.1 13 | num_contour_quad=15 14 | watch_model=False 15 | exp_name="TEST" 16 | seed=0 17 | lr_sched="step_lr" 18 | save_results=False 19 | mll_type="ELBO" 20 | 21 | # compare different methods, comment out the chunk if not comparing with this 
method 22 | # find runlogs in logs folder 23 | 24 | model="DSVGP" 25 | variational_strategy="standard" 26 | variational_distribution="standard" 27 | sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 28 | ${n_train} ${n_test} ${num_inducing} \ 29 | ${num_directions} ${num_epochs} ${batch_size} ${model} \ 30 | ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 31 | ${seed} ${lr_sched} ${save_results} ${mll_type} 32 | 33 | # model="DSVGP" 34 | # variational_strategy="standard" 35 | # variational_distribution="NGD" 36 | # sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 37 | # ${n_train} ${n_test} ${num_inducing} \ 38 | # ${num_directions} ${num_epochs} ${batch_size} ${model} \ 39 | # ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 40 | # ${seed} ${lr_sched} ${save_results} ${mll_type} 41 | 42 | # model="DSVGP" 43 | # variational_strategy="CIQ" 44 | # variational_distribution="NGD" 45 | # sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 46 | # ${n_train} ${n_test} ${num_inducing} \ 47 | # ${num_directions} ${num_epochs} ${batch_size} ${model} \ 48 | # ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 49 | # ${seed} ${lr_sched} ${save_results} ${mll_type} 50 | 51 | # for traditional SVGP, 52 | # variational_strategy and variational_distribution don't matter, but need to pass in them. 53 | # model="SVGP" 54 | # variational_strategy="standard" 55 | # variational_distribution="standard" 56 | # sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 57 | # ${n_train} ${n_test} ${num_inducing} \ 58 | # ${num_directions} ${num_epochs} ${batch_size} ${model} \ 59 | # ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 60 | # ${seed} ${lr_sched} ${save_results} ${mll_type} 61 | -------------------------------------------------------------------------------- /experiments/synthetic1/plot_nll.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import pickle 5 | import numpy as np 6 | import glob 7 | 8 | # read the data 9 | data_files = glob.glob("./output/data*.pickle") 10 | 11 | data = [] 12 | for ff in data_files: 13 | # attributes 14 | attrib = {} 15 | # load 16 | d = pickle.load(open(ff, "rb")) 17 | attrib['mode']= d['mode'] 18 | if d['mode'] == "ExactGradGP": 19 | attrib['mode'] = d['mode'] 20 | attrib['run'] = d['mode'] 21 | attrib['M'] = d['M'] 22 | attrib['nll'] = d['test_nll'].item() 23 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 24 | print(f"ExactGradGP nll: {d['test_nll'].item()}, rmse: {np.sqrt(d['test_mse'].item())}") 25 | # dont plot ExactGradGP 26 | data.append(attrib) 27 | continue 28 | 29 | attrib['ni'] = d['num_inducing'] 30 | if d['mode'] == 'SVGP': 31 | d['num_directions']= 0 32 | attrib['nd'] = d['num_directions'] 33 | attrib['M'] = d['num_inducing']*(d['num_directions']+1) 34 | attrib['nll'] = d['test_nll'].item() 35 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 36 | attrib['test_time'] = d['test_time'] 37 | attrib['train_time'] = d['train_time'] 38 | if d['mode'] == 'SVGP' and d['mll_type'] == 'PLL': 39 | d['mode'] = "PPGPR" 40 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 41 | d['mode'] = "DPPGPR" 42 | elif d['mode'] == 'GradSVGP' and d['mll_type'] == 'PLL': 43 | d['mode'] = "GradPPGPR" 44 | if "D" in d['mode'] or "Grad" in d['mode']: 45 | attrib['run'] = 
d['mode'] + str(d['num_directions']) 46 | else: 47 | attrib['run'] = d['mode'] 48 | 49 | # reduce points 50 | #if not np.any(np.isclose(attrib['M'],[800],atol=10)): 51 | # continue 52 | if not np.any(np.isclose(attrib['M'],[200,400,800,1200],atol=10)): 53 | continue 54 | ## reduce methods 55 | if not attrib['run'] in ['SVGP','PPGPR','GradSVGP5','GradPPGPR5','DSVGP2','DPPGPR2','DSKI','ExactGradGP']: 56 | continue 57 | data.append(attrib) 58 | # make a pandas df 59 | df = pd.DataFrame.from_dict(data,orient='columns') 60 | #pd.to_pickle(df,"sin5_plot_data.pickle") 61 | #df = df[df['run']!='GradSVGP3'] 62 | print(df) 63 | 64 | # plot 65 | rc = {'figure.figsize':(10,5), 66 | 'axes.facecolor':'white', 67 | 'axes.grid' : True, 68 | 'grid.color': '.8', 69 | 'font.family':'Times New Roman', 70 | 'font.size' : 15} 71 | plt.rcParams.update(rc) 72 | sns.lineplot(x='M',y='nll',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 73 | #sns.lineplot(x='M',y='rmse',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 74 | plt.title("NLL vs Inducing Matrix size") 75 | plt.ylabel("NLL") 76 | plt.xlabel("Inducing Matrix Size") 77 | plt.show() 78 | 79 | -------------------------------------------------------------------------------- /directionalvi/utils/csv_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch.utils.data import Dataset 4 | import numpy as np 5 | import pandas as pd 6 | 7 | class csv_dataset(Dataset): 8 | """Reads a CSV dataset on the fly 9 | """ 10 | def __init__(self,csv_file,gradients=True,rescale=False): 11 | """ 12 | Args: 13 | csv_file (string): csv file name containing data 14 | rows have header x0,x1,...xd,f,g0,...,gd 15 | xi is ith x index 16 | f is function value 17 | gi is ith g index 18 | """ 19 | self.df = pd.read_csv(csv_file) 20 | # x indexes 21 | self.xidx = np.where(['x' in ci for ci in self.df.columns])[0] 22 | # function values 23 | self.fidx = np.where(['f' in ci for ci in self.df.columns])[0] 24 | # gradient indexes 25 | self.gidx = np.where(['g' in ci for ci in self.df.columns])[0] 26 | # combined f and g indexes with f first 27 | self.fgidx = np.concatenate((self.fidx,self.gidx)) 28 | #print stuff 29 | print(self.xidx) 30 | print(self.fidx) 31 | 32 | # gradients option 33 | self.gradients = gradients 34 | # map to unit cube 35 | self.rescale = rescale 36 | # bounds for rescaling 37 | self.lb = torch.tensor(self.df.iloc[:,self.xidx].min(axis=0).to_numpy()).float() 38 | self.ub = torch.tensor(self.df.iloc[:,self.xidx].max(axis=0).to_numpy()).float() 39 | # mean and std of f 40 | self.favg = self.df.iloc[:,self.fidx].mean().to_numpy()[0] 41 | self.fstd = self.df.iloc[:,self.fidx].std().to_numpy()[0] 42 | 43 | self.dim = len(self.xidx) 44 | self.n = self.df.shape[0] 45 | 46 | def __len__(self): 47 | return len(self.df) 48 | 49 | def __getitem__(self, idx): 50 | """ 51 | return: a tuple of torch tensors (x,y) 52 | x is a 2d-tensor of type float 53 | y is a 1d-tensor containing function value then the gradient 54 | """ 55 | if torch.is_tensor(idx): 56 | idx = idx.tolist() 57 | # get the row 58 | sample = self.df.iloc[idx].to_numpy() 59 | # return a tuple of tensors (x,[f(x),g(x)]) 60 | x = torch.tensor(sample[self.xidx]).float() # x must be dtype float 61 | if self.gradients: 62 | y = torch.tensor(sample[self.fgidx]) 63 | if self.rescale: 64 | # map x to unit cube 65 | 
x = (x-self.lb)/(self.ub - self.lb) 66 | # standardize function values (f-mu)/sigma 67 | y[0] = (y[0] - self.favg)/self.fstd 68 | # scale gradients appropriately 69 | y[1:] =y[1:]*(self.ub - self.lb)/self.fstd 70 | else: 71 | y = sample[self.fidx][0] 72 | if self.rescale: 73 | # map x to unit cube 74 | x = (x-self.lb)/(self.ub - self.lb) 75 | # standardize function values (f-mu)/sigma 76 | y = (y - self.favg)/self.fstd 77 | return (x,y) 78 | 79 | -------------------------------------------------------------------------------- /experiments/rover/plot_traj.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.pylab as pl 3 | import seaborn as sns 4 | import pandas as pd 5 | import pickle 6 | import numpy as np 7 | import glob 8 | 9 | # read the data 10 | data_files = glob.glob("./output/data*.pickle") 11 | colors = pl.cm.jet(np.linspace(0,1,len(data_files))) 12 | 13 | rc = {'figure.figsize':(10,5), 14 | 'axes.facecolor':'white', 15 | 'axes.grid' : True, 16 | 'grid.color': '.8', 17 | 'font.family':'Times New Roman', 18 | 'font.size' : 20} 19 | plt.rcParams.update(rc) 20 | plt.figure(figsize=(10,10)) 21 | 22 | num_curves = 6 23 | means = np.zeros((num_curves,2000)) 24 | std = np.zeros((num_curves,2000)) 25 | n_type = np.zeros(num_curves) 26 | data = [] 27 | for ii in range(len(data_files)): 28 | ff = data_files[ii] 29 | # attributes 30 | attrib = {} 31 | # load 32 | d = pickle.load(open(ff, "rb")) 33 | if d['mode'] == 'Vanilla': 34 | label = "TuRBO" 35 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 36 | M = d['num_inducing']*(d['num_directions']+1) 37 | label = f"TuRBO-DPPGPR{d['num_directions']}" 38 | elif d['mode'] == "GD": 39 | label= d['mode'] 40 | # minimum function values 41 | fX = d['fX'] 42 | fXmin = np.minimum.accumulate(fX) 43 | # accumulate means 44 | if d['mode'] == "DSVGP" and d['num_directions']==1: 45 | means[0] += fXmin 46 | std[0] += fXmin**2 47 | n_type[0] += 1 48 | if d['mode'] == "Vanilla": 49 | means[1] += fXmin 50 | std[1] += fXmin**2 51 | n_type[1] += 1 52 | if d['mode'] == "Random Search": 53 | means[2] += fXmin 54 | std[2] += fXmin**2 55 | n_type[2] += 1 56 | if d['mode'] == "BO-LCB": 57 | means[3] += fXmin 58 | std[3] += fXmin**2 59 | n_type[3] += 1 60 | if d['mode'] == "DSVGP" and d['num_directions']==2: 61 | means[4] += fXmin 62 | std[4] += fXmin**2 63 | n_type[4] += 1 64 | if d['mode'] == "DSVGP" and d['num_directions']==3: 65 | means[5] += fXmin 66 | std[5] += fXmin**2 67 | n_type[5] += 1 68 | 69 | #plt.plot(fXmin,linewidth=5,markersize=12,color=colors[ii],label=label) 70 | 71 | means = np.diag(1/n_type) @ means 72 | std = np.sqrt(np.diag(1/n_type)@ std - means**2) 73 | labels =["TuRBO-DPPGPR1","TuRBO","Random Search","BO-LCB","TuRBO-DPPGPR2","TuRBO-DPPGPR3"] 74 | for ii,label in enumerate(labels): 75 | plt.plot(means[ii],linewidth=3,markersize=12,label=labels[ii]) 76 | plt.fill_between(np.arange(0,2000),means[ii]-std[ii], means[ii]+std[ii],alpha=0.7) 77 | 78 | print(means) 79 | print(std) 80 | print(labels) 81 | d = {} 82 | d['labels'] = labels 83 | d['means'] = means 84 | d['std'] = std 85 | pickle.dump(d,open("rover_plot_data.pickle","wb")) 86 | # plot 87 | #sns.set() 88 | #sns.set_style("whitegrid") 89 | #sns.set_context("paper", font_scale=1.5) 90 | plt.legend() 91 | plt.title("Optimization Convergence on Rover Problem") 92 | plt.yscale("log") 93 | plt.ylabel("f(x)") 94 | plt.xlabel("Evaluation") 95 | plt.show() 96 | 97 | 98 | 99 | 
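# Note: each run's trace above is reduced to a best-value-so-far curve with
# np.minimum.accumulate, and the per-method spread is recovered from accumulated first and
# second moments via Var[x] = E[x^2] - (E[x])^2. A minimal self-contained sketch of that
# identity (the two "runs" below are made up for illustration only):
#
#   import numpy as np
#   runs = np.array([[3.0, 2.0, 2.0], [5.0, 4.0, 1.0]])   # two hypothetical fXmin curves
#   mean = runs.mean(axis=0)                               # E[x]
#   std = np.sqrt((runs ** 2).mean(axis=0) - mean ** 2)    # sqrt(E[x^2] - E[x]^2)
#   assert np.allclose(std, runs.std(axis=0))              # equals the population std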
-------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/models2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import MessagePassing 4 | from torch_geometric.utils import add_self_loops, degree 5 | import networkx as nx 6 | 7 | 8 | # https://github.com/praxidike97/GraphNeuralNet/blob/master/main.py 9 | 10 | class GCNConv(MessagePassing): 11 | def __init__(self, in_channels, out_channels): 12 | super(GCNConv, self).__init__(aggr='add') 13 | self.lin = torch.nn.Linear(in_channels, out_channels) 14 | 15 | def forward(self, x, edge_index): 16 | # Step 1: Add self-loops 17 | edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) 18 | 19 | # Step 2: Multiply with weights 20 | x = self.lin(x) 21 | 22 | # Step 3: Calculate the normalization 23 | row, col = edge_index 24 | deg = degree(row, x.size(0), dtype=x.dtype) 25 | deg_inv_sqrt = deg.pow(-0.5) 26 | norm = deg_inv_sqrt[row] * deg_inv_sqrt[col] 27 | 28 | # Step 4: Propagate the embeddings to the next layer 29 | return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, 30 | norm=norm) 31 | 32 | def message(self, x_j, norm): 33 | # Normalize node features. 34 | return norm.view(-1, 1) * x_j 35 | 36 | 37 | class Net(torch.nn.Module): 38 | def __init__(self, dataset): 39 | super(Net, self).__init__() 40 | self.conv1 = GCNConv(dataset.num_node_features, 8) 41 | self.conv2 = GCNConv(8, dataset.num_classes) 42 | self.n_params = sum(p.numel() for p in self.parameters()) 43 | 44 | def forward(self, data): 45 | x, edge_index = data.x, data.edge_index 46 | 47 | x = self.conv1(x, edge_index) 48 | x = F.relu(x) 49 | x = F.dropout(x, training=self.training) 50 | x = self.conv2(x, edge_index) 51 | 52 | return F.log_softmax(x, dim=1) 53 | 54 | def update_weights(self,weights): 55 | """ 56 | method to update the weights of the nn 57 | """ 58 | # dont track update in grad 59 | self.eval() 60 | 61 | # ordered keys of params 62 | state_dict = self.state_dict() 63 | keys = list(state_dict.keys()) 64 | keys.sort() # ensure we have the same order each time 65 | 66 | used_params = 0 67 | #for key in keys: 68 | for param in self.parameters(): 69 | # get the size and shape of the parameter 70 | #param_size = state_dict[key].numel() 71 | #param_shape = state_dict[key].shape 72 | param_size = param.numel() 73 | param_shape = param.shape 74 | new_params = weights[used_params:used_params+param_size].reshape(param_shape) 75 | # Update the parameter. 
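            # Note: assigning to param.data below overwrites the weights in place without
            # being recorded by autograd; the flat `weights` vector comes from an external
            # caller and is consumed in self.parameters() order, param.numel() values at a
            # time (used_params tracks the read offset into the vector).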
76 | #state_dict[key].copy_(new_params) 77 | param.data = new_params 78 | # counter 79 | used_params +=param_size 80 | 81 | def get_grad(self): 82 | grads = [] 83 | for param in self.parameters(): 84 | grads.append(param.grad.view(-1)) 85 | grads = torch.cat(grads) 86 | return grads 87 | -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/models.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.nn.parameter import Parameter 6 | from torch.nn.modules.module import Module 7 | 8 | class GCN(nn.Module): 9 | def __init__(self, nfeat, nhid, nclass, dropout): 10 | super(GCN, self).__init__() 11 | 12 | self.gc1 = GraphConvolution(nfeat, nhid) 13 | self.gc2 = GraphConvolution(nhid, nclass) 14 | self.dropout = dropout 15 | self.n_params = sum(p.numel() for p in self.parameters()) 16 | 17 | def forward(self, x, adj): 18 | x = F.relu(self.gc1(x, adj)) 19 | x = F.dropout(x, self.dropout, training=self.training) 20 | x = self.gc2(x, adj) 21 | return F.log_softmax(x, dim=1) 22 | 23 | def update_weights(self,weights): 24 | """ 25 | method to update the weights of the nn 26 | """ 27 | # dont track update in grad 28 | self.eval() 29 | 30 | # ordered keys of params 31 | state_dict = self.state_dict() 32 | keys = list(state_dict.keys()) 33 | keys.sort() # ensure we have the same order each time 34 | 35 | used_params = 0 36 | #for key in keys: 37 | for param in self.parameters(): 38 | # get the size and shape of the parameter 39 | #param_size = state_dict[key].numel() 40 | #param_shape = state_dict[key].shape 41 | param_size = param.numel() 42 | param_shape = param.shape 43 | new_params = weights[used_params:used_params+param_size].reshape(param_shape) 44 | # Update the parameter. 45 | #state_dict[key].copy_(new_params) 46 | param.data = new_params 47 | # counter 48 | used_params +=param_size 49 | 50 | def get_grad(self): 51 | grads = [] 52 | for param in self.parameters(): 53 | grads.append(param.grad.view(-1)) 54 | grads = torch.cat(grads) 55 | 56 | return grads 57 | 58 | class GraphConvolution(Module): 59 | """ 60 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 61 | """ 62 | 63 | def __init__(self, in_features, out_features, bias=True): 64 | super(GraphConvolution, self).__init__() 65 | self.in_features = in_features 66 | self.out_features = out_features 67 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 68 | if bias: 69 | self.bias = Parameter(torch.FloatTensor(out_features)) 70 | else: 71 | self.register_parameter('bias', None) 72 | self.reset_parameters() 73 | 74 | def reset_parameters(self): 75 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 76 | self.weight.data.uniform_(-stdv, stdv) 77 | if self.bias is not None: 78 | self.bias.data.uniform_(-stdv, stdv) 79 | 80 | def forward(self, input, adj): 81 | support = torch.mm(input, self.weight) 82 | output = torch.spmm(adj, support) 83 | if self.bias is not None: 84 | return output + self.bias 85 | else: 86 | return output 87 | 88 | def __repr__(self): 89 | return self.__class__.__name__ + ' (' \ 90 | + str(self.in_features) + ' -> ' \ 91 | + str(self.out_features) + ')' 92 | -------------------------------------------------------------------------------- /tests/test_traditional_vi.py: -------------------------------------------------------------------------------- 1 | from gpytorch.models import ApproximateGP 2 | from gpytorch.variational import CholeskyVariationalDistribution 3 | from gpytorch.variational import VariationalStrategy 4 | from torch.utils.data import TensorDataset, DataLoader 5 | import tqdm 6 | import math 7 | import time 8 | import torch 9 | import gpytorch 10 | from matplotlib import pyplot as plt 11 | import numpy as np 12 | import sys 13 | sys.path.append("../") 14 | sys.path.append("../directionalvi/utils") 15 | sys.path.append("../directionalvi") 16 | from utils.metrics import MSE 17 | import traditional_vi 18 | import testfun 19 | 20 | 21 | # setups 22 | n = 600 23 | n_test = 1000 24 | dim = 2 25 | num_inducing = 20 26 | minibatch_size = int(n/2) 27 | num_epochs = 400 28 | use_ciq = False 29 | use_ngd = False 30 | learning_rate_hypers=0.01 31 | learning_rate_ngd=0.1 32 | # lr-schedule 33 | gamma = 10.0 34 | levels = np.array([20,150,300]) 35 | def lr_sched(epoch): 36 | a = np.sum(levels > epoch) 37 | return (1./gamma)**a 38 | 39 | # seed 40 | torch.random.manual_seed(0) 41 | 42 | # trainig and testing data 43 | train_x = torch.rand(n,dim) 44 | test_x = torch.rand(n_test,dim) 45 | train_y = testfun.f(train_x, deriv=False) 46 | test_y = testfun.f(test_x, deriv=False) 47 | if torch.cuda.is_available(): 48 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 49 | 50 | train_dataset = TensorDataset(train_x, train_y) 51 | test_dataset = TensorDataset(test_x, test_y) 52 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 53 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 54 | 55 | print("\n\n---Standard SVGP---") 56 | print(f"Start training with {n} trainig data of dim {dim}") 57 | print(f"VI setups: {num_inducing} inducing points") 58 | 59 | args={"verbose":True} 60 | 61 | # model training 62 | t1 = time.time_ns() 63 | model,likelihood = traditional_vi.train_gp(train_dataset,dim, 64 | num_inducing=num_inducing, 65 | minibatch_size=minibatch_size, 66 | num_epochs=num_epochs, 67 | use_ngd=use_ngd, 68 | use_ciq=use_ciq, 69 | learning_rate_hypers=learning_rate_hypers, 70 | learning_rate_ngd=learning_rate_ngd, 71 | lr_sched=lr_sched, 72 | tqdm=False, **args) 73 | t2 = time.time_ns() 74 | means, variances = traditional_vi.eval_gp(test_dataset,model,likelihood, 75 | num_inducing=num_inducing, 76 | minibatch_size=n_test) 77 | t3 = time.time_ns() 78 | 79 | # compute MSE 80 | test_mse = MSE(test_y.cpu(),means) 81 | # compute mean negative predictive density 82 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y.cpu()).mean() 83 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}") 84 | print(f"Training time: {(t2-t1)/1e9:.2f} sec, testing time: {(t3-t2)/1e9:.2f} sec") 85 | 86 | 87 | 
# from mpl_toolkits.mplot3d import axes3d 88 | # import matplotlib.pyplot as plt 89 | # fig = plt.figure(figsize=(12,6)) 90 | # ax = fig.add_subplot(111, projection='3d') 91 | # ax.scatter(train_x[:,0],train_x[:,1],train_y, color='k') 92 | # ax.scatter(train_x[:,0],train_x[:,1],means, color='b') 93 | # plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 94 | # plt.show() 95 | -------------------------------------------------------------------------------- /graphite_environment.yml: -------------------------------------------------------------------------------- 1 | name: DSVGP 2 | channels: 3 | - pytorch 4 | - intel 5 | - defaults 6 | dependencies: 7 | - backcall=0.2.0=pyhd3eb1b0_0 8 | - blas=1.0=mkl 9 | - ca-certificates=2021.1.19=h06a4308_1 10 | - certifi=2020.12.5=py38h06a4308_0 11 | - cudatoolkit=11.0.221=h6bb024c_0 12 | - cycler=0.10.0=py38_0 13 | - dbus=1.13.18=hb2f20db_0 14 | - decorator=4.4.2=pyhd3eb1b0_0 15 | - expat=2.2.10=he6710b0_2 16 | - fontconfig=2.13.1=h6c09931_0 17 | - freetype=2.10.4=h9e62b58_0 18 | - glib=2.67.4=h36276a3_1 19 | - gst-plugins-base=1.14.0=h8213a91_2 20 | - gstreamer=1.14.0=h28cd5cc_2 21 | - icu=58.2=he6710b0_3 22 | - intel-openmp=2020.3=intel_304 23 | - intelpython=2021.1.1=1 24 | - ipykernel=5.3.4=py38h5ca1d4c_0 25 | - ipython=7.21.0=py38hb070fc8_0 26 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 27 | - jedi=0.17.0=py38_0 28 | - jpeg=9b=h024ee3a_2 29 | - jupyter_client=6.1.7=py_0 30 | - jupyter_core=4.7.1=py38h06a4308_0 31 | - kiwisolver=1.3.1=py38h2531618_0 32 | - lcms2=2.11=h396b838_0 33 | - ld_impl_linux-64=2.33.1=h53a641e_7 34 | - libffi=3.3=h07ac4c1_13 35 | - libgcc-ng=9.3.0=hdf63c60_101 36 | - libgfortran-ng=7.3.0=hdf63c60_0 37 | - libpng=1.6.37=h17b3f18_7 38 | - libsodium=1.0.16=3 39 | - libstdcxx-ng=9.3.0=hdf63c60_101 40 | - libtiff=4.1.0=h2733197_1 41 | - libuuid=1.0.3=h1bed415_2 42 | - libuv=1.40.0=h7b6447c_0 43 | - libxcb=1.14=h7b6447c_0 44 | - libxml2=2.9.10=h9aba842_4 45 | - lz4-c=1.9.2=h7708b8d_3 46 | - matplotlib=3.3.4=py38h06a4308_0 47 | - matplotlib-base=3.3.4=py38h62a2d02_0 48 | - mkl=2020.4=intel_304 49 | - mkl-service=2.3.0=py38he904b0f_0 50 | - mkl_fft=1.3.0=py38h54f3939_0 51 | - mkl_random=1.1.1=py38h0573a6f_0 52 | - ncurses=6.2=he6710b0_1 53 | - ninja=1.10.2=py38hff7bd54_0 54 | - numpy=1.19.2=py38h54aff64_0 55 | - numpy-base=1.19.2=py38hfa32c7d_0 56 | - olefile=0.46=py_0 57 | - openssl=1.1.1j=h27cfd23_0 58 | - pandas=1.2.3=py38ha9443f7_0 59 | - parso=0.8.1=pyhd3eb1b0_0 60 | - pcre=8.44=he6710b0_0 61 | - pexpect=4.8.0=pyhd3eb1b0_3 62 | - pickleshare=0.7.5=pyhd3eb1b0_1003 63 | - pillow=8.1.2=py38he98fc37_0 64 | - pip=21.0.1=py38h06a4308_0 65 | - prompt-toolkit=3.0.8=py_0 66 | - ptyprocess=0.7.0=pyhd3eb1b0_2 67 | - pygments=2.8.1=pyhd3eb1b0_0 68 | - pyparsing=2.4.7=pyhd3eb1b0_0 69 | - pyqt=5.9.2=py38h05f1152_4 70 | - python=3.8.8=hdb3f193_4 71 | - python-dateutil=2.8.1=pyhd3eb1b0_0 72 | - pytorch=1.7.1=py3.8_cuda11.0.221_cudnn8.0.5_0 73 | - pytz=2021.1=pyhd3eb1b0_0 74 | - pyzmq=18.1.1=py38he6710b0_0 75 | - qt=5.9.7=h5867ecd_1 76 | - readline=8.1=h27cfd23_0 77 | - scipy=1.6.1=py38h91f5cce_0 78 | - setuptools=52.0.0=py38h06a4308_0 79 | - sip=4.19.13=py38he6710b0_0 80 | - six=1.15.0=py38h06a4308_0 81 | - sqlite=3.33.0=h88c068d_1 82 | - tbb=2020.3=intel_304 83 | - tk=8.6.10=hbc83047_0 84 | - torchaudio=0.7.2=py38 85 | - torchvision=0.8.2=py38_cu110 86 | - tornado=6.1=py38h27cfd23_0 87 | - tqdm=4.56.0=pyhd3eb1b0_0 88 | - traitlets=5.0.5=pyhd3eb1b0_0 89 | - typing_extensions=3.7.4.3=pyha847dfd_0 90 | - 
wcwidth=0.2.5=py_0 91 | - wheel=0.36.2=pyhd3eb1b0_0 92 | - xz=5.2.5=hcc43529_2 93 | - zeromq=4.3.1=he6710b0_3 94 | - zlib=1.2.11.1=hb8a9d29_3 95 | - zstd=1.4.5=hdb51d2f_0 96 | - pip: 97 | - apptools==5.1.0 98 | - botorch==0.4.0 99 | - configobj==5.0.6 100 | - envisage==5.0.0 101 | - gpytorch==1.4.0 102 | - importlib-metadata==3.7.3 103 | - importlib-resources==5.1.2 104 | - pyface==7.3.0 105 | - traits==6.2.0 106 | - traitsui==7.1.1 107 | - vtk==9.0.1 108 | - zipp==3.4.1 109 | prefix: /home/xz584/anaconda3/envs/DSVGP 110 | -------------------------------------------------------------------------------- /tests/test_grad_svgp.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import TensorDataset, DataLoader 2 | import tqdm 3 | import math 4 | import time 5 | import torch 6 | import gpytorch 7 | from matplotlib import pyplot as plt 8 | import numpy as np 9 | import sys 10 | sys.path.append("../") 11 | sys.path.append("../directionalvi/utils") 12 | sys.path.append("../directionalvi") 13 | from utils.metrics import MSE 14 | from grad_svgp import train_gp,eval_gp 15 | import testfun 16 | 17 | 18 | # setups 19 | n = 600 20 | n_test = 1000 21 | dim = 2 22 | num_inducing = 20 23 | minibatch_size = int(n/2) 24 | num_epochs = 400 25 | use_ciq = False 26 | use_ngd = False 27 | learning_rate_hypers=0.01 28 | learning_rate_ngd=0.1 29 | # lr-schedule 30 | gamma = 10.0 31 | levels = np.array([20,150,300]) 32 | def lr_sched(epoch): 33 | a = np.sum(levels > epoch) 34 | return (1./gamma)**a 35 | lr_sched=None 36 | mll_type="PLL" 37 | 38 | # seed 39 | torch.random.manual_seed(0) 40 | 41 | # trainig and testing data 42 | train_x = torch.rand(n,dim) 43 | test_x = torch.rand(n_test,dim) 44 | train_y = testfun.f(train_x, deriv=True) 45 | test_y = testfun.f(test_x, deriv=True) 46 | if torch.cuda.is_available(): 47 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 48 | 49 | train_dataset = TensorDataset(train_x, train_y) 50 | test_dataset = TensorDataset(test_x, test_y) 51 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 52 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 53 | 54 | print("\n\n---Standard SVGP---") 55 | print(f"Start training with {n} trainig data of dim {dim}") 56 | print(f"VI setups: {num_inducing} inducing points") 57 | 58 | args={"verbose":True} 59 | 60 | # model training 61 | t1 = time.time_ns() 62 | model,likelihood = train_gp(train_dataset,dim, 63 | num_inducing=num_inducing, 64 | minibatch_size=minibatch_size, 65 | num_epochs=num_epochs, 66 | use_ngd=use_ngd, 67 | use_ciq=use_ciq, 68 | learning_rate_hypers=learning_rate_hypers, 69 | learning_rate_ngd=learning_rate_ngd, 70 | lr_sched=lr_sched, 71 | tqdm=False, 72 | mll_type=mll_type, 73 | **args) 74 | t2 = time.time_ns() 75 | means, variances = eval_gp(test_dataset,model,likelihood, 76 | num_inducing=num_inducing, 77 | minibatch_size=n_test) 78 | t3 = time.time_ns() 79 | 80 | # compute MSE 81 | #test_mse = MSE(test_y.cpu(),means) 82 | test_mse = MSE(test_y[:,0],means[::dim+1]) 83 | # compute mean negative predictive density 84 | test_nll = -torch.distributions.Normal(means[::dim+1], variances.sqrt()[::dim+1]).log_prob(test_y[:,0]).mean() 85 | #test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y.cpu()).mean() 86 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}") 87 | print(f"Training time: {(t2-t1)/1e9:.2f} sec, testing 
time: {(t3-t2)/1e9:.2f} sec") 88 | 89 | 90 | # from mpl_toolkits.mplot3d import axes3d 91 | # import matplotlib.pyplot as plt 92 | # fig = plt.figure(figsize=(12,6)) 93 | # ax = fig.add_subplot(111, projection='3d') 94 | # ax.scatter(train_x[:,0],train_x[:,1],train_y, color='k') 95 | # ax.scatter(train_x[:,0],train_x[:,1],means, color='b') 96 | # plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 97 | # plt.show() 98 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gp.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | 14 | import gpytorch 15 | import numpy as np 16 | import torch 17 | from gpytorch.constraints.constraints import Interval 18 | from gpytorch.distributions import MultivariateNormal 19 | from gpytorch.kernels import MaternKernel, ScaleKernel 20 | from gpytorch.likelihoods import GaussianLikelihood 21 | from gpytorch.means import ConstantMean 22 | from gpytorch.mlls import ExactMarginalLogLikelihood 23 | from gpytorch.models import ExactGP 24 | 25 | 26 | # GP Model 27 | class GP(ExactGP): 28 | def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims): 29 | super(GP, self).__init__(train_x, train_y, likelihood) 30 | self.ard_dims = ard_dims 31 | self.mean_module = ConstantMean() 32 | base_kernel = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=2.5) 33 | self.covar_module = ScaleKernel(base_kernel, outputscale_constraint=outputscale_constraint) 34 | 35 | def forward(self, x): 36 | mean_x = self.mean_module(x) 37 | covar_x = self.covar_module(x) 38 | return MultivariateNormal(mean_x, covar_x) 39 | 40 | 41 | def train_gp(train_x, train_y, use_ard, num_steps, hypers={}): 42 | """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized.""" 43 | assert train_x.ndim == 2 44 | assert train_y.ndim == 1 45 | assert train_x.shape[0] == train_y.shape[0] 46 | 47 | # Create hyper parameter bounds 48 | noise_constraint = Interval(5e-4, 0.2) 49 | if use_ard: 50 | lengthscale_constraint = Interval(0.005, 2.0) 51 | else: 52 | lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1])) # [0.005, sqrt(dim)] 53 | outputscale_constraint = Interval(0.05, 20.0) 54 | 55 | # Create models 56 | likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype) 57 | ard_dims = train_x.shape[1] if use_ard else None 58 | model = GP( 59 | train_x=train_x, 60 | train_y=train_y, 61 | likelihood=likelihood, 62 | lengthscale_constraint=lengthscale_constraint, 63 | outputscale_constraint=outputscale_constraint, 64 | ard_dims=ard_dims, 65 | ).to(device=train_x.device, dtype=train_x.dtype) 66 | 67 | # Find optimal model hyperparameters 68 | model.train() 69 | likelihood.train() 70 | 71 | # "Loss" for GPs - the marginal log likelihood 72 | mll = 
ExactMarginalLogLikelihood(likelihood, model) 73 | 74 | # Initialize model hypers 75 | if hypers: 76 | model.load_state_dict(hypers) 77 | else: 78 | hypers = {} 79 | hypers["covar_module.outputscale"] = 1.0 80 | hypers["covar_module.base_kernel.lengthscale"] = 0.5 81 | hypers["likelihood.noise"] = 0.005 82 | model.initialize(**hypers) 83 | 84 | # Use the adam optimizer 85 | optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1) 86 | 87 | for _ in range(num_steps): 88 | optimizer.zero_grad() 89 | output = model(train_x) 90 | loss = -mll(output, train_y) 91 | loss.backward() 92 | optimizer.step() 93 | 94 | # Switch to eval mode 95 | model.eval() 96 | likelihood.eval() 97 | 98 | return model -------------------------------------------------------------------------------- /experiments/rover/exact_gp.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | 14 | import gpytorch 15 | import numpy as np 16 | import torch 17 | from gpytorch.constraints.constraints import Interval 18 | from gpytorch.distributions import MultivariateNormal 19 | from gpytorch.kernels import MaternKernel, ScaleKernel 20 | from gpytorch.likelihoods import GaussianLikelihood 21 | from gpytorch.means import ConstantMean 22 | from gpytorch.mlls import ExactMarginalLogLikelihood 23 | from gpytorch.models import ExactGP 24 | 25 | 26 | # GP Model 27 | class GP(ExactGP): 28 | def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims): 29 | super(GP, self).__init__(train_x, train_y, likelihood) 30 | self.ard_dims = ard_dims 31 | self.mean_module = ConstantMean() 32 | base_kernel = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=2.5) 33 | self.covar_module = ScaleKernel(base_kernel, outputscale_constraint=outputscale_constraint) 34 | 35 | def forward(self, x): 36 | mean_x = self.mean_module(x) 37 | covar_x = self.covar_module(x) 38 | return MultivariateNormal(mean_x, covar_x) 39 | 40 | 41 | def train_gp(train_x, train_y, use_ard, num_steps, hypers={}): 42 | """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized.""" 43 | assert train_x.ndim == 2 44 | assert train_y.ndim == 1 45 | assert train_x.shape[0] == train_y.shape[0] 46 | 47 | # Create hyper parameter bounds 48 | noise_constraint = Interval(5e-4, 0.2) 49 | if use_ard: 50 | lengthscale_constraint = Interval(0.005, 2.0) 51 | else: 52 | lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1])) # [0.005, sqrt(dim)] 53 | outputscale_constraint = Interval(0.05, 20.0) 54 | 55 | # Create models 56 | likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype) 57 | ard_dims = train_x.shape[1] if use_ard else None 58 | model = GP( 59 | train_x=train_x, 60 | train_y=train_y, 61 | likelihood=likelihood, 62 | lengthscale_constraint=lengthscale_constraint, 63 | 
outputscale_constraint=outputscale_constraint, 64 | ard_dims=ard_dims, 65 | ).to(device=train_x.device, dtype=train_x.dtype) 66 | 67 | # Find optimal model hyperparameters 68 | model.train() 69 | likelihood.train() 70 | 71 | # "Loss" for GPs - the marginal log likelihood 72 | mll = ExactMarginalLogLikelihood(likelihood, model) 73 | 74 | # Initialize model hypers 75 | if hypers: 76 | model.load_state_dict(hypers) 77 | else: 78 | hypers = {} 79 | hypers["covar_module.outputscale"] = 1.0 80 | hypers["covar_module.base_kernel.lengthscale"] = 0.5 81 | hypers["likelihood.noise"] = 0.005 82 | model.initialize(**hypers) 83 | 84 | # Use the adam optimizer 85 | optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1) 86 | 87 | for _ in range(num_steps): 88 | optimizer.zero_grad() 89 | output = model(train_x) 90 | loss = -mll(output, train_y) 91 | loss.backward() 92 | optimizer.step() 93 | 94 | # Switch to eval mode 95 | model.eval() 96 | likelihood.eval() 97 | 98 | return model 99 | -------------------------------------------------------------------------------- /tests/test_dfree_dsvgp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import gpytorch 5 | import tqdm 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import sys 11 | sys.path.append("../") 12 | sys.path.append("../directionalvi/utils") 13 | sys.path.append("../directionalvi") 14 | from RBFKernelDirectionalGrad import RBFKernelDirectionalGrad 15 | #from DirectionalGradVariationalStrategy import DirectionalGradVariationalStrategy 16 | from dfree_directional_vi import train_gp, eval_gp 17 | from metrics import MSE 18 | import testfun 19 | 20 | # data parameters 21 | n = 600 22 | dim = 2 23 | n_test = 1000 24 | 25 | # training params 26 | num_inducing = 20 27 | num_directions = 2 28 | minibatch_size = 200 29 | num_epochs = 400 30 | 31 | # seed 32 | torch.random.manual_seed(0) 33 | # use tqdm or just have print statements 34 | tqdm = False 35 | # use data to initialize inducing stuff 36 | inducing_data_initialization = False 37 | # use natural gradients and/or CIQ 38 | use_ngd = False 39 | use_ciq = False 40 | num_contour_quadrature=15 41 | # learning rate 42 | learning_rate_hypers = 0.01 43 | learning_rate_ngd = 0.1 44 | gamma = 10.0 45 | #levels = np.array([20,150,300]) 46 | #def lr_sched(epoch): 47 | # a = np.sum(levels > epoch) 48 | # return (1./gamma)**a 49 | lr_sched = None 50 | 51 | # training and testing data 52 | train_x = torch.rand(n,dim) 53 | test_x = torch.rand(n_test,dim) 54 | train_y = testfun.f(train_x, deriv=False) 55 | test_y = testfun.f(test_x, deriv=False) 56 | if torch.cuda.is_available(): 57 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 58 | 59 | train_dataset = TensorDataset(train_x, train_y) 60 | test_dataset = TensorDataset(test_x, test_y) 61 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 62 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 63 | 64 | # train 65 | print("\n\n---DirectionalGradVGP---") 66 | print(f"Start training with {n} trainig data of dim {dim}") 67 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 68 | args={"verbose":True} 69 | t1 = time.time() 70 | model,likelihood = train_gp(train_dataset, 71 | num_inducing=num_inducing, 72 | 
num_directions=num_directions, 73 | minibatch_size = minibatch_size, 74 | minibatch_dim = num_directions, 75 | num_epochs =num_epochs, 76 | learning_rate_hypers=learning_rate_hypers, 77 | learning_rate_ngd=learning_rate_ngd, 78 | inducing_data_initialization=inducing_data_initialization, 79 | use_ngd = use_ngd, 80 | use_ciq = use_ciq, 81 | lr_sched=lr_sched, 82 | num_contour_quadrature=num_contour_quadrature, 83 | tqdm=tqdm,**args 84 | ) 85 | t2 = time.time() 86 | 87 | # save the model 88 | # torch.save(model.state_dict(), "../data/test_dvi_basic.model") 89 | 90 | # test 91 | means, variances = eval_gp( test_dataset,model,likelihood, 92 | num_directions=num_directions, 93 | minibatch_size=n_test, 94 | minibatch_dim=num_directions) 95 | t3 = time.time() 96 | 97 | # compute MSE 98 | test_y = test_y.cpu() 99 | test_mse = MSE(test_y,means) 100 | # compute mean negative predictive density 101 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y).mean() 102 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 103 | print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec") 104 | 105 | plot=1 106 | if plot == 1: 107 | from mpl_toolkits.mplot3d import axes3d 108 | import matplotlib.pyplot as plt 109 | fig = plt.figure(figsize=(12,6)) 110 | ax = fig.add_subplot(111, projection='3d') 111 | ax.scatter(test_x[:,0],test_x[:,1],test_y, color='k') 112 | ax.scatter(test_x[:,0],test_x[:,1],means, color='b') 113 | plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 114 | plt.show() 115 | 116 | -------------------------------------------------------------------------------- /experiments/synthetic1/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | dd = 0 12 | M_list = np.array([200,400,600,800,1000,1200,1400]) # matrix sizes 13 | #M_list = np.array([1000,1200,1400]) # matrix sizes 14 | ni_list = (M_list/(dd+1)).astype(int) 15 | for ni in ni_list: 16 | 17 | # write a pickle file with the run info 18 | run_params_dir = "./param_files/" 19 | if os.path.exists(run_params_dir) is False: 20 | os.mkdir(run_params_dir) 21 | run_params = {} 22 | run_params['mode'] = "SVGP" # DSVGP, SVGP or GradSVGP 23 | run_params['num_inducing'] = ni 24 | run_params['num_directions'] = dd 25 | run_params['minibatch_size'] = 512 26 | run_params['num_epochs'] = 1200 27 | run_params['tqdm'] = False 28 | run_params['inducing_data_initialization'] = False 29 | run_params['use_ngd'] = False 30 | run_params['use_ciq'] = False 31 | run_params['num_contour_quadrature'] = 10 # gpytorch default=15 32 | run_params['learning_rate_hypers'] = 0.01 33 | run_params['learning_rate_ngd'] = 0.1 34 | run_params['lr_benchmarks'] = 20*np.array([800]) 35 | run_params['lr_gamma'] = 0.1 36 | run_params['lr_sched'] = "MultiStepLR" 37 | run_params['mll_type'] = "PLL" 38 | run_params['data_file'] = "./synthetic1_dataset_10000_points_5_dim.pickle" 39 | #run_params['data_file'] = f"./synthetic1_dataset_10000_points_5_dim_grad_dimredux_{run_params['num_directions']}_directions.pickle" 40 | # seed and date 41 | now = datetime.now() 42 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 43 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 44 | run_params['date'] = now 45 
| run_params['seed'] = seed 46 | # file name 47 | if run_params['mode'] == "DSVGP": 48 | base_name = f"synthetic1_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 49 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 50 | f"_ciq_{run_params['use_ciq']}_{barcode}" 51 | elif run_params['mode'] == "SVGP": 52 | base_name = f"synthetic1_SVGP_ni_{run_params['num_inducing']}"+\ 53 | f"_ne_{run_params['num_epochs']}_{barcode}" 54 | elif run_params['mode'] == "GradSVGP": 55 | base_name = f"synthetic1_GradSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 56 | f"_ne_{run_params['num_epochs']}_{barcode}" 57 | run_params['base_name'] = base_name 58 | param_filename = run_params_dir + "params_" +base_name + ".pickle" 59 | pickle.dump(run_params,open(param_filename,'wb')) 60 | print(f"Dumped param file: {param_filename}") 61 | 62 | if write_sbatch: 63 | # write a slurm batch script 64 | slurm_dir = "./slurm_scripts/" 65 | if os.path.exists(slurm_dir) is False: 66 | os.mkdir(slurm_dir) 67 | slurm_name = slurm_dir + base_name + ".sub" 68 | #slurm_name = base_name + ".sub" 69 | f = open(slurm_name,"w") 70 | f.write(f"#!/bin/bash\n") 71 | f.write(f"#SBATCH -J {run_params['mode']}_{run_params['num_directions']}\n") 72 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 73 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 74 | f.write(f"#SBATCH --get-user-env\n") 75 | f.write(f"#SBATCH -N 1\n") 76 | f.write(f"#SBATCH -n 1\n") 77 | f.write(f"#SBATCH --mem=15000\n") 78 | f.write(f"#SBATCH -t 168:00:00\n") 79 | f.write(f"#SBATCH --partition=default_partition\n") 80 | f.write(f"#SBATCH --gres=gpu:1\n") 81 | f.write(f"python3 synthetic1.py {param_filename}\n") 82 | print(f"Dumped slurm file: {slurm_name}") 83 | 84 | # write the shell submission script 85 | submit_name = slurm_dir + 'slurm_submit.sh' 86 | f = open(submit_name,"w") 87 | f.write(f"#!/bin/bash\n") 88 | f.write(f"sbatch --requeue {slurm_name}") 89 | f.close() 90 | print(f"Dumped bash script: {submit_name}") 91 | 92 | if submit: 93 | # submit the script 94 | #bash_command = f"sbatch {slurm_name}" 95 | bash_command = f"bash {submit_name}" 96 | subprocess.run(bash_command.split(" ")) 97 | -------------------------------------------------------------------------------- /directionalvi/utils/test/test_synthetic_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append("../") 4 | from synthetic_functions import * 5 | import torch 6 | import copy 7 | from rescale import * 8 | 9 | def comp_err_deriv(fun,x,h): 10 | ''' 11 | test derivative at x using finite difference 12 | Inputs: 13 | fun::callable function handle, returns function values and derivatives 14 | x::tensor one testing position 15 | h:float finite difference step size 16 | ''' 17 | n = x.shape[0] 18 | d = x.shape[1] 19 | y = fun(x) 20 | g_true = y[:,1:] 21 | id_mat = torch.eye(d) 22 | error = torch.zeros(n) 23 | g_fd_set = torch.zeros((n,d)) 24 | for j in range(n): 25 | g_fd = torch.zeros(d) 26 | for i in range(d): 27 | xph = copy.deepcopy(x[j]) 28 | xph[i] = x[j][i] + h 29 | g_fd[i] = (fun(xph.reshape(1, d))[0][0] - fun(x[j].reshape(1, d))[0][0])/h 30 | g_fd_set[j,:] = g_fd 31 | error[j] = (g_fd - g_true[j]).abs().max() 32 | return error.max(), g_fd_set, g_true 33 | 34 | def test_fun_val(test_fun, n): 35 | try: 36 | dim = test_fun.dim 37 | except err: 38 | dim = 3 39 | x = torch.rand(n, dim) 40 | lb, ub = test_fun.get_bounds() 41 | x = 
from_unit_cube(x, lb, ub) 42 | y = test_fun.evaluate_true_with_deriv(x) 43 | err_fun = (y[:,0] - test_fun.evaluate_true(x)).abs().max() 44 | return err_fun 45 | 46 | def test_deriv(test_fun, n): 47 | # test derivative 48 | try: 49 | dim = test_fun.dim 50 | except err: 51 | dim = 3 52 | lb, ub = test_fun.get_bounds() 53 | x = torch.rand(n, dim) 54 | x = from_unit_cube(x, lb, ub) 55 | err, g_fd_set, g_true = comp_err_deriv(test_fun.evaluate_true_with_deriv,x,1e-6) 56 | return err, g_fd_set, g_true 57 | 58 | 59 | #branin 60 | test_fun_name = "Branin" 61 | test_fun = eval(f"{test_fun_name}_with_deriv")() 62 | err_fun = test_fun_val(test_fun, 10) 63 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 64 | err_deriv, _, _ = test_deriv(test_fun, 300) 65 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 66 | 67 | 68 | 69 | # stytang 70 | # print("stytang") 71 | # st = StyblinskiTang_with_deriv() 72 | # w = torch.rand(5, 2) 73 | # y = st.evaluate_true_with_deriv(w) 74 | # print(y) 75 | test_fun_name = "StyblinskiTang" 76 | test_fun = eval(f"{test_fun_name}_with_deriv")() 77 | err_fun = test_fun_val(test_fun, 10) 78 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 79 | err_deriv, _, _ = test_deriv(test_fun, 300) 80 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 81 | 82 | 83 | 84 | # #six_hump_camel 85 | # print("six hump camel") 86 | # cc = SixHumpCamel_with_deriv() 87 | # w = torch.rand(6, 2) 88 | # y = cc.evaluate_true_with_deriv(w) 89 | # print(y) 90 | test_fun_name = "SixHumpCamel" 91 | test_fun = eval(f"{test_fun_name}_with_deriv")() 92 | err_fun = test_fun_val(test_fun, 10) 93 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 94 | err_deriv, _, _ = test_deriv(test_fun, 300) 95 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 96 | 97 | 98 | # test_fun_name = "Welch" 99 | # test_fun = eval(f"{test_fun_name}_with_deriv")() 100 | # err_fun = test_fun_val(test_fun, 10) 101 | # print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 102 | # err_deriv, _, _ = test_deriv(test_fun, 300) 103 | # print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 104 | 105 | 106 | #Hartmann 107 | test_fun_name = "Hartmann" 108 | test_fun = eval(f"{test_fun_name}_with_deriv")() 109 | err_fun = test_fun_val(test_fun, 10) 110 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 111 | err_deriv, _, _ = test_deriv(test_fun, 300) 112 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 113 | 114 | 115 | test_fun_name = "Welch2" 116 | test_fun = eval(f"{test_fun_name}_with_deriv")() 117 | err_fun = test_fun_val(test_fun, 10) 118 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 119 | # g stores the computed derivatives, g_fd stores values approximated by FD. 
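# Note: test_deriv returns (max_error, g_fd_set, g_true) in that order, so in the unpacking
# below `g` receives the finite-difference approximations and `g_fd` the analytic
# derivatives; the names read reversed relative to the comment above. comp_err_deriv builds
# the approximations with one-sided forward differences of step size h.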
120 | err_deriv, g, g_fd = test_deriv(test_fun, 300) 121 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 122 | 123 | -------------------------------------------------------------------------------- /experiments/synthetic1/ExactGradGP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gpytorch 3 | import math 4 | import numpy as np 5 | import sys 6 | 7 | class GPModelWithDerivatives(gpytorch.models.ExactGP): 8 | def __init__(self, train_x, train_y, likelihood): 9 | super(GPModelWithDerivatives, self).__init__(train_x, train_y, likelihood) 10 | self.mean_module = gpytorch.means.ConstantMeanGrad() 11 | self.base_kernel = gpytorch.kernels.RBFKernelGrad() 12 | self.covar_module = gpytorch.kernels.ScaleKernel(self.base_kernel) 13 | 14 | def forward(self, x): 15 | mean_x = self.mean_module(x) 16 | covar_x = self.covar_module(x) 17 | return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x) 18 | 19 | def train_gp(train_x,train_y,num_epochs=1,lr_hypers=0.01,verbose=True): 20 | 21 | dim = train_x.shape[-1] 22 | n_tasks = dim + 1 23 | likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=n_tasks) # Value + x-derivative + y-derivative 24 | model = GPModelWithDerivatives(train_x, train_y, likelihood) 25 | 26 | # if torch.cuda.is_available(): 27 | # model = model.cuda() 28 | # likelihood = likelihood.cuda() 29 | # Find optimal model hyperparameters 30 | model.train() 31 | likelihood.train() 32 | 33 | # Use the adam optimizer 34 | optimizer = torch.optim.Adam(model.parameters(), lr=lr_hypers) # Includes GaussianLikelihood parameters 35 | 36 | # "Loss" for GPs - the marginal log likelihood 37 | mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) 38 | 39 | for i in range(num_epochs): 40 | optimizer.zero_grad() 41 | output = likelihood(model(train_x)) 42 | loss = -mll(output, train_y) 43 | loss.backward() 44 | print(f"Iter {i}, Loss: {loss.item()}") 45 | sys.stdout.flush() 46 | optimizer.step() 47 | 48 | print("Done Training") 49 | return model,likelihood 50 | 51 | 52 | def eval_gp(test_x,model,likelihood): 53 | 54 | print("Predicting") 55 | # Set into eval mode 56 | model.eval() 57 | likelihood.eval() 58 | 59 | # Make predictions 60 | with torch.no_grad(), gpytorch.settings.fast_computations(log_prob=False, covar_root_decomposition=False): 61 | predictions = likelihood(model(test_x)) 62 | means = predictions.mean 63 | variances = predictions.variance 64 | 65 | return means, variances 66 | 67 | if __name__ == "__main__": 68 | from datetime import datetime 69 | now = datetime.now() 70 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 71 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 72 | torch.random.manual_seed(seed) 73 | 74 | # load data 75 | import pickle 76 | d = pickle.load(open("./synthetic1_dataset_10000_points_5_dim.pickle", "rb")) 77 | X = d['X'] 78 | Y = d['Y'] 79 | n,dim = X.shape 80 | n_train = int(0.8*n) 81 | n_test = n - n_train 82 | # reduce n_train 83 | n_train = int(n_train/(dim+1)) 84 | # train/test split 85 | train_x = X[:n_train] 86 | train_y = Y[:n_train] 87 | test_x = X[n_train:n_train+n_test] 88 | test_y = Y[n_train:n_train+n_test] 89 | test_f = test_y[:,0] # just function values 90 | # train gp 91 | num_epochs = 400 92 | lr_hypers = 0.05 93 | model,likelihood = train_gp(train_x,train_y,num_epochs=num_epochs,lr_hypers=lr_hypers,verbose=True) 94 | # eval gp 95 | 
means,variances = eval_gp(test_x,model,likelihood) 96 | means = means[:,0] # just function values 97 | variances= variances[:,0] # just function values 98 | # compute MSE 99 | test_mse = torch.mean((test_f-means)**2) 100 | # compute mean negative predictive density 101 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean() 102 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 103 | 104 | # file name 105 | data_filename = f"./output/data_ExactGradGP_ne_{num_epochs}_{barcode}.pickle" 106 | # dump the data 107 | outdata = {} 108 | outdata['test_mse'] = test_mse 109 | outdata['test_nll'] = test_nll 110 | outdata['mode'] = "ExactGradGP" 111 | outdata['dim'] = dim 112 | outdata['M'] = n_train 113 | outdata['num_epochs'] = num_epochs 114 | outdata['lr_hypers'] = lr_hypers 115 | data_filename 116 | pickle.dump(outdata,open(data_filename,"wb")) 117 | print(f"Dropped file: {data_filename}") 118 | -------------------------------------------------------------------------------- /experiments/rover/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | dd = 0 12 | M_list = np.array([400]) # matrix sizes 13 | ni_list = (M_list/(dd+1)).astype(int) 14 | for ni in ni_list: 15 | 16 | # write a pickle file with the run info 17 | run_params_dir = "./param_files/" 18 | if os.path.exists(run_params_dir) is False: 19 | os.mkdir(run_params_dir) 20 | run_params = {} 21 | run_params['mode'] = "Vanilla" # DSVGP, SVGP or Vanilla 22 | run_params['num_inducing'] = ni 23 | run_params['num_directions'] = dd 24 | run_params['dim'] = 200 # not a parameter 25 | run_params['minibatch_size'] = 512 26 | run_params['num_epochs'] = 300 27 | run_params['inducing_data_initialization'] = False 28 | run_params['use_ngd'] = False 29 | run_params['use_ciq'] = False 30 | run_params['num_contour_quadrature'] = 15 # gpytorch default=15 31 | run_params['learning_rate_hypers'] = 0.01 32 | run_params['learning_rate_ngd'] = 0.1 33 | run_params['lr_benchmarks'] = 20*np.array([400]) 34 | run_params['lr_gamma'] = 0.1 35 | run_params['lr_sched'] = None 36 | run_params['mll_type'] = "PLL" 37 | run_params['verbose'] = False 38 | run_params['turbo_lb'] = -5*np.ones(run_params['dim']) 39 | run_params['turbo_ub'] = 5*np.ones(run_params['dim']) 40 | run_params['turbo_n_init'] = 100 41 | run_params['turbo_max_evals'] = 2000 42 | run_params['turbo_batch_size'] = 5 43 | # seed and date 44 | now = datetime.now() 45 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 46 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 47 | run_params['date'] = now 48 | run_params['seed'] = seed 49 | # file name 50 | if run_params['mode'] == "DSVGP": 51 | base_name = f"rover_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 52 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 53 | f"_ciq_{run_params['use_ciq']}_{barcode}" 54 | elif run_params['mode'] == "SVGP": 55 | base_name = f"rover_SVGP_ni_{run_params['num_inducing']}"+\ 56 | f"_ne_{run_params['num_epochs']}_{barcode}" 57 | elif run_params['mode'] == "Vanilla": 58 | base_name = f"rover_Vanilla"+\ 59 | f"_ne_{run_params['num_epochs']}_{barcode}" 60 | run_params['base_name'] = base_name 61 | param_filename = run_params_dir + 
"params_" +base_name + ".pickle" 62 | pickle.dump(run_params,open(param_filename,'wb')) 63 | print(f"Dumped param file: {param_filename}") 64 | 65 | if write_sbatch: 66 | # write a slurm batch script 67 | slurm_dir = "./slurm_scripts/" 68 | if os.path.exists(slurm_dir) is False: 69 | os.mkdir(slurm_dir) 70 | slurm_name = slurm_dir + base_name + ".sub" 71 | #slurm_name = base_name + ".sub" 72 | f = open(slurm_name,"w") 73 | f.write(f"#!/bin/bash\n") 74 | f.write(f"#SBATCH -J rover_{run_params['mode']}{run_params['num_directions']}\n") 75 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 76 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 77 | f.write(f"#SBATCH --get-user-env\n") 78 | f.write(f"#SBATCH -N 1\n") 79 | f.write(f"#SBATCH -n 1\n") 80 | f.write(f"#SBATCH --mem=15000\n") 81 | f.write(f"#SBATCH -t 168:00:00\n") 82 | f.write(f"#SBATCH --partition=default_partition\n") 83 | f.write(f"#SBATCH --gres=gpu:1\n") 84 | f.write(f"python3 test_turbo.py {param_filename}\n") 85 | print(f"Dumped slurm file: {slurm_name}") 86 | 87 | # write the shell submission script 88 | submit_name = slurm_dir + 'slurm_submit.sh' 89 | f = open(submit_name,"w") 90 | f.write(f"#!/bin/bash\n") 91 | f.write(f"sbatch --requeue {slurm_name}") 92 | f.close() 93 | print(f"Dumped bash script: {submit_name}") 94 | 95 | if submit: 96 | # submit the script 97 | #bash_command = f"sbatch {slurm_name}" 98 | bash_command = f"bash {submit_name}" 99 | subprocess.run(bash_command.split(" ")) 100 | -------------------------------------------------------------------------------- /experiments/bunny/ImplicitBunny/bunny.m: -------------------------------------------------------------------------------- 1 | % Script to reconstruct Stanford bunny via implicit surface. **Warning** running this script will take > 30 minutes 2 | % as training a GP with tens of thousands of data points is computationally expensive. 3 | 4 | clear all 5 | ski_order = 5; 6 | ninduce = 30; 7 | d = 3; 8 | 9 | %% Load pre-processed large Stanford bunny (~70,000 vertices) 10 | % fprintf('Reading... 
'); obj = readObj('bunny.obj'); fprintf(' Done!\n'); 11 | % Original Processing Script 12 | load('bunny.mat'); 13 | 14 | 15 | X = obj.v; Xorig = X; 16 | T = obj.f.vt; Torig = T; 17 | nx = obj.vn; 18 | 19 | %% Map to unitbox 20 | X = mapToUnitbox(X); 21 | lims = [-0.01 1.01 -0.01 1.01 -0.01 1.01]; 22 | 23 | % %% Normalize the normals 24 | nx = nx ./ sqrt(sum(nx.^2, 2)); 25 | % 26 | % %% Add noise 27 | noise = 0.01; 28 | X = X + noise*randn(size(X,1), d); 29 | nx = nx + 0*randn(size(X,1), d); 30 | % 31 | % %% Map back to unitbox and pick subset 32 | X = mapToUnitbox(X); 33 | nn = 5; % use 1/nn of data 34 | x = X(1:nn:end,1); y = X(1:nn:end,2); z = X(1:nn:end,3); 35 | nx = nx(1:nn:end, :); 36 | n = size(x, 1); 37 | fprintf('Size of Kdot: [%d %d]\n', n*(d+1), n*(d+1)) 38 | 39 | %% Train GP with gradients using TPS kernel 40 | % lb = min([x,y,z]); ub = max([x,y,z]); 41 | % beta = 1e-4; R = 2; 42 | % s0 = 1; sig0 = 1e-1; 43 | % 44 | % nZ = 3; Z = sign(randn(n*(d+1),nZ)); 45 | % xg = repmat({linspace(-0.1, 1.1, ninduce)}, d, 1); 46 | % [Wtrain{1}, Wtrain{2}] = interpGrid([x,y,z], xg, ski_order); 47 | % cov = @(hyp) tps_kernel_grad_ski(R, [x,y,z], hyp, xg, Wtrain); 48 | % hyp = struct('cov', log([s0]), 'lik', log([sig0 sig0])); 49 | % lmlfun = @(hyp) lml_mvm(cov, [zeros(n, 1), nx], hyp, Z, beta, true); 50 | % params = minimize(hyp, lmlfun, -30); 51 | % s = exp(params.cov(1)); 52 | % sigma = sqrt(exp(2*params.lik) + beta); 53 | % fprintf('TPS-SKI with gradients: (s, sigma1, sigma2) = (%.3f, %.3f, %.3f)\n', exp(params.cov), sigma) 54 | 55 | %% Prediction handle 56 | % if length(sigma) == 1, sigma = [sigma, sigma]; end 57 | % sig = [sigma(1)*ones(1,n), sigma(2)*ones(1,n*d)]'; 58 | % [K, ~, precond] = tps_kernel_grad_ski(R, [x,y,z], params, xg, Wtrain); 59 | % mvm = @(x) K(x) + sig.^2 .* x; 60 | % lambda = pcg(mvm, [zeros(n, 1); nx(:)], 1e-10, 1000, precond); 61 | 62 | %% Compute implicit surface 63 | isize = 100; 64 | nxx = isize; nyy = isize; nzz = isize; 65 | x1 = linspace(lims(1), lims(2), nxx); 66 | x2 = linspace(lims(3), lims(4), nyy); 67 | x3 = linspace(lims(5), lims(6), nzz); 68 | 69 | % V = zeros(nxx, nyy, nzz); 70 | % for i=1:nzz % Loop over third dimension to not have the memory blow up on us.... 71 | % [XX, YY, ZZ] = meshgrid(x1, x2, x3(i)); 72 | % Wtest = {}; Wtest{1} = interpGrid([XX(:) YY(:) ZZ(:)], xg, ski_order); 73 | % KK = tps_kernel_grad_ski(R, [x,y,z], params, xg, Wtrain, [XX(:) YY(:) ZZ(:)], Wtest); 74 | % V(:, :, i) = reshape(KK(lambda), [nxx, nyy, 1]); 75 | % end 76 | 77 | % y_pred = readtable('bunny_DSVGP_ntrain3482_m300_p3_epochs100_standard_standard_expTEST_ntest31335_pred_y.csv'); 78 | % V = reshape(y_pred, [nxx, nyy, nzz]); 79 | 80 | V = load('bunny_DSVGP_ntrain3482_m300_p3_epochs100_standard_standard_expTEST_ntest31335_V.mat').V; 81 | 82 | FV = isosurface(x1, x2, x3, V, 0); 83 | 84 | %% Remove vertices far from training points 85 | D = pdist2([x,y,z], FV.vertices, 'euclidean', 'Smallest', 1)'; 86 | verticesToRemove = find(D > 3.2e-2)'; 87 | newVertices = FV.vertices; 88 | newVertices(verticesToRemove,:) = []; 89 | [~, newVertexIndex] = ismember(FV.vertices, newVertices, 'rows'); 90 | newFaces = FV.faces(all([FV.faces(:,1) ~= verticesToRemove, ... 91 | FV.faces(:,2) ~= verticesToRemove, ... 
92 | FV.faces(:,3) ~= verticesToRemove], 2),:); 93 | newFaces = newVertexIndex(newFaces); 94 | V = newVertices; 95 | F = newFaces; 96 | 97 | %% Plot original points 98 | figure('units','normalized','outerposition',[0 0 1 1]); 99 | subaxis(1, 3, 1, 'Spacing', 0, 'Padding', 0, 'Margin', 0); 100 | trisurf(Torig, Xorig(:,3), Xorig(:,1), Xorig(:,2), 'EdgeColor', 'none'); 101 | axis(lims) 102 | axis equal off 103 | shading interp % Make surface look smooth 104 | view(90, 15) 105 | camlight; lighting phong % Shine light on surface 106 | 107 | %% Plot noisy points 108 | subaxis(1, 3, 2, 'Spacing', 0, 'Padding', 0, 'Margin', 0); 109 | trisurf(T, X(:,3), X(:,1), X(:,2), 'EdgeColor', 'none'); 110 | axis(lims) 111 | axis equal off 112 | shading interp % Make surface look smooth 113 | view(90, 15) 114 | camlight; lighting phong % Shine light on surface 115 | 116 | %% Plot bunny after (hopefully) removing all dummy points 117 | subaxis(1, 3, 3, 'Spacing', 0, 'Padding', 0, 'Margin', 0); 118 | trisurf(F, V(:,3), V(:,1), V(:,2), 'EdgeColor', 'none'); 119 | axis(lims) 120 | axis equal off 121 | shading interp % Make surface look smooth 122 | view(90, 15) 123 | camlight; lighting phong % Shine light on surface -------------------------------------------------------------------------------- /experiments/stellarator_regression/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | dd =1 12 | M_list = np.array([200,500,800,1000,1200,1400]) 13 | ni_list = (M_list/(dd+1)).astype(int) 14 | for ni in ni_list: 15 | 16 | # write a pickle file with the run info 17 | run_params_dir = "./param_files/" 18 | if os.path.exists(run_params_dir) is False: 19 | os.mkdir(run_params_dir) 20 | run_params = {} 21 | run_params['mode'] = "DSVGP" # DSVGP, SVGP, GradSVGP, DSVGP-Shared 22 | run_params['num_inducing'] = ni 23 | run_params['num_directions'] = dd 24 | run_params['minibatch_size'] = 512 25 | run_params['num_epochs'] = 1000 26 | run_params['tqdm'] = False 27 | run_params['inducing_data_initialization'] = False 28 | run_params['use_ngd'] = False 29 | run_params['use_ciq'] = False 30 | run_params['num_contour_quadrature'] = 15 # gpytorch default=15 31 | run_params['learning_rate_hypers'] = 0.01 32 | run_params['learning_rate_ngd'] = 0.1 33 | # lr_benchmarks has units number of steps not number of epochs 34 | run_params['lr_benchmarks'] = 45*np.array([600,800]) 35 | run_params['lr_gamma'] = 0.1 36 | run_params['lr_sched'] = "MultiStepLR" 37 | run_params['mll_type'] = "PLL" #ELBO or PLL 38 | run_params['data_file'] = "focus_w7x_dataset_45dim_pickle_format.pickle" 39 | #run_params['data_file'] = "focus_w7x_dataset_45dim_500000_points.pickle" 40 | #run_params['data_file'] = f"./focus_w7x_dataset_45dim_grad_dimredux_{run_params['num_directions']}_directions.pickle" 41 | # seed and date 42 | now = datetime.now() 43 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 44 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 45 | run_params['date'] = now 46 | run_params['seed'] = seed 47 | # file name 48 | if run_params['mode'] == "DSVGP": 49 | base_name = f"stell_regress_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 50 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 51 | 
f"_ciq_{run_params['use_ciq']}_{barcode}" 52 | elif run_params['mode'] == "DSVGP-Shared": 53 | base_name = f"stell_regress_DSVGP_Shared_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 54 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 55 | f"_ciq_{run_params['use_ciq']}_{barcode}" 56 | elif run_params['mode'] == "SVGP": 57 | base_name = f"stell_regress_SVGP_ni_{run_params['num_inducing']}"+\ 58 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 59 | f"_ciq_{run_params['use_ciq']}_{barcode}" 60 | elif run_params['mode'] == "GradSVGP": 61 | base_name = f"stell_regress_GradSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 62 | f"_ne_{run_params['num_epochs']}_{barcode}" 63 | run_params['base_name'] = base_name 64 | param_filename = run_params_dir + "params_" +base_name + ".pickle" 65 | pickle.dump(run_params,open(param_filename,'wb')) 66 | print(f"Dumped param file: {param_filename}") 67 | 68 | if write_sbatch: 69 | # write a slurm batch script 70 | slurm_dir = "./slurm_scripts/" 71 | if os.path.exists(slurm_dir) is False: 72 | os.mkdir(slurm_dir) 73 | slurm_name = slurm_dir + base_name + ".sub" 74 | #slurm_name = base_name + ".sub" 75 | f = open(slurm_name,"w") 76 | f.write(f"#!/bin/bash\n") 77 | f.write(f"#SBATCH -J {run_params['mode']}_{run_params['num_directions']}\n") 78 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 79 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 80 | f.write(f"#SBATCH --get-user-env\n") 81 | f.write(f"#SBATCH -N 1\n") 82 | f.write(f"#SBATCH -n 1\n") 83 | f.write(f"#SBATCH --mem=15000\n") 84 | f.write(f"#SBATCH -t 168:00:00\n") 85 | f.write(f"#SBATCH --partition=default_partition\n") 86 | f.write(f"#SBATCH --gres=gpu:1\n") 87 | f.write(f"python3 stellarator_regression.py {param_filename}\n") 88 | print(f"Dumped slurm file: {slurm_name}") 89 | 90 | # write the shell submission script 91 | submit_name = slurm_dir + 'slurm_submit.sh' 92 | f = open(submit_name,"w") 93 | f.write(f"#!/bin/bash\n") 94 | f.write(f"sbatch --requeue {slurm_name}") 95 | f.close() 96 | print(f"Dumped bash script: {submit_name}") 97 | 98 | if submit: 99 | # submit the script 100 | #bash_command = f"sbatch {slurm_name}" 101 | bash_command = f"bash {submit_name}" 102 | subprocess.run(bash_command.split(" ")) 103 | -------------------------------------------------------------------------------- /experiments/uci_dfree/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | model = ['DSVGP','DSVGP','SVGP','SVGP'] 12 | mll = ['PLL','ELBO','PLL','ELBO'] 13 | dd = np.array([1,1,0,0]) # number of directions (use 0 for SVGP) 14 | M = 800 # inducing matrix size 15 | ni_list = (M/(dd+1)).astype(int) # ensures equal inducing matrix size 16 | 17 | for jj,ni in enumerate(ni_list): 18 | 19 | # write a pickle file with the run info 20 | run_params_dir = "./param_files/" 21 | if os.path.exists(run_params_dir) is False: 22 | os.mkdir(run_params_dir) 23 | run_params = {} 24 | 25 | # select the dataset 26 | #run_params['data_file'] = "../../../uci/protein/protein.mat" # use relative import 27 | #run_params['data_file'] = "../../../uci/elevators/elevators.mat" # use relative import 28 | #run_params['data_file'] = "../../../uci/kin40k/kin40k.mat" # use relative import 29 | #run_params['data_file'] = 
"../../../uci/keggdirected/keggdirected.mat" 30 | run_params['data_file'] = "../../../uci/energy/energy.mat" # use relative import 31 | run_params['data_dir'] = "./output/energy/" # save location for output 32 | 33 | run_params['mode'] = model[jj] # DSVGP, SVGP 34 | run_params['mll_type'] = mll[jj] # PLL or ELBO 35 | run_params['num_inducing'] = ni_list[jj] # number of inducing 36 | run_params['num_directions'] = dd[jj] # number of directions 37 | run_params['minibatch_size'] = 512 38 | run_params['num_epochs'] = 700 39 | run_params['verbose'] = True 40 | run_params['inducing_data_initialization'] = False 41 | run_params['use_ngd'] = False 42 | run_params['use_ciq'] = False 43 | run_params['num_contour_quadrature'] = 15 # gpytorch default=15 44 | run_params['learning_rate_hypers'] = 0.01 45 | run_params['learning_rate_ngd'] = 0.1 46 | run_params['lr_sched'] = "MultiStepLR" 47 | run_params['lr_benchmarks'] = 73*np.array([300,500]) 48 | run_params['lr_gamma'] = 0.1 # LR decrease rate 49 | # seed and date 50 | now = datetime.now() 51 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 52 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 53 | run_params['date'] = now 54 | run_params['seed'] = seed 55 | # file name 56 | if run_params['mode'] == "DSVGP": 57 | base_name = f"uci_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 58 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 59 | f"_ciq_{run_params['use_ciq']}_{barcode}" 60 | elif run_params['mode'] == "SVGP": 61 | base_name = f"uci_SVGP_ni_{run_params['num_inducing']}"+\ 62 | f"_ne_{run_params['num_epochs']}_{barcode}" 63 | elif run_params['mode'] == "GradSVGP": 64 | base_name = f"uci_GradSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 65 | f"_ne_{run_params['num_epochs']}_{barcode}" 66 | run_params['base_name'] = base_name 67 | param_filename = run_params_dir + "params_" +base_name + ".pickle" 68 | pickle.dump(run_params,open(param_filename,'wb')) 69 | print(f"Dumped param file: {param_filename}") 70 | 71 | if write_sbatch: 72 | # write a slurm batch script 73 | slurm_dir = "./slurm_scripts/" 74 | if os.path.exists(slurm_dir) is False: 75 | os.mkdir(slurm_dir) 76 | slurm_name = slurm_dir + base_name + ".sub" 77 | #slurm_name = base_name + ".sub" 78 | f = open(slurm_name,"w") 79 | f.write(f"#!/bin/bash\n") 80 | f.write(f"#SBATCH -J {run_params['mode']}_{run_params['num_directions']}\n") 81 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 82 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 83 | f.write(f"#SBATCH --get-user-env\n") 84 | f.write(f"#SBATCH -N 1\n") 85 | f.write(f"#SBATCH -n 1\n") 86 | f.write(f"#SBATCH --mem=15000\n") 87 | f.write(f"#SBATCH -t 168:00:00\n") 88 | f.write(f"#SBATCH --partition=default_partition\n") 89 | f.write(f"#SBATCH --gres=gpu:1\n") 90 | f.write(f"python3 test.py {param_filename}\n") 91 | print(f"Dumped slurm file: {slurm_name}") 92 | 93 | # write the shell submission script 94 | submit_name = slurm_dir + 'slurm_submit.sh' 95 | f = open(submit_name,"w") 96 | f.write(f"#!/bin/bash\n") 97 | f.write(f"sbatch --requeue {slurm_name}") 98 | f.close() 99 | print(f"Dumped bash script: {submit_name}") 100 | 101 | if submit: 102 | # submit the script 103 | #bash_command = f"sbatch {slurm_name}" 104 | bash_command = f"bash {submit_name}" 105 | subprocess.run(bash_command.split(" ")) 106 | 
-------------------------------------------------------------------------------- /tests/test_dsvgp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import gpytorch 5 | import tqdm 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import sys 11 | sys.path.append("../") 12 | sys.path.append("../directionalvi/utils") 13 | sys.path.append("../directionalvi") 14 | from RBFKernelDirectionalGrad import RBFKernelDirectionalGrad 15 | from DirectionalGradVariationalStrategy import DirectionalGradVariationalStrategy 16 | from directional_vi import train_gp, eval_gp 17 | from metrics import MSE 18 | import testfun 19 | 20 | # data parameters 21 | n = 600 22 | dim = 2 23 | n_test = 1000 24 | 25 | # training params 26 | num_inducing = 20 27 | num_directions = 2 28 | minibatch_size = 200 29 | num_epochs = 400 30 | 31 | # seed 32 | torch.random.manual_seed(0) 33 | # use tqdm or just have print statements 34 | tqdm = False 35 | # use data to initialize inducing stuff 36 | inducing_data_initialization = False 37 | # use natural gradients and/or CIQ 38 | use_ngd = False 39 | use_ciq = False 40 | num_contour_quadrature=15 41 | # learning rate 42 | learning_rate_hypers = 0.01 43 | learning_rate_ngd = 0.1 44 | gamma = 10.0 45 | #levels = np.array([20,150,300]) 46 | #def lr_sched(epoch): 47 | # a = np.sum(levels > epoch) 48 | # return (1./gamma)**a 49 | lr_sched = None 50 | 51 | # training and testing data 52 | train_x = torch.rand(n,dim) 53 | test_x = torch.rand(n_test,dim) 54 | train_y = testfun.f(train_x, deriv=True) 55 | test_y = testfun.f(test_x, deriv=True) 56 | if torch.cuda.is_available(): 57 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 58 | 59 | train_dataset = TensorDataset(train_x, train_y) 60 | test_dataset = TensorDataset(test_x, test_y) 61 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 62 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 63 | 64 | # train 65 | print("\n\n---DirectionalGradVGP---") 66 | print(f"Start training with {n} trainig data of dim {dim}") 67 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 68 | args={"verbose":True} 69 | t1 = time.time() 70 | model,likelihood = train_gp(train_dataset, 71 | num_inducing=num_inducing, 72 | num_directions=num_directions, 73 | minibatch_size = minibatch_size, 74 | minibatch_dim = num_directions, 75 | num_epochs =num_epochs, 76 | learning_rate_hypers=learning_rate_hypers, 77 | learning_rate_ngd=learning_rate_ngd, 78 | inducing_data_initialization=inducing_data_initialization, 79 | use_ngd = use_ngd, 80 | use_ciq = use_ciq, 81 | lr_sched=lr_sched, 82 | num_contour_quadrature=num_contour_quadrature, 83 | tqdm=tqdm,**args 84 | ) 85 | t2 = time.time() 86 | 87 | # save the model 88 | # torch.save(model.state_dict(), "../data/test_dvi_basic.model") 89 | 90 | # test 91 | means, variances = eval_gp( test_dataset,model,likelihood, 92 | num_directions=num_directions, 93 | minibatch_size=n_test, 94 | minibatch_dim=num_directions) 95 | t3 = time.time() 96 | 97 | # compute MSE 98 | test_y = test_y.cpu() 99 | test_mse = MSE(test_y[:,0],means[::num_directions+1]) 100 | # compute mean negative predictive density 101 | test_nll = -torch.distributions.Normal(means[::num_directions+1], 
variances.sqrt()[::num_directions+1]).log_prob(test_y[:,0]).mean() 102 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 103 | print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec") 104 | 105 | plot=0 106 | if plot == 1: 107 | from mpl_toolkits.mplot3d import axes3d 108 | import matplotlib.pyplot as plt 109 | fig = plt.figure(figsize=(12,6)) 110 | ax = fig.add_subplot(111, projection='3d') 111 | ax.scatter(test_x[:,0],test_x[:,1],test_y[:,0], color='k') 112 | ax.scatter(test_x[:,0],test_x[:,1],means[::num_directions+1], color='b') 113 | plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 114 | plt.show() 115 | fig = plt.figure(figsize=(12,6)) 116 | ax = fig.add_subplot(111, projection='3d') 117 | ax.scatter(test_x[:,0],test_x[:,1],test_y[:,1], color='k') 118 | ax.scatter(test_x[:,0],test_x[:,1],means[1::num_directions+1], color='b') 119 | plt.title("df/dx variational fit; actual curve is black, variational is blue") 120 | plt.show() 121 | fig = plt.figure(figsize=(12,6)) 122 | ax = fig.add_subplot(111, projection='3d') 123 | ax.scatter(test_x[:,0],test_x[:,1],test_y[:,2], color='k') 124 | ax.scatter(test_x[:,0],test_x[:,1],means[2::num_directions+1], color='b') 125 | plt.title("df/dy variational fit; actual curve is black, variational is blue") 126 | plt.show() 127 | -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | import pickle 5 | import os 6 | 7 | def encode_onehot(labels): 8 | classes = set(labels) 9 | classes_dict = {c: np.identity(len(classes))[i, :] for i, c in 10 | enumerate(classes)} 11 | labels_onehot = np.array(list(map(classes_dict.get, labels)), 12 | dtype=np.int32) 13 | return labels_onehot 14 | 15 | 16 | def load_data(dataset="cora", train_percent=0.036): 17 | """Load citation network dataset""" 18 | path = os.path.abspath(__file__ + f"/../data/{dataset}/") 19 | 20 | idx_features_labels = np.genfromtxt("{}/{}.content".format(path, dataset), 21 | dtype=np.dtype(str)) 22 | features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32) 23 | labels = encode_onehot(idx_features_labels[:, -1]) 24 | 25 | # build graph 26 | idx = np.array(idx_features_labels[:, 0], dtype=np.int32) 27 | idx_map = {j: i for i, j in enumerate(idx)} 28 | edges_unordered = np.genfromtxt("{}/{}.cites".format(path, dataset), 29 | dtype=np.int32) 30 | edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), 31 | dtype=np.int32).reshape(edges_unordered.shape) 32 | adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 33 | shape=(labels.shape[0], labels.shape[0]), 34 | dtype=np.float32) 35 | 36 | # build symmetric adjacency matrix 37 | adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) 38 | 39 | features = normalize(features) 40 | adj = normalize(adj + sp.eye(adj.shape[0])) 41 | 42 | num_total_nodes = len(labels) 43 | n_train = int(train_percent*num_total_nodes) 44 | idx_train = range(n_train) 45 | idx_val = range(n_train, n_train+300) 46 | idx_test = range(n_train+300, num_total_nodes) 47 | 48 | features = torch.FloatTensor(np.array(features.todense())) 49 | labels = torch.LongTensor(np.where(labels)[1]) 50 | adj = sparse_mx_to_torch_sparse_tensor(adj) 51 | 52 | idx_train = torch.LongTensor(idx_train) 53 | idx_val = torch.LongTensor(idx_val) 54 | 
idx_test = torch.LongTensor(idx_test) 55 | 56 | return adj, features, labels, idx_train, idx_val, idx_test 57 | 58 | def load_citeseer(train_percent=0.036): 59 | dataset="citeseer" 60 | path = os.path.abspath(__file__ + f"/../data/{dataset}/") 61 | nodes_source=f"{path}/{dataset}.content" 62 | edges_source=f"{path}/{dataset}.cites" 63 | # load nodes dataframe 64 | df_nodes = pickle.load(open(f'{path}/{dataset}_nodes.pkl','rb')) 65 | # load edges, np array of type int32 66 | edges = pickle.load(open(f'{path}/{dataset}_edges.pkl','rb')) 67 | 68 | # node features and labels 69 | idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), 70 | dtype=np.dtype(str)) 71 | features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32) 72 | labels = encode_onehot(idx_features_labels[:, -1]) 73 | 74 | # generate adjacency matrix 75 | adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 76 | shape=(labels.shape[0], labels.shape[0]), 77 | dtype=np.float32) 78 | # build symmetric adjacency matrix 79 | adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) 80 | adj = normalize(adj + sp.eye(adj.shape[0])) 81 | 82 | features = torch.FloatTensor(np.array(features.todense())) 83 | labels = torch.LongTensor(np.where(labels)[1]) 84 | adj = sparse_mx_to_torch_sparse_tensor(adj) 85 | 86 | num_nodes = len(labels) 87 | n_train = int(num_nodes*train_percent) 88 | idx_train = range(n_train) 89 | idx_val = range(n_train, n_train+200) 90 | idx_test = range(n_train+200, num_nodes) 91 | idx_train = torch.LongTensor(idx_train) 92 | idx_val = torch.LongTensor(idx_val) 93 | idx_test = torch.LongTensor(idx_test) 94 | return adj, features, labels, idx_train, idx_val, idx_test 95 | 96 | 97 | 98 | 99 | 100 | def normalize(mx): 101 | """Row-normalize sparse matrix""" 102 | rowsum = np.array(mx.sum(1)) 103 | r_inv = np.power(rowsum, -1).flatten() 104 | r_inv[np.isinf(r_inv)] = 0. 
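    # rows of mx that sum to zero give inf in r_inv above; zeroing those entries leaves
    # such rows untouched. sp.diags(r_inv) builds the diagonal matrix D^{-1}, so the dot
    # product below computes D^{-1} @ mx, rescaling every nonzero row of mx to sum to one.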
105 | r_mat_inv = sp.diags(r_inv) 106 | mx = r_mat_inv.dot(mx) 107 | return mx 108 | 109 | 110 | def accuracy(output, labels): 111 | preds = output.max(1)[1].type_as(labels) 112 | correct = preds.eq(labels).double() 113 | correct = correct.sum() 114 | return correct / len(labels) 115 | 116 | 117 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 118 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 119 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 120 | indices = torch.from_numpy( 121 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 122 | values = torch.from_numpy(sparse_mx.data) 123 | shape = torch.Size(sparse_mx.shape) 124 | return torch.sparse.FloatTensor(indices, values, shape) 125 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gcn_sgd.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import random 4 | import time 5 | import argparse 6 | import wandb 7 | 8 | import torch 9 | import gpytorch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from torch.utils.data import TensorDataset, DataLoader 13 | # import networkx as nx 14 | 15 | from matplotlib import pyplot as plt 16 | 17 | import os 18 | import sys 19 | sys.path.append("../") 20 | sys.path.append("../../directionalvi/utils") 21 | sys.path.append("../../directionalvi") 22 | import directional_vi 23 | import traditional_vi 24 | import grad_svgp 25 | from metrics import MSE 26 | import pickle 27 | from scipy.io import loadmat 28 | # from GCN.utils import * 29 | # from GCN.models import GCN 30 | from torch_geometric.datasets import Planetoid 31 | from GCN.models2 import Net 32 | 33 | def str2bool(v): 34 | if isinstance(v, bool): 35 | return v 36 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 37 | return True 38 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 39 | return False 40 | else: 41 | raise argparse.ArgumentTypeError('Boolean value expected.') 42 | 43 | 44 | 45 | parser = argparse.ArgumentParser(description="parse args") 46 | # Directories for data/logs 47 | # parser.add_argument("--watch_model", type=str2bool, nargs='?',const=True, default=False) 48 | parser.add_argument("--exp_name", type=str, default="-") 49 | # Dataset and model type 50 | parser.add_argument("-d", "--dataset", type=str, default="synthetic-Branin") 51 | parser.add_argument("--model", type=str, default="DSVGP") 52 | parser.add_argument("-vs", "--variational_strategy", type=str, default="standard", choices=["standard", "CIQ"]) 53 | parser.add_argument("-vd", "--variational_distribution", type=str, default="standard", choices=["standard", "NGD"]) 54 | parser.add_argument("-m", "--num_inducing", type=int, default=10) 55 | parser.add_argument("-p", "--num_directions", type=int, default=10) 56 | parser.add_argument("-n", "--num_epochs", type=int, default=1) 57 | parser.add_argument("-bs", "--batch_size", type=int, default=256) 58 | parser.add_argument("--turbo_batch_size", type=int, default=50) 59 | parser.add_argument("--turbo_max_evals", type=int, default=100) 60 | parser.add_argument("--lr", type=float, default=0.01) 61 | parser.add_argument("--lr_ngd", type=float, default=0.1) 62 | parser.add_argument("--gamma", type=float, default=0.1) 63 | parser.add_argument("--num_contour_quad", type=int, default=15) 64 | parser.add_argument("--lr_sched", type=str, default=None) 65 | parser.add_argument("--mll_type", type=str, default="ELBO", choices=["ELBO", "PLL"]) 66 | 
parser.add_argument("-s", "--seed", type=int, default=0) 67 | 68 | 69 | args = vars(parser.parse_args()) 70 | 71 | 72 | exp_name = args["exp_name"] 73 | num_epochs = args["turbo_max_evals"] 74 | args["model"] = "SGD" 75 | expname_full = f"{args['dataset']}_{args['model']}_epochs{num_epochs}_exp{exp_name}" 76 | print(expname_full) 77 | 78 | 79 | # output result file names 80 | data_dir = "./results/" 81 | data_filename = data_dir + expname_full + ".pickle" 82 | if os.path.exists(data_dir) is False: 83 | os.mkdir(data_dir) 84 | 85 | 86 | torch.set_default_dtype(torch.float64) 87 | torch.random.manual_seed(args["seed"]) 88 | 89 | def test(data, train=True): 90 | model.eval() 91 | 92 | correct = 0 93 | pred = model(data).max(dim=1)[1] 94 | 95 | if train: 96 | correct += pred[data.train_mask].eq(data.y[data.train_mask]).sum().item() 97 | return correct / (len(data.y[data.train_mask])) 98 | else: 99 | correct += pred[data.test_mask].eq(data.y[data.test_mask]).sum().item() 100 | return correct / (len(data.y[data.test_mask])) 101 | 102 | 103 | def train(data, plot=False): 104 | train_acc_list, test_acc_list, loss_history = list(), list(), list() 105 | for epoch in range(num_epochs): 106 | model.train() 107 | optimizer.zero_grad() 108 | out = model(data) 109 | loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) 110 | loss.backward() 111 | optimizer.step() 112 | 113 | train_acc = test(data) 114 | test_acc = test(data, train=False) 115 | train_acc_list.append(train_acc) 116 | test_acc_list.append(test_acc) 117 | loss_history.append(loss.item()) 118 | print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'. 119 | format(epoch, loss.item(), train_acc, test_acc)) 120 | return loss_history, train_acc_list, test_acc_list 121 | 122 | 123 | 124 | # load data for GCN 125 | dataset = "PubMed" 126 | assert args["dataset"] == "PubMed" 127 | dataset = Planetoid(root='/tmp/PubMed', name='PubMed') 128 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 129 | turbo_device = 'cuda' if torch.cuda.is_available() else 'cpu' 130 | model = Net(dataset).to(device) 131 | data = dataset[0].to(device) 132 | print("\nDimension of GCN:", model.n_params) 133 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) 134 | loss_history, train_acc_list, test_acc_list = train(data) 135 | 136 | print("\nFinal parameters of GCN:", ) 137 | for name, param in model.named_parameters(): 138 | print(name) 139 | print(param) 140 | print(param.min()) 141 | print(param.max()) 142 | print() 143 | 144 | # dump the data 145 | outdata = {} 146 | outdata['X'] = None 147 | outdata['fX'] = loss_history 148 | outdata['train_acc_list'] = train_acc_list 149 | outdata['test_acc_list'] = test_acc_list 150 | outdata['xopt'] = None 151 | outdata['fopt'] = min(loss_history) 152 | # add the run params 153 | outdata.update(args) 154 | pickle.dump(outdata,open(data_filename,"wb")) 155 | print(f"Dropped file: {data_filename}") 156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /directionalvi/utils/load_data.py: -------------------------------------------------------------------------------- 1 | from synthetic_functions import * 2 | from rescale import * 3 | import scipy.io 4 | import torch 5 | from torch.utils.data import TensorDataset, DataLoader 6 | 7 | def load_synthetic_data(test_fun, n, **kwargs): 8 | """ 9 | load synthetic data 10 | Input: 11 | test_fun: a modified Botorch test function 12 | n: number of 
datapoints 13 | Output: 14 | x: torch tensor, random data from unit cube 15 | y: torch tensor, normalized and rescaled labels (w/ or w/o derivatives) 16 | """ 17 | torch.random.manual_seed(kwargs["seed"]) 18 | dim = test_fun.dim 19 | x_unit = torch.rand(n,dim) 20 | # evaluate in the true range 21 | lb, ub = test_fun.get_bounds() 22 | x = from_unit_cube(x_unit, lb, ub) 23 | if kwargs["derivative"]: 24 | y = test_fun.evaluate_true_with_deriv(x) 25 | else: 26 | y = test_fun.evaluate_true(x) 27 | # normalize y values (with or without derivatives) 28 | normalize(y, **kwargs) 29 | if kwargs["derivative"]: 30 | # mapping derivative values to unit cube 31 | f = y[..., 0].reshape(len(y),1) 32 | g = y[..., 1:].reshape(len(y),-1) 33 | g *= (ub - lb) 34 | y = torch.cat([f, g], 1) 35 | 36 | # add scaling factors to info_dict for further accurate plot 37 | info_dict = {} 38 | return x_unit, y, info_dict 39 | 40 | #use real_helens when calling in exp_script.py 41 | def load_helens(data_src_path, **args): 42 | """ 43 | load synthetic data 44 | Input: 45 | data_src_path: path to dataset 46 | filter_val: float64 in [0, 1]; code will filter out points which possess x-coordinate > filter_val 47 | Output: 48 | train_dataset: torch TensorDataset 49 | test_dataset: torch TensorDataset 50 | dim: x-dimension of data 51 | """ 52 | torch.random.manual_seed(args["seed"]) 53 | n = args["n_train"] 54 | filter_val = args["filter_val"] 55 | #n_test = args["n_test"] 56 | 57 | # Apply normalizations to dataset 58 | mat = scipy.io.loadmat(data_src_path) 59 | x = torch.tensor(np.float64(mat['mth_points'])).float() 60 | SCALE_0_FACTOR = x[:, 0].max() 61 | SCALE_1_FACTOR = x[:, 1].max() 62 | x[:, 0] = x[:, 0]/SCALE_0_FACTOR 63 | x[:, 1] = x[:, 1]/SCALE_1_FACTOR 64 | y = torch.tensor(np.float64(mat['mth_verts'])).float() 65 | SCALE_Y_FACTOR = max(y) 66 | y = y/SCALE_Y_FACTOR 67 | if args["derivative"]: 68 | dy = torch.tensor(np.float64(mat['mth_grads'])).float() 69 | dy = dy / SCALE_Y_FACTOR #modify derivatives due to y-scaling 70 | dy[:, 0] = dy[:, 0]*SCALE_0_FACTOR #modify derivatives due to x-scaling 71 | dy[:, 1] = dy[:, 1]*SCALE_1_FACTOR 72 | Y = torch.cat((y, dy), dim = 1).float() 73 | else: 74 | Y = y 75 | # FILTERING DATA 76 | # location concatenated with y and dy values, for the sake of filtering 77 | full_data = torch.cat((x, Y), dim=1).float() 78 | temp_full_data = np.array(full_data) 79 | def fun(x, val = filter_val): 80 | if x[0]>val or x[1]>val: 81 | return False 82 | else: 83 | return True 84 | filtered = filter(fun, temp_full_data) 85 | arr = [item for item in filtered] 86 | len_arr = len(arr) 87 | arr = arr[:len_arr] 88 | #recover X and Y from filtered concatenated values (arr) 89 | X = torch.tensor([item[0:2] for item in arr]) 90 | Y = torch.tensor([item[2:] for item in arr]) 91 | Y = Y.squeeze(-1) 92 | # shuffle the data 93 | indices = torch.randperm(X.size(0))[:X.size(0)] 94 | X = X[indices] 95 | Y = Y[indices] 96 | dim = X.shape[-1] 97 | train_x = X[:n, :].contiguous() 98 | train_y = Y[:n].contiguous() 99 | test_x = X[n:, :].contiguous() 100 | test_y = Y[n:].contiguous() 101 | 102 | if torch.cuda.is_available(): 103 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 104 | 105 | # dataset = TensorDataset(x, data) 106 | # Train-Test Split 107 | # train_dataset, test_dataset = torch.utils.data.random_split(dataset, [n, len_arr - n])#, generator=torch.Generator().manual_seed(42)) 108 | # dim = len(train_dataset[0][0]) 109 | info_dict = {"SCALE_x0_FACTOR": 
SCALE_0_FACTOR.item(), 110 | "SCALE_x1_FACTOR": SCALE_1_FACTOR.item(), 111 | "SCALE_Y_FACTOR": SCALE_Y_FACTOR[0].item(), 112 | "n_train":n, 113 | "n_test": len_arr - n} 114 | return train_x, train_y, test_x, test_y, dim, info_dict 115 | 116 | 117 | def load_3droad(data_src_path, **args): 118 | data = torch.Tensor(scipy.io.loadmat(data_src_path)['data']) 119 | X = data[:, :-2] 120 | X = X - X.min(0)[0] 121 | X = 2 * (X / X.max(0)[0]) - 1 122 | y = data[:, -1] 123 | y.sub_(y.mean(0)).div_(y.std(0)) 124 | 125 | # shuffle the data 126 | torch.random.manual_seed(args["seed"]) 127 | indices = torch.randperm(X.size(0))[:326155] 128 | X = X[indices] 129 | y = y[indices] 130 | dim = X.shape[-1] 131 | 132 | train_n = args["n_train"] 133 | # train_n = int(floor(0.8 * len(X))) 134 | train_x = X[:train_n, :].contiguous() 135 | train_y = y[:train_n].contiguous() 136 | 137 | test_x = X[train_n:, :].contiguous() 138 | test_y = y[train_n:].contiguous() 139 | 140 | if torch.cuda.is_available(): 141 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 142 | 143 | train_dataset = TensorDataset(train_x, train_y) 144 | test_dataset = TensorDataset(test_x, test_y) 145 | 146 | info_dict = {"n_train":train_n, 147 | "n_test": len(X) - train_n} 148 | 149 | return train_x, train_y, test_x, test_y, dim, info_dict -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import time 5 | import argparse 6 | import numpy as np 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.utils.data import DataLoader, TensorDataset 12 | 13 | from utils import * 14 | from models import GCN 15 | try: # import wandb if watch model on weights&biases 16 | import wandb 17 | except: 18 | pass 19 | 20 | # Training settings 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--no-cuda', action='store_true', default=False, 23 | help='Disables CUDA training.') 24 | parser.add_argument('--fastmode', action='store_true', default=False, 25 | help='Validate during training pass.') 26 | parser.add_argument('--dataset', type=str, default='cora', help='dataset name') 27 | parser.add_argument('--seed', type=int, default=42, help='Random seed.') 28 | parser.add_argument('--epochs', type=int, default=200, 29 | help='Number of epochs to train.') 30 | parser.add_argument('--lr', type=float, default=0.01, 31 | help='Initial learning rate.') 32 | parser.add_argument('--weight_decay', type=float, default=5e-4, 33 | help='Weight decay (L2 loss on parameters).') 34 | parser.add_argument('--hidden', type=int, default=16, 35 | help='Number of hidden units.') 36 | parser.add_argument('--dropout', type=float, default=0.5, 37 | help='Dropout rate (1 - keep probability).') 38 | parser.add_argument('--n_train', type=int, default=10000, 39 | help='number of training data') 40 | parser.add_argument('--batch_size', type=int, default=1024, 41 | help='batch size') 42 | parser.add_argument('--train_percent', type=float, default=0.1, 43 | help='training label percentage') 44 | parser.add_argument('--watch_model', type=bool, default=False, 45 | help='watch model from wandb') 46 | parser.add_argument('--expid', type=int, default="-", 47 | help='experiment id') 48 | parser.add_argument('--lr_sched', type=str, default="step_lr", 49 | help='type 
of learning rate scheduler') 50 | 51 | args = parser.parse_args() 52 | args.cuda = not args.no_cuda and torch.cuda.is_available() 53 | 54 | if args.watch_model: # watch model on weights&biases 55 | wandb.init(project='L2C', entity='xinranzhu', 56 | name=f"{args.dataset}_exp{args.expid}") 57 | print("Experiment settings:") 58 | print(args) 59 | wandb.config.seed = args.seed 60 | wandb.config.dropout = args.dropout 61 | wandb.config.epochs = args.epochs 62 | wandb.config.lr = args.lr 63 | wandb.config.weight_decay = args.weight_decay 64 | wandb.config.hidden = args.hidden 65 | wandb.config.train_percent = args.train_percent 66 | wandb.config.expid = args.expid 67 | 68 | 69 | np.random.seed(args.seed) 70 | torch.manual_seed(args.seed) 71 | if args.cuda: 72 | torch.cuda.manual_seed(args.seed) 73 | 74 | # Load data 75 | if args.dataset == "cora": 76 | adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset=args.dataset, 77 | train_percent=args.train_percent) 78 | if args.dataset == "reddit": 79 | adj, features, labels, idx_train, idx_val, idx_test = load_reddit(args.n_train) 80 | 81 | if args.dataset == "citeseer": 82 | adj, features, labels, idx_train, idx_val, idx_test = load_citeseer(train_percent=args.train_percent) 83 | 84 | # Model and optimizer 85 | model = GCN(nfeat=features.shape[1], 86 | nhid=args.hidden, 87 | nclass=labels.max().item() + 1, 88 | dropout=args.dropout) 89 | optimizer = optim.Adam(model.parameters(), 90 | lr=args.lr, weight_decay=args.weight_decay) 91 | 92 | if args.cuda: 93 | model.cuda() 94 | features = features.cuda() 95 | adj = adj.cuda() 96 | labels = labels.cuda() 97 | idx_train = idx_train.cuda() 98 | idx_val = idx_val.cuda() 99 | idx_test = idx_test.cuda() 100 | 101 | if args.lr_sched == "step_lr": 102 | milestones = [int(len(idx_train)/3), int(2*len(idx_train)/3)] 103 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1) 104 | elif args.lr_sched == "lambda_lr": 105 | lr_sched_fun = lambda epoch: 1.0/(epoch+1) 106 | lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_sched_fun) 107 | else: 108 | lr_scheduler = None 109 | 110 | def train(epoch, print_loss=True, lr_scheduler=None): 111 | t = time.time() 112 | model.train() 113 | optimizer.zero_grad() 114 | output = model(features, adj) 115 | loss_train = F.nll_loss(output[idx_train], labels[idx_train]) 116 | wandb.log({'loss': loss_train}) 117 | acc_train = accuracy(output[idx_train], labels[idx_train]) 118 | loss_train.backward() 119 | optimizer.step() 120 | if lr_scheduler != None: 121 | variational_scheduler.step() 122 | if not args.fastmode: 123 | # Evaluate validation set performance separately, 124 | # deactivates dropout during validation run. 
125 | model.eval() 126 | output = model(features, adj) 127 | 128 | loss_val = F.nll_loss(output[idx_val], labels[idx_val]) 129 | acc_val = accuracy(output[idx_val], labels[idx_val]) 130 | if print_loss: 131 | print('Epoch: {:04d}'.format(epoch+1), 132 | 'loss_train: {:.4f}'.format(loss_train.item()), 133 | 'acc_train: {:.4f}'.format(acc_train.item()), 134 | 'loss_val: {:.4f}'.format(loss_val.item()), 135 | 'acc_val: {:.4f}'.format(acc_val.item()), 136 | 'time: {:.4f}s'.format(time.time() - t)) 137 | 138 | 139 | def test(): 140 | model.eval() 141 | output = model(features, adj) 142 | loss_test = F.nll_loss(output[idx_test], labels[idx_test]) 143 | acc_test = accuracy(output[idx_test], labels[idx_test]) 144 | print("Test set results:", 145 | "loss= {:.4f}".format(loss_test.item()), 146 | "accuracy= {:.4f}".format(acc_test.item())) 147 | 148 | 149 | # Train model 150 | t_total = time.time() 151 | for epoch in range(args.epochs): 152 | print_loss = True if epoch % 50 == 0 else False 153 | train(epoch, print_loss=print_loss) 154 | print("Optimization Finished!") 155 | print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) 156 | 157 | # Testing 158 | test() 159 | wandb.save(f"a.out_{args.dataset}exp{args.expid}") 160 | 161 | -------------------------------------------------------------------------------- /directionalvi/RBFKernelDirectionalGrad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from gpytorch.lazy.kronecker_product_lazy_tensor import KroneckerProductLazyTensor 5 | from gpytorch.kernels.rbf_kernel import RBFKernel, postprocess_rbf 6 | 7 | 8 | class RBFKernelDirectionalGrad(RBFKernel): 9 | r""" 10 | Pass in v1 and v2 through the params. If v1 has n_dir1 directions per 11 | point in x2 then it should be shape n1*n_dir1 x dim. The directions 12 | are assumed to be stored in blocks so that the first n_dir1 directions 13 | belong to x1[0] and the second n_dir1 directions belong to x1[1] etc. 14 | 15 | If you have a single set of global directions such as torch.eye(dim), then 16 | you can repeat those to make v1 and v2 with 17 | v1 = torch.eye(dim).repeat(n1,1) 18 | 19 | Args: 20 | :attr:`batch_shape` (torch.Size, optional): 21 | Set this if you want a separate lengthscale for each 22 | batch of input data. It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])`. 23 | :attr:`active_dims` (tuple of ints, optional): 24 | Set this if you want to compute the covariance of only a few input dimensions. The ints 25 | corresponds to the indices of the dimensions. Default: `None`. 26 | :attr:`lengthscale_prior` (Prior, optional): 27 | Set this if you want to apply a prior to the lengthscale parameter. Default: `None`. 28 | :attr:`lengthscale_constraint` (Constraint, optional): 29 | Set this if you want to apply a constraint to the lengthscale parameter. Default: `Positive`. 30 | :attr:`eps` (float): 31 | The minimum value that the lengthscale can take (prevents divide by zero errors). Default: `1e-6`. 32 | 33 | Attributes: 34 | :attr:`lengthscale` (Tensor): 35 | The lengthscale parameter. Size/shape of parameter depends on the 36 | :attr:`ard_num_dims` and :attr:`batch_shape` arguments. 
37 | 38 | 39 | """ 40 | 41 | def forward(self, x1, x2, diag=False, **params): 42 | batch_shape = x1.shape[:-2] 43 | n_batch_dims = len(batch_shape) 44 | n1, d = x1.shape[-2:] 45 | n2 = x2.shape[-2] 46 | 47 | 48 | v1 = params['v1'] 49 | v2 = params['v2'] 50 | # number of directions per point 51 | n_dir1 = int(v1.shape[-2]/n1) 52 | n_dir2 = int(v2.shape[-2]/n2) 53 | assert n_dir1 == n_dir2, "v1 and v2 must contain same number of directions" 54 | 55 | self.set_num_directions(n_dir1) 56 | # normalize directions 57 | v1 = (v1.T/torch.norm(v1,dim=1)).T 58 | v2 = (v2.T/torch.norm(v2,dim=1)).T 59 | 60 | # K = torch.zeros(*batch_shape, n1 * (d + 1), n2 * (d + 1), device=x1.device, dtype=x1.dtype) 61 | K = torch.zeros(*batch_shape, n1 * (n_dir1 + 1), n2 * (n_dir2 + 1), device=x1.device, dtype=x1.dtype) 62 | K = torch.zeros(*batch_shape, n1 * (n_dir1 + 1), n2 * (n_dir2 + 1), device=x1.device, dtype=x1.dtype) 63 | 64 | 65 | if not diag: 66 | # Scale the inputs by the lengthscale (for stability) 67 | x1_ = x1.div(self.lengthscale) 68 | x2_ = x2.div(self.lengthscale) 69 | 70 | # 1) Kernel block 71 | diff = self.covar_dist(x1_, x2_, square_dist=True, dist_postprocess_func=postprocess_rbf, **params) 72 | K_11 = diff 73 | K[..., :n1, :n2] = K_11 74 | 75 | 76 | # 2) First gradient block 77 | x2_v2 = x2_.reshape(n2,1,d).bmm(torch.transpose(v2.reshape(n2,n_dir2,d),-2,-1)) 78 | x1_v2 = x1_ @ v2.T 79 | outer = x1_v2 - x2_v2.flatten() 80 | # permute cols so we get blocks for v1,v2,v3,... 81 | pi1 = torch.arange(n2 * (n_dir2)).view(n2,n_dir2).t().reshape((n2 * (n_dir2))) 82 | outer1 = outer[:,pi1]/ self.lengthscale.unsqueeze(-2) 83 | K[..., :n1, n2:] = outer1 * K_11.repeat([*([1] * (n_batch_dims + 1)), n_dir2]) 84 | 85 | # Second gradient block 86 | x1_v1 = x1_.reshape(n1,1,d).bmm(torch.transpose(v1.reshape(n1,n_dir1,d),-2,-1)) 87 | x2_v1 = x2_ @ v1.T 88 | outer = x1_v1.flatten() - x2_v1 89 | # permute cols so we get blocks for v1,v2,v3,... 
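            # pi2 below reorders the n1*n_dir1 derivative columns from point-major order
            # (all directions of x1[0] first) to direction-major order (all points for the
            # first direction, then the second, ...), mirroring the pi1 permutation above.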
90 | pi2 = torch.arange(n1 * (n_dir1)).view(n1,n_dir1).t().reshape((n1 * (n_dir1))) 91 | outer2 = outer[:,pi2] 92 | outer2 = outer2.t() / self.lengthscale.unsqueeze(-2) 93 | K[..., n1:, :n2] = -outer2 * K_11.repeat([n_dir1,*([1] * (n_batch_dims + 1))]) 94 | 95 | 96 | # 4) Hessian block (n1*n_dir1, n2*n_dir2) 97 | outer3 = outer1.repeat(1, n_dir2, 1) * outer2.repeat(1,1,n_dir1) 98 | # kronecker product term 99 | kp = v1 @ v2.T / self.lengthscale.pow(2) 100 | kp = kp[:,pi1][pi2,:] 101 | chain_rule = kp - outer3 102 | K[..., n1:, n2:] = chain_rule * K_11.repeat([*([1] * n_batch_dims), n_dir1,n_dir2]) 103 | 104 | # Apply a perfect shuffle permutation to match the MutiTask ordering 105 | pi1 = torch.arange(n1 * (n_dir1 + 1)).view(n_dir1 + 1, n1).t().reshape((n1 * (n_dir1 + 1))) 106 | pi2 = torch.arange(n2 * (n_dir2 + 1)).view(n_dir2 + 1, n2).t().reshape((n2 * (n_dir2 + 1))) 107 | K = K[..., pi1, :][..., :, pi2] 108 | return K 109 | 110 | else: 111 | if not (n1 == n2 and torch.eq(x1, x2).all() and n_dir1 == n_dir2 and torch.eq(v1, v2).all()): 112 | raise RuntimeError("diag=True only works when x1 == x2 and v1 == v2") 113 | 114 | kernel_diag = super(RBFKernelDirectionalGrad, self).forward(x1, x2, diag=True) 115 | grad_diag = torch.ones(*batch_shape, n2, n_dir2, device=x1.device, dtype=x1.dtype) / self.lengthscale.pow(2) 116 | grad_diag = grad_diag.transpose(-1, -2).reshape(*batch_shape, n2 * n_dir2) 117 | k_diag = torch.cat((kernel_diag, grad_diag), dim=-1) 118 | pi = torch.arange(n2 * (n_dir2 + 1)).view(n_dir2 + 1, n2).t().reshape((n2 * (n_dir2 + 1))) 119 | return k_diag[..., pi] 120 | 121 | def set_num_directions(self,num_directions): 122 | self.n_dir1 = num_directions 123 | 124 | def num_outputs_per_input(self, x1, x2): 125 | return self.n_dir1 +1 126 | 127 | 128 | 129 | if __name__ == '__main__': 130 | 131 | torch.manual_seed(0) 132 | # generate training data 133 | n1 = 100 134 | n2 = n1 135 | dim = 2 136 | train_x = torch.rand(n1,dim) 137 | # train_x2 = torch.rand(n2,dim) 138 | train_x2 = train_x 139 | # set directions 140 | n_directions = 2 141 | # v1 = torch.eye(dim)[:n_directions] 142 | v1 = torch.rand(n_directions,dim) 143 | v1 = v1.repeat(n1,1) 144 | # v2 = torch.eye(dim)[:n_directions] 145 | # v2 = torch.rand(n_directions,dim) 146 | # v2 = v2.repeat(n2,1) 147 | v2 = v1 148 | v1 = (v1.T/torch.norm(v1,dim=1)).T 149 | v2 = (v2.T/torch.norm(v2,dim=1)).T 150 | 151 | k = RBFKernelDirectionalGrad() 152 | params = {'v1':v1,'v2':v2} 153 | K = k(train_x,train_x2, **params) 154 | print(K.detach().numpy().shape) 155 | 156 | # torch.cholesky(K.add_jitter().evaluate()) 157 | # verify against RBFKernelGrad 158 | # from gpytorch.kernels import RBFKernelGrad 159 | # kk = RBFKernelGrad() 160 | # KK = kk(train_x,train_x2) 161 | # print(KK.detach().numpy() - K.detach().numpy()) 162 | -------------------------------------------------------------------------------- /experiments/GNN_bo/plot_traj.py: -------------------------------------------------------------------------------- 1 | from sys import meta_path 2 | import matplotlib.pyplot as plt 3 | import matplotlib.pylab as pl 4 | # import seaborn as sns 5 | import pandas as pd 6 | import pickle 7 | import numpy as np 8 | import glob 9 | import os 10 | 11 | ADD_SHARED=True 12 | FONTSIZE=25 13 | MARKERSIZE=20 14 | FIGURESUZE=(10,7) 15 | ALPHA=0.2 16 | LINEWIDTH=5 17 | PADDING=0.1 18 | 19 | if ADD_SHARED: 20 | style_dict = {"SGD": ["GD", "dotted", '#2ca02c'], 21 | "TURBO": ["TuRBO", "dashed", '#ff7f0e'], 22 | "DSVGP1": ["TuRBO-DPPGPR1", "solid", '#1f77b4'], 23 
| "DSVGP2": ["TuRBO-DPPGPR2", "solid", '#d62728'], 24 | "DSVGP3": ["TuRBO-DPPGPR3", "solid", '#e377c2'], 25 | "DSVGP_shared1": ["TuRBO-DPPGPR-Shared1", "dotted", '#1f77b4'], 26 | "DSVGP_shared2": ["TuRBO-DPPGPR-Shared2", "dotted", '#d62728'], 27 | "DSVGP_shared3": ["TuRBO-DPPGPR-Shared3", "dotted", '#e377c2'], 28 | "SVGP": ["TuRBO-PPGPR", "dashed", '#9467bd'], 29 | "BO": ["BO", "dashed", '#8c564b'], 30 | "random": ["Random", "dotted", "#7f7f7f"] } 31 | else: 32 | style_dict = {"SGD": ["GD", "dotted", '#2ca02c'], 33 | "TURBO": ["TuRBO", "dashed", '#ff7f0e'], 34 | "DSVGP1": ["TuRBO-DPPGPR1", "solid", '#1f77b4'], 35 | "DSVGP2": ["TuRBO-DPPGPR2", "solid", '#d62728'], 36 | "DSVGP3": ["TuRBO-DPPGPR3", "solid", '#e377c2'], 37 | "SVGP": ["TuRBO-PPGPR", "dashed", '#9467bd'], 38 | "BO": ["BO", "dashed", '#8c564b'], 39 | "random": ["Random", "dotted", "#7f7f7f"] } 40 | 41 | 42 | def plot_average(style_dict, dataset, methods_list, data_type='fX', deleted_methods=None): 43 | assert data_type == 'fX' or data_type == 'train_acc_list' or data_type == 'test_acc_list' 44 | 45 | # sort to fix color for each method 46 | colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 47 | 48 | # collect data for each method 49 | data_files_dict = {} 50 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=FIGURESUZE) 51 | 52 | for i, method in enumerate(methods_list): 53 | data_files_dict[method] = glob.glob(f"./results/{dataset}_{method}*.pickle") 54 | fX_set = [] 55 | for ii in range(len(data_files_dict[method])): 56 | ff = data_files_dict[method][ii] 57 | d = pickle.load(open(ff, "rb")) 58 | if method == "TURBO" or method == "SVGP": 59 | assert d['model'] == method 60 | elif method == "DSVGP1" or method == "DSVGP_shared1": 61 | assert d['model'].startswith("DSVGP") and d['num_directions'] == 1 62 | elif method == "DSVGP2" or method == "DSVGP_shared2": 63 | assert d['model'].startswith("DSVGP") and d['num_directions'] == 2 64 | elif method == "DSVGP3" or method == "DSVGP_shared3": 65 | assert d['model'].startswith("DSVGP") and d['num_directions'] == 3 66 | 67 | fX = d[data_type] 68 | fXmin = np.minimum.accumulate(fX) if data_type == 'fX' else np.maximum.accumulate(fX) 69 | fX_set.append(fXmin) 70 | 71 | print(f"Averaging {len(fX_set)} trials for {method}\n") 72 | 73 | if deleted_methods == None or method not in deleted_methods: 74 | label_cur = style_dict[method][0] 75 | linestyle_cur = style_dict[method][1] 76 | color_cur = style_dict[method][2] 77 | # find mean and std of fX_set 78 | if len(fX_set) > 1: 79 | fX_mean = np.mean(fX_set, axis=0) 80 | ax.plot(fX_mean,linewidth=LINEWIDTH, 81 | color=color_cur,label=label_cur,linestyle=linestyle_cur) 82 | fX_std = np.std(fX_set, axis=0, ddof=0) 83 | ax.fill_between(range(len(fX_mean)), fX_mean-fX_std, fX_mean+fX_std, color=color_cur, alpha=ALPHA) 84 | elif len(fX_set) == 1: 85 | ax.plot(fX_set[0],linewidth=LINEWIDTH, 86 | color=color_cur,label=label_cur,linestyle=linestyle_cur) 87 | 88 | 89 | 90 | 91 | # plot 92 | rc = {'figure.figsize':(12,6), 93 | 'axes.facecolor':'white', 94 | 'axes.grid' : True, 95 | 'grid.color': '.8', 96 | 'font.family':'Times New Roman', 97 | 'font.size' : FONTSIZE} 98 | plt.rcParams.update(rc) 99 | if data_type == 'fX': 100 | ylabel = 'Training loss' 101 | plt.ylim( (0.65e-2,1.35e2) ) 102 | plt.yscale("log") 103 | elif data_type == 'test_acc_list': 104 | ylabel = 'Test accuracy' 105 | elif data_type == 'train_acc_list': 106 | plt.ylim((0.28, 1.05)) 107 | ylabel = 'Training accuracy' 108 
| 109 | plt.xticks(fontsize=FONTSIZE) 110 | plt.yticks(fontsize=FONTSIZE) 111 | plt.ylabel(ylabel, fontsize=FONTSIZE) 112 | plt.xlabel("Number of evaluations", fontsize=FONTSIZE) 113 | # box = plt.get_position() 114 | # plt.set_position([box.x0, box.y0, box.width * 0.8, box.height]) 115 | # plt.legend(loc='best',prop={'size': fontsize}) 116 | 117 | plt.grid() 118 | # plt.legend(bbox_to_anchor=(1.04,1), loc="upper left") 119 | 120 | if ADD_SHARED: 121 | figurename = f"TuRBO_{dataset}_{data_type}_add_shared.pdf" 122 | else: 123 | figurename = f"TuRBO_{dataset}_{data_type}.pdf" 124 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 125 | fig.savefig(figurepath, bbox_inches = 'tight', pad_inches = PADDING) 126 | print("Figure saved:", figurepath) 127 | 128 | def plot_legend(style_dict, dataset, methods_list): 129 | plt.clf() 130 | plt.cla() 131 | 132 | if ADD_SHARED: 133 | figurename = f"TuRBO_{dataset}_legend_add_shared.pdf" 134 | else: 135 | figurename = f"TuRBO_{dataset}_legend.pdf" 136 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 137 | 138 | color_set = [style_dict[method][2] for method in methods_list] 139 | linestyle_set = [style_dict[method][1] for method in methods_list] 140 | label_set = [style_dict[method][0] for method in methods_list] 141 | ncol = len(style_dict)//2 142 | 143 | fig, ax = plt.subplots(figsize=(10,8)) 144 | f = lambda ls,c,label: ax.plot([],[], linewidth=LINEWIDTH+1, linestyle=ls, color=c, label=label)[0] 145 | handles = [f(linestyle_set[i], color_set[i], label_set[i]) for i in range(len(methods_list))] 146 | #legend 147 | LABEL_SIZE=15 148 | figsize = (5, 1) 149 | fig_leg = plt.figure(figsize=figsize) 150 | legend_properties = {'weight': 'bold', 'size': LABEL_SIZE} 151 | ax_leg = fig_leg.add_subplot(111) 152 | 153 | ax_leg.set_facecolor('white') 154 | ax_leg.grid(False) 155 | ax_leg.set_axis_off() 156 | ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=ncol, prop=legend_properties, facecolor="white", edgecolor="grey") 157 | fig_leg.savefig(figurepath, bbox_inches = 'tight') 158 | 159 | print("Figure saved:", figurepath) 160 | 161 | if __name__ == "__main__": 162 | 163 | # dataset="squared" 164 | # methods_list = ["BO", "random"] 165 | 166 | dataset="PubMed" 167 | if ADD_SHARED: 168 | methods_list = ["random", "SGD", "BO", "TURBO", "SVGP", 169 | "DSVGP1", "DSVGP2", "DSVGP3", 170 | "DSVGP_shared1", "DSVGP_shared2", "DSVGP_shared3"] 171 | else: 172 | methods_list = ["random", "SGD", "BO", "TURBO", "SVGP", 173 | "DSVGP1", "DSVGP2", "DSVGP3"] 174 | 175 | data_type='fX' 176 | # data_type='train_acc_list' 177 | plot_average(style_dict, dataset, methods_list, data_type, deleted_methods=None) 178 | plot_legend(style_dict, dataset, methods_list) 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /experiments/uci_dfree/test.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from scipy.io import loadmat 4 | import torch 5 | import gpytorch 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import os 11 | import sys 12 | sys.path.append("../") 13 | sys.path.append("../../directionalvi/utils") 14 | sys.path.append("../../directionalvi") 15 | import dfree_directional_vi 16 | import traditional_vi 17 | from csv_dataset import csv_dataset 18 | from metrics import MSE 19 | import pickle 20 | 21 | 22 | # load a 
pickle with the run params 23 | args = sys.argv 24 | param_filename = args[1] 25 | run_params = pickle.load(open(param_filename,"rb")) 26 | num_inducing =run_params['num_inducing'] 27 | num_directions =run_params['num_directions'] 28 | minibatch_size =run_params['minibatch_size'] 29 | num_epochs =run_params['num_epochs'] 30 | verbose =run_params['verbose'] 31 | inducing_data_initialization =run_params['inducing_data_initialization'] 32 | use_ngd =run_params['use_ngd'] 33 | use_ciq =run_params['use_ciq'] 34 | num_contour_quadrature= run_params['num_contour_quadrature'] 35 | learning_rate_hypers = run_params['learning_rate_hypers'] 36 | learning_rate_ngd = run_params['learning_rate_ngd'] 37 | lr_gamma = run_params['lr_gamma'] 38 | lr_benchmarks = run_params['lr_benchmarks'] 39 | lr_sched = run_params['lr_sched'] 40 | mll_type = run_params['mll_type'] 41 | seed = run_params['seed'] 42 | base_name = run_params['base_name'] 43 | data_file = run_params['data_file'] 44 | mode = run_params['mode'] 45 | 46 | # make the learning rate schedule 47 | assert lr_sched in [None, "MultiStepLR", "LambdaLR"], "Not a valid choice of lr_sched" 48 | if lr_sched is None: 49 | pass 50 | elif lr_sched == "MultiStepLR": 51 | def lr_sched(epoch): 52 | a = np.sum(lr_benchmarks < epoch) 53 | # lr_gamma should be > 1 54 | return (lr_gamma)**a 55 | elif lr_sched == "LambdaLR": 56 | lr_sched = lambda epoch: 1./(1+lr_gamma*epoch) 57 | 58 | # set the seed 59 | torch.random.manual_seed(seed) 60 | 61 | # output file names 62 | #data_dir = "./output/" 63 | data_dir = run_params['data_dir'] 64 | model_filename = data_dir + "model_"+ base_name + ".model" 65 | data_filename = data_dir + "data_" + base_name + ".pickle" 66 | if os.path.exists(data_dir) is False: 67 | os.mkdir(data_dir) 68 | 69 | # load data 70 | ff = loadmat(data_file) 71 | X_data = torch.tensor(ff['data'][:,:-1]) # @Leo double check this is right 72 | y_data = torch.tensor(ff['data'][:,-1]) # @Leo double check this is right 73 | n, dim = X_data.shape 74 | 75 | # make sure right type 76 | X_data =X_data.float() 77 | y_data =y_data.float() 78 | 79 | # standardize data 80 | lb = torch.min(X_data,axis=0)[0] 81 | ub = torch.max(X_data,axis=0)[0] 82 | X_data = (X_data - lb)/(ub-lb) 83 | med = torch.median(y_data) 84 | std = torch.std(y_data) 85 | y_data = (y_data - med)/std 86 | 87 | # make a torch dataset 88 | dataset = TensorDataset(X_data,y_data) 89 | 90 | # train-test split 91 | n_train = int(0.8*n) 92 | n_test = n - n_train 93 | train_dataset,test_dataset = torch.utils.data.random_split(dataset,[n_train,n_test]) 94 | 95 | # make dataloaders 96 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 97 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 98 | 99 | 100 | if mode == "DSVGP": 101 | # train 102 | print("\n\n---DirectionalGradVGP---") 103 | print(f"Start training with {n} trainig data of dim {dim}") 104 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 105 | t1 = time.time() 106 | model,likelihood = dfree_directional_vi.train_gp(train_dataset, 107 | num_inducing=num_inducing, 108 | num_directions=num_directions, 109 | minibatch_size = minibatch_size, 110 | minibatch_dim = num_directions, 111 | num_epochs =num_epochs, 112 | learning_rate_hypers=learning_rate_hypers, 113 | learning_rate_ngd=learning_rate_ngd, 114 | inducing_data_initialization=inducing_data_initialization, 115 | use_ngd = use_ngd, 116 | use_ciq = use_ciq, 117 | lr_sched=lr_sched, 118 | 
mll_type=mll_type, 119 | num_contour_quadrature=num_contour_quadrature, 120 | verbose=verbose, 121 | ) 122 | t2 = time.time() 123 | train_time = t2 - t1 124 | 125 | # save the model 126 | torch.save(model.state_dict(),model_filename) 127 | 128 | # test 129 | means, variances = dfree_directional_vi.eval_gp(test_dataset,model,likelihood, 130 | num_directions=num_directions, 131 | minibatch_size=minibatch_size, 132 | minibatch_dim=num_directions) 133 | t3 = time.time() 134 | test_time = t3 - t2 135 | 136 | elif mode == "SVGP": 137 | # train 138 | print("\n\n---Traditional SVGP---") 139 | print(f"Start training with {n} training data of dim {dim}") 140 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 141 | t1 = time.time() 142 | model,likelihood = traditional_vi.train_gp(train_dataset,dim, 143 | num_inducing=num_inducing, 144 | minibatch_size=minibatch_size, 145 | num_epochs=num_epochs, 146 | use_ngd=use_ngd, 147 | use_ciq=use_ciq, 148 | learning_rate_hypers=learning_rate_hypers, 149 | learning_rate_ngd=learning_rate_ngd, 150 | lr_sched=lr_sched, 151 | num_contour_quadrature=num_contour_quadrature, 152 | mll_type=mll_type, 153 | verbose=verbose) 154 | t2 = time.time() 155 | train_time = t2 - t1 156 | 157 | # save the model 158 | torch.save(model.state_dict(),model_filename) 159 | 160 | # test 161 | means, variances = traditional_vi.eval_gp(test_dataset,model,likelihood, 162 | num_inducing=num_inducing, 163 | minibatch_size=n_test) 164 | t3 = time.time() 165 | test_time = t3 - t2 166 | 167 | 168 | # collect the test function values 169 | test_f = torch.zeros(n_test) 170 | for ii in range(n_test): 171 | test_f[ii] = test_dataset[ii][1] # function value 172 | 173 | # compute MSE 174 | test_mse = MSE(test_f,means) 175 | # compute mean negative predictive density 176 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean() 177 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 178 | print(f"Training time: {train_time:.2f} sec, testing time: {test_time:.2f} sec") 179 | 180 | # dump the data 181 | outdata = {} 182 | outdata['test_mse'] = test_mse 183 | outdata['test_nll'] = test_nll 184 | outdata['train_time'] = train_time 185 | outdata['test_time'] = test_time 186 | # add the run params 187 | outdata.update(run_params) 188 | pickle.dump(outdata,open(data_filename,"wb")) 189 | print(f"Dropped file: {data_filename}") 190 | -------------------------------------------------------------------------------- /directionalvi/traditional_vi.py: -------------------------------------------------------------------------------- 1 | from gpytorch.models import ApproximateGP 2 | from gpytorch.variational import CholeskyVariationalDistribution 3 | from gpytorch.variational import VariationalStrategy 4 | from torch.utils.data import TensorDataset, DataLoader 5 | import math 6 | import time 7 | import torch 8 | import sys 9 | import gpytorch 10 | from matplotlib import pyplot as plt 11 | import numpy as np 12 | from utils.count_params import count_params 13 | try: # import wandb if watch model on weights&biases 14 | import wandb 15 | except: 16 | pass 17 | 18 | 19 | class GPModel(ApproximateGP): 20 | def __init__(self, inducing_points,**kwargs): 21 | if "variational_distribution" in kwargs and kwargs["variational_distribution"] == "NGD": 22 | variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0)) 23 | else: 24 | variational_distribution = 
CholeskyVariationalDistribution(inducing_points.size(0)) 25 | if "variational_strategy" in kwargs and kwargs["variational_strategy"] == "CIQ": 26 | variational_strategy = gpytorch.variational.CiqVariationalStrategy( 27 | self, inducing_points, variational_distribution, learn_inducing_locations=True) 28 | else: 29 | variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) 30 | super(GPModel, self).__init__(variational_strategy) 31 | self.mean_module = gpytorch.means.ConstantMean() 32 | self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) 33 | 34 | def forward(self, x): 35 | mean_x = self.mean_module(x) 36 | covar_x = self.covar_module(x) 37 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 38 | 39 | def train_gp(train_dataset,dim,num_inducing=128, 40 | minibatch_size=1, 41 | num_epochs=1, 42 | use_ngd=False, 43 | use_ciq=False, 44 | learning_rate_hypers=0.01, 45 | learning_rate_ngd=0.1, 46 | lr_sched=None, 47 | mll_type="ELBO", 48 | num_contour_quadrature=15, 49 | watch_model=False,gamma=0.1, 50 | verbose=True, 51 | **args): 52 | 53 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 54 | n_samples = len(train_dataset) 55 | 56 | # setup model 57 | # inducing_points = train_x[:num_inducing, :] 58 | inducing_points = torch.rand(num_inducing,dim) 59 | if torch.cuda.is_available(): 60 | inducing_points = inducing_points.cuda() 61 | 62 | if use_ciq: 63 | gpytorch.settings.num_contour_quadrature(num_contour_quadrature) 64 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD",variational_strategy="CIQ") 65 | elif use_ngd: 66 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD") 67 | else: 68 | model = GPModel(inducing_points=inducing_points) 69 | likelihood = gpytorch.likelihoods.GaussianLikelihood() 70 | 71 | if torch.cuda.is_available(): 72 | model = model.cuda() 73 | likelihood = likelihood.cuda() 74 | if watch_model: 75 | wandb.watch(model) 76 | 77 | model.train() 78 | likelihood.train() 79 | 80 | if verbose: 81 | param_total_dim = count_params(model,likelihood) 82 | 83 | # optimizers 84 | if use_ngd or use_ciq: 85 | variational_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data=n_samples, lr=learning_rate_ngd) 86 | hyperparameter_optimizer = torch.optim.Adam([ 87 | {'params': model.hyperparameters()}, 88 | {'params': likelihood.parameters()}, 89 | ], lr=learning_rate_hypers) 90 | else: 91 | variational_optimizer = torch.optim.Adam([ 92 | {'params': model.variational_parameters()}, 93 | ], lr=learning_rate_hypers) 94 | hyperparameter_optimizer = torch.optim.Adam([ 95 | {'params': model.hyperparameters()}, 96 | {'params': likelihood.parameters()}, 97 | ], lr=learning_rate_hypers) 98 | 99 | # learning rate scheduler 100 | #lambda1 = lambda epoch: 1.0/(1 + epoch) 101 | if lr_sched == "step_lr": 102 | num_batches = int(np.ceil(n_samples/minibatch_size)) 103 | milestones = [int(num_epochs*num_batches/3), int(2*num_epochs*num_batches/3)] 104 | hyperparameter_scheduler = torch.optim.lr_scheduler.MultiStepLR(hyperparameter_optimizer, milestones, gamma=gamma) 105 | variational_scheduler = torch.optim.lr_scheduler.MultiStepLR(variational_optimizer, milestones, gamma=gamma) 106 | elif lr_sched is None: 107 | lr_sched = lambda epoch: 1.0 108 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 109 | variational_scheduler = 
torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 110 | else: 111 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 112 | variational_scheduler = torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 113 | 114 | # Our loss object. We're using the VariationalELBO 115 | if mll_type=="ELBO": 116 | print("Using ELBO") 117 | mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=n_samples) 118 | elif mll_type=="PLL": 119 | print("Using PLL") 120 | mll = gpytorch.mlls.PredictiveLogLikelihood(likelihood, model, num_data=n_samples) 121 | 122 | epochs_iter = range(num_epochs) 123 | total_step=0 124 | for i in epochs_iter: 125 | minibatch_iter = train_loader 126 | 127 | for x_batch, y_batch in minibatch_iter: 128 | if torch.cuda.is_available(): 129 | x_batch = x_batch.cuda() 130 | y_batch = y_batch.cuda() 131 | 132 | variational_optimizer.zero_grad() 133 | hyperparameter_optimizer.zero_grad() 134 | output = likelihood(model(x_batch)) 135 | loss = -mll(output, y_batch) 136 | if watch_model: 137 | wandb.log({"loss": loss.item()}) 138 | loss.backward() 139 | # step optimizers and learning rate schedulers 140 | variational_optimizer.step() 141 | variational_scheduler.step() 142 | hyperparameter_optimizer.step() 143 | hyperparameter_scheduler.step() 144 | 145 | if total_step % 50 == 0 and verbose: 146 | means = output.mean 147 | stds = output.variance.sqrt() 148 | nll = -torch.distributions.Normal(means, stds).log_prob(y_batch).mean() 149 | print(f"Epoch: {i}; total_step: {total_step}, loss: {loss.item()}, nll: {nll}") 150 | 151 | total_step +=1 152 | sys.stdout.flush() 153 | 154 | 155 | 156 | if verbose: 157 | print(f"Done! loss: {loss.item()}") 158 | print("\nDone Training!") 159 | sys.stdout.flush() 160 | return model, likelihood 161 | 162 | def eval_gp(test_dataset,model,likelihood, mll_type="ELBO", num_inducing=128,minibatch_size=1): 163 | 164 | dim = len(test_dataset[0][0]) 165 | n_test = len(test_dataset) 166 | test_loader = DataLoader(test_dataset, batch_size=minibatch_size, shuffle=False) 167 | 168 | model.eval() 169 | likelihood.eval() 170 | 171 | means = torch.tensor([0.]) 172 | variances = torch.tensor([0.]) 173 | with torch.no_grad(): 174 | for x_batch, y_batch in test_loader: 175 | if torch.cuda.is_available(): 176 | x_batch = x_batch.cuda() 177 | y_batch = y_batch.cuda() 178 | preds = likelihood(model(x_batch)) 179 | means = torch.cat([means, preds.mean.cpu()]) 180 | variances = torch.cat([variances, preds.variance.cpu()]) 181 | means = means[1:] 182 | variances = variances[1:] 183 | 184 | return means, variances 185 | -------------------------------------------------------------------------------- /experiments/stellarator_regression/plot_stellarator.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path as osp 4 | import argparse 5 | import pickle 6 | from operator import itemgetter 7 | from argparse import Namespace 8 | import numpy as np 9 | import pandas as pd 10 | from matplotlib import pyplot as plt 11 | import matplotlib 12 | import pylab 13 | 14 | ADD_SHARED=True 15 | ncol=1 16 | FONTSIZE=20 17 | MARKERSIZE=15 18 | FIGURESUZE=(10,7) 19 | ALPHA=0.2 20 | ALPHA_MARKER=0.8 21 | LINEWIDTH=4 22 | 23 | # ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 24 | # '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 25 | 26 | if ADD_SHARED: 27 | data = 
pickle.load(open("./data/stellarator_plot_data_p3_with_shared.pickle", "rb" )) 28 | methods_list = ['PPGPR', 'SVGP', 29 | 'DPPGPR2', 'DSVGP2', 'DPPGPR1', 'DSVGP1', 'DPPGPR3', 'DSVGP3', 30 | 'DPPGPR-Shared1', 'DPPGPR-Shared2', 'DPPGPR-Shared3', 31 | 'DSVGP-Shared1', 'DSVGP-Shared2', 'DSVGP-Shared3'] 32 | styles = {"PPGPR": ["PPGPR", "solid", '#9467bd', 'o'], 33 | "SVGP": ["SVGP", "solid", '#1f77b4', '*'], 34 | "DPPGPR1": ["DPPGPR1", "solid", '#2ca02c', 'v'], 35 | "DSVGP1": ["DSVGP1","solid", '#8c564b', 'd'], 36 | "DPPGPR2": ["DPPGPR2", "solid", '#d62728', 'p'], 37 | "DSVGP2": ["DSVGP2", "solid", '#ff7f0e', 'X'], 38 | "DPPGPR3": ["DPPGPR3", "solid", '#e377c2', '^'], 39 | "DSVGP3": ["DSVGP3", "solid", '#7f7f7f', '>'], 40 | "DPPGPR-Shared1": ["DPPGPR-Shared1", "dashed", '#2ca02c', 'v'], 41 | "DSVGP-Shared1": ["DSVGP-Shared1","dashed", '#8c564b', 'd'], 42 | "DPPGPR-Shared2": ["DPPGPR-Shared2", "dashed", '#d62728', 'p'], 43 | "DSVGP-Shared2": ["DSVGP-Shared2", "dashed", '#ff7f0e', 'X'], 44 | "DPPGPR-Shared3": ["DPPGPR-Shared3", "dashed", '#e377c2', '^'], 45 | "DSVGP-Shared3": ["DSVGP-Shared3", "dashed", '#7f7f7f', '>'], 46 | } 47 | ylim = [-2.4, -0.6] 48 | yticks = [-2.0, -1.6, -1.2, -0.8] 49 | legend = False 50 | else: 51 | data = pickle.load(open("./data/stellarator_plot_data_p3.pickle", "rb" )) 52 | methods_list = ['PPGPR', 'SVGP', 'DPPGPR2', 'DSVGP2', 'DPPGPR1', 'DSVGP1', 'DPPGPR3', 'DSVGP3'] 53 | styles = {"PPGPR": ["PPGPR", "solid", '#9467bd', 'o'], 54 | "SVGP": ["SVGP", "solid", '#1f77b4', '*'], 55 | "DPPGPR1": ["DPPGPR1", "solid", '#2ca02c', 'v'], 56 | "DSVGP1": ["DSVGP1","solid", '#8c564b', 'd'], 57 | "DPPGPR2": ["DPPGPR2", "solid", '#d62728', 'p'], 58 | "DSVGP2": ["DSVGP2", "solid", '#ff7f0e', 'X'], 59 | "DPPGPR3": ["DPPGPR3", "solid", '#e377c2', '^'], 60 | "DSVGP3": ["DSVGP3", "solid", '#7f7f7f', '>'], 61 | } 62 | ylim = [-2.4, -1.3] 63 | yticks = [-2.2, -2.0, -1.8, -1.6, -1.4] 64 | legend = True 65 | 66 | 67 | 68 | del data["ni"] 69 | del data["nd"] 70 | del data['train_time'] 71 | del data['test_time'] 72 | del data['mode'] 73 | data['rmse'] = np.sqrt(data['mse']) 74 | 75 | 76 | M_set = [200, 500, 800, 1000, 1200, 1400] # SVGP, DPPGPR, DSVGP1, DPPGPR1, DSVGP3, DPPGPR3 77 | M_set2 = [198, 498, 798, 999, 1200, 1398] # DSVGP2, DPPGPR2 78 | M_set_dict2 = {198: 200, 498:500, 798:800, 999:1000, 1200:1200, 1398:1400} 79 | 80 | data_dict = {} 81 | for method in methods_list: 82 | # select the partial dataframe = data[data['run']==method] 83 | data_dict_cur = {} 84 | data_dict_full = data[data['run']==method] 85 | if method.endswith("2"): 86 | for M in M_set2: 87 | key = M_set_dict2[M] 88 | data_dict_cur[key] = data_dict_full[data_dict_full['M']==M] 89 | else: 90 | for M in M_set: 91 | data_dict_cur[M] = data_dict_full[data_dict_full['M']==M] 92 | 93 | data_dict[method] = data_dict_cur 94 | 95 | 96 | # compelete rmse for mean 97 | rmse_nll_dict = {} 98 | for method in methods_list: 99 | rmse_dict = {"mean":[], "std":[]} 100 | nll_dict = {"mean": [], "std":[]} 101 | for M in M_set: 102 | data_cur = data_dict[method][M] 103 | rmse_mean = np.mean(data_cur['rmse']) 104 | rmse_std = np.std(data_cur['rmse']) 105 | nll_mean = np.mean(data_cur['nll']) 106 | nll_std = np.std(data_cur['nll']) 107 | rmse_dict["mean"].append(rmse_mean) 108 | rmse_dict["std"].append(rmse_std) 109 | nll_dict["mean"].append(nll_mean) 110 | nll_dict["std"].append(nll_std) 111 | rmse_nll_dict[method] = {"rmse": rmse_dict, "nll": nll_dict} 112 | 113 | 114 | 115 | def plot_stellarator(datatype, ylim=None, yticks=None, 
logy=False, legend=True): 116 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=FIGURESUZE) 117 | for method in methods_list: 118 | mean_nll = np.array(rmse_nll_dict[method][datatype]['mean']) 119 | std_nll = np.array(rmse_nll_dict[method][datatype]['std']) 120 | if ADD_SHARED: 121 | ax.plot(M_set, mean_nll, 122 | color=styles[method][2], 123 | label=styles[method][0], 124 | linestyle=styles[method][1], 125 | marker=styles[method][3], 126 | markersize=MARKERSIZE, 127 | alpha=ALPHA_MARKER, 128 | linewidth=LINEWIDTH, 129 | ) 130 | else: 131 | ax.plot(M_set, mean_nll, 132 | color=styles[method][2], 133 | label=styles[method][0], 134 | marker=styles[method][3], 135 | markersize=MARKERSIZE, 136 | alpha=ALPHA_MARKER) 137 | ax.fill_between(M_set, 138 | mean_nll+std_nll, 139 | mean_nll-std_nll, 140 | color=styles[method][2], 141 | alpha=ALPHA, 142 | ) 143 | 144 | ax.set_xlabel("Inducing matrix size",fontsize=FONTSIZE) 145 | ax.set_xticks(M_set) 146 | ax.set_xticklabels(M_set,fontsize=FONTSIZE) 147 | 148 | ylabel='NLL' if datatype=='nll' else "RMSE" 149 | ax.set_ylabel(ylabel, fontsize=FONTSIZE) 150 | ax.set_yticks(yticks) 151 | ax.set_yticklabels(yticks,fontsize=FONTSIZE) 152 | ax.set_ylim(ylim) 153 | if legend: 154 | ax.legend(loc='upper right', fontsize=FONTSIZE-5) 155 | 156 | plt.grid() 157 | plt.minorticks_off() 158 | plt.tight_layout() 159 | if ADD_SHARED: 160 | figurepath=f"./plots/stellarator_{datatype}_add_shared.pdf" 161 | else: 162 | figurepath=f"./plots/stellarator_{datatype}.pdf" 163 | fig.savefig(figurepath) 164 | print("Figure saved:", figurepath) 165 | 166 | def plot_legend(style_dict, methods_list, ncol): 167 | plt.clf() 168 | plt.cla() 169 | 170 | figurename = f"stellarator_legend_add_shared_ncol{ncol}.pdf" 171 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 172 | 173 | color_set = [style_dict[method][2] for method in methods_list] 174 | linestyle_set = [style_dict[method][1] for method in methods_list] 175 | label_set = [style_dict[method][0] for method in methods_list] 176 | marker_set = [style_dict[method][3] for method in methods_list] 177 | 178 | fig, ax = plt.subplots(figsize=(10,8)) 179 | f = lambda ls,c,label,marker: ax.plot([],[], linestyle=ls, color=c, 180 | label=label, marker=marker, 181 | markersize=MARKERSIZE*0.68, 182 | # linewidth=LINEWIDTH, 183 | )[0] 184 | handles = [f(linestyle_set[i], color_set[i], label_set[i], marker_set[i]) for i in range(len(methods_list))] 185 | #legend 186 | LABEL_SIZE=15 187 | figsize = (5, 1) 188 | fig_leg = plt.figure(figsize=figsize) 189 | legend_properties = {'weight': 'bold', 'size': LABEL_SIZE} 190 | ax_leg = fig_leg.add_subplot(111) 191 | 192 | ax_leg.set_facecolor('white') 193 | ax_leg.grid(False) 194 | ax_leg.set_axis_off() 195 | ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=ncol, prop=legend_properties, facecolor="white", edgecolor="grey") 196 | fig_leg.savefig(figurepath, bbox_inches = 'tight') 197 | 198 | print("Figure saved:", figurepath) 199 | 200 | 201 | # plot_stellarator('nll', ylim=ylim, yticks=yticks, 202 | # logy=False, legend=legend) 203 | if ADD_SHARED: 204 | plot_legend(styles, methods_list, ncol=ncol) 205 | 206 | -------------------------------------------------------------------------------- /directionalvi/grad_svgp.py: -------------------------------------------------------------------------------- 1 | from gpytorch.models import ApproximateGP 2 | from gpytorch.variational import CholeskyVariationalDistribution 3 | from GradVariationalStrategy import 
GradVariationalStrategy 4 | from torch.utils.data import TensorDataset, DataLoader 5 | import tqdm 6 | import math 7 | import time 8 | import torch 9 | import sys 10 | import gpytorch 11 | from matplotlib import pyplot as plt 12 | import numpy as np 13 | from utils.count_params import count_params 14 | try: # import wandb if watch model on weights&biases 15 | import wandb 16 | except: 17 | pass 18 | 19 | 20 | class GPModel(ApproximateGP): 21 | def __init__(self, inducing_points,**kwargs): 22 | dim = inducing_points.size(1) 23 | if "variational_distribution" in kwargs and kwargs["variational_distribution"] == "NGD": 24 | variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0)*(dim+1)) 25 | else: 26 | variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0)*(dim+1)) 27 | if "variational_strategy" in kwargs and kwargs["variational_strategy"] == "CIQ": 28 | variational_strategy = gpytorch.variational.CiqVariationalStrategy( 29 | self, inducing_points, variational_distribution, learn_inducing_locations=True) 30 | else: 31 | variational_strategy = GradVariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) 32 | super(GPModel, self).__init__(variational_strategy) 33 | self.mean_module = gpytorch.means.ConstantMean() 34 | self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernelGrad()) 35 | 36 | def forward(self, x): 37 | mean_x = self.mean_module(x) 38 | covar_x = self.covar_module(x) 39 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 40 | 41 | def train_gp(train_dataset,dim,num_inducing=128, 42 | minibatch_size=1, 43 | num_epochs=1, 44 | use_ngd=False, 45 | use_ciq=False, 46 | learning_rate_hypers=0.01, 47 | learning_rate_ngd=0.1, 48 | lr_sched=None, 49 | mll_type="ELBO", 50 | num_contour_quadrature=15, 51 | watch_model=False,gamma=0.1, 52 | verbose=True, 53 | **args): 54 | 55 | print_loss=True 56 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 57 | n_samples = len(train_dataset) 58 | 59 | # setup model 60 | # inducing_points = train_x[:num_inducing, :] 61 | inducing_points = torch.rand(num_inducing,dim) 62 | if torch.cuda.is_available(): 63 | inducing_points = inducing_points.cuda() 64 | 65 | if use_ciq: 66 | gpytorch.settings.num_contour_quadrature(num_contour_quadrature) 67 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD",variational_strategy="CIQ") 68 | elif use_ngd: 69 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD") 70 | else: 71 | model = GPModel(inducing_points=inducing_points) 72 | likelihood = gpytorch.likelihoods.GaussianLikelihood() 73 | 74 | if torch.cuda.is_available(): 75 | model = model.cuda() 76 | likelihood = likelihood.cuda() 77 | if watch_model: 78 | wandb.watch(model) 79 | 80 | model.train() 81 | likelihood.train() 82 | 83 | if verbose: 84 | param_total_dim = count_params(model,likelihood) 85 | 86 | # optimizers 87 | if use_ngd or use_ciq: 88 | variational_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data=n_samples, lr=learning_rate_ngd) 89 | hyperparameter_optimizer = torch.optim.Adam([ 90 | {'params': model.hyperparameters()}, 91 | {'params': likelihood.parameters()}, 92 | ], lr=learning_rate_hypers) 93 | else: 94 | variational_optimizer = torch.optim.Adam([ 95 | {'params': model.variational_parameters()}, 96 | ], lr=learning_rate_hypers) 97 | hyperparameter_optimizer = torch.optim.Adam([ 98 | 
{'params': model.hyperparameters()}, 99 | {'params': likelihood.parameters()}, 100 | ], lr=learning_rate_hypers) 101 | 102 | # learning rate scheduler 103 | #lambda1 = lambda epoch: 1.0/(1 + epoch) 104 | if lr_sched == "step_lr": 105 | num_batches = int(np.ceil(n_samples/minibatch_size)) 106 | milestones = [int(num_epochs*num_batches/3), int(2*num_epochs*num_batches/3)] 107 | hyperparameter_scheduler = torch.optim.lr_scheduler.MultiStepLR(hyperparameter_optimizer, milestones, gamma=gamma) 108 | variational_scheduler = torch.optim.lr_scheduler.MultiStepLR(variational_optimizer, milestones, gamma=gamma) 109 | elif lr_sched is None: 110 | lr_sched = lambda epoch: 1.0 111 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 112 | variational_scheduler = torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 113 | else: 114 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 115 | variational_scheduler = torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 116 | 117 | # Our loss object. We're using the VariationalELBO 118 | if mll_type=="ELBO": 119 | mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=n_samples) 120 | elif mll_type=="PLL": 121 | mll = gpytorch.mlls.PredictiveLogLikelihood(likelihood, model, num_data=n_samples) 122 | 123 | if "tqdm" in args and args["tqdm"]: 124 | print_loss=False # don't print loss every 100 epoch if use tqdm 125 | epochs_iter = tqdm.tqdm(range(num_epochs), desc="Epoch") 126 | else: 127 | epochs_iter = range(num_epochs) 128 | 129 | total_step=0 130 | for i in epochs_iter: 131 | if "tqdm" in args and args["tqdm"]: 132 | minibatch_iter = tqdm.tqdm(train_loader, desc="Minibatch", leave=False) 133 | else: 134 | minibatch_iter = train_loader 135 | 136 | mini_steps = 0 137 | for x_batch, y_batch in minibatch_iter: 138 | if torch.cuda.is_available(): 139 | x_batch = x_batch.cuda() 140 | y_batch = y_batch.cuda() 141 | 142 | # pass in interleaved data 143 | y_batch = y_batch.reshape(torch.numel(y_batch)) 144 | 145 | variational_optimizer.zero_grad() 146 | hyperparameter_optimizer.zero_grad() 147 | output = likelihood(model(x_batch)) 148 | loss = -mll(output, y_batch) 149 | if watch_model: 150 | wandb.log({"loss": loss.item()}) 151 | loss.backward() 152 | # step optimizers and learning rate schedulers 153 | variational_optimizer.step() 154 | variational_scheduler.step() 155 | hyperparameter_optimizer.step() 156 | hyperparameter_scheduler.step() 157 | 158 | if "tqdm" in args and args["tqdm"]: 159 | epochs_iter.set_postfix(loss=loss.item()) 160 | 161 | if total_step % 25 == 0 and print_loss: 162 | means = output.mean[::dim+1] 163 | stds = output.variance.sqrt()[::dim+1] 164 | nll = -torch.distributions.Normal(means, stds).log_prob(y_batch[::dim+1]).mean() 165 | print(f"Epoch: {i}; total_step: {mini_steps}, loss: {loss.item()}, nll: {nll}") 166 | 167 | mini_steps +=1 168 | total_step +=1 169 | sys.stdout.flush() 170 | 171 | 172 | 173 | if print_loss: 174 | print(f"Done! 
loss: {loss.item()}") 175 | 176 | print("\nDone Training!") 177 | sys.stdout.flush() 178 | return model, likelihood 179 | 180 | def eval_gp(test_dataset,model,likelihood, mll_type="ELBO", num_inducing=128,minibatch_size=1): 181 | 182 | dim = len(test_dataset[0][0]) 183 | n_test = len(test_dataset) 184 | test_loader = DataLoader(test_dataset, batch_size=minibatch_size, shuffle=False) 185 | 186 | model.eval() 187 | likelihood.eval() 188 | 189 | means = torch.tensor([0.]) 190 | variances = torch.tensor([0.]) 191 | with torch.no_grad(): 192 | for x_batch, y_batch in test_loader: 193 | if torch.cuda.is_available(): 194 | x_batch = x_batch.cuda() 195 | y_batch = y_batch.cuda() 196 | preds = likelihood(model(x_batch)) 197 | means = torch.cat([means, preds.mean.cpu()]) 198 | variances = torch.cat([variances, preds.variance.cpu()]) 199 | means = means[1:] 200 | variances = variances[1:] 201 | 202 | return means, variances 203 | -------------------------------------------------------------------------------- /directionalvi/GradVariationalStrategy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import warnings 3 | 4 | import torch 5 | 6 | from gpytorch import settings 7 | from gpytorch.distributions import MultivariateNormal 8 | from gpytorch.lazy import DiagLazyTensor, MatmulLazyTensor, RootLazyTensor, SumLazyTensor, TriangularLazyTensor, delazify 9 | from gpytorch.settings import trace_mode 10 | from gpytorch.utils.cholesky import psd_safe_cholesky 11 | from gpytorch.utils.errors import CachingError 12 | from gpytorch.utils.memoize import cached, clear_cache_hook, pop_from_cache_ignore_args 13 | from gpytorch.utils.warnings import OldVersionWarning 14 | from gpytorch.variational._variational_strategy import _VariationalStrategy 15 | 16 | 17 | def _ensure_updated_strategy_flag_set( 18 | state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs 19 | ): 20 | device = state_dict[list(state_dict.keys())[0]].device 21 | if prefix + "updated_strategy" not in state_dict: 22 | state_dict[prefix + "updated_strategy"] = torch.tensor(False, device=device) 23 | warnings.warn( 24 | "You have loaded a variational GP model (using `VariationalStrategy`) from a previous version of " 25 | "GPyTorch. We have updated the parameters of your model to work with the new version of " 26 | "`VariationalStrategy` that uses whitened parameters.\nYour model will work as expected, but we " 27 | "recommend that you re-save your model.", 28 | OldVersionWarning, 29 | ) 30 | 31 | 32 | class GradVariationalStrategy(_VariationalStrategy): 33 | r""" 34 | The standard variational strategy, as defined by `Hensman et al. (2015)`_. 35 | This strategy takes a set of :math:`m \ll n` inducing points :math:`\mathbf Z` 36 | and applies an approximate distribution :math:`q( \mathbf u)` over their function values. 37 | (Here, we use the common notation :math:`\mathbf u = f(\mathbf Z)`. 38 | The approximate function distribution for any abitrary input :math:`\mathbf X` is given by: 39 | 40 | .. math:: 41 | 42 | q( f(\mathbf X) ) = \int p( f(\mathbf X) \mid \mathbf u) q(\mathbf u) \: d\mathbf u 43 | 44 | This variational strategy uses "whitening" to accelerate the optimization of the variational 45 | parameters. See `Matthews (2017)`_ for more info. 46 | 47 | :param ~gpytorch.models.ApproximateGP model: Model this strategy is applied to. 
48 | Typically passed in when the VariationalStrategy is created in the 49 | __init__ method of the user defined model. 50 | :param torch.Tensor inducing_points: Tensor containing a set of inducing 51 | points to use for variational inference. 52 | :param ~gpytorch.variational.VariationalDistribution variational_distribution: A 53 | VariationalDistribution object that represents the form of the variational distribution :math:`q(\mathbf u)` 54 | :param learn_inducing_locations: (Default True): Whether or not 55 | the inducing point locations :math:`\mathbf Z` should be learned (i.e. are they 56 | parameters of the model). 57 | :type learn_inducing_locations: `bool`, optional 58 | 59 | .. _Hensman et al. (2015): 60 | http://proceedings.mlr.press/v38/hensman15.pdf 61 | .. _Matthews (2017): 62 | https://www.repository.cam.ac.uk/handle/1810/278022 63 | """ 64 | 65 | def __init__(self, model, inducing_points, variational_distribution, learn_inducing_locations=True): 66 | super().__init__(model, inducing_points, variational_distribution, learn_inducing_locations) 67 | self.register_buffer("updated_strategy", torch.tensor(True)) 68 | self._register_load_state_dict_pre_hook(_ensure_updated_strategy_flag_set) 69 | 70 | @cached(name="cholesky_factor", ignore_args=True) 71 | def _cholesky_factor(self, induc_induc_covar): 72 | L = psd_safe_cholesky(delazify(induc_induc_covar).double()) 73 | return TriangularLazyTensor(L) 74 | 75 | @property 76 | @cached(name="prior_distribution_memo") 77 | def prior_distribution(self): 78 | zeros = torch.zeros( 79 | self._variational_distribution.shape(), 80 | dtype=self._variational_distribution.dtype, 81 | device=self._variational_distribution.device, 82 | ) 83 | ones = torch.ones_like(zeros) 84 | res = MultivariateNormal(zeros, DiagLazyTensor(ones)) 85 | return res 86 | 87 | def forward(self, x, inducing_points, inducing_values, variational_inducing_covar=None, **kwargs): 88 | # Compute full prior distribution 89 | full_inputs = torch.cat([inducing_points, x], dim=-2) 90 | full_output = self.model.forward(full_inputs, **kwargs) 91 | full_covar = full_output.lazy_covariance_matrix 92 | 93 | # Covariance terms 94 | dim = inducing_points.size(1) 95 | num_induc = inducing_points.size(-2) 96 | test_mean = self.model.mean_module(x.repeat_interleave(dim+1,dim=0)) 97 | induc_induc_covar = full_covar[..., :num_induc*(dim+1), :num_induc*(dim+1)].add_jitter() 98 | induc_data_covar = full_covar[..., :num_induc*(dim+1), num_induc*(dim+1):].evaluate() 99 | data_data_covar = full_covar[..., num_induc*(dim+1):, num_induc*(dim+1):] 100 | 101 | # Compute interpolation terms 102 | # K_ZZ^{-1/2} K_ZX 103 | # K_ZZ^{-1/2} \mu_Z 104 | L = self._cholesky_factor(induc_induc_covar) 105 | if L.shape != induc_induc_covar.shape: 106 | # Aggressive caching can cause nasty shape incompatibilies when evaluating with different batch shapes 107 | # TODO: Use a hook fo this 108 | try: 109 | pop_from_cache_ignore_args(self, "cholesky_factor") 110 | except CachingError: 111 | pass 112 | L = self._cholesky_factor(induc_induc_covar) 113 | interp_term = L.inv_matmul(induc_data_covar.double()).to(full_inputs.dtype) 114 | 115 | # Compute the mean of q(f) 116 | # k_XZ K_ZZ^{-1/2} (m - K_ZZ^{-1/2} \mu_Z) + \mu_X 117 | predictive_mean = (interp_term.transpose(-1, -2) @ inducing_values.unsqueeze(-1)).squeeze(-1) + test_mean 118 | 119 | # Compute the covariance of q(f) 120 | # K_XX + k_XZ K_ZZ^{-1/2} (S - I) K_ZZ^{-1/2} k_ZX 121 | middle_term = self.prior_distribution.lazy_covariance_matrix.mul(-1) 122 | if 
variational_inducing_covar is not None: 123 | middle_term = SumLazyTensor(variational_inducing_covar, middle_term) 124 | 125 | if trace_mode.on(): 126 | predictive_covar = ( 127 | data_data_covar.add_jitter(1e-4).evaluate() 128 | + interp_term.transpose(-1, -2) @ middle_term.evaluate() @ interp_term 129 | ) 130 | else: 131 | predictive_covar = SumLazyTensor( 132 | data_data_covar.add_jitter(1e-4), 133 | MatmulLazyTensor(interp_term.transpose(-1, -2), middle_term @ interp_term), 134 | ) 135 | 136 | # Return the distribution 137 | return MultivariateNormal(predictive_mean, predictive_covar) 138 | 139 | def __call__(self, x, prior=False, **kwargs): 140 | if not self.updated_strategy.item() and not prior: 141 | with torch.no_grad(): 142 | # Get unwhitened p(u) 143 | prior_function_dist = self(self.inducing_points, prior=True) 144 | prior_mean = prior_function_dist.loc 145 | L = self._cholesky_factor(prior_function_dist.lazy_covariance_matrix.add_jitter()) 146 | 147 | # Temporarily turn off noise that's added to the mean 148 | orig_mean_init_std = self._variational_distribution.mean_init_std 149 | self._variational_distribution.mean_init_std = 0.0 150 | 151 | # Change the variational parameters to be whitened 152 | variational_dist = self.variational_distribution 153 | mean_diff = (variational_dist.loc - prior_mean).unsqueeze(-1).double() 154 | whitened_mean = L.inv_matmul(mean_diff).squeeze(-1).to(variational_dist.loc.dtype) 155 | covar_root = variational_dist.lazy_covariance_matrix.root_decomposition().root.evaluate().double() 156 | whitened_covar = RootLazyTensor(L.inv_matmul(covar_root).to(variational_dist.loc.dtype)) 157 | whitened_variational_distribution = variational_dist.__class__(whitened_mean, whitened_covar) 158 | self._variational_distribution.initialize_variational_distribution(whitened_variational_distribution) 159 | 160 | # Reset the random noise parameter of the model 161 | self._variational_distribution.mean_init_std = orig_mean_init_std 162 | 163 | # Reset the cache 164 | clear_cache_hook(self) 165 | 166 | # Mark that we have updated the variational strategy 167 | self.updated_strategy.fill_(True) 168 | 169 | return super().__call__(x, prior=prior, **kwargs) 170 | -------------------------------------------------------------------------------- /experiments/synthetic1/synthetic1.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import gpytorch 5 | import tqdm 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import os 11 | import sys 12 | sys.path.append("../") 13 | sys.path.append("../../directionalvi/utils") 14 | sys.path.append("../../directionalvi") 15 | from directional_vi import train_gp, eval_gp 16 | import traditional_vi 17 | import grad_svgp 18 | from csv_dataset import csv_dataset 19 | from metrics import MSE 20 | import pickle 21 | 22 | 23 | # load a pickle with the run params 24 | args = sys.argv 25 | param_filename = args[1] 26 | run_params = pickle.load(open(param_filename,"rb")) 27 | num_inducing =run_params['num_inducing'] 28 | num_directions =run_params['num_directions'] 29 | minibatch_size =run_params['minibatch_size'] 30 | num_epochs =run_params['num_epochs'] 31 | tqdm =run_params['tqdm'] 32 | inducing_data_initialization =run_params['inducing_data_initialization'] 33 | use_ngd =run_params['use_ngd'] 34 | use_ciq =run_params['use_ciq'] 35 | num_contour_quadrature= 
run_params['num_contour_quadrature'] 36 | learning_rate_hypers = run_params['learning_rate_hypers'] 37 | learning_rate_ngd = run_params['learning_rate_ngd'] 38 | lr_gamma = run_params['lr_gamma'] 39 | lr_benchmarks = run_params['lr_benchmarks'] 40 | lr_sched = run_params['lr_sched'] 41 | mll_type = run_params['mll_type'] 42 | seed = run_params['seed'] 43 | base_name = run_params['base_name'] 44 | data_file = run_params['data_file'] 45 | mode = run_params['mode'] 46 | 47 | # make the learning rate schedule 48 | assert lr_sched in [None, "MultiStepLR", "LambdaLR"], "Not a valid choice of lr_sched" 49 | if lr_sched is None: 50 | pass 51 | elif lr_sched == "MultiStepLR": 52 | def lr_sched(epoch): 53 | a = np.sum(lr_benchmarks < epoch) 54 | # lr_gamma should be > 1 55 | return (lr_gamma)**a 56 | elif lr_sched == "LambdaLR": 57 | lr_sched = lambda epoch: 1./(1+lr_gamma*epoch) 58 | 59 | # set the seed 60 | torch.random.manual_seed(seed) 61 | 62 | # output file names 63 | data_dir = "./output/" 64 | model_filename = data_dir + "model_"+ base_name + ".model" 65 | data_filename = data_dir + "data_" + base_name + ".pickle" 66 | if os.path.exists(data_dir) is False: 67 | os.mkdir(data_dir) 68 | 69 | if mode == "DSVGP" or mode == "GradSVGP": deriv=True 70 | elif mode == "SVGP": deriv = False 71 | 72 | # load the data 73 | d = pickle.load(open(data_file, "rb")) 74 | X = d['X'] 75 | Y = d['Y'] 76 | n,dim = X.shape 77 | if deriv == False: 78 | Y = Y[:,0] 79 | 80 | # make a torch dataset 81 | dataset = TensorDataset(X,Y) 82 | 83 | # train-test split 84 | n_train = int(0.8*n) 85 | n_test = int(0.2*n) 86 | train_dataset,test_dataset = torch.utils.data.random_split(dataset,[n_train,n_test]) 87 | 88 | #if torch.cuda.is_available(): 89 | # train_dataset, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 90 | 91 | # make dataloaders 92 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 93 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 94 | 95 | 96 | if mode == "DSVGP": 97 | # train 98 | print("\n\n---DirectionalGradVGP---") 99 | print(f"Start training with {n} trainig data of dim {dim}") 100 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 101 | t1 = time.time() 102 | model,likelihood = train_gp(train_dataset, 103 | num_inducing=num_inducing, 104 | num_directions=num_directions, 105 | minibatch_size = minibatch_size, 106 | minibatch_dim = num_directions, 107 | num_epochs =num_epochs, 108 | learning_rate_hypers=learning_rate_hypers, 109 | learning_rate_ngd=learning_rate_ngd, 110 | inducing_data_initialization=inducing_data_initialization, 111 | use_ngd = use_ngd, 112 | use_ciq = use_ciq, 113 | lr_sched=lr_sched, 114 | mll_type=mll_type, 115 | num_contour_quadrature=num_contour_quadrature, 116 | tqdm=tqdm, 117 | ) 118 | t2 = time.time() 119 | train_time = t2 - t1 120 | 121 | # save the model 122 | torch.save(model.state_dict(),model_filename) 123 | 124 | # test 125 | means, variances = eval_gp(test_dataset,model,likelihood, 126 | num_directions=num_directions, 127 | minibatch_size=minibatch_size, 128 | minibatch_dim=num_directions) 129 | t3 = time.time() 130 | test_time = t3 - t2 131 | 132 | # only keep the function values 133 | means = means[::num_directions+1] 134 | variances = variances[::num_directions+1] 135 | 136 | elif mode == "SVGP": 137 | # train 138 | print("\n\n---Traditional SVGP---") 139 | print(f"Start training with {n} training data of dim {dim}") 140 | 
print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 141 | t1 = time.time() 142 | model,likelihood = traditional_vi.train_gp(train_dataset,dim, 143 | num_inducing=num_inducing, 144 | minibatch_size=minibatch_size, 145 | num_epochs=num_epochs, 146 | use_ngd=use_ngd, 147 | use_ciq=use_ciq, 148 | learning_rate_hypers=learning_rate_hypers, 149 | learning_rate_ngd=learning_rate_ngd, 150 | lr_sched=lr_sched, 151 | num_contour_quadrature=num_contour_quadrature, 152 | mll_type=mll_type, 153 | tqdm=False) 154 | t2 = time.time() 155 | train_time = t2 - t1 156 | 157 | # save the model 158 | torch.save(model.state_dict(),model_filename) 159 | 160 | # test 161 | means, variances = traditional_vi.eval_gp(test_dataset,model,likelihood, 162 | num_inducing=num_inducing, 163 | minibatch_size=n_test) 164 | t3 = time.time() 165 | test_time = t3 - t2 166 | 167 | elif mode == "GradSVGP": 168 | # train 169 | print("\n\n---Grad SVGP---") 170 | print(f"Start training with {n} training data of dim {dim}") 171 | print(f"VI setup: {num_inducing} inducing points, {num_directions} inducing directions") 172 | t1 = time.time() 173 | model,likelihood = grad_svgp.train_gp(train_dataset,dim, 174 | num_inducing=num_inducing, 175 | minibatch_size=minibatch_size, 176 | num_epochs=num_epochs, 177 | use_ngd=use_ngd, 178 | use_ciq=use_ciq, 179 | learning_rate_hypers=learning_rate_hypers, 180 | learning_rate_ngd=learning_rate_ngd, 181 | lr_sched=lr_sched, 182 | num_contour_quadrature=num_contour_quadrature, 183 | mll_type=mll_type, 184 | tqdm=False) 185 | t2 = time.time() 186 | train_time = t2 - t1 187 | 188 | # save the model 189 | torch.save(model.state_dict(),model_filename) 190 | 191 | # test 192 | means, variances = grad_svgp.eval_gp(test_dataset,model,likelihood, 193 | num_inducing=num_inducing, 194 | minibatch_size=n_test) 195 | t3 = time.time() 196 | test_time = t3 - t2 197 | 198 | # only keep the function values 199 | means = means[::dim+1] 200 | variances = variances[::dim+1] 201 | 202 | 203 | # collect the test function values 204 | test_f = torch.zeros(n_test) 205 | for ii in range(n_test): 206 | if mode == "DSVGP" or mode == "GradSVGP": 207 | test_f[ii] = test_dataset[ii][1][0] # function value 208 | elif mode == "SVGP": 209 | test_f[ii] = test_dataset[ii][1] # function value 210 | 211 | # compute MSE 212 | test_mse = MSE(test_f,means) 213 | # compute mean negative predictive density 214 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean() 215 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 216 | print(f"Training time: {train_time:.2f} sec, testing time: {test_time:.2f} sec") 217 | 218 | # dump the data 219 | outdata = {} 220 | outdata['test_mse'] = test_mse 221 | outdata['test_nll'] = test_nll 222 | outdata['train_time'] = train_time 223 | outdata['test_time'] = test_time 224 | # add the run params 225 | outdata.update(run_params) 226 | pickle.dump(outdata,open(data_filename,"wb")) 227 | print(f"Dropped file: {data_filename}") 228 | -------------------------------------------------------------------------------- /experiments/rover/test_turbo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import gpytorch 4 | import time 5 | from torch.utils.data import TensorDataset, DataLoader 6 | import os 7 | import sys 8 | sys.path.append("../") 9 | sys.path.append("../../directionalvi/utils") 10 | sys.path.append("../../directionalvi") 11 | 
import directional_vi 12 | import traditional_vi 13 | import pickle 14 | 15 | 16 | 17 | # load a pickle with the run params 18 | args = sys.argv 19 | param_filename = args[1] 20 | run_params = pickle.load(open(param_filename,"rb")) 21 | num_inducing =run_params['num_inducing'] 22 | num_directions =run_params['num_directions'] 23 | minibatch_size =run_params['minibatch_size'] 24 | num_epochs =run_params['num_epochs'] 25 | verbose =run_params['verbose'] 26 | inducing_data_initialization =run_params['inducing_data_initialization'] 27 | use_ngd =run_params['use_ngd'] 28 | use_ciq =run_params['use_ciq'] 29 | num_contour_quadrature= run_params['num_contour_quadrature'] 30 | learning_rate_hypers = run_params['learning_rate_hypers'] 31 | learning_rate_ngd = run_params['learning_rate_ngd'] 32 | lr_gamma = run_params['lr_gamma'] 33 | lr_benchmarks = run_params['lr_benchmarks'] 34 | lr_sched = run_params['lr_sched'] 35 | mll_type = run_params['mll_type'] 36 | seed = run_params['seed'] 37 | base_name = run_params['base_name'] 38 | mode = run_params['mode'] 39 | turbo_lb = run_params['turbo_lb'] 40 | turbo_ub = run_params['turbo_ub'] 41 | turbo_n_init = run_params['turbo_n_init'] 42 | turbo_max_evals = run_params['turbo_max_evals'] 43 | turbo_batch_size = run_params['turbo_batch_size'] 44 | dim = run_params['dim'] 45 | 46 | # make the learning rate schedule 47 | assert lr_sched in [None, "MultiStepLR", "LambdaLR"], "Not a valid choice of lr_sched" 48 | if lr_sched is None: 49 | pass 50 | elif lr_sched == "MultiStepLR": 51 | def lr_sched(epoch): 52 | a = np.sum(lr_benchmarks < epoch) 53 | # lr_gamma should be > 1 54 | return (lr_gamma)**a 55 | elif lr_sched == "LambdaLR": 56 | lr_sched = lambda epoch: 1./(1+lr_gamma*epoch) 57 | 58 | # set the seed 59 | torch.random.manual_seed(seed) 60 | 61 | # output file names 62 | data_dir = "./output/" 63 | model_filename = data_dir + "model_"+ base_name + ".model" 64 | data_filename = data_dir + "data_" + base_name + ".pickle" 65 | if os.path.exists(data_dir) is False: 66 | os.mkdir(data_dir) 67 | 68 | if mode == "DSVGP": deriv=True 69 | elif mode == "SVGP" or mode == "Vanilla": deriv = False 70 | 71 | # wrap the objective 72 | from rover import * 73 | def myObj(u): 74 | if deriv==True: 75 | # stack it 76 | fg = np.zeros(len(u)+1) 77 | fg[0] = rover_obj(u) 78 | fg[1:] = np.copy(rover_grad(u)) 79 | return fg 80 | else: 81 | return rover_obj(u) 82 | 83 | if torch.cuda.is_available(): 84 | turbo_device = 'cuda' 85 | else: 86 | turbo_device = 'cpu' 87 | 88 | if mode == "DSVGP": 89 | # train 90 | print(f"\n\n---TuRBO-Grad with DSVGP in dim {dim}---") 91 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 92 | 93 | #from turbo1_grad_linesearch import * 94 | from turbo1_grad import * 95 | def train_gp_for_turbo(train_x, train_y, use_ard, num_steps, hypers): 96 | # expects train_x on unit cube and train_y standardized 97 | # make a trainable model for TuRBO 98 | train_x = train_x.float() 99 | train_y = train_y.float() 100 | dataset = TensorDataset(train_x,train_y) 101 | model,likelihood = directional_vi.train_gp(dataset, 102 | num_inducing=num_inducing, 103 | num_directions=num_directions, 104 | minibatch_size = minibatch_size, 105 | minibatch_dim = num_directions, 106 | num_epochs =num_steps, 107 | learning_rate_hypers=learning_rate_hypers, 108 | learning_rate_ngd=learning_rate_ngd, 109 | inducing_data_initialization=inducing_data_initialization, 110 | use_ngd = use_ngd, 111 | use_ciq = use_ciq, 112 | lr_sched=lr_sched, 113 | 
mll_type=mll_type, 114 | num_contour_quadrature=num_contour_quadrature, 115 | verbose=verbose, 116 | ) 117 | return model.double(),likelihood.double() 118 | 119 | def sample_from_gp(model,likelihood,X_cand,n_samples): 120 | """ 121 | X_cand: 2d torch tensor, points to sample at 122 | n_samples: int, number of samples to take per point in X_cand 123 | """ 124 | model.eval() 125 | likelihood.eval() 126 | 127 | # ensure correct type 128 | model = model.float() 129 | likelihood = likelihood.float() 130 | X_cand = X_cand.float() 131 | 132 | n,dim = X_cand.shape 133 | kwargs = {} 134 | derivative_directions = torch.eye(dim)[:model.num_directions] 135 | derivative_directions = derivative_directions.repeat(n,1) 136 | kwargs['derivative_directions'] = derivative_directions.to(X_cand.device).float() 137 | preds = likelihood(model(X_cand,**kwargs)) 138 | y_cand = preds.sample(torch.Size([n_samples])) # shape (n_samples x n*(n_dir+1)) 139 | y_cand = y_cand[:,::model.num_directions+1].t() # shape (n, n_samples) 140 | 141 | # only use mean 142 | #y_cand = preds.mean.repeat(n_samples,1).t() # (n,n_samples) 143 | 144 | ## only use distribution of f(x) to predict (dont use joint covariance with derivatives) 145 | #mean = preds.mean[::num_directions+1] 146 | #var = preds.variance[::num_directions+1] # could have used covariance for f(x) too 147 | #mvn = gpytorch.distributions.MultivariateNormal(mean,torch.diag(var)) 148 | #y_cand = mvn.sample(torch.Size([n_samples])).t() # shape (n x n_samples) 149 | 150 | return y_cand 151 | 152 | 153 | # initialize TuRBO 154 | problem = Turbo1Grad( 155 | myObj, 156 | lb=turbo_lb,ub=turbo_ub, 157 | n_init=turbo_n_init, 158 | max_evals=turbo_max_evals, 159 | train_gp=train_gp_for_turbo, 160 | sample_from_gp=sample_from_gp, 161 | batch_size=turbo_batch_size, 162 | verbose=True, 163 | use_ard=True, 164 | max_cholesky_size=2000, 165 | n_training_steps=num_epochs, 166 | min_cuda=0, # directional_vi.py always runs on cuda if available 167 | device=turbo_device, 168 | dtype="float64") 169 | # optimize 170 | problem.optimize() 171 | X_turbo, fX_turbo = problem.X, problem.fX[:,0] # Evaluated points 172 | 173 | elif mode == "SVGP": 174 | # train 175 | print(f"\n\n---TuRBO with Traditional SVGP in dim {dim}---") 176 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 177 | 178 | from turbo1 import * 179 | def train_gp_for_turbo(train_x, train_y, use_ard, num_steps, hypers): 180 | # expects train_x on unit cube and train_y standardized 181 | # make a trainable model for TuRBO 182 | train_x = train_x.float() 183 | train_y = train_y.float() 184 | dataset = TensorDataset(train_x,train_y) 185 | model,likelihood = traditional_vi.train_gp(dataset,dim,num_inducing=num_inducing, 186 | minibatch_size=minibatch_size,num_epochs=num_steps,use_ngd=use_ngd, 187 | use_ciq=use_ciq,learning_rate_hypers=learning_rate_hypers, 188 | learning_rate_ngd=learning_rate_ngd, 189 | lr_sched=lr_sched,num_contour_quadrature=num_contour_quadrature, 190 | mll_type=mll_type,verbose=verbose) 191 | return model.double(),likelihood.double() 192 | 193 | # initialize TuRBO 194 | problem = Turbo1( 195 | myObj, 196 | lb=turbo_lb,ub=turbo_ub, 197 | n_init=turbo_n_init, 198 | max_evals=turbo_max_evals, 199 | train_gp=train_gp_for_turbo, 200 | batch_size=turbo_batch_size, 201 | verbose=True, 202 | use_ard=True, 203 | max_cholesky_size=2000, 204 | n_training_steps=num_epochs, 205 | min_cuda=0, 206 | device=turbo_device, 207 | dtype="float64") 208 | # optimize 209 | problem.optimize() 210 
| X_turbo, fX_turbo = problem.X, problem.fX.flatten() # Evaluated points 211 | 212 | elif mode == "Vanilla": 213 | # train 214 | print(f"\n\n---Vanilla TuRBO in dim {dim}---") 215 | 216 | from turbo1_vanilla import * 217 | 218 | # initialize TuRBO 219 | problem = Turbo1( 220 | myObj, 221 | lb=turbo_lb,ub=turbo_ub, 222 | n_init=turbo_n_init, 223 | max_evals=turbo_max_evals, 224 | batch_size=turbo_batch_size, 225 | verbose=True, 226 | use_ard=True, 227 | max_cholesky_size=2000, 228 | n_training_steps=num_epochs, 229 | min_cuda=0, 230 | device=turbo_device, 231 | dtype="float64") 232 | 233 | # optimize 234 | problem.optimize() 235 | X_turbo, fX_turbo = problem.X, problem.fX.flatten() # Evaluated points 236 | 237 | 238 | 239 | # get the optimum 240 | idx_opt = np.argmin(fX_turbo) 241 | fopt = fX_turbo[idx_opt] 242 | xopt = X_turbo[idx_opt] 243 | print(f"fopt = {fopt}") 244 | 245 | # dump the data 246 | outdata = {} 247 | outdata['X'] = X_turbo 248 | outdata['fX'] = fX_turbo 249 | outdata['xopt'] = xopt 250 | outdata['fopt'] = fopt 251 | # add the run params 252 | outdata.update(run_params) 253 | pickle.dump(outdata,open(data_filename,"wb")) 254 | print(f"Dropped file: {data_filename}") 255 | --------------------------------------------------------------------------------
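
Note: the experiment drivers listed above (synthetic1.py, test_turbo.py, and the run_exp-style scripts) all follow the same pattern: build a torch TensorDataset, call a module's train_gp, then eval_gp, and score the predictions with MSE and negative log likelihood. The short sketch below illustrates that pattern against traditional_vi.py as listed above. It is a minimal sketch, not part of the repository: the toy data, dataset sizes, and hyperparameter values are placeholders, and it assumes it is run with the directionalvi/ directory on the import path (mirroring the sys.path.append calls the experiment scripts use), so that traditional_vi and utils.metrics resolve as they do for those scripts.

# Minimal usage sketch for traditional_vi.train_gp / eval_gp (illustrative only).
# Assumes directionalvi/ is the working directory / on sys.path, as in the
# experiment scripts above; data and hyperparameters are placeholders.
import math
import torch
from torch.utils.data import TensorDataset
import traditional_vi
from utils.metrics import MSE  # same helper the experiment scripts import

torch.random.manual_seed(0)

# toy regression data on the unit cube
n, dim = 512, 2
X = torch.rand(n, dim)
Y = torch.sin(2 * math.pi * X.sum(dim=1))

# simple train/test split
n_train = int(0.8 * n)
train_dataset = TensorDataset(X[:n_train], Y[:n_train])
test_dataset = TensorDataset(X[n_train:], Y[n_train:])

# train a plain SVGP with the options exposed by traditional_vi.train_gp
model, likelihood = traditional_vi.train_gp(
    train_dataset, dim,
    num_inducing=64,
    minibatch_size=128,
    num_epochs=10,
    mll_type="ELBO",
    verbose=False,
)

# predict on the held-out points and score, as the run scripts do
means, variances = traditional_vi.eval_gp(
    test_dataset, model, likelihood,
    num_inducing=64,
    minibatch_size=len(test_dataset),
)
test_f = Y[n_train:]
test_mse = MSE(test_f, means)
test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean()
print(f"MSE: {test_mse:.4e}, NLL: {test_nll:.4e}")

The derivative-aware variants follow the same calling convention, except that the dataset targets carry the function value and derivative information together (as in the DSVGP and GradSVGP branches of synthetic1.py), and the function-value predictions are recovered by striding the returned means and variances.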