├── directionalvi ├── utils │ ├── __init__.py │ ├── __pycache__ │ │ ├── metrics.cpython-37.pyc │ │ ├── rescale.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ ├── show_pickle.cpython-37.pyc │ │ └── synthetic_dataset.cpython-37.pyc │ ├── show_pickle.py │ ├── test │ │ ├── test_load_data.py │ │ └── test_synthetic_functions.py │ ├── test_synthetic.py │ ├── count_params.py │ ├── metrics.py │ ├── rescale.py │ ├── synthetic_dataset.py │ ├── test load helens.ipynb │ ├── .ipynb_checkpoints │ │ └── test load helens-checkpoint.ipynb │ ├── csv_dataset.py │ └── load_data.py ├── README.md ├── RBFKernelDirectionalGrad.py ├── traditional_vi.py ├── grad_svgp.py └── GradVariationalStrategy.py ├── experiments ├── bunny │ ├── ImplicitBunny │ │ ├── bunny.mat │ │ ├── readObj.m │ │ └── bunny.m │ └── bunny.sub ├── GNN_bo │ ├── GCN │ │ ├── run.sh │ │ ├── models2.py │ │ ├── models.py │ │ ├── utils.py │ │ └── train.py │ ├── gcn_sgd.sub │ ├── turbo_utils.py │ ├── gcn.sub │ ├── gp.py │ ├── gcn_sgd.py │ └── plot_traj.py ├── README.md ├── synthetic1 │ ├── write_dataset.py │ ├── compute_optimal_subspace.py │ ├── plot_nll.py │ ├── run_exp.py │ ├── ExactGradGP.py │ └── synthetic1.py ├── rover │ ├── optimize_rover_gd.py │ ├── random_search.py │ ├── finite_difference.py │ ├── plot_rover.py │ ├── gradient_descent.py │ ├── turbo_utils.py │ ├── rover.py │ ├── plot_traj_new.py │ ├── plot_traj.py │ ├── exact_gp.py │ ├── run_exp.py │ └── test_turbo.py ├── synthetic │ ├── exp_setup.sh │ └── exp_run.sh ├── uci_dfree │ ├── plot_nll.py │ ├── run_exp.py │ └── test.py └── stellarator_regression │ ├── plot_nll.py │ ├── run_exp.py │ └── plot_stellarator.py ├── tests ├── testfun.py ├── README.md ├── test_traditional_vi.py ├── test_grad_svgp.py ├── test_dfree_dsvgp.py └── test_dsvgp.py ├── README.md ├── .gitignore └── graphite_environment.yml /directionalvi/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/bunny/ImplicitBunny/bunny.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/experiments/bunny/ImplicitBunny/bunny.mat -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/rescale.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/rescale.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/show_pickle.cpython-37.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/show_pickle.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/__pycache__/synthetic_dataset.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mishapadidar/GP-Derivatives-Variational-Inference/HEAD/directionalvi/utils/__pycache__/synthetic_dataset.cpython-37.pyc -------------------------------------------------------------------------------- /directionalvi/utils/show_pickle.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import sys 3 | 4 | filename = sys.argv[1] 5 | 6 | # load the test point 7 | with open(filename, "rb") as f: 8 | d= pickle.load(f) 9 | for item in d.items(): 10 | print(item) 11 | -------------------------------------------------------------------------------- /tests/testfun.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | def f(x, deriv=True): 5 | # f(x) = sin(2pi(x**2+y**2)), df/dx = cos(2pi(x**2+y**2))4pi*x 6 | fx = torch.sin(2*np.pi*torch.sum(x**2,dim=1)) 7 | gx = 4*np.pi*( torch.cos(2*np.pi*torch.sum(x**2,dim=1)) * x.T).T 8 | fx = fx.reshape(len(x),1) 9 | if deriv: 10 | return torch.cat([fx,gx],1) 11 | else: 12 | return fx.squeeze(axis=1) -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/run.sh: -------------------------------------------------------------------------------- 1 | dataset="citeseer" 2 | expid="10" 3 | python train.py --fastmode --seed 1212 --epochs 1000\ 4 | --lr 0.0025 --weight_decay 1e-4 --hidden 32\ 5 | --dropout 0.2 --dataset ${dataset}\ 6 | --watch_model True --train_percent 0.036\ 7 | --expid ${expid} --lr_sched "none"\ 8 | 2>&1 | tee ../runlogs/a.out_${dataset}_${expid} 9 | -------------------------------------------------------------------------------- /directionalvi/utils/test/test_load_data.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append("../") 4 | from load_data import * 5 | 6 | args = {} 7 | args["n_train"] = 12000 8 | args["n_test"] = 2040 9 | args["seed"] = 3 10 | 11 | #cwd = os.getcwd() 12 | #print(cwd) 13 | #print("hi") 14 | train, test, dim = load_helens("../../../data/MtSH.mat", **args) 15 | print(len(train)) 16 | print(train[0]) 17 | print(len(test)) 18 | print(dim) -------------------------------------------------------------------------------- /experiments/README.md: -------------------------------------------------------------------------------- 1 | # Experiments 2 | 3 | - `GNN_bo` contains the GCN experiment. 4 | - `bunny` contains the bunny experiment. 5 | - `rover` contains the rover experiment. 6 | - `stellarator_regression` contains the stellarator regression experiment. 7 | - `synthetic` can run all synthetic experiments except sin-5 8 | - `synthetic1` can run the sin-5 experiment. 9 | - `uci_dfree` contains the code to run DSVGP on the UCI datasets without derivative information. 
10 | -------------------------------------------------------------------------------- /directionalvi/utils/test_synthetic.py: -------------------------------------------------------------------------------- 1 | from botorch.test_functions.base import BaseTestProblem 2 | from botorch.test_functions.synthetic import Branin, SixHumpCamel, StyblinskiTang, Hartmann, SyntheticTestFunction 3 | from torch import Tensor 4 | import torch 5 | from synthetic_functions import Hart_with_deriv 6 | 7 | x= Hart_with_deriv() 8 | 9 | #t = torch.tensor([[.1, .25, .2, .6, .1, .1], [.2, .1, .3, .1, .2, .4], [.1, .2, .3, .1, .2, .4]]) 10 | t = torch.tensor([0.20169, 0.150011, 0.476874, 0.275332, 0.311652, 0.6573]).reshape(1, 6) 11 | res = x.evaluate_true_with_deriv(t) 12 | print(res.shape) 13 | print(res) -------------------------------------------------------------------------------- /experiments/synthetic1/write_dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import pickle 3 | import numpy as np 4 | 5 | # objective 6 | def testf(x, deriv=True): 7 | # f(x) = sin(2pi(x**2+y**2)), df/dx = cos(2pi(x**2+y**2))4pi*x 8 | fx = torch.sin(2*np.pi*torch.sum(x**2,dim=1)) 9 | gx = 4*np.pi*( torch.cos(2*np.pi*torch.sum(x**2,dim=1)) * x.T).T 10 | fx = fx.reshape(len(x),1) 11 | if deriv: 12 | return torch.cat([fx,gx],1) 13 | else: 14 | return fx.squeeze(axis=1) 15 | 16 | n = 20000 17 | dim = 12 18 | X = torch.rand(n,dim) 19 | Y = testf(X) 20 | d ={} 21 | d['X'] = X 22 | d['Y'] = Y 23 | name = f"./synthetic1_dataset_{n}_points_{dim}_dim.pickle" 24 | pickle.dump(d,open(name,"wb")) 25 | -------------------------------------------------------------------------------- /experiments/rover/optimize_rover_gd.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rover import * 3 | from gradient_descent import gradient_descent 4 | import pickle 5 | 6 | # generate a starting point 7 | dim = 200 8 | x0 = np.random.uniform(-5,5,size=dim) 9 | max_iter = 1000 10 | gtol = 1e-7 11 | # optimize 12 | def noisy_rover(u): 13 | return rover_obj(u) + 1e1*np.random.randn() 14 | xopt,X = gradient_descent(noisy_rover,rover_grad,x0,max_iter=max_iter,gtol=gtol) 15 | fX = np.array([rover_obj(x) for x in X]) 16 | print(fX[-1]) 17 | # save data 18 | d = {} 19 | d['X'] = X 20 | d['fX'] = fX 21 | d['mode'] = "GD" 22 | outfilename = f"./output/data_rover_GD_{max_iter}_iter.pickle" 23 | pickle.dump(d,open(outfilename,"wb")) 24 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | 2 | ## Basic tests to show usage and functionality of methods 3 | To run the code in this directory use `python3 filename`. For instance, to train a Variational GP with directional derivatives such as DSVGP2 run `python3 test_dsvgp.py`. Make sure that you have compatible version of gpytorch or are inside the supplied conda environment. 4 | 5 | - `test_dsvgp.py` can be used to test a variational GP with directional derivatives, i.e. DSVGP or DPPGPR 6 | - `test_dfree_dsvgp.py` can be used to train DSVGP on a data set that has no derivative information. 7 | - `test_grad_svgp.py` runs a multi-output stochastic variational GP with full derivative information. 8 | - `test_traditional_vi.py` runs SVGP or PPGPR. 
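For orientation, the models these tests exercise all follow GPyTorch's approximate-GP pattern. The snippet below is a minimal, plain-GPyTorch SVGP sketch — it uses only stock GPyTorch classes (not this repo's directional-derivative models), and the data sizes, inducing count, and training settings are made-up illustrations:

```python
import math
import torch
import gpytorch

class ToySVGP(gpytorch.models.ApproximateGP):
    def __init__(self, inducing_points):
        # free-form Gaussian q(u) over the inducing values
        var_dist = gpytorch.variational.CholeskyVariationalDistribution(inducing_points.size(0))
        var_strat = gpytorch.variational.VariationalStrategy(
            self, inducing_points, var_dist, learn_inducing_locations=True)
        super().__init__(var_strat)
        self.mean_module = gpytorch.means.ConstantMean()
        self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel())

    def forward(self, x):
        return gpytorch.distributions.MultivariateNormal(self.mean_module(x), self.covar_module(x))

# toy data: f(x) = sin(2*pi*(x1^2 + x2^2)), the same test function as testfun.py (values only)
X = torch.rand(1000, 2)
y = torch.sin(2 * math.pi * (X ** 2).sum(dim=1))

model = ToySVGP(inducing_points=torch.rand(50, 2))
likelihood = gpytorch.likelihoods.GaussianLikelihood()
# VariationalELBO gives SVGP; swapping in gpytorch.mlls.PredictiveLogLikelihood gives the PPGPR objective
mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=X.size(0))
optimizer = torch.optim.Adam(list(model.parameters()) + list(likelihood.parameters()), lr=0.01)

model.train(); likelihood.train()
for _ in range(200):
    optimizer.zero_grad()
    loss = -mll(model(X), y)
    loss.backward()
    optimizer.step()
```

The scripts in this directory follow broadly the same train/evaluate pattern, with the kernel and variational strategy replaced by the directional-derivative versions from `directionalvi/`.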
9 | 10 | -------------------------------------------------------------------------------- /directionalvi/utils/count_params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def count_params(model, likelihood): 4 | # count number of parameters to learn 5 | param_total_dim = 0 6 | print("All parameters to learn:") 7 | for name, param in model.named_parameters(): 8 | print(" ", name) 9 | print(" ", param.data.shape) 10 | if param.requires_grad: 11 | param_total_dim += np.prod(param.data.shape) 12 | for name, param in likelihood.named_parameters(): 13 | print(" ", name) 14 | print(" ", param.data.shape) 15 | if param.requires_grad: 16 | param_total_dim += np.prod(param.data.shape) 17 | 18 | print("Total number of parameters: ", param_total_dim) 19 | return param_total_dim -------------------------------------------------------------------------------- /directionalvi/utils/metrics.py: -------------------------------------------------------------------------------- 1 | def MSE(Y,Z): 2 | """Compute the MSE (mean squared error). 3 | Y: torch tensor, function values 4 | Z: torch tensor, predicted function values 5 | """ 6 | return ((Y-Z)**2).mean() 7 | 8 | def MAE(Y,Z): 9 | """Compute the MAE (mean absolute error). 10 | Y: torch tensor, function values 11 | Z: torch tensor, predicted function values 12 | """ 13 | return ((Y-Z).abs()).mean() 14 | 15 | def RMSE(Y, Z): 16 | """Compute the RMSE (root mean squared error). 17 | Y: torch tensor, function values 18 | Z: torch tensor, predicted function values 19 | """ 20 | return ((Y-Z)**2).mean().sqrt() 21 | 22 | def SMAE(Y, Z): 23 | """Compute the SMAE (MAE normalized by the mean magnitude of Y). 24 | Y: torch tensor, function values 25 | Z: torch tensor, predicted function values 26 | """ 27 | return ((Y-Z).abs()).mean() / Y.abs().mean() -------------------------------------------------------------------------------- /experiments/rover/random_search.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from rover import rover_obj 3 | import sys 4 | 5 | 6 | 7 | if __name__ == '__main__': 8 | 9 | dim = 200 10 | max_evals = 2000 11 | lb = -5 * np.ones(dim) 12 | ub = 5 * np.ones(dim) 13 | batch_size = 5 14 | num_epochs = 30 15 | 16 | from datetime import datetime 17 | now = datetime.now() 18 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 19 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 20 | np.random.seed(seed) 21 | 22 | X = np.random.uniform(lb,ub,(max_evals,dim)) 23 | fX = [rover_obj(x) for x in X] 24 | 25 | d ={} 26 | d['X'] = X 27 | d['fX'] = fX 28 | d['mode'] = "Random Search" 29 | outfilename = f"./output/data_rover_Random_Search_{max_evals}_evals_{barcode}.pickle" 30 | import pickle 31 | pickle.dump(d,open(outfilename,"wb")) 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GP-Derivatives-Variational-Inference 2 | 3 | This repo contains code for the NeurIPS paper, Scaling Gaussian Processes with Derivatives Using Variational Inference. 4 | 5 | All of our code leverages the GPyTorch framework for efficient computations and GPU acceleration. Much of the functionality of this code base is, or soon will be, built into GPyTorch. 6 | 7 | The `graphite_environment.yml` can be used to create a compatible conda environment.
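(A typical way to do this is `conda env create -f graphite_environment.yml`, then `conda activate` the environment it defines.)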
8 | 9 | The directory structure is as follows: 10 | - `directionalvi` contains the main methods used in the paper. 11 | - `tests` contains basic tests showing how to use the methods. 12 | - `experiments` contains code for the experiments run in the paper, including the graph convolutional network, stellarator regression, rover, bunny, and UCI experiments. For experimental data, please contact the authors. 13 | 14 | For a basic introduction on how to use the methods from the paper, visit the `tests` directory. 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | gpytorch_tutorials 2 | notebooks/*.ipynb_checkpoints 3 | __pycache__ 4 | slurm_output 5 | experiments/.ipynb_checkpoints 6 | experiments/synthetic/wandb 7 | experiments/synthetic/postprocess/ 8 | *.ipynb 9 | experiments/*.sub 10 | experiments/*-copy.py 11 | experiments/synthetic/*.exp* 12 | experiments/synthetic/*.sub 13 | experiments/bunny/wandb 14 | experiments/bunny/results 15 | experiments/lenet_bo 16 | directionalvi/directional_vi-fixed_inducing.py 17 | experiments/GNN_bo/results* 18 | experiments/GNN_bo/runlogs* 19 | experiments/GNN_bo/plots 20 | experiments/GNN_bo/GCN/data 21 | experiments/synthetic/logs/ 22 | experiments/GNN_bo/*.sh 23 | experiments/GNN_bo/*.sub 24 | experiments/GNN_bo/gcn_turbo_test.py 25 | experiments/synthetic/ablation 26 | experiments/plot/synthetic/data 27 | experiments/plot/synthetic/exp_res 28 | experiments/plot/synthetic/plots 29 | experiments/synthetic/outputs 30 | experiments/rover/plots/ 31 | experiments/stellarator_regression/plots/ 32 | experiments/stellarator_regression/data/ 33 | experiments/rover/data/ 34 | experiments/synthetic/ExactGradGP-run.py 35 | -------------------------------------------------------------------------------- /experiments/rover/finite_difference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import multiprocessing as mp 3 | 4 | 5 | def fdiff_jac(f,x0,h=1e-6): 6 | """Compute the jacobian of f with 7 | central difference 8 | """ 9 | h2 = h/2.0 10 | dim = len(x0) 11 | Ep = x0 + h2*np.eye(dim) 12 | Fp = np.array([f(e) for e in Ep]) 13 | Em = x0 - h2*np.eye(dim) 14 | Fm = np.array([f(e) for e in Em]) 15 | jac = (Fp - Fm)/(h) 16 | return jac.T 17 | 18 | def fdiff_jac_mp(f,x0,h=1e-6,n_comp=1): 19 | """Compute the jacobian of f with 20 | central difference 21 | using multiprocessing for acceleration.
22 | """ 23 | h2 = h/2.0 24 | dim = len(x0) 25 | Ep = x0 + h2*np.eye(dim) 26 | Em = x0 - h2*np.eye(dim) 27 | with mp.Pool(n_comp) as p: 28 | Fp = np.array(p.map(f, Ep)) 29 | Fm = np.array(p.map(f, Em)) 30 | jac = (Fp - Fm)/(h) 31 | return jac.T 32 | 33 | if __name__ == '__main__': 34 | np.random.seed(0) 35 | dim = 4 36 | A = np.random.randn(dim,dim) 37 | print(A) 38 | f = lambda x: A @ x 39 | x0 = np.random.randn(dim) 40 | t0 = time.time() 41 | print(fdiff_jac(f,x0)) 42 | -------------------------------------------------------------------------------- /directionalvi/utils/rescale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | def to_unit_cube(x, lb, ub, g=None): 5 | """Project to [0, 1]^d from hypercube with bounds lb and ub""" 6 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 7 | xx = (x - lb) / (ub - lb) 8 | return xx 9 | 10 | 11 | def from_unit_cube(x, lb, ub, g=None): 12 | """Project from [0, 1]^d to hypercube with bounds lb and ub""" 13 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 14 | xx = x * (ub - lb) + lb 15 | return xx 16 | 17 | 18 | def normalize(y, **kwargs): 19 | ''' 20 | normalize function values and derivatives 21 | Input: torch tensor storing function values and derivatives 22 | ''' 23 | if kwargs["derivative"]: 24 | f = y[..., 0].reshape(len(y),1) 25 | g = y[..., 1:].reshape(len(y),-1) 26 | fcopy = np.array(f.flatten()) 27 | sigma = np.std(fcopy, ddof=1) 28 | f -= np.mean(fcopy) 29 | f /= sigma 30 | g /= sigma 31 | y = torch.cat([f, g], 1) 32 | else: 33 | fcopy = np.array(y.flatten()) 34 | sigma = np.std(fcopy) 35 | y -= np.mean(fcopy) 36 | y /= sigma 37 | 38 | -------------------------------------------------------------------------------- /experiments/synthetic/exp_setup.sh: -------------------------------------------------------------------------------- 1 | dataset=${1} 2 | variational_strategy=${2} 3 | variational_distribution=${3} 4 | n_train=${4} 5 | n_test=${5} 6 | num_inducing=${6} 7 | num_directions=${7} 8 | num_epochs=${8} 9 | batch_size=${9} 10 | model=${10} 11 | lr=${11} 12 | lr_ngd=${12} 13 | num_contour_quad=${13} 14 | watch_model=${14} 15 | exp_name=${15} 16 | seed=${16} 17 | lr_sched=${17} 18 | save_results=${18} 19 | mll_type=${19} 20 | gamma=${20} 21 | 22 | if [ ! 
-d "./logs" ] 23 | then 24 | mkdir ./logs 25 | fi 26 | 27 | python -u exp_script.py \ 28 | --dataset ${dataset} --variational_strategy ${variational_strategy}\ 29 | --variational_distribution ${variational_distribution} \ 30 | --n_train ${n_train} --n_test ${n_test}\ 31 | --num_inducing ${num_inducing} --num_directions ${num_directions}\ 32 | --num_epochs ${num_epochs} --batch_size ${batch_size} --model ${model}\ 33 | --lr ${lr} --lr_ngd ${lr_ngd} --num_contour_quad ${num_contour_quad}\ 34 | --watch_model ${watch_model} --exp_name ${exp_name} --seed ${seed}\ 35 | --lr_sched ${lr_sched} --save_results ${save_results} --mll_type ${mll_type}\ 36 | --gamma ${gamma}\ 37 | 2>&1 | tee logs/a.out_${dataset}_${model}_train${n_train}_test${n_test}_m${num_inducing}_p${num_directions}_epoch${num_epochs}_${variational_distribution}_${variational_strategy}_exp${expname}_${mll_type} -------------------------------------------------------------------------------- /directionalvi/utils/synthetic_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | from torch import is_tensor 3 | from torch.utils.data import Dataset 4 | from torch.quasirandom import SobolEngine 5 | from rescale import from_unit_cube 6 | 7 | class synthetic_dataset(Dataset): 8 | """A synthetic dataset that generates data when called from. 9 | """ 10 | def __init__(self, f,lb,ub,n_points,dim): 11 | """ 12 | Args: 13 | f (function handle): Returns a function value and gradient eval 14 | lb,ub (1D tensors): lower and upper bounds on domain of f 15 | n_points (int): number of data points 16 | """ 17 | self.f = f 18 | self.lb = lb 19 | self.ub = ub 20 | self.n_points = n_points 21 | self.dim = dim 22 | self.sobol = SobolEngine(dim,scramble=True) 23 | 24 | def __len__(self): 25 | return self.n_points 26 | 27 | def __getitem__(self, idx): 28 | if is_tensor(idx): 29 | idx = idx.tolist() 30 | # reset the sobol sequence 31 | self.sobol.reset() 32 | # fast forward to the desired index 33 | self.sobol.fast_forward(idx-1) 34 | # generate a point 35 | x = self.sobol.draw().flatten() 36 | # map from unit cube 37 | x = x * (self.ub - self.lb) + self.lb 38 | # evaluate it 39 | fx = self.f(x) 40 | # return a tuple of tensors 41 | sample = (x,fx) 42 | return sample 43 | -------------------------------------------------------------------------------- /experiments/rover/plot_rover.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.pylab as pl 3 | import seaborn as sns 4 | import pandas as pd 5 | import pickle 6 | import numpy as np 7 | import glob 8 | from rover import * 9 | 10 | # read the data 11 | data_files = glob.glob("./output/data*.pickle") 12 | colors = pl.cm.jet(np.linspace(0,1,len(data_files))) 13 | 14 | means = np.zeros((2,1000)) 15 | n_type = np.zeros(2) 16 | data = [] 17 | for ii in range(len(data_files)): 18 | ff = data_files[ii] 19 | # attributes 20 | attrib = {} 21 | # load 22 | d = pickle.load(open(ff, "rb")) 23 | if d['mode'] == 'Vanilla': 24 | label = "TuRBO" 25 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 26 | M = d['num_inducing']*(d['num_directions']+1) 27 | label = f"TuRBO-DPPGPR{d['num_directions']}" 28 | elif d['mode'] == "GD": 29 | label= d['mode'] 30 | uopt = d['X'][-1] 31 | x0 = np.array([5,20,0,0]) 32 | X = rover_dynamics(uopt,x0) 33 | plt.plot(X[:,0],X[:,1],linewidth=5,markersize=12,color=colors[ii],label=label) 34 | 35 | # plot the waypoints 36 | W = 
np.array([x0,[8,15,3,-4],[16,7,6,-4],[16,12,-6,-4],[0,0,0,0]]) 37 | plt.scatter(W[:,0],W[:,1],color='k',s=50,label='waypoints') 38 | # sns.set_style("whitegrid") 39 | # sns.set_context("paper", font_scale=1.5) 40 | plt.legend() 41 | plt.title("Rover Path") 42 | plt.ylabel("$x_2$") 43 | plt.xlabel("$x_1$") 44 | plt.show() 45 | 46 | -------------------------------------------------------------------------------- /experiments/synthetic1/compute_optimal_subspace.py: -------------------------------------------------------------------------------- 1 | from scipy.sparse.linalg import svds 2 | import numpy as np 3 | import torch 4 | 5 | 6 | def compute_optimal_subspace_projection(G,X,k): 7 | """Compute the optimal k-dimensional representation of G. 8 | G: np array, 2d, rows are observations; X: np array, 2d, data to project onto the same subspace 9 | k: int, dimension of subspace 10 | return: 11 | G: scores of G on the k leading right singular directions 12 | X: X projected onto those directions 13 | V: the k leading right singular vectors (optimal directions) 14 | """ 15 | # center the data 16 | G = G - np.mean(G,axis=0) 17 | # compute the SVD of G (keep the k leading directions) 18 | U,S,VT = np.linalg.svd(G) 19 | # truncated score matrix 20 | #G = U[:,:k] @ np.diag(S[:k]) 21 | G = G @ (VT.T)[:,:k] 22 | # project X as well 23 | X = X @ (VT.T[:,:k]) 24 | print("Singular values", S) 25 | return G,X, (VT.T[:,:k]) 26 | 27 | if __name__ == "__main__": 28 | import pickle 29 | d = pickle.load(open("synthetic1_dataset_10000_points_5_dim.pickle", "rb")) 30 | X = d['X'].detach().numpy() 31 | Y = d['Y'].detach().numpy() 32 | n,dim = X.shape 33 | f = Y[:,0].reshape(n,1) 34 | G = Y[:,1:] 35 | # compute the reduced G and X 36 | k = 2 # reduced dimension 37 | G,X,V = compute_optimal_subspace_projection(G,X,k) 38 | # make a reduced dataset 39 | Y = np.hstack((f,G)) 40 | d = {} 41 | d['X'] = torch.tensor(X) 42 | d['Y'] = torch.tensor(Y) 43 | pickle.dump(d,open(f"synthetic1_dataset_10000_points_5_dim_grad_dimredux_{k}_directions.pickle","wb")) 44 | 45 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gcn_sgd.sub: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J basic # Job name 3 | #SBATCH -o ../../slurm_output/job_%j.out # Name of stdout output file(%j expands to jobId) 4 | #SBATCH -e ../../slurm_output/job_%j.err # Name of stderr output file(%j expands to jobId) 5 | #SBATCH --get-user-env # Tells sbatch to retrieve the users login environment 6 | #SBATCH -N 1 # Total number of nodes requested 7 | #SBATCH -n 16 # Total number of cores requested 8 | #SBATCH --mem=32G # Total amount of (real) memory requested (per node) 9 | #SBATCH -t 168:00:00 # Time limit (hh:mm:ss) 10 | #SBATCH --partition=default_partition # Request partition for resource 11 | ##SBATCH --exclude=marschner-compute01 # Request partition for resource 12 | #SBATCH --exclude=joachims-compute-01,sablab-gpu-11 13 | #SBATCH --gres=gpu:1 # Specify a list of generic consumable resources (per node) 14 | 15 | 16 | .
/home/xz584/anaconda3/etc/profile.d/conda.sh 17 | conda activate DSVGP2 18 | 19 | # exp setups 20 | dataset="PubMed" 21 | # watch_model=True 22 | exp_name=${1} 23 | seed=${2} 24 | turbo_max_evals=${3} 25 | # find runlogs in logs folder 26 | python3 -u gcn_sgd.py --dataset ${dataset} --exp_name ${exp_name} --seed ${seed} \ 27 | --turbo_max_evals ${turbo_max_evals} \ 28 | 2>&1 | tee runlogs/a.out_${dataset}_SGD_epoch${turbo_max_evals}_exp${exp_name} 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /experiments/rover/gradient_descent.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def gradient_descent(Loss,grad,x0,max_iter=1000,gtol=1e-3): 4 | # inital guess 5 | x_k = np.copy(x0) 6 | # initialize step size 7 | mu_k = 1e-2 8 | # minimum step size 9 | mu_min = 1e-10 10 | # compute gradient 11 | g_k = grad(x_k) 12 | # compute function value 13 | f_k = Loss(x_k) 14 | 15 | # storage 16 | dim = len(x0) 17 | X = np.zeros((1,dim)) 18 | X[0] = np.copy(x0) 19 | 20 | # stop when gradient is flat (within tolerance) 21 | nn = 0 22 | while np.linalg.norm(g_k) > gtol and nn < max_iter: 23 | if nn%1 == 0: 24 | print(nn,f_k) 25 | # double the step size to counter backtracking 26 | mu_k = 2*mu_k; 27 | 28 | # compute step 29 | x_kp1 = x_k -mu_k*g_k; 30 | f_kp1 = Loss(x_kp1); 31 | 32 | # backtracking to find step size 33 | while f_kp1 >= f_k: 34 | # half our step size 35 | mu_k = mu_k /2 ; 36 | # take step 37 | x_kp1 = x_k -mu_k*g_k; 38 | # f_kp1 39 | f_kp1 = Loss(x_kp1); 40 | 41 | # break if mu is too small 42 | if mu_k <= mu_min: 43 | print('ERROR: mu too small.') 44 | return x_k,X 45 | 46 | # reset for next iteration 47 | x_k = np.copy(x_kp1) 48 | f_k = f_kp1; 49 | 50 | # compute gradient 51 | g_k = grad(x_k); 52 | 53 | # update iteration counter 54 | nn += 1 55 | X = np.copy(np.vstack((X,x_k))) 56 | 57 | return x_k,X 58 | 59 | 60 | if __name__ == '__main__': 61 | f = lambda x: x @ x 62 | g = lambda x: 2*x 63 | dim = 2 64 | x0 = 10*np.random.randn(dim) 65 | xopt,X = gradient_descent(f,g,x0,max_iter=200,gtol=1e-7) 66 | print(xopt) 67 | print(X) 68 | -------------------------------------------------------------------------------- /directionalvi/README.md: -------------------------------------------------------------------------------- 1 | ## Main Methods for Variational GP computions 2 | This directory contains the main components of the methods for a variational GP with directional derivatives, as well as GPs with derivatives. 3 | Much of this functionality is, or soon will be, incorporated into GPyTorch. 4 | 5 | The RBF directional derivative kernel is `RBFKernelDirectionalGrad.py`. 6 | 7 | The following files are the main scripts to initialize and run a method. To run an instance of DSVGP you would only need to import `directional_vi.py`. See the tests directory for usability. 8 | - `directional_vi.py` contains the methods for initializing and running a variational GP with directional derivatives. 9 | - `shared_directional_vi.py` contains the main methods for running a variational GP with directional derivatives with shared inducing directions. 10 | - `dfree_directional_vi.py` contains the main methods for running a variational GP with directional derivatives on a dataset that does not have any derivative information. 11 | - `traditional_vi.py` runs a standard SVGP. 12 | - `grad_svgp.py` runs a multi-output SVGP with full derivative information. 
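For reference, the directional-derivative quantities these components work with are the standard RBF-kernel identities (sketched here for context, not copied from the code): writing $k(x,x') = s^2 \exp\big(-\|x-x'\|^2/(2\ell^2)\big)$ for directions $u$ and $v$,

$$u^\top \nabla_x k(x,x') = -\frac{k(x,x')}{\ell^2}\, u^\top (x-x'), \qquad u^\top \nabla_x \nabla_{x'} k(x,x')\, v = \frac{k(x,x')}{\ell^2}\Big(u^\top v - \frac{\big(u^\top (x-x')\big)\big((x-x')^\top v\big)}{\ell^2}\Big).$$

Conditioning on a handful of directional derivatives per point, rather than the full $d$-dimensional gradient, is what keeps the inducing matrices small.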
13 | 14 | Variational Stategies are used for prediction in Variational GPs. 15 | - `DirectionalGradVariationalStrategy.py` is the workhorse variational strategy for GPs with directional derivatives. 16 | - `DFreeDirectionalGradVariationalStrategy.py` allows DSVGP and DPPGPR to train on data without derivative labels. 17 | - `SharedDirectionalGradVariationalStrategy.py` allows DSVGP and DPPGPR to use shared inducing directions. 18 | - `CiqDirectionalGradVariationalStrategy.py` allows DSVGP and DPPGPR to leverage contour integral quadrature. 19 | - `GradVariationalStrategy.py` is the variational strategy for a stochastic variational gaussian process with full derivative information. 20 | 21 | 22 | The `utils` directory contains useful helper functions. 23 | -------------------------------------------------------------------------------- /experiments/GNN_bo/turbo_utils.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import numpy as np 13 | 14 | 15 | def to_unit_cube(x, lb, ub): 16 | """Project to [0, 1]^d from hypercube with bounds lb and ub""" 17 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 18 | xx = (x - lb) / (ub - lb) 19 | return xx 20 | 21 | 22 | def from_unit_cube(x, lb, ub): 23 | """Project from [0, 1]^d to hypercube with bounds lb and ub""" 24 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 25 | xx = x * (ub - lb) + lb 26 | return xx 27 | 28 | 29 | def latin_hypercube(n_pts, dim): 30 | """Basic Latin hypercube implementation with center perturbation.""" 31 | X = np.zeros((n_pts, dim)) 32 | centers = (1.0 + 2.0 * np.arange(0.0, n_pts)) / float(2 * n_pts) 33 | for i in range(dim): # Shuffle the center locataions for each dimension. 34 | X[:, i] = centers[np.random.permutation(n_pts)] 35 | 36 | # Add some perturbations within each box 37 | pert = np.random.uniform(-1.0, 1.0, (n_pts, dim)) / float(2 * n_pts) 38 | X += pert 39 | return X 40 | 41 | -------------------------------------------------------------------------------- /experiments/rover/turbo_utils.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. 
# 10 | ############################################################################### 11 | 12 | import numpy as np 13 | 14 | 15 | def to_unit_cube(x, lb, ub): 16 | """Project to [0, 1]^d from hypercube with bounds lb and ub""" 17 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 18 | xx = (x - lb) / (ub - lb) 19 | return xx 20 | 21 | 22 | def from_unit_cube(x, lb, ub): 23 | """Project from [0, 1]^d to hypercube with bounds lb and ub""" 24 | assert np.all(lb < ub) and lb.ndim == 1 and ub.ndim == 1 and x.ndim == 2 25 | xx = x * (ub - lb) + lb 26 | return xx 27 | 28 | 29 | def latin_hypercube(n_pts, dim): 30 | """Basic Latin hypercube implementation with center perturbation.""" 31 | X = np.zeros((n_pts, dim)) 32 | centers = (1.0 + 2.0 * np.arange(0.0, n_pts)) / float(2 * n_pts) 33 | for i in range(dim): # Shuffle the center locataions for each dimension. 34 | X[:, i] = centers[np.random.permutation(n_pts)] 35 | 36 | # Add some perturbations within each box 37 | pert = np.random.uniform(-1.0, 1.0, (n_pts, dim)) / float(2 * n_pts) 38 | X += pert 39 | return X 40 | 41 | -------------------------------------------------------------------------------- /experiments/rover/rover.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from finite_difference import fdiff_jac 3 | 4 | def rover_dynamics(u,x0): 5 | m = 5 # mass 6 | h = 0.1 #deltat 7 | T = 100 # number of steps 8 | eta = 1.0 # friction coeff 9 | 10 | # state, control 11 | dim_s = 4 12 | dim_c = 2 13 | 14 | # dynamics 15 | A = np.array([[1,0,h,0],[0,1,0,h],[0,0,(1-eta*h/m),0],[0,0,0,(1-eta*h/m)]]) 16 | B = np.array([[0,0],[0,0],[h/m,0],[0,h/m]]) 17 | 18 | # state control (time is a row) 19 | x = np.zeros((T,dim_s)) 20 | 21 | # reshape the control 22 | u = np.reshape(u,(T,dim_c)) 23 | 24 | # initial condition 25 | x[0] = x0 26 | 27 | # dynamics 28 | # x_{t+1} = Ax_t + Bu_t for t=0,...,T-1 29 | for t in range(0,T-1): 30 | x[t+1] = A @ x[t] + B @ u[t] 31 | return x 32 | 33 | def rover_obj(u): 34 | """ 35 | The rover problem: 36 | The goal is to learn a controller to drive a rover through four 37 | waypoints. 
38 | state: 4dim position, velocity 39 | control: 2dim x,y forces 40 | 41 | input: 42 | u: length 2T array, open-loop controller 43 | return: 44 | cost: float, cost associated with the controller 45 | """ 46 | assert len(u) == 200 47 | # initial condition 48 | x0 = np.array([5,20,0,0]) 49 | # compute dynamics 50 | x = rover_dynamics(u,x0) 51 | # waypoints 52 | W = np.array([[8,15,3,-4],[16,7,6,-4],[16,12,-6,-4],[0,0,0,0]]) 53 | way_times = (np.array([10,40,70,100]) - 1).astype(int) 54 | q1 = 1e0 # penalty on missing waypoint 55 | q2 = 1e-4 # penalty on control 56 | # compute cost 57 | cost = q1*np.sum((x[way_times] - W)**2) + q2*np.sum(u**2) 58 | 59 | return cost 60 | 61 | def rover_grad(u): 62 | assert len(u) == 200 63 | """finite difference gradient""" 64 | return fdiff_jac(rover_obj,u,h=1e-6) 65 | 66 | 67 | if __name__=="__main__": 68 | u = np.ones(200) 69 | print(rover_obj(u)) 70 | grad = rover_grad(u) 71 | u = u - 1e0*rover_grad(u) 72 | print(rover_obj(u)) 73 | u = u - 1e0*rover_grad(u) 74 | print(rover_obj(u)) 75 | -------------------------------------------------------------------------------- /experiments/rover/plot_traj_new.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import argparse 4 | import pickle 5 | from operator import itemgetter 6 | from argparse import Namespace 7 | import numpy as np 8 | import pandas as pd 9 | from matplotlib import pyplot as plt 10 | import matplotlib 11 | import pylab 12 | 13 | data = pickle.load(open("./data/rover_plot_data_p3.pickle", "rb")) 14 | 15 | FONTSIZE=20 16 | MARKERSIZE=20 17 | FIGURESUZE=(10,7) 18 | ALPHA=0.2 19 | LINEWIDTH=5 20 | PADDING=0.1 21 | 22 | style_dict = {"TuRBO": ["TuRBO", "dashed", '#ff7f0e'], 23 | "TuRBO-DPPGPR1": ["TuRBO-DPPGPR1", "solid", '#1f77b4'], 24 | "TuRBO-DPPGPR2": ["TuRBO-DPPGPR2", "solid", '#d62728'], 25 | "TuRBO-DPPGPR3": ["TuRBO-DPPGPR3", "solid", '#e377c2'], 26 | "BO-LCB": ["BO", "dashed", '#8c564b'], 27 | "Random Search": ["Random", "dotted", "#7f7f7f"] } 28 | 29 | N_method = len(data['labels']) 30 | 31 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=FIGURESUZE) 32 | for i in range(N_method): 33 | method = data['labels'][i] 34 | mean_data = data['means'][i] 35 | std_data = data['std'][i] 36 | ax.plot(mean_data, linewidth=LINEWIDTH, 37 | color=style_dict[method][2], 38 | label=style_dict[method][0], 39 | linestyle=style_dict[method][1]) 40 | ax.fill_between(range(len(mean_data)), 41 | mean_data+std_data, mean_data-std_data, 42 | color=style_dict[method][2], alpha=ALPHA) 43 | 44 | plt.ylim([200, 1200]) 45 | plt.yticks([300, 500, 700, 900, 1100], fontsize=FONTSIZE) 46 | plt.ylabel('Objective function value', fontsize=FONTSIZE) 47 | plt.xticks([0, 400, 800, 1200, 1600, 2000], fontsize=FONTSIZE) 48 | plt.xlabel("Number of evaluations", fontsize=FONTSIZE) 49 | plt.grid() 50 | plt.legend(fontsize=FONTSIZE-5) 51 | 52 | figurename = f"TuRBO_rover.pdf" 53 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 54 | fig.savefig(figurepath, bbox_inches='tight', pad_inches = PADDING) 55 | print("Figure saved:", figurepath) 56 | -------------------------------------------------------------------------------- /directionalvi/utils/test load helens.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from load_data import *" 10 | ] 11 | }, 12 | { 13 | 
"cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "args= {'seed': 123, 'n_train': 700, 'derivative': True}" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "res = load_helens(\"../../data/helens\", 0.5, **args)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 7, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "(,\n", 39 | " ,\n", 40 | " 2)" 41 | ] 42 | }, 43 | "execution_count": 7, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "res" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 12, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "700\n", 62 | "2700\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "print(len(res[0]))\n", 68 | "print(len(res[1]))" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.7.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 4 93 | } 94 | -------------------------------------------------------------------------------- /directionalvi/utils/.ipynb_checkpoints/test load helens-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from load_data import *" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 5, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "args= {'seed': 123, 'n_train': 700, 'derivative': True}" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 6, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "res = load_helens(\"../../data/helens\", 0.5, **args)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 7, 33 | "metadata": {}, 34 | "outputs": [ 35 | { 36 | "data": { 37 | "text/plain": [ 38 | "(,\n", 39 | " ,\n", 40 | " 2)" 41 | ] 42 | }, 43 | "execution_count": 7, 44 | "metadata": {}, 45 | "output_type": "execute_result" 46 | } 47 | ], 48 | "source": [ 49 | "res" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 12, 55 | "metadata": {}, 56 | "outputs": [ 57 | { 58 | "name": "stdout", 59 | "output_type": "stream", 60 | "text": [ 61 | "700\n", 62 | "2700\n" 63 | ] 64 | } 65 | ], 66 | "source": [ 67 | "print(len(res[0]))\n", 68 | "print(len(res[1]))" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.7.8" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 4 93 | } 94 | -------------------------------------------------------------------------------- 
/experiments/uci_dfree/plot_nll.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import pickle 5 | import numpy as np 6 | import glob 7 | 8 | # read the data 9 | #data_files = glob.glob("./output/elevators/data*.pickle") 10 | #data_files = glob.glob("./output/kin40k/data*.pickle") 11 | #data_files = glob.glob("./output/energy/data*.pickle") 12 | #data_files = glob.glob("./output/protein/data*.pickle") 13 | data_files = glob.glob("./output/keggdirected/data*.pickle") 14 | 15 | data = [] 16 | for ff in data_files: 17 | # attributes 18 | attrib = {} 19 | # load 20 | d = pickle.load(open(ff, "rb")) 21 | attrib['mode']= d['mode'] 22 | attrib['ni'] = d['num_inducing'] 23 | if d['mode'] == 'SVGP': 24 | d['num_directions']= 0 25 | if d['mode'] == 'SVGP' and d['mll_type'] == "PLL": 26 | d['mode'] = 'PPGPR' 27 | elif d['mode'] == 'DSVGP' and d['mll_type'] == "PLL": 28 | d['mode'] = 'DPPGPR' 29 | attrib['nd'] = d['num_directions'] 30 | attrib['M'] = d['num_inducing']*(d['num_directions']+1) 31 | attrib['nll'] = d['test_nll'].item() 32 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 33 | attrib['test_time'] = d['test_time'] 34 | attrib['train_time'] = d['train_time'] 35 | # add an indicator attribute for plotting 36 | attrib['run'] = d['mode'] + str(d['num_directions']) 37 | data.append(attrib) 38 | 39 | # make a pandas df 40 | df = pd.DataFrame.from_dict(data,orient='columns') 41 | print(df) 42 | 43 | # compute the mean and standard error 44 | errs = df.groupby(['run','M']).sem() 45 | avgs = df.groupby(['run','M']).mean() 46 | print("\nMeans") 47 | print(avgs[['nll','rmse']]) 48 | print("\nStandard Errors") 49 | print(errs[['nll','rmse']]) 50 | 51 | # plot 52 | sns.set() 53 | sns.lineplot(x='M',y='nll',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=3,data=df) 54 | plt.title("NLL vs Inducing Matrix size") 55 | plt.ylabel("NLL") 56 | plt.xlabel("Inducing Matrix Size") 57 | plt.show() 58 | 59 | sns.lineplot(x='M',y='rmse',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=3,data=df) 60 | plt.title("RMSE vs Inducing Matrix size") 61 | plt.ylabel("RMSE") 62 | plt.xlabel("Inducing Matrix Size") 63 | plt.show() 64 | 65 | -------------------------------------------------------------------------------- /experiments/bunny/ImplicitBunny/readObj.m: -------------------------------------------------------------------------------- 1 | function obj = readObj(fname) 2 | % 3 | % obj = readObj(fname) 4 | % 5 | % This function parses wavefront object data 6 | % It reads the mesh vertices, texture coordinates, normal coordinates 7 | % and face definitions(grouped by number of vertices) in a .obj file 8 | % 9 | % 10 | % INPUT: fname - wavefront object file full path 11 | % 12 | % OUTPUT: obj.v - mesh vertices 13 | % : obj.vt - texture coordinates 14 | % : obj.vn - normal coordinates 15 | % : obj.f - face definition assuming faces are made of of 3 vertices 16 | % 17 | % Bernard Abayowa, Tec^Edge 18 | % 11/8/07 19 | 20 | % set up field types 21 | v = []; vt = []; vn = []; f.v = []; f.vt = []; f.vn = []; 22 | 23 | fid = fopen(fname); 24 | 25 | % parse .obj file 26 | while 1 27 | tline = fgetl(fid); 28 | if ~ischar(tline), break, end % exit at end of file 29 | ln = sscanf(tline,'%s',1); % line type 30 | %disp(ln) 31 | switch ln 32 | case 'v' % mesh vertexs 33 | v = [v; sscanf(tline(2:end),'%f')']; 34 | case 
'vt' % texture coordinate 35 | vt = [vt; sscanf(tline(3:end),'%f')']; 36 | case 'vn' % normal coordinate 37 | vn = [vn; sscanf(tline(3:end),'%f')']; 38 | case 'f' % face definition 39 | fv = []; fvt = []; fvn = []; 40 | str = textscan(tline(2:end),'%s'); str = str{1}; 41 | 42 | nf = length(findstr(str{1},'/')); % number of fields with this face vertices 43 | 44 | 45 | [tok str] = strtok(str,'//'); % vertex only 46 | for k = 1:length(tok) fv = [fv str2num(tok{k})]; end 47 | 48 | if (nf > 0) 49 | [tok str] = strtok(str,'//'); % add texture coordinates 50 | for k = 1:length(tok) fvt = [fvt str2num(tok{k})]; end 51 | end 52 | if (nf > 1) 53 | [tok str] = strtok(str,'//'); % add normal coordinates 54 | for k = 1:length(tok) fvn = [fvn str2num(tok{k})]; end 55 | end 56 | f.v = [f.v; fv]; f.vt = [f.vt; fvt]; f.vn = [f.vn; fvn]; 57 | end 58 | end 59 | fclose(fid); 60 | 61 | % set up matlab object 62 | obj.v = v; obj.vt = vt; obj.vn = vn; obj.f = f; 63 | -------------------------------------------------------------------------------- /experiments/bunny/bunny.sub: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J basic # Job name 3 | #SBATCH -o ../../slurm_output/job_%j.out # Name of stdout output file(%j expands to jobId) 4 | #SBATCH -e ../../slurm_output/job_%j.err # Name of stderr output file(%j expands to jobId) 5 | #SBATCH --get-user-env # Tells sbatch to retrieve the users login environment 6 | #SBATCH -N 1 # Total number of nodes requested 7 | #SBATCH -n 16 # Total number of cores requested 8 | #SBATCH --mem=32G # Total amount of (real) memory requested (per node) 9 | #SBATCH -t 168:00:00 # Time limit (hh:mm:ss) 10 | #SBATCH --partition=default_partition # Request partition for resource 11 | ##SBATCH --exclude=marschner-compute01 # Request partition for resource 12 | #SBATCH --exclude=joachims-compute-01,sablab-gpu-11 13 | #SBATCH --gres=gpu:1 # Specify a list of generic consumable resources (per node) 14 | 15 | 16 | . 
/home/xz584/anaconda3/etc/profile.d/conda.sh 17 | conda activate DSVGP 18 | dataset="real-bunny" 19 | 20 | # exp setups 21 | # fix some setups for this dataset 22 | batch_size=500 23 | watch_model=True 24 | # read other arguments from command line when sbatch this job 25 | model=DSVGP 26 | variational_strategy='CIQ' 27 | variational_distribution='standard' 28 | num_inducing=${1} 29 | num_directions=${2} 30 | num_epochs=${3} 31 | exp_name=${4} 32 | lr=${5} 33 | lr_ngd=0.1 34 | num_contour_quad=15 35 | seed=${6} 36 | lr_sched=${7} 37 | save_results=True 38 | mll_type='ELBO' 39 | gamma=${8} 40 | # compare different methods, comment out the chunk if not comparing with this method 41 | # find runlogs in logs folder 42 | python -u exp_bunny.py --dataset ${dataset} --variational_strategy ${variational_strategy} \ 43 | --variational_distribution ${variational_distribution} \ 44 | --num_inducing ${num_inducing} \ 45 | --num_directions ${num_directions} --num_epochs ${num_epochs} \ 46 | --batch_size ${batch_size} --model ${model} \ 47 | --lr ${lr} --lr_ngd ${lr_ngd} --num_contour_quad ${num_contour_quad} \ 48 | --watch_model ${watch_model} \ 49 | --exp_name ${exp_name} --seed ${seed} --lr_sched ${lr_sched} --save_results ${save_results} \ 50 | --mll_type ${mll_type} --gamma ${gamma} 51 | 52 | 53 | 54 | -------------------------------------------------------------------------------- /experiments/stellarator_regression/plot_nll.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import pickle 5 | import numpy as np 6 | import glob 7 | 8 | # read the data 9 | data_files = glob.glob("./output/data_stell_regress_*.pickle") 10 | 11 | plt.figure(figsize=(10,10)) 12 | 13 | data = [] 14 | for ff in data_files: 15 | # attributes 16 | attrib = {} 17 | # load 18 | d = pickle.load(open(ff, "rb")) 19 | attrib['mode']= d['mode'] 20 | attrib['ni'] = d['num_inducing'] 21 | if d['mode'] == 'SVGP': 22 | d['num_directions']= 0 23 | attrib['nd'] = d['num_directions'] 24 | attrib['M'] = d['num_inducing']*(d['num_directions']+1) 25 | attrib['nll'] = d['test_nll'].item() 26 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 27 | attrib['test_time'] = d['test_time'] 28 | attrib['train_time'] = d['train_time'] 29 | if d['mode'] == 'SVGP' and d['mll_type'] == 'PLL': 30 | d['mode'] = "PPGPR" 31 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 32 | d['mode'] = "DPPGPR" 33 | elif d['mode'] == 'GradSVGP' and d['mll_type'] == 'PLL': 34 | d['mode'] = "GradPPGPR" 35 | elif d['mode'] == 'DSVGP-Shared' and d['mll_type'] == 'PLL': 36 | d['mode'] = "DPPGPR-Shared" 37 | if "D" in d['mode'] or "Grad" in d['mode']: 38 | attrib['run'] = d['mode'] + str(d['num_directions']) 39 | else: 40 | attrib['run'] = d['mode'] 41 | data.append(attrib) 42 | # make a pandas df 43 | df = pd.DataFrame.from_dict(data,orient='columns') 44 | #df = df[df['M'] > 400] 45 | print(df) 46 | pd.to_pickle(df,"./stellarator_plot_data.pickle") 47 | # compute means 48 | avgs = df.groupby(['run','M']).mean() 49 | print("\nMeans") 50 | print(avgs[['nll','rmse']]) 51 | 52 | # plot 53 | rc = {'figure.figsize':(10,5), 54 | 'axes.facecolor':'white', 55 | 'axes.grid' : True, 56 | 'grid.color': '.8', 57 | 'font.family':'Times New Roman', 58 | 'font.size' : 15} 59 | plt.rcParams.update(rc) 60 | #sns.set() 61 | #sns.set_style("whitegrid") 62 | #sns.set_context("paper", font_scale=2.0) 63 | 
sns.lineplot(x='M',y='nll',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 64 | #sns.lineplot(x='M',y='rmse',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 65 | plt.title("NLL vs Inducing Matrix size") 66 | plt.ylabel("NLL") 67 | plt.xlabel("Inducing Matrix Size") 68 | plt.legend(loc=1) 69 | plt.show() 70 | 71 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gcn.sub: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #SBATCH -J basic # Job name 3 | #SBATCH -o ../../slurm_output/job_%j.out # Name of stdout output file(%j expands to jobId) 4 | #SBATCH -e ../../slurm_output/job_%j.err # Name of stderr output file(%j expands to jobId) 5 | #SBATCH --get-user-env # Tells sbatch to retrieve the users login environment 6 | #SBATCH -N 1 # Total number of nodes requested 7 | #SBATCH -n 16 # Total number of cores requested 8 | #SBATCH --mem=32G # Total amount of (real) memory requested (per node) 9 | #SBATCH -t 168:00:00 # Time limit (hh:mm:ss) 10 | #SBATCH --partition=default_partition # Request partition for resource 11 | ##SBATCH --exclude=marschner-compute01 # Request partition for resource 12 | #SBATCH --exclude=joachims-compute-01,sablab-gpu-11 13 | #SBATCH --gres=gpu:1 # Specify a list of generic consumable resources (per node) 14 | 15 | 16 | . /home/xz584/anaconda3/etc/profile.d/conda.sh 17 | conda activate DSVGP2 18 | 19 | # exp setups 20 | dataset="PubMed" 21 | batch_size=500 22 | # watch_model=True 23 | model=${11} 24 | variational_strategy='standard' 25 | variational_distribution='standard' 26 | num_inducing=${1} 27 | num_directions=${2} 28 | num_epochs=${3} 29 | exp_name=${4} 30 | lr=${5} 31 | lr_ngd=0.1 32 | num_contour_quad=15 33 | seed=${6} 34 | lr_sched=${7} 35 | mll_type=${12} 36 | gamma=${8} 37 | turbo_batch_size=${9} 38 | turbo_max_evals=${10} 39 | # find runlogs in logs folder 40 | python -u gcn_turbo.py --dataset ${dataset} --variational_strategy ${variational_strategy} \ 41 | --variational_distribution ${variational_distribution} \ 42 | --num_inducing ${num_inducing} \ 43 | --num_directions ${num_directions} --num_epochs ${num_epochs} \ 44 | --batch_size ${batch_size} --model ${model} \ 45 | --lr ${lr} --lr_ngd ${lr_ngd} --num_contour_quad ${num_contour_quad} \ 46 | --exp_name ${exp_name} --seed ${seed} --lr_sched ${lr_sched} \ 47 | --mll_type ${mll_type} --gamma ${gamma} \ 48 | --turbo_batch_size ${turbo_batch_size} --turbo_max_evals ${turbo_max_evals} \ 49 | 2>&1 | tee runlogs/a.out_${dataset}_${model}_m${num_inducing}_p${num_directions}_epoch${num_epochs}_turboN${turbo_max_evals}_turbo_bs${turbo_batch_size}_exp${exp_name} 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/synthetic/exp_run.sh: -------------------------------------------------------------------------------- 1 | dataset="synthetic-StyblinskiTang" # synthetic/real - dataset name 2 | # dataset="real-helens" 3 | 4 | # exp setups 5 | n_train=10000 6 | n_test=10000 7 | num_inducing=500 8 | num_directions=2 9 | num_epochs=1000 10 | batch_size=512 11 | lr=0.01 12 | lr_ngd=0.1 13 | num_contour_quad=15 14 | watch_model=False 15 | exp_name="TEST" 16 | seed=0 17 | lr_sched="step_lr" 18 | save_results=False 19 | mll_type="ELBO" 20 | 21 | # compare different methods, comment out the chunk if not comparing with this 
method 22 | # find runlogs in logs folder 23 | 24 | model="DSVGP" 25 | variational_strategy="standard" 26 | variational_distribution="standard" 27 | sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 28 | ${n_train} ${n_test} ${num_inducing} \ 29 | ${num_directions} ${num_epochs} ${batch_size} ${model} \ 30 | ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 31 | ${seed} ${lr_sched} ${save_results} ${mll_type} 32 | 33 | # model="DSVGP" 34 | # variational_strategy="standard" 35 | # variational_distribution="NGD" 36 | # sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 37 | # ${n_train} ${n_test} ${num_inducing} \ 38 | # ${num_directions} ${num_epochs} ${batch_size} ${model} \ 39 | # ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 40 | # ${seed} ${lr_sched} ${save_results} ${mll_type} 41 | 42 | # model="DSVGP" 43 | # variational_strategy="CIQ" 44 | # variational_distribution="NGD" 45 | # sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 46 | # ${n_train} ${n_test} ${num_inducing} \ 47 | # ${num_directions} ${num_epochs} ${batch_size} ${model} \ 48 | # ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 49 | # ${seed} ${lr_sched} ${save_results} ${mll_type} 50 | 51 | # for traditional SVGP, 52 | # variational_strategy and variational_distribution don't matter, but need to pass in them. 53 | # model="SVGP" 54 | # variational_strategy="standard" 55 | # variational_distribution="standard" 56 | # sh ./exp_setup.sh ${dataset} ${variational_strategy} ${variational_distribution} \ 57 | # ${n_train} ${n_test} ${num_inducing} \ 58 | # ${num_directions} ${num_epochs} ${batch_size} ${model} \ 59 | # ${lr} ${lr_ngd} ${num_contour_quad} ${watch_model} ${exp_name} \ 60 | # ${seed} ${lr_sched} ${save_results} ${mll_type} 61 | -------------------------------------------------------------------------------- /experiments/synthetic1/plot_nll.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import seaborn as sns 3 | import pandas as pd 4 | import pickle 5 | import numpy as np 6 | import glob 7 | 8 | # read the data 9 | data_files = glob.glob("./output/data*.pickle") 10 | 11 | data = [] 12 | for ff in data_files: 13 | # attributes 14 | attrib = {} 15 | # load 16 | d = pickle.load(open(ff, "rb")) 17 | attrib['mode']= d['mode'] 18 | if d['mode'] == "ExactGradGP": 19 | attrib['mode'] = d['mode'] 20 | attrib['run'] = d['mode'] 21 | attrib['M'] = d['M'] 22 | attrib['nll'] = d['test_nll'].item() 23 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 24 | print(f"ExactGradGP nll: {d['test_nll'].item()}, rmse: {np.sqrt(d['test_mse'].item())}") 25 | # dont plot ExactGradGP 26 | data.append(attrib) 27 | continue 28 | 29 | attrib['ni'] = d['num_inducing'] 30 | if d['mode'] == 'SVGP': 31 | d['num_directions']= 0 32 | attrib['nd'] = d['num_directions'] 33 | attrib['M'] = d['num_inducing']*(d['num_directions']+1) 34 | attrib['nll'] = d['test_nll'].item() 35 | attrib['rmse'] = np.sqrt(d['test_mse'].item()) 36 | attrib['test_time'] = d['test_time'] 37 | attrib['train_time'] = d['train_time'] 38 | if d['mode'] == 'SVGP' and d['mll_type'] == 'PLL': 39 | d['mode'] = "PPGPR" 40 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 41 | d['mode'] = "DPPGPR" 42 | elif d['mode'] == 'GradSVGP' and d['mll_type'] == 'PLL': 43 | d['mode'] = "GradPPGPR" 44 | if "D" in d['mode'] or "Grad" in d['mode']: 45 | attrib['run'] = 
d['mode'] + str(d['num_directions']) 46 | else: 47 | attrib['run'] = d['mode'] 48 | 49 | # reduce points 50 | #if not np.any(np.isclose(attrib['M'],[800],atol=10)): 51 | # continue 52 | if not np.any(np.isclose(attrib['M'],[200,400,800,1200],atol=10)): 53 | continue 54 | ## reduce methods 55 | if not attrib['run'] in ['SVGP','PPGPR','GradSVGP5','GradPPGPR5','DSVGP2','DPPGPR2','DSKI','ExactGradGP']: 56 | continue 57 | data.append(attrib) 58 | # make a pandas df 59 | df = pd.DataFrame.from_dict(data,orient='columns') 60 | #pd.to_pickle(df,"sin5_plot_data.pickle") 61 | #df = df[df['run']!='GradSVGP3'] 62 | print(df) 63 | 64 | # plot 65 | rc = {'figure.figsize':(10,5), 66 | 'axes.facecolor':'white', 67 | 'axes.grid' : True, 68 | 'grid.color': '.8', 69 | 'font.family':'Times New Roman', 70 | 'font.size' : 15} 71 | plt.rcParams.update(rc) 72 | sns.lineplot(x='M',y='nll',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 73 | #sns.lineplot(x='M',y='rmse',hue='run',style='run',palette='colorblind',err_style='band',markers=True,dashes=False,linewidth=5,markersize=12,data=df) 74 | plt.title("NLL vs Inducing Matrix size") 75 | plt.ylabel("NLL") 76 | plt.xlabel("Inducing Matrix Size") 77 | plt.show() 78 | 79 | -------------------------------------------------------------------------------- /directionalvi/utils/csv_dataset.py: -------------------------------------------------------------------------------- 1 | 2 | import torch 3 | from torch.utils.data import Dataset 4 | import numpy as np 5 | import pandas as pd 6 | 7 | class csv_dataset(Dataset): 8 | """Reads a CSV dataset on the fly 9 | """ 10 | def __init__(self,csv_file,gradients=True,rescale=False): 11 | """ 12 | Args: 13 | csv_file (string): csv file name containing data 14 | rows have header x0,x1,...xd,f,g0,...,gd 15 | xi is ith x index 16 | f is function value 17 | gi is ith g index 18 | """ 19 | self.df = pd.read_csv(csv_file) 20 | # x indexes 21 | self.xidx = np.where(['x' in ci for ci in self.df.columns])[0] 22 | # function values 23 | self.fidx = np.where(['f' in ci for ci in self.df.columns])[0] 24 | # gradient indexes 25 | self.gidx = np.where(['g' in ci for ci in self.df.columns])[0] 26 | # combined f and g indexes with f first 27 | self.fgidx = np.concatenate((self.fidx,self.gidx)) 28 | #print stuff 29 | print(self.xidx) 30 | print(self.fidx) 31 | 32 | # gradients option 33 | self.gradients = gradients 34 | # map to unit cube 35 | self.rescale = rescale 36 | # bounds for rescaling 37 | self.lb = torch.tensor(self.df.iloc[:,self.xidx].min(axis=0).to_numpy()).float() 38 | self.ub = torch.tensor(self.df.iloc[:,self.xidx].max(axis=0).to_numpy()).float() 39 | # mean and std of f 40 | self.favg = self.df.iloc[:,self.fidx].mean().to_numpy()[0] 41 | self.fstd = self.df.iloc[:,self.fidx].std().to_numpy()[0] 42 | 43 | self.dim = len(self.xidx) 44 | self.n = self.df.shape[0] 45 | 46 | def __len__(self): 47 | return len(self.df) 48 | 49 | def __getitem__(self, idx): 50 | """ 51 | return: a tuple of torch tensors (x,y) 52 | x is a 2d-tensor of type float 53 | y is a 1d-tensor containing function value then the gradient 54 | """ 55 | if torch.is_tensor(idx): 56 | idx = idx.tolist() 57 | # get the row 58 | sample = self.df.iloc[idx].to_numpy() 59 | # return a tuple of tensors (x,[f(x),g(x)]) 60 | x = torch.tensor(sample[self.xidx]).float() # x must be dtype float 61 | if self.gradients: 62 | y = torch.tensor(sample[self.fgidx]) 63 | if self.rescale: 64 | # map x to unit cube 65 | 
x = (x-self.lb)/(self.ub - self.lb) 66 | # standardize function values (f-mu)/sigma 67 | y[0] = (y[0] - self.favg)/self.fstd 68 | # scale gradients appropriately 69 | y[1:] =y[1:]*(self.ub - self.lb)/self.fstd 70 | else: 71 | y = sample[self.fidx][0] 72 | if self.rescale: 73 | # map x to unit cube 74 | x = (x-self.lb)/(self.ub - self.lb) 75 | # standardize function values (f-mu)/sigma 76 | y = (y - self.favg)/self.fstd 77 | return (x,y) 78 | 79 | -------------------------------------------------------------------------------- /experiments/rover/plot_traj.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | import matplotlib.pylab as pl 3 | import seaborn as sns 4 | import pandas as pd 5 | import pickle 6 | import numpy as np 7 | import glob 8 | 9 | # read the data 10 | data_files = glob.glob("./output/data*.pickle") 11 | colors = pl.cm.jet(np.linspace(0,1,len(data_files))) 12 | 13 | rc = {'figure.figsize':(10,5), 14 | 'axes.facecolor':'white', 15 | 'axes.grid' : True, 16 | 'grid.color': '.8', 17 | 'font.family':'Times New Roman', 18 | 'font.size' : 20} 19 | plt.rcParams.update(rc) 20 | plt.figure(figsize=(10,10)) 21 | 22 | num_curves = 6 23 | means = np.zeros((num_curves,2000)) 24 | std = np.zeros((num_curves,2000)) 25 | n_type = np.zeros(num_curves) 26 | data = [] 27 | for ii in range(len(data_files)): 28 | ff = data_files[ii] 29 | # attributes 30 | attrib = {} 31 | # load 32 | d = pickle.load(open(ff, "rb")) 33 | if d['mode'] == 'Vanilla': 34 | label = "TuRBO" 35 | elif d['mode'] == 'DSVGP' and d['mll_type'] == 'PLL': 36 | M = d['num_inducing']*(d['num_directions']+1) 37 | label = f"TuRBO-DPPGPR{d['num_directions']}" 38 | elif d['mode'] == "GD": 39 | label= d['mode'] 40 | # minimum function values 41 | fX = d['fX'] 42 | fXmin = np.minimum.accumulate(fX) 43 | # accumulate means 44 | if d['mode'] == "DSVGP" and d['num_directions']==1: 45 | means[0] += fXmin 46 | std[0] += fXmin**2 47 | n_type[0] += 1 48 | if d['mode'] == "Vanilla": 49 | means[1] += fXmin 50 | std[1] += fXmin**2 51 | n_type[1] += 1 52 | if d['mode'] == "Random Search": 53 | means[2] += fXmin 54 | std[2] += fXmin**2 55 | n_type[2] += 1 56 | if d['mode'] == "BO-LCB": 57 | means[3] += fXmin 58 | std[3] += fXmin**2 59 | n_type[3] += 1 60 | if d['mode'] == "DSVGP" and d['num_directions']==2: 61 | means[4] += fXmin 62 | std[4] += fXmin**2 63 | n_type[4] += 1 64 | if d['mode'] == "DSVGP" and d['num_directions']==3: 65 | means[5] += fXmin 66 | std[5] += fXmin**2 67 | n_type[5] += 1 68 | 69 | #plt.plot(fXmin,linewidth=5,markersize=12,color=colors[ii],label=label) 70 | 71 | means = np.diag(1/n_type) @ means 72 | std = np.sqrt(np.diag(1/n_type)@ std - means**2) 73 | labels =["TuRBO-DPPGPR1","TuRBO","Random Search","BO-LCB","TuRBO-DPPGPR2","TuRBO-DPPGPR3"] 74 | for ii,label in enumerate(labels): 75 | plt.plot(means[ii],linewidth=3,markersize=12,label=labels[ii]) 76 | plt.fill_between(np.arange(0,2000),means[ii]-std[ii], means[ii]+std[ii],alpha=0.7) 77 | 78 | print(means) 79 | print(std) 80 | print(labels) 81 | d = {} 82 | d['labels'] = labels 83 | d['means'] = means 84 | d['std'] = std 85 | pickle.dump(d,open("rover_plot_data.pickle","wb")) 86 | # plot 87 | #sns.set() 88 | #sns.set_style("whitegrid") 89 | #sns.set_context("paper", font_scale=1.5) 90 | plt.legend() 91 | plt.title("Optimization Convergence on Rover Problem") 92 | plt.yscale("log") 93 | plt.ylabel("f(x)") 94 | plt.xlabel("Evaluation") 95 | plt.show() 96 | 97 | 98 | 99 | 
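# Note: each run's trace above is reduced to a best-value-so-far curve with
# np.minimum.accumulate, and the per-method spread is recovered from accumulated first and
# second moments via Var[x] = E[x^2] - (E[x])^2. A minimal self-contained sketch of that
# identity (the two "runs" below are made up for illustration only):
#
#   import numpy as np
#   runs = np.array([[3.0, 2.0, 2.0], [5.0, 4.0, 1.0]])   # two hypothetical fXmin curves
#   mean = runs.mean(axis=0)                               # E[x]
#   std = np.sqrt((runs ** 2).mean(axis=0) - mean ** 2)    # sqrt(E[x^2] - E[x]^2)
#   assert np.allclose(std, runs.std(axis=0))              # equals the population std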
-------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/models2.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | from torch_geometric.nn import MessagePassing 4 | from torch_geometric.utils import add_self_loops, degree 5 | import networkx as nx 6 | 7 | 8 | # https://github.com/praxidike97/GraphNeuralNet/blob/master/main.py 9 | 10 | class GCNConv(MessagePassing): 11 | def __init__(self, in_channels, out_channels): 12 | super(GCNConv, self).__init__(aggr='add') 13 | self.lin = torch.nn.Linear(in_channels, out_channels) 14 | 15 | def forward(self, x, edge_index): 16 | # Step 1: Add self-loops 17 | edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0)) 18 | 19 | # Step 2: Multiply with weights 20 | x = self.lin(x) 21 | 22 | # Step 3: Calculate the normalization 23 | row, col = edge_index 24 | deg = degree(row, x.size(0), dtype=x.dtype) 25 | deg_inv_sqrt = deg.pow(-0.5) 26 | norm = deg_inv_sqrt[row] * deg_inv_sqrt[col] 27 | 28 | # Step 4: Propagate the embeddings to the next layer 29 | return self.propagate(edge_index, size=(x.size(0), x.size(0)), x=x, 30 | norm=norm) 31 | 32 | def message(self, x_j, norm): 33 | # Normalize node features. 34 | return norm.view(-1, 1) * x_j 35 | 36 | 37 | class Net(torch.nn.Module): 38 | def __init__(self, dataset): 39 | super(Net, self).__init__() 40 | self.conv1 = GCNConv(dataset.num_node_features, 8) 41 | self.conv2 = GCNConv(8, dataset.num_classes) 42 | self.n_params = sum(p.numel() for p in self.parameters()) 43 | 44 | def forward(self, data): 45 | x, edge_index = data.x, data.edge_index 46 | 47 | x = self.conv1(x, edge_index) 48 | x = F.relu(x) 49 | x = F.dropout(x, training=self.training) 50 | x = self.conv2(x, edge_index) 51 | 52 | return F.log_softmax(x, dim=1) 53 | 54 | def update_weights(self,weights): 55 | """ 56 | method to update the weights of the nn 57 | """ 58 | # dont track update in grad 59 | self.eval() 60 | 61 | # ordered keys of params 62 | state_dict = self.state_dict() 63 | keys = list(state_dict.keys()) 64 | keys.sort() # ensure we have the same order each time 65 | 66 | used_params = 0 67 | #for key in keys: 68 | for param in self.parameters(): 69 | # get the size and shape of the parameter 70 | #param_size = state_dict[key].numel() 71 | #param_shape = state_dict[key].shape 72 | param_size = param.numel() 73 | param_shape = param.shape 74 | new_params = weights[used_params:used_params+param_size].reshape(param_shape) 75 | # Update the parameter. 
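            # Note: assigning to param.data below overwrites the weights in place without
            # being recorded by autograd; the flat `weights` vector comes from an external
            # caller and is consumed in self.parameters() order, param.numel() values at a
            # time (used_params tracks the read offset into the vector).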
76 | #state_dict[key].copy_(new_params) 77 | param.data = new_params 78 | # counter 79 | used_params +=param_size 80 | 81 | def get_grad(self): 82 | grads = [] 83 | for param in self.parameters(): 84 | grads.append(param.grad.view(-1)) 85 | grads = torch.cat(grads) 86 | return grads 87 | -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/models.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | from torch.nn.parameter import Parameter 6 | from torch.nn.modules.module import Module 7 | 8 | class GCN(nn.Module): 9 | def __init__(self, nfeat, nhid, nclass, dropout): 10 | super(GCN, self).__init__() 11 | 12 | self.gc1 = GraphConvolution(nfeat, nhid) 13 | self.gc2 = GraphConvolution(nhid, nclass) 14 | self.dropout = dropout 15 | self.n_params = sum(p.numel() for p in self.parameters()) 16 | 17 | def forward(self, x, adj): 18 | x = F.relu(self.gc1(x, adj)) 19 | x = F.dropout(x, self.dropout, training=self.training) 20 | x = self.gc2(x, adj) 21 | return F.log_softmax(x, dim=1) 22 | 23 | def update_weights(self,weights): 24 | """ 25 | method to update the weights of the nn 26 | """ 27 | # dont track update in grad 28 | self.eval() 29 | 30 | # ordered keys of params 31 | state_dict = self.state_dict() 32 | keys = list(state_dict.keys()) 33 | keys.sort() # ensure we have the same order each time 34 | 35 | used_params = 0 36 | #for key in keys: 37 | for param in self.parameters(): 38 | # get the size and shape of the parameter 39 | #param_size = state_dict[key].numel() 40 | #param_shape = state_dict[key].shape 41 | param_size = param.numel() 42 | param_shape = param.shape 43 | new_params = weights[used_params:used_params+param_size].reshape(param_shape) 44 | # Update the parameter. 45 | #state_dict[key].copy_(new_params) 46 | param.data = new_params 47 | # counter 48 | used_params +=param_size 49 | 50 | def get_grad(self): 51 | grads = [] 52 | for param in self.parameters(): 53 | grads.append(param.grad.view(-1)) 54 | grads = torch.cat(grads) 55 | 56 | return grads 57 | 58 | class GraphConvolution(Module): 59 | """ 60 | Simple GCN layer, similar to https://arxiv.org/abs/1609.02907 61 | """ 62 | 63 | def __init__(self, in_features, out_features, bias=True): 64 | super(GraphConvolution, self).__init__() 65 | self.in_features = in_features 66 | self.out_features = out_features 67 | self.weight = Parameter(torch.FloatTensor(in_features, out_features)) 68 | if bias: 69 | self.bias = Parameter(torch.FloatTensor(out_features)) 70 | else: 71 | self.register_parameter('bias', None) 72 | self.reset_parameters() 73 | 74 | def reset_parameters(self): 75 | stdv = 1. 
/ math.sqrt(self.weight.size(1)) 76 | self.weight.data.uniform_(-stdv, stdv) 77 | if self.bias is not None: 78 | self.bias.data.uniform_(-stdv, stdv) 79 | 80 | def forward(self, input, adj): 81 | support = torch.mm(input, self.weight) 82 | output = torch.spmm(adj, support) 83 | if self.bias is not None: 84 | return output + self.bias 85 | else: 86 | return output 87 | 88 | def __repr__(self): 89 | return self.__class__.__name__ + ' (' \ 90 | + str(self.in_features) + ' -> ' \ 91 | + str(self.out_features) + ')' 92 | -------------------------------------------------------------------------------- /tests/test_traditional_vi.py: -------------------------------------------------------------------------------- 1 | from gpytorch.models import ApproximateGP 2 | from gpytorch.variational import CholeskyVariationalDistribution 3 | from gpytorch.variational import VariationalStrategy 4 | from torch.utils.data import TensorDataset, DataLoader 5 | import tqdm 6 | import math 7 | import time 8 | import torch 9 | import gpytorch 10 | from matplotlib import pyplot as plt 11 | import numpy as np 12 | import sys 13 | sys.path.append("../") 14 | sys.path.append("../directionalvi/utils") 15 | sys.path.append("../directionalvi") 16 | from utils.metrics import MSE 17 | import traditional_vi 18 | import testfun 19 | 20 | 21 | # setups 22 | n = 600 23 | n_test = 1000 24 | dim = 2 25 | num_inducing = 20 26 | minibatch_size = int(n/2) 27 | num_epochs = 400 28 | use_ciq = False 29 | use_ngd = False 30 | learning_rate_hypers=0.01 31 | learning_rate_ngd=0.1 32 | # lr-schedule 33 | gamma = 10.0 34 | levels = np.array([20,150,300]) 35 | def lr_sched(epoch): 36 | a = np.sum(levels > epoch) 37 | return (1./gamma)**a 38 | 39 | # seed 40 | torch.random.manual_seed(0) 41 | 42 | # trainig and testing data 43 | train_x = torch.rand(n,dim) 44 | test_x = torch.rand(n_test,dim) 45 | train_y = testfun.f(train_x, deriv=False) 46 | test_y = testfun.f(test_x, deriv=False) 47 | if torch.cuda.is_available(): 48 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 49 | 50 | train_dataset = TensorDataset(train_x, train_y) 51 | test_dataset = TensorDataset(test_x, test_y) 52 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 53 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 54 | 55 | print("\n\n---Standard SVGP---") 56 | print(f"Start training with {n} trainig data of dim {dim}") 57 | print(f"VI setups: {num_inducing} inducing points") 58 | 59 | args={"verbose":True} 60 | 61 | # model training 62 | t1 = time.time_ns() 63 | model,likelihood = traditional_vi.train_gp(train_dataset,dim, 64 | num_inducing=num_inducing, 65 | minibatch_size=minibatch_size, 66 | num_epochs=num_epochs, 67 | use_ngd=use_ngd, 68 | use_ciq=use_ciq, 69 | learning_rate_hypers=learning_rate_hypers, 70 | learning_rate_ngd=learning_rate_ngd, 71 | lr_sched=lr_sched, 72 | tqdm=False, **args) 73 | t2 = time.time_ns() 74 | means, variances = traditional_vi.eval_gp(test_dataset,model,likelihood, 75 | num_inducing=num_inducing, 76 | minibatch_size=n_test) 77 | t3 = time.time_ns() 78 | 79 | # compute MSE 80 | test_mse = MSE(test_y.cpu(),means) 81 | # compute mean negative predictive density 82 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y.cpu()).mean() 83 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}") 84 | print(f"Training time: {(t2-t1)/1e9:.2f} sec, testing time: {(t3-t2)/1e9:.2f} sec") 85 | 86 | 87 | 
# from mpl_toolkits.mplot3d import axes3d 88 | # import matplotlib.pyplot as plt 89 | # fig = plt.figure(figsize=(12,6)) 90 | # ax = fig.add_subplot(111, projection='3d') 91 | # ax.scatter(train_x[:,0],train_x[:,1],train_y, color='k') 92 | # ax.scatter(train_x[:,0],train_x[:,1],means, color='b') 93 | # plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 94 | # plt.show() 95 | -------------------------------------------------------------------------------- /graphite_environment.yml: -------------------------------------------------------------------------------- 1 | name: DSVGP 2 | channels: 3 | - pytorch 4 | - intel 5 | - defaults 6 | dependencies: 7 | - backcall=0.2.0=pyhd3eb1b0_0 8 | - blas=1.0=mkl 9 | - ca-certificates=2021.1.19=h06a4308_1 10 | - certifi=2020.12.5=py38h06a4308_0 11 | - cudatoolkit=11.0.221=h6bb024c_0 12 | - cycler=0.10.0=py38_0 13 | - dbus=1.13.18=hb2f20db_0 14 | - decorator=4.4.2=pyhd3eb1b0_0 15 | - expat=2.2.10=he6710b0_2 16 | - fontconfig=2.13.1=h6c09931_0 17 | - freetype=2.10.4=h9e62b58_0 18 | - glib=2.67.4=h36276a3_1 19 | - gst-plugins-base=1.14.0=h8213a91_2 20 | - gstreamer=1.14.0=h28cd5cc_2 21 | - icu=58.2=he6710b0_3 22 | - intel-openmp=2020.3=intel_304 23 | - intelpython=2021.1.1=1 24 | - ipykernel=5.3.4=py38h5ca1d4c_0 25 | - ipython=7.21.0=py38hb070fc8_0 26 | - ipython_genutils=0.2.0=pyhd3eb1b0_1 27 | - jedi=0.17.0=py38_0 28 | - jpeg=9b=h024ee3a_2 29 | - jupyter_client=6.1.7=py_0 30 | - jupyter_core=4.7.1=py38h06a4308_0 31 | - kiwisolver=1.3.1=py38h2531618_0 32 | - lcms2=2.11=h396b838_0 33 | - ld_impl_linux-64=2.33.1=h53a641e_7 34 | - libffi=3.3=h07ac4c1_13 35 | - libgcc-ng=9.3.0=hdf63c60_101 36 | - libgfortran-ng=7.3.0=hdf63c60_0 37 | - libpng=1.6.37=h17b3f18_7 38 | - libsodium=1.0.16=3 39 | - libstdcxx-ng=9.3.0=hdf63c60_101 40 | - libtiff=4.1.0=h2733197_1 41 | - libuuid=1.0.3=h1bed415_2 42 | - libuv=1.40.0=h7b6447c_0 43 | - libxcb=1.14=h7b6447c_0 44 | - libxml2=2.9.10=h9aba842_4 45 | - lz4-c=1.9.2=h7708b8d_3 46 | - matplotlib=3.3.4=py38h06a4308_0 47 | - matplotlib-base=3.3.4=py38h62a2d02_0 48 | - mkl=2020.4=intel_304 49 | - mkl-service=2.3.0=py38he904b0f_0 50 | - mkl_fft=1.3.0=py38h54f3939_0 51 | - mkl_random=1.1.1=py38h0573a6f_0 52 | - ncurses=6.2=he6710b0_1 53 | - ninja=1.10.2=py38hff7bd54_0 54 | - numpy=1.19.2=py38h54aff64_0 55 | - numpy-base=1.19.2=py38hfa32c7d_0 56 | - olefile=0.46=py_0 57 | - openssl=1.1.1j=h27cfd23_0 58 | - pandas=1.2.3=py38ha9443f7_0 59 | - parso=0.8.1=pyhd3eb1b0_0 60 | - pcre=8.44=he6710b0_0 61 | - pexpect=4.8.0=pyhd3eb1b0_3 62 | - pickleshare=0.7.5=pyhd3eb1b0_1003 63 | - pillow=8.1.2=py38he98fc37_0 64 | - pip=21.0.1=py38h06a4308_0 65 | - prompt-toolkit=3.0.8=py_0 66 | - ptyprocess=0.7.0=pyhd3eb1b0_2 67 | - pygments=2.8.1=pyhd3eb1b0_0 68 | - pyparsing=2.4.7=pyhd3eb1b0_0 69 | - pyqt=5.9.2=py38h05f1152_4 70 | - python=3.8.8=hdb3f193_4 71 | - python-dateutil=2.8.1=pyhd3eb1b0_0 72 | - pytorch=1.7.1=py3.8_cuda11.0.221_cudnn8.0.5_0 73 | - pytz=2021.1=pyhd3eb1b0_0 74 | - pyzmq=18.1.1=py38he6710b0_0 75 | - qt=5.9.7=h5867ecd_1 76 | - readline=8.1=h27cfd23_0 77 | - scipy=1.6.1=py38h91f5cce_0 78 | - setuptools=52.0.0=py38h06a4308_0 79 | - sip=4.19.13=py38he6710b0_0 80 | - six=1.15.0=py38h06a4308_0 81 | - sqlite=3.33.0=h88c068d_1 82 | - tbb=2020.3=intel_304 83 | - tk=8.6.10=hbc83047_0 84 | - torchaudio=0.7.2=py38 85 | - torchvision=0.8.2=py38_cu110 86 | - tornado=6.1=py38h27cfd23_0 87 | - tqdm=4.56.0=pyhd3eb1b0_0 88 | - traitlets=5.0.5=pyhd3eb1b0_0 89 | - typing_extensions=3.7.4.3=pyha847dfd_0 90 | - 
wcwidth=0.2.5=py_0 91 | - wheel=0.36.2=pyhd3eb1b0_0 92 | - xz=5.2.5=hcc43529_2 93 | - zeromq=4.3.1=he6710b0_3 94 | - zlib=1.2.11.1=hb8a9d29_3 95 | - zstd=1.4.5=hdb51d2f_0 96 | - pip: 97 | - apptools==5.1.0 98 | - botorch==0.4.0 99 | - configobj==5.0.6 100 | - envisage==5.0.0 101 | - gpytorch==1.4.0 102 | - importlib-metadata==3.7.3 103 | - importlib-resources==5.1.2 104 | - pyface==7.3.0 105 | - traits==6.2.0 106 | - traitsui==7.1.1 107 | - vtk==9.0.1 108 | - zipp==3.4.1 109 | prefix: /home/xz584/anaconda3/envs/DSVGP 110 | -------------------------------------------------------------------------------- /tests/test_grad_svgp.py: -------------------------------------------------------------------------------- 1 | from torch.utils.data import TensorDataset, DataLoader 2 | import tqdm 3 | import math 4 | import time 5 | import torch 6 | import gpytorch 7 | from matplotlib import pyplot as plt 8 | import numpy as np 9 | import sys 10 | sys.path.append("../") 11 | sys.path.append("../directionalvi/utils") 12 | sys.path.append("../directionalvi") 13 | from utils.metrics import MSE 14 | from grad_svgp import train_gp,eval_gp 15 | import testfun 16 | 17 | 18 | # setups 19 | n = 600 20 | n_test = 1000 21 | dim = 2 22 | num_inducing = 20 23 | minibatch_size = int(n/2) 24 | num_epochs = 400 25 | use_ciq = False 26 | use_ngd = False 27 | learning_rate_hypers=0.01 28 | learning_rate_ngd=0.1 29 | # lr-schedule 30 | gamma = 10.0 31 | levels = np.array([20,150,300]) 32 | def lr_sched(epoch): 33 | a = np.sum(levels > epoch) 34 | return (1./gamma)**a 35 | lr_sched=None 36 | mll_type="PLL" 37 | 38 | # seed 39 | torch.random.manual_seed(0) 40 | 41 | # trainig and testing data 42 | train_x = torch.rand(n,dim) 43 | test_x = torch.rand(n_test,dim) 44 | train_y = testfun.f(train_x, deriv=True) 45 | test_y = testfun.f(test_x, deriv=True) 46 | if torch.cuda.is_available(): 47 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 48 | 49 | train_dataset = TensorDataset(train_x, train_y) 50 | test_dataset = TensorDataset(test_x, test_y) 51 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 52 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 53 | 54 | print("\n\n---Standard SVGP---") 55 | print(f"Start training with {n} trainig data of dim {dim}") 56 | print(f"VI setups: {num_inducing} inducing points") 57 | 58 | args={"verbose":True} 59 | 60 | # model training 61 | t1 = time.time_ns() 62 | model,likelihood = train_gp(train_dataset,dim, 63 | num_inducing=num_inducing, 64 | minibatch_size=minibatch_size, 65 | num_epochs=num_epochs, 66 | use_ngd=use_ngd, 67 | use_ciq=use_ciq, 68 | learning_rate_hypers=learning_rate_hypers, 69 | learning_rate_ngd=learning_rate_ngd, 70 | lr_sched=lr_sched, 71 | tqdm=False, 72 | mll_type=mll_type, 73 | **args) 74 | t2 = time.time_ns() 75 | means, variances = eval_gp(test_dataset,model,likelihood, 76 | num_inducing=num_inducing, 77 | minibatch_size=n_test) 78 | t3 = time.time_ns() 79 | 80 | # compute MSE 81 | #test_mse = MSE(test_y.cpu(),means) 82 | test_mse = MSE(test_y[:,0],means[::dim+1]) 83 | # compute mean negative predictive density 84 | test_nll = -torch.distributions.Normal(means[::dim+1], variances.sqrt()[::dim+1]).log_prob(test_y[:,0]).mean() 85 | #test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y.cpu()).mean() 86 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}") 87 | print(f"Training time: {(t2-t1)/1e9:.2f} sec, testing 
time: {(t3-t2)/1e9:.2f} sec") 88 | 89 | 90 | # from mpl_toolkits.mplot3d import axes3d 91 | # import matplotlib.pyplot as plt 92 | # fig = plt.figure(figsize=(12,6)) 93 | # ax = fig.add_subplot(111, projection='3d') 94 | # ax.scatter(train_x[:,0],train_x[:,1],train_y, color='k') 95 | # ax.scatter(train_x[:,0],train_x[:,1],means, color='b') 96 | # plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 97 | # plt.show() 98 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gp.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | 14 | import gpytorch 15 | import numpy as np 16 | import torch 17 | from gpytorch.constraints.constraints import Interval 18 | from gpytorch.distributions import MultivariateNormal 19 | from gpytorch.kernels import MaternKernel, ScaleKernel 20 | from gpytorch.likelihoods import GaussianLikelihood 21 | from gpytorch.means import ConstantMean 22 | from gpytorch.mlls import ExactMarginalLogLikelihood 23 | from gpytorch.models import ExactGP 24 | 25 | 26 | # GP Model 27 | class GP(ExactGP): 28 | def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims): 29 | super(GP, self).__init__(train_x, train_y, likelihood) 30 | self.ard_dims = ard_dims 31 | self.mean_module = ConstantMean() 32 | base_kernel = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=2.5) 33 | self.covar_module = ScaleKernel(base_kernel, outputscale_constraint=outputscale_constraint) 34 | 35 | def forward(self, x): 36 | mean_x = self.mean_module(x) 37 | covar_x = self.covar_module(x) 38 | return MultivariateNormal(mean_x, covar_x) 39 | 40 | 41 | def train_gp(train_x, train_y, use_ard, num_steps, hypers={}): 42 | """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized.""" 43 | assert train_x.ndim == 2 44 | assert train_y.ndim == 1 45 | assert train_x.shape[0] == train_y.shape[0] 46 | 47 | # Create hyper parameter bounds 48 | noise_constraint = Interval(5e-4, 0.2) 49 | if use_ard: 50 | lengthscale_constraint = Interval(0.005, 2.0) 51 | else: 52 | lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1])) # [0.005, sqrt(dim)] 53 | outputscale_constraint = Interval(0.05, 20.0) 54 | 55 | # Create models 56 | likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype) 57 | ard_dims = train_x.shape[1] if use_ard else None 58 | model = GP( 59 | train_x=train_x, 60 | train_y=train_y, 61 | likelihood=likelihood, 62 | lengthscale_constraint=lengthscale_constraint, 63 | outputscale_constraint=outputscale_constraint, 64 | ard_dims=ard_dims, 65 | ).to(device=train_x.device, dtype=train_x.dtype) 66 | 67 | # Find optimal model hyperparameters 68 | model.train() 69 | likelihood.train() 70 | 71 | # "Loss" for GPs - the marginal log likelihood 72 | mll = 
ExactMarginalLogLikelihood(likelihood, model) 73 | 74 | # Initialize model hypers 75 | if hypers: 76 | model.load_state_dict(hypers) 77 | else: 78 | hypers = {} 79 | hypers["covar_module.outputscale"] = 1.0 80 | hypers["covar_module.base_kernel.lengthscale"] = 0.5 81 | hypers["likelihood.noise"] = 0.005 82 | model.initialize(**hypers) 83 | 84 | # Use the adam optimizer 85 | optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1) 86 | 87 | for _ in range(num_steps): 88 | optimizer.zero_grad() 89 | output = model(train_x) 90 | loss = -mll(output, train_y) 91 | loss.backward() 92 | optimizer.step() 93 | 94 | # Switch to eval mode 95 | model.eval() 96 | likelihood.eval() 97 | 98 | return model -------------------------------------------------------------------------------- /experiments/rover/exact_gp.py: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Copyright (c) 2019 Uber Technologies, Inc. # 3 | # # 4 | # Licensed under the Uber Non-Commercial License (the "License"); # 5 | # you may not use this file except in compliance with the License. # 6 | # You may obtain a copy of the License at the root directory of this project. # 7 | # # 8 | # See the License for the specific language governing permissions and # 9 | # limitations under the License. # 10 | ############################################################################### 11 | 12 | import math 13 | 14 | import gpytorch 15 | import numpy as np 16 | import torch 17 | from gpytorch.constraints.constraints import Interval 18 | from gpytorch.distributions import MultivariateNormal 19 | from gpytorch.kernels import MaternKernel, ScaleKernel 20 | from gpytorch.likelihoods import GaussianLikelihood 21 | from gpytorch.means import ConstantMean 22 | from gpytorch.mlls import ExactMarginalLogLikelihood 23 | from gpytorch.models import ExactGP 24 | 25 | 26 | # GP Model 27 | class GP(ExactGP): 28 | def __init__(self, train_x, train_y, likelihood, lengthscale_constraint, outputscale_constraint, ard_dims): 29 | super(GP, self).__init__(train_x, train_y, likelihood) 30 | self.ard_dims = ard_dims 31 | self.mean_module = ConstantMean() 32 | base_kernel = MaternKernel(lengthscale_constraint=lengthscale_constraint, ard_num_dims=ard_dims, nu=2.5) 33 | self.covar_module = ScaleKernel(base_kernel, outputscale_constraint=outputscale_constraint) 34 | 35 | def forward(self, x): 36 | mean_x = self.mean_module(x) 37 | covar_x = self.covar_module(x) 38 | return MultivariateNormal(mean_x, covar_x) 39 | 40 | 41 | def train_gp(train_x, train_y, use_ard, num_steps, hypers={}): 42 | """Fit a GP model where train_x is in [0, 1]^d and train_y is standardized.""" 43 | assert train_x.ndim == 2 44 | assert train_y.ndim == 1 45 | assert train_x.shape[0] == train_y.shape[0] 46 | 47 | # Create hyper parameter bounds 48 | noise_constraint = Interval(5e-4, 0.2) 49 | if use_ard: 50 | lengthscale_constraint = Interval(0.005, 2.0) 51 | else: 52 | lengthscale_constraint = Interval(0.005, math.sqrt(train_x.shape[1])) # [0.005, sqrt(dim)] 53 | outputscale_constraint = Interval(0.05, 20.0) 54 | 55 | # Create models 56 | likelihood = GaussianLikelihood(noise_constraint=noise_constraint).to(device=train_x.device, dtype=train_y.dtype) 57 | ard_dims = train_x.shape[1] if use_ard else None 58 | model = GP( 59 | train_x=train_x, 60 | train_y=train_y, 61 | likelihood=likelihood, 62 | lengthscale_constraint=lengthscale_constraint, 63 | 
outputscale_constraint=outputscale_constraint, 64 | ard_dims=ard_dims, 65 | ).to(device=train_x.device, dtype=train_x.dtype) 66 | 67 | # Find optimal model hyperparameters 68 | model.train() 69 | likelihood.train() 70 | 71 | # "Loss" for GPs - the marginal log likelihood 72 | mll = ExactMarginalLogLikelihood(likelihood, model) 73 | 74 | # Initialize model hypers 75 | if hypers: 76 | model.load_state_dict(hypers) 77 | else: 78 | hypers = {} 79 | hypers["covar_module.outputscale"] = 1.0 80 | hypers["covar_module.base_kernel.lengthscale"] = 0.5 81 | hypers["likelihood.noise"] = 0.005 82 | model.initialize(**hypers) 83 | 84 | # Use the adam optimizer 85 | optimizer = torch.optim.Adam([{"params": model.parameters()}], lr=0.1) 86 | 87 | for _ in range(num_steps): 88 | optimizer.zero_grad() 89 | output = model(train_x) 90 | loss = -mll(output, train_y) 91 | loss.backward() 92 | optimizer.step() 93 | 94 | # Switch to eval mode 95 | model.eval() 96 | likelihood.eval() 97 | 98 | return model 99 | -------------------------------------------------------------------------------- /tests/test_dfree_dsvgp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import gpytorch 5 | import tqdm 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import sys 11 | sys.path.append("../") 12 | sys.path.append("../directionalvi/utils") 13 | sys.path.append("../directionalvi") 14 | from RBFKernelDirectionalGrad import RBFKernelDirectionalGrad 15 | #from DirectionalGradVariationalStrategy import DirectionalGradVariationalStrategy 16 | from dfree_directional_vi import train_gp, eval_gp 17 | from metrics import MSE 18 | import testfun 19 | 20 | # data parameters 21 | n = 600 22 | dim = 2 23 | n_test = 1000 24 | 25 | # training params 26 | num_inducing = 20 27 | num_directions = 2 28 | minibatch_size = 200 29 | num_epochs = 400 30 | 31 | # seed 32 | torch.random.manual_seed(0) 33 | # use tqdm or just have print statements 34 | tqdm = False 35 | # use data to initialize inducing stuff 36 | inducing_data_initialization = False 37 | # use natural gradients and/or CIQ 38 | use_ngd = False 39 | use_ciq = False 40 | num_contour_quadrature=15 41 | # learning rate 42 | learning_rate_hypers = 0.01 43 | learning_rate_ngd = 0.1 44 | gamma = 10.0 45 | #levels = np.array([20,150,300]) 46 | #def lr_sched(epoch): 47 | # a = np.sum(levels > epoch) 48 | # return (1./gamma)**a 49 | lr_sched = None 50 | 51 | # training and testing data 52 | train_x = torch.rand(n,dim) 53 | test_x = torch.rand(n_test,dim) 54 | train_y = testfun.f(train_x, deriv=False) 55 | test_y = testfun.f(test_x, deriv=False) 56 | if torch.cuda.is_available(): 57 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 58 | 59 | train_dataset = TensorDataset(train_x, train_y) 60 | test_dataset = TensorDataset(test_x, test_y) 61 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 62 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 63 | 64 | # train 65 | print("\n\n---DirectionalGradVGP---") 66 | print(f"Start training with {n} trainig data of dim {dim}") 67 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 68 | args={"verbose":True} 69 | t1 = time.time() 70 | model,likelihood = train_gp(train_dataset, 71 | num_inducing=num_inducing, 72 | 
num_directions=num_directions, 73 | minibatch_size = minibatch_size, 74 | minibatch_dim = num_directions, 75 | num_epochs =num_epochs, 76 | learning_rate_hypers=learning_rate_hypers, 77 | learning_rate_ngd=learning_rate_ngd, 78 | inducing_data_initialization=inducing_data_initialization, 79 | use_ngd = use_ngd, 80 | use_ciq = use_ciq, 81 | lr_sched=lr_sched, 82 | num_contour_quadrature=num_contour_quadrature, 83 | tqdm=tqdm,**args 84 | ) 85 | t2 = time.time() 86 | 87 | # save the model 88 | # torch.save(model.state_dict(), "../data/test_dvi_basic.model") 89 | 90 | # test 91 | means, variances = eval_gp( test_dataset,model,likelihood, 92 | num_directions=num_directions, 93 | minibatch_size=n_test, 94 | minibatch_dim=num_directions) 95 | t3 = time.time() 96 | 97 | # compute MSE 98 | test_y = test_y.cpu() 99 | test_mse = MSE(test_y,means) 100 | # compute mean negative predictive density 101 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_y).mean() 102 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 103 | print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec") 104 | 105 | plot=1 106 | if plot == 1: 107 | from mpl_toolkits.mplot3d import axes3d 108 | import matplotlib.pyplot as plt 109 | fig = plt.figure(figsize=(12,6)) 110 | ax = fig.add_subplot(111, projection='3d') 111 | ax.scatter(test_x[:,0],test_x[:,1],test_y, color='k') 112 | ax.scatter(test_x[:,0],test_x[:,1],means, color='b') 113 | plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 114 | plt.show() 115 | 116 | -------------------------------------------------------------------------------- /experiments/synthetic1/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | dd = 0 12 | M_list = np.array([200,400,600,800,1000,1200,1400]) # matrix sizes 13 | #M_list = np.array([1000,1200,1400]) # matrix sizes 14 | ni_list = (M_list/(dd+1)).astype(int) 15 | for ni in ni_list: 16 | 17 | # write a pickle file with the run info 18 | run_params_dir = "./param_files/" 19 | if os.path.exists(run_params_dir) is False: 20 | os.mkdir(run_params_dir) 21 | run_params = {} 22 | run_params['mode'] = "SVGP" # DSVGP, SVGP or GradSVGP 23 | run_params['num_inducing'] = ni 24 | run_params['num_directions'] = dd 25 | run_params['minibatch_size'] = 512 26 | run_params['num_epochs'] = 1200 27 | run_params['tqdm'] = False 28 | run_params['inducing_data_initialization'] = False 29 | run_params['use_ngd'] = False 30 | run_params['use_ciq'] = False 31 | run_params['num_contour_quadrature'] = 10 # gpytorch default=15 32 | run_params['learning_rate_hypers'] = 0.01 33 | run_params['learning_rate_ngd'] = 0.1 34 | run_params['lr_benchmarks'] = 20*np.array([800]) 35 | run_params['lr_gamma'] = 0.1 36 | run_params['lr_sched'] = "MultiStepLR" 37 | run_params['mll_type'] = "PLL" 38 | run_params['data_file'] = "./synthetic1_dataset_10000_points_5_dim.pickle" 39 | #run_params['data_file'] = f"./synthetic1_dataset_10000_points_5_dim_grad_dimredux_{run_params['num_directions']}_directions.pickle" 40 | # seed and date 41 | now = datetime.now() 42 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 43 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 44 | run_params['date'] = now 45 
| run_params['seed'] = seed 46 | # file name 47 | if run_params['mode'] == "DSVGP": 48 | base_name = f"synthetic1_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 49 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 50 | f"_ciq_{run_params['use_ciq']}_{barcode}" 51 | elif run_params['mode'] == "SVGP": 52 | base_name = f"synthetic1_SVGP_ni_{run_params['num_inducing']}"+\ 53 | f"_ne_{run_params['num_epochs']}_{barcode}" 54 | elif run_params['mode'] == "GradSVGP": 55 | base_name = f"synthetic1_GradSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 56 | f"_ne_{run_params['num_epochs']}_{barcode}" 57 | run_params['base_name'] = base_name 58 | param_filename = run_params_dir + "params_" +base_name + ".pickle" 59 | pickle.dump(run_params,open(param_filename,'wb')) 60 | print(f"Dumped param file: {param_filename}") 61 | 62 | if write_sbatch: 63 | # write a slurm batch script 64 | slurm_dir = "./slurm_scripts/" 65 | if os.path.exists(slurm_dir) is False: 66 | os.mkdir(slurm_dir) 67 | slurm_name = slurm_dir + base_name + ".sub" 68 | #slurm_name = base_name + ".sub" 69 | f = open(slurm_name,"w") 70 | f.write(f"#!/bin/bash\n") 71 | f.write(f"#SBATCH -J {run_params['mode']}_{run_params['num_directions']}\n") 72 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 73 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 74 | f.write(f"#SBATCH --get-user-env\n") 75 | f.write(f"#SBATCH -N 1\n") 76 | f.write(f"#SBATCH -n 1\n") 77 | f.write(f"#SBATCH --mem=15000\n") 78 | f.write(f"#SBATCH -t 168:00:00\n") 79 | f.write(f"#SBATCH --partition=default_partition\n") 80 | f.write(f"#SBATCH --gres=gpu:1\n") 81 | f.write(f"python3 synthetic1.py {param_filename}\n") 82 | print(f"Dumped slurm file: {slurm_name}") 83 | 84 | # write the shell submission script 85 | submit_name = slurm_dir + 'slurm_submit.sh' 86 | f = open(submit_name,"w") 87 | f.write(f"#!/bin/bash\n") 88 | f.write(f"sbatch --requeue {slurm_name}") 89 | f.close() 90 | print(f"Dumped bash script: {submit_name}") 91 | 92 | if submit: 93 | # submit the script 94 | #bash_command = f"sbatch {slurm_name}" 95 | bash_command = f"bash {submit_name}" 96 | subprocess.run(bash_command.split(" ")) 97 | -------------------------------------------------------------------------------- /directionalvi/utils/test/test_synthetic_functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.append("../") 4 | from synthetic_functions import * 5 | import torch 6 | import copy 7 | from rescale import * 8 | 9 | def comp_err_deriv(fun,x,h): 10 | ''' 11 | test derivative at x using finite difference 12 | Inputs: 13 | fun::callable function handle, returns function values and derivatives 14 | x::tensor one testing position 15 | h:float finite difference step size 16 | ''' 17 | n = x.shape[0] 18 | d = x.shape[1] 19 | y = fun(x) 20 | g_true = y[:,1:] 21 | id_mat = torch.eye(d) 22 | error = torch.zeros(n) 23 | g_fd_set = torch.zeros((n,d)) 24 | for j in range(n): 25 | g_fd = torch.zeros(d) 26 | for i in range(d): 27 | xph = copy.deepcopy(x[j]) 28 | xph[i] = x[j][i] + h 29 | g_fd[i] = (fun(xph.reshape(1, d))[0][0] - fun(x[j].reshape(1, d))[0][0])/h 30 | g_fd_set[j,:] = g_fd 31 | error[j] = (g_fd - g_true[j]).abs().max() 32 | return error.max(), g_fd_set, g_true 33 | 34 | def test_fun_val(test_fun, n): 35 | try: 36 | dim = test_fun.dim 37 | except err: 38 | dim = 3 39 | x = torch.rand(n, dim) 40 | lb, ub = test_fun.get_bounds() 41 | x = 
from_unit_cube(x, lb, ub) 42 | y = test_fun.evaluate_true_with_deriv(x) 43 | err_fun = (y[:,0] - test_fun.evaluate_true(x)).abs().max() 44 | return err_fun 45 | 46 | def test_deriv(test_fun, n): 47 | # test derivative 48 | try: 49 | dim = test_fun.dim 50 | except err: 51 | dim = 3 52 | lb, ub = test_fun.get_bounds() 53 | x = torch.rand(n, dim) 54 | x = from_unit_cube(x, lb, ub) 55 | err, g_fd_set, g_true = comp_err_deriv(test_fun.evaluate_true_with_deriv,x,1e-6) 56 | return err, g_fd_set, g_true 57 | 58 | 59 | #branin 60 | test_fun_name = "Branin" 61 | test_fun = eval(f"{test_fun_name}_with_deriv")() 62 | err_fun = test_fun_val(test_fun, 10) 63 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 64 | err_deriv, _, _ = test_deriv(test_fun, 300) 65 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 66 | 67 | 68 | 69 | # stytang 70 | # print("stytang") 71 | # st = StyblinskiTang_with_deriv() 72 | # w = torch.rand(5, 2) 73 | # y = st.evaluate_true_with_deriv(w) 74 | # print(y) 75 | test_fun_name = "StyblinskiTang" 76 | test_fun = eval(f"{test_fun_name}_with_deriv")() 77 | err_fun = test_fun_val(test_fun, 10) 78 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 79 | err_deriv, _, _ = test_deriv(test_fun, 300) 80 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 81 | 82 | 83 | 84 | # #six_hump_camel 85 | # print("six hump camel") 86 | # cc = SixHumpCamel_with_deriv() 87 | # w = torch.rand(6, 2) 88 | # y = cc.evaluate_true_with_deriv(w) 89 | # print(y) 90 | test_fun_name = "SixHumpCamel" 91 | test_fun = eval(f"{test_fun_name}_with_deriv")() 92 | err_fun = test_fun_val(test_fun, 10) 93 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 94 | err_deriv, _, _ = test_deriv(test_fun, 300) 95 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 96 | 97 | 98 | # test_fun_name = "Welch" 99 | # test_fun = eval(f"{test_fun_name}_with_deriv")() 100 | # err_fun = test_fun_val(test_fun, 10) 101 | # print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 102 | # err_deriv, _, _ = test_deriv(test_fun, 300) 103 | # print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 104 | 105 | 106 | #Hartmann 107 | test_fun_name = "Hartmann" 108 | test_fun = eval(f"{test_fun_name}_with_deriv")() 109 | err_fun = test_fun_val(test_fun, 10) 110 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 111 | err_deriv, _, _ = test_deriv(test_fun, 300) 112 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 113 | 114 | 115 | test_fun_name = "Welch2" 116 | test_fun = eval(f"{test_fun_name}_with_deriv")() 117 | err_fun = test_fun_val(test_fun, 10) 118 | print(f"\nFor {test_fun_name}, error in function values is {err_fun:.4e}.") 119 | # g stores the computed derivatives, g_fd stores values approximated by FD. 
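# Note: test_deriv returns (max_error, g_fd_set, g_true) in that order, so in the unpacking
# below `g` receives the finite-difference approximations and `g_fd` the analytic
# derivatives; the names read reversed relative to the comment above. comp_err_deriv builds
# the approximations with one-sided forward differences of step size h.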
120 | err_deriv, g, g_fd = test_deriv(test_fun, 300) 121 | print(f"For {test_fun_name}, error in derivatives is {err_deriv:.4e}.") 122 | 123 | -------------------------------------------------------------------------------- /experiments/synthetic1/ExactGradGP.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import gpytorch 3 | import math 4 | import numpy as np 5 | import sys 6 | 7 | class GPModelWithDerivatives(gpytorch.models.ExactGP): 8 | def __init__(self, train_x, train_y, likelihood): 9 | super(GPModelWithDerivatives, self).__init__(train_x, train_y, likelihood) 10 | self.mean_module = gpytorch.means.ConstantMeanGrad() 11 | self.base_kernel = gpytorch.kernels.RBFKernelGrad() 12 | self.covar_module = gpytorch.kernels.ScaleKernel(self.base_kernel) 13 | 14 | def forward(self, x): 15 | mean_x = self.mean_module(x) 16 | covar_x = self.covar_module(x) 17 | return gpytorch.distributions.MultitaskMultivariateNormal(mean_x, covar_x) 18 | 19 | def train_gp(train_x,train_y,num_epochs=1,lr_hypers=0.01,verbose=True): 20 | 21 | dim = train_x.shape[-1] 22 | n_tasks = dim + 1 23 | likelihood = gpytorch.likelihoods.MultitaskGaussianLikelihood(num_tasks=n_tasks) # Value + x-derivative + y-derivative 24 | model = GPModelWithDerivatives(train_x, train_y, likelihood) 25 | 26 | # if torch.cuda.is_available(): 27 | # model = model.cuda() 28 | # likelihood = likelihood.cuda() 29 | # Find optimal model hyperparameters 30 | model.train() 31 | likelihood.train() 32 | 33 | # Use the adam optimizer 34 | optimizer = torch.optim.Adam(model.parameters(), lr=lr_hypers) # Includes GaussianLikelihood parameters 35 | 36 | # "Loss" for GPs - the marginal log likelihood 37 | mll = gpytorch.mlls.ExactMarginalLogLikelihood(likelihood, model) 38 | 39 | for i in range(num_epochs): 40 | optimizer.zero_grad() 41 | output = likelihood(model(train_x)) 42 | loss = -mll(output, train_y) 43 | loss.backward() 44 | print(f"Iter {i}, Loss: {loss.item()}") 45 | sys.stdout.flush() 46 | optimizer.step() 47 | 48 | print("Done Training") 49 | return model,likelihood 50 | 51 | 52 | def eval_gp(test_x,model,likelihood): 53 | 54 | print("Predicting") 55 | # Set into eval mode 56 | model.eval() 57 | likelihood.eval() 58 | 59 | # Make predictions 60 | with torch.no_grad(), gpytorch.settings.fast_computations(log_prob=False, covar_root_decomposition=False): 61 | predictions = likelihood(model(test_x)) 62 | means = predictions.mean 63 | variances = predictions.variance 64 | 65 | return means, variances 66 | 67 | if __name__ == "__main__": 68 | from datetime import datetime 69 | now = datetime.now() 70 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 71 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 72 | torch.random.manual_seed(seed) 73 | 74 | # load data 75 | import pickle 76 | d = pickle.load(open("./synthetic1_dataset_10000_points_5_dim.pickle", "rb")) 77 | X = d['X'] 78 | Y = d['Y'] 79 | n,dim = X.shape 80 | n_train = int(0.8*n) 81 | n_test = n - n_train 82 | # reduce n_train 83 | n_train = int(n_train/(dim+1)) 84 | # train/test split 85 | train_x = X[:n_train] 86 | train_y = Y[:n_train] 87 | test_x = X[n_train:n_train+n_test] 88 | test_y = Y[n_train:n_train+n_test] 89 | test_f = test_y[:,0] # just function values 90 | # train gp 91 | num_epochs = 400 92 | lr_hypers = 0.05 93 | model,likelihood = train_gp(train_x,train_y,num_epochs=num_epochs,lr_hypers=lr_hypers,verbose=True) 94 | # eval gp 95 | 
means,variances = eval_gp(test_x,model,likelihood) 96 | means = means[:,0] # just function values 97 | variances= variances[:,0] # just function values 98 | # compute MSE 99 | test_mse = torch.mean((test_f-means)**2) 100 | # compute mean negative predictive density 101 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean() 102 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 103 | 104 | # file name 105 | data_filename = f"./output/data_ExactGradGP_ne_{num_epochs}_{barcode}.pickle" 106 | # dump the data 107 | outdata = {} 108 | outdata['test_mse'] = test_mse 109 | outdata['test_nll'] = test_nll 110 | outdata['mode'] = "ExactGradGP" 111 | outdata['dim'] = dim 112 | outdata['M'] = n_train 113 | outdata['num_epochs'] = num_epochs 114 | outdata['lr_hypers'] = lr_hypers 115 | data_filename 116 | pickle.dump(outdata,open(data_filename,"wb")) 117 | print(f"Dropped file: {data_filename}") 118 | -------------------------------------------------------------------------------- /experiments/rover/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | dd = 0 12 | M_list = np.array([400]) # matrix sizes 13 | ni_list = (M_list/(dd+1)).astype(int) 14 | for ni in ni_list: 15 | 16 | # write a pickle file with the run info 17 | run_params_dir = "./param_files/" 18 | if os.path.exists(run_params_dir) is False: 19 | os.mkdir(run_params_dir) 20 | run_params = {} 21 | run_params['mode'] = "Vanilla" # DSVGP, SVGP or Vanilla 22 | run_params['num_inducing'] = ni 23 | run_params['num_directions'] = dd 24 | run_params['dim'] = 200 # not a parameter 25 | run_params['minibatch_size'] = 512 26 | run_params['num_epochs'] = 300 27 | run_params['inducing_data_initialization'] = False 28 | run_params['use_ngd'] = False 29 | run_params['use_ciq'] = False 30 | run_params['num_contour_quadrature'] = 15 # gpytorch default=15 31 | run_params['learning_rate_hypers'] = 0.01 32 | run_params['learning_rate_ngd'] = 0.1 33 | run_params['lr_benchmarks'] = 20*np.array([400]) 34 | run_params['lr_gamma'] = 0.1 35 | run_params['lr_sched'] = None 36 | run_params['mll_type'] = "PLL" 37 | run_params['verbose'] = False 38 | run_params['turbo_lb'] = -5*np.ones(run_params['dim']) 39 | run_params['turbo_ub'] = 5*np.ones(run_params['dim']) 40 | run_params['turbo_n_init'] = 100 41 | run_params['turbo_max_evals'] = 2000 42 | run_params['turbo_batch_size'] = 5 43 | # seed and date 44 | now = datetime.now() 45 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 46 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 47 | run_params['date'] = now 48 | run_params['seed'] = seed 49 | # file name 50 | if run_params['mode'] == "DSVGP": 51 | base_name = f"rover_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 52 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 53 | f"_ciq_{run_params['use_ciq']}_{barcode}" 54 | elif run_params['mode'] == "SVGP": 55 | base_name = f"rover_SVGP_ni_{run_params['num_inducing']}"+\ 56 | f"_ne_{run_params['num_epochs']}_{barcode}" 57 | elif run_params['mode'] == "Vanilla": 58 | base_name = f"rover_Vanilla"+\ 59 | f"_ne_{run_params['num_epochs']}_{barcode}" 60 | run_params['base_name'] = base_name 61 | param_filename = run_params_dir + 
"params_" +base_name + ".pickle" 62 | pickle.dump(run_params,open(param_filename,'wb')) 63 | print(f"Dumped param file: {param_filename}") 64 | 65 | if write_sbatch: 66 | # write a slurm batch script 67 | slurm_dir = "./slurm_scripts/" 68 | if os.path.exists(slurm_dir) is False: 69 | os.mkdir(slurm_dir) 70 | slurm_name = slurm_dir + base_name + ".sub" 71 | #slurm_name = base_name + ".sub" 72 | f = open(slurm_name,"w") 73 | f.write(f"#!/bin/bash\n") 74 | f.write(f"#SBATCH -J rover_{run_params['mode']}{run_params['num_directions']}\n") 75 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 76 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 77 | f.write(f"#SBATCH --get-user-env\n") 78 | f.write(f"#SBATCH -N 1\n") 79 | f.write(f"#SBATCH -n 1\n") 80 | f.write(f"#SBATCH --mem=15000\n") 81 | f.write(f"#SBATCH -t 168:00:00\n") 82 | f.write(f"#SBATCH --partition=default_partition\n") 83 | f.write(f"#SBATCH --gres=gpu:1\n") 84 | f.write(f"python3 test_turbo.py {param_filename}\n") 85 | print(f"Dumped slurm file: {slurm_name}") 86 | 87 | # write the shell submission script 88 | submit_name = slurm_dir + 'slurm_submit.sh' 89 | f = open(submit_name,"w") 90 | f.write(f"#!/bin/bash\n") 91 | f.write(f"sbatch --requeue {slurm_name}") 92 | f.close() 93 | print(f"Dumped bash script: {submit_name}") 94 | 95 | if submit: 96 | # submit the script 97 | #bash_command = f"sbatch {slurm_name}" 98 | bash_command = f"bash {submit_name}" 99 | subprocess.run(bash_command.split(" ")) 100 | -------------------------------------------------------------------------------- /experiments/bunny/ImplicitBunny/bunny.m: -------------------------------------------------------------------------------- 1 | % Script to reconstruct Stanford bunny via implicit surface. **Warning** running this script will take > 30 minutes 2 | % as training a GP with tens of thousands of data points is computationally expensive. 3 | 4 | clear all 5 | ski_order = 5; 6 | ninduce = 30; 7 | d = 3; 8 | 9 | %% Load pre-processed large Stanford bunny (~70,000 vertices) 10 | % fprintf('Reading... 
'); obj = readObj('bunny.obj'); fprintf(' Done!\n'); 11 | % Original Processing Script 12 | load('bunny.mat'); 13 | 14 | 15 | X = obj.v; Xorig = X; 16 | T = obj.f.vt; Torig = T; 17 | nx = obj.vn; 18 | 19 | %% Map to unitbox 20 | X = mapToUnitbox(X); 21 | lims = [-0.01 1.01 -0.01 1.01 -0.01 1.01]; 22 | 23 | % %% Normalize the normals 24 | nx = nx ./ sqrt(sum(nx.^2, 2)); 25 | % 26 | % %% Add noise 27 | noise = 0.01; 28 | X = X + noise*randn(size(X,1), d); 29 | nx = nx + 0*randn(size(X,1), d); 30 | % 31 | % %% Map back to unitbox and pick subset 32 | X = mapToUnitbox(X); 33 | nn = 5; % use 1/nn of data 34 | x = X(1:nn:end,1); y = X(1:nn:end,2); z = X(1:nn:end,3); 35 | nx = nx(1:nn:end, :); 36 | n = size(x, 1); 37 | fprintf('Size of Kdot: [%d %d]\n', n*(d+1), n*(d+1)) 38 | 39 | %% Train GP with gradients using TPS kernel 40 | % lb = min([x,y,z]); ub = max([x,y,z]); 41 | % beta = 1e-4; R = 2; 42 | % s0 = 1; sig0 = 1e-1; 43 | % 44 | % nZ = 3; Z = sign(randn(n*(d+1),nZ)); 45 | % xg = repmat({linspace(-0.1, 1.1, ninduce)}, d, 1); 46 | % [Wtrain{1}, Wtrain{2}] = interpGrid([x,y,z], xg, ski_order); 47 | % cov = @(hyp) tps_kernel_grad_ski(R, [x,y,z], hyp, xg, Wtrain); 48 | % hyp = struct('cov', log([s0]), 'lik', log([sig0 sig0])); 49 | % lmlfun = @(hyp) lml_mvm(cov, [zeros(n, 1), nx], hyp, Z, beta, true); 50 | % params = minimize(hyp, lmlfun, -30); 51 | % s = exp(params.cov(1)); 52 | % sigma = sqrt(exp(2*params.lik) + beta); 53 | % fprintf('TPS-SKI with gradients: (s, sigma1, sigma2) = (%.3f, %.3f, %.3f)\n', exp(params.cov), sigma) 54 | 55 | %% Prediction handle 56 | % if length(sigma) == 1, sigma = [sigma, sigma]; end 57 | % sig = [sigma(1)*ones(1,n), sigma(2)*ones(1,n*d)]'; 58 | % [K, ~, precond] = tps_kernel_grad_ski(R, [x,y,z], params, xg, Wtrain); 59 | % mvm = @(x) K(x) + sig.^2 .* x; 60 | % lambda = pcg(mvm, [zeros(n, 1); nx(:)], 1e-10, 1000, precond); 61 | 62 | %% Compute implicit surface 63 | isize = 100; 64 | nxx = isize; nyy = isize; nzz = isize; 65 | x1 = linspace(lims(1), lims(2), nxx); 66 | x2 = linspace(lims(3), lims(4), nyy); 67 | x3 = linspace(lims(5), lims(6), nzz); 68 | 69 | % V = zeros(nxx, nyy, nzz); 70 | % for i=1:nzz % Loop over third dimension to not have the memory blow up on us.... 71 | % [XX, YY, ZZ] = meshgrid(x1, x2, x3(i)); 72 | % Wtest = {}; Wtest{1} = interpGrid([XX(:) YY(:) ZZ(:)], xg, ski_order); 73 | % KK = tps_kernel_grad_ski(R, [x,y,z], params, xg, Wtrain, [XX(:) YY(:) ZZ(:)], Wtest); 74 | % V(:, :, i) = reshape(KK(lambda), [nxx, nyy, 1]); 75 | % end 76 | 77 | % y_pred = readtable('bunny_DSVGP_ntrain3482_m300_p3_epochs100_standard_standard_expTEST_ntest31335_pred_y.csv'); 78 | % V = reshape(y_pred, [nxx, nyy, nzz]); 79 | 80 | V = load('bunny_DSVGP_ntrain3482_m300_p3_epochs100_standard_standard_expTEST_ntest31335_V.mat').V; 81 | 82 | FV = isosurface(x1, x2, x3, V, 0); 83 | 84 | %% Remove vertices far from training points 85 | D = pdist2([x,y,z], FV.vertices, 'euclidean', 'Smallest', 1)'; 86 | verticesToRemove = find(D > 3.2e-2)'; 87 | newVertices = FV.vertices; 88 | newVertices(verticesToRemove,:) = []; 89 | [~, newVertexIndex] = ismember(FV.vertices, newVertices, 'rows'); 90 | newFaces = FV.faces(all([FV.faces(:,1) ~= verticesToRemove, ... 91 | FV.faces(:,2) ~= verticesToRemove, ... 
92 | FV.faces(:,3) ~= verticesToRemove], 2),:); 93 | newFaces = newVertexIndex(newFaces); 94 | V = newVertices; 95 | F = newFaces; 96 | 97 | %% Plot original points 98 | figure('units','normalized','outerposition',[0 0 1 1]); 99 | subaxis(1, 3, 1, 'Spacing', 0, 'Padding', 0, 'Margin', 0); 100 | trisurf(Torig, Xorig(:,3), Xorig(:,1), Xorig(:,2), 'EdgeColor', 'none'); 101 | axis(lims) 102 | axis equal off 103 | shading interp % Make surface look smooth 104 | view(90, 15) 105 | camlight; lighting phong % Shine light on surface 106 | 107 | %% Plot noisy points 108 | subaxis(1, 3, 2, 'Spacing', 0, 'Padding', 0, 'Margin', 0); 109 | trisurf(T, X(:,3), X(:,1), X(:,2), 'EdgeColor', 'none'); 110 | axis(lims) 111 | axis equal off 112 | shading interp % Make surface look smooth 113 | view(90, 15) 114 | camlight; lighting phong % Shine light on surface 115 | 116 | %% Plot bunny after (hopefully) removing all dummy points 117 | subaxis(1, 3, 3, 'Spacing', 0, 'Padding', 0, 'Margin', 0); 118 | trisurf(F, V(:,3), V(:,1), V(:,2), 'EdgeColor', 'none'); 119 | axis(lims) 120 | axis equal off 121 | shading interp % Make surface look smooth 122 | view(90, 15) 123 | camlight; lighting phong % Shine light on surface -------------------------------------------------------------------------------- /experiments/stellarator_regression/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | dd =1 12 | M_list = np.array([200,500,800,1000,1200,1400]) 13 | ni_list = (M_list/(dd+1)).astype(int) 14 | for ni in ni_list: 15 | 16 | # write a pickle file with the run info 17 | run_params_dir = "./param_files/" 18 | if os.path.exists(run_params_dir) is False: 19 | os.mkdir(run_params_dir) 20 | run_params = {} 21 | run_params['mode'] = "DSVGP" # DSVGP, SVGP, GradSVGP, DSVGP-Shared 22 | run_params['num_inducing'] = ni 23 | run_params['num_directions'] = dd 24 | run_params['minibatch_size'] = 512 25 | run_params['num_epochs'] = 1000 26 | run_params['tqdm'] = False 27 | run_params['inducing_data_initialization'] = False 28 | run_params['use_ngd'] = False 29 | run_params['use_ciq'] = False 30 | run_params['num_contour_quadrature'] = 15 # gpytorch default=15 31 | run_params['learning_rate_hypers'] = 0.01 32 | run_params['learning_rate_ngd'] = 0.1 33 | # lr_benchmarks has units number of steps not number of epochs 34 | run_params['lr_benchmarks'] = 45*np.array([600,800]) 35 | run_params['lr_gamma'] = 0.1 36 | run_params['lr_sched'] = "MultiStepLR" 37 | run_params['mll_type'] = "PLL" #ELBO or PLL 38 | run_params['data_file'] = "focus_w7x_dataset_45dim_pickle_format.pickle" 39 | #run_params['data_file'] = "focus_w7x_dataset_45dim_500000_points.pickle" 40 | #run_params['data_file'] = f"./focus_w7x_dataset_45dim_grad_dimredux_{run_params['num_directions']}_directions.pickle" 41 | # seed and date 42 | now = datetime.now() 43 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 44 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 45 | run_params['date'] = now 46 | run_params['seed'] = seed 47 | # file name 48 | if run_params['mode'] == "DSVGP": 49 | base_name = f"stell_regress_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 50 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 51 | 
f"_ciq_{run_params['use_ciq']}_{barcode}" 52 | elif run_params['mode'] == "DSVGP-Shared": 53 | base_name = f"stell_regress_DSVGP_Shared_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 54 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 55 | f"_ciq_{run_params['use_ciq']}_{barcode}" 56 | elif run_params['mode'] == "SVGP": 57 | base_name = f"stell_regress_SVGP_ni_{run_params['num_inducing']}"+\ 58 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 59 | f"_ciq_{run_params['use_ciq']}_{barcode}" 60 | elif run_params['mode'] == "GradSVGP": 61 | base_name = f"stell_regress_GradSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 62 | f"_ne_{run_params['num_epochs']}_{barcode}" 63 | run_params['base_name'] = base_name 64 | param_filename = run_params_dir + "params_" +base_name + ".pickle" 65 | pickle.dump(run_params,open(param_filename,'wb')) 66 | print(f"Dumped param file: {param_filename}") 67 | 68 | if write_sbatch: 69 | # write a slurm batch script 70 | slurm_dir = "./slurm_scripts/" 71 | if os.path.exists(slurm_dir) is False: 72 | os.mkdir(slurm_dir) 73 | slurm_name = slurm_dir + base_name + ".sub" 74 | #slurm_name = base_name + ".sub" 75 | f = open(slurm_name,"w") 76 | f.write(f"#!/bin/bash\n") 77 | f.write(f"#SBATCH -J {run_params['mode']}_{run_params['num_directions']}\n") 78 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 79 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 80 | f.write(f"#SBATCH --get-user-env\n") 81 | f.write(f"#SBATCH -N 1\n") 82 | f.write(f"#SBATCH -n 1\n") 83 | f.write(f"#SBATCH --mem=15000\n") 84 | f.write(f"#SBATCH -t 168:00:00\n") 85 | f.write(f"#SBATCH --partition=default_partition\n") 86 | f.write(f"#SBATCH --gres=gpu:1\n") 87 | f.write(f"python3 stellarator_regression.py {param_filename}\n") 88 | print(f"Dumped slurm file: {slurm_name}") 89 | 90 | # write the shell submission script 91 | submit_name = slurm_dir + 'slurm_submit.sh' 92 | f = open(submit_name,"w") 93 | f.write(f"#!/bin/bash\n") 94 | f.write(f"sbatch --requeue {slurm_name}") 95 | f.close() 96 | print(f"Dumped bash script: {submit_name}") 97 | 98 | if submit: 99 | # submit the script 100 | #bash_command = f"sbatch {slurm_name}" 101 | bash_command = f"bash {submit_name}" 102 | subprocess.run(bash_command.split(" ")) 103 | -------------------------------------------------------------------------------- /experiments/uci_dfree/run_exp.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import subprocess 4 | from datetime import datetime 5 | import numpy as np 6 | 7 | # flags 8 | write_sbatch =True 9 | submit =True 10 | 11 | model = ['DSVGP','DSVGP','SVGP','SVGP'] 12 | mll = ['PLL','ELBO','PLL','ELBO'] 13 | dd = np.array([1,1,0,0]) # number of directions (use 0 for SVGP) 14 | M = 800 # inducing matrix size 15 | ni_list = (M/(dd+1)).astype(int) # ensures equal inducing matrix size 16 | 17 | for jj,ni in enumerate(ni_list): 18 | 19 | # write a pickle file with the run info 20 | run_params_dir = "./param_files/" 21 | if os.path.exists(run_params_dir) is False: 22 | os.mkdir(run_params_dir) 23 | run_params = {} 24 | 25 | # select the dataset 26 | #run_params['data_file'] = "../../../uci/protein/protein.mat" # use relative import 27 | #run_params['data_file'] = "../../../uci/elevators/elevators.mat" # use relative import 28 | #run_params['data_file'] = "../../../uci/kin40k/kin40k.mat" # use relative import 29 | #run_params['data_file'] = 
"../../../uci/keggdirected/keggdirected.mat" 30 | run_params['data_file'] = "../../../uci/energy/energy.mat" # use relative import 31 | run_params['data_dir'] = "./output/energy/" # save location for output 32 | 33 | run_params['mode'] = model[jj] # DSVGP, SVGP 34 | run_params['mll_type'] = mll[jj] # PLL or ELBO 35 | run_params['num_inducing'] = ni_list[jj] # number of inducing 36 | run_params['num_directions'] = dd[jj] # number of directions 37 | run_params['minibatch_size'] = 512 38 | run_params['num_epochs'] = 700 39 | run_params['verbose'] = True 40 | run_params['inducing_data_initialization'] = False 41 | run_params['use_ngd'] = False 42 | run_params['use_ciq'] = False 43 | run_params['num_contour_quadrature'] = 15 # gpytorch default=15 44 | run_params['learning_rate_hypers'] = 0.01 45 | run_params['learning_rate_ngd'] = 0.1 46 | run_params['lr_sched'] = "MultiStepLR" 47 | run_params['lr_benchmarks'] = 73*np.array([300,500]) 48 | run_params['lr_gamma'] = 0.1 # LR decrease rate 49 | # seed and date 50 | now = datetime.now() 51 | seed = int("%d%.2d%.2d%.2d%.2d"%(now.month,now.day,now.hour,now.minute,now.second)) 52 | barcode = "%d%.2d%.2d%.2d%.2d%.2d"%(now.year,now.month,now.day,now.hour,now.minute,now.second) 53 | run_params['date'] = now 54 | run_params['seed'] = seed 55 | # file name 56 | if run_params['mode'] == "DSVGP": 57 | base_name = f"uci_DSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 58 | f"_ne_{run_params['num_epochs']}_ngd_{run_params['use_ngd']}"+\ 59 | f"_ciq_{run_params['use_ciq']}_{barcode}" 60 | elif run_params['mode'] == "SVGP": 61 | base_name = f"uci_SVGP_ni_{run_params['num_inducing']}"+\ 62 | f"_ne_{run_params['num_epochs']}_{barcode}" 63 | elif run_params['mode'] == "GradSVGP": 64 | base_name = f"uci_GradSVGP_ni_{run_params['num_inducing']}_nd_{run_params['num_directions']}"+\ 65 | f"_ne_{run_params['num_epochs']}_{barcode}" 66 | run_params['base_name'] = base_name 67 | param_filename = run_params_dir + "params_" +base_name + ".pickle" 68 | pickle.dump(run_params,open(param_filename,'wb')) 69 | print(f"Dumped param file: {param_filename}") 70 | 71 | if write_sbatch: 72 | # write a slurm batch script 73 | slurm_dir = "./slurm_scripts/" 74 | if os.path.exists(slurm_dir) is False: 75 | os.mkdir(slurm_dir) 76 | slurm_name = slurm_dir + base_name + ".sub" 77 | #slurm_name = base_name + ".sub" 78 | f = open(slurm_name,"w") 79 | f.write(f"#!/bin/bash\n") 80 | f.write(f"#SBATCH -J {run_params['mode']}_{run_params['num_directions']}\n") 81 | f.write(f"#SBATCH -o ./slurm_output/job_%j.out\n") 82 | f.write(f"#SBATCH -e ./slurm_output/job_%j.err\n") 83 | f.write(f"#SBATCH --get-user-env\n") 84 | f.write(f"#SBATCH -N 1\n") 85 | f.write(f"#SBATCH -n 1\n") 86 | f.write(f"#SBATCH --mem=15000\n") 87 | f.write(f"#SBATCH -t 168:00:00\n") 88 | f.write(f"#SBATCH --partition=default_partition\n") 89 | f.write(f"#SBATCH --gres=gpu:1\n") 90 | f.write(f"python3 test.py {param_filename}\n") 91 | print(f"Dumped slurm file: {slurm_name}") 92 | 93 | # write the shell submission script 94 | submit_name = slurm_dir + 'slurm_submit.sh' 95 | f = open(submit_name,"w") 96 | f.write(f"#!/bin/bash\n") 97 | f.write(f"sbatch --requeue {slurm_name}") 98 | f.close() 99 | print(f"Dumped bash script: {submit_name}") 100 | 101 | if submit: 102 | # submit the script 103 | #bash_command = f"sbatch {slurm_name}" 104 | bash_command = f"bash {submit_name}" 105 | subprocess.run(bash_command.split(" ")) 106 | 
-------------------------------------------------------------------------------- /tests/test_dsvgp.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import gpytorch 5 | import tqdm 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import sys 11 | sys.path.append("../") 12 | sys.path.append("../directionalvi/utils") 13 | sys.path.append("../directionalvi") 14 | from RBFKernelDirectionalGrad import RBFKernelDirectionalGrad 15 | from DirectionalGradVariationalStrategy import DirectionalGradVariationalStrategy 16 | from directional_vi import train_gp, eval_gp 17 | from metrics import MSE 18 | import testfun 19 | 20 | # data parameters 21 | n = 600 22 | dim = 2 23 | n_test = 1000 24 | 25 | # training params 26 | num_inducing = 20 27 | num_directions = 2 28 | minibatch_size = 200 29 | num_epochs = 400 30 | 31 | # seed 32 | torch.random.manual_seed(0) 33 | # use tqdm or just have print statements 34 | tqdm = False 35 | # use data to initialize inducing stuff 36 | inducing_data_initialization = False 37 | # use natural gradients and/or CIQ 38 | use_ngd = False 39 | use_ciq = False 40 | num_contour_quadrature=15 41 | # learning rate 42 | learning_rate_hypers = 0.01 43 | learning_rate_ngd = 0.1 44 | gamma = 10.0 45 | #levels = np.array([20,150,300]) 46 | #def lr_sched(epoch): 47 | # a = np.sum(levels > epoch) 48 | # return (1./gamma)**a 49 | lr_sched = None 50 | 51 | # training and testing data 52 | train_x = torch.rand(n,dim) 53 | test_x = torch.rand(n_test,dim) 54 | train_y = testfun.f(train_x, deriv=True) 55 | test_y = testfun.f(test_x, deriv=True) 56 | if torch.cuda.is_available(): 57 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 58 | 59 | train_dataset = TensorDataset(train_x, train_y) 60 | test_dataset = TensorDataset(test_x, test_y) 61 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 62 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 63 | 64 | # train 65 | print("\n\n---DirectionalGradVGP---") 66 | print(f"Start training with {n} trainig data of dim {dim}") 67 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 68 | args={"verbose":True} 69 | t1 = time.time() 70 | model,likelihood = train_gp(train_dataset, 71 | num_inducing=num_inducing, 72 | num_directions=num_directions, 73 | minibatch_size = minibatch_size, 74 | minibatch_dim = num_directions, 75 | num_epochs =num_epochs, 76 | learning_rate_hypers=learning_rate_hypers, 77 | learning_rate_ngd=learning_rate_ngd, 78 | inducing_data_initialization=inducing_data_initialization, 79 | use_ngd = use_ngd, 80 | use_ciq = use_ciq, 81 | lr_sched=lr_sched, 82 | num_contour_quadrature=num_contour_quadrature, 83 | tqdm=tqdm,**args 84 | ) 85 | t2 = time.time() 86 | 87 | # save the model 88 | # torch.save(model.state_dict(), "../data/test_dvi_basic.model") 89 | 90 | # test 91 | means, variances = eval_gp( test_dataset,model,likelihood, 92 | num_directions=num_directions, 93 | minibatch_size=n_test, 94 | minibatch_dim=num_directions) 95 | t3 = time.time() 96 | 97 | # compute MSE 98 | test_y = test_y.cpu() 99 | test_mse = MSE(test_y[:,0],means[::num_directions+1]) 100 | # compute mean negative predictive density 101 | test_nll = -torch.distributions.Normal(means[::num_directions+1], 
variances.sqrt()[::num_directions+1]).log_prob(test_y[:,0]).mean() 102 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 103 | print(f"Training time: {(t2-t1):.2f} sec, testing time: {(t3-t2):.2f} sec") 104 | 105 | plot=0 106 | if plot == 1: 107 | from mpl_toolkits.mplot3d import axes3d 108 | import matplotlib.pyplot as plt 109 | fig = plt.figure(figsize=(12,6)) 110 | ax = fig.add_subplot(111, projection='3d') 111 | ax.scatter(test_x[:,0],test_x[:,1],test_y[:,0], color='k') 112 | ax.scatter(test_x[:,0],test_x[:,1],means[::num_directions+1], color='b') 113 | plt.title("f(x,y) variational fit; actual curve is black, variational is blue") 114 | plt.show() 115 | fig = plt.figure(figsize=(12,6)) 116 | ax = fig.add_subplot(111, projection='3d') 117 | ax.scatter(test_x[:,0],test_x[:,1],test_y[:,1], color='k') 118 | ax.scatter(test_x[:,0],test_x[:,1],means[1::num_directions+1], color='b') 119 | plt.title("df/dx variational fit; actual curve is black, variational is blue") 120 | plt.show() 121 | fig = plt.figure(figsize=(12,6)) 122 | ax = fig.add_subplot(111, projection='3d') 123 | ax.scatter(test_x[:,0],test_x[:,1],test_y[:,2], color='k') 124 | ax.scatter(test_x[:,0],test_x[:,1],means[2::num_directions+1], color='b') 125 | plt.title("df/dy variational fit; actual curve is black, variational is blue") 126 | plt.show() 127 | -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.sparse as sp 3 | import torch 4 | import pickle 5 | import os 6 | 7 | def encode_onehot(labels): 8 | classes = set(labels) 9 | classes_dict = {c: np.identity(len(classes))[i, :] for i, c in 10 | enumerate(classes)} 11 | labels_onehot = np.array(list(map(classes_dict.get, labels)), 12 | dtype=np.int32) 13 | return labels_onehot 14 | 15 | 16 | def load_data(dataset="cora", train_percent=0.036): 17 | """Load citation network dataset""" 18 | path = os.path.abspath(__file__ + f"/../data/{dataset}/") 19 | 20 | idx_features_labels = np.genfromtxt("{}/{}.content".format(path, dataset), 21 | dtype=np.dtype(str)) 22 | features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32) 23 | labels = encode_onehot(idx_features_labels[:, -1]) 24 | 25 | # build graph 26 | idx = np.array(idx_features_labels[:, 0], dtype=np.int32) 27 | idx_map = {j: i for i, j in enumerate(idx)} 28 | edges_unordered = np.genfromtxt("{}/{}.cites".format(path, dataset), 29 | dtype=np.int32) 30 | edges = np.array(list(map(idx_map.get, edges_unordered.flatten())), 31 | dtype=np.int32).reshape(edges_unordered.shape) 32 | adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 33 | shape=(labels.shape[0], labels.shape[0]), 34 | dtype=np.float32) 35 | 36 | # build symmetric adjacency matrix 37 | adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) 38 | 39 | features = normalize(features) 40 | adj = normalize(adj + sp.eye(adj.shape[0])) 41 | 42 | num_total_nodes = len(labels) 43 | n_train = int(train_percent*num_total_nodes) 44 | idx_train = range(n_train) 45 | idx_val = range(n_train, n_train+300) 46 | idx_test = range(n_train+300, num_total_nodes) 47 | 48 | features = torch.FloatTensor(np.array(features.todense())) 49 | labels = torch.LongTensor(np.where(labels)[1]) 50 | adj = sparse_mx_to_torch_sparse_tensor(adj) 51 | 52 | idx_train = torch.LongTensor(idx_train) 53 | idx_val = torch.LongTensor(idx_val) 54 | 
idx_test = torch.LongTensor(idx_test) 55 | 56 | return adj, features, labels, idx_train, idx_val, idx_test 57 | 58 | def load_citeseer(train_percent=0.036): 59 | dataset="citeseer" 60 | path = os.path.abspath(__file__ + f"/../data/{dataset}/") 61 | nodes_source=f"{path}/{dataset}.content" 62 | edges_source=f"{path}/{dataset}.cites" 63 | # load nodes dataframe 64 | df_nodes = pickle.load(open(f'{path}/{dataset}_nodes.pkl','rb')) 65 | # load edges, np array of type int32 66 | edges = pickle.load(open(f'{path}/{dataset}_edges.pkl','rb')) 67 | 68 | # node features and labels 69 | idx_features_labels = np.genfromtxt("{}{}.content".format(path, dataset), 70 | dtype=np.dtype(str)) 71 | features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32) 72 | labels = encode_onehot(idx_features_labels[:, -1]) 73 | 74 | # generate adjacency matrix 75 | adj = sp.coo_matrix((np.ones(edges.shape[0]), (edges[:, 0], edges[:, 1])), 76 | shape=(labels.shape[0], labels.shape[0]), 77 | dtype=np.float32) 78 | # build symmetric adjacency matrix 79 | adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj) 80 | adj = normalize(adj + sp.eye(adj.shape[0])) 81 | 82 | features = torch.FloatTensor(np.array(features.todense())) 83 | labels = torch.LongTensor(np.where(labels)[1]) 84 | adj = sparse_mx_to_torch_sparse_tensor(adj) 85 | 86 | num_nodes = len(labels) 87 | n_train = int(num_nodes*train_percent) 88 | idx_train = range(n_train) 89 | idx_val = range(n_train, n_train+200) 90 | idx_test = range(n_train+200, num_nodes) 91 | idx_train = torch.LongTensor(idx_train) 92 | idx_val = torch.LongTensor(idx_val) 93 | idx_test = torch.LongTensor(idx_test) 94 | return adj, features, labels, idx_train, idx_val, idx_test 95 | 96 | 97 | 98 | 99 | 100 | def normalize(mx): 101 | """Row-normalize sparse matrix""" 102 | rowsum = np.array(mx.sum(1)) 103 | r_inv = np.power(rowsum, -1).flatten() 104 | r_inv[np.isinf(r_inv)] = 0. 
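    # rows of mx that sum to zero give inf in r_inv above; zeroing those entries leaves
    # such rows untouched. sp.diags(r_inv) builds the diagonal matrix D^{-1}, so the dot
    # product below computes D^{-1} @ mx, rescaling every nonzero row of mx to sum to one.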
105 | r_mat_inv = sp.diags(r_inv) 106 | mx = r_mat_inv.dot(mx) 107 | return mx 108 | 109 | 110 | def accuracy(output, labels): 111 | preds = output.max(1)[1].type_as(labels) 112 | correct = preds.eq(labels).double() 113 | correct = correct.sum() 114 | return correct / len(labels) 115 | 116 | 117 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 118 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 119 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 120 | indices = torch.from_numpy( 121 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 122 | values = torch.from_numpy(sparse_mx.data) 123 | shape = torch.Size(sparse_mx.shape) 124 | return torch.sparse.FloatTensor(indices, values, shape) 125 | -------------------------------------------------------------------------------- /experiments/GNN_bo/gcn_sgd.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import random 4 | import time 5 | import argparse 6 | import wandb 7 | 8 | import torch 9 | import gpytorch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from torch.utils.data import TensorDataset, DataLoader 13 | # import networkx as nx 14 | 15 | from matplotlib import pyplot as plt 16 | 17 | import os 18 | import sys 19 | sys.path.append("../") 20 | sys.path.append("../../directionalvi/utils") 21 | sys.path.append("../../directionalvi") 22 | import directional_vi 23 | import traditional_vi 24 | import grad_svgp 25 | from metrics import MSE 26 | import pickle 27 | from scipy.io import loadmat 28 | # from GCN.utils import * 29 | # from GCN.models import GCN 30 | from torch_geometric.datasets import Planetoid 31 | from GCN.models2 import Net 32 | 33 | def str2bool(v): 34 | if isinstance(v, bool): 35 | return v 36 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 37 | return True 38 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 39 | return False 40 | else: 41 | raise argparse.ArgumentTypeError('Boolean value expected.') 42 | 43 | 44 | 45 | parser = argparse.ArgumentParser(description="parse args") 46 | # Directories for data/logs 47 | # parser.add_argument("--watch_model", type=str2bool, nargs='?',const=True, default=False) 48 | parser.add_argument("--exp_name", type=str, default="-") 49 | # Dataset and model type 50 | parser.add_argument("-d", "--dataset", type=str, default="synthetic-Branin") 51 | parser.add_argument("--model", type=str, default="DSVGP") 52 | parser.add_argument("-vs", "--variational_strategy", type=str, default="standard", choices=["standard", "CIQ"]) 53 | parser.add_argument("-vd", "--variational_distribution", type=str, default="standard", choices=["standard", "NGD"]) 54 | parser.add_argument("-m", "--num_inducing", type=int, default=10) 55 | parser.add_argument("-p", "--num_directions", type=int, default=10) 56 | parser.add_argument("-n", "--num_epochs", type=int, default=1) 57 | parser.add_argument("-bs", "--batch_size", type=int, default=256) 58 | parser.add_argument("--turbo_batch_size", type=int, default=50) 59 | parser.add_argument("--turbo_max_evals", type=int, default=100) 60 | parser.add_argument("--lr", type=float, default=0.01) 61 | parser.add_argument("--lr_ngd", type=float, default=0.1) 62 | parser.add_argument("--gamma", type=float, default=0.1) 63 | parser.add_argument("--num_contour_quad", type=int, default=15) 64 | parser.add_argument("--lr_sched", type=str, default=None) 65 | parser.add_argument("--mll_type", type=str, default="ELBO", choices=["ELBO", "PLL"]) 66 | 
parser.add_argument("-s", "--seed", type=int, default=0) 67 | 68 | 69 | args = vars(parser.parse_args()) 70 | 71 | 72 | exp_name = args["exp_name"] 73 | num_epochs = args["turbo_max_evals"] 74 | args["model"] = "SGD" 75 | expname_full = f"{args['dataset']}_{args['model']}_epochs{num_epochs}_exp{exp_name}" 76 | print(expname_full) 77 | 78 | 79 | # output result file names 80 | data_dir = "./results/" 81 | data_filename = data_dir + expname_full + ".pickle" 82 | if os.path.exists(data_dir) is False: 83 | os.mkdir(data_dir) 84 | 85 | 86 | torch.set_default_dtype(torch.float64) 87 | torch.random.manual_seed(args["seed"]) 88 | 89 | def test(data, train=True): 90 | model.eval() 91 | 92 | correct = 0 93 | pred = model(data).max(dim=1)[1] 94 | 95 | if train: 96 | correct += pred[data.train_mask].eq(data.y[data.train_mask]).sum().item() 97 | return correct / (len(data.y[data.train_mask])) 98 | else: 99 | correct += pred[data.test_mask].eq(data.y[data.test_mask]).sum().item() 100 | return correct / (len(data.y[data.test_mask])) 101 | 102 | 103 | def train(data, plot=False): 104 | train_acc_list, test_acc_list, loss_history = list(), list(), list() 105 | for epoch in range(num_epochs): 106 | model.train() 107 | optimizer.zero_grad() 108 | out = model(data) 109 | loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask]) 110 | loss.backward() 111 | optimizer.step() 112 | 113 | train_acc = test(data) 114 | test_acc = test(data, train=False) 115 | train_acc_list.append(train_acc) 116 | test_acc_list.append(test_acc) 117 | loss_history.append(loss.item()) 118 | print('Epoch: {:03d}, Loss: {:.5f}, Train Acc: {:.5f}, Test Acc: {:.5f}'. 119 | format(epoch, loss.item(), train_acc, test_acc)) 120 | return loss_history, train_acc_list, test_acc_list 121 | 122 | 123 | 124 | # load data for GCN 125 | dataset = "PubMed" 126 | assert args["dataset"] == "PubMed" 127 | dataset = Planetoid(root='/tmp/PubMed', name='PubMed') 128 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 129 | turbo_device = 'cuda' if torch.cuda.is_available() else 'cpu' 130 | model = Net(dataset).to(device) 131 | data = dataset[0].to(device) 132 | print("\nDimension of GCN:", model.n_params) 133 | optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4) 134 | loss_history, train_acc_list, test_acc_list = train(data) 135 | 136 | print("\nFinal parameters of GCN:", ) 137 | for name, param in model.named_parameters(): 138 | print(name) 139 | print(param) 140 | print(param.min()) 141 | print(param.max()) 142 | print() 143 | 144 | # dump the data 145 | outdata = {} 146 | outdata['X'] = None 147 | outdata['fX'] = loss_history 148 | outdata['train_acc_list'] = train_acc_list 149 | outdata['test_acc_list'] = test_acc_list 150 | outdata['xopt'] = None 151 | outdata['fopt'] = min(loss_history) 152 | # add the run params 153 | outdata.update(args) 154 | pickle.dump(outdata,open(data_filename,"wb")) 155 | print(f"Dropped file: {data_filename}") 156 | 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /directionalvi/utils/load_data.py: -------------------------------------------------------------------------------- 1 | from synthetic_functions import * 2 | from rescale import * 3 | import scipy.io 4 | import torch 5 | from torch.utils.data import TensorDataset, DataLoader 6 | 7 | def load_synthetic_data(test_fun, n, **kwargs): 8 | """ 9 | load synthetic data 10 | Input: 11 | test_fun: a modified Botorch test function 12 | n: number of 
datapoints 13 | Output: 14 | x: torch tensor, random data from unit cube 15 | y: torch tensor, normalized and rescaled labels (w/ or w/o derivatives) 16 | """ 17 | torch.random.manual_seed(kwargs["seed"]) 18 | dim = test_fun.dim 19 | x_unit = torch.rand(n,dim) 20 | # evaluate in the true range 21 | lb, ub = test_fun.get_bounds() 22 | x = from_unit_cube(x_unit, lb, ub) 23 | if kwargs["derivative"]: 24 | y = test_fun.evaluate_true_with_deriv(x) 25 | else: 26 | y = test_fun.evaluate_true(x) 27 | # normalize y values (with or without derivatives) 28 | normalize(y, **kwargs) 29 | if kwargs["derivative"]: 30 | # mapping derivative values to unit cube 31 | f = y[..., 0].reshape(len(y),1) 32 | g = y[..., 1:].reshape(len(y),-1) 33 | g *= (ub - lb) 34 | y = torch.cat([f, g], 1) 35 | 36 | # add scaling factors to info_dict for further accurate plot 37 | info_dict = {} 38 | return x_unit, y, info_dict 39 | 40 | #use real_helens when calling in exp_script.py 41 | def load_helens(data_src_path, **args): 42 | """ 43 | load synthetic data 44 | Input: 45 | data_src_path: path to dataset 46 | filter_val: float64 in [0, 1]; code will filter out points which possess x-coordinate > filter_val 47 | Output: 48 | train_dataset: torch TensorDataset 49 | test_dataset: torch TensorDataset 50 | dim: x-dimension of data 51 | """ 52 | torch.random.manual_seed(args["seed"]) 53 | n = args["n_train"] 54 | filter_val = args["filter_val"] 55 | #n_test = args["n_test"] 56 | 57 | # Apply normalizations to dataset 58 | mat = scipy.io.loadmat(data_src_path) 59 | x = torch.tensor(np.float64(mat['mth_points'])).float() 60 | SCALE_0_FACTOR = x[:, 0].max() 61 | SCALE_1_FACTOR = x[:, 1].max() 62 | x[:, 0] = x[:, 0]/SCALE_0_FACTOR 63 | x[:, 1] = x[:, 1]/SCALE_1_FACTOR 64 | y = torch.tensor(np.float64(mat['mth_verts'])).float() 65 | SCALE_Y_FACTOR = max(y) 66 | y = y/SCALE_Y_FACTOR 67 | if args["derivative"]: 68 | dy = torch.tensor(np.float64(mat['mth_grads'])).float() 69 | dy = dy / SCALE_Y_FACTOR #modify derivatives due to y-scaling 70 | dy[:, 0] = dy[:, 0]*SCALE_0_FACTOR #modify derivatives due to x-scaling 71 | dy[:, 1] = dy[:, 1]*SCALE_1_FACTOR 72 | Y = torch.cat((y, dy), dim = 1).float() 73 | else: 74 | Y = y 75 | # FILTERING DATA 76 | # location concatenated with y and dy values, for the sake of filtering 77 | full_data = torch.cat((x, Y), dim=1).float() 78 | temp_full_data = np.array(full_data) 79 | def fun(x, val = filter_val): 80 | if x[0]>val or x[1]>val: 81 | return False 82 | else: 83 | return True 84 | filtered = filter(fun, temp_full_data) 85 | arr = [item for item in filtered] 86 | len_arr = len(arr) 87 | arr = arr[:len_arr] 88 | #recover X and Y from filtered concatenated values (arr) 89 | X = torch.tensor([item[0:2] for item in arr]) 90 | Y = torch.tensor([item[2:] for item in arr]) 91 | Y = Y.squeeze(-1) 92 | # shuffle the data 93 | indices = torch.randperm(X.size(0))[:X.size(0)] 94 | X = X[indices] 95 | Y = Y[indices] 96 | dim = X.shape[-1] 97 | train_x = X[:n, :].contiguous() 98 | train_y = Y[:n].contiguous() 99 | test_x = X[n:, :].contiguous() 100 | test_y = Y[n:].contiguous() 101 | 102 | if torch.cuda.is_available(): 103 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 104 | 105 | # dataset = TensorDataset(x, data) 106 | # Train-Test Split 107 | # train_dataset, test_dataset = torch.utils.data.random_split(dataset, [n, len_arr - n])#, generator=torch.Generator().manual_seed(42)) 108 | # dim = len(train_dataset[0][0]) 109 | info_dict = {"SCALE_x0_FACTOR": 
SCALE_0_FACTOR.item(), 110 | "SCALE_x1_FACTOR": SCALE_1_FACTOR.item(), 111 | "SCALE_Y_FACTOR": SCALE_Y_FACTOR[0].item(), 112 | "n_train":n, 113 | "n_test": len_arr - n} 114 | return train_x, train_y, test_x, test_y, dim, info_dict 115 | 116 | 117 | def load_3droad(data_src_path, **args): 118 | data = torch.Tensor(scipy.io.loadmat(data_src_path)['data']) 119 | X = data[:, :-2] 120 | X = X - X.min(0)[0] 121 | X = 2 * (X / X.max(0)[0]) - 1 122 | y = data[:, -1] 123 | y.sub_(y.mean(0)).div_(y.std(0)) 124 | 125 | # shuffle the data 126 | torch.random.manual_seed(args["seed"]) 127 | indices = torch.randperm(X.size(0))[:326155] 128 | X = X[indices] 129 | y = y[indices] 130 | dim = X.shape[-1] 131 | 132 | train_n = args["n_train"] 133 | # train_n = int(floor(0.8 * len(X))) 134 | train_x = X[:train_n, :].contiguous() 135 | train_y = y[:train_n].contiguous() 136 | 137 | test_x = X[train_n:, :].contiguous() 138 | test_y = y[train_n:].contiguous() 139 | 140 | if torch.cuda.is_available(): 141 | train_x, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 142 | 143 | train_dataset = TensorDataset(train_x, train_y) 144 | test_dataset = TensorDataset(test_x, test_y) 145 | 146 | info_dict = {"n_train":train_n, 147 | "n_test": len(X) - train_n} 148 | 149 | return train_x, train_y, test_x, test_y, dim, info_dict -------------------------------------------------------------------------------- /experiments/GNN_bo/GCN/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | 4 | import time 5 | import argparse 6 | import numpy as np 7 | 8 | import torch 9 | import torch.nn.functional as F 10 | import torch.optim as optim 11 | from torch.utils.data import DataLoader, TensorDataset 12 | 13 | from utils import * 14 | from models import GCN 15 | try: # import wandb if watch model on weights&biases 16 | import wandb 17 | except: 18 | pass 19 | 20 | # Training settings 21 | parser = argparse.ArgumentParser() 22 | parser.add_argument('--no-cuda', action='store_true', default=False, 23 | help='Disables CUDA training.') 24 | parser.add_argument('--fastmode', action='store_true', default=False, 25 | help='Validate during training pass.') 26 | parser.add_argument('--dataset', type=str, default='cora', help='dataset name') 27 | parser.add_argument('--seed', type=int, default=42, help='Random seed.') 28 | parser.add_argument('--epochs', type=int, default=200, 29 | help='Number of epochs to train.') 30 | parser.add_argument('--lr', type=float, default=0.01, 31 | help='Initial learning rate.') 32 | parser.add_argument('--weight_decay', type=float, default=5e-4, 33 | help='Weight decay (L2 loss on parameters).') 34 | parser.add_argument('--hidden', type=int, default=16, 35 | help='Number of hidden units.') 36 | parser.add_argument('--dropout', type=float, default=0.5, 37 | help='Dropout rate (1 - keep probability).') 38 | parser.add_argument('--n_train', type=int, default=10000, 39 | help='number of training data') 40 | parser.add_argument('--batch_size', type=int, default=1024, 41 | help='batch size') 42 | parser.add_argument('--train_percent', type=float, default=0.1, 43 | help='training label percentage') 44 | parser.add_argument('--watch_model', type=bool, default=False, 45 | help='watch model from wandb') 46 | parser.add_argument('--expid', type=int, default="-", 47 | help='experiment id') 48 | parser.add_argument('--lr_sched', type=str, default="step_lr", 49 | help='type 
of learning rate scheduler') 50 | 51 | args = parser.parse_args() 52 | args.cuda = not args.no_cuda and torch.cuda.is_available() 53 | 54 | if args.watch_model: # watch model on weights&biases 55 | wandb.init(project='L2C', entity='xinranzhu', 56 | name=f"{args.dataset}_exp{args.expid}") 57 | print("Experiment settings:") 58 | print(args) 59 | wandb.config.seed = args.seed 60 | wandb.config.dropout = args.dropout 61 | wandb.config.epochs = args.epochs 62 | wandb.config.lr = args.lr 63 | wandb.config.weight_decay = args.weight_decay 64 | wandb.config.hidden = args.hidden 65 | wandb.config.train_percent = args.train_percent 66 | wandb.config.expid = args.expid 67 | 68 | 69 | np.random.seed(args.seed) 70 | torch.manual_seed(args.seed) 71 | if args.cuda: 72 | torch.cuda.manual_seed(args.seed) 73 | 74 | # Load data 75 | if args.dataset == "cora": 76 | adj, features, labels, idx_train, idx_val, idx_test = load_data(dataset=args.dataset, 77 | train_percent=args.train_percent) 78 | if args.dataset == "reddit": 79 | adj, features, labels, idx_train, idx_val, idx_test = load_reddit(args.n_train) 80 | 81 | if args.dataset == "citeseer": 82 | adj, features, labels, idx_train, idx_val, idx_test = load_citeseer(train_percent=args.train_percent) 83 | 84 | # Model and optimizer 85 | model = GCN(nfeat=features.shape[1], 86 | nhid=args.hidden, 87 | nclass=labels.max().item() + 1, 88 | dropout=args.dropout) 89 | optimizer = optim.Adam(model.parameters(), 90 | lr=args.lr, weight_decay=args.weight_decay) 91 | 92 | if args.cuda: 93 | model.cuda() 94 | features = features.cuda() 95 | adj = adj.cuda() 96 | labels = labels.cuda() 97 | idx_train = idx_train.cuda() 98 | idx_val = idx_val.cuda() 99 | idx_test = idx_test.cuda() 100 | 101 | if args.lr_sched == "step_lr": 102 | milestones = [int(len(idx_train)/3), int(2*len(idx_train)/3)] 103 | lr_scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=0.1) 104 | elif args.lr_sched == "lambda_lr": 105 | lr_sched_fun = lambda epoch: 1.0/(epoch+1) 106 | lr_scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lr_sched_fun) 107 | else: 108 | lr_scheduler = None 109 | 110 | def train(epoch, print_loss=True, lr_scheduler=None): 111 | t = time.time() 112 | model.train() 113 | optimizer.zero_grad() 114 | output = model(features, adj) 115 | loss_train = F.nll_loss(output[idx_train], labels[idx_train]) 116 | wandb.log({'loss': loss_train}) 117 | acc_train = accuracy(output[idx_train], labels[idx_train]) 118 | loss_train.backward() 119 | optimizer.step() 120 | if lr_scheduler != None: 121 | variational_scheduler.step() 122 | if not args.fastmode: 123 | # Evaluate validation set performance separately, 124 | # deactivates dropout during validation run. 
125 | model.eval() 126 | output = model(features, adj) 127 | 128 | loss_val = F.nll_loss(output[idx_val], labels[idx_val]) 129 | acc_val = accuracy(output[idx_val], labels[idx_val]) 130 | if print_loss: 131 | print('Epoch: {:04d}'.format(epoch+1), 132 | 'loss_train: {:.4f}'.format(loss_train.item()), 133 | 'acc_train: {:.4f}'.format(acc_train.item()), 134 | 'loss_val: {:.4f}'.format(loss_val.item()), 135 | 'acc_val: {:.4f}'.format(acc_val.item()), 136 | 'time: {:.4f}s'.format(time.time() - t)) 137 | 138 | 139 | def test(): 140 | model.eval() 141 | output = model(features, adj) 142 | loss_test = F.nll_loss(output[idx_test], labels[idx_test]) 143 | acc_test = accuracy(output[idx_test], labels[idx_test]) 144 | print("Test set results:", 145 | "loss= {:.4f}".format(loss_test.item()), 146 | "accuracy= {:.4f}".format(acc_test.item())) 147 | 148 | 149 | # Train model 150 | t_total = time.time() 151 | for epoch in range(args.epochs): 152 | print_loss = True if epoch % 50 == 0 else False 153 | train(epoch, print_loss=print_loss) 154 | print("Optimization Finished!") 155 | print("Total time elapsed: {:.4f}s".format(time.time() - t_total)) 156 | 157 | # Testing 158 | test() 159 | wandb.save(f"a.out_{args.dataset}exp{args.expid}") 160 | 161 | -------------------------------------------------------------------------------- /directionalvi/RBFKernelDirectionalGrad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import torch 3 | 4 | from gpytorch.lazy.kronecker_product_lazy_tensor import KroneckerProductLazyTensor 5 | from gpytorch.kernels.rbf_kernel import RBFKernel, postprocess_rbf 6 | 7 | 8 | class RBFKernelDirectionalGrad(RBFKernel): 9 | r""" 10 | Pass in v1 and v2 through the params. If v1 has n_dir1 directions per 11 | point in x2 then it should be shape n1*n_dir1 x dim. The directions 12 | are assumed to be stored in blocks so that the first n_dir1 directions 13 | belong to x1[0] and the second n_dir1 directions belong to x1[1] etc. 14 | 15 | If you have a single set of global directions such as torch.eye(dim), then 16 | you can repeat those to make v1 and v2 with 17 | v1 = torch.eye(dim).repeat(n1,1) 18 | 19 | Args: 20 | :attr:`batch_shape` (torch.Size, optional): 21 | Set this if you want a separate lengthscale for each 22 | batch of input data. It should be `b` if :attr:`x1` is a `b x n x d` tensor. Default: `torch.Size([])`. 23 | :attr:`active_dims` (tuple of ints, optional): 24 | Set this if you want to compute the covariance of only a few input dimensions. The ints 25 | corresponds to the indices of the dimensions. Default: `None`. 26 | :attr:`lengthscale_prior` (Prior, optional): 27 | Set this if you want to apply a prior to the lengthscale parameter. Default: `None`. 28 | :attr:`lengthscale_constraint` (Constraint, optional): 29 | Set this if you want to apply a constraint to the lengthscale parameter. Default: `Positive`. 30 | :attr:`eps` (float): 31 | The minimum value that the lengthscale can take (prevents divide by zero errors). Default: `1e-6`. 32 | 33 | Attributes: 34 | :attr:`lengthscale` (Tensor): 35 | The lengthscale parameter. Size/shape of parameter depends on the 36 | :attr:`ard_num_dims` and :attr:`batch_shape` arguments. 
37 | 38 | 39 | """ 40 | 41 | def forward(self, x1, x2, diag=False, **params): 42 | batch_shape = x1.shape[:-2] 43 | n_batch_dims = len(batch_shape) 44 | n1, d = x1.shape[-2:] 45 | n2 = x2.shape[-2] 46 | 47 | 48 | v1 = params['v1'] 49 | v2 = params['v2'] 50 | # number of directions per point 51 | n_dir1 = int(v1.shape[-2]/n1) 52 | n_dir2 = int(v2.shape[-2]/n2) 53 | assert n_dir1 == n_dir2, "v1 and v2 must contain same number of directions" 54 | 55 | self.set_num_directions(n_dir1) 56 | # normalize directions 57 | v1 = (v1.T/torch.norm(v1,dim=1)).T 58 | v2 = (v2.T/torch.norm(v2,dim=1)).T 59 | 60 | # K = torch.zeros(*batch_shape, n1 * (d + 1), n2 * (d + 1), device=x1.device, dtype=x1.dtype) 61 | K = torch.zeros(*batch_shape, n1 * (n_dir1 + 1), n2 * (n_dir2 + 1), device=x1.device, dtype=x1.dtype) 62 | K = torch.zeros(*batch_shape, n1 * (n_dir1 + 1), n2 * (n_dir2 + 1), device=x1.device, dtype=x1.dtype) 63 | 64 | 65 | if not diag: 66 | # Scale the inputs by the lengthscale (for stability) 67 | x1_ = x1.div(self.lengthscale) 68 | x2_ = x2.div(self.lengthscale) 69 | 70 | # 1) Kernel block 71 | diff = self.covar_dist(x1_, x2_, square_dist=True, dist_postprocess_func=postprocess_rbf, **params) 72 | K_11 = diff 73 | K[..., :n1, :n2] = K_11 74 | 75 | 76 | # 2) First gradient block 77 | x2_v2 = x2_.reshape(n2,1,d).bmm(torch.transpose(v2.reshape(n2,n_dir2,d),-2,-1)) 78 | x1_v2 = x1_ @ v2.T 79 | outer = x1_v2 - x2_v2.flatten() 80 | # permute cols so we get blocks for v1,v2,v3,... 81 | pi1 = torch.arange(n2 * (n_dir2)).view(n2,n_dir2).t().reshape((n2 * (n_dir2))) 82 | outer1 = outer[:,pi1]/ self.lengthscale.unsqueeze(-2) 83 | K[..., :n1, n2:] = outer1 * K_11.repeat([*([1] * (n_batch_dims + 1)), n_dir2]) 84 | 85 | # Second gradient block 86 | x1_v1 = x1_.reshape(n1,1,d).bmm(torch.transpose(v1.reshape(n1,n_dir1,d),-2,-1)) 87 | x2_v1 = x2_ @ v1.T 88 | outer = x1_v1.flatten() - x2_v1 89 | # permute cols so we get blocks for v1,v2,v3,... 
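            # pi2 below reorders the n1*n_dir1 derivative columns from point-major order
            # (all directions of x1[0] first) to direction-major order (all points for the
            # first direction, then the second, ...), mirroring the pi1 permutation above.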
90 | pi2 = torch.arange(n1 * (n_dir1)).view(n1,n_dir1).t().reshape((n1 * (n_dir1))) 91 | outer2 = outer[:,pi2] 92 | outer2 = outer2.t() / self.lengthscale.unsqueeze(-2) 93 | K[..., n1:, :n2] = -outer2 * K_11.repeat([n_dir1,*([1] * (n_batch_dims + 1))]) 94 | 95 | 96 | # 4) Hessian block (n1*n_dir1, n2*n_dir2) 97 | outer3 = outer1.repeat(1, n_dir2, 1) * outer2.repeat(1,1,n_dir1) 98 | # kronecker product term 99 | kp = v1 @ v2.T / self.lengthscale.pow(2) 100 | kp = kp[:,pi1][pi2,:] 101 | chain_rule = kp - outer3 102 | K[..., n1:, n2:] = chain_rule * K_11.repeat([*([1] * n_batch_dims), n_dir1,n_dir2]) 103 | 104 | # Apply a perfect shuffle permutation to match the MutiTask ordering 105 | pi1 = torch.arange(n1 * (n_dir1 + 1)).view(n_dir1 + 1, n1).t().reshape((n1 * (n_dir1 + 1))) 106 | pi2 = torch.arange(n2 * (n_dir2 + 1)).view(n_dir2 + 1, n2).t().reshape((n2 * (n_dir2 + 1))) 107 | K = K[..., pi1, :][..., :, pi2] 108 | return K 109 | 110 | else: 111 | if not (n1 == n2 and torch.eq(x1, x2).all() and n_dir1 == n_dir2 and torch.eq(v1, v2).all()): 112 | raise RuntimeError("diag=True only works when x1 == x2 and v1 == v2") 113 | 114 | kernel_diag = super(RBFKernelDirectionalGrad, self).forward(x1, x2, diag=True) 115 | grad_diag = torch.ones(*batch_shape, n2, n_dir2, device=x1.device, dtype=x1.dtype) / self.lengthscale.pow(2) 116 | grad_diag = grad_diag.transpose(-1, -2).reshape(*batch_shape, n2 * n_dir2) 117 | k_diag = torch.cat((kernel_diag, grad_diag), dim=-1) 118 | pi = torch.arange(n2 * (n_dir2 + 1)).view(n_dir2 + 1, n2).t().reshape((n2 * (n_dir2 + 1))) 119 | return k_diag[..., pi] 120 | 121 | def set_num_directions(self,num_directions): 122 | self.n_dir1 = num_directions 123 | 124 | def num_outputs_per_input(self, x1, x2): 125 | return self.n_dir1 +1 126 | 127 | 128 | 129 | if __name__ == '__main__': 130 | 131 | torch.manual_seed(0) 132 | # generate training data 133 | n1 = 100 134 | n2 = n1 135 | dim = 2 136 | train_x = torch.rand(n1,dim) 137 | # train_x2 = torch.rand(n2,dim) 138 | train_x2 = train_x 139 | # set directions 140 | n_directions = 2 141 | # v1 = torch.eye(dim)[:n_directions] 142 | v1 = torch.rand(n_directions,dim) 143 | v1 = v1.repeat(n1,1) 144 | # v2 = torch.eye(dim)[:n_directions] 145 | # v2 = torch.rand(n_directions,dim) 146 | # v2 = v2.repeat(n2,1) 147 | v2 = v1 148 | v1 = (v1.T/torch.norm(v1,dim=1)).T 149 | v2 = (v2.T/torch.norm(v2,dim=1)).T 150 | 151 | k = RBFKernelDirectionalGrad() 152 | params = {'v1':v1,'v2':v2} 153 | K = k(train_x,train_x2, **params) 154 | print(K.detach().numpy().shape) 155 | 156 | # torch.cholesky(K.add_jitter().evaluate()) 157 | # verify against RBFKernelGrad 158 | # from gpytorch.kernels import RBFKernelGrad 159 | # kk = RBFKernelGrad() 160 | # KK = kk(train_x,train_x2) 161 | # print(KK.detach().numpy() - K.detach().numpy()) 162 | -------------------------------------------------------------------------------- /experiments/GNN_bo/plot_traj.py: -------------------------------------------------------------------------------- 1 | from sys import meta_path 2 | import matplotlib.pyplot as plt 3 | import matplotlib.pylab as pl 4 | # import seaborn as sns 5 | import pandas as pd 6 | import pickle 7 | import numpy as np 8 | import glob 9 | import os 10 | 11 | ADD_SHARED=True 12 | FONTSIZE=25 13 | MARKERSIZE=20 14 | FIGURESUZE=(10,7) 15 | ALPHA=0.2 16 | LINEWIDTH=5 17 | PADDING=0.1 18 | 19 | if ADD_SHARED: 20 | style_dict = {"SGD": ["GD", "dotted", '#2ca02c'], 21 | "TURBO": ["TuRBO", "dashed", '#ff7f0e'], 22 | "DSVGP1": ["TuRBO-DPPGPR1", "solid", '#1f77b4'], 23 
| "DSVGP2": ["TuRBO-DPPGPR2", "solid", '#d62728'], 24 | "DSVGP3": ["TuRBO-DPPGPR3", "solid", '#e377c2'], 25 | "DSVGP_shared1": ["TuRBO-DPPGPR-Shared1", "dotted", '#1f77b4'], 26 | "DSVGP_shared2": ["TuRBO-DPPGPR-Shared2", "dotted", '#d62728'], 27 | "DSVGP_shared3": ["TuRBO-DPPGPR-Shared3", "dotted", '#e377c2'], 28 | "SVGP": ["TuRBO-PPGPR", "dashed", '#9467bd'], 29 | "BO": ["BO", "dashed", '#8c564b'], 30 | "random": ["Random", "dotted", "#7f7f7f"] } 31 | else: 32 | style_dict = {"SGD": ["GD", "dotted", '#2ca02c'], 33 | "TURBO": ["TuRBO", "dashed", '#ff7f0e'], 34 | "DSVGP1": ["TuRBO-DPPGPR1", "solid", '#1f77b4'], 35 | "DSVGP2": ["TuRBO-DPPGPR2", "solid", '#d62728'], 36 | "DSVGP3": ["TuRBO-DPPGPR3", "solid", '#e377c2'], 37 | "SVGP": ["TuRBO-PPGPR", "dashed", '#9467bd'], 38 | "BO": ["BO", "dashed", '#8c564b'], 39 | "random": ["Random", "dotted", "#7f7f7f"] } 40 | 41 | 42 | def plot_average(style_dict, dataset, methods_list, data_type='fX', deleted_methods=None): 43 | assert data_type == 'fX' or data_type == 'train_acc_list' or data_type == 'test_acc_list' 44 | 45 | # sort to fix color for each method 46 | colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 47 | 48 | # collect data for each method 49 | data_files_dict = {} 50 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=FIGURESUZE) 51 | 52 | for i, method in enumerate(methods_list): 53 | data_files_dict[method] = glob.glob(f"./results/{dataset}_{method}*.pickle") 54 | fX_set = [] 55 | for ii in range(len(data_files_dict[method])): 56 | ff = data_files_dict[method][ii] 57 | d = pickle.load(open(ff, "rb")) 58 | if method == "TURBO" or method == "SVGP": 59 | assert d['model'] == method 60 | elif method == "DSVGP1" or method == "DSVGP_shared1": 61 | assert d['model'].startswith("DSVGP") and d['num_directions'] == 1 62 | elif method == "DSVGP2" or method == "DSVGP_shared2": 63 | assert d['model'].startswith("DSVGP") and d['num_directions'] == 2 64 | elif method == "DSVGP3" or method == "DSVGP_shared3": 65 | assert d['model'].startswith("DSVGP") and d['num_directions'] == 3 66 | 67 | fX = d[data_type] 68 | fXmin = np.minimum.accumulate(fX) if data_type == 'fX' else np.maximum.accumulate(fX) 69 | fX_set.append(fXmin) 70 | 71 | print(f"Averaging {len(fX_set)} trials for {method}\n") 72 | 73 | if deleted_methods == None or method not in deleted_methods: 74 | label_cur = style_dict[method][0] 75 | linestyle_cur = style_dict[method][1] 76 | color_cur = style_dict[method][2] 77 | # find mean and std of fX_set 78 | if len(fX_set) > 1: 79 | fX_mean = np.mean(fX_set, axis=0) 80 | ax.plot(fX_mean,linewidth=LINEWIDTH, 81 | color=color_cur,label=label_cur,linestyle=linestyle_cur) 82 | fX_std = np.std(fX_set, axis=0, ddof=0) 83 | ax.fill_between(range(len(fX_mean)), fX_mean-fX_std, fX_mean+fX_std, color=color_cur, alpha=ALPHA) 84 | elif len(fX_set) == 1: 85 | ax.plot(fX_set[0],linewidth=LINEWIDTH, 86 | color=color_cur,label=label_cur,linestyle=linestyle_cur) 87 | 88 | 89 | 90 | 91 | # plot 92 | rc = {'figure.figsize':(12,6), 93 | 'axes.facecolor':'white', 94 | 'axes.grid' : True, 95 | 'grid.color': '.8', 96 | 'font.family':'Times New Roman', 97 | 'font.size' : FONTSIZE} 98 | plt.rcParams.update(rc) 99 | if data_type == 'fX': 100 | ylabel = 'Training loss' 101 | plt.ylim( (0.65e-2,1.35e2) ) 102 | plt.yscale("log") 103 | elif data_type == 'test_acc_list': 104 | ylabel = 'Test accuracy' 105 | elif data_type == 'train_acc_list': 106 | plt.ylim((0.28, 1.05)) 107 | ylabel = 'Training accuracy' 108 
| 109 | plt.xticks(fontsize=FONTSIZE) 110 | plt.yticks(fontsize=FONTSIZE) 111 | plt.ylabel(ylabel, fontsize=FONTSIZE) 112 | plt.xlabel("Number of evaluations", fontsize=FONTSIZE) 113 | # box = plt.get_position() 114 | # plt.set_position([box.x0, box.y0, box.width * 0.8, box.height]) 115 | # plt.legend(loc='best',prop={'size': fontsize}) 116 | 117 | plt.grid() 118 | # plt.legend(bbox_to_anchor=(1.04,1), loc="upper left") 119 | 120 | if ADD_SHARED: 121 | figurename = f"TuRBO_{dataset}_{data_type}_add_shared.pdf" 122 | else: 123 | figurename = f"TuRBO_{dataset}_{data_type}.pdf" 124 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 125 | fig.savefig(figurepath, bbox_inches = 'tight', pad_inches = PADDING) 126 | print("Figure saved:", figurepath) 127 | 128 | def plot_legend(style_dict, dataset, methods_list): 129 | plt.clf() 130 | plt.cla() 131 | 132 | if ADD_SHARED: 133 | figurename = f"TuRBO_{dataset}_legend_add_shared.pdf" 134 | else: 135 | figurename = f"TuRBO_{dataset}_legend.pdf" 136 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 137 | 138 | color_set = [style_dict[method][2] for method in methods_list] 139 | linestyle_set = [style_dict[method][1] for method in methods_list] 140 | label_set = [style_dict[method][0] for method in methods_list] 141 | ncol = len(style_dict)//2 142 | 143 | fig, ax = plt.subplots(figsize=(10,8)) 144 | f = lambda ls,c,label: ax.plot([],[], linewidth=LINEWIDTH+1, linestyle=ls, color=c, label=label)[0] 145 | handles = [f(linestyle_set[i], color_set[i], label_set[i]) for i in range(len(methods_list))] 146 | #legend 147 | LABEL_SIZE=15 148 | figsize = (5, 1) 149 | fig_leg = plt.figure(figsize=figsize) 150 | legend_properties = {'weight': 'bold', 'size': LABEL_SIZE} 151 | ax_leg = fig_leg.add_subplot(111) 152 | 153 | ax_leg.set_facecolor('white') 154 | ax_leg.grid(False) 155 | ax_leg.set_axis_off() 156 | ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=ncol, prop=legend_properties, facecolor="white", edgecolor="grey") 157 | fig_leg.savefig(figurepath, bbox_inches = 'tight') 158 | 159 | print("Figure saved:", figurepath) 160 | 161 | if __name__ == "__main__": 162 | 163 | # dataset="squared" 164 | # methods_list = ["BO", "random"] 165 | 166 | dataset="PubMed" 167 | if ADD_SHARED: 168 | methods_list = ["random", "SGD", "BO", "TURBO", "SVGP", 169 | "DSVGP1", "DSVGP2", "DSVGP3", 170 | "DSVGP_shared1", "DSVGP_shared2", "DSVGP_shared3"] 171 | else: 172 | methods_list = ["random", "SGD", "BO", "TURBO", "SVGP", 173 | "DSVGP1", "DSVGP2", "DSVGP3"] 174 | 175 | data_type='fX' 176 | # data_type='train_acc_list' 177 | plot_average(style_dict, dataset, methods_list, data_type, deleted_methods=None) 178 | plot_legend(style_dict, dataset, methods_list) 179 | 180 | 181 | 182 | -------------------------------------------------------------------------------- /experiments/uci_dfree/test.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from scipy.io import loadmat 4 | import torch 5 | import gpytorch 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import os 11 | import sys 12 | sys.path.append("../") 13 | sys.path.append("../../directionalvi/utils") 14 | sys.path.append("../../directionalvi") 15 | import dfree_directional_vi 16 | import traditional_vi 17 | from csv_dataset import csv_dataset 18 | from metrics import MSE 19 | import pickle 20 | 21 | 22 | # load a 
pickle with the run params 23 | args = sys.argv 24 | param_filename = args[1] 25 | run_params = pickle.load(open(param_filename,"rb")) 26 | num_inducing =run_params['num_inducing'] 27 | num_directions =run_params['num_directions'] 28 | minibatch_size =run_params['minibatch_size'] 29 | num_epochs =run_params['num_epochs'] 30 | verbose =run_params['verbose'] 31 | inducing_data_initialization =run_params['inducing_data_initialization'] 32 | use_ngd =run_params['use_ngd'] 33 | use_ciq =run_params['use_ciq'] 34 | num_contour_quadrature= run_params['num_contour_quadrature'] 35 | learning_rate_hypers = run_params['learning_rate_hypers'] 36 | learning_rate_ngd = run_params['learning_rate_ngd'] 37 | lr_gamma = run_params['lr_gamma'] 38 | lr_benchmarks = run_params['lr_benchmarks'] 39 | lr_sched = run_params['lr_sched'] 40 | mll_type = run_params['mll_type'] 41 | seed = run_params['seed'] 42 | base_name = run_params['base_name'] 43 | data_file = run_params['data_file'] 44 | mode = run_params['mode'] 45 | 46 | # make the learning rate schedule 47 | assert lr_sched in [None, "MultiStepLR", "LambdaLR"], "Not a valid choice of lr_sched" 48 | if lr_sched is None: 49 | pass 50 | elif lr_sched == "MultiStepLR": 51 | def lr_sched(epoch): 52 | a = np.sum(lr_benchmarks < epoch) 53 | # lr_gamma should be > 1 54 | return (lr_gamma)**a 55 | elif lr_sched == "LambdaLR": 56 | lr_sched = lambda epoch: 1./(1+lr_gamma*epoch) 57 | 58 | # set the seed 59 | torch.random.manual_seed(seed) 60 | 61 | # output file names 62 | #data_dir = "./output/" 63 | data_dir = run_params['data_dir'] 64 | model_filename = data_dir + "model_"+ base_name + ".model" 65 | data_filename = data_dir + "data_" + base_name + ".pickle" 66 | if os.path.exists(data_dir) is False: 67 | os.mkdir(data_dir) 68 | 69 | # load data 70 | ff = loadmat(data_file) 71 | X_data = torch.tensor(ff['data'][:,:-1]) # @Leo double check this is right 72 | y_data = torch.tensor(ff['data'][:,-1]) # @Leo double check this is right 73 | n, dim = X_data.shape 74 | 75 | # make sure right type 76 | X_data =X_data.float() 77 | y_data =y_data.float() 78 | 79 | # standardize data 80 | lb = torch.min(X_data,axis=0)[0] 81 | ub = torch.max(X_data,axis=0)[0] 82 | X_data = (X_data - lb)/(ub-lb) 83 | med = torch.median(y_data) 84 | std = torch.std(y_data) 85 | y_data = (y_data - med)/std 86 | 87 | # make a torch dataset 88 | dataset = TensorDataset(X_data,y_data) 89 | 90 | # train-test split 91 | n_train = int(0.8*n) 92 | n_test = n - n_train 93 | train_dataset,test_dataset = torch.utils.data.random_split(dataset,[n_train,n_test]) 94 | 95 | # make dataloaders 96 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 97 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 98 | 99 | 100 | if mode == "DSVGP": 101 | # train 102 | print("\n\n---DirectionalGradVGP---") 103 | print(f"Start training with {n} trainig data of dim {dim}") 104 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 105 | t1 = time.time() 106 | model,likelihood = dfree_directional_vi.train_gp(train_dataset, 107 | num_inducing=num_inducing, 108 | num_directions=num_directions, 109 | minibatch_size = minibatch_size, 110 | minibatch_dim = num_directions, 111 | num_epochs =num_epochs, 112 | learning_rate_hypers=learning_rate_hypers, 113 | learning_rate_ngd=learning_rate_ngd, 114 | inducing_data_initialization=inducing_data_initialization, 115 | use_ngd = use_ngd, 116 | use_ciq = use_ciq, 117 | lr_sched=lr_sched, 118 | 
mll_type=mll_type, 119 | num_contour_quadrature=num_contour_quadrature, 120 | verbose=verbose, 121 | ) 122 | t2 = time.time() 123 | train_time = t2 - t1 124 | 125 | # save the model 126 | torch.save(model.state_dict(),model_filename) 127 | 128 | # test 129 | means, variances = dfree_directional_vi.eval_gp(test_dataset,model,likelihood, 130 | num_directions=num_directions, 131 | minibatch_size=minibatch_size, 132 | minibatch_dim=num_directions) 133 | t3 = time.time() 134 | test_time = t3 - t2 135 | 136 | elif mode == "SVGP": 137 | # train 138 | print("\n\n---Traditional SVGP---") 139 | print(f"Start training with {n} training data of dim {dim}") 140 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 141 | t1 = time.time() 142 | model,likelihood = traditional_vi.train_gp(train_dataset,dim, 143 | num_inducing=num_inducing, 144 | minibatch_size=minibatch_size, 145 | num_epochs=num_epochs, 146 | use_ngd=use_ngd, 147 | use_ciq=use_ciq, 148 | learning_rate_hypers=learning_rate_hypers, 149 | learning_rate_ngd=learning_rate_ngd, 150 | lr_sched=lr_sched, 151 | num_contour_quadrature=num_contour_quadrature, 152 | mll_type=mll_type, 153 | verbose=verbose) 154 | t2 = time.time() 155 | train_time = t2 - t1 156 | 157 | # save the model 158 | torch.save(model.state_dict(),model_filename) 159 | 160 | # test 161 | means, variances = traditional_vi.eval_gp(test_dataset,model,likelihood, 162 | num_inducing=num_inducing, 163 | minibatch_size=n_test) 164 | t3 = time.time() 165 | test_time = t3 - t2 166 | 167 | 168 | # collect the test function values 169 | test_f = torch.zeros(n_test) 170 | for ii in range(n_test): 171 | test_f[ii] = test_dataset[ii][1] # function value 172 | 173 | # compute MSE 174 | test_mse = MSE(test_f,means) 175 | # compute mean negative predictive density 176 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean() 177 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 178 | print(f"Training time: {train_time:.2f} sec, testing time: {test_time:.2f} sec") 179 | 180 | # dump the data 181 | outdata = {} 182 | outdata['test_mse'] = test_mse 183 | outdata['test_nll'] = test_nll 184 | outdata['train_time'] = train_time 185 | outdata['test_time'] = test_time 186 | # add the run params 187 | outdata.update(run_params) 188 | pickle.dump(outdata,open(data_filename,"wb")) 189 | print(f"Dropped file: {data_filename}") 190 | -------------------------------------------------------------------------------- /directionalvi/traditional_vi.py: -------------------------------------------------------------------------------- 1 | from gpytorch.models import ApproximateGP 2 | from gpytorch.variational import CholeskyVariationalDistribution 3 | from gpytorch.variational import VariationalStrategy 4 | from torch.utils.data import TensorDataset, DataLoader 5 | import math 6 | import time 7 | import torch 8 | import sys 9 | import gpytorch 10 | from matplotlib import pyplot as plt 11 | import numpy as np 12 | from utils.count_params import count_params 13 | try: # import wandb if watch model on weights&biases 14 | import wandb 15 | except: 16 | pass 17 | 18 | 19 | class GPModel(ApproximateGP): 20 | def __init__(self, inducing_points,**kwargs): 21 | if "variational_distribution" in kwargs and kwargs["variational_distribution"] == "NGD": 22 | variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0)) 23 | else: 24 | variational_distribution = 
CholeskyVariationalDistribution(inducing_points.size(0)) 25 | if "variational_strategy" in kwargs and kwargs["variational_strategy"] == "CIQ": 26 | variational_strategy = gpytorch.variational.CiqVariationalStrategy( 27 | self, inducing_points, variational_distribution, learn_inducing_locations=True) 28 | else: 29 | variational_strategy = VariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) 30 | super(GPModel, self).__init__(variational_strategy) 31 | self.mean_module = gpytorch.means.ConstantMean() 32 | self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernel()) 33 | 34 | def forward(self, x): 35 | mean_x = self.mean_module(x) 36 | covar_x = self.covar_module(x) 37 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 38 | 39 | def train_gp(train_dataset,dim,num_inducing=128, 40 | minibatch_size=1, 41 | num_epochs=1, 42 | use_ngd=False, 43 | use_ciq=False, 44 | learning_rate_hypers=0.01, 45 | learning_rate_ngd=0.1, 46 | lr_sched=None, 47 | mll_type="ELBO", 48 | num_contour_quadrature=15, 49 | watch_model=False,gamma=0.1, 50 | verbose=True, 51 | **args): 52 | 53 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 54 | n_samples = len(train_dataset) 55 | 56 | # setup model 57 | # inducing_points = train_x[:num_inducing, :] 58 | inducing_points = torch.rand(num_inducing,dim) 59 | if torch.cuda.is_available(): 60 | inducing_points = inducing_points.cuda() 61 | 62 | if use_ciq: 63 | gpytorch.settings.num_contour_quadrature(num_contour_quadrature) 64 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD",variational_strategy="CIQ") 65 | elif use_ngd: 66 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD") 67 | else: 68 | model = GPModel(inducing_points=inducing_points) 69 | likelihood = gpytorch.likelihoods.GaussianLikelihood() 70 | 71 | if torch.cuda.is_available(): 72 | model = model.cuda() 73 | likelihood = likelihood.cuda() 74 | if watch_model: 75 | wandb.watch(model) 76 | 77 | model.train() 78 | likelihood.train() 79 | 80 | if verbose: 81 | param_total_dim = count_params(model,likelihood) 82 | 83 | # optimizers 84 | if use_ngd or use_ciq: 85 | variational_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data=n_samples, lr=learning_rate_ngd) 86 | hyperparameter_optimizer = torch.optim.Adam([ 87 | {'params': model.hyperparameters()}, 88 | {'params': likelihood.parameters()}, 89 | ], lr=learning_rate_hypers) 90 | else: 91 | variational_optimizer = torch.optim.Adam([ 92 | {'params': model.variational_parameters()}, 93 | ], lr=learning_rate_hypers) 94 | hyperparameter_optimizer = torch.optim.Adam([ 95 | {'params': model.hyperparameters()}, 96 | {'params': likelihood.parameters()}, 97 | ], lr=learning_rate_hypers) 98 | 99 | # learning rate scheduler 100 | #lambda1 = lambda epoch: 1.0/(1 + epoch) 101 | if lr_sched == "step_lr": 102 | num_batches = int(np.ceil(n_samples/minibatch_size)) 103 | milestones = [int(num_epochs*num_batches/3), int(2*num_epochs*num_batches/3)] 104 | hyperparameter_scheduler = torch.optim.lr_scheduler.MultiStepLR(hyperparameter_optimizer, milestones, gamma=gamma) 105 | variational_scheduler = torch.optim.lr_scheduler.MultiStepLR(variational_optimizer, milestones, gamma=gamma) 106 | elif lr_sched is None: 107 | lr_sched = lambda epoch: 1.0 108 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 109 | variational_scheduler = 
torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 110 | else: 111 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 112 | variational_scheduler = torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 113 | 114 | # Our loss object. We're using the VariationalELBO 115 | if mll_type=="ELBO": 116 | print("Using ELBO") 117 | mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=n_samples) 118 | elif mll_type=="PLL": 119 | print("Using PLL") 120 | mll = gpytorch.mlls.PredictiveLogLikelihood(likelihood, model, num_data=n_samples) 121 | 122 | epochs_iter = range(num_epochs) 123 | total_step=0 124 | for i in epochs_iter: 125 | minibatch_iter = train_loader 126 | 127 | for x_batch, y_batch in minibatch_iter: 128 | if torch.cuda.is_available(): 129 | x_batch = x_batch.cuda() 130 | y_batch = y_batch.cuda() 131 | 132 | variational_optimizer.zero_grad() 133 | hyperparameter_optimizer.zero_grad() 134 | output = likelihood(model(x_batch)) 135 | loss = -mll(output, y_batch) 136 | if watch_model: 137 | wandb.log({"loss": loss.item()}) 138 | loss.backward() 139 | # step optimizers and learning rate schedulers 140 | variational_optimizer.step() 141 | variational_scheduler.step() 142 | hyperparameter_optimizer.step() 143 | hyperparameter_scheduler.step() 144 | 145 | if total_step % 50 == 0 and verbose: 146 | means = output.mean 147 | stds = output.variance.sqrt() 148 | nll = -torch.distributions.Normal(means, stds).log_prob(y_batch).mean() 149 | print(f"Epoch: {i}; total_step: {total_step}, loss: {loss.item()}, nll: {nll}") 150 | 151 | total_step +=1 152 | sys.stdout.flush() 153 | 154 | 155 | 156 | if verbose: 157 | print(f"Done! loss: {loss.item()}") 158 | print("\nDone Training!") 159 | sys.stdout.flush() 160 | return model, likelihood 161 | 162 | def eval_gp(test_dataset,model,likelihood, mll_type="ELBO", num_inducing=128,minibatch_size=1): 163 | 164 | dim = len(test_dataset[0][0]) 165 | n_test = len(test_dataset) 166 | test_loader = DataLoader(test_dataset, batch_size=minibatch_size, shuffle=False) 167 | 168 | model.eval() 169 | likelihood.eval() 170 | 171 | means = torch.tensor([0.]) 172 | variances = torch.tensor([0.]) 173 | with torch.no_grad(): 174 | for x_batch, y_batch in test_loader: 175 | if torch.cuda.is_available(): 176 | x_batch = x_batch.cuda() 177 | y_batch = y_batch.cuda() 178 | preds = likelihood(model(x_batch)) 179 | means = torch.cat([means, preds.mean.cpu()]) 180 | variances = torch.cat([variances, preds.variance.cpu()]) 181 | means = means[1:] 182 | variances = variances[1:] 183 | 184 | return means, variances 185 | -------------------------------------------------------------------------------- /experiments/stellarator_regression/plot_stellarator.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import os.path as osp 4 | import argparse 5 | import pickle 6 | from operator import itemgetter 7 | from argparse import Namespace 8 | import numpy as np 9 | import pandas as pd 10 | from matplotlib import pyplot as plt 11 | import matplotlib 12 | import pylab 13 | 14 | ADD_SHARED=True 15 | ncol=1 16 | FONTSIZE=20 17 | MARKERSIZE=15 18 | FIGURESUZE=(10,7) 19 | ALPHA=0.2 20 | ALPHA_MARKER=0.8 21 | LINEWIDTH=4 22 | 23 | # ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', 24 | # '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] 25 | 26 | if ADD_SHARED: 27 | data = 
pickle.load(open("./data/stellarator_plot_data_p3_with_shared.pickle", "rb" )) 28 | methods_list = ['PPGPR', 'SVGP', 29 | 'DPPGPR2', 'DSVGP2', 'DPPGPR1', 'DSVGP1', 'DPPGPR3', 'DSVGP3', 30 | 'DPPGPR-Shared1', 'DPPGPR-Shared2', 'DPPGPR-Shared3', 31 | 'DSVGP-Shared1', 'DSVGP-Shared2', 'DSVGP-Shared3'] 32 | styles = {"PPGPR": ["PPGPR", "solid", '#9467bd', 'o'], 33 | "SVGP": ["SVGP", "solid", '#1f77b4', '*'], 34 | "DPPGPR1": ["DPPGPR1", "solid", '#2ca02c', 'v'], 35 | "DSVGP1": ["DSVGP1","solid", '#8c564b', 'd'], 36 | "DPPGPR2": ["DPPGPR2", "solid", '#d62728', 'p'], 37 | "DSVGP2": ["DSVGP2", "solid", '#ff7f0e', 'X'], 38 | "DPPGPR3": ["DPPGPR3", "solid", '#e377c2', '^'], 39 | "DSVGP3": ["DSVGP3", "solid", '#7f7f7f', '>'], 40 | "DPPGPR-Shared1": ["DPPGPR-Shared1", "dashed", '#2ca02c', 'v'], 41 | "DSVGP-Shared1": ["DSVGP-Shared1","dashed", '#8c564b', 'd'], 42 | "DPPGPR-Shared2": ["DPPGPR-Shared2", "dashed", '#d62728', 'p'], 43 | "DSVGP-Shared2": ["DSVGP-Shared2", "dashed", '#ff7f0e', 'X'], 44 | "DPPGPR-Shared3": ["DPPGPR-Shared3", "dashed", '#e377c2', '^'], 45 | "DSVGP-Shared3": ["DSVGP-Shared3", "dashed", '#7f7f7f', '>'], 46 | } 47 | ylim = [-2.4, -0.6] 48 | yticks = [-2.0, -1.6, -1.2, -0.8] 49 | legend = False 50 | else: 51 | data = pickle.load(open("./data/stellarator_plot_data_p3.pickle", "rb" )) 52 | methods_list = ['PPGPR', 'SVGP', 'DPPGPR2', 'DSVGP2', 'DPPGPR1', 'DSVGP1', 'DPPGPR3', 'DSVGP3'] 53 | styles = {"PPGPR": ["PPGPR", "solid", '#9467bd', 'o'], 54 | "SVGP": ["SVGP", "solid", '#1f77b4', '*'], 55 | "DPPGPR1": ["DPPGPR1", "solid", '#2ca02c', 'v'], 56 | "DSVGP1": ["DSVGP1","solid", '#8c564b', 'd'], 57 | "DPPGPR2": ["DPPGPR2", "solid", '#d62728', 'p'], 58 | "DSVGP2": ["DSVGP2", "solid", '#ff7f0e', 'X'], 59 | "DPPGPR3": ["DPPGPR3", "solid", '#e377c2', '^'], 60 | "DSVGP3": ["DSVGP3", "solid", '#7f7f7f', '>'], 61 | } 62 | ylim = [-2.4, -1.3] 63 | yticks = [-2.2, -2.0, -1.8, -1.6, -1.4] 64 | legend = True 65 | 66 | 67 | 68 | del data["ni"] 69 | del data["nd"] 70 | del data['train_time'] 71 | del data['test_time'] 72 | del data['mode'] 73 | data['rmse'] = np.sqrt(data['mse']) 74 | 75 | 76 | M_set = [200, 500, 800, 1000, 1200, 1400] # SVGP, DPPGPR, DSVGP1, DPPGPR1, DSVGP3, DPPGPR3 77 | M_set2 = [198, 498, 798, 999, 1200, 1398] # DSVGP2, DPPGPR2 78 | M_set_dict2 = {198: 200, 498:500, 798:800, 999:1000, 1200:1200, 1398:1400} 79 | 80 | data_dict = {} 81 | for method in methods_list: 82 | # select the partial dataframe = data[data['run']==method] 83 | data_dict_cur = {} 84 | data_dict_full = data[data['run']==method] 85 | if method.endswith("2"): 86 | for M in M_set2: 87 | key = M_set_dict2[M] 88 | data_dict_cur[key] = data_dict_full[data_dict_full['M']==M] 89 | else: 90 | for M in M_set: 91 | data_dict_cur[M] = data_dict_full[data_dict_full['M']==M] 92 | 93 | data_dict[method] = data_dict_cur 94 | 95 | 96 | # compelete rmse for mean 97 | rmse_nll_dict = {} 98 | for method in methods_list: 99 | rmse_dict = {"mean":[], "std":[]} 100 | nll_dict = {"mean": [], "std":[]} 101 | for M in M_set: 102 | data_cur = data_dict[method][M] 103 | rmse_mean = np.mean(data_cur['rmse']) 104 | rmse_std = np.std(data_cur['rmse']) 105 | nll_mean = np.mean(data_cur['nll']) 106 | nll_std = np.std(data_cur['nll']) 107 | rmse_dict["mean"].append(rmse_mean) 108 | rmse_dict["std"].append(rmse_std) 109 | nll_dict["mean"].append(nll_mean) 110 | nll_dict["std"].append(nll_std) 111 | rmse_nll_dict[method] = {"rmse": rmse_dict, "nll": nll_dict} 112 | 113 | 114 | 115 | def plot_stellarator(datatype, ylim=None, yticks=None, 
logy=False, legend=True): 116 | fig, ax = plt.subplots(nrows=1, ncols=1, figsize=FIGURESUZE) 117 | for method in methods_list: 118 | mean_nll = np.array(rmse_nll_dict[method][datatype]['mean']) 119 | std_nll = np.array(rmse_nll_dict[method][datatype]['std']) 120 | if ADD_SHARED: 121 | ax.plot(M_set, mean_nll, 122 | color=styles[method][2], 123 | label=styles[method][0], 124 | linestyle=styles[method][1], 125 | marker=styles[method][3], 126 | markersize=MARKERSIZE, 127 | alpha=ALPHA_MARKER, 128 | linewidth=LINEWIDTH, 129 | ) 130 | else: 131 | ax.plot(M_set, mean_nll, 132 | color=styles[method][2], 133 | label=styles[method][0], 134 | marker=styles[method][3], 135 | markersize=MARKERSIZE, 136 | alpha=ALPHA_MARKER) 137 | ax.fill_between(M_set, 138 | mean_nll+std_nll, 139 | mean_nll-std_nll, 140 | color=styles[method][2], 141 | alpha=ALPHA, 142 | ) 143 | 144 | ax.set_xlabel("Inducing matrix size",fontsize=FONTSIZE) 145 | ax.set_xticks(M_set) 146 | ax.set_xticklabels(M_set,fontsize=FONTSIZE) 147 | 148 | ylabel='NLL' if datatype=='nll' else "RMSE" 149 | ax.set_ylabel(ylabel, fontsize=FONTSIZE) 150 | ax.set_yticks(yticks) 151 | ax.set_yticklabels(yticks,fontsize=FONTSIZE) 152 | ax.set_ylim(ylim) 153 | if legend: 154 | ax.legend(loc='upper right', fontsize=FONTSIZE-5) 155 | 156 | plt.grid() 157 | plt.minorticks_off() 158 | plt.tight_layout() 159 | if ADD_SHARED: 160 | figurepath=f"./plots/stellarator_{datatype}_add_shared.pdf" 161 | else: 162 | figurepath=f"./plots/stellarator_{datatype}.pdf" 163 | fig.savefig(figurepath) 164 | print("Figure saved:", figurepath) 165 | 166 | def plot_legend(style_dict, methods_list, ncol): 167 | plt.clf() 168 | plt.cla() 169 | 170 | figurename = f"stellarator_legend_add_shared_ncol{ncol}.pdf" 171 | figurepath = os.path.abspath(__file__ + "/../plots/" + figurename) 172 | 173 | color_set = [style_dict[method][2] for method in methods_list] 174 | linestyle_set = [style_dict[method][1] for method in methods_list] 175 | label_set = [style_dict[method][0] for method in methods_list] 176 | marker_set = [style_dict[method][3] for method in methods_list] 177 | 178 | fig, ax = plt.subplots(figsize=(10,8)) 179 | f = lambda ls,c,label,marker: ax.plot([],[], linestyle=ls, color=c, 180 | label=label, marker=marker, 181 | markersize=MARKERSIZE*0.68, 182 | # linewidth=LINEWIDTH, 183 | )[0] 184 | handles = [f(linestyle_set[i], color_set[i], label_set[i], marker_set[i]) for i in range(len(methods_list))] 185 | #legend 186 | LABEL_SIZE=15 187 | figsize = (5, 1) 188 | fig_leg = plt.figure(figsize=figsize) 189 | legend_properties = {'weight': 'bold', 'size': LABEL_SIZE} 190 | ax_leg = fig_leg.add_subplot(111) 191 | 192 | ax_leg.set_facecolor('white') 193 | ax_leg.grid(False) 194 | ax_leg.set_axis_off() 195 | ax_leg.legend(*ax.get_legend_handles_labels(), loc='center', ncol=ncol, prop=legend_properties, facecolor="white", edgecolor="grey") 196 | fig_leg.savefig(figurepath, bbox_inches = 'tight') 197 | 198 | print("Figure saved:", figurepath) 199 | 200 | 201 | # plot_stellarator('nll', ylim=ylim, yticks=yticks, 202 | # logy=False, legend=legend) 203 | if ADD_SHARED: 204 | plot_legend(styles, methods_list, ncol=ncol) 205 | 206 | -------------------------------------------------------------------------------- /directionalvi/grad_svgp.py: -------------------------------------------------------------------------------- 1 | from gpytorch.models import ApproximateGP 2 | from gpytorch.variational import CholeskyVariationalDistribution 3 | from GradVariationalStrategy import 
GradVariationalStrategy 4 | from torch.utils.data import TensorDataset, DataLoader 5 | import tqdm 6 | import math 7 | import time 8 | import torch 9 | import sys 10 | import gpytorch 11 | from matplotlib import pyplot as plt 12 | import numpy as np 13 | from utils.count_params import count_params 14 | try: # import wandb if watch model on weights&biases 15 | import wandb 16 | except: 17 | pass 18 | 19 | 20 | class GPModel(ApproximateGP): 21 | def __init__(self, inducing_points,**kwargs): 22 | dim = inducing_points.size(1) 23 | if "variational_distribution" in kwargs and kwargs["variational_distribution"] == "NGD": 24 | variational_distribution = gpytorch.variational.NaturalVariationalDistribution(inducing_points.size(0)*(dim+1)) 25 | else: 26 | variational_distribution = CholeskyVariationalDistribution(inducing_points.size(0)*(dim+1)) 27 | if "variational_strategy" in kwargs and kwargs["variational_strategy"] == "CIQ": 28 | variational_strategy = gpytorch.variational.CiqVariationalStrategy( 29 | self, inducing_points, variational_distribution, learn_inducing_locations=True) 30 | else: 31 | variational_strategy = GradVariationalStrategy(self, inducing_points, variational_distribution, learn_inducing_locations=True) 32 | super(GPModel, self).__init__(variational_strategy) 33 | self.mean_module = gpytorch.means.ConstantMean() 34 | self.covar_module = gpytorch.kernels.ScaleKernel(gpytorch.kernels.RBFKernelGrad()) 35 | 36 | def forward(self, x): 37 | mean_x = self.mean_module(x) 38 | covar_x = self.covar_module(x) 39 | return gpytorch.distributions.MultivariateNormal(mean_x, covar_x) 40 | 41 | def train_gp(train_dataset,dim,num_inducing=128, 42 | minibatch_size=1, 43 | num_epochs=1, 44 | use_ngd=False, 45 | use_ciq=False, 46 | learning_rate_hypers=0.01, 47 | learning_rate_ngd=0.1, 48 | lr_sched=None, 49 | mll_type="ELBO", 50 | num_contour_quadrature=15, 51 | watch_model=False,gamma=0.1, 52 | verbose=True, 53 | **args): 54 | 55 | print_loss=True 56 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 57 | n_samples = len(train_dataset) 58 | 59 | # setup model 60 | # inducing_points = train_x[:num_inducing, :] 61 | inducing_points = torch.rand(num_inducing,dim) 62 | if torch.cuda.is_available(): 63 | inducing_points = inducing_points.cuda() 64 | 65 | if use_ciq: 66 | gpytorch.settings.num_contour_quadrature(num_contour_quadrature) 67 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD",variational_strategy="CIQ") 68 | elif use_ngd: 69 | model = GPModel(inducing_points=inducing_points,variational_distribution="NGD") 70 | else: 71 | model = GPModel(inducing_points=inducing_points) 72 | likelihood = gpytorch.likelihoods.GaussianLikelihood() 73 | 74 | if torch.cuda.is_available(): 75 | model = model.cuda() 76 | likelihood = likelihood.cuda() 77 | if watch_model: 78 | wandb.watch(model) 79 | 80 | model.train() 81 | likelihood.train() 82 | 83 | if verbose: 84 | param_total_dim = count_params(model,likelihood) 85 | 86 | # optimizers 87 | if use_ngd or use_ciq: 88 | variational_optimizer = gpytorch.optim.NGD(model.variational_parameters(), num_data=n_samples, lr=learning_rate_ngd) 89 | hyperparameter_optimizer = torch.optim.Adam([ 90 | {'params': model.hyperparameters()}, 91 | {'params': likelihood.parameters()}, 92 | ], lr=learning_rate_hypers) 93 | else: 94 | variational_optimizer = torch.optim.Adam([ 95 | {'params': model.variational_parameters()}, 96 | ], lr=learning_rate_hypers) 97 | hyperparameter_optimizer = torch.optim.Adam([ 98 | 
{'params': model.hyperparameters()}, 99 | {'params': likelihood.parameters()}, 100 | ], lr=learning_rate_hypers) 101 | 102 | # learning rate scheduler 103 | #lambda1 = lambda epoch: 1.0/(1 + epoch) 104 | if lr_sched == "step_lr": 105 | num_batches = int(np.ceil(n_samples/minibatch_size)) 106 | milestones = [int(num_epochs*num_batches/3), int(2*num_epochs*num_batches/3)] 107 | hyperparameter_scheduler = torch.optim.lr_scheduler.MultiStepLR(hyperparameter_optimizer, milestones, gamma=gamma) 108 | variational_scheduler = torch.optim.lr_scheduler.MultiStepLR(variational_optimizer, milestones, gamma=gamma) 109 | elif lr_sched is None: 110 | lr_sched = lambda epoch: 1.0 111 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 112 | variational_scheduler = torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 113 | else: 114 | hyperparameter_scheduler = torch.optim.lr_scheduler.LambdaLR(hyperparameter_optimizer, lr_lambda=lr_sched) 115 | variational_scheduler = torch.optim.lr_scheduler.LambdaLR(variational_optimizer, lr_lambda=lr_sched) 116 | 117 | # Our loss object. We're using the VariationalELBO 118 | if mll_type=="ELBO": 119 | mll = gpytorch.mlls.VariationalELBO(likelihood, model, num_data=n_samples) 120 | elif mll_type=="PLL": 121 | mll = gpytorch.mlls.PredictiveLogLikelihood(likelihood, model, num_data=n_samples) 122 | 123 | if "tqdm" in args and args["tqdm"]: 124 | print_loss=False # don't print loss every 100 epoch if use tqdm 125 | epochs_iter = tqdm.tqdm(range(num_epochs), desc="Epoch") 126 | else: 127 | epochs_iter = range(num_epochs) 128 | 129 | total_step=0 130 | for i in epochs_iter: 131 | if "tqdm" in args and args["tqdm"]: 132 | minibatch_iter = tqdm.tqdm(train_loader, desc="Minibatch", leave=False) 133 | else: 134 | minibatch_iter = train_loader 135 | 136 | mini_steps = 0 137 | for x_batch, y_batch in minibatch_iter: 138 | if torch.cuda.is_available(): 139 | x_batch = x_batch.cuda() 140 | y_batch = y_batch.cuda() 141 | 142 | # pass in interleaved data 143 | y_batch = y_batch.reshape(torch.numel(y_batch)) 144 | 145 | variational_optimizer.zero_grad() 146 | hyperparameter_optimizer.zero_grad() 147 | output = likelihood(model(x_batch)) 148 | loss = -mll(output, y_batch) 149 | if watch_model: 150 | wandb.log({"loss": loss.item()}) 151 | loss.backward() 152 | # step optimizers and learning rate schedulers 153 | variational_optimizer.step() 154 | variational_scheduler.step() 155 | hyperparameter_optimizer.step() 156 | hyperparameter_scheduler.step() 157 | 158 | if "tqdm" in args and args["tqdm"]: 159 | epochs_iter.set_postfix(loss=loss.item()) 160 | 161 | if total_step % 25 == 0 and print_loss: 162 | means = output.mean[::dim+1] 163 | stds = output.variance.sqrt()[::dim+1] 164 | nll = -torch.distributions.Normal(means, stds).log_prob(y_batch[::dim+1]).mean() 165 | print(f"Epoch: {i}; total_step: {mini_steps}, loss: {loss.item()}, nll: {nll}") 166 | 167 | mini_steps +=1 168 | total_step +=1 169 | sys.stdout.flush() 170 | 171 | 172 | 173 | if print_loss: 174 | print(f"Done! 
loss: {loss.item()}") 175 | 176 | print("\nDone Training!") 177 | sys.stdout.flush() 178 | return model, likelihood 179 | 180 | def eval_gp(test_dataset,model,likelihood, mll_type="ELBO", num_inducing=128,minibatch_size=1): 181 | 182 | dim = len(test_dataset[0][0]) 183 | n_test = len(test_dataset) 184 | test_loader = DataLoader(test_dataset, batch_size=minibatch_size, shuffle=False) 185 | 186 | model.eval() 187 | likelihood.eval() 188 | 189 | means = torch.tensor([0.]) 190 | variances = torch.tensor([0.]) 191 | with torch.no_grad(): 192 | for x_batch, y_batch in test_loader: 193 | if torch.cuda.is_available(): 194 | x_batch = x_batch.cuda() 195 | y_batch = y_batch.cuda() 196 | preds = likelihood(model(x_batch)) 197 | means = torch.cat([means, preds.mean.cpu()]) 198 | variances = torch.cat([variances, preds.variance.cpu()]) 199 | means = means[1:] 200 | variances = variances[1:] 201 | 202 | return means, variances 203 | -------------------------------------------------------------------------------- /directionalvi/GradVariationalStrategy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import warnings 3 | 4 | import torch 5 | 6 | from gpytorch import settings 7 | from gpytorch.distributions import MultivariateNormal 8 | from gpytorch.lazy import DiagLazyTensor, MatmulLazyTensor, RootLazyTensor, SumLazyTensor, TriangularLazyTensor, delazify 9 | from gpytorch.settings import trace_mode 10 | from gpytorch.utils.cholesky import psd_safe_cholesky 11 | from gpytorch.utils.errors import CachingError 12 | from gpytorch.utils.memoize import cached, clear_cache_hook, pop_from_cache_ignore_args 13 | from gpytorch.utils.warnings import OldVersionWarning 14 | from gpytorch.variational._variational_strategy import _VariationalStrategy 15 | 16 | 17 | def _ensure_updated_strategy_flag_set( 18 | state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs 19 | ): 20 | device = state_dict[list(state_dict.keys())[0]].device 21 | if prefix + "updated_strategy" not in state_dict: 22 | state_dict[prefix + "updated_strategy"] = torch.tensor(False, device=device) 23 | warnings.warn( 24 | "You have loaded a variational GP model (using `VariationalStrategy`) from a previous version of " 25 | "GPyTorch. We have updated the parameters of your model to work with the new version of " 26 | "`VariationalStrategy` that uses whitened parameters.\nYour model will work as expected, but we " 27 | "recommend that you re-save your model.", 28 | OldVersionWarning, 29 | ) 30 | 31 | 32 | class GradVariationalStrategy(_VariationalStrategy): 33 | r""" 34 | The standard variational strategy, as defined by `Hensman et al. (2015)`_. 35 | This strategy takes a set of :math:`m \ll n` inducing points :math:`\mathbf Z` 36 | and applies an approximate distribution :math:`q( \mathbf u)` over their function values. 37 | (Here, we use the common notation :math:`\mathbf u = f(\mathbf Z)`. 38 | The approximate function distribution for any abitrary input :math:`\mathbf X` is given by: 39 | 40 | .. math:: 41 | 42 | q( f(\mathbf X) ) = \int p( f(\mathbf X) \mid \mathbf u) q(\mathbf u) \: d\mathbf u 43 | 44 | This variational strategy uses "whitening" to accelerate the optimization of the variational 45 | parameters. See `Matthews (2017)`_ for more info. 46 | 47 | :param ~gpytorch.models.ApproximateGP model: Model this strategy is applied to. 
48 | Typically passed in when the VariationalStrategy is created in the 49 | __init__ method of the user defined model. 50 | :param torch.Tensor inducing_points: Tensor containing a set of inducing 51 | points to use for variational inference. 52 | :param ~gpytorch.variational.VariationalDistribution variational_distribution: A 53 | VariationalDistribution object that represents the form of the variational distribution :math:`q(\mathbf u)` 54 | :param learn_inducing_locations: (Default True): Whether or not 55 | the inducing point locations :math:`\mathbf Z` should be learned (i.e. are they 56 | parameters of the model). 57 | :type learn_inducing_locations: `bool`, optional 58 | 59 | .. _Hensman et al. (2015): 60 | http://proceedings.mlr.press/v38/hensman15.pdf 61 | .. _Matthews (2017): 62 | https://www.repository.cam.ac.uk/handle/1810/278022 63 | """ 64 | 65 | def __init__(self, model, inducing_points, variational_distribution, learn_inducing_locations=True): 66 | super().__init__(model, inducing_points, variational_distribution, learn_inducing_locations) 67 | self.register_buffer("updated_strategy", torch.tensor(True)) 68 | self._register_load_state_dict_pre_hook(_ensure_updated_strategy_flag_set) 69 | 70 | @cached(name="cholesky_factor", ignore_args=True) 71 | def _cholesky_factor(self, induc_induc_covar): 72 | L = psd_safe_cholesky(delazify(induc_induc_covar).double()) 73 | return TriangularLazyTensor(L) 74 | 75 | @property 76 | @cached(name="prior_distribution_memo") 77 | def prior_distribution(self): 78 | zeros = torch.zeros( 79 | self._variational_distribution.shape(), 80 | dtype=self._variational_distribution.dtype, 81 | device=self._variational_distribution.device, 82 | ) 83 | ones = torch.ones_like(zeros) 84 | res = MultivariateNormal(zeros, DiagLazyTensor(ones)) 85 | return res 86 | 87 | def forward(self, x, inducing_points, inducing_values, variational_inducing_covar=None, **kwargs): 88 | # Compute full prior distribution 89 | full_inputs = torch.cat([inducing_points, x], dim=-2) 90 | full_output = self.model.forward(full_inputs, **kwargs) 91 | full_covar = full_output.lazy_covariance_matrix 92 | 93 | # Covariance terms 94 | dim = inducing_points.size(1) 95 | num_induc = inducing_points.size(-2) 96 | test_mean = self.model.mean_module(x.repeat_interleave(dim+1,dim=0)) 97 | induc_induc_covar = full_covar[..., :num_induc*(dim+1), :num_induc*(dim+1)].add_jitter() 98 | induc_data_covar = full_covar[..., :num_induc*(dim+1), num_induc*(dim+1):].evaluate() 99 | data_data_covar = full_covar[..., num_induc*(dim+1):, num_induc*(dim+1):] 100 | 101 | # Compute interpolation terms 102 | # K_ZZ^{-1/2} K_ZX 103 | # K_ZZ^{-1/2} \mu_Z 104 | L = self._cholesky_factor(induc_induc_covar) 105 | if L.shape != induc_induc_covar.shape: 106 | # Aggressive caching can cause nasty shape incompatibilies when evaluating with different batch shapes 107 | # TODO: Use a hook fo this 108 | try: 109 | pop_from_cache_ignore_args(self, "cholesky_factor") 110 | except CachingError: 111 | pass 112 | L = self._cholesky_factor(induc_induc_covar) 113 | interp_term = L.inv_matmul(induc_data_covar.double()).to(full_inputs.dtype) 114 | 115 | # Compute the mean of q(f) 116 | # k_XZ K_ZZ^{-1/2} (m - K_ZZ^{-1/2} \mu_Z) + \mu_X 117 | predictive_mean = (interp_term.transpose(-1, -2) @ inducing_values.unsqueeze(-1)).squeeze(-1) + test_mean 118 | 119 | # Compute the covariance of q(f) 120 | # K_XX + k_XZ K_ZZ^{-1/2} (S - I) K_ZZ^{-1/2} k_ZX 121 | middle_term = self.prior_distribution.lazy_covariance_matrix.mul(-1) 122 | if 
variational_inducing_covar is not None: 123 | middle_term = SumLazyTensor(variational_inducing_covar, middle_term) 124 | 125 | if trace_mode.on(): 126 | predictive_covar = ( 127 | data_data_covar.add_jitter(1e-4).evaluate() 128 | + interp_term.transpose(-1, -2) @ middle_term.evaluate() @ interp_term 129 | ) 130 | else: 131 | predictive_covar = SumLazyTensor( 132 | data_data_covar.add_jitter(1e-4), 133 | MatmulLazyTensor(interp_term.transpose(-1, -2), middle_term @ interp_term), 134 | ) 135 | 136 | # Return the distribution 137 | return MultivariateNormal(predictive_mean, predictive_covar) 138 | 139 | def __call__(self, x, prior=False, **kwargs): 140 | if not self.updated_strategy.item() and not prior: 141 | with torch.no_grad(): 142 | # Get unwhitened p(u) 143 | prior_function_dist = self(self.inducing_points, prior=True) 144 | prior_mean = prior_function_dist.loc 145 | L = self._cholesky_factor(prior_function_dist.lazy_covariance_matrix.add_jitter()) 146 | 147 | # Temporarily turn off noise that's added to the mean 148 | orig_mean_init_std = self._variational_distribution.mean_init_std 149 | self._variational_distribution.mean_init_std = 0.0 150 | 151 | # Change the variational parameters to be whitened 152 | variational_dist = self.variational_distribution 153 | mean_diff = (variational_dist.loc - prior_mean).unsqueeze(-1).double() 154 | whitened_mean = L.inv_matmul(mean_diff).squeeze(-1).to(variational_dist.loc.dtype) 155 | covar_root = variational_dist.lazy_covariance_matrix.root_decomposition().root.evaluate().double() 156 | whitened_covar = RootLazyTensor(L.inv_matmul(covar_root).to(variational_dist.loc.dtype)) 157 | whitened_variational_distribution = variational_dist.__class__(whitened_mean, whitened_covar) 158 | self._variational_distribution.initialize_variational_distribution(whitened_variational_distribution) 159 | 160 | # Reset the random noise parameter of the model 161 | self._variational_distribution.mean_init_std = orig_mean_init_std 162 | 163 | # Reset the cache 164 | clear_cache_hook(self) 165 | 166 | # Mark that we have updated the variational strategy 167 | self.updated_strategy.fill_(True) 168 | 169 | return super().__call__(x, prior=prior, **kwargs) 170 | -------------------------------------------------------------------------------- /experiments/synthetic1/synthetic1.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | import torch 4 | import gpytorch 5 | import tqdm 6 | import random 7 | import time 8 | from matplotlib import pyplot as plt 9 | from torch.utils.data import TensorDataset, DataLoader 10 | import os 11 | import sys 12 | sys.path.append("../") 13 | sys.path.append("../../directionalvi/utils") 14 | sys.path.append("../../directionalvi") 15 | from directional_vi import train_gp, eval_gp 16 | import traditional_vi 17 | import grad_svgp 18 | from csv_dataset import csv_dataset 19 | from metrics import MSE 20 | import pickle 21 | 22 | 23 | # load a pickle with the run params 24 | args = sys.argv 25 | param_filename = args[1] 26 | run_params = pickle.load(open(param_filename,"rb")) 27 | num_inducing =run_params['num_inducing'] 28 | num_directions =run_params['num_directions'] 29 | minibatch_size =run_params['minibatch_size'] 30 | num_epochs =run_params['num_epochs'] 31 | tqdm =run_params['tqdm'] 32 | inducing_data_initialization =run_params['inducing_data_initialization'] 33 | use_ngd =run_params['use_ngd'] 34 | use_ciq =run_params['use_ciq'] 35 | num_contour_quadrature= 
run_params['num_contour_quadrature'] 36 | learning_rate_hypers = run_params['learning_rate_hypers'] 37 | learning_rate_ngd = run_params['learning_rate_ngd'] 38 | lr_gamma = run_params['lr_gamma'] 39 | lr_benchmarks = run_params['lr_benchmarks'] 40 | lr_sched = run_params['lr_sched'] 41 | mll_type = run_params['mll_type'] 42 | seed = run_params['seed'] 43 | base_name = run_params['base_name'] 44 | data_file = run_params['data_file'] 45 | mode = run_params['mode'] 46 | 47 | # make the learning rate schedule 48 | assert lr_sched in [None, "MultiStepLR", "LambdaLR"], "Not a valid choice of lr_sched" 49 | if lr_sched is None: 50 | pass 51 | elif lr_sched == "MultiStepLR": 52 | def lr_sched(epoch): 53 | a = np.sum(lr_benchmarks < epoch) 54 | # lr_gamma should be > 1 55 | return (lr_gamma)**a 56 | elif lr_sched == "LambdaLR": 57 | lr_sched = lambda epoch: 1./(1+lr_gamma*epoch) 58 | 59 | # set the seed 60 | torch.random.manual_seed(seed) 61 | 62 | # output file names 63 | data_dir = "./output/" 64 | model_filename = data_dir + "model_"+ base_name + ".model" 65 | data_filename = data_dir + "data_" + base_name + ".pickle" 66 | if os.path.exists(data_dir) is False: 67 | os.mkdir(data_dir) 68 | 69 | if mode == "DSVGP" or mode == "GradSVGP": deriv=True 70 | elif mode == "SVGP": deriv = False 71 | 72 | # load the data 73 | d = pickle.load(open(data_file, "rb")) 74 | X = d['X'] 75 | Y = d['Y'] 76 | n,dim = X.shape 77 | if deriv == False: 78 | Y = Y[:,0] 79 | 80 | # make a torch dataset 81 | dataset = TensorDataset(X,Y) 82 | 83 | # train-test split 84 | n_train = int(0.8*n) 85 | n_test = int(0.2*n) 86 | train_dataset,test_dataset = torch.utils.data.random_split(dataset,[n_train,n_test]) 87 | 88 | #if torch.cuda.is_available(): 89 | # train_dataset, train_y, test_x, test_y = train_x.cuda(), train_y.cuda(), test_x.cuda(), test_y.cuda() 90 | 91 | # make dataloaders 92 | train_loader = DataLoader(train_dataset, batch_size=minibatch_size, shuffle=True) 93 | test_loader = DataLoader(test_dataset, batch_size=n_test, shuffle=False) 94 | 95 | 96 | if mode == "DSVGP": 97 | # train 98 | print("\n\n---DirectionalGradVGP---") 99 | print(f"Start training with {n} trainig data of dim {dim}") 100 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 101 | t1 = time.time() 102 | model,likelihood = train_gp(train_dataset, 103 | num_inducing=num_inducing, 104 | num_directions=num_directions, 105 | minibatch_size = minibatch_size, 106 | minibatch_dim = num_directions, 107 | num_epochs =num_epochs, 108 | learning_rate_hypers=learning_rate_hypers, 109 | learning_rate_ngd=learning_rate_ngd, 110 | inducing_data_initialization=inducing_data_initialization, 111 | use_ngd = use_ngd, 112 | use_ciq = use_ciq, 113 | lr_sched=lr_sched, 114 | mll_type=mll_type, 115 | num_contour_quadrature=num_contour_quadrature, 116 | tqdm=tqdm, 117 | ) 118 | t2 = time.time() 119 | train_time = t2 - t1 120 | 121 | # save the model 122 | torch.save(model.state_dict(),model_filename) 123 | 124 | # test 125 | means, variances = eval_gp(test_dataset,model,likelihood, 126 | num_directions=num_directions, 127 | minibatch_size=minibatch_size, 128 | minibatch_dim=num_directions) 129 | t3 = time.time() 130 | test_time = t3 - t2 131 | 132 | # only keep the function values 133 | means = means[::num_directions+1] 134 | variances = variances[::num_directions+1] 135 | 136 | elif mode == "SVGP": 137 | # train 138 | print("\n\n---Traditional SVGP---") 139 | print(f"Start training with {n} training data of dim {dim}") 140 | 
print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 141 | t1 = time.time() 142 | model,likelihood = traditional_vi.train_gp(train_dataset,dim, 143 | num_inducing=num_inducing, 144 | minibatch_size=minibatch_size, 145 | num_epochs=num_epochs, 146 | use_ngd=use_ngd, 147 | use_ciq=use_ciq, 148 | learning_rate_hypers=learning_rate_hypers, 149 | learning_rate_ngd=learning_rate_ngd, 150 | lr_sched=lr_sched, 151 | num_contour_quadrature=num_contour_quadrature, 152 | mll_type=mll_type, 153 | tqdm=False) 154 | t2 = time.time() 155 | train_time = t2 - t1 156 | 157 | # save the model 158 | torch.save(model.state_dict(),model_filename) 159 | 160 | # test 161 | means, variances = traditional_vi.eval_gp(test_dataset,model,likelihood, 162 | num_inducing=num_inducing, 163 | minibatch_size=n_test) 164 | t3 = time.time() 165 | test_time = t3 - t2 166 | 167 | elif mode == "GradSVGP": 168 | # train 169 | print("\n\n---Grad SVGP---") 170 | print(f"Start training with {n} training data of dim {dim}") 171 | print(f"VI setup: {num_inducing} inducing points, {num_directions} inducing directions") 172 | t1 = time.time() 173 | model,likelihood = grad_svgp.train_gp(train_dataset,dim, 174 | num_inducing=num_inducing, 175 | minibatch_size=minibatch_size, 176 | num_epochs=num_epochs, 177 | use_ngd=use_ngd, 178 | use_ciq=use_ciq, 179 | learning_rate_hypers=learning_rate_hypers, 180 | learning_rate_ngd=learning_rate_ngd, 181 | lr_sched=lr_sched, 182 | num_contour_quadrature=num_contour_quadrature, 183 | mll_type=mll_type, 184 | tqdm=False) 185 | t2 = time.time() 186 | train_time = t2 - t1 187 | 188 | # save the model 189 | torch.save(model.state_dict(),model_filename) 190 | 191 | # test 192 | means, variances = grad_svgp.eval_gp(test_dataset,model,likelihood, 193 | num_inducing=num_inducing, 194 | minibatch_size=n_test) 195 | t3 = time.time() 196 | test_time = t3 - t2 197 | 198 | # only keep the function values 199 | means = means[::dim+1] 200 | variances = variances[::dim+1] 201 | 202 | 203 | # collect the test function values 204 | test_f = torch.zeros(n_test) 205 | for ii in range(n_test): 206 | if mode == "DSVGP" or mode == "GradSVGP": 207 | test_f[ii] = test_dataset[ii][1][0] # function value 208 | elif mode == "SVGP": 209 | test_f[ii] = test_dataset[ii][1] # function value 210 | 211 | # compute MSE 212 | test_mse = MSE(test_f,means) 213 | # compute mean negative predictive density 214 | test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean() 215 | print(f"At {n_test} testing points, MSE: {test_mse:.4e}, nll: {test_nll:.4e}.") 216 | print(f"Training time: {train_time:.2f} sec, testing time: {test_time:.2f} sec") 217 | 218 | # dump the data 219 | outdata = {} 220 | outdata['test_mse'] = test_mse 221 | outdata['test_nll'] = test_nll 222 | outdata['train_time'] = train_time 223 | outdata['test_time'] = test_time 224 | # add the run params 225 | outdata.update(run_params) 226 | pickle.dump(outdata,open(data_filename,"wb")) 227 | print(f"Dropped file: {data_filename}") 228 | -------------------------------------------------------------------------------- /experiments/rover/test_turbo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import gpytorch 4 | import time 5 | from torch.utils.data import TensorDataset, DataLoader 6 | import os 7 | import sys 8 | sys.path.append("../") 9 | sys.path.append("../../directionalvi/utils") 10 | sys.path.append("../../directionalvi") 11 | 
import directional_vi 12 | import traditional_vi 13 | import pickle 14 | 15 | 16 | 17 | # load a pickle with the run params 18 | args = sys.argv 19 | param_filename = args[1] 20 | run_params = pickle.load(open(param_filename,"rb")) 21 | num_inducing =run_params['num_inducing'] 22 | num_directions =run_params['num_directions'] 23 | minibatch_size =run_params['minibatch_size'] 24 | num_epochs =run_params['num_epochs'] 25 | verbose =run_params['verbose'] 26 | inducing_data_initialization =run_params['inducing_data_initialization'] 27 | use_ngd =run_params['use_ngd'] 28 | use_ciq =run_params['use_ciq'] 29 | num_contour_quadrature= run_params['num_contour_quadrature'] 30 | learning_rate_hypers = run_params['learning_rate_hypers'] 31 | learning_rate_ngd = run_params['learning_rate_ngd'] 32 | lr_gamma = run_params['lr_gamma'] 33 | lr_benchmarks = run_params['lr_benchmarks'] 34 | lr_sched = run_params['lr_sched'] 35 | mll_type = run_params['mll_type'] 36 | seed = run_params['seed'] 37 | base_name = run_params['base_name'] 38 | mode = run_params['mode'] 39 | turbo_lb = run_params['turbo_lb'] 40 | turbo_ub = run_params['turbo_ub'] 41 | turbo_n_init = run_params['turbo_n_init'] 42 | turbo_max_evals = run_params['turbo_max_evals'] 43 | turbo_batch_size = run_params['turbo_batch_size'] 44 | dim = run_params['dim'] 45 | 46 | # make the learning rate schedule 47 | assert lr_sched in [None, "MultiStepLR", "LambdaLR"], "Not a valid choice of lr_sched" 48 | if lr_sched is None: 49 | pass 50 | elif lr_sched == "MultiStepLR": 51 | def lr_sched(epoch): 52 | a = np.sum(lr_benchmarks < epoch) 53 | # lr_gamma should be > 1 54 | return (lr_gamma)**a 55 | elif lr_sched == "LambdaLR": 56 | lr_sched = lambda epoch: 1./(1+lr_gamma*epoch) 57 | 58 | # set the seed 59 | torch.random.manual_seed(seed) 60 | 61 | # output file names 62 | data_dir = "./output/" 63 | model_filename = data_dir + "model_"+ base_name + ".model" 64 | data_filename = data_dir + "data_" + base_name + ".pickle" 65 | if os.path.exists(data_dir) is False: 66 | os.mkdir(data_dir) 67 | 68 | if mode == "DSVGP": deriv=True 69 | elif mode == "SVGP" or mode == "Vanilla": deriv = False 70 | 71 | # wrap the objective 72 | from rover import * 73 | def myObj(u): 74 | if deriv==True: 75 | # stack it 76 | fg = np.zeros(len(u)+1) 77 | fg[0] = rover_obj(u) 78 | fg[1:] = np.copy(rover_grad(u)) 79 | return fg 80 | else: 81 | return rover_obj(u) 82 | 83 | if torch.cuda.is_available(): 84 | turbo_device = 'cuda' 85 | else: 86 | turbo_device = 'cpu' 87 | 88 | if mode == "DSVGP": 89 | # train 90 | print(f"\n\n---TuRBO-Grad with DSVGP in dim {dim}---") 91 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 92 | 93 | #from turbo1_grad_linesearch import * 94 | from turbo1_grad import * 95 | def train_gp_for_turbo(train_x, train_y, use_ard, num_steps, hypers): 96 | # expects train_x on unit cube and train_y standardized 97 | # make a trainable model for TuRBO 98 | train_x = train_x.float() 99 | train_y = train_y.float() 100 | dataset = TensorDataset(train_x,train_y) 101 | model,likelihood = directional_vi.train_gp(dataset, 102 | num_inducing=num_inducing, 103 | num_directions=num_directions, 104 | minibatch_size = minibatch_size, 105 | minibatch_dim = num_directions, 106 | num_epochs =num_steps, 107 | learning_rate_hypers=learning_rate_hypers, 108 | learning_rate_ngd=learning_rate_ngd, 109 | inducing_data_initialization=inducing_data_initialization, 110 | use_ngd = use_ngd, 111 | use_ciq = use_ciq, 112 | lr_sched=lr_sched, 113 | 
mll_type=mll_type, 114 | num_contour_quadrature=num_contour_quadrature, 115 | verbose=verbose, 116 | ) 117 | return model.double(),likelihood.double() 118 | 119 | def sample_from_gp(model,likelihood,X_cand,n_samples): 120 | """ 121 | X_cand: 2d torch tensor, points to sample at 122 | n_samples: int, number of samples to take per point in X_cand 123 | """ 124 | model.eval() 125 | likelihood.eval() 126 | 127 | # ensure correct type 128 | model = model.float() 129 | likelihood = likelihood.float() 130 | X_cand = X_cand.float() 131 | 132 | n,dim = X_cand.shape 133 | kwargs = {} 134 | derivative_directions = torch.eye(dim)[:model.num_directions] 135 | derivative_directions = derivative_directions.repeat(n,1) 136 | kwargs['derivative_directions'] = derivative_directions.to(X_cand.device).float() 137 | preds = likelihood(model(X_cand,**kwargs)) 138 | y_cand = preds.sample(torch.Size([n_samples])) # shape (n_samples x n*(n_dir+1)) 139 | y_cand = y_cand[:,::model.num_directions+1].t() # shape (n, n_samples) 140 | 141 | # only use mean 142 | #y_cand = preds.mean.repeat(n_samples,1).t() # (n,n_samples) 143 | 144 | ## only use distribution of f(x) to predict (dont use joint covariance with derivatives) 145 | #mean = preds.mean[::num_directions+1] 146 | #var = preds.variance[::num_directions+1] # could have used covariance for f(x) too 147 | #mvn = gpytorch.distributions.MultivariateNormal(mean,torch.diag(var)) 148 | #y_cand = mvn.sample(torch.Size([n_samples])).t() # shape (n x n_samples) 149 | 150 | return y_cand 151 | 152 | 153 | # initialize TuRBO 154 | problem = Turbo1Grad( 155 | myObj, 156 | lb=turbo_lb,ub=turbo_ub, 157 | n_init=turbo_n_init, 158 | max_evals=turbo_max_evals, 159 | train_gp=train_gp_for_turbo, 160 | sample_from_gp=sample_from_gp, 161 | batch_size=turbo_batch_size, 162 | verbose=True, 163 | use_ard=True, 164 | max_cholesky_size=2000, 165 | n_training_steps=num_epochs, 166 | min_cuda=0, # directional_vi.py always runs on cuda if available 167 | device=turbo_device, 168 | dtype="float64") 169 | # optimize 170 | problem.optimize() 171 | X_turbo, fX_turbo = problem.X, problem.fX[:,0] # Evaluated points 172 | 173 | elif mode == "SVGP": 174 | # train 175 | print(f"\n\n---TuRBO with Traditional SVGP in dim {dim}---") 176 | print(f"VI setups: {num_inducing} inducing points, {num_directions} inducing directions") 177 | 178 | from turbo1 import * 179 | def train_gp_for_turbo(train_x, train_y, use_ard, num_steps, hypers): 180 | # expects train_x on unit cube and train_y standardized 181 | # make a trainable model for TuRBO 182 | train_x = train_x.float() 183 | train_y = train_y.float() 184 | dataset = TensorDataset(train_x,train_y) 185 | model,likelihood = traditional_vi.train_gp(dataset,dim,num_inducing=num_inducing, 186 | minibatch_size=minibatch_size,num_epochs=num_steps,use_ngd=use_ngd, 187 | use_ciq=use_ciq,learning_rate_hypers=learning_rate_hypers, 188 | learning_rate_ngd=learning_rate_ngd, 189 | lr_sched=lr_sched,num_contour_quadrature=num_contour_quadrature, 190 | mll_type=mll_type,verbose=verbose) 191 | return model.double(),likelihood.double() 192 | 193 | # initialize TuRBO 194 | problem = Turbo1( 195 | myObj, 196 | lb=turbo_lb,ub=turbo_ub, 197 | n_init=turbo_n_init, 198 | max_evals=turbo_max_evals, 199 | train_gp=train_gp_for_turbo, 200 | batch_size=turbo_batch_size, 201 | verbose=True, 202 | use_ard=True, 203 | max_cholesky_size=2000, 204 | n_training_steps=num_epochs, 205 | min_cuda=0, 206 | device=turbo_device, 207 | dtype="float64") 208 | # optimize 209 | problem.optimize() 210 
| X_turbo, fX_turbo = problem.X, problem.fX.flatten() # Evaluated points 211 | 212 | elif mode == "Vanilla": 213 | # train 214 | print(f"\n\n---Vanilla TuRBO in dim {dim}---") 215 | 216 | from turbo1_vanilla import * 217 | 218 | # initialize TuRBO 219 | problem = Turbo1( 220 | myObj, 221 | lb=turbo_lb,ub=turbo_ub, 222 | n_init=turbo_n_init, 223 | max_evals=turbo_max_evals, 224 | batch_size=turbo_batch_size, 225 | verbose=True, 226 | use_ard=True, 227 | max_cholesky_size=2000, 228 | n_training_steps=num_epochs, 229 | min_cuda=0, 230 | device=turbo_device, 231 | dtype="float64") 232 | 233 | # optimize 234 | problem.optimize() 235 | X_turbo, fX_turbo = problem.X, problem.fX.flatten() # Evaluated points 236 | 237 | 238 | 239 | # get the optimum 240 | idx_opt = np.argmin(fX_turbo) 241 | fopt = fX_turbo[idx_opt] 242 | xopt = X_turbo[idx_opt] 243 | print(f"fopt = {fopt}") 244 | 245 | # dump the data 246 | outdata = {} 247 | outdata['X'] = X_turbo 248 | outdata['fX'] = fX_turbo 249 | outdata['xopt'] = xopt 250 | outdata['fopt'] = fopt 251 | # add the run params 252 | outdata.update(run_params) 253 | pickle.dump(outdata,open(data_filename,"wb")) 254 | print(f"Dropped file: {data_filename}") 255 | --------------------------------------------------------------------------------
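
Note: the experiment drivers listed above (synthetic1.py, test_turbo.py, and the run_exp-style scripts) all follow the same pattern: build a torch TensorDataset, call a module's train_gp, then eval_gp, and score the predictions with MSE and negative log likelihood. The short sketch below illustrates that pattern against traditional_vi.py as listed above. It is a minimal sketch, not part of the repository: the toy data, dataset sizes, and hyperparameter values are placeholders, and it assumes it is run with the directionalvi/ directory on the import path (mirroring the sys.path.append calls the experiment scripts use), so that traditional_vi and utils.metrics resolve as they do for those scripts.

# Minimal usage sketch for traditional_vi.train_gp / eval_gp (illustrative only).
# Assumes directionalvi/ is the working directory / on sys.path, as in the
# experiment scripts above; data and hyperparameters are placeholders.
import math
import torch
from torch.utils.data import TensorDataset
import traditional_vi
from utils.metrics import MSE  # same helper the experiment scripts import

torch.random.manual_seed(0)

# toy regression data on the unit cube
n, dim = 512, 2
X = torch.rand(n, dim)
Y = torch.sin(2 * math.pi * X.sum(dim=1))

# simple train/test split
n_train = int(0.8 * n)
train_dataset = TensorDataset(X[:n_train], Y[:n_train])
test_dataset = TensorDataset(X[n_train:], Y[n_train:])

# train a plain SVGP with the options exposed by traditional_vi.train_gp
model, likelihood = traditional_vi.train_gp(
    train_dataset, dim,
    num_inducing=64,
    minibatch_size=128,
    num_epochs=10,
    mll_type="ELBO",
    verbose=False,
)

# predict on the held-out points and score, as the run scripts do
means, variances = traditional_vi.eval_gp(
    test_dataset, model, likelihood,
    num_inducing=64,
    minibatch_size=len(test_dataset),
)
test_f = Y[n_train:]
test_mse = MSE(test_f, means)
test_nll = -torch.distributions.Normal(means, variances.sqrt()).log_prob(test_f).mean()
print(f"MSE: {test_mse:.4e}, NLL: {test_nll:.4e}")

The derivative-aware variants follow the same calling convention, except that the dataset targets carry the function value and derivative information together (as in the DSVGP and GradSVGP branches of synthetic1.py), and the function-value predictions are recovered by striding the returned means and variances.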