├── README.md ├── TGP.py ├── figures ├── test1_GP.png ├── test1_TGP.png ├── test2_GP.png └── test2_TGP.png ├── get_dataset.py ├── paper └── ATL via GP.pdf ├── run.py ├── run.sh ├── test1.toml ├── test2.toml └── util.py /README.md: -------------------------------------------------------------------------------- 1 | # Transfer-GP 2 | 3 | ## About 4 | An adaptive transfer learning algorithm based on Gaussian Process, which can be used to adapt the transfer learning schemes by automatically estimating the similarity between a source and a target task. 5 | 6 | The regression results comparisons between conventional Gaussian process method and transfer Gaussian process method are shown here. 7 | 8 | ![image](https://github.com/Xiao-dong-Wang/Transfer-GP/blob/master/figures/test1_GP.png) 9 | 10 | ![image](https://github.com/Xiao-dong-Wang/Transfer-GP/blob/master/figures/test1_TGP.png) 11 | 12 | Codes reimplemented here is based on the idea from the following paper: 13 | 14 | - Bin Cao, Sinno Jialin Pan, Yu Zhang, Dit-Yan Yeung, Qiang Yang, Adaptive transfer learning, *Twenty-Fourth Conference on Artificial Intelligence* 15 | (AAAI), 2010. 16 | 17 | Dependencies: 18 | 19 | Autograd: https://github.com/HIPS/autograd 20 | 21 | Scipy: https://github.com/scipy/scipy 22 | -------------------------------------------------------------------------------- /TGP.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | from autograd import value_and_grad 3 | from scipy.optimize import fmin_l_bfgs_b 4 | from util import chol_inv 5 | import traceback 6 | import sys 7 | 8 | # Transfer Gaussian Process 9 | class TGP: 10 | # Initialize TGP class 11 | # train_x shape: (dim_in, num_train); train_y shape: (dim_out, num_train) 12 | def __init__(self, dataset, bfgs_iter=2000, debug=True): 13 | self.src_x = dataset['src_x'] 14 | self.src_y = dataset['src_y'] 15 | self.tag_x = dataset['tag_x'] 16 | self.tag_y = dataset['tag_y'] 17 | self.train_x = np.hstack((self.src_x, self.tag_x)) 18 | self.train_y = np.hstack((self.src_y, self.tag_y)) 19 | self.bfgs_iter = bfgs_iter 20 | self.debug = debug 21 | self.dim = self.tag_x.shape[0] 22 | self.num_src = self.src_x.shape[1] 23 | self.num_tag = self.tag_x.shape[1] 24 | self.jitter = 1e-4 25 | self.normalize() 26 | 27 | # Normalize y 28 | def normalize(self): 29 | self.train_y = self.train_y.reshape(-1) 30 | self.mean = self.train_y.mean() 31 | self.std = self.train_y.std() + 0.000001 32 | self.train_y = (self.train_y - self.mean)/self.std 33 | 34 | self.src_y = self.train_y[:self.num_src] 35 | self.tag_y = self.train_y[self.num_src:] 36 | 37 | # Initialize hyper_parameters 38 | # theta: output_scale, length_scale, sigma2_src, sigma2_tag, lambda 39 | def get_default_theta(self): 40 | theta = np.random.randn(4 + self.dim) 41 | for i in range(self.dim): 42 | theta[1+i] = np.maximum(-100, np.log(0.5*(self.train_x[i].max() - self.train_x[i].min()))) #length scale 43 | theta[self.dim+1] = np.log(np.std(self.src_y)) # sigma2_src 44 | theta[self.dim+2] = np.log(np.std(self.tag_y)) # sigma2_tag 45 | theta[self.dim+3] = 2 * np.random.random(1) - 1 # -1< lambda <1 46 | return theta 47 | 48 | # inner domain kernel 49 | def kernel1(self, x, xp, theta): 50 | output_scale = np.exp(theta[0]) 51 | lengthscales = np.exp(theta[1:self.dim+1]) + 0.000001 52 | diffs = np.expand_dims((x.T/lengthscales).T, 2) - np.expand_dims((xp.T/lengthscales).T, 1) 53 | return output_scale * np.exp(-0.5*np.sum(diffs**2, axis=0)) 54 | 55 | # inter domain kernel 56 | def kernel2(self, x, xp, theta): 57 | lamd = theta[self.dim+3] 58 | return lamd * self.kernel1(x, xp, theta) 59 | 60 | def kernel(self, src_x, tag_x, theta): 61 | # K = 62 | # K_ss K_st 63 | # K_ts K_tt 64 | sigma2_src = np.exp(theta[self.dim+1]) 65 | sigma2_tag = np.exp(theta[self.dim+2]) 66 | K_ss = self.kernel1(src_x, src_x, theta) + sigma2_src * np.eye(self.num_src) + self.jitter*np.eye(self.num_src) 67 | K_st = self.kernel2(src_x, tag_x, theta) 68 | K_ts = K_st.T 69 | K_tt = self.kernel1(tag_x, tag_x, theta) + sigma2_tag * np.eye(self.num_tag) + self.jitter*np.eye(self.num_tag) 70 | tmp1 = np.hstack((K_ss, K_st)) 71 | tmp2 = np.hstack((K_ts, K_tt)) 72 | K = np.concatenate((tmp1, tmp2)) 73 | return K 74 | 75 | def neg_log_likelihood(self, theta): 76 | sigma2_src = np.exp(theta[self.dim+1]) 77 | sigma2_tag = np.exp(theta[self.dim+2]) 78 | K_ss = self.kernel1(self.src_x, self.src_x, theta) + sigma2_src * np.eye(self.num_src) + self.jitter*np.eye(self.num_src) 79 | K_st = self.kernel2(self.src_x, self.tag_x, theta) 80 | K_ts = K_st.T 81 | K_tt = self.kernel1(self.tag_x, self.tag_x, theta) + sigma2_tag * np.eye(self.num_tag) + self.jitter*np.eye(self.num_tag) 82 | 83 | L_ss = np.linalg.cholesky(K_ss) 84 | tmp1 = chol_inv(L_ss, self.src_y.T) 85 | tmp2 = chol_inv(L_ss, K_st) 86 | mu_t = np.dot(K_ts, tmp1) 87 | C_t = K_tt - np.dot(K_ts, tmp2) 88 | 89 | L_t = np.linalg.cholesky(C_t) 90 | logDetCt = np.sum(np.log(np.diag(L_t))) 91 | delta = self.tag_y.T - mu_t 92 | alpha = chol_inv(L_t, delta) 93 | nlz = 0.5*(np.dot(delta.T, alpha) + self.num_tag*np.log(2*np.pi)) + logDetCt 94 | if(np.isnan(nlz)): 95 | nlz = np.inf 96 | 97 | self.nlz = nlz 98 | return nlz 99 | 100 | # Minimize the negative log-likelihood 101 | def train(self): 102 | theta0 = self.get_default_theta() 103 | self.loss = np.inf 104 | self.theta = np.copy(theta0) 105 | hyp_bounds = [[None, None]] * (self.dim+3) 106 | hyp_bounds.extend([[-1,1]]) 107 | 108 | nlz = self.neg_log_likelihood(theta0) 109 | 110 | def loss(theta): 111 | nlz = self.neg_log_likelihood(theta) 112 | return nlz 113 | 114 | def callback(theta): 115 | if self.nlz < self.loss: 116 | self.loss = self.nlz 117 | self.theta = np.copy(theta) 118 | 119 | gloss = value_and_grad(loss) 120 | 121 | try: 122 | fmin_l_bfgs_b(gloss, theta0, bounds=hyp_bounds, maxiter=self.bfgs_iter, m = 100, iprint=self.debug, callback=callback) 123 | except np.linalg.LinAlgError: 124 | print('TGP. Increase noise term and re-optimization') 125 | theta0 = np.copy(self.theta) 126 | theta0[self.dim+1] += np.log(10) 127 | theta0[self.dim+2] += np.log(10) 128 | try: 129 | fmin_l_bfgs_b(gloss, theta0, bounds=hyp_bounds, maxiter=self.bfgs_iter, m=10, iprint=self.debug, callback=callback) 130 | except: 131 | print('TGP. Exception caught, L-BFGS early stopping...') 132 | if self.debug: 133 | print(traceback.format_exc()) 134 | except: 135 | print('TGP. Exception caught, L-BFGS early stopping...') 136 | if self.debug: 137 | print(traceback.format_exc()) 138 | 139 | if(np.isinf(self.loss) or np.isnan(self.loss)): 140 | print('TGP. Failed to build TGP model') 141 | sys.exit(1) 142 | 143 | print('TGP. TGP model training process finished') 144 | 145 | def predict(self, test_x, is_diag=1): 146 | output_scale = np.exp(self.theta[0]) 147 | sigma2_tag = np.exp(self.theta[self.dim+2]) 148 | C = self.kernel(self.src_x, self.tag_x, self.theta) 149 | L_C = np.linalg.cholesky(C) 150 | alpha_C = chol_inv(L_C, self.train_y.T) 151 | k_star_s = self.kernel2(test_x, self.src_x, self.theta) 152 | k_star_t = self.kernel1(test_x, self.tag_x, self.theta) 153 | k_star = np.hstack((k_star_s, k_star_t)) 154 | py = np.dot(k_star, alpha_C) 155 | 156 | Cvks = chol_inv(L_C, k_star.T) 157 | if is_diag: 158 | ps2 = output_scale + sigma2_tag - (k_star * Cvks.T).sum(axis=1) 159 | else: 160 | ps2 = self.kernel1(test_x, test_x, self.theta) + sigma2_tag - np.dot(k_star, Cvks) 161 | ps2 = np.abs(ps2) 162 | py = py * self.std + self.mean 163 | ps2 = ps2 * (self.std**2) 164 | return py, ps2 165 | 166 | 167 | -------------------------------------------------------------------------------- /figures/test1_GP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test1_GP.png -------------------------------------------------------------------------------- /figures/test1_TGP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test1_TGP.png -------------------------------------------------------------------------------- /figures/test2_GP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test2_GP.png -------------------------------------------------------------------------------- /figures/test2_TGP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test2_TGP.png -------------------------------------------------------------------------------- /get_dataset.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | import os 3 | import string 4 | 5 | def init_dataset(funct, num, bounds): 6 | dim = bounds.shape[0] 7 | num_src = num[0] 8 | num_tag = num[1] 9 | src_x = np.random.uniform(-0.5, 0.5, (dim, num_src)) 10 | tag_x = np.random.uniform(-0.5, 0.5, (dim, num_tag)) 11 | 12 | dataset = {} 13 | dataset['src_x'] = src_x 14 | dataset['tag_x'] = tag_x 15 | dataset['src_y'] = funct[0](src_x, bounds) 16 | dataset['tag_y'] = funct[1](tag_x, bounds) 17 | return dataset 18 | 19 | def get_test(funct, num, bounds): 20 | dim = bounds.shape[1] 21 | dataset = {} 22 | dataset['test_x'] = np.random.uniform(-0.5, 0.5, (dim, num)) 23 | dataset['test_y'] = funct[1](dataset['test_x'], bounds) 24 | return dataset 25 | 26 | 27 | # bounds: -0.5 : 1 28 | def test1_tag(x, bounds): 29 | mean = bounds.mean(axis=1) 30 | delta = bounds[:,1] - bounds[:,0] 31 | x = (x.T * delta + mean).T 32 | ret = (x+0.03)**2 * np.sin(5.0*np.pi*(x+0.03))+0.1 33 | return ret.reshape(1, -1) 34 | 35 | def test1_src(x, bounds): 36 | mean = bounds.mean(axis=1) 37 | delta = bounds[:,1] - bounds[:,0] 38 | x = (x.T * delta + mean).T 39 | ret = x**2 * np.sin(5.0*np.pi*x) 40 | return ret.reshape(1, -1) 41 | 42 | 43 | # bounds: 0 : 1 44 | def test2_tag(x, bounds): 45 | mean = bounds.mean(axis=1) 46 | delta = bounds[:,1] - bounds[:,0] 47 | x = (x.T * delta + mean).T 48 | ret = (6.0*x - 2.0)**2 * np.sin(12.*x - 4.0) 49 | return ret.reshape(1, -1) 50 | 51 | def test2_src(x, bounds): 52 | tmp = test2_tag(x, bounds) 53 | mean = bounds.mean(axis=1) 54 | delta = bounds[:,1] - bounds[:,0] 55 | x = (x.T * delta + mean).T 56 | ret = 0.5*tmp + 10.0*(x-0.5) - 5.0 57 | return ret.reshape(1, -1) 58 | 59 | def get_funct(funct): 60 | if funct == 'test1': 61 | return [test1_src, test1_tag] 62 | elif funct == 'test2': 63 | return [test2_src, test2_tag] 64 | else: 65 | return [test1_src, test1_tag] 66 | 67 | -------------------------------------------------------------------------------- /paper/ATL via GP.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/paper/ATL via GP.pdf -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | import sys 3 | import toml 4 | from util import * 5 | from TGP import TGP 6 | from get_dataset import * 7 | import multiprocessing 8 | import pickle 9 | import matplotlib.pyplot as plt 10 | 11 | np.random.seed(9) 12 | 13 | argv = sys.argv[1:] 14 | conf = toml.load(argv[0]) 15 | 16 | name = conf['funct'] 17 | funct = get_funct(name) 18 | num = conf['num'] 19 | bounds = np.array(conf['bounds']) 20 | bfgs_iter = conf['bfgs_iter'] 21 | 22 | #### TGP 23 | dataset = init_dataset(funct, num, bounds) 24 | src_x = dataset['src_x'] 25 | src_y = dataset['src_y'] 26 | tag_x = dataset['tag_x'] 27 | tag_y = dataset['tag_y'] 28 | model = TGP(dataset, bfgs_iter[0], debug=True) 29 | model.train() 30 | 31 | 32 | 33 | # Test data 34 | nn = 200 35 | X_star = np.linspace(-0.5, 0.5, nn)[None,:] 36 | y_star_tag = funct[1](X_star,bounds) 37 | y_star_src = funct[0](X_star,bounds) 38 | X_star_real = X_star * (bounds[0,1]-bounds[0,0]) + (bounds[0,1]+bounds[0,0])/2 39 | y_pred, y_var = model.predict(X_star) 40 | 41 | src_x_real = src_x * (bounds[0,1]-bounds[0,0]) + (bounds[0,1]+bounds[0,0])/2 42 | tag_x_real = tag_x * (bounds[0,1]-bounds[0,0]) + (bounds[0,1]+bounds[0,0])/2 43 | 44 | 45 | plt.figure() 46 | plt.cla() 47 | plt.rc('text', usetex=True) 48 | plt.rc('font', family='serif', size=10) 49 | plt.plot(X_star_real.flatten(), y_star_tag.flatten(), 'b-', label = "tag function", linewidth=2) 50 | plt.plot(X_star_real.flatten(), y_star_src.flatten(), 'g-', label = "src function", linewidth=2) 51 | plt.plot(X_star_real.flatten(), y_pred.flatten(), 'r--', label = "Prediction", linewidth=2) 52 | lower = y_pred - 2.0*np.sqrt(y_var) 53 | upper = y_pred + 2.0*np.sqrt(y_var) 54 | plt.fill_between(X_star_real.flatten(), lower.flatten(), upper.flatten(), 55 | facecolor='pink', alpha=0.5, label="Two std band") 56 | plt.plot(src_x_real, src_y, 'go') 57 | plt.plot(tag_x_real, tag_y, 'ko') 58 | plt.legend() 59 | ax = plt.gca() 60 | ax.set_xlim([bounds[0,0], bounds[0,1]]) 61 | plt.xlabel('x') 62 | plt.ylabel('f(x)') 63 | 64 | plt.show() 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python run.py test2.toml > log 2>&1 & 4 | -------------------------------------------------------------------------------- /test1.toml: -------------------------------------------------------------------------------- 1 | funct = 'test1' 2 | num = [80,8] 3 | bounds = [[-0.5,1.0]] 4 | bfgs_iter = [2000] 5 | -------------------------------------------------------------------------------- /test2.toml: -------------------------------------------------------------------------------- 1 | funct = 'test2' 2 | num = [60,8] 3 | bounds = [[0,1]] 4 | bfgs_iter = [2000] 5 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | import autograd.numpy as np 2 | 3 | def chol_inv(L, y): 4 | tmp = np.linalg.solve(L, y) 5 | return np.linalg.solve(L.T, tmp) 6 | 7 | def erf(x): 8 | # constants 9 | a1 = 0.254829592 10 | a2 = -0.284496736 11 | a3 = 1.421413741 12 | a4 = -1.453152027 13 | a5 = 1.061405429 14 | p = 0.3275911 15 | 16 | # Save the sign of x 17 | sign = np.sign(x) 18 | x = np.abs(x) 19 | 20 | # A&S formula 7.1.26 21 | t = 1.0/(1.0 + p*x) 22 | y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*np.exp(-x**2) 23 | 24 | return sign*y 25 | 26 | def normpdf(x): 27 | return np.exp(-x**2 / 2) / np.sqrt(2*np.pi) 28 | 29 | def normcdf(x): 30 | return 0.5 + erf(x/np.sqrt(2)) / 2 31 | 32 | def logphi(x): 33 | if x**2 < 0.0492: 34 | lp0 = -x/np.sqrt(2*np.pi) 35 | c = np.array([0.00048204, -0.00142906, 0.0013200243174, 0.0009461589032, -0.0045563339802, 0.00556964649138, 0.00125993961762116, -0.01621575378835404, 0.02629651521057465, -0.001829764677455021, 2*(1-np.pi/3), (4-np.pi)/3, 1, 1]) 36 | f = 0 37 | for i in range(14): 38 | f = lp0*(c[i]+f) 39 | return -2*f-np.log(2) 40 | elif x < -11.3137: 41 | r = np.array([1.2753666447299659525, 5.019049726784267463450, 6.1602098531096305441, 7.409740605964741794425, 2.9788656263939928886]) 42 | q = np.array([2.260528520767326969592, 9.3960340162350541504, 12.048951927855129036034, 17.081440747466004316, 9.608965327192787870698, 3.3690752069827527677]) 43 | num = 0.5641895835477550741 44 | for i in range(5): 45 | num = -x*num/np.sqrt(2)+r[i] 46 | den = 1.0 47 | for i in range(6): 48 | den = -x*den/np.sqrt(2)+q[i] 49 | return np.log(0.5*np.maximum(0.000001,num/den))-0.5*(x**2) 50 | else: 51 | return np.log(0.5*np.maximum(0.000001,(1.0-erf(-x/np.sqrt(2))))) 52 | 53 | # logphi_vector for autograd 54 | def logphi_vector(x): 55 | # phi1 56 | lp0 = -x/np.sqrt(2*np.pi) 57 | c = np.array([0.00048204, -0.00142906, 0.0013200243174, 0.0009461589032, -0.0045563339802, 0.00556964649138, 0.00125993961762116, -0.01621575378835404, 0.02629651521057465, -0.001829764677455021, 2*(1-np.pi/3), (4-np.pi)/3, 1, 1]) 58 | f = 0 59 | for i in range(14): 60 | f = lp0*(c[i]+f) 61 | phi1 = -2*f - np.log(2) 62 | 63 | # phi2 64 | r = np.array([1.2753666447299659525, 5.019049726784267463450, 6.1602098531096305441, 7.409740605964741794425, 2.9788656263939928886]) 65 | q = np.array([2.260528520767326969592, 9.3960340162350541504, 12.048951927855129036034, 17.081440747466004316, 9.608965327192787870698, 3.3690752069827527677]) 66 | num = 0.5641895835477550741 67 | for i in range(5): 68 | num = -x*num/np.sqrt(2)+r[i] 69 | den = 1.0 70 | for i in range(6): 71 | den = -x*den/np.sqrt(2)+q[i] 72 | phi2 = np.log(0.5*np.maximum(0.000001,num/den))-0.5*(x**2) 73 | 74 | # phi3 75 | phi3 = np.log(0.5*np.maximum(0.000001,(1.0-erf(-x/np.sqrt(2))))) 76 | 77 | # phi 78 | x2 = x**2 79 | phi = phi1 * (x2 < 0.0492) + phi2 * (x < -11.3137) + phi3 * ((x >= -11.3137) | (x2 >= 0.0492)) 80 | return phi 81 | 82 | 83 | 84 | 85 | 86 | --------------------------------------------------------------------------------