├── README.md
├── TGP.py
├── figures
    ├── test1_GP.png
    ├── test1_TGP.png
    ├── test2_GP.png
    └── test2_TGP.png
├── get_dataset.py
├── paper
    └── ATL via GP.pdf
├── run.py
├── run.sh
├── test1.toml
├── test2.toml
└── util.py


/README.md:
--------------------------------------------------------------------------------
 1 | # Transfer-GP
 2 | 
 3 | ## About
 4 | An adaptive transfer learning algorithm based on Gaussian Process, which can be used to adapt the transfer learning schemes by automatically estimating the similarity between a source and a target task.
 5 | 
 6 | A comparison of the regression results of the conventional Gaussian process method and the transfer Gaussian process method is shown below.
 7 | 
 8 | ![image](https://github.com/Xiao-dong-Wang/Transfer-GP/blob/master/figures/test1_GP.png)
 9 | 
10 | ![image](https://github.com/Xiao-dong-Wang/Transfer-GP/blob/master/figures/test1_TGP.png)
11 | 
12 | The code here is a reimplementation based on the idea from the following paper:
13 | 
14 | - Bin Cao, Sinno Jialin Pan, Yu Zhang, Dit-Yan Yeung, Qiang Yang, Adaptive transfer learning, *Twenty-Fourth Conference on Artificial Intelligence*
15 | (AAAI), 2010.
16 | 
17 | Dependencies:
18 | 
19 | Autograd: https://github.com/HIPS/autograd
20 | 
21 | Scipy: https://github.com/scipy/scipy
22 | 


--------------------------------------------------------------------------------
/TGP.py:
--------------------------------------------------------------------------------
  1 | import autograd.numpy as np
  2 | from autograd import value_and_grad
  3 | from scipy.optimize import fmin_l_bfgs_b
  4 | from util import chol_inv
  5 | import traceback
  6 | import sys
  7 | 
  8 | # Transfer Gaussian Process
  9 | class TGP:
 10 |     # Initialize TGP class
 11 |     # train_x shape: (dim_in, num_train);   train_y shape: (dim_out, num_train) 
 12 |     def __init__(self, dataset, bfgs_iter=2000, debug=True):
 13 |         self.src_x = dataset['src_x']
 14 |         self.src_y = dataset['src_y']
 15 |         self.tag_x = dataset['tag_x']
 16 |         self.tag_y = dataset['tag_y']
 17 |         self.train_x = np.hstack((self.src_x, self.tag_x))
 18 |         self.train_y = np.hstack((self.src_y, self.tag_y))
 19 |         self.bfgs_iter = bfgs_iter
 20 |         self.debug = debug
 21 |         self.dim = self.tag_x.shape[0]
 22 |         self.num_src = self.src_x.shape[1]
 23 |         self.num_tag = self.tag_x.shape[1]
 24 |         self.jitter = 1e-4
 25 |         self.normalize()
 26 | 
 27 |     # Normalize y
 28 |     def normalize(self):
 29 |         self.train_y = self.train_y.reshape(-1)
 30 |         self.mean = self.train_y.mean()
 31 |         self.std = self.train_y.std() + 0.000001
 32 |         self.train_y = (self.train_y - self.mean)/self.std
 33 | 
 34 |         self.src_y = self.train_y[:self.num_src]
 35 |         self.tag_y = self.train_y[self.num_src:]
 36 | 
 37 |     # Initialize hyper_parameters
 38 |     #   theta: output_scale, length_scale, sigma2_src, sigma2_tag, lambda
 39 |     def get_default_theta(self):
 40 |         theta = np.random.randn(4 + self.dim)
 41 |         for i in range(self.dim):
 42 |             theta[1+i] = np.maximum(-100, np.log(0.5*(self.train_x[i].max() - self.train_x[i].min()))) #length scale
 43 |         theta[self.dim+1] = np.log(np.std(self.src_y)) # sigma2_src
 44 |         theta[self.dim+2] = np.log(np.std(self.tag_y)) # sigma2_tag
 45 |         theta[self.dim+3] = 2 * np.random.random(1) - 1 # -1< lambda <1
 46 |         return theta
 47 | 
 48 |     # inner domain kernel
 49 |     def kernel1(self, x, xp, theta):
 50 |         output_scale = np.exp(theta[0])
 51 |         lengthscales = np.exp(theta[1:self.dim+1]) + 0.000001
 52 |         diffs = np.expand_dims((x.T/lengthscales).T, 2) - np.expand_dims((xp.T/lengthscales).T, 1)
 53 |         return output_scale * np.exp(-0.5*np.sum(diffs**2, axis=0))
 54 |     
 55 |     # inter domain kernel
 56 |     def kernel2(self, x, xp, theta):
 57 |         lamd = theta[self.dim+3]
 58 |         return lamd * self.kernel1(x, xp, theta)
 59 | 
 60 |     def kernel(self, src_x, tag_x, theta):
 61 |         # K =
 62 |         # K_ss  K_st
 63 |         # K_ts  K_tt
 64 |         sigma2_src = np.exp(theta[self.dim+1])
 65 |         sigma2_tag = np.exp(theta[self.dim+2])
 66 |         K_ss = self.kernel1(src_x, src_x, theta) + sigma2_src * np.eye(self.num_src) + self.jitter*np.eye(self.num_src)
 67 |         K_st = self.kernel2(src_x, tag_x, theta)
 68 |         K_ts = K_st.T
 69 |         K_tt = self.kernel1(tag_x, tag_x, theta) + sigma2_tag * np.eye(self.num_tag) + self.jitter*np.eye(self.num_tag)
 70 |         tmp1 = np.hstack((K_ss, K_st))
 71 |         tmp2 = np.hstack((K_ts, K_tt))
 72 |         K = np.concatenate((tmp1, tmp2))
 73 |         return K
 74 | 
 75 |     def neg_log_likelihood(self, theta):
 76 |         sigma2_src = np.exp(theta[self.dim+1])
 77 |         sigma2_tag = np.exp(theta[self.dim+2])
 78 |         K_ss = self.kernel1(self.src_x, self.src_x, theta) + sigma2_src * np.eye(self.num_src) + self.jitter*np.eye(self.num_src)
 79 |         K_st = self.kernel2(self.src_x, self.tag_x, theta)
 80 |         K_ts = K_st.T
 81 |         K_tt = self.kernel1(self.tag_x, self.tag_x, theta) + sigma2_tag * np.eye(self.num_tag) + self.jitter*np.eye(self.num_tag)
 82 | 
 83 |         L_ss = np.linalg.cholesky(K_ss)
 84 |         tmp1 = chol_inv(L_ss, self.src_y.T)
 85 |         tmp2 = chol_inv(L_ss, K_st)
 86 |         mu_t = np.dot(K_ts, tmp1)
 87 |         C_t  = K_tt - np.dot(K_ts, tmp2)
 88 | 
 89 |         L_t = np.linalg.cholesky(C_t)
 90 |         logDetCt = np.sum(np.log(np.diag(L_t)))
 91 |         delta = self.tag_y.T - mu_t
 92 |         alpha = chol_inv(L_t, delta)
 93 |         nlz = 0.5*(np.dot(delta.T, alpha) + self.num_tag*np.log(2*np.pi)) + logDetCt
 94 |         if(np.isnan(nlz)):
 95 |             nlz = np.inf
 96 | 
 97 |         self.nlz = nlz
 98 |         return nlz
 99 | 
100 |     # Minimize the negative log-likelihood
101 |     def train(self):
102 |         theta0 = self.get_default_theta()
103 |         self.loss = np.inf
104 |         self.theta = np.copy(theta0)
105 |         hyp_bounds = [[None, None]] * (self.dim+3)
106 |         hyp_bounds.extend([[-1,1]])
107 | 
108 |         nlz = self.neg_log_likelihood(theta0)
109 | 
110 |         def loss(theta):
111 |             nlz = self.neg_log_likelihood(theta)
112 |             return nlz
113 | 
114 |         def callback(theta):
115 |             if self.nlz < self.loss:
116 |                 self.loss = self.nlz
117 |                 self.theta = np.copy(theta)
118 | 
119 |         gloss = value_and_grad(loss)
120 | 
121 |         try:
122 |             fmin_l_bfgs_b(gloss, theta0, bounds=hyp_bounds, maxiter=self.bfgs_iter, m = 100, iprint=self.debug, callback=callback)
123 |         except np.linalg.LinAlgError:
124 |             print('TGP. Increase noise term and re-optimization')
125 |             theta0 = np.copy(self.theta)
126 |             theta0[self.dim+1] += np.log(10)
127 |             theta0[self.dim+2] += np.log(10)
128 |             try:
129 |                 fmin_l_bfgs_b(gloss, theta0, bounds=hyp_bounds, maxiter=self.bfgs_iter, m=10, iprint=self.debug, callback=callback)
130 |             except:
131 |                 print('TGP. Exception caught, L-BFGS early stopping...')
132 |                 if self.debug:
133 |                     print(traceback.format_exc())
134 |         except:
135 |             print('TGP. Exception caught, L-BFGS early stopping...')
136 |             if self.debug:
137 |                 print(traceback.format_exc())
138 | 
139 |         if(np.isinf(self.loss) or np.isnan(self.loss)):
140 |             print('TGP. Failed to build TGP model')
141 |             sys.exit(1)
142 | 
143 |         print('TGP. TGP model training process finished')
144 | 
145 |     def predict(self, test_x, is_diag=1):
146 |         output_scale = np.exp(self.theta[0])
147 |         sigma2_tag = np.exp(self.theta[self.dim+2])
148 |         C = self.kernel(self.src_x, self.tag_x, self.theta)
149 |         L_C = np.linalg.cholesky(C)
150 |         alpha_C = chol_inv(L_C, self.train_y.T)
151 |         k_star_s = self.kernel2(test_x, self.src_x, self.theta)
152 |         k_star_t = self.kernel1(test_x, self.tag_x, self.theta)
153 |         k_star = np.hstack((k_star_s, k_star_t))
154 |         py = np.dot(k_star, alpha_C)
155 | 
156 |         Cvks = chol_inv(L_C, k_star.T)
157 |         if is_diag:
158 |             ps2 = output_scale + sigma2_tag - (k_star * Cvks.T).sum(axis=1)
159 |         else:
160 |             ps2 = self.kernel1(test_x, test_x, self.theta) + sigma2_tag - np.dot(k_star, Cvks)
161 |         ps2 = np.abs(ps2)
162 |         py = py * self.std + self.mean
163 |         ps2 = ps2 * (self.std**2)
164 |         return py, ps2
165 |    
166 | 
167 | 


--------------------------------------------------------------------------------
/figures/test1_GP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test1_GP.png


--------------------------------------------------------------------------------
/figures/test1_TGP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test1_TGP.png


--------------------------------------------------------------------------------
/figures/test2_GP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test2_GP.png


--------------------------------------------------------------------------------
/figures/test2_TGP.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/figures/test2_TGP.png


--------------------------------------------------------------------------------
/get_dataset.py:
--------------------------------------------------------------------------------
 1 | import autograd.numpy as np
 2 | import os
 3 | import string
 4 | 
 5 | def init_dataset(funct, num, bounds):
 6 |     dim = bounds.shape[0]
 7 |     num_src = num[0]
 8 |     num_tag = num[1]
 9 |     src_x = np.random.uniform(-0.5, 0.5, (dim, num_src))
10 |     tag_x = np.random.uniform(-0.5, 0.5, (dim, num_tag))
11 | 
12 |     dataset = {}
13 |     dataset['src_x']    = src_x
14 |     dataset['tag_x']    = tag_x
15 |     dataset['src_y']    = funct[0](src_x, bounds)
16 |     dataset['tag_y']    = funct[1](tag_x, bounds)
17 |     return dataset
18 | 
def get_test(funct, num, bounds):
    """Sample a test set for the target task.

    Args:
        funct: pair ``[src_function, tag_function]``; only the target
            function ``funct[1]`` is evaluated, as ``funct[1](x, bounds)``.
        num: number of test points.
        bounds: array with one ``[lower, upper]`` row per input dimension.

    Returns:
        dict with 'test_x' of shape (dim, num), drawn uniformly from
        [-0.5, 0.5), and 'test_y', the target function evaluated on it.
    """
    # The input dimension is the number of rows of bounds; the second axis
    # only holds the (lower, upper) pair and is always of length 2.
    dim = bounds.shape[0]
    dataset = {}
    dataset['test_x'] = np.random.uniform(-0.5, 0.5, (dim, num))
    dataset['test_y'] = funct[1](dataset['test_x'], bounds)
    return dataset
25 | 
26 | 
27 | # bounds:  -0.5 : 1
28 | def test1_tag(x, bounds):
29 |     mean = bounds.mean(axis=1)
30 |     delta = bounds[:,1] - bounds[:,0]
31 |     x = (x.T * delta + mean).T
32 |     ret = (x+0.03)**2 * np.sin(5.0*np.pi*(x+0.03))+0.1
33 |     return ret.reshape(1, -1)
34 | 
35 | def test1_src(x, bounds):
36 |     mean = bounds.mean(axis=1)
37 |     delta = bounds[:,1] - bounds[:,0]
38 |     x = (x.T * delta + mean).T
39 |     ret = x**2 * np.sin(5.0*np.pi*x)
40 |     return ret.reshape(1, -1)
41 | 
42 | 
43 | # bounds:   0 : 1
44 | def test2_tag(x, bounds):
45 |     mean = bounds.mean(axis=1)
46 |     delta = bounds[:,1] - bounds[:,0]
47 |     x = (x.T * delta + mean).T
48 |     ret = (6.0*x - 2.0)**2 * np.sin(12.*x - 4.0)
49 |     return ret.reshape(1, -1)
50 | 
def test2_src(x, bounds):
    """Source function of benchmark 'test2'.

    Computed from the target function as
    ``0.5 * test2_tag(x, bounds) + 10*(z - 0.5) - 5`` where
    ``z = x * width + centre`` is the input rescaled with ``bounds``.
    The result is flattened into a single row.
    """
    target_values = test2_tag(x, bounds)
    centre = bounds.mean(axis=1)
    width = bounds[:, 1] - bounds[:, 0]
    z = (x.T * width + centre).T
    linear_trend = 10.0*(z - 0.5)
    values = 0.5*target_values + linear_trend - 5.0
    return values.reshape(1, -1)
58 | 
def get_funct(funct):
    """Return the ``[source, target]`` function pair for a benchmark name.

    'test2' selects the test2 pair; 'test1' and any other value select the
    test1 pair.
    """
    if funct == 'test2':
        return [test2_src, test2_tag]
    # 'test1' and unrecognized names share the same default pair
    return [test1_src, test1_tag]
66 | 
67 | 


--------------------------------------------------------------------------------
/paper/ATL via GP.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Xiao-dong-Wang/Transfer-GP/bf2ac4cfd7c5faf3fd2372f8e28803a1e7e2ed0b/paper/ATL via GP.pdf


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
 1 | import autograd.numpy as np
 2 | import sys
 3 | import toml
 4 | from util import *
 5 | from TGP import TGP
 6 | from get_dataset import *
 7 | import multiprocessing
 8 | import pickle
 9 | import matplotlib.pyplot as plt
10 | 
# Fixed seed so the sampled training data and initial theta are reproducible
np.random.seed(9)

# The single positional argument is the TOML configuration file
argv = sys.argv[1:]
if not argv:
    sys.exit('Usage: python run.py <config.toml>')
conf = toml.load(argv[0])

name = conf['funct']
funct = get_funct(name)
num = conf['num']
bounds = np.array(conf['bounds'])
bfgs_iter = conf['bfgs_iter']

#### TGP
dataset = init_dataset(funct, num, bounds)
src_x = dataset['src_x']
src_y = dataset['src_y']
tag_x = dataset['tag_x']
tag_y = dataset['tag_y']
model = TGP(dataset, bfgs_iter[0], debug=True)
model.train()

# Map the model's [-0.5, 0.5] inputs back to the first dimension of bounds
x_scale = bounds[0,1] - bounds[0,0]
x_shift = (bounds[0,1] + bounds[0,0])/2

# Test data: a dense grid over the normalized input range
nn = 200
X_star = np.linspace(-0.5, 0.5, nn)[None,:]
y_star_tag = funct[1](X_star,bounds)
y_star_src = funct[0](X_star,bounds)
X_star_real = X_star * x_scale + x_shift
y_pred, y_var = model.predict(X_star)

src_x_real = src_x * x_scale + x_shift
tag_x_real = tag_x * x_scale + x_shift

# Plot both true functions, the prediction with a two-std band, and the samples
plt.figure()
plt.cla()
plt.rc('text', usetex=True)
plt.rc('font', family='serif', size=10)
plt.plot(X_star_real.flatten(), y_star_tag.flatten(), 'b-', label = "tag function", linewidth=2)
plt.plot(X_star_real.flatten(), y_star_src.flatten(), 'g-', label = "src function", linewidth=2)
plt.plot(X_star_real.flatten(), y_pred.flatten(), 'r--', label = "Prediction", linewidth=2)
lower = y_pred - 2.0*np.sqrt(y_var)
upper = y_pred + 2.0*np.sqrt(y_var)
plt.fill_between(X_star_real.flatten(), lower.flatten(), upper.flatten(),
                 facecolor='pink', alpha=0.5, label="Two std band")
plt.plot(src_x_real, src_y, 'go')
plt.plot(tag_x_real, tag_y, 'ko')
plt.legend()
ax = plt.gca()
ax.set_xlim([bounds[0,0], bounds[0,1]])
plt.xlabel('x')
plt.ylabel('f(x)')

plt.show()
65 | 
66 | 
67 | 
68 | 
69 | 
70 | 
71 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | python run.py test2.toml > log 2>&1 &
4 | 


--------------------------------------------------------------------------------
/test1.toml:
--------------------------------------------------------------------------------
# Benchmark name; get_funct() in get_dataset.py maps it to a [source, target] function pair
funct = 'test1'
# [number of source samples, number of target samples]
num = [80,8]
# One [lower, upper] pair per input dimension
bounds = [[-0.5,1.0]]
# run.py passes the first entry to TGP as its bfgs_iter argument
bfgs_iter = [2000]
5 | 


--------------------------------------------------------------------------------
/test2.toml:
--------------------------------------------------------------------------------
# Benchmark name; get_funct() in get_dataset.py maps it to a [source, target] function pair
funct = 'test2'
# [number of source samples, number of target samples]
num = [60,8]
# One [lower, upper] pair per input dimension
bounds = [[0,1]]
# run.py passes the first entry to TGP as its bfgs_iter argument
bfgs_iter = [2000]
5 | 


--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
 1 | import autograd.numpy as np
 2 | 
 3 | def chol_inv(L, y):
 4 |     tmp = np.linalg.solve(L, y)
 5 |     return np.linalg.solve(L.T, tmp)
 6 | 
 7 | def erf(x):
 8 |     # constants
 9 |     a1 =  0.254829592
10 |     a2 = -0.284496736
11 |     a3 =  1.421413741
12 |     a4 = -1.453152027
13 |     a5 =  1.061405429
14 |     p  =  0.3275911
15 |                             
16 |     # Save the sign of x
17 |     sign = np.sign(x)
18 |     x = np.abs(x)
19 |                                                         
20 |     # A&S formula 7.1.26
21 |     t = 1.0/(1.0 + p*x)
22 |     y = 1.0 - (((((a5*t + a4)*t) + a3)*t + a2)*t + a1)*t*np.exp(-x**2)
23 |                                                                     
24 |     return sign*y
25 | 
def normpdf(x):
    """Standard normal density ``exp(-x**2 / 2) / sqrt(2*pi)``, element-wise."""
    unnormalized = np.exp(-x**2 / 2)
    return unnormalized / np.sqrt(2*np.pi)
28 | 
def normcdf(x):
    """Standard normal CDF computed as ``0.5 + erf(x / sqrt(2)) / 2``.

    Uses this module's polynomial ``erf`` approximation.
    """
    half_erf = erf(x/np.sqrt(2)) / 2
    return 0.5 + half_erf
31 | 
def logphi(x):
    """Log of the standard normal CDF for a scalar ``x``, computed piecewise.

    Three regimes are used:
      * ``x**2 < 0.0492``: a 14-term polynomial series around zero;
      * ``x < -11.3137``: a rational approximation plus ``-x**2 / 2``;
      * otherwise: ``log(0.5 * (1 - erf(-x / sqrt(2))))``.
    The argument of each logarithm is floored at ``0.5e-6``.
    """
    sqrt2 = np.sqrt(2)

    # Regime 1: series expansion near zero
    if x**2 < 0.0492:
        lp0 = -x/np.sqrt(2*np.pi)
        c = np.array([0.00048204, -0.00142906, 0.0013200243174, 0.0009461589032, -0.0045563339802, 0.00556964649138, 0.00125993961762116, -0.01621575378835404, 0.02629651521057465, -0.001829764677455021, 2*(1-np.pi/3), (4-np.pi)/3, 1, 1])
        f = 0
        for coef in c:
            f = lp0*(coef+f)
        return -2*f-np.log(2)

    # Regime 2: far left tail, ratio of two Horner-evaluated polynomials
    if x < -11.3137:
        r = np.array([1.2753666447299659525, 5.019049726784267463450, 6.1602098531096305441, 7.409740605964741794425, 2.9788656263939928886])
        q = np.array([2.260528520767326969592, 9.3960340162350541504, 12.048951927855129036034, 17.081440747466004316, 9.608965327192787870698, 3.3690752069827527677])
        num = 0.5641895835477550741
        for coef in r:
            num = -x*num/sqrt2+coef
        den = 1.0
        for coef in q:
            den = -x*den/sqrt2+coef
        return np.log(0.5*np.maximum(0.000001,num/den))-0.5*(x**2)

    # Regime 3: direct evaluation through erf
    return np.log(0.5*np.maximum(0.000001,(1.0-erf(-x/sqrt2))))
52 | 
53 | # logphi_vector for autograd
def logphi_vector(x):
    """Element-wise log of the standard normal CDF, without Python branching.

    All three approximations used by the scalar ``logphi`` are evaluated for
    every element and then combined with mutually exclusive masks, so the
    function can be applied to arrays:
      * ``x**2 < 0.0492``: polynomial series around zero (phi1);
      * ``x < -11.3137``: rational tail approximation (phi2);
      * everything else: ``log(0.5 * (1 - erf(-x / sqrt(2))))`` (phi3).
    """
    # phi1: series expansion near zero
    lp0 = -x/np.sqrt(2*np.pi)
    c = np.array([0.00048204, -0.00142906, 0.0013200243174, 0.0009461589032, -0.0045563339802, 0.00556964649138, 0.00125993961762116, -0.01621575378835404, 0.02629651521057465, -0.001829764677455021, 2*(1-np.pi/3), (4-np.pi)/3, 1, 1])
    f = 0
    for i in range(14):
        f = lp0*(c[i]+f)
    phi1 = -2*f - np.log(2)

    # phi2: far left tail, ratio of two Horner-evaluated polynomials
    r = np.array([1.2753666447299659525, 5.019049726784267463450, 6.1602098531096305441, 7.409740605964741794425, 2.9788656263939928886])
    q = np.array([2.260528520767326969592, 9.3960340162350541504, 12.048951927855129036034, 17.081440747466004316, 9.608965327192787870698, 3.3690752069827527677])
    num = 0.5641895835477550741
    for i in range(5):
        num = -x*num/np.sqrt(2)+r[i]
    den = 1.0
    for i in range(6):
        den = -x*den/np.sqrt(2)+q[i]
    phi2 = np.log(0.5*np.maximum(0.000001,num/den))-0.5*(x**2)

    # phi3: direct evaluation through erf
    phi3 = np.log(0.5*np.maximum(0.000001,(1.0-erf(-x/np.sqrt(2)))))

    # Select exactly one branch per element. The phi3 mask must be the
    # complement of the other two (AND, not OR); with OR it is true for
    # every x and phi3 gets added on top of phi1 or phi2.
    x2 = x**2
    near_zero = x2 < 0.0492
    far_left = x < -11.3137
    elsewhere = (x >= -11.3137) & (x2 >= 0.0492)
    phi = phi1 * near_zero + phi2 * far_left + phi3 * elsewhere
    return phi
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 


--------------------------------------------------------------------------------