├── AlternatingStructureOptimization.py
├── ClusteredRegression.py
├── ConvexAlternatingStructureOptimization.py
├── README.md
├── RandomMTLRegressor.py
├── cherkaoui_corbiere_multitasks_learning_final_report.pdf
├── computeScores.py
├── data
│   ├── sarcos_inv.mat
│   ├── sarcos_inv_test.mat
│   └── school.mat
├── loadData.py
├── mult_ind_SVM.py
└── plotResults.py

/AlternatingStructureOptimization.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 19:06:43 2017

@author: corbi
"""

import scipy.linalg
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_squared_error
from scipy.optimize import fmin_l_bfgs_b


class AlternatingStructureOptimization(BaseEstimator):

    def __init__(self, lbda, m, d, h, n_iter=5):
        self.m = m
        self.d = d
        self.h = h
        self.n_iter = n_iter
        self.lbda = lbda
        self.params = {"h": np.arange(3, int(d/3))}

        self.U = np.zeros((self.d, self.m))
        self.U0 = np.ones((self.d, self.m))
        self.V = np.zeros((self.h, self.m))
        self.W = np.zeros((self.d, self.m))
        self.theta = np.ones((self.h, self.d))

    def fit(self, X, y):
        for it in range(self.n_iter):
            if it % 10 == 0:
                print("Iteration %d..." % (it+1))

            # tasks are labeled 1..m in the last column of X; column l-1 of
            # U, V and W holds the parameters of task l
            for l in range(1, self.m+1):
                idx = np.where(X[:, self.d] == l)[0]
                X_l = X[idx, :self.d]
                y_l = np.ravel(y[idx, :1])

                self.V[:, l-1] = np.dot(self.theta, self.W)[:, l-1]

                model = optim_ASO(X=X_l, y=y_l, theta=self.theta,
                                  v=self.V[:, l-1], lbda=self.lbda[:, l-1])

                self.U[:, l-1] = l_bfgs_b(self.U0[:, l-1], model, n_iter=self.n_iter)
                self.W[:, l-1] = self.U[:, l-1] + np.dot(self.theta.T, self.V[:, l-1])

            V1, D, V2 = scipy.linalg.svd(np.sqrt(self.lbda)*self.W)
            self.theta = V1.T[np.arange(self.h), :]
            self.V = np.dot(self.theta, self.W)

    def predict(self, X):
        y_pred = np.zeros((X.shape[0], 2))
        for l in range(1, self.m+1):
            idx = np.where(X[:, self.d] == l)[0]
            X_l = X[idx, :self.d]
            y_pred[idx, 0] = np.dot(self.U[:, l-1] + np.dot(self.theta.T, self.V)[:, l-1], X_l.T)
            y_pred[idx, 1] = l
        return y_pred

    def score(self, X, y):
        y_pred = self.predict(X)
        return 1. - np.sqrt(mean_squared_error(y[:, 0], y_pred[:, 0]))/(np.max(y[:, 0])-np.min(y[:, 0]))


def l_bfgs_b(x_init, model, n_iter=500, bounds=None, callback=None, **kwargs):
    """
    L-BFGS-B algorithm
    """
    x, _, _ = fmin_l_bfgs_b(model.loss, x_init, model.grad, bounds=bounds,
                            pgtol=1e-20, callback=callback)
    return x


class optim_ASO():

    def __init__(self, X, y, theta, v, lbda):
        # model params
        self.X = X
        self.y = y
        self.theta = theta
        self.v = v
        self.n = X.shape[0]
        self.lbda = lbda

    def loss(self, u):
        """
        loss of the optim problem
        """
        f = np.dot(u.T + np.dot(self.v.T, self.theta), self.X.T)
        return (1./self.n)*np.sum((f-self.y)**2) + self.lbda*np.linalg.norm(u)**2

    def grad(self, u):
        """
        gradient of the optim problem
        """
        f = np.dot(u.T + np.dot(self.v.T, self.theta), self.X.T)
        # gradient of the ridge term is 2*lbda*u: the penalty in the loss
        # above is not normalized by n, so no 1/n factor here
        return (2./self.n)*np.dot(self.X.T, (f-self.y)) + 2.*self.lbda*u
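
# Usage sketch (illustrative, mirroring computeScores.py; assumes X and y come
# from the loaders in loadData.py, with the task id stored in the last column
# of X):
#
#   m = len(np.unique(X[:, -1]))
#   lbda = np.ones((1, m)) * 0.225
#   aso = AlternatingStructureOptimization(lbda=lbda, m=m, d=X.shape[1]-1, h=3)
#   aso.fit(X_train, y_train)
#   y_pred = aso.predict(X_test)   # column 0: prediction, column 1: task id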
--------------------------------------------------------------------------------
/ClusteredRegression.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 19:46:23 2017

@author: corbi
"""

import scipy.linalg
import numpy as np
from time import time
from sklearn.base import BaseEstimator
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression


class ClusteredLinearRegression(BaseEstimator):
    def __init__(self, r, m, epsilon_m, epsilon_w, epsilon_b, E=None, mu=1e0,
                 maxiter=5000, step=1e-8, eps=5e1, verbose=True):
        """
        init the params
        """
        self.r = r
        self.epsilon_m = epsilon_m
        self.epsilon_w = epsilon_w
        self.epsilon_b = epsilon_b
        self.mu = mu
        self.W = None
        self.E = None
        self.true_inv_s_star = None
        if E is not None:
            self.E = E
            M = E.dot(np.linalg.inv(E.T.dot(E))).dot(E.T)
            I = np.eye(m)
            U = np.ones((m, m))
            self.true_inv_s_star = epsilon_b*(M-U) + epsilon_w*(I-M)
        self.maxiter = maxiter
        self.step = step
        self.eps = eps
        self.verbose = verbose
        self.insp_grad = []

    def get_W(self):
        """
        return W.
        """
        return self.W

    def get_insp_grad(self):
        """
        return the recorded gradient norms
        """
        return self.insp_grad

    def _grad(self, W, X, Y):
        """
        compute the gradient of the obj function
        """
        m = len(np.unique(X[:, -1]))
        d = X.shape[1] - 1
        I = np.eye(m)
        U = np.ones((m, m))
        Pi = I - U
        alpha = 1. / self.epsilon_w
        beta = 1. / self.epsilon_b
        gamma = (m-self.r+1)*alpha + (self.r-1)*beta
        if self.true_inv_s_star is None:
            lbda_star = np.diag(_get_lambda_star(W, alpha, beta, gamma))  # XXX bug
            lbda_star = scipy.linalg.block_diag(lbda_star, np.zeros((d-m, d-m)))
            V = np.linalg.eig(W.dot(Pi).dot(W.T))[1]
            s_star = V.dot(lbda_star).dot(np.linalg.pinv(V)).real
            inv_s_star = np.linalg.pinv(s_star)
            pen_grad = inv_s_star.dot(W.dot(Pi).dot(Pi.T))
            pen_grad += inv_s_star.T.dot(W.dot(Pi).dot(Pi.T))
        else:
            inv_s_star = self.true_inv_s_star
            pen_grad = 2 * W.dot(inv_s_star)
        struct_grad = 2 * self.epsilon_m * W.dot(U)
        loss_grad = self._loss_grad(X, Y, W)
        return loss_grad + struct_grad + self.mu * pen_grad

    def _loss_grad(self, X, Y, W):
        """
        gradient of the squared loss, one column per task
        """
        # tasks are labeled 1..m in the last column of X; column l-1 of W
        # holds the weights of task l
        cols = []
        for l in np.unique(X[:, -1]).astype(int):
            mask = X[:, -1] == l
            X_l = X[mask][:, :-1]
            pred = X_l.dot(W[:, l-1])[:, None]
            Y_l = Y[mask][:, :-1]
            e = pred - Y_l
            cols.append(X_l.T.dot(e))
        return 2 * np.concatenate(cols, axis=1)

    def fit(self, X, Y):
        """run the double optimisation (based on FISTA), i.e. fit the model
        """
        m = len(np.unique(X[:, -1]))
        d = X.shape[1] - 1
        W = np.random.sample((d, m))
        W_old = W.copy()
        Z = W
        old_grad = np.zeros_like(W)
        t = 1
        for idx in range(self.maxiter):
            t_old = t
            t = 0.5 * (1 + np.sqrt(1+4*t_old**2))
            # gradient at the extrapolated point Z, then momentum step
            grad = self._grad(Z, X, Y)
            W = Z - self.step * grad
            Z = W + (t_old - 1) / t * (W - W_old)
            W_old = W

            norm_grad = np.linalg.norm(grad)
            self.insp_grad.append(norm_grad)
            if self.verbose and ((idx % 100) == 0):
                print("iter: %d |df|=%f" % (idx, norm_grad))
            if np.linalg.norm(grad - old_grad) < self.eps:
                print("iter: %d |df|=%f" % (idx, norm_grad))
                break
            old_grad = grad
        self.W = W
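
    # FISTA recurrence implemented in fit() above (standard accelerated
    # gradient scheme, Beck & Teboulle 2009):
    #   t_{k+1} = (1 + sqrt(1 + 4 t_k^2)) / 2
    #   W_k     = Z_k - step * grad(Z_k)
    #   Z_{k+1} = W_k + ((t_k - 1) / t_{k+1}) * (W_k - W_{k-1})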
    def predict(self, X):
        """return the prediction for the given X
        """
        n = X.shape[0]
        pred = np.empty((n, 2))
        pred[:, 1] = X[:, -1]
        for l in np.unique(X[:, -1]).astype(int):
            X_l = X[X[:, -1] == l][:, :-1]
            pred[pred[:, 1] == l, 0] = X_l.dot(self.W[:, l-1])
        return pred

    def score(self, X, y):
        y_pred = self.predict(X)
        return 1. - np.sqrt(mean_squared_error(y[:, 0], y_pred[:, 0]))/(np.max(y[:, 0])-np.min(y[:, 0]))


def _get_lambda_star(W, alpha, beta, gamma):  # XXX bug
    """return the optimal lambda to compute sigma_c_star
    """
    # code directly taken from Laurent Jacob's demo:
    # see: https://lbbe.univ-lyon1.fr/-Jacob-Laurent-.html?lang=fr
    _, s, _ = np.linalg.svd(W)
    m = len(s)
    s2 = s**2
    s2beta2 = s2 / beta**2
    s2alpha2 = s2 / alpha**2
    palpha = -1
    pbeta = 0
    chidx = pbeta
    chval = 2
    partition = np.ones(m)
    b = s2beta2[0]
    nustar = 0
    # XXX bug: the original loop condition was inverted (`while not (...)`),
    # so the body never executed; the pointer guards below are also shifted by
    # one to avoid out-of-range accesses
    while (pbeta+1 < len(s2beta2)) or (palpha+1 < len(s2alpha2)):
        # update a, b
        a = b
        partition[chidx] = chval
        if ((pbeta+1 < len(s2beta2)) and
                (palpha+1 < len(s2alpha2)) and
                (s2beta2[pbeta+1] > s2alpha2[palpha+1])) or \
                ((pbeta+1 >= len(s2beta2)) and (palpha+1 < len(s2alpha2))):
            palpha = palpha + 1
            chidx = palpha
            chval = 3
            b = s2alpha2[palpha]
        else:
            pbeta = pbeta + 1
            chidx = pbeta
            chval = 2
            b = s2beta2[pbeta]

        # compute nustar
        n_p = (partition == 1).sum()
        ssi = s[partition == 2].sum()
        n_m = (partition == 3).sum()
        snsden = gamma - alpha*n_m - beta*n_p

        # breaking conditions
        if not ssi:
            if snsden <= 0:
                continue
            else:
                nustar = a
                break
        if not snsden:
            continue
        sqrtnustar = ssi / snsden
        if sqrtnustar < 0:
            continue
        nustar = sqrtnustar**2
        if nustar < b:
            if nustar <= a:
                nustar = a
            break

    # compute lbda
    lbda = np.zeros(m)
    lbda[partition == 1] = beta
    lbda[partition == 2] = s[partition == 2] / np.sqrt(nustar)
    lbda[partition == 3] = alpha
    return lbda


if __name__ == '__main__':
    """run a simple regression example with a toy dataset
    """
    from loadData import load_toy_dataset

    print("running Multi-tasks learning on toy dataset...")
    n, d, m, r = 2000, 30, 4, 2
    X, Y, E = load_toy_dataset(n, d, m, r)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
    epsilon = 0.5
    epsilon_m = 0.2*epsilon
    epsilon_b = 3.5*epsilon
    epsilon_w = 4.5*epsilon

    # linReg with estimated clusters
    t0 = time()
    reg1 = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, step=1e-7, mu=2.5)
    reg1.fit(X_train, Y_train)
    pred1 = reg1.predict(X_test)
    error1 = mean_squared_error(Y_test[:, 0], pred1[:, 0])
    print("linReg with estimated clusters: mse = %f, run in %fs" % (error1, time() - t0))

    # linReg with true given clusters
    t0 = time()
    reg2 = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, E, mu=2.5)
    reg2.fit(X_train, Y_train)
    pred2 = reg2.predict(X_test)
    error2 = mean_squared_error(Y_test[:, 0], pred2[:, 0])
    print("linReg with true given clusters: mse = %f, run in %fs" % (error2, time() - t0))
    # linReg with no clusters
    t0 = time()
    reg3 = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, E, mu=0)
    reg3.fit(X_train, Y_train)
    pred3 = reg3.predict(X_test)
    error3 = mean_squared_error(Y_test[:, 0], pred3[:, 0])
    print("linReg with no clusters: mse = %f, run in %fs" % (error3, time() - t0))

    # Scikit-Learn linReg ref
    t0 = time()
    reg = LinearRegression()
    reg.fit(X_train, Y_train)
    pred = reg.predict(X_test)
    error = mean_squared_error(Y_test, pred)
    print("Scikit-Learn linReg ref: mse = %f, run in %fs" % (error, time() - t0))
--------------------------------------------------------------------------------
/ConvexAlternatingStructureOptimization.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 23:47:18 2017

@author: corbi
"""

import scipy.linalg
import scipy.optimize
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_squared_error
from AlternatingStructureOptimization import l_bfgs_b


class ConvexAlternatingStructureOptimization(BaseEstimator):

    def __init__(self, alpha, beta, m, d, h, n_iter=5, C=1., s=1):
        self.m = m
        self.d = d
        self.h = h
        self.n_iter = n_iter
        self.C = C
        self.s = s
        self.alpha = alpha
        self.eta = beta/alpha

        self.M = np.eye(self.d)*self.h/self.d
        self.U = np.zeros((self.d, self.m))
        self.W0 = np.ones((self.d, self.m))
        self.V = np.zeros((self.h, self.m))
        self.W = np.zeros((self.d, self.m))
        self.theta = np.ones((self.h, self.d))

    def fit(self, X, y):

        for it in range(self.n_iter):
            if it % 10 == 0:
                print("Iteration %d..." % (it+1))
            # tasks are labeled 1..m in the last column of X; column l-1 of W
            # holds the weights of task l
            for l in range(1, self.m+1):
                idx = np.where(X[:, self.d] == l)[0]
                X_l = X[idx, :self.d]
                y_l = np.ravel(y[idx, :1])
                model = optim_W_cASO(X=X_l, y=y_l, M=self.M, alpha=self.alpha,
                                     eta=self.eta, C=self.C, s=self.s)

                self.W[:, l-1] = l_bfgs_b(self.W0[:, l-1], model, n_iter=self.n_iter)

            P1, D, P2 = scipy.linalg.svd(self.W)
            q = np.linalg.matrix_rank(self.W)

            gammas_0 = np.ones(q)*self.h/q
            sigmas = D[:q]

            model_gammas = optim_M_cASO(sigmas=sigmas, eta=self.eta)
            cons = ({'type': 'eq',
                     'fun': lambda x: np.sum(x) - self.h,
                     'jac': lambda x: np.ones(x.shape[0])})
            bounds = [(0, 1)] * gammas_0.shape[0]
            res = scipy.optimize.minimize(model_gammas.loss, x0=gammas_0,
                                          jac=model_gammas.grad, method='SLSQP',
                                          bounds=bounds, constraints=cons)
            gammas = res['x']
            Gamma = np.diag(np.append(gammas, np.zeros(self.d-len(gammas))))
            self.M = np.dot(P1, np.dot(Gamma, P1.T))

            # M is symmetric: use eigh and keep the h leading eigenvectors
            eigvals, eigvecs = np.linalg.eigh(self.M)
            order = np.argsort(eigvals)[::-1]
            self.theta = eigvecs[:, order[:self.h]].T
            self.V = np.dot(self.theta, self.W)
            self.U = self.W - np.dot(self.theta.T, self.V)

    def predict(self, X):
        y_pred = np.zeros((X.shape[0], 2))
        for l in range(1, self.m+1):
            idx = np.where(X[:, self.d] == l)[0]
            X_l = X[idx, :self.d]
            y_pred[idx, 0] = np.dot(self.U[:, l-1] + np.dot(self.theta.T, self.V)[:, l-1], X_l.T)
            y_pred[idx, 1] = l
        return y_pred

    def score(self, X, y):
        y_pred = self.predict(X)
        return 1. - np.sqrt(mean_squared_error(y[:, 0], y_pred[:, 0]))/(np.max(y[:, 0])-np.min(y[:, 0]))
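
# The alternating scheme above solves, for each task l, the regularized
# least-squares subproblem implemented by optim_W_cASO below:
#   min_w 0.5*||y_l - X_l w||^2 + alpha*eta*(1+eta) * w^T (eta*I + M)^{-1} w
# and then updates M through its eigenvalues gamma (optim_M_cASO), under the
# convex-relaxation constraints tr(M) = h and 0 <= gamma_i <= 1.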

class optim_W_cASO():

    def __init__(self, X, y, M, alpha, eta, C=1.0, s=1.0):
        # model params
        self.X = X
        self.y = y
        self.M = M
        self.alpha = alpha
        self.eta = eta
        self.C = C
        self.s = s
        self.d = X.shape[1]
        self.n = X.shape[0]

    def loss(self, W):
        """loss of the optim problem"""
        inv = np.linalg.solve(self.eta*np.eye(self.d)+self.M, W.T)
        g = self.alpha*self.eta*(1.+self.eta)*(np.dot(W, inv))
        return 0.5*np.linalg.norm(self.y-np.dot(W.T, self.X.T))**2 + g

    def grad(self, W):
        """gradient of the optim problem"""
        inv = np.linalg.solve(self.eta*np.eye(self.d)+self.M, W)
        # gradient of g(w) = c * w^T A^{-1} w is 2c * A^{-1} w (A symmetric)
        grad_g = 2. * self.alpha*self.eta*(1.+self.eta)*inv
        return np.dot(self.X.T, (np.dot(W.T, self.X.T)-self.y)) + grad_g


class optim_M_cASO():

    def __init__(self, sigmas, eta):
        # model params
        self.sigmas = sigmas
        self.eta = eta
        self.q = sigmas.shape[0]

    def loss(self, gammas):
        """loss of the optim problem"""
        loss = 0
        for i in range(self.q):
            loss += self.sigmas[i]**2/(self.eta+gammas[i])
        return loss

    def grad(self, gammas):
        """gradient of the optim problem"""
        grad = np.zeros(self.q)
        for i in range(self.q):
            grad[i] = -self.sigmas[i]**2/((self.eta+gammas[i])**2)
        return grad
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Multi-Task Learning
Authors: Charles Corbière, Hamza Cherkaoui

## Synopsis

This package implements different multi-task learning models:
- Multilearning SVM (svm): an SVM is learned independently for each task
- Alternating Structure Optimization (aso): a model assuming all tasks share a low-dimensional structure
- Convex Alternating Structure Optimization (caso): a convex relaxation of ASO
- Clustered Multi-task Learning (cmtl): a model assuming tasks are grouped within clusters.

Datasets included:
- a clustered toy dataset (toy)
- School data (school)
- Sarcos data (sarcos)
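
All models follow the scikit-learn estimator API. A minimal sketch (assuming a
dataset loaded through `loadData.py`, which stores the task id in the last
column of `X`):

```
import numpy as np
from loadData import load_school_dataset
from AlternatingStructureOptimization import AlternatingStructureOptimization

X, y = load_school_dataset()
m = len(np.unique(X[:, -1]))      # number of tasks
lbda = np.ones((1, m)) * 0.225    # per-task regularization weights
model = AlternatingStructureOptimization(lbda=lbda, m=m, d=X.shape[1]-1, h=3)
model.fit(X, y)
print(model.score(X, y))          # score = 1 - nrMSE
```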

## How to use it

- To compute the score of a given algorithm on a given dataset, for a given test size proportion and number of splits:
```
python computeScores.py school cmtl 5 0.30
```
Here, we run CMTL 5 times on the school dataset with a 30% test size.

- To plot the scores of all algorithms on a given dataset for a given number of splits, iterating over the test size proportion:
```
python plotResults.py school 5
```
Here, we run each algorithm 5 times on the school dataset. Note that in the current implementation, the test size range is [0.30, 0.40, 0.50, 0.60].
--------------------------------------------------------------------------------
/RandomMTLRegressor.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 13:40:09 2017

@author: corbi
"""

import numpy as np
from random import randint
from sklearn.base import BaseEstimator
from sklearn.metrics import mean_squared_error


class randompred(BaseEstimator):

    def __init__(self):
        self.min = 0
        self.max = 0
        self.param_grid = {}

    def fit(self, X, y):
        self.min = np.min(y[:, 0])
        self.max = np.max(y[:, 0])

    def predict(self, X):
        d = X.shape[1]
        y_pred = np.zeros((X.shape[0], 2))
        for l in np.unique(X[:, d-1]):  # iterate over the task labels present in X
            idx = np.where(X[:, d-1] == l)[0]
            # randomly generate labels between 0 and 100
            y_pred[idx, 0] = np.asarray([randint(0, 100) for p in range(0, idx.shape[0])])
            y_pred[idx, 1] = l
        return y_pred

    def score(self, X, y):
        y_pred = self.predict(X)
        return 1. - np.sqrt(mean_squared_error(y[:, 0], y_pred[:, 0]))/(np.max(y[:, 0])-np.min(y[:, 0]))
--------------------------------------------------------------------------------
/cherkaoui_corbiere_multitasks_learning_final_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chcorbi/MultiTaskLearning/51cdbf9eb79342a7ef17b6ad5af76c5fbceab868/cherkaoui_corbiere_multitasks_learning_final_report.pdf
--------------------------------------------------------------------------------
/computeScores.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 19:14:19 2017

@author: corbi
"""

import sys
import numpy as np
from time import time
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from loadData import load_toy_dataset, load_school_dataset, load_sarcos_dataset
from RandomMTLRegressor import randompred
from mult_ind_SVM import mult_ind_SVM
from AlternatingStructureOptimization import AlternatingStructureOptimization
from ConvexAlternatingStructureOptimization import ConvexAlternatingStructureOptimization
from ClusteredRegression import ClusteredLinearRegression


def compute_scores(X, y, model, n_splits=5, test_size=0.30, gridsearch=False, verbose=False):
    """
    Compute the nrMSE score for a given model and a given dataset (X, y)
    """
    t0 = time()
    nrMSE = []

    # shuffle split
    ss = ShuffleSplit(n_splits=n_splits, test_size=test_size, random_state=42)
    i = 1
    for train_index, test_index in ss.split(X):
        t1 = time()
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        if verbose:
            print("Random shuffle split %d" % i)

        if gridsearch:
            grid = GridSearchCV(model, cv=3, param_grid=model.params, verbose=1)
            grid.fit(X_train, y_train)
            print(grid.best_estimator_)
        else:
            model.fit(X_train, y_train)

        nrMSE.append(1. - model.score(X_test, y_test))
        i += 1

        if verbose:
            print("....run in %fs" % (time() - t1))

    print("Total run in %fs" % (time() - t0))
    if n_splits == 1:
        return nrMSE[0]
    else:
        return [np.mean(nrMSE), np.std(nrMSE)]
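
# Note: each model's score() returns 1 - RMSE/(max(y) - min(y)), so the value
# accumulated above (1 - score) is the normalized root mean squared error
# (nrMSE); lower is better.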

if __name__ == '__main__':
    # get choices
    dataset = sys.argv[1]
    algo = sys.argv[2]
    splits = int(sys.argv[3])
    test_size = float(sys.argv[4])

    if test_size >= 1:
        print("Test size must be < 1.")
        sys.exit()

    # generate dataset
    if dataset == "toy":
        X, y, E = load_toy_dataset()
        C = 1e2
        r = 3
        h = 3
    elif dataset == "school":
        X, y = load_school_dataset()
        C = 1e1
        r = 7
        h = 3
    elif dataset == "sarcos":
        X, y = load_sarcos_dataset()
        C = 1e4
        r = 6
        h = 3
    else:
        print("Unknown dataset.")
        sys.exit()

    m = len(np.unique(X[:, -1]))

    # initialize the chosen algorithm
    if algo == "random":
        modele = randompred()
    elif algo == "svm":
        modele = mult_ind_SVM(m=m, C=C)
    elif algo == "aso":
        lbda = np.ones((1, m))*0.225
        modele = AlternatingStructureOptimization(lbda=lbda, m=m, d=X.shape[1]-1, h=h)
    elif algo == "caso":
        alpha = 0.225
        beta = 0.15
        modele = ConvexAlternatingStructureOptimization(alpha=alpha, beta=beta, m=m, d=X.shape[1]-1, h=h)
    elif algo == "cmtl":
        epsilon = 0.5
        epsilon_m = 0.2*epsilon
        epsilon_b = 3.5*epsilon
        epsilon_w = 4.5*epsilon
        modele = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, mu=2.5)
    elif algo == "cmtl_e":
        # requires the toy dataset: E is only returned by load_toy_dataset
        epsilon = 0.5
        epsilon_m = 0.2*epsilon
        epsilon_b = 3.5*epsilon
        epsilon_w = 4.5*epsilon
        r = E.shape[1]
        modele = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, E, mu=2.5)
    else:
        print("Unknown algorithm.")
        sys.exit()

    # compute score
    nrMSE = compute_scores(X, y, modele, n_splits=splits, test_size=test_size)

    if splits == 1:
        print("nrMSE score: %f, +/- %f " % (nrMSE, 0))
    else:
        print("nrMSE score: %f, +/- %f " % (nrMSE[0], nrMSE[1]))
--------------------------------------------------------------------------------
/data/sarcos_inv.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chcorbi/MultiTaskLearning/51cdbf9eb79342a7ef17b6ad5af76c5fbceab868/data/sarcos_inv.mat
--------------------------------------------------------------------------------
/data/sarcos_inv_test.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chcorbi/MultiTaskLearning/51cdbf9eb79342a7ef17b6ad5af76c5fbceab868/data/sarcos_inv_test.mat
--------------------------------------------------------------------------------
/data/school.mat:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chcorbi/MultiTaskLearning/51cdbf9eb79342a7ef17b6ad5af76c5fbceab868/data/school.mat
--------------------------------------------------------------------------------
/loadData.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 12:41:58 2017

@author: corbi
"""

import scipy.io
import numpy as np
import pandas as pd


def _preprocessing(X, Y):
    """
    Prepare the dataset for the MTL algorithms: stack the tasks vertically
    and append the task id (1..m) as the last column
    """
    X_process = np.concatenate((X, np.ones((X.shape[0], 1))), axis=1)
    y_process = np.concatenate((Y[:, 0].reshape(Y.shape[0], 1), np.ones((Y.shape[0], 1))), axis=1)
    for l in range(2, Y.shape[1]+1):
        X_l = np.concatenate((X, np.ones((X.shape[0], 1))*l), axis=1)
        X_process = np.append(X_process, X_l, axis=0)
        # each task l gets its own output column Y[:, l-1]
        y_l = np.concatenate((Y[:, l-1].reshape(Y.shape[0], 1), l*np.ones((Y.shape[0], 1))), axis=1)
        y_process = np.append(y_process, y_l, axis=0)
    return X_process, y_process
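
# Layout sketch: for m tasks with n samples each, _preprocessing returns
#   X of shape (n*m, d+1), with the task id (1..m) in the last column,
#   y of shape (n*m, 2), i.e. (label, task id) per row.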

def _make_true_W(d, m, r, v1=900, v2=16):
    """
    Return the weight matrix used to generate the toy dataset
    """
    bws = []  # holds the base vector of each cluster
    for _ in range(r):
        bw = np.random.normal(0, np.sqrt(v1), int((d-2)/2.))
        bw = np.r_[bw, np.zeros_like(bw)]
        bws.append(bw)
    m_c = int(m / r)  # nb of tasks per cluster
    W = []
    E = np.zeros((m, r))
    i = 0  # i: index of the task
    for c in range(r):
        Wc = np.empty((d-2, m_c))
        for t in range(m_c):  # t: index of the task within the cluster c
            w = np.random.normal(0, np.sqrt(v2), int((d-2)/2.))
            w = np.r_[w, np.zeros_like(w)]
            w += bws[c]
            Wc[:, t] = w
            E[i, c] = 1
            i += 1
        W.append(Wc)
    W = np.concatenate(W, axis=1)
    return np.r_[W, np.random.normal(0, np.sqrt(v2), (2, m))], E


def load_toy_dataset(n=1000, d=12, m=9, r=3, v=150):
    """
    Generate a toy dataset with a fixed number of features d, of samples n
    and of tasks m, a predefined number of clusters r and a noise variance v.
    OUTPUT: X = features
            y = labels
            E = binary task-to-cluster assignment matrix
    """
    W, E = _make_true_W(d, m, r)
    X = np.random.sample((n, d))
    Y = X.dot(W) + np.random.normal(0, np.sqrt(v), (n, m))
    X, y = _preprocessing(X, Y)
    return X, y, E


def load_school_dataset():
    """
    Load the School dataset and keep the first 28 schools (tasks) for
    computational reasons
    """
    dataset = scipy.io.loadmat('data/school.mat')
    FEATURES_COLUMNS = ['Year_1985','Year_1986','Year_1987','FSM','VR1Percentage','Gender_Male','Gender_Female','VR_1','VR_2','VR_3',
                        'Ethnic_ESWI','Ethnic_African','Ethnic_Arabe','Ethnic_Bangladeshi','Ethnic_Carribean','Ethnic_Greek','Ethnic_Indian',
                        'Ethnic_Pakistani','Ethnic_Asian','Ethnic_Turkish','Ethnic_Others','SchoolGender_Mixed','SchoolGender_Male',
                        'SchoolGender_Female','SchoolDenomination_Maintained','SchoolDenomination_Church','SchoolDenomination_Catholic',
                        'Bias']

    # dataframe representation
    X_df = pd.DataFrame(dataset['X'][:, 0][0], columns=FEATURES_COLUMNS)
    y_df = pd.DataFrame(dataset['Y'][:, 0][0], columns=['Exam_Score'])
    X_df['School'] = 1
    y_df['School'] = 1

    d = X_df.shape[1]-1
    for i in range(1, d):
        X_df_i = pd.DataFrame(dataset['X'][:, i][0], columns=FEATURES_COLUMNS)
        X_df_i['School'] = i+1
        X_df = pd.concat([X_df, X_df_i], ignore_index=True)

        y_df_i = pd.DataFrame(dataset['Y'][:, i][0], columns=['Exam_Score'])
        y_df_i['School'] = i+1
        y_df = pd.concat([y_df, y_df_i], ignore_index=True)

    return X_df.values, y_df.values


def load_sarcos_dataset(set_size=1000):
    """
    Load the SARCOS dataset and keep the first set_size samples (default
    1000) for computational reasons
    """
    # load training set
    sarcos_train = scipy.io.loadmat('data/sarcos_inv.mat')
    # inputs (7 joint positions, 7 joint velocities, 7 joint accelerations)
    Xtrain = sarcos_train["sarcos_inv"][:, :21]
    # outputs (7 joint torques)
    Ytrain = sarcos_train["sarcos_inv"][:, 21:]

    # load test set
    sarcos_test = scipy.io.loadmat("data/sarcos_inv_test.mat")
    Xtest = sarcos_test["sarcos_inv_test"][:, :21]
    Ytest = sarcos_test["sarcos_inv_test"][:, 21:]

    X = np.concatenate((Xtrain, Xtest), axis=0)
    Y = np.concatenate((Ytrain, Ytest), axis=0)

    return _preprocessing(X[:set_size, :], Y[:set_size, :])
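
# Quick sanity check (illustrative):
#   X, y, E = load_toy_dataset(n=200, d=12, m=9, r=3)
#   X.shape == (200*9, 13); y.shape == (200*9, 2); E.shape == (9, 3)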
--------------------------------------------------------------------------------
/mult_ind_SVM.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 12:51:59 2017

@author: corbi
"""

import numpy as np
from sklearn.base import BaseEstimator
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error


class mult_ind_SVM(BaseEstimator):

    def __init__(self, m, C):
        self.m = m
        self.C = C
        self.dict_reg = {}
        # one independent SVR per task; tasks are labeled 1..m
        for l in range(1, self.m+1):
            self.dict_reg[l] = SVR(kernel='rbf', C=C, gamma=0.1, epsilon=0.01)

    def fit(self, X, y):
        d = X.shape[1]
        for l in range(1, self.m+1):
            print("Fit task %d..." % l)
            idx = np.where(X[:, d-1] == l)[0]
            X_l = X[idx, :d-1]
            y_l = np.ravel(y[idx, :1])
            self.dict_reg[l].fit(X_l, y_l)

    def predict(self, X):
        d = X.shape[1]
        y_pred = np.zeros((X.shape[0], 2))
        for l in range(1, self.m+1):
            idx = np.where(X[:, d-1] == l)[0]
            X_l = X[idx, :d-1]
            y_pred[idx, 0] = self.dict_reg[l].predict(X_l)
            y_pred[idx, 1] = l
        # note: predictions are cast to int (integer exam scores)
        return y_pred.astype(int)

    def score(self, X, y):
        y_pred = self.predict(X)
        return 1. - np.sqrt(mean_squared_error(y[:, 0], y_pred[:, 0]))/(np.max(y[:, 0])-np.min(y[:, 0]))
--------------------------------------------------------------------------------
/plotResults.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jan 30 22:33:42 2017

@author: corbi
"""

import sys
import numpy as np
import matplotlib.pyplot as plt
from loadData import load_toy_dataset, load_school_dataset, load_sarcos_dataset
from mult_ind_SVM import mult_ind_SVM
from AlternatingStructureOptimization import AlternatingStructureOptimization
from ConvexAlternatingStructureOptimization import ConvexAlternatingStructureOptimization
from ClusteredRegression import ClusteredLinearRegression
from computeScores import compute_scores


def plot_results(X, y, name, C, r, h, n_splits=5, E=None, gridsearch=False):
    """
    Run each model n_splits times and plot the average nrMSE score
    """
    m = len(np.unique(X[:, -1]))
    test_size = [0.30, 0.40, 0.50, 0.60]

    nrMSE_SVM = []
    nrMSE_CMTL = []
    nrMSE_CMTLE = []
    nrMSE_ASO = []
    nrMSE_cASO = []

    for i, size in enumerate(test_size):
        print("======================= %d / %d : test_size=%f =======================" % (i+1, len(test_size), size))

        print("------------Run ASO...")
        lbda = np.ones((1, m))*0.225
        ASO = AlternatingStructureOptimization(lbda=lbda, m=m, d=X.shape[1]-1, h=h)
        nrMSE_ASO.append(compute_scores(X, y, ASO, n_splits=n_splits, test_size=size)[0])

        print("------------Run cASO...")
        alpha = 0.225
        beta = 0.15
        cASO = ConvexAlternatingStructureOptimization(alpha=alpha, beta=beta, m=m, d=X.shape[1]-1, h=h)
        nrMSE_cASO.append(compute_scores(X, y, cASO, n_splits=n_splits, test_size=size)[0])
        print("------------Run CMTL...")
        epsilon = 0.5
        epsilon_m = 0.2*epsilon
        epsilon_b = 3.5*epsilon
        epsilon_w = 4.5*epsilon
        CMTL = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, mu=2.5)
        nrMSE_CMTL.append(compute_scores(X, y, CMTL, n_splits=n_splits, test_size=size)[0])

        if name == "toy":
            print("------------Run CMTLE...")
            # same model, with the true cluster assignment E given
            CMTLE = ClusteredLinearRegression(r, m, epsilon_m, epsilon_w, epsilon_b, E, mu=2.5)
            nrMSE_CMTLE.append(compute_scores(X, y, CMTLE, n_splits=n_splits, test_size=size)[0])

        print("------------Run SVM...")
        SVM = mult_ind_SVM(m=m, C=C)
        nrMSE_SVM.append(compute_scores(X, y, SVM, n_splits=n_splits, test_size=size)[0])

        print("SVM: %f, CMTL: %f, ASO: %f, cASO: %f" % (nrMSE_SVM[i], nrMSE_CMTL[i], nrMSE_ASO[i], nrMSE_cASO[i]))

    fig, ax = plt.subplots()
    ax.set_title("nrMSE for %s dataset, run %d times" % (name, n_splits))
    ax.plot(test_size, nrMSE_SVM, label='M-SVM')
    ax.plot(test_size, nrMSE_CMTL, label='CMTL')
    if name == 'toy':
        ax.plot(test_size, nrMSE_CMTLE, label='CMTL_E')
    ax.plot(test_size, nrMSE_ASO, label='ASO')
    ax.plot(test_size, nrMSE_cASO, label='cASO')
    #ax.set_ylim([0, 0.35])
    ax.set_xlabel('% Test size')
    ax.set_ylabel('nrMSE')

    box = ax.get_position()
    ax.set_position([box.x0, box.y0 + box.height * 0.1,
                     box.width, box.height * 0.9])
    ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), fancybox=True, shadow=True, ncol=5)
    plt.show()


if __name__ == '__main__':
    # get choices
    dataset = sys.argv[1]
    splits = int(sys.argv[2])

    # generate dataset
    E = None
    if dataset == "toy":
        X, y, E = load_toy_dataset()
        C = 1e2
        r = 3
        h = 3
    elif dataset == "school":
        X, y = load_school_dataset()
        C = 1e1
        r = 6
        h = 3
    elif dataset == "sarcos":
        X, y = load_sarcos_dataset()
        C = 1e4
        r = 6
        h = 3
    else:
        print("Unknown dataset.")
        sys.exit()

    plot_results(X, y, n_splits=splits, name=dataset, C=C, r=r, h=h, E=E)
--------------------------------------------------------------------------------