├── .gitignore ├── Data ├── Y.mat ├── x.mat ├── Yte.mat ├── xte.mat └── TCK_data.mat ├── imgs ├── pca.png ├── kdAE_arch.png ├── learned code.png ├── prediction.png ├── tck_kernel.png ├── Selection_002.jpg └── Selection_003.jpg ├── TCK ├── TCK_data.mat ├── myKNN.m ├── get_BloodData.m ├── main.m ├── confusion_stats.m ├── GMMposterior.m ├── TCK.m ├── trainTCK.m └── GMM_MAP_EM.m ├── TS_datasets.py ├── utils.py ├── README.md └── AE.py /.gitignore: -------------------------------------------------------------------------------- 1 | AE_pred 2 | xxx.py 3 | *.pyc 4 | TCK_classification.py 5 | -------------------------------------------------------------------------------- /Data/Y.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/Y.mat -------------------------------------------------------------------------------- /Data/x.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/x.mat -------------------------------------------------------------------------------- /Data/Yte.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/Yte.mat -------------------------------------------------------------------------------- /Data/xte.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/xte.mat -------------------------------------------------------------------------------- /imgs/pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/pca.png -------------------------------------------------------------------------------- /TCK/TCK_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/TCK/TCK_data.mat -------------------------------------------------------------------------------- /Data/TCK_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/TCK_data.mat -------------------------------------------------------------------------------- /imgs/kdAE_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/kdAE_arch.png -------------------------------------------------------------------------------- /imgs/learned code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/learned code.png -------------------------------------------------------------------------------- /imgs/prediction.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/prediction.png -------------------------------------------------------------------------------- /imgs/tck_kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/tck_kernel.png -------------------------------------------------------------------------------- /imgs/Selection_002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/Selection_002.jpg -------------------------------------------------------------------------------- /imgs/Selection_003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/Selection_003.jpg -------------------------------------------------------------------------------- /TCK/myKNN.m: -------------------------------------------------------------------------------- 1 | function [ acc, labels_pred ] = myKNN( S, labels_tr, labels_ts, k ) 2 | %MYKNN 3 | % Input: 4 | % S - similarity matrix between training and test elements (Ntr x Nts) 5 | % labels_tr, labels_ts - class labels of the training and test elements 6 | % k - num of neighbors 7 | 8 | labels_pred = zeros(size(labels_ts)); 9 | for i=1:size(S,2) 10 | S_i = S(:,i); 11 | [~,c] = sort(S_i,'descend'); 12 | c = c(1:k); 13 | L = labels_tr(c); 14 | labels_pred(i) = mode(L); 15 | 16 | end 17 | 18 | acc = sum(labels_pred == labels_ts)/length(labels_ts); 19 | 20 | end 21 | 22 | -------------------------------------------------------------------------------- /TCK/get_BloodData.m: -------------------------------------------------------------------------------- 1 | function [ X, Y, Xte, Yte ] = get_BloodData(data_norm) 2 | 3 | x_tr = load('../Data/x.mat'); 4 | y_tr = load('../Data/Y.mat'); 5 | x_te = load('../Data/xte.mat'); 6 | y_te = load('../Data/Yte.mat'); 7 | 8 | Ntr = size(x_tr.x,1); 9 | Nts = size(x_te.xte,1); 10 | T = 20; 11 | V = 10; 12 | 13 | X = reshape(x_tr.x,[Ntr,T,V]); 14 | Xte = reshape(x_te.xte,[Nts,T,V]); 15 | Y = y_tr.Y; 16 | Yte = y_te.Yte; 17 | 18 | if data_norm 19 | for v=1:V 20 | X_v = X(:,:,v); 21 | Xte_v = Xte(:,:,v); 22 | Xv_m = nanmean(X_v(:)); 23 | Xv_s = nanstd(X_v(:)); 24 | 25 | X_v = (X_v - Xv_m)/Xv_s; 26 | X(:,:,v) = X_v; 27 | Xte_v = (Xte_v - Xv_m)/Xv_s; 28 | Xte(:,:,v) = Xte_v; 29 | end 30 | end 31 | 32 | end -------------------------------------------------------------------------------- /TCK/main.m: -------------------------------------------------------------------------------- 1 | % load data 2 | [ X, Y, Xte, Yte ] = get_BloodData(1); 3 | 4 | %% Train GMM models 5 | [GMMpar,C,G] = trainTCK(X); 6 | 7 | % Compute in-sample kernel matrix 8 | Ktrtr = TCK(GMMpar,C,G,'tr-tr'); 9 | 10 | % Compute similarity between Xte and the training elements 11 | Ktrte = TCK(GMMpar,C,G,'tr-te',Xte); 12 | 13 | % Compute kernel matrix between test elements 14 | Ktete = TCK(GMMpar,C,G,'te-te',Xte); 15 | 16 | %% kNN classifier 17 | [acc, Ypred] = myKNN(Ktrte,Y,Yte,1); 18 | [accuracy, sensitivity, specificity, precision, recall, f_measure, gmean] = confusion_stats(Yte,Ypred); 19 | [~,~,~,AUC] = perfcurve(Yte,Ypred,1); 20 | disp(['ACC: ',num2str(acc),', F1: ',num2str(f_measure),', AUC: ',num2str(AUC)]) 21 | 22 | %% visualization 23 | 24 | [~,idx] = sort(Yte);
25 | Ksort = Ktete(idx,idx); 26 | figure 27 | imagesc(Ksort) 28 | colormap('gray') 29 | set(gca,'xtick',[]) 30 | set(gca,'ytick',[]) 31 | title('TCK K') 32 | 33 | %% save mat files 34 | save('../Data/TCK_data.mat', 'X','Y','Xte','Yte','Ktrtr','Ktrte','Ktete') 35 | -------------------------------------------------------------------------------- /TCK/confusion_stats.m: -------------------------------------------------------------------------------- 1 | function [accuracy, sensitivity, specificity, precision, recall, f_measure, gmean] = confusion_stats(ACTUAL,PREDICTED) 2 | % This function evaluates the performance of a classification model by 3 | % calculating the common performance measures: Accuracy, Sensitivity, 4 | % Specificity, Precision, Recall, F-Measure, G-mean. 5 | % Input: ACTUAL = Column matrix with the actual class labels of the 6 | % examples 7 | % PREDICTED = Column matrix with the class labels predicted by the 8 | % classification model 9 | % Output: the performance measures listed in the function signature 10 | 11 | 12 | idx = (ACTUAL==1); 13 | 14 | p = length(ACTUAL(idx)); 15 | n = length(ACTUAL(~idx)); 16 | N = p+n; 17 | 18 | tp = sum(ACTUAL(idx)==PREDICTED(idx)); 19 | tn = sum(ACTUAL(~idx)==PREDICTED(~idx)); 20 | fp = n-tn; 21 | fn = p-tp; 22 | 23 | tp_rate = tp/p; 24 | tn_rate = tn/n; 25 | 26 | accuracy = (tp+tn)/N; 27 | sensitivity = tp_rate; 28 | specificity = tn_rate; 29 | precision = tp/(tp+fp); 30 | recall = sensitivity; 31 | f_measure = 2*((precision*recall)/(precision + recall)); 32 | gmean = sqrt(tp_rate*tn_rate); 33 | 34 | -------------------------------------------------------------------------------- /TCK/GMMposterior.m: -------------------------------------------------------------------------------- 1 | function [ Q ] = GMMposterior( X, C, mu, s2, theta, dim_idx, time_idx, missing ) 2 | %GMMposterior - Evaluate the posterior for the data X of the GMM described 3 | %by C, mu, s2 and theta 4 | % 5 | % INPUTS 6 | % X: data array of size N x T x V 7 | % C: number of mixture components 8 | % mu: cluster means over time and variables (sT x sV x C) 9 | % s2: cluster variances over variables (sV x C) 10 | % theta: cluster priors 11 | % dim_idx: subset of variables to be used in the clustering 12 | % time_idx: subset of time intervals to be used in the clustering 13 | % missing: binary indicator. 1 if there is missing data and 0 if not 14 | % 15 | % OUTPUTS 16 | % Q: posterior 17 | % 18 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier.
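% Note (added for clarity): under the diagonal-covariance assumption used here,
% the posterior evaluated below is
%   Q(n,c) = theta(c) * prod_{t,v} N( X(n,t,v) | mu(t,v,c), s2(v,c) ) / Z_n,
% where Z_n normalizes each row so that sum_c Q(n,c) = 1, and missing entries
% are excluded from the product through the binary mask R.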
19 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 20 | 21 | N = size(X,1); % number of time series 22 | 23 | % initialize variables 24 | Q = zeros(N,C); 25 | sX = X(:,time_idx,dim_idx); 26 | sV = length(dim_idx); 27 | sT = length(time_idx); 28 | 29 | 30 | if(missing == 1) 31 | nan_idx = isnan(sX); 32 | R = ones(size(sX)); 33 | R(nan_idx)=0; 34 | sX(R==0) = -100000; % dummy value for the missing entries (masked out by the exponent R below) 35 | 36 | for c=1:C 37 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,N]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,N,sT]),[2,3,1]) ).^R; 38 | distr_c(distr_c < normpdf(3)) = normpdf(3); % floor small densities for numerical stability 39 | distr_c = reshape(distr_c,[N,sV*sT]); 40 | Q(:,c) = theta(c)*prod(distr_c,2); 41 | end 42 | Q = Q./repmat(sum(Q,2),[1,C]); 43 | 44 | elseif(missing == 0) 45 | for c=1:C 46 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,N]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,N,sT]),[2,3,1]) ); 47 | distr_c(distr_c < normpdf(3)) = normpdf(3); % floor small densities for numerical stability 48 | distr_c = reshape(distr_c,[N,sV*sT]); 49 | Q(:,c) = theta(c)*prod(distr_c,2); 50 | end 51 | Q = Q./repmat(sum(Q,2),[1,C]); 52 | 53 | else 54 | error('The value of the variable missing is not 0 or 1'); 55 | end 56 | 57 | 58 | end 59 | 60 | -------------------------------------------------------------------------------- /TCK/TCK.m: -------------------------------------------------------------------------------- 1 | function [ K ] = TCK(GMM, C, G, mode, Xte) 2 | % TCK - compute TCK kernel matrix between training data and test data Xte 3 | % 4 | % INPUTS 5 | % 6 | % GMM : Cell output from the function trainTCK 7 | % 8 | % C: Second output from trainTCK 9 | % G: Third output from trainTCK 10 | % mode: 'tr-tr', 'tr-te' or 'te-te', selecting which block of the kernel to compute 11 | % Xte: data array of size Nte x T x V, where Nte is the number of 12 | % multivariate time series, T the length and V the number of attributes. 13 | % 14 | % OUTPUTS 15 | % K: kernel matrix 16 | 17 | % 18 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier.
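% Note (added for clarity): every block of the kernel is accumulated as a sum of
% inner products of row-normalized posterior matrices over the ensemble of
% G*(C-1) GMMs; e.g., for the 'tr-te' block,
%   K = sum_q normr(Q_q_tr) * normr(Q_q_te)',
% where Q_q_tr is stored in GMM{q,1} and Q_q_te is recomputed by GMMposterior on Xte.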
19 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 20 | 21 | 22 | if strcmp(mode, 'tr-te') 23 | 24 | % Check if the dataset contains missing elements 25 | nan_idx = isnan(Xte); 26 | if(sum(sum(sum(nan_idx)))>0) 27 | missing = 1; 28 | else 29 | missing = 0; 30 | end 31 | 32 | K = zeros(size(GMM{1,1},1),size(Xte,1)); 33 | parfor i=1:G*(C-1) 34 | c = floor((i-1)/G) + 2; 35 | K = K + normr(GMM{i,1})*normr(GMMposterior( Xte, c, GMM{i,2}, GMM{i,3}, GMM{i,4}, GMM{i,5}, GMM{i,6}, missing ))'; 36 | end 37 | 38 | elseif strcmp(mode, 'te-te') 39 | 40 | % Check if the dataset contains missing elements 41 | nan_idx = isnan(Xte); 42 | if(sum(sum(sum(nan_idx)))>0) 43 | missing = 1; 44 | else 45 | missing = 0; 46 | end 47 | 48 | K = zeros(size(Xte,1)); 49 | parfor i=1:G*(C-1) 50 | c = floor((i-1)/G) + 2; 51 | K = K + normr(GMMposterior(Xte,c,GMM{i,2},GMM{i,3},GMM{i,4},GMM{i,5},GMM{i,6},missing)) * normr(GMMposterior(Xte,c,GMM{i,2},GMM{i,3},GMM{i,4},GMM{i,5},GMM{i,6},missing))'; 52 | end 53 | 54 | elseif strcmp(mode, 'tr-tr') %in-sample kernel matrix 55 | 56 | K = zeros(size(GMM{1,1},1),size(GMM{1,1},1)); 57 | parfor i=1:G*(C-1) 58 | K = K + normr(GMM{i,1})*normr(GMM{i,1})'; 59 | end 60 | 61 | else 62 | 63 | error('Invalid training mode'); 64 | 65 | end 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /TS_datasets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from utils import ideal_kernel 4 | import pandas as pd 5 | 6 | """ 7 | Data manager for loading blood data and (precomputed) TCK kernel 8 | """ 9 | 10 | 11 | def getBlood(kernel='TCK', inp='zero'): 12 | blood_data = scipy.io.loadmat('Data/TCK_data.mat') 13 | 14 | # ------ train ------- 15 | train_data = blood_data['X'] 16 | train_data = np.transpose(train_data,axes=[1,0,2]) # time_major=True 17 | train_len = [train_data.shape[0] for _ in range(train_data.shape[1])] 18 | 19 | train_data0 = train_data[0,:,:] 20 | train_data0[np.isnan(train_data0)] = 0 # zero out NaNs at the first time step, so 'ffill' always has a value to propagate 21 | train_data[0,:,:] = train_data0 22 | for i in range(train_data.shape[1]): 23 | train_data_i = pd.DataFrame(train_data[:,i,:]) 24 | if inp == 'last': 25 | train_data_i.fillna(method='ffill',inplace=True) 26 | elif inp == 'zero': 27 | train_data_i.fillna(0,inplace=True) 28 | elif inp == 'mean': 29 | train_data_i.fillna(train_data_i.mean(),inplace=True) 30 | train_data[:,i,:] = train_data_i.values 31 | 32 | train_labels = np.asarray(blood_data['Y']) 33 | 34 | # ----- test ------- 35 | test_data = blood_data['Xte'] 36 | test_data = np.transpose(test_data,axes=[1,0,2]) # time_major=True 37 | test_len = [test_data.shape[0] for _ in range(test_data.shape[1])] 38 | 39 | test_data0 = test_data[0,:,:] 40 | test_data0[np.isnan(test_data0)] = 0 # zero out NaNs at the first time step, so 'ffill' always has a value to propagate 41 | test_data[0,:,:] = test_data0 42 | for i in range(test_data.shape[1]): 43 | test_data_i = pd.DataFrame(test_data[:,i,:]) 44 | if inp == 'last': 45 | test_data_i.fillna(method='ffill',inplace=True) 46 | elif inp == 'zero': 47 | test_data_i.fillna(0,inplace=True) 48 | elif inp == 'mean': 49 | test_data_i.fillna(test_data_i.mean(),inplace=True) 50 | test_data[:,i,:] = test_data_i.values 51 | 52 | test_labels = np.asarray(blood_data['Yte']) 53 | 54 | # valid == train 55 | valid_data = train_data 56 | valid_labels = train_labels 57 | valid_len = train_len 58 | 59 | # target outputs 60 | train_targets = train_data 61 | valid_targets = valid_data 62 | test_targets = test_data 63 | 64 | if kernel=='TCK': 65 | K_tr = blood_data['Ktrtr']
66 | K_vs = K_tr 67 | K_ts = blood_data['Ktete'] 68 | else: 69 | K_tr = ideal_kernel(train_labels) 70 | K_vs = ideal_kernel(valid_labels) 71 | K_ts = ideal_kernel(test_labels) 72 | 73 | return (train_data, train_labels, train_len, train_targets, K_tr, 74 | valid_data, valid_labels, valid_len, valid_targets, K_vs, 75 | test_data, test_labels, test_len, test_targets, K_ts) -------------------------------------------------------------------------------- /TCK/trainTCK.m: -------------------------------------------------------------------------------- 1 | function [ res, C, G ] = trainTCK( X, varargin ) 2 | % trainTCK - Train the TCK 3 | % 4 | % INPUTS 5 | % X: data array of size N x T x V, where N is the number of multivariate time series, T the length and V the number of attributes. 6 | % minN: min percentage of subsample (optional) 7 | % minV: min number of attributes for each GMM (optional) 8 | % maxV: max number of attributes for each GMM (optional) 9 | % minT: min length of time segments for each GMM (optional) 10 | % maxT: max length of time segments for each GMM (optional) 11 | % C: max number of mixture components for each GMM (optional) 12 | % G: number of randomizations for each number of components (optional) 13 | % I: number of iterations (optional) 14 | % 15 | % OUTPUTS 16 | % res: A cell of size ((C-1)*G,6) that for each q = 1:(C-1)*G contains 17 | % Q: cluster posterior probabilities 18 | % mu: cluster means (time dependent + variable dependent) 19 | % s2: cluster variances (variable dependent) 20 | % theta: cluster priors 21 | % dim_idx: indexes of the subset of dimensions considered 22 | % time_idx: indexes of the subset of time intervals considered 23 | % C 24 | % G 25 | 26 | % 27 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier. 28 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 29 | 30 | N = size(X,1); % number of time series 31 | T = size(X,2); % time steps in each time series 32 | V = size(X,3); % number of variables in each time series 33 | 34 | % Parse the optional parameters 35 | p = inputParser(); 36 | if(N < 100) 37 | p.addParameter('C', 10, @(z) assert(z>=2, 'C must be larger than 1')); 38 | else 39 | p.addParameter('C', 40, @(z) assert(z>=2, 'C must be larger than 1')); 40 | end 41 | p.addParameter('G', 30); 42 | p.addParameter('minN', 0.8, @(z) assert(z>0 && z<=1, 'The minimum percentage of subsample must be in (0,1]')); 43 | if(V==1) 44 | p.addParameter('minV', 1, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 45 | else 46 | p.addParameter('minV', 2, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 47 | end 48 | p.addParameter('maxV', min(ceil(0.9*V),15), @(z) assert(z>=1 && z<=V, 'The maximum number of variables must be in [1,V]')); 49 | p.addParameter('minT', 6, @(z) assert(z>=1 && z<=T, 'The minimum length of time segments should be in [1,T]')); 50 | p.addParameter('maxT', min(floor(0.8*T),25), @(z) assert(z>=1 && z<=T, 'The maximum length of time segments should be in [1,T]')); 51 | p.addParameter('I', 20); 52 | p.parse(varargin{:}); 53 | C = p.Results.C; 54 | G = p.Results.G; 55 | minN = p.Results.minN; 56 | minV = p.Results.minV; 57 | maxV = p.Results.maxV; 58 | minT = p.Results.minT; 59 | maxT = p.Results.maxT; 60 | I = p.Results.I; 61 | 62 | 63 | res = cell(G*(C-1),6); 64 | 65 | % Check if there is missing data in the dataset.
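% (the resulting 0/1 flag is forwarded to every GMM_MAP_EM call below, so that
% the EM updates mask out the NaN entries)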
66 | nan_idx = isnan(X); 67 | if(sum(sum(sum(nan_idx)))>0) 68 | missing = 1; 69 | fprintf('The dataset contains missing data\n\n'); 70 | else 71 | missing = 0; 72 | fprintf('The dataset does not contain missing data\n\n'); 73 | end 74 | 75 | fprintf(' Training the TCK using the following parameters:\n C = %d, G = %d\n Number of MTS for each GMM: %d - %d (%d - 100 percent)\n Number of attributes sampled from [%d, %d]\n Length of time segments sampled from [%d, %d]\n\n', C, G, floor(minN*N), N, floor(minN*100), minV, maxV, minT, maxT); 76 | 77 | parfor i=1:G*(C-1) 78 | c = floor((i-1)/G) + 2; % the ensemble sweeps the number of components c over [2,C], with G randomizations each 79 | [o1, o2 , o3, o4, o5, o6] = GMM_MAP_EM(X,'C',c,'minN',minN,'minT',minT,'maxT',maxT,'minV',minV,'maxV',maxV,'I',I,'missing',missing); 80 | [res(i,:)] = {o1, o2 , o3, o4, o5, o6}; 81 | end 82 | 83 | 84 | 85 | end 86 | 87 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.decomposition import TruncatedSVD 3 | import numpy as np 4 | from scipy import interpolate 5 | 6 | 7 | def dim_reduction_plot(data, label, block_flag): 8 | """ 9 | Compute a linear dimensionality reduction (truncated SVD, i.e. PCA without mean centering) and scatter the first two components 10 | """ 11 | 12 | PCA_model = TruncatedSVD(n_components=3).fit(data) 13 | data_PCA = PCA_model.transform(data) 14 | idxc1 = np.where(label==0) 15 | idxc2 = np.where(label==1) 16 | plt.scatter(data_PCA[idxc1,0],data_PCA[idxc1,1],s=80,c='r', marker='^',linewidths = 0, label='healthy') 17 | plt.scatter(data_PCA[idxc2,0],data_PCA[idxc2,1],s=80,c='y', marker='o',linewidths = 0, label='infected') 18 | plt.gca().axes.get_xaxis().set_ticks([]) 19 | plt.gca().axes.get_yaxis().set_ticks([]) 20 | plt.title('PCA of the codes') 21 | plt.legend(scatterpoints=1,loc='best') 22 | plt.show(block=block_flag) 23 | 24 | def ideal_kernel(labels): 25 | """ 26 | Compute the ideal kernel K: 27 | an entry k_ij = 0 if i and j have different class, 28 | k_ij = 1 if i and j have the same class 29 | """ 30 | K = np.zeros([labels.shape[0], labels.shape[0]]) 31 | 32 | for i in range(labels.shape[0]): 33 | k = labels[i] == labels 34 | k = k.astype(int) # astype returns a new array; the result must be assigned 35 | K[:,i] = k[:,0] 36 | return K 37 | 38 | 39 | def interp_data(X, X_len, restore=False, interp_kind='linear'): 40 | """ 41 | Interpolate data to match the same maximum length in X_len 42 | If restore is True, data are interpolated back to their original length 43 | data are assumed to be time-major 44 | interp_kind: can be 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic' 45 | """ 46 | 47 | [T, N, V] = X.shape 48 | X_new = np.zeros_like(X) 49 | 50 | # restore original lengths 51 | if restore: 52 | for n in range(N): 53 | t = np.linspace(start=0, stop=X_len[n], num=T) 54 | t_new = np.linspace(start=0, stop=X_len[n], num=X_len[n]) 55 | for v in range(V): 56 | x_n_v = X[:,n,v] 57 | f = interpolate.interp1d(t, x_n_v, kind=interp_kind) 58 | X_new[:X_len[n],n,v] = f(t_new) 59 | 60 | # interpolate all data to length T 61 | else: 62 | for n in range(N): 63 | t = np.linspace(start=0, stop=X_len[n], num=X_len[n]) 64 | t_new = np.linspace(start=0, stop=X_len[n], num=T) 65 | for v in range(V): 66 | x_n_v = X[:X_len[n],n,v] 67 | f = interpolate.interp1d(t, x_n_v, kind=interp_kind) 68 | X_new[:,n,v] = f(t_new) 69 | 70 | return X_new 71 | 72 | 73 | def classify_with_knn(train_data, train_labels, test_data, test_labels, k=3, metric='minkowski'): 74 | """ 75 | Perform classification with knn.
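Note (added): F1 and AUC are computed from the hard label predictions returned
by the classifier; for binary labels, roc_auc_score evaluated on 0/1 predictions
reduces to the balanced accuracy, (TPR + TNR)/2, rather than a score-based AUC.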
76 | """ 77 | from sklearn.neighbors import KNeighborsClassifier 78 | from sklearn.metrics import f1_score, roc_auc_score 79 | 80 | neigh = KNeighborsClassifier(n_neighbors=k, metric=metric) 81 | neigh.fit(train_data, train_labels) 82 | accuracy = neigh.score(test_data, test_labels) 83 | pred_labels = neigh.predict(test_data) 84 | F1 = f1_score(test_labels, pred_labels) 85 | AUC = roc_auc_score(test_labels, pred_labels) 86 | 87 | return accuracy, F1, AUC 88 | 89 | def mse_and_corr(targets, preds, targets_len): 90 | """ 91 | targets and preds must have shape [time_steps, samples, variables] 92 | targets_len must have shape [samples,] 93 | """ 94 | mse_list = [] 95 | corr_list = [] 96 | for i in range(targets.shape[1]): 97 | len_i = targets_len[i] 98 | test_data_i = targets[:len_i,i,:] 99 | pred_i = preds[:len_i,i,:] 100 | mse_list.append(np.mean((test_data_i-pred_i)**2)) 101 | corr_list.append(np.corrcoef(test_data_i.flatten(), pred_i.flatten())[0,1]) 102 | tot_mse = np.mean(mse_list) 103 | tot_corr = np.mean(corr_list) 104 | 105 | return tot_mse, tot_corr 106 | 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Kernelized Auto Encoder with Time series Cluster Kernel 2 | 3 | [![arXiv](https://img.shields.io/badge/arXiv-1710.07547-b31b1b.svg?style=flat-square&logo=arxiv&logoColor=white)](https://arxiv.org/abs/1710.07547) 4 | 5 | Reference paper: [Learning compressed representations of blood samples time series with missing data](https://arxiv.org/abs/1710.07547) 6 | 7 | 8 | 9 | ## TL;DR 10 | Tensorflow implementation of a [Deep Kernelized Auto Encoder (dkAE)](https://link.springer.com/chapter/10.1007/978-3-319-59126-1_35), aligned with the [Time series Cluster Kernel (TCK)](https://arxiv.org/abs/1704.00794), for learning vectorial representations of mutlivariate time series (MTS) with missing data. 11 | 12 | The MTS analyzed are blood sample measurements of patients with surgical site infections. 13 | The original dataset is available [at this link](https://groups.google.com/forum/#!topic/ml-news/MQtVkxizrrU). 14 | Here, we consider a subset of 883 MTS divided into 2 classes: superficial infections (651 MTS) and severe infections (232 MTS). 15 | Each MTS has 10 attributes recorded for 20 time steps. 16 | The first 80% of the datasets is used as a training set and the rest as a test set. 17 | 18 | The dataset is located in the folder [Data/](https://github.com/FilippoMB/TCK_AE/tree/master/Data) and consists of 4 files: 19 | * `x.mat` -- training set 20 | * `Y.mat` -- labels of the training set 21 | * `xte.mat` -- test set 22 | * `Yte.mat` -- labels of the test set 23 | 24 | ## Train TCK (MATLAB) 25 | 26 | Run the matlab script [TCK/main.m](https://github.com/FilippoMB/TCK_AE/blob/master/TCK/main.m) to compute TCK on the blood data. 27 | The TCK parameters are fit on the training data in `x.mat` and then TCK is evaluated on the test data in `xte.mat`. 28 | 29 | The computed kernel **K** can be divided in 4 parts: *K_trtr*, the similarities among the elements of the training set, *K_tete*, the similarities among elements of the test set, *K_trte* and *K_tetr*, the similarities between elements of the training set and test set. 30 | The classification of the test set is done on *K_tetr* (or *K_trte*) using a *k*NN classifier, with *k*=1. 
31 | In particular, for each row *i* in *K_tetr*, relative to the *i*th test element, we select the column *j* with the highest similarity, which corresponds to the *j*th training element. 32 | Then, we assign to the *i*th test element the same label as the *j*th training element. 33 | 34 | For example, we get the following classification results on the test set: 35 | 36 | ```matlab 37 | ACC: 0.86932, F1: 0.7013, AUC: 0.77809 38 | ``` 39 | A visualization of *K_tete* is also returned. 40 | 41 | 42 | 43 | We can see that the matrix has a block structure: the first, larger block on the diagonal contains the similarities between the MTS of class 1, and the second, smaller block is relative to the elements of class 2. 44 | Results are saved in [/Data/TCK_data.mat](https://github.com/FilippoMB/TCK_AE/blob/master/Data/TCK_data.mat) and they are used in the next section to train the dkAE. 45 | 46 | Due to the stochastic procedure for computing TCK, we repeat the procedure 10 times using random and independent initializations. 47 | Hyperparameter selection in TCK is not critical and we always use the default values (see the original [TCK paper](https://arxiv.org/abs/1704.00794) for details). 48 | 49 | ## Train the dkAE with TCK (Python) 50 | 51 | The dkAE depends on a set of hyperparameters; the values used in this experiment are specified in the following: 52 | * `code_size`: the dimension of the hidden representations learned by the dkAE (value=20); 53 | * `w_reg`: parameter that weights the L2 regularization of the model weights in the loss function (value=0.001); 54 | * `a_reg`: parameter that weights the kernel alignment of the code inner products with the prior TCK kernel in the loss function (value=0.001); 55 | * `num_epochs`: number of epochs used to train the model (value=5000); 56 | * `batch_size`: the size of the mini-batches used during training (value=25); 57 | * `max_gradient_norm`: maximum value that gradients are allowed to assume; gradients with a larger norm are clipped (value=1.0); 58 | * `learning_rate`: initial learning rate of the Adam algorithm, used in the gradient descent training procedure (value=0.001); 59 | * `hidden_size`: size of the second hidden layer of the encoder and the first hidden layer of the decoder (value=30). 60 | 61 | The configuration (in terms of processing units in each layer) of the AE used in the experiments is [200, 30, 20, 30, 200]. 62 | 63 | To run the whole training and testing procedure, just execute the script [AE.py](https://github.com/FilippoMB/TCK_AE/blob/master/AE.py). 64 | Hyperparameters are set by default to the values described above, but new values can be specified, for example, in this way: 65 | ``` 66 | python3 AE.py --code_size 5 --w_reg 0.001 --a_reg 0.1 --num_epochs 10000 --max_gradient_norm 0.5 --learning_rate 0.001 --hidden_size 30 67 | ``` 68 | Additional hyperparameters can be modified within [AE.py](https://github.com/FilippoMB/TCK_AE/blob/master/AE.py). They are listed in the following with the values used in our experiment: 69 | 70 | * `dim_red`: computes PCA on the learned code representations of the test set and plots the first two components; 71 | * `plot_on`: show plots at the end of the training (set to 0 for only textual output); 72 | * `interp_on`: interpolate the time series if they have different lengths (not used); 73 | * `tied_weights`: encoder and decoder have tied weights (not used); 74 | * `lin_dec`: the decoder has only linear activations rather than squashing nonlinearities.
75 | 76 | ```python 77 | dim_red = 1 78 | plot_on = 1 79 | interp_on = 0 80 | tied_weights = 0 81 | lin_dec = 1 82 | ``` 83 | 84 | During the training, the reconstruction loss and the code loss can be visualized in [Tensorboard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). 85 | The *reconstruction loss* is the MSE between the encoder input and its reconstruction performed by the decoder, while the *code loss* is the Frobenius norm of the difference between the prior TCK kernel and the matrix of the inner products of the codes (both normalized by their own Frobenius norm). 86 | 87 | 88 | 89 | 90 | ## Learned representations of test set and classification 91 | 92 | Once the training procedure is over, the dkAE is fed with the MTS of the test set, and the corresponding codes are generated. 93 | The inner products of the codes are visualized and we can notice that their structure resembles the one of the prior TCK kernel shown previously (here we focus on the test part *K_tete*), where the two classes in the test set can be clearly recognized. 94 | 95 | 96 | 97 | To obtain a visualization of the learned representations, we perform PCA on the codes and plot the first two components. It is possible to see that the 2 classes are well separated. 98 | 99 | 100 | 101 | The results returned are the reconstruction MSE and the Pearson correlation between the encoder input and the decoder output, as well as the accuracy, F1 score, and area under the ROC curve of the *k*NN classifier with *k*=1: 102 | ```python 103 | Test MSE: 0.096 104 | Test Pearson correlation: 0.518 105 | kNN: acc: 0.869, F1: 0.716, AUC: 0.792 106 | ``` 107 | 108 | ## Citation 109 | 110 | ```bibtex 111 | @article{bianchi2017learning, 112 | title={Learning compressed representations of blood samples time series with missing data}, 113 | author={Bianchi, Filippo Maria and Mikalsen, Karl {\O}yvind and Jenssen, Robert}, 114 | journal={arXiv preprint arXiv:1710.07547}, 115 | year={2017} 116 | } 117 | ``` 118 | -------------------------------------------------------------------------------- /TCK/GMM_MAP_EM.m: -------------------------------------------------------------------------------- 1 | function [ Q, mu, s2, theta, dim_idx, time_idx] = GMM_MAP_EM(X, varargin) 2 | % MAP_EM - fit a GMM to time series data with missing values using MAP-EM 3 | % 4 | % INPUTS 5 | % X: data array of size N x T x V 6 | % C: number of mixture components (optional) 7 | % minN: min percentage of subsample (optional) 8 | % minV: min number of dimensions (optional) 9 | % maxV: max number of dimensions (optional) 10 | % minT: min length of time segments (optional) 11 | % maxT: max length of time segments (optional) 12 | % I: number of iterations (optional) 13 | % missing: binary indicator. 1 if there is missing data and 0 if not 14 | % 15 | % OUTPUTS 16 | % Q: cluster posterior probabilities 17 | % mu: cluster means (time dependent + variable dependent) 18 | % s2: cluster variances (variable dependent) 19 | % theta: cluster priors 20 | % dim_idx: indexes of the subset of dimensions considered 21 | % time_idx: indexes of the subset of time intervals considered 22 | % 23 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier.
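% Note (added for clarity): the MAP-EM iteration implemented below alternates
%   E-step: Q(n,c) ~ theta(c) * prod_{t,v} N( x(n,t,v) | mu(t,v,c), s2(v,c) ), row-normalized
%           (with missing data, each Gaussian factor is raised to the binary mask R)
%   M-step: theta(c) = sum_n Q(n,c) / sN
%           s2(v,c) = ( n0*s2_0(v) + var1 ) / ( n0 + var2 )   (MAP update with prior strength n0)
%           mu(:,v,c) = A\b, with A and b built from the smoothness prior S_0 (MAP update)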
24 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 25 | 26 | N = size(X,1); % number of time series 27 | T = size(X,2); % time steps in each time series 28 | V = size(X,3); % number of variables in each time series 29 | 30 | % Parse the optional parameters 31 | p = inputParser(); 32 | p.addParameter('minN', 0.8, @(z) assert(z>0 && z<=1, 'The minimum percentage of subsample must be in (0,1]')); 33 | if(V==1) 34 | p.addParameter('minV', 1, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 35 | else 36 | p.addParameter('minV', 2, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 37 | end 38 | p.addParameter('maxV', V, @(z) assert(z>=1 && z<=V, 'The maximum number of variables must be in [1,V]')); 39 | p.addParameter('minT', 6, @(z) assert(z>=1 && z<=T, 'The minimum length of time segments should be in [1,T]')); 40 | p.addParameter('maxT', min(floor(0.8*T),25), @(z) assert(z>=1 && z<=T, 'The maximum length of time segments should be in [1,T]')); 41 | p.addParameter('C', 40); 42 | p.addParameter('missing', 2); % default 2 raises an error below unless an explicit 0/1 value is passed 43 | p.addParameter('I', 20); 44 | p.parse(varargin{:}); 45 | minN = p.Results.minN; 46 | minV = p.Results.minV; 47 | maxV = p.Results.maxV; 48 | minT = p.Results.minT; 49 | maxT = p.Results.maxT; 50 | C = p.Results.C; 51 | I = p.Results.I; 52 | missing = p.Results.missing; 53 | 54 | % Hyperparameters for the mean prior (a0, b0) and the std dev prior (n0) of the mixture components 55 | a0 = (1.0-0.001).*rand + 0.001; 56 | b0 = (0.2-0.005).*rand + 0.005; 57 | n0 = (0.2-0.001).*rand + 0.001; 58 | 59 | % Randomly subsample dimensions, time intervals and samples 60 | s = RandStream('mt19937ar','Seed',0); 61 | if(N > 100) 62 | sN = randi([round(minN*N),N]); 63 | else 64 | sN = round(0.9*N); 65 | end 66 | sub_idx = sort(randperm(s,N,sN)); % generate sN (sorted) integers between 1 and N 67 | 68 | sV = randi([minV,maxV]); 69 | dim_idx = sort(randperm(s,V,sV)); % generate sV (sorted) integers between 1 and V 70 | 71 | t1 = randi([1,T-minT+1]); 72 | t2 = randi([t1+minT-1,min(T,(t1+maxT-1))]); 73 | sT = t2-t1+1; 74 | time_idx = t1:t2; % generate sT contiguous integers from t1 to t2 75 | sX = X(sub_idx,time_idx,dim_idx); 76 | 77 | 78 | if(missing == 1) 79 | nan_idx = isnan(sX); 80 | R = ones(size(sX)); 81 | R(nan_idx)=0; 82 | 83 | % Calculate empirical moments 84 | mu_0 = zeros(sT,sV); % prior mean over time and variables (sT x sV) 85 | for v = 1:sV 86 | mu_0(:,v) = nanmean(sX(:,:,v),1); 87 | end 88 | s_0 = zeros(sV,1); % prior std over variables (sV x 1) 89 | tempX = reshape(sX,[sN*sT,sV]); 90 | for v = 1:sV 91 | s_0(v) = nanstd(tempX(:,v),0,1); 92 | end 93 | s2_0 = s_0.^2; 94 | 95 | 96 | [S_0, invS_0] = deal(zeros(sT,sT,sV)); 97 | T1 = repmat((1:sT)',[1,sT]); 98 | T2 = repmat((1:sT),[sT,1]); 99 | for v=1:sV 100 | S_0(:,:,v) = s_0(v)*b0*exp(-a0*(T1-T2).^2); 101 | if(rcond(S_0(:,:,v)) < 1e-8) % check if the matrix can be inverted 102 | S_0(:,:,v) = S_0(:,:,v) + 0.1*S_0(1,1,v)*eye(sT); %add a small number to the diagonal 103 | end 104 | invS_0(:,:,v) = inv(S_0(:,:,v)); 105 | end 106 | 107 | 108 | % initialize model parameters 109 | theta = ones(1,C)/C; % cluster priors (1 x C) 110 | mu= zeros(sT,sV,C); % cluster means (sT x sV x C) 111 | s2 = zeros(sV,C); % cluster variances (sV x C) 112 | Q = zeros(sN,C); % cluster assignments (sN x C) 113 | 114 | sX(R==0) = -100000; % dummy value for the missing entries (masked out by the exponent R below) 115 | 116 | for i=1:I 117 | 118 | % initialization: random clusters assignment 119 | if(i==1) 120 | cluster = randi(C,[sN,1]); 121 | Q = double(bsxfun(@eq, cluster(:), 1:C));
122 | 123 | % update clusters assignment 124 | else 125 | for c=1:C 126 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,sN]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,sN,sT]),[2,3,1]) ).^R; 127 | distr_c(distr_c < normpdf(3)) = normpdf(3); 128 | distr_c = reshape(distr_c,[sN,sV*sT]); 129 | Q(:,c) = theta(c)*prod(distr_c,2); 130 | end 131 | Q = Q./repmat(sum(Q,2),[1,C]); 132 | end 133 | 134 | % update mu, s2 and theta 135 | for c=1:C 136 | theta(c) = sum(Q(:,c))/sN; 137 | for v=1:sV 138 | var2 = sum(R(:,:,v),2)'*Q(:,c); 139 | temp = (sX(:,:,v) - repmat(mu(:,v,c)',[sN,1]) ).^2; 140 | var1 = Q(:,c)'*sum((R(:,:,v).*temp),2); 141 | s2(v,c) = (n0*s2_0(v)+var1) / (n0+var2); 142 | 143 | A = invS_0(:,:,v) + diag(R(:,:,v)'*Q(:,c)/ s2(v,c)); 144 | b = invS_0(:,:,v)*mu_0(:,v) + (R(:,:,v).*sX(:,:,v))'*Q(:,c)/s2(v,c); 145 | mu(:,v,c) = A\b; 146 | end 147 | end 148 | end % end for i=1:I 149 | 150 | % compute assignments for all data 151 | Q = GMMposterior(X, C, mu, s2, theta, dim_idx, time_idx, missing ); 152 | 153 | 154 | %if no missing data the computations simplify a bit 155 | elseif(missing == 0) 156 | % Calculate empirical moments 157 | mu_0 = zeros(sT,sV); % prior mean over time and variables (sT x sV) 158 | for v = 1:sV 159 | mu_0(:,v) = mean(sX(:,:,v),1); 160 | end 161 | s_0 = zeros(sV,1); % prior std over variables (sV x 1) 162 | tempX = reshape(sX,[sN*sT,sV]); 163 | for v = 1:sV 164 | s_0(v) = std(tempX(:,v)); 165 | end 166 | s2_0 = s_0.^2; 167 | 168 | 169 | [S_0, invS_0] = deal(zeros(sT,sT,sV)); 170 | T1 = repmat((1:sT)',[1,sT]); 171 | T2 = repmat((1:sT),[sT,1]); 172 | for v=1:sV 173 | S_0(:,:,v) = s_0(v)*b0*exp(-a0*(T1-T2).^2); 174 | if(rcond(S_0(:,:,v)) < 1e-8) % check if the matrix can be inverted 175 | S_0(:,:,v) = S_0(:,:,v) + 0.1*S_0(1,1,v)*eye(sT); %add a small number to the diagonal if S_0 is not invertible 176 | end 177 | invS_0(:,:,v) = inv(S_0(:,:,v)); 178 | end 179 | 180 | 181 | 182 | % initialize model parameters 183 | theta = ones(1,C)/C; % cluster priors (1 x C) 184 | mu= zeros(sT,sV,C); % cluster means (sT x sV x C) 185 | s2 = zeros(sV,C); % cluster variances (sV x C) 186 | Q = zeros(sN,C); % cluster assignments (sN x C) 187 | 188 | for i=1:I 189 | 190 | % initialization: random clusters assignment 191 | if(i==1) 192 | cluster = randi(C,[sN,1]); 193 | Q = double(bsxfun(@eq, cluster(:), 1:C)); 194 | 195 | % update clusters assignment 196 | else 197 | for c=1:C 198 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,sN]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,sN,sT]),[2,3,1]) ); 199 | distr_c(distr_c < normpdf(3)) = normpdf(3); 200 | distr_c = reshape(distr_c,[sN,sV*sT]); 201 | Q(:,c) = theta(c)*prod(distr_c,2); 202 | end 203 | Q = Q./repmat(sum(Q,2),[1,C]); 204 | end 205 | 206 | % update mu, s2 and theta 207 | for c=1:C 208 | sumQ = sum(Q(:,c)); 209 | theta(c) = sumQ/sN; 210 | for v=1:sV 211 | var2 = sT*sumQ; 212 | var1 = Q(:,c)'*sum((sX(:,:,v) - repmat(mu(:,v,c)',[sN,1]) ).^2,2); 213 | s2(v,c) = (n0*s2_0(v)+var1) / (n0+var2); 214 | 215 | A = invS_0(:,:,v) + (sumQ /s2(v,c))*eye(sT); 216 | b = invS_0(:,:,v)*mu_0(:,v) + (sX(:,:,v))'*Q(:,c)/s2(v,c); 217 | mu(:,v,c) = A\b; 218 | end 219 | end 220 | end % end for i=1:I 221 | 222 | % compute assignments for all data 223 | Q = GMMposterior(X, C, mu, s2, theta, dim_idx, time_idx, missing ); 224 | 225 | 226 | else 227 | error('The value of the variable missing is not 0 or 1'); 228 | end 229 | 230 | 231 | 232 | end 233 | 234 | -------------------------------------------------------------------------------- /AE.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf 2 | import argparse 3 | from TS_datasets import getBlood 4 | import time, os 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from utils import classify_with_knn, interp_data, mse_and_corr, dim_reduction_plot 8 | import math 9 | 10 | dim_red = 1 # perform PCA on the codes and plot the first two components 11 | plot_on = 1 # plot the results, otherwise only textual output is returned 12 | interp_on = 0 # interpolate data (needed if the input time series have different lengths) 13 | tied_weights = 0 # train an AE where the decoder weights are the encoder weights transposed 14 | lin_dec = 1 # train an AE with linear activations in the decoder 15 | 16 | # parse input data 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--code_size", default=20, help="size of the code", type=int) 19 | parser.add_argument("--w_reg", default=0.001, help="weight of the regularization in the loss function", type=float) 20 | parser.add_argument("--a_reg", default=0.2, help="weight of the kernel alignment", type=float) 21 | parser.add_argument("--num_epochs", default=5000, help="number of epochs in training", type=int) 22 | parser.add_argument("--batch_size", default=25, help="number of samples in each batch", type=int) 23 | parser.add_argument("--max_gradient_norm", default=1.0, help="max gradient norm for gradient clipping", type=float) 24 | parser.add_argument("--learning_rate", default=0.001, help="Adam initial learning rate", type=float) 25 | parser.add_argument("--hidden_size", default=30, help="size of the hidden layers", type=int) 26 | args = parser.parse_args() 27 | print(args) 28 | 29 | # ================= DATASET ================= 30 | (train_data, train_labels, train_len, _, K_tr, 31 | valid_data, _, valid_len, _, K_vs, 32 | test_data_orig, test_labels, test_len, _, K_ts) = getBlood(kernel='TCK', inp='zero') # data shape is [T, N, V] = [time_steps, num_elements, num_var] 33 | 34 | # sort test data (for a better visualization of the inner product of the codes) 35 | sort_idx = np.argsort(test_labels,axis=0)[:,0] 36 | test_data_orig = test_data_orig[:,sort_idx,:] 37 | test_labels = test_labels[sort_idx,:] 38 | K_ts = K_ts[sort_idx,:][:,sort_idx] 39 | 40 | # interpolation 41 | if np.min(train_len) < np.max(train_len) and interp_on: 42 | print('-- Data Interpolation --') 43 | train_data = interp_data(train_data, train_len) 44 | valid_data = interp_data(valid_data, valid_len) 45 | test_data = interp_data(test_data_orig, test_len) 46 | else: 47 | test_data = test_data_orig 48 | 49 | # transpose and reshape [T, N, V] --> [N, T, V] --> [N, T*V] 50 | train_data = np.transpose(train_data,axes=[1,0,2]) 51 | train_data = np.reshape(train_data, (train_data.shape[0], train_data.shape[1]*train_data.shape[2])) 52 | valid_data = np.transpose(valid_data,axes=[1,0,2]) 53 | valid_data = np.reshape(valid_data, (valid_data.shape[0], valid_data.shape[1]*valid_data.shape[2])) 54 | test_data = np.transpose(test_data,axes=[1,0,2]) 55 | test_data = np.reshape(test_data, (test_data.shape[0], test_data.shape[1]*test_data.shape[2])) 56 | 57 | print('\n**** Processing Blood data: Tr{}, Vs{}, Ts{} ****\n'.format(train_data.shape, valid_data.shape, test_data.shape)) 58 | 59 | input_length = train_data.shape[1] # same for all inputs 60 | 61 | # ================= GRAPH ================= 62 | 63 | # init session 64 | tf.reset_default_graph() # needed when working with iPython 65 | sess = tf.Session() 66 | 67 | # placeholders
68 | encoder_inputs = tf.placeholder(shape=(None,input_length), dtype=tf.float32, name='encoder_inputs') 69 | prior_K = tf.placeholder(shape=(None, None), dtype=tf.float32, name='prior_K') 70 | 71 | # ----- ENCODER ----- 72 | We1 = tf.Variable(tf.random_uniform((input_length, args.hidden_size), -1.0 / math.sqrt(input_length), 1.0 / math.sqrt(input_length))) 73 | We2 = tf.Variable(tf.random_uniform((args.hidden_size, args.code_size), -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size))) 74 | 75 | be1 = tf.Variable(tf.zeros([args.hidden_size])) 76 | be2 = tf.Variable(tf.zeros([args.code_size])) 77 | 78 | hidden_1 = tf.nn.tanh(tf.matmul(encoder_inputs, We1) + be1) 79 | code = tf.nn.tanh(tf.matmul(hidden_1, We2) + be2) 80 | 81 | # kernel on codes 82 | code_K = tf.tensordot(code, tf.transpose(code), axes=1) 83 | 84 | # ----- DECODER ----- 85 | if tied_weights: 86 | Wd1 = tf.transpose(We2) 87 | Wd2 = tf.transpose(We1) 88 | else: 89 | Wd1 = tf.Variable(tf.random_uniform((args.code_size, args.hidden_size), -1.0 / math.sqrt(args.code_size), 1.0 / math.sqrt(args.code_size))) 90 | Wd2 = tf.Variable(tf.random_uniform((args.hidden_size, input_length), -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size))) 91 | 92 | bd1 = tf.Variable(tf.zeros([args.hidden_size])) 93 | bd2 = tf.Variable(tf.zeros([input_length])) 94 | 95 | if lin_dec: 96 | hidden_2 = tf.matmul(code, Wd1) + bd1 97 | else: 98 | hidden_2 = tf.nn.tanh(tf.matmul(code, Wd1) + bd1) 99 | 100 | dec_out = tf.matmul(hidden_2, Wd2) + bd2 101 | 102 | # ----- LOSS ----- 103 | # kernel alignment loss with normalized Frobenius norm 104 | code_K_norm = code_K/tf.norm(code_K, ord='fro', axis=[-2,-1]) 105 | prior_K_norm = prior_K/tf.norm(prior_K, ord='fro', axis=[-2,-1]) 106 | k_loss = tf.norm(code_K_norm - prior_K_norm, ord='fro', axis=[-2,-1]) 107 | 108 | # reconstruction loss 109 | parameters = tf.trainable_variables() 110 | optimizer = tf.train.AdamOptimizer(args.learning_rate) 111 | reconstruct_loss = tf.losses.mean_squared_error(labels=encoder_inputs, predictions=dec_out) 112 | 113 | # L2 loss 114 | reg_loss = 0 115 | for tf_var in tf.trainable_variables(): 116 | reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var)) 117 | 118 | tot_loss = reconstruct_loss + args.w_reg*reg_loss + args.a_reg*k_loss 119 | 120 | # Calculate and clip gradients 121 | gradients = tf.gradients(tot_loss, parameters) 122 | clipped_gradients, _ = tf.clip_by_global_norm(gradients, args.max_gradient_norm) 123 | update_step = optimizer.apply_gradients(zip(clipped_gradients, parameters)) 124 | 125 | sess.run(tf.global_variables_initializer()) 126 | 127 | # trainable parameters count 128 | total_parameters = 0 129 | for variable in tf.trainable_variables(): 130 | shape = variable.get_shape() 131 | variable_parameters = 1 132 | for dim in shape: 133 | variable_parameters *= dim.value 134 | total_parameters += variable_parameters 135 | print('Total parameters: {}'.format(total_parameters)) 136 | 137 | # ============= TENSORBOARD ============= 138 | mean_grads = tf.reduce_mean([tf.reduce_mean(grad) for grad in gradients]) 139 | tf.summary.scalar('mean_grads', mean_grads) 140 | tf.summary.scalar('reconstruct_loss', reconstruct_loss) 141 | tf.summary.scalar('k_loss', k_loss) 142 | tvars = tf.trainable_variables() 143 | for tvar in tvars: 144 | tf.summary.histogram(tvar.name.replace(':','_'), tvar) 145 | merged_summary = tf.summary.merge_all() 146 | 147 | # ================= TRAINING ================= 148 | 149 | # initialize training variables 150 | time_tr_start = time.time()
151 | batch_size = args.batch_size 152 | max_batches = train_data.shape[0]//batch_size 153 | loss_track = [] 154 | kloss_track = [] 155 | min_vs_loss = np.infty 156 | model_name = "/tmp/dkae_models/m_0.ckpt"; os.makedirs("/tmp/dkae_models", exist_ok=True) # the checkpoint directory must exist before saver.save is called 157 | train_writer = tf.summary.FileWriter('/tmp/tensorboard', graph=sess.graph) 158 | saver = tf.train.Saver() 159 | 160 | try: 161 | for ep in range(args.num_epochs): 162 | 163 | # shuffle training data 164 | idx = np.random.permutation(train_data.shape[0]) 165 | train_data_s = train_data[idx,:] 166 | K_tr_s = K_tr[idx,:][:,idx] 167 | 168 | for batch in range(max_batches): 169 | 170 | fdtr = {encoder_inputs: train_data_s[(batch)*batch_size:(batch+1)*batch_size,:], 171 | prior_K: K_tr_s[(batch)*batch_size:(batch+1)*batch_size, (batch)*batch_size:(batch+1)*batch_size] 172 | } 173 | _,train_loss, train_kloss = sess.run([update_step, reconstruct_loss, k_loss], fdtr) 174 | loss_track.append(train_loss) 175 | kloss_track.append(train_kloss) 176 | 177 | # check training progress on the validation set (in blood data valid=train) 178 | if ep % 100 == 0: 179 | print('Ep: {}'.format(ep)) 180 | 181 | fdvs = {encoder_inputs: valid_data, 182 | prior_K: K_vs} 183 | outvs, lossvs, klossvs, vs_code_K, summary = sess.run([dec_out, reconstruct_loss, k_loss, code_K, merged_summary], fdvs) 184 | train_writer.add_summary(summary, ep) 185 | print('VS r_loss=%.3f, k_loss=%.3f -- TR r_loss=%.3f, k_loss=%.3f'%(lossvs, klossvs, np.mean(loss_track[-100:]), np.mean(kloss_track[-100:]))) 186 | 187 | # Save model yielding best results on validation 188 | if lossvs < min_vs_loss: 189 | min_vs_loss = lossvs 190 | tf.add_to_collection("encoder_inputs",encoder_inputs) 191 | tf.add_to_collection("dec_out",dec_out) 192 | tf.add_to_collection("reconstruct_loss",reconstruct_loss) 193 | save_path = saver.save(sess, model_name) 194 | 195 | except KeyboardInterrupt: 196 | print('training interrupted') 197 | 198 | 199 | time_tr_end = time.time() 200 | print('Tot training time: {} min'.format((time_tr_end-time_tr_start)//60)) 201 | 202 | # ================= TEST ================= 203 | print('************ TEST ************ \n>>restoring from:'+model_name+'<<') 204 | 205 | tf.reset_default_graph() # be sure that correct weights are loaded 206 | saver.restore(sess, model_name) 207 | 208 | tr_code = sess.run(code, {encoder_inputs: train_data}) 209 | pred, pred_loss, ts_code, ts_code_K = sess.run([dec_out, reconstruct_loss, code, code_K], {encoder_inputs: test_data}) 210 | print('Test loss: %.3f'%(np.mean((pred-test_data)**2))) 211 | 212 | # reverse transformations 213 | pred = np.reshape(pred, (test_data_orig.shape[1], test_data_orig.shape[0], test_data_orig.shape[2])) 214 | pred = np.transpose(pred,axes=[1,0,2]) 215 | test_data = test_data_orig 216 | 217 | if np.min(train_len) < np.max(train_len) and interp_on: 218 | print('-- Reverse Interpolation --') 219 | pred = interp_data(pred, test_len, restore=True) 220 | 221 | if plot_on: 222 | 223 | # plot the reconstruction of a random time series 224 | plot_idx1 = np.random.randint(low=0,high=test_data.shape[1]) 225 | plot_idx2 = np.random.randint(low=0,high=test_data.shape[2]) 226 | target = test_data[:,plot_idx1,plot_idx2] 227 | ts_out = pred[:,plot_idx1,plot_idx2] 228 | plt.plot(target, label='target') 229 | plt.plot(ts_out, label='pred') 230 | plt.legend(loc='best') 231 | plt.title('Prediction of a random MTS variable') 232 | plt.show(block=True) 233 | np.savetxt('AE_pred',ts_out) 234 | 235 | plt.matshow(K_ts,cmap='binary_r') 236 | plt.title('Prior TCK 
kernel') 237 | plt.gca().axes.get_xaxis().set_ticks([]) 238 | plt.gca().axes.get_yaxis().set_ticks([]) 239 | plt.show() 240 | plt.matshow(ts_code_K,cmap='binary_r') 241 | plt.title('Codes inner products') 242 | plt.gca().axes.get_xaxis().set_ticks([]) 243 | plt.gca().axes.get_yaxis().set_ticks([]) 244 | plt.show() 245 | 246 | # MSE and corr 247 | test_mse, test_corr = mse_and_corr(test_data, pred, test_len) 248 | print('Test MSE: %.3f\nTest Pearson correlation: %.3f'%(test_mse, test_corr)) 249 | 250 | # kNN classification on the codes 251 | acc, f1, auc = classify_with_knn(tr_code, train_labels[:, 0], ts_code, test_labels[:, 0], k=1) 252 | print('kNN -- acc: %.3f, F1: %.3f, AUC: %.3f'%(acc, f1, auc)) 253 | 254 | # dim reduction plots 255 | if dim_red: 256 | dim_reduction_plot(ts_code, test_labels, 1) 257 | 258 | #train_writer.close() 259 | sess.close() 260 | --------------------------------------------------------------------------------
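For reference, the kernel-alignment cost minimized in AE.py (the `k_loss` term, described in the README as the code loss) corresponds to the following NumPy computation. This is a minimal illustrative sketch, not a file of the repository; the function name is ours:

```python
import numpy as np

def kernel_alignment_loss(codes, prior_K):
    """Frobenius distance between the normalized Gram matrix of the codes
    and the normalized prior TCK kernel (mirrors k_loss in AE.py)."""
    code_K = codes @ codes.T                     # inner products of the codes
    code_K = code_K / np.linalg.norm(code_K)     # np.linalg.norm defaults to the
    prior_K = prior_K / np.linalg.norm(prior_K)  # Frobenius norm for matrices
    return np.linalg.norm(code_K - prior_K)
```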