├── .gitignore ├── Data ├── Y.mat ├── x.mat ├── Yte.mat ├── xte.mat └── TCK_data.mat ├── imgs ├── pca.png ├── kdAE_arch.png ├── learned code.png ├── prediction.png ├── tck_kernel.png ├── Selection_002.jpg └── Selection_003.jpg ├── TCK ├── TCK_data.mat ├── myKNN.m ├── get_BloodData.m ├── main.m ├── confusion_stats.m ├── GMMposterior.m ├── TCK.m ├── trainTCK.m └── GMM_MAP_EM.m ├── TS_datasets.py ├── utils.py ├── README.md └── AE.py /.gitignore: -------------------------------------------------------------------------------- 1 | AE_pred 2 | xxx.py 3 | *.pyc 4 | TCK_classification.py 5 | -------------------------------------------------------------------------------- /Data/Y.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/Y.mat -------------------------------------------------------------------------------- /Data/x.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/x.mat -------------------------------------------------------------------------------- /Data/Yte.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/Yte.mat -------------------------------------------------------------------------------- /Data/xte.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/xte.mat -------------------------------------------------------------------------------- /imgs/pca.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/pca.png -------------------------------------------------------------------------------- /TCK/TCK_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/TCK/TCK_data.mat -------------------------------------------------------------------------------- /Data/TCK_data.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/Data/TCK_data.mat -------------------------------------------------------------------------------- /imgs/kdAE_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/kdAE_arch.png -------------------------------------------------------------------------------- /imgs/learned code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/learned code.png -------------------------------------------------------------------------------- /imgs/prediction.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/prediction.png -------------------------------------------------------------------------------- /imgs/tck_kernel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/tck_kernel.png -------------------------------------------------------------------------------- /imgs/Selection_002.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/Selection_002.jpg -------------------------------------------------------------------------------- /imgs/Selection_003.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FilippoMB/Deep-Kernelized-Auto-Encoder-with-Time-series-Cluster-Kernel/HEAD/imgs/Selection_003.jpg -------------------------------------------------------------------------------- /TCK/myKNN.m: -------------------------------------------------------------------------------- 1 | function [ acc, labels_pred ] = myKNN( S, labels_tr, labels_ts, k ) 2 | %MYKNN 3 | % Input: 4 | % S - similarity matrix between training and test elements (Ntr x Nts) 5 | % labels_tr, labels_ts - class labels of the training and test elements 6 | % k - num of neighbors 7 | 8 | labels_pred = zeros(size(labels_ts)); 9 | for i=1:size(S,2) 10 | S_i = S(:,i); 11 | [~,c] = sort(S_i,'descend'); 12 | c = c(1:k); 13 | L = labels_tr(c); 14 | labels_pred(i) = mode(L); 15 | 16 | end 17 | 18 | acc = sum(labels_pred == labels_ts)/length(labels_ts); 19 | 20 | end 21 | 22 | -------------------------------------------------------------------------------- /TCK/get_BloodData.m: -------------------------------------------------------------------------------- 1 | function [ X, Y, Xte, Yte ] = get_BloodData(data_norm) 2 | 3 | x_tr = load('../Data/x.mat'); 4 | y_tr = load('../Data/Y.mat'); 5 | x_te = load('../Data/xte.mat'); 6 | y_te = load('../Data/Yte.mat'); 7 | 8 | Ntr = size(x_tr.x,1); 9 | Nts = size(x_te.xte,1); 10 | T = 20; 11 | V = 10; 12 | 13 | X = reshape(x_tr.x,[Ntr,T,V]); 14 | Xte = reshape(x_te.xte,[Nts,T,V]); 15 | Y = y_tr.Y; 16 | Yte = y_te.Yte; 17 | 18 | if data_norm 19 | for v=1:V 20 | X_v = X(:,:,v); 21 | Xte_v = Xte(:,:,v); 22 | Xv_m = nanmean(X_v(:)); 23 | Xv_s = nanstd(X_v(:)); 24 | 25 | X_v = (X_v - Xv_m)/Xv_s; 26 | X(:,:,v) = X_v; 27 | Xte_v = (Xte_v - Xv_m)/Xv_s; 28 | Xte(:,:,v) = Xte_v; 29 | end 30 | end 31 | 32 | end -------------------------------------------------------------------------------- /TCK/main.m: -------------------------------------------------------------------------------- 1 | % load data 2 | [ X, Y, Xte, Yte ] = get_BloodData(1); 3 | 4 | %% Train GMM models 5 | [GMMpar,C,G] = trainTCK(X); 6 | 7 | % Compute in-sample kernel matrix 8 | Ktrtr = TCK(GMMpar,C,G,'tr-tr'); 9 | 10 | % Compute similarity between Xte and the training elements 11 | Ktrte = TCK(GMMpar,C,G,'tr-te',Xte); 12 | 13 | % Compute kernel matrix between test elements 14 | Ktete = TCK(GMMpar,C,G,'te-te',Xte); 15 | 16 | %% kNN classifier 17 | [acc, Ypred] = myKNN(Ktrte,Y,Yte,1); 18 | [accuracy, sensitivity, specificity, precision, recall, f_measure, gmean] = confusion_stats(Yte,Ypred); 19 | [~,~,~,AUC] = perfcurve(Yte,Ypred,1); 20 | disp(['ACC: ',num2str(acc),', F1: ',num2str(f_measure),', AUC: ',num2str(AUC)]) 21 | 22 | %% visualization 23 | 24 | [~,idx] = sort(Yte);
25 | Ksort = Ktete(idx,idx); 26 | figure 27 | imagesc(Ksort) 28 | colormap('gray') 29 | set(gca,'xtick',[]) 30 | set(gca,'ytick',[]) 31 | title('TCK K') 32 | 33 | %% save mat files 34 | save('../Data/TCK_data.mat', 'X','Y','Xte','Yte','Ktrtr','Ktrte','Ktete') 35 | -------------------------------------------------------------------------------- /TCK/confusion_stats.m: -------------------------------------------------------------------------------- 1 | function [accuracy, sensitivity, specificity, precision, recall, f_measure, gmean] = confusion_stats(ACTUAL,PREDICTED) 2 | % This function evaluates the performance of a classification model by 3 | % calculating the common performance measures: Accuracy, Sensitivity, 4 | % Specificity, Precision, Recall, F-Measure, G-mean. 5 | % Input: ACTUAL = Column matrix with the actual class labels of the 6 | % examples 7 | % PREDICTED = Column matrix with the class labels predicted by the 8 | % classification model 9 | % Output: the performance measures listed in the function signature 10 | 11 | 12 | idx = (ACTUAL==1); 13 | 14 | p = length(ACTUAL(idx)); 15 | n = length(ACTUAL(~idx)); 16 | N = p+n; 17 | 18 | tp = sum(ACTUAL(idx)==PREDICTED(idx)); 19 | tn = sum(ACTUAL(~idx)==PREDICTED(~idx)); 20 | fp = n-tn; 21 | fn = p-tp; 22 | 23 | tp_rate = tp/p; 24 | tn_rate = tn/n; 25 | 26 | accuracy = (tp+tn)/N; 27 | sensitivity = tp_rate; 28 | specificity = tn_rate; 29 | precision = tp/(tp+fp); 30 | recall = sensitivity; 31 | f_measure = 2*((precision*recall)/(precision + recall)); 32 | gmean = sqrt(tp_rate*tn_rate); 33 | 34 | -------------------------------------------------------------------------------- /TCK/GMMposterior.m: -------------------------------------------------------------------------------- 1 | function [ Q ] = GMMposterior( X, C, mu, s2, theta, dim_idx, time_idx, missing ) 2 | %GMMposterior - Evaluate the posterior for the data X of the GMM described 3 | %by C, mu, s2 and theta 4 | % 5 | % INPUTS 6 | % X: data array of size N x T x V 7 | % C: number of mixture components 8 | % mu: cluster means over time and variables (sT x sV x C) 9 | % s2: cluster variances over variables (sV x C) 10 | % theta: cluster priors 11 | % dim_idx: subset of variables to be used in the clustering 12 | % time_idx: subset of time intervals to be used in the clustering 13 | % missing: binary indicator. 1 if there is missing data and 0 if not 14 | % 15 | % OUTPUTS 16 | % Q: posterior 17 | % 18 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier.
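% Note (added for clarity): under the diagonal-covariance assumption used here,
% the posterior evaluated below is
%   Q(n,c) = theta(c) * prod_{t,v} N( X(n,t,v) | mu(t,v,c), s2(v,c) ) / Z_n,
% where Z_n normalizes each row so that sum_c Q(n,c) = 1, and missing entries
% are excluded from the product through the binary mask R.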
19 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 20 | 21 | N = size(X,1); % number of time series 22 | 23 | % initialize variables 24 | Q = zeros(N,C); 25 | sX = X(:,time_idx,dim_idx); 26 | sV = length(dim_idx); 27 | sT = length(time_idx); 28 | 29 | 30 | if(missing == 1) 31 | nan_idx = isnan(sX); 32 | R = ones(size(sX)); 33 | R(nan_idx)=0; 34 | sX(R==0) = -100000; % dummy value for the missing entries (masked out by the exponent R below) 35 | 36 | for c=1:C 37 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,N]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,N,sT]),[2,3,1]) ).^R; 38 | distr_c(distr_c < normpdf(3)) = normpdf(3); % floor small densities for numerical stability 39 | distr_c = reshape(distr_c,[N,sV*sT]); 40 | Q(:,c) = theta(c)*prod(distr_c,2); 41 | end 42 | Q = Q./repmat(sum(Q,2),[1,C]); 43 | 44 | elseif(missing == 0) 45 | for c=1:C 46 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,N]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,N,sT]),[2,3,1]) ); 47 | distr_c(distr_c < normpdf(3)) = normpdf(3); % floor small densities for numerical stability 48 | distr_c = reshape(distr_c,[N,sV*sT]); 49 | Q(:,c) = theta(c)*prod(distr_c,2); 50 | end 51 | Q = Q./repmat(sum(Q,2),[1,C]); 52 | 53 | else 54 | error('The value of the variable missing is not 0 or 1'); 55 | end 56 | 57 | 58 | end 59 | 60 | -------------------------------------------------------------------------------- /TCK/TCK.m: -------------------------------------------------------------------------------- 1 | function [ K ] = TCK(GMM, C, G, mode, Xte) 2 | % TCK - compute TCK kernel matrix between training data and test data Xte 3 | % 4 | % INPUTS 5 | % 6 | % GMM : Cell output from the function trainTCK 7 | % 8 | % C: Second output from trainTCK 9 | % G: Third output from trainTCK 10 | % mode: 'tr-tr', 'tr-te' or 'te-te', selecting which block of the kernel to compute 11 | % Xte: data array of size Nte x T x V, where Nte is the number of 12 | % multivariate time series, T the length and V the number of attributes. 13 | % 14 | % OUTPUTS 15 | % K: kernel matrix 16 | 17 | % 18 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier.
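% Note (added for clarity): every block of the kernel is accumulated as a sum of
% inner products of row-normalized posterior matrices over the ensemble of
% G*(C-1) GMMs; e.g., for the 'tr-te' block,
%   K = sum_q normr(Q_q_tr) * normr(Q_q_te)',
% where Q_q_tr is stored in GMM{q,1} and Q_q_te is recomputed by GMMposterior on Xte.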
19 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 20 | 21 | 22 | if strcmp(mode, 'tr-te') 23 | 24 | % Check if the dataset contains missing elements 25 | nan_idx = isnan(Xte); 26 | if(sum(sum(sum(nan_idx)))>0) 27 | missing = 1; 28 | else 29 | missing = 0; 30 | end 31 | 32 | K = zeros(size(GMM{1,1},1),size(Xte,1)); 33 | parfor i=1:G*(C-1) 34 | c = floor((i-1)/G) + 2; 35 | K = K + normr(GMM{i,1})*normr(GMMposterior( Xte, c, GMM{i,2}, GMM{i,3}, GMM{i,4}, GMM{i,5}, GMM{i,6}, missing ))'; 36 | end 37 | 38 | elseif strcmp(mode, 'te-te') 39 | 40 | % Check if the dataset contains missing elements 41 | nan_idx = isnan(Xte); 42 | if(sum(sum(sum(nan_idx)))>0) 43 | missing = 1; 44 | else 45 | missing = 0; 46 | end 47 | 48 | K = zeros(size(Xte,1)); 49 | parfor i=1:G*(C-1) 50 | c = floor((i-1)/G) + 2; 51 | K = K + normr(GMMposterior(Xte,c,GMM{i,2},GMM{i,3},GMM{i,4},GMM{i,5},GMM{i,6},missing)) * normr(GMMposterior(Xte,c,GMM{i,2},GMM{i,3},GMM{i,4},GMM{i,5},GMM{i,6},missing))'; 52 | end 53 | 54 | elseif strcmp(mode, 'tr-tr') %in-sample kernel matrix 55 | 56 | K = zeros(size(GMM{1,1},1),size(GMM{1,1},1)); 57 | parfor i=1:G*(C-1) 58 | K = K + normr(GMM{i,1})*normr(GMM{i,1})'; 59 | end 60 | 61 | else 62 | 63 | error('Invalid training mode'); 64 | 65 | end 66 | 67 | 68 | 69 | 70 | -------------------------------------------------------------------------------- /TS_datasets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy.io 3 | from utils import ideal_kernel 4 | import pandas as pd 5 | 6 | """ 7 | Data manager for loading blood data and (precomputed) TCK kernel 8 | """ 9 | 10 | 11 | def getBlood(kernel='TCK', inp='zero'): 12 | blood_data = scipy.io.loadmat('Data/TCK_data.mat') 13 | 14 | # ------ train ------- 15 | train_data = blood_data['X'] 16 | train_data = np.transpose(train_data,axes=[1,0,2]) # time_major=True 17 | train_len = [train_data.shape[0] for _ in range(train_data.shape[1])] 18 | 19 | train_data0 = train_data[0,:,:] 20 | train_data0[np.isnan(train_data0)] = 0 # zero out NaNs at the first time step, so 'ffill' always has a value to propagate 21 | train_data[0,:,:] = train_data0 22 | for i in range(train_data.shape[1]): 23 | train_data_i = pd.DataFrame(train_data[:,i,:]) 24 | if inp == 'last': 25 | train_data_i.fillna(method='ffill',inplace=True) 26 | elif inp == 'zero': 27 | train_data_i.fillna(0,inplace=True) 28 | elif inp == 'mean': 29 | train_data_i.fillna(train_data_i.mean(),inplace=True) 30 | train_data[:,i,:] = train_data_i.values 31 | 32 | train_labels = np.asarray(blood_data['Y']) 33 | 34 | # ----- test ------- 35 | test_data = blood_data['Xte'] 36 | test_data = np.transpose(test_data,axes=[1,0,2]) # time_major=True 37 | test_len = [test_data.shape[0] for _ in range(test_data.shape[1])] 38 | 39 | test_data0 = test_data[0,:,:] 40 | test_data0[np.isnan(test_data0)] = 0 # zero out NaNs at the first time step, so 'ffill' always has a value to propagate 41 | test_data[0,:,:] = test_data0 42 | for i in range(test_data.shape[1]): 43 | test_data_i = pd.DataFrame(test_data[:,i,:]) 44 | if inp == 'last': 45 | test_data_i.fillna(method='ffill',inplace=True) 46 | elif inp == 'zero': 47 | test_data_i.fillna(0,inplace=True) 48 | elif inp == 'mean': 49 | test_data_i.fillna(test_data_i.mean(),inplace=True) 50 | test_data[:,i,:] = test_data_i.values 51 | 52 | test_labels = np.asarray(blood_data['Yte']) 53 | 54 | # valid == train 55 | valid_data = train_data 56 | valid_labels = train_labels 57 | valid_len = train_len 58 | 59 | # target outputs 60 | train_targets = train_data 61 | valid_targets = valid_data 62 | test_targets = test_data 63 | 64 | if kernel=='TCK': 65 | K_tr = blood_data['Ktrtr']
66 | K_vs = K_tr 67 | K_ts = blood_data['Ktete'] 68 | else: 69 | K_tr = ideal_kernel(train_labels) 70 | K_vs = ideal_kernel(valid_labels) 71 | K_ts = ideal_kernel(test_labels) 72 | 73 | return (train_data, train_labels, train_len, train_targets, K_tr, 74 | valid_data, valid_labels, valid_len, valid_targets, K_vs, 75 | test_data, test_labels, test_len, test_targets, K_ts) -------------------------------------------------------------------------------- /TCK/trainTCK.m: -------------------------------------------------------------------------------- 1 | function [ res, C, G ] = trainTCK( X, varargin ) 2 | % trainTCK - Train the TCK 3 | % 4 | % INPUTS 5 | % X: data array of size N x T x V, where N is the number of multivariate time series, T the length and V the number of attributes. 6 | % minN: min percentage of subsample (optional) 7 | % minV: min number of attributes for each GMM (optional) 8 | % maxV: max number of attributes for each GMM (optional) 9 | % minT: min length of time segments for each GMM (optional) 10 | % maxT: max length of time segments for each GMM (optional) 11 | % C: max number of mixture components for each GMM (optional) 12 | % G: number of randomizations for each number of components (optional) 13 | % I: number of iterations (optional) 14 | % 15 | % OUTPUTS 16 | % res: A cell of size ((C-1)*G,6) that for each q = 1:(C-1)*G contains 17 | % Q: cluster posterior probabilities 18 | % mu: cluster means (time dependent + variable dependent) 19 | % s2: cluster variances (variable dependent) 20 | % theta: cluster priors 21 | % dim_idx: indexes of the subset of dimensions considered 22 | % time_idx: indexes of the subset of time intervals considered 23 | % C 24 | % G 25 | 26 | % 27 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier. 28 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 29 | 30 | N = size(X,1); % number of time series 31 | T = size(X,2); % time steps in each time series 32 | V = size(X,3); % number of variables in each time series 33 | 34 | % Parse the optional parameters 35 | p = inputParser(); 36 | if(N < 100) 37 | p.addParameter('C', 10, @(z) assert(z>=2, 'C must be larger than 1')); 38 | else 39 | p.addParameter('C', 40, @(z) assert(z>=2, 'C must be larger than 1')); 40 | end 41 | p.addParameter('G', 30); 42 | p.addParameter('minN', 0.8, @(z) assert(z>0 && z<=1, 'The minimum percentage of subsample must be in (0,1]')); 43 | if(V==1) 44 | p.addParameter('minV', 1, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 45 | else 46 | p.addParameter('minV', 2, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 47 | end 48 | p.addParameter('maxV', min(ceil(0.9*V),15), @(z) assert(z>=1 && z<=V, 'The maximum number of variables must be in [1,V]')); 49 | p.addParameter('minT', 6, @(z) assert(z>=1 && z<=T, 'The minimum length of time segments should be in [1,T]')); 50 | p.addParameter('maxT', min(floor(0.8*T),25), @(z) assert(z>=1 && z<=T, 'The maximum length of time segments should be in [1,T]')); 51 | p.addParameter('I', 20); 52 | p.parse(varargin{:}); 53 | C = p.Results.C; 54 | G = p.Results.G; 55 | minN = p.Results.minN; 56 | minV = p.Results.minV; 57 | maxV = p.Results.maxV; 58 | minT = p.Results.minT; 59 | maxT = p.Results.maxT; 60 | I = p.Results.I; 61 | 62 | 63 | res = cell(G*(C-1),6); 64 | 65 | % Check if there is missing data in the dataset.
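% (the resulting 0/1 flag is forwarded to every GMM_MAP_EM call below, so that
% the EM updates mask out the NaN entries)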
66 | nan_idx = isnan(X); 67 | if(sum(sum(sum(nan_idx)))>0) 68 | missing = 1; 69 | fprintf('The dataset contains missing data\n\n'); 70 | else 71 | missing = 0; 72 | fprintf('The dataset does not contain missing data\n\n'); 73 | end 74 | 75 | fprintf(' Training the TCK using the following parameters:\n C = %d, G = %d\n Number of MTS for each GMM: %d - %d (%d - 100 percent)\n Number of attributes sampled from [%d, %d]\n Length of time segments sampled from [%d, %d]\n\n', C, G, floor(minN*N), N, floor(minN*100), minV, maxV, minT, maxT); 76 | 77 | parfor i=1:G*(C-1) 78 | c = floor((i-1)/G) + 2; % the ensemble sweeps the number of components c over [2,C], with G randomizations each 79 | [o1, o2 , o3, o4, o5, o6] = GMM_MAP_EM(X,'C',c,'minN',minN,'minT',minT,'maxT',maxT,'minV',minV,'maxV',maxV,'I',I,'missing',missing); 80 | [res(i,:)] = {o1, o2 , o3, o4, o5, o6}; 81 | end 82 | 83 | 84 | 85 | end 86 | 87 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import matplotlib.pyplot as plt 2 | from sklearn.decomposition import TruncatedSVD 3 | import numpy as np 4 | from scipy import interpolate 5 | 6 | 7 | def dim_reduction_plot(data, label, block_flag): 8 | """ 9 | Compute a linear dimensionality reduction (truncated SVD, i.e. PCA without mean centering) and scatter the first two components 10 | """ 11 | 12 | PCA_model = TruncatedSVD(n_components=3).fit(data) 13 | data_PCA = PCA_model.transform(data) 14 | idxc1 = np.where(label==0) 15 | idxc2 = np.where(label==1) 16 | plt.scatter(data_PCA[idxc1,0],data_PCA[idxc1,1],s=80,c='r', marker='^',linewidths = 0, label='healthy') 17 | plt.scatter(data_PCA[idxc2,0],data_PCA[idxc2,1],s=80,c='y', marker='o',linewidths = 0, label='infected') 18 | plt.gca().axes.get_xaxis().set_ticks([]) 19 | plt.gca().axes.get_yaxis().set_ticks([]) 20 | plt.title('PCA of the codes') 21 | plt.legend(scatterpoints=1,loc='best') 22 | plt.show(block=block_flag) 23 | 24 | def ideal_kernel(labels): 25 | """ 26 | Compute the ideal kernel K: 27 | an entry k_ij = 0 if i and j have different class, 28 | k_ij = 1 if i and j have the same class 29 | """ 30 | K = np.zeros([labels.shape[0], labels.shape[0]]) 31 | 32 | for i in range(labels.shape[0]): 33 | k = labels[i] == labels 34 | k = k.astype(int) # astype returns a new array; the result must be assigned 35 | K[:,i] = k[:,0] 36 | return K 37 | 38 | 39 | def interp_data(X, X_len, restore=False, interp_kind='linear'): 40 | """ 41 | Interpolate data to match the same maximum length in X_len 42 | If restore is True, data are interpolated back to their original length 43 | data are assumed to be time-major 44 | interp_kind: can be 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic' 45 | """ 46 | 47 | [T, N, V] = X.shape 48 | X_new = np.zeros_like(X) 49 | 50 | # restore original lengths 51 | if restore: 52 | for n in range(N): 53 | t = np.linspace(start=0, stop=X_len[n], num=T) 54 | t_new = np.linspace(start=0, stop=X_len[n], num=X_len[n]) 55 | for v in range(V): 56 | x_n_v = X[:,n,v] 57 | f = interpolate.interp1d(t, x_n_v, kind=interp_kind) 58 | X_new[:X_len[n],n,v] = f(t_new) 59 | 60 | # interpolate all data to length T 61 | else: 62 | for n in range(N): 63 | t = np.linspace(start=0, stop=X_len[n], num=X_len[n]) 64 | t_new = np.linspace(start=0, stop=X_len[n], num=T) 65 | for v in range(V): 66 | x_n_v = X[:X_len[n],n,v] 67 | f = interpolate.interp1d(t, x_n_v, kind=interp_kind) 68 | X_new[:,n,v] = f(t_new) 69 | 70 | return X_new 71 | 72 | 73 | def classify_with_knn(train_data, train_labels, test_data, test_labels, k=3, metric='minkowski'): 74 | """ 75 | Perform classification with knn.
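Note (added): F1 and AUC are computed from the hard label predictions returned
by the classifier; for binary labels, roc_auc_score evaluated on 0/1 predictions
reduces to the balanced accuracy, (TPR + TNR)/2, rather than a score-based AUC.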
76 | """ 77 | from sklearn.neighbors import KNeighborsClassifier 78 | from sklearn.metrics import f1_score, roc_auc_score 79 | 80 | neigh = KNeighborsClassifier(n_neighbors=k, metric=metric) 81 | neigh.fit(train_data, train_labels) 82 | accuracy = neigh.score(test_data, test_labels) 83 | pred_labels = neigh.predict(test_data) 84 | F1 = f1_score(test_labels, pred_labels) 85 | AUC = roc_auc_score(test_labels, pred_labels) 86 | 87 | return accuracy, F1, AUC 88 | 89 | def mse_and_corr(targets, preds, targets_len): 90 | """ 91 | targets and preds must have shape [time_steps, samples, variables] 92 | targets_len must have shape [samples,] 93 | """ 94 | mse_list = [] 95 | corr_list = [] 96 | for i in range(targets.shape[1]): 97 | len_i = targets_len[i] 98 | test_data_i = targets[:len_i,i,:] 99 | pred_i = preds[:len_i,i,:] 100 | mse_list.append(np.mean((test_data_i-pred_i)**2)) 101 | corr_list.append(np.corrcoef(test_data_i.flatten(), pred_i.flatten())[0,1]) 102 | tot_mse = np.mean(mse_list) 103 | tot_corr = np.mean(corr_list) 104 | 105 | return tot_mse, tot_corr 106 | 107 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Deep Kernelized Auto Encoder with Time series Cluster Kernel 2 | 3 | [![arXiv](https://img.shields.io/badge/arXiv-1710.07547-b31b1b.svg?style=flat-square&logo=arxiv&logoColor=white)](https://arxiv.org/abs/1710.07547) 4 | 5 | Reference paper: [Learning compressed representations of blood samples time series with missing data](https://arxiv.org/abs/1710.07547) 6 | 7 | 8 | 9 | ## TL;DR 10 | Tensorflow implementation of a [Deep Kernelized Auto Encoder (dkAE)](https://link.springer.com/chapter/10.1007/978-3-319-59126-1_35), aligned with the [Time series Cluster Kernel (TCK)](https://arxiv.org/abs/1704.00794), for learning vectorial representations of mutlivariate time series (MTS) with missing data. 11 | 12 | The MTS analyzed are blood sample measurements of patients with surgical site infections. 13 | The original dataset is available [at this link](https://groups.google.com/forum/#!topic/ml-news/MQtVkxizrrU). 14 | Here, we consider a subset of 883 MTS divided into 2 classes: superficial infections (651 MTS) and severe infections (232 MTS). 15 | Each MTS has 10 attributes recorded for 20 time steps. 16 | The first 80% of the datasets is used as a training set and the rest as a test set. 17 | 18 | The dataset is located in the folder [Data/](https://github.com/FilippoMB/TCK_AE/tree/master/Data) and consists of 4 files: 19 | * `x.mat` -- training set 20 | * `Y.mat` -- labels of the training set 21 | * `xte.mat` -- test set 22 | * `Yte.mat` -- labels of the test set 23 | 24 | ## Train TCK (MATLAB) 25 | 26 | Run the matlab script [TCK/main.m](https://github.com/FilippoMB/TCK_AE/blob/master/TCK/main.m) to compute TCK on the blood data. 27 | The TCK parameters are fit on the training data in `x.mat` and then TCK is evaluated on the test data in `xte.mat`. 28 | 29 | The computed kernel **K** can be divided in 4 parts: *K_trtr*, the similarities among the elements of the training set, *K_tete*, the similarities among elements of the test set, *K_trte* and *K_tetr*, the similarities between elements of the training set and test set. 30 | The classification of the test set is done on *K_tetr* (or *K_trte*) using a *k*NN classifier, with *k*=1. 
31 | In particular, for each row *i* in *K_tetr*, relative to the *i*th test element, we select the column *j* with the highest similarity, which corresponds to the *j*th training element. 32 | Then, we assign to the *i*th test element the same label as the *j*th training element. 33 | 34 | For example, we get the following classification results on the test set: 35 | 36 | ```matlab 37 | ACC: 0.86932, F1: 0.7013, AUC: 0.77809 38 | ``` 39 | A visualization of *K_tete* is also returned. 40 | 41 | 42 | 43 | We can see that the matrix has a block structure: the first, larger block on the diagonal contains the similarities between the MTS of class 1, and the second, smaller block is relative to the elements of class 2. 44 | Results are saved in [/Data/TCK_data.mat](https://github.com/FilippoMB/TCK_AE/blob/master/Data/TCK_data.mat) and they are used in the next section to train the dkAE. 45 | 46 | Due to the stochastic procedure for computing TCK, we repeat the procedure 10 times using random and independent initializations. 47 | Hyperparameter selection in TCK is not critical and we always use the default values (see the original [TCK paper](https://arxiv.org/abs/1704.00794) for details). 48 | 49 | ## Train the dkAE with TCK (Python) 50 | 51 | The dkAE depends on a set of hyperparameters; the values used in this experiment are specified in the following: 52 | * `code_size`: the dimension of the hidden representations learned by the dkAE (value=20); 53 | * `w_reg`: parameter that weights the L2 regularization of the model weights in the loss function (value=0.001); 54 | * `a_reg`: parameter that weights the kernel alignment of the code inner products with the prior TCK kernel in the loss function (value=0.001); 55 | * `num_epochs`: number of epochs used to train the model (value=5000); 56 | * `batch_size`: the size of the mini-batches used during training (value=25); 57 | * `max_gradient_norm`: maximum value that gradients are allowed to assume; gradients with a larger norm are clipped (value=1.0); 58 | * `learning_rate`: initial learning rate of the Adam algorithm, used in the gradient descent training procedure (value=0.001); 59 | * `hidden_size`: size of the second hidden layer of the encoder and the first hidden layer of the decoder (value=30). 60 | 61 | The configuration (in terms of processing units in each layer) of the AE used in the experiments is [200, 30, 20, 30, 200]. 62 | 63 | To run the whole training and testing procedure, just execute the script [AE.py](https://github.com/FilippoMB/TCK_AE/blob/master/AE.py). 64 | Hyperparameters are set by default to the values described above, but new values can be specified, for example, in this way: 65 | ``` 66 | python3 AE.py --code_size 5 --w_reg 0.001 --a_reg 0.1 --num_epochs 10000 --max_gradient_norm 0.5 --learning_rate 0.001 --hidden_size 30 67 | ``` 68 | Additional hyperparameters can be modified within [AE.py](https://github.com/FilippoMB/TCK_AE/blob/master/AE.py). They are listed in the following with the values used in our experiment: 69 | 70 | * `dim_red`: computes PCA on the learned code representations of the test set and plots the first two components; 71 | * `plot_on`: show plots at the end of the training (set to 0 for only textual output); 72 | * `interp_on`: interpolate the time series if they have different lengths (not used); 73 | * `tied_weights`: encoder and decoder have tied weights (not used); 74 | * `lin_dec`: the decoder has only linear activations rather than squashing nonlinearities.
75 | 76 | ```python 77 | dim_red = 1 78 | plot_on = 1 79 | interp_on = 0 80 | tied_weights = 0 81 | lin_dec = 1 82 | ``` 83 | 84 | During the training, the reconstruction loss and the code loss can be visualized in [Tensorboard](https://www.tensorflow.org/get_started/summaries_and_tensorboard). 85 | The *reconstruction loss* is the MSE between the encoder input and its reconstruction performed by the decoder, while the *code loss* is the Frobenius norm of the difference between the prior TCK kernel and the matrix of the inner products of the codes (both normalized by their own Frobenius norm). 86 | 87 | 88 | 89 | 90 | ## Learned representations of test set and classification 91 | 92 | Once the training procedure is over, the dkAE is fed with the MTS of the test set, and the corresponding codes are generated. 93 | The inner products of the codes are visualized and we can notice that their structure resembles the one of the prior TCK kernel shown previously (here we focus on the test part *K_tete*), where the two classes in the test set can be clearly recognized. 94 | 95 | 96 | 97 | To obtain a visualization of the learned representations, we perform PCA on the codes and plot the first two components. It is possible to see that the 2 classes are well separated. 98 | 99 | 100 | 101 | The results returned are the reconstruction MSE and the Pearson correlation between the encoder input and the decoder output, as well as the accuracy, F1 score, and area under the ROC curve of the *k*NN classifier with *k*=1: 102 | ```python 103 | Test MSE: 0.096 104 | Test Pearson correlation: 0.518 105 | kNN: acc: 0.869, F1: 0.716, AUC: 0.792 106 | ``` 107 | 108 | ## Citation 109 | 110 | ```bibtex 111 | @article{bianchi2017learning, 112 | title={Learning compressed representations of blood samples time series with missing data}, 113 | author={Bianchi, Filippo Maria and Mikalsen, Karl {\O}yvind and Jenssen, Robert}, 114 | journal={arXiv preprint arXiv:1710.07547}, 115 | year={2017} 116 | } 117 | ``` 118 | -------------------------------------------------------------------------------- /TCK/GMM_MAP_EM.m: -------------------------------------------------------------------------------- 1 | function [ Q, mu, s2, theta, dim_idx, time_idx] = GMM_MAP_EM(X, varargin) 2 | % MAP_EM - fit a GMM to time series data with missing values using MAP-EM 3 | % 4 | % INPUTS 5 | % X: data array of size N x T x V 6 | % C: number of mixture components (optional) 7 | % minN: min percentage of subsample (optional) 8 | % minV: min number of dimensions (optional) 9 | % maxV: max number of dimensions (optional) 10 | % minT: min length of time segments (optional) 11 | % maxT: max length of time segments (optional) 12 | % I: number of iterations (optional) 13 | % missing: binary indicator. 1 if there is missing data and 0 if not 14 | % 15 | % OUTPUTS 16 | % Q: cluster posterior probabilities 17 | % mu: cluster means (time dependent + variable dependent) 18 | % s2: cluster variances (variable dependent) 19 | % theta: cluster priors 20 | % dim_idx: indexes of the subset of dimensions considered 21 | % time_idx: indexes of the subset of time intervals considered 22 | % 23 | % Reference: "Time Series Cluster Kernel for Learning Similarities between Multivariate Time Series with Missing Data", 2017 Pattern Recognition, Elsevier.
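% Note (added for clarity): the MAP-EM iteration implemented below alternates
%   E-step: Q(n,c) ~ theta(c) * prod_{t,v} N( x(n,t,v) | mu(t,v,c), s2(v,c) ), row-normalized
%           (with missing data, each Gaussian factor is raised to the binary mask R)
%   M-step: theta(c) = sum_n Q(n,c) / sN
%           s2(v,c) = ( n0*s2_0(v) + var1 ) / ( n0 + var2 )   (MAP update with prior strength n0)
%           mu(:,v,c) = A\b, with A and b built from the smoothness prior S_0 (MAP update)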
24 | % Authors: "Karl Øyvind Mikalsen, Filippo Maria Bianchi" 25 | 26 | N = size(X,1); % number of time series 27 | T = size(X,2); % time steps in each time series 28 | V = size(X,3); % number of variables in each time series 29 | 30 | % Parse the optional parameters 31 | p = inputParser(); 32 | p.addParameter('minN', 0.8, @(z) assert(z>0 && z<=1, 'The minimum percentage of subsample must be in (0,1]')); 33 | if(V==1) 34 | p.addParameter('minV', 1, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 35 | else 36 | p.addParameter('minV', 2, @(z) assert(z>=1 && z<=V, 'The minimum number of variables must be in [1,V]')); 37 | end 38 | p.addParameter('maxV', V, @(z) assert(z>=1 && z<=V, 'The maximum number of variables must be in [1,V]')); 39 | p.addParameter('minT', 6, @(z) assert(z>=1 && z<=T, 'The minimum length of time segments should be in [1,T]')); 40 | p.addParameter('maxT', min(floor(0.8*T),25), @(z) assert(z>=1 && z<=T, 'The maximum length of time segments should be in [1,T]')); 41 | p.addParameter('C', 40); 42 | p.addParameter('missing', 2); % default 2 raises an error below unless an explicit 0/1 value is passed 43 | p.addParameter('I', 20); 44 | p.parse(varargin{:}); 45 | minN = p.Results.minN; 46 | minV = p.Results.minV; 47 | maxV = p.Results.maxV; 48 | minT = p.Results.minT; 49 | maxT = p.Results.maxT; 50 | C = p.Results.C; 51 | I = p.Results.I; 52 | missing = p.Results.missing; 53 | 54 | % Hyperparameters for the mean prior (a0, b0) and the std dev prior (n0) of the mixture components 55 | a0 = (1.0-0.001).*rand + 0.001; 56 | b0 = (0.2-0.005).*rand + 0.005; 57 | n0 = (0.2-0.001).*rand + 0.001; 58 | 59 | % Randomly subsample dimensions, time intervals and samples 60 | s = RandStream('mt19937ar','Seed',0); 61 | if(N > 100) 62 | sN = randi([round(minN*N),N]); 63 | else 64 | sN = round(0.9*N); 65 | end 66 | sub_idx = sort(randperm(s,N,sN)); % generate sN (sorted) integers between 1 and N 67 | 68 | sV = randi([minV,maxV]); 69 | dim_idx = sort(randperm(s,V,sV)); % generate sV (sorted) integers between 1 and V 70 | 71 | t1 = randi([1,T-minT+1]); 72 | t2 = randi([t1+minT-1,min(T,(t1+maxT-1))]); 73 | sT = t2-t1+1; 74 | time_idx = t1:t2; % generate sT contiguous integers from t1 to t2 75 | sX = X(sub_idx,time_idx,dim_idx); 76 | 77 | 78 | if(missing == 1) 79 | nan_idx = isnan(sX); 80 | R = ones(size(sX)); 81 | R(nan_idx)=0; 82 | 83 | % Calculate empirical moments 84 | mu_0 = zeros(sT,sV); % prior mean over time and variables (sT x sV) 85 | for v = 1:sV 86 | mu_0(:,v) = nanmean(sX(:,:,v),1); 87 | end 88 | s_0 = zeros(sV,1); % prior std over variables (sV x 1) 89 | tempX = reshape(sX,[sN*sT,sV]); 90 | for v = 1:sV 91 | s_0(v) = nanstd(tempX(:,v),0,1); 92 | end 93 | s2_0 = s_0.^2; 94 | 95 | 96 | [S_0, invS_0] = deal(zeros(sT,sT,sV)); 97 | T1 = repmat((1:sT)',[1,sT]); 98 | T2 = repmat((1:sT),[sT,1]); 99 | for v=1:sV 100 | S_0(:,:,v) = s_0(v)*b0*exp(-a0*(T1-T2).^2); 101 | if(rcond(S_0(:,:,v)) < 1e-8) % check if the matrix can be inverted 102 | S_0(:,:,v) = S_0(:,:,v) + 0.1*S_0(1,1,v)*eye(sT); %add a small number to the diagonal 103 | end 104 | invS_0(:,:,v) = inv(S_0(:,:,v)); 105 | end 106 | 107 | 108 | % initialize model parameters 109 | theta = ones(1,C)/C; % cluster priors (1 x C) 110 | mu= zeros(sT,sV,C); % cluster means (sT x sV x C) 111 | s2 = zeros(sV,C); % cluster variances (sV x C) 112 | Q = zeros(sN,C); % cluster assignments (sN x C) 113 | 114 | sX(R==0) = -100000; % dummy value for the missing entries (masked out by the exponent R below) 115 | 116 | for i=1:I 117 | 118 | % initialization: random clusters assignment 119 | if(i==1) 120 | cluster = randi(C,[sN,1]); 121 | Q = double(bsxfun(@eq, cluster(:), 1:C));
122 | 123 | % update clusters assignment 124 | else 125 | for c=1:C 126 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,sN]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,sN,sT]),[2,3,1]) ).^R; 127 | distr_c(distr_c < normpdf(3)) = normpdf(3); 128 | distr_c = reshape(distr_c,[sN,sV*sT]); 129 | Q(:,c) = theta(c)*prod(distr_c,2); 130 | end 131 | Q = Q./repmat(sum(Q,2),[1,C]); 132 | end 133 | 134 | % update mu, s2 and theta 135 | for c=1:C 136 | theta(c) = sum(Q(:,c))/sN; 137 | for v=1:sV 138 | var2 = sum(R(:,:,v),2)'*Q(:,c); 139 | temp = (sX(:,:,v) - repmat(mu(:,v,c)',[sN,1]) ).^2; 140 | var1 = Q(:,c)'*sum((R(:,:,v).*temp),2); 141 | s2(v,c) = (n0*s2_0(v)+var1) / (n0+var2); 142 | 143 | A = invS_0(:,:,v) + diag(R(:,:,v)'*Q(:,c)/ s2(v,c)); 144 | b = invS_0(:,:,v)*mu_0(:,v) + (R(:,:,v).*sX(:,:,v))'*Q(:,c)/s2(v,c); 145 | mu(:,v,c) = A\b; 146 | end 147 | end 148 | end % end for i=1:I 149 | 150 | % compute assignments for all data 151 | Q = GMMposterior(X, C, mu, s2, theta, dim_idx, time_idx, missing ); 152 | 153 | 154 | %if no missing data the computations simplify a bit 155 | elseif(missing == 0) 156 | % Calculate empirical moments 157 | mu_0 = zeros(sT,sV); % prior mean over time and variables (sT x sV) 158 | for v = 1:sV 159 | mu_0(:,v) = mean(sX(:,:,v),1); 160 | end 161 | s_0 = zeros(sV,1); % prior std over variables (sV x 1) 162 | tempX = reshape(sX,[sN*sT,sV]); 163 | for v = 1:sV 164 | s_0(v) = std(tempX(:,v)); 165 | end 166 | s2_0 = s_0.^2; 167 | 168 | 169 | [S_0, invS_0] = deal(zeros(sT,sT,sV)); 170 | T1 = repmat((1:sT)',[1,sT]); 171 | T2 = repmat((1:sT),[sT,1]); 172 | for v=1:sV 173 | S_0(:,:,v) = s_0(v)*b0*exp(-a0*(T1-T2).^2); 174 | if(rcond(S_0(:,:,v)) < 1e-8) % check if the matrix can be inverted 175 | S_0(:,:,v) = S_0(:,:,v) + 0.1*S_0(1,1,v)*eye(sT); %add a small number to the diagonal if S_0 is not invertible 176 | end 177 | invS_0(:,:,v) = inv(S_0(:,:,v)); 178 | end 179 | 180 | 181 | 182 | % initialize model parameters 183 | theta = ones(1,C)/C; % cluster priors (1 x C) 184 | mu= zeros(sT,sV,C); % cluster means (sT x sV x C) 185 | s2 = zeros(sV,C); % cluster variances (sV x C) 186 | Q = zeros(sN,C); % cluster assignments (sN x C) 187 | 188 | for i=1:I 189 | 190 | % initialization: random clusters assignment 191 | if(i==1) 192 | cluster = randi(C,[sN,1]); 193 | Q = double(bsxfun(@eq, cluster(:), 1:C)); 194 | 195 | % update clusters assignment 196 | else 197 | for c=1:C 198 | distr_c = normpdf(sX, permute(repmat(mu(:,:,c),[1,1,sN]),[3,1,2]), permute(repmat(sqrt(s2(:,c)),[1,sN,sT]),[2,3,1]) ); 199 | distr_c(distr_c < normpdf(3)) = normpdf(3); 200 | distr_c = reshape(distr_c,[sN,sV*sT]); 201 | Q(:,c) = theta(c)*prod(distr_c,2); 202 | end 203 | Q = Q./repmat(sum(Q,2),[1,C]); 204 | end 205 | 206 | % update mu, s2 and theta 207 | for c=1:C 208 | sumQ = sum(Q(:,c)); 209 | theta(c) = sumQ/sN; 210 | for v=1:sV 211 | var2 = sT*sumQ; 212 | var1 = Q(:,c)'*sum((sX(:,:,v) - repmat(mu(:,v,c)',[sN,1]) ).^2,2); 213 | s2(v,c) = (n0*s2_0(v)+var1) / (n0+var2); 214 | 215 | A = invS_0(:,:,v) + (sumQ /s2(v,c))*eye(sT); 216 | b = invS_0(:,:,v)*mu_0(:,v) + (sX(:,:,v))'*Q(:,c)/s2(v,c); 217 | mu(:,v,c) = A\b; 218 | end 219 | end 220 | end % end for i=1:I 221 | 222 | % compute assignments for all data 223 | Q = GMMposterior(X, C, mu, s2, theta, dim_idx, time_idx, missing ); 224 | 225 | 226 | else 227 | error('The value of the variable missing is not 0 or 1'); 228 | end 229 | 230 | 231 | 232 | end 233 | 234 | -------------------------------------------------------------------------------- /AE.py: --------------------------------------------------------------------------------
1 | import tensorflow as tf 2 | import argparse 3 | from TS_datasets import getBlood 4 | import time, os 5 | import numpy as np 6 | import matplotlib.pyplot as plt 7 | from utils import classify_with_knn, interp_data, mse_and_corr, dim_reduction_plot 8 | import math 9 | 10 | dim_red = 1 # perform PCA on the codes and plot the first two components 11 | plot_on = 1 # plot the results, otherwise only textual output is returned 12 | interp_on = 0 # interpolate data (needed if the input time series have different lengths) 13 | tied_weights = 0 # train an AE where the decoder weights are the encoder weights transposed 14 | lin_dec = 1 # train an AE with linear activations in the decoder 15 | 16 | # parse input data 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument("--code_size", default=20, help="size of the code", type=int) 19 | parser.add_argument("--w_reg", default=0.001, help="weight of the regularization in the loss function", type=float) 20 | parser.add_argument("--a_reg", default=0.2, help="weight of the kernel alignment", type=float) 21 | parser.add_argument("--num_epochs", default=5000, help="number of epochs in training", type=int) 22 | parser.add_argument("--batch_size", default=25, help="number of samples in each batch", type=int) 23 | parser.add_argument("--max_gradient_norm", default=1.0, help="max gradient norm for gradient clipping", type=float) 24 | parser.add_argument("--learning_rate", default=0.001, help="Adam initial learning rate", type=float) 25 | parser.add_argument("--hidden_size", default=30, help="size of the hidden layers", type=int) 26 | args = parser.parse_args() 27 | print(args) 28 | 29 | # ================= DATASET ================= 30 | (train_data, train_labels, train_len, _, K_tr, 31 | valid_data, _, valid_len, _, K_vs, 32 | test_data_orig, test_labels, test_len, _, K_ts) = getBlood(kernel='TCK', inp='zero') # data shape is [T, N, V] = [time_steps, num_elements, num_var] 33 | 34 | # sort test data (for a better visualization of the inner product of the codes) 35 | sort_idx = np.argsort(test_labels,axis=0)[:,0] 36 | test_data_orig = test_data_orig[:,sort_idx,:] 37 | test_labels = test_labels[sort_idx,:] 38 | K_ts = K_ts[sort_idx,:][:,sort_idx] 39 | 40 | # interpolation 41 | if np.min(train_len) < np.max(train_len) and interp_on: 42 | print('-- Data Interpolation --') 43 | train_data = interp_data(train_data, train_len) 44 | valid_data = interp_data(valid_data, valid_len) 45 | test_data = interp_data(test_data_orig, test_len) 46 | else: 47 | test_data = test_data_orig 48 | 49 | # transpose and reshape [T, N, V] --> [N, T, V] --> [N, T*V] 50 | train_data = np.transpose(train_data,axes=[1,0,2]) 51 | train_data = np.reshape(train_data, (train_data.shape[0], train_data.shape[1]*train_data.shape[2])) 52 | valid_data = np.transpose(valid_data,axes=[1,0,2]) 53 | valid_data = np.reshape(valid_data, (valid_data.shape[0], valid_data.shape[1]*valid_data.shape[2])) 54 | test_data = np.transpose(test_data,axes=[1,0,2]) 55 | test_data = np.reshape(test_data, (test_data.shape[0], test_data.shape[1]*test_data.shape[2])) 56 | 57 | print('\n**** Processing Blood data: Tr{}, Vs{}, Ts{} ****\n'.format(train_data.shape, valid_data.shape, test_data.shape)) 58 | 59 | input_length = train_data.shape[1] # same for all inputs 60 | 61 | # ================= GRAPH ================= 62 | 63 | # init session 64 | tf.reset_default_graph() # needed when working with iPython 65 | sess = tf.Session() 66 | 67 | # placeholders
68 | encoder_inputs = tf.placeholder(shape=(None,input_length), dtype=tf.float32, name='encoder_inputs') 69 | prior_K = tf.placeholder(shape=(None, None), dtype=tf.float32, name='prior_K') 70 | 71 | # ----- ENCODER ----- 72 | We1 = tf.Variable(tf.random_uniform((input_length, args.hidden_size), -1.0 / math.sqrt(input_length), 1.0 / math.sqrt(input_length))) 73 | We2 = tf.Variable(tf.random_uniform((args.hidden_size, args.code_size), -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size))) 74 | 75 | be1 = tf.Variable(tf.zeros([args.hidden_size])) 76 | be2 = tf.Variable(tf.zeros([args.code_size])) 77 | 78 | hidden_1 = tf.nn.tanh(tf.matmul(encoder_inputs, We1) + be1) 79 | code = tf.nn.tanh(tf.matmul(hidden_1, We2) + be2) 80 | 81 | # kernel on codes 82 | code_K = tf.tensordot(code, tf.transpose(code), axes=1) 83 | 84 | # ----- DECODER ----- 85 | if tied_weights: 86 | Wd1 = tf.transpose(We2) 87 | Wd2 = tf.transpose(We1) 88 | else: 89 | Wd1 = tf.Variable(tf.random_uniform((args.code_size, args.hidden_size), -1.0 / math.sqrt(args.code_size), 1.0 / math.sqrt(args.code_size))) 90 | Wd2 = tf.Variable(tf.random_uniform((args.hidden_size, input_length), -1.0 / math.sqrt(args.hidden_size), 1.0 / math.sqrt(args.hidden_size))) 91 | 92 | bd1 = tf.Variable(tf.zeros([args.hidden_size])) 93 | bd2 = tf.Variable(tf.zeros([input_length])) 94 | 95 | if lin_dec: 96 | hidden_2 = tf.matmul(code, Wd1) + bd1 97 | else: 98 | hidden_2 = tf.nn.tanh(tf.matmul(code, Wd1) + bd1) 99 | 100 | dec_out = tf.matmul(hidden_2, Wd2) + bd2 101 | 102 | # ----- LOSS ----- 103 | # kernel alignment loss with normalized Frobenius norm 104 | code_K_norm = code_K/tf.norm(code_K, ord='fro', axis=[-2,-1]) 105 | prior_K_norm = prior_K/tf.norm(prior_K, ord='fro', axis=[-2,-1]) 106 | k_loss = tf.norm(code_K_norm - prior_K_norm, ord='fro', axis=[-2,-1]) 107 | 108 | # reconstruction loss 109 | parameters = tf.trainable_variables() 110 | optimizer = tf.train.AdamOptimizer(args.learning_rate) 111 | reconstruct_loss = tf.losses.mean_squared_error(labels=encoder_inputs, predictions=dec_out) 112 | 113 | # L2 loss 114 | reg_loss = 0 115 | for tf_var in tf.trainable_variables(): 116 | reg_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var)) 117 | 118 | tot_loss = reconstruct_loss + args.w_reg*reg_loss + args.a_reg*k_loss 119 | 120 | # Calculate and clip gradients 121 | gradients = tf.gradients(tot_loss, parameters) 122 | clipped_gradients, _ = tf.clip_by_global_norm(gradients, args.max_gradient_norm) 123 | update_step = optimizer.apply_gradients(zip(clipped_gradients, parameters)) 124 | 125 | sess.run(tf.global_variables_initializer()) 126 | 127 | # trainable parameters count 128 | total_parameters = 0 129 | for variable in tf.trainable_variables(): 130 | shape = variable.get_shape() 131 | variable_parameters = 1 132 | for dim in shape: 133 | variable_parameters *= dim.value 134 | total_parameters += variable_parameters 135 | print('Total parameters: {}'.format(total_parameters)) 136 | 137 | # ============= TENSORBOARD ============= 138 | mean_grads = tf.reduce_mean([tf.reduce_mean(grad) for grad in gradients]) 139 | tf.summary.scalar('mean_grads', mean_grads) 140 | tf.summary.scalar('reconstruct_loss', reconstruct_loss) 141 | tf.summary.scalar('k_loss', k_loss) 142 | tvars = tf.trainable_variables() 143 | for tvar in tvars: 144 | tf.summary.histogram(tvar.name.replace(':','_'), tvar) 145 | merged_summary = tf.summary.merge_all() 146 | 147 | # ================= TRAINING ================= 148 | 149 | # initialize training variables 150 | time_tr_start = time.time()
151 | batch_size = args.batch_size 152 | max_batches = train_data.shape[0]//batch_size 153 | loss_track = [] 154 | kloss_track = [] 155 | min_vs_loss = np.infty 156 | model_name = "/tmp/dkae_models/m_0.ckpt"; os.makedirs("/tmp/dkae_models", exist_ok=True) # the checkpoint directory must exist before saver.save is called 157 | train_writer = tf.summary.FileWriter('/tmp/tensorboard', graph=sess.graph) 158 | saver = tf.train.Saver() 159 | 160 | try: 161 | for ep in range(args.num_epochs): 162 | 163 | # shuffle training data 164 | idx = np.random.permutation(train_data.shape[0]) 165 | train_data_s = train_data[idx,:] 166 | K_tr_s = K_tr[idx,:][:,idx] 167 | 168 | for batch in range(max_batches): 169 | 170 | fdtr = {encoder_inputs: train_data_s[(batch)*batch_size:(batch+1)*batch_size,:], 171 | prior_K: K_tr_s[(batch)*batch_size:(batch+1)*batch_size, (batch)*batch_size:(batch+1)*batch_size] 172 | } 173 | _,train_loss, train_kloss = sess.run([update_step, reconstruct_loss, k_loss], fdtr) 174 | loss_track.append(train_loss) 175 | kloss_track.append(train_kloss) 176 | 177 | # check training progress on the validation set (in blood data valid=train) 178 | if ep % 100 == 0: 179 | print('Ep: {}'.format(ep)) 180 | 181 | fdvs = {encoder_inputs: valid_data, 182 | prior_K: K_vs} 183 | outvs, lossvs, klossvs, vs_code_K, summary = sess.run([dec_out, reconstruct_loss, k_loss, code_K, merged_summary], fdvs) 184 | train_writer.add_summary(summary, ep) 185 | print('VS r_loss=%.3f, k_loss=%.3f -- TR r_loss=%.3f, k_loss=%.3f'%(lossvs, klossvs, np.mean(loss_track[-100:]), np.mean(kloss_track[-100:]))) 186 | 187 | # Save model yielding best results on validation 188 | if lossvs < min_vs_loss: 189 | min_vs_loss = lossvs 190 | tf.add_to_collection("encoder_inputs",encoder_inputs) 191 | tf.add_to_collection("dec_out",dec_out) 192 | tf.add_to_collection("reconstruct_loss",reconstruct_loss) 193 | save_path = saver.save(sess, model_name) 194 | 195 | except KeyboardInterrupt: 196 | print('training interrupted') 197 | 198 | 199 | time_tr_end = time.time() 200 | print('Tot training time: {} min'.format((time_tr_end-time_tr_start)//60)) 201 | 202 | # ================= TEST ================= 203 | print('************ TEST ************ \n>>restoring from:'+model_name+'<<') 204 | 205 | tf.reset_default_graph() # be sure that correct weights are loaded 206 | saver.restore(sess, model_name) 207 | 208 | tr_code = sess.run(code, {encoder_inputs: train_data}) 209 | pred, pred_loss, ts_code, ts_code_K = sess.run([dec_out, reconstruct_loss, code, code_K], {encoder_inputs: test_data}) 210 | print('Test loss: %.3f'%(np.mean((pred-test_data)**2))) 211 | 212 | # reverse transformations 213 | pred = np.reshape(pred, (test_data_orig.shape[1], test_data_orig.shape[0], test_data_orig.shape[2])) 214 | pred = np.transpose(pred,axes=[1,0,2]) 215 | test_data = test_data_orig 216 | 217 | if np.min(train_len) < np.max(train_len) and interp_on: 218 | print('-- Reverse Interpolation --') 219 | pred = interp_data(pred, test_len, restore=True) 220 | 221 | if plot_on: 222 | 223 | # plot the reconstruction of a random time series 224 | plot_idx1 = np.random.randint(low=0,high=test_data.shape[1]) 225 | plot_idx2 = np.random.randint(low=0,high=test_data.shape[2]) 226 | target = test_data[:,plot_idx1,plot_idx2] 227 | ts_out = pred[:,plot_idx1,plot_idx2] 228 | plt.plot(target, label='target') 229 | plt.plot(ts_out, label='pred') 230 | plt.legend(loc='best') 231 | plt.title('Prediction of a random MTS variable') 232 | plt.show(block=True) 233 | np.savetxt('AE_pred',ts_out) 234 | 235 | plt.matshow(K_ts,cmap='binary_r') 236 | plt.title('Prior TCK 
kernel') 237 | plt.gca().axes.get_xaxis().set_ticks([]) 238 | plt.gca().axes.get_yaxis().set_ticks([]) 239 | plt.show() 240 | plt.matshow(ts_code_K,cmap='binary_r') 241 | plt.title('Codes inner products') 242 | plt.gca().axes.get_xaxis().set_ticks([]) 243 | plt.gca().axes.get_yaxis().set_ticks([]) 244 | plt.show() 245 | 246 | # MSE and corr 247 | test_mse, test_corr = mse_and_corr(test_data, pred, test_len) 248 | print('Test MSE: %.3f\nTest Pearson correlation: %.3f'%(test_mse, test_corr)) 249 | 250 | # kNN classification on the codes 251 | acc, f1, auc = classify_with_knn(tr_code, train_labels[:, 0], ts_code, test_labels[:, 0], k=1) 252 | print('kNN -- acc: %.3f, F1: %.3f, AUC: %.3f'%(acc, f1, auc)) 253 | 254 | # dim reduction plots 255 | if dim_red: 256 | dim_reduction_plot(ts_code, test_labels, 1) 257 | 258 | #train_writer.close() 259 | sess.close() 260 | --------------------------------------------------------------------------------
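For reference, the kernel-alignment cost minimized in AE.py (the `k_loss` term, described in the README as the code loss) corresponds to the following NumPy computation. This is a minimal illustrative sketch, not a file of the repository; the function name is ours:

```python
import numpy as np

def kernel_alignment_loss(codes, prior_K):
    """Frobenius distance between the normalized Gram matrix of the codes
    and the normalized prior TCK kernel (mirrors k_loss in AE.py)."""
    code_K = codes @ codes.T                     # inner products of the codes
    code_K = code_K / np.linalg.norm(code_K)     # np.linalg.norm defaults to the
    prior_K = prior_K / np.linalg.norm(prior_K)  # Frobenius norm for matrices
    return np.linalg.norm(code_K - prior_K)
```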