├── README.md
├── data
│   ├── energy_data.csv
│   └── stock_data.csv
├── data_loading.py
├── main.py
├── metrics
│   ├── discriminative_metrics.py
│   ├── predictive_metrics.py
│   └── visualization_metrics.py
├── networks.py
├── output
│   ├── PCA.png
│   └── t-SNE.png
├── run.py
├── timegan.py
├── trained_networks
│   ├── discriminator.pth
│   ├── embedder.pth
│   ├── generator.pth
│   ├── recovery.pth
│   └── supervisor.pth
└── utils.py

/README.md:
--------------------------------------------------------------------------------
# TimeGAN_PytorchRebuild
Use PyTorch to rebuild TimeGAN.

## Reference
Jinsung Yoon, Daniel Jarrett, Mihaela van der Schaar, "Time-series Generative Adversarial Networks," Neural Information Processing Systems (NeurIPS), 2019.

Paper Link: https://papers.nips.cc/paper/8789-time-series-generative-adversarial-networks

Original Code: https://github.com/jsyoon0823/TimeGAN

## Requirements
- python >= 3.9.15
- pytorch >= 1.13.1
- cuda >= 11.7.1
- numpy >= 1.23.4
- pandas >= 1.5.3
- matplotlib >= 3.6.2
- scikit-learn (used by the metrics)
--------------------------------------------------------------------------------
/data_loading.py:
--------------------------------------------------------------------------------
import numpy as np
import os


def MinMaxScaler(data):
    """Min-Max normalizer.

    Args:
      - data: original data

    Returns:
      - norm_data: normalized data
    """
    numerator = data - np.min(data, 0)
    denominator = np.max(data, 0) - np.min(data, 0)
    norm_data = numerator / (denominator + 1e-7)
    return norm_data


def sine_data_generation(no, seq_len, dim):
    """Sine data generation.

    Args:
      - no: the number of samples
      - seq_len: sequence length of the time-series
      - dim: feature dimensions

    Returns:
      - data: generated data
    """
    # Initialize the output
    data = list()

    # Generate sine data
    for i in range(no):
        # Initialize each time-series
        temp = list()
        # For each feature
        for k in range(dim):
            # Randomly drawn frequency and phase
            freq = np.random.uniform(0, 0.1)
            phase = np.random.uniform(0, 0.1)

            # Generate a sine signal based on the drawn frequency and phase
            temp_data = [np.sin(freq * j + phase) for j in range(seq_len)]
            temp.append(temp_data)

        # Align rows/columns: (dim, seq_len) -> (seq_len, dim)
        temp = np.transpose(np.asarray(temp))
        # Normalize to [0, 1]
        temp = (temp + 1) * 0.5
        # Stack the generated data
        data.append(temp)

    return data


def real_data_loading(data_dir, data_name, seq_len):
    """Load and preprocess real-world datasets.

    Args:
      - data_dir: directory containing the CSV files
      - data_name: stock or energy
      - seq_len: sequence length

    Returns:
      - data: preprocessed data.
    """

    assert data_name in ['stock', 'energy']

    ori_data = []
    if data_name == 'stock':
        ori_data = np.loadtxt(os.path.join(data_dir, 'stock_data.csv'), delimiter=",", skiprows=1)
    elif data_name == 'energy':
        ori_data = np.loadtxt(os.path.join(data_dir, 'energy_data.csv'), delimiter=",", skiprows=1)

    # Flip the data to make it chronological
    ori_data = ori_data[::-1]
    # Normalize the data
    ori_data = MinMaxScaler(ori_data)

    # Preprocess the dataset
    temp_data = []
    # Cut data by sequence length
    for i in range(0, len(ori_data) - seq_len):
        _x = ori_data[i:i + seq_len]
        temp_data.append(_x)

    # Shuffle the sequences (to make them closer to i.i.d.)
    idx = np.random.permutation(len(temp_data))
    data = []
    for i in range(len(temp_data)):
        data.append(temp_data[idx[i]])

    return data
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import argparse
import numpy as np

# 1. Training model
from run import train, test
# 2. Data loading
from data_loading import real_data_loading, sine_data_generation


def main(opt):
    # Data loading
    ori_data = None
    if opt.data_name in ['stock', 'energy']:
        ori_data = real_data_loading(opt.data_dir, opt.data_name, opt.seq_len)
    elif opt.data_name == 'sine':
        # Set the number of samples and their dimensions
        ori_data = sine_data_generation(opt.sine_no, opt.seq_len, opt.sine_dim)

    print(opt.data_name + ' dataset is ready.')

    # Training or Testing
    if opt.is_test:
        test(opt, ori_data)
    else:
        train(opt, ori_data)
        test(opt, ori_data)


if __name__ == '__main__':
    """Main function for TimeGAN experiments.
    Args:
      - data_name: sine, stock, or energy
      - seq_len: sequence length
      - Network parameters (should be optimized for different datasets)
        - module: gru or lstm
        - hidden_dim: hidden dimensions
        - num_layer: number of layers
      - iterations: number of training iterations
      - batch_size: the number of samples in each batch
      - metric_iteration: number of iterations for metric computation

    Returns:
      - ori_data: original data
      - gen_data: generated synthetic data
      - metric_results: discriminative and predictive scores
    """
    # Args for the main function
    parser = argparse.ArgumentParser()
    # Data parameters
    parser.add_argument('--data_name', type=str, default='stock', choices=['sine', 'stock', 'energy'])
    parser.add_argument('--seq_len', type=int, default=24, help='sequence length')
    parser.add_argument('--sine_no', type=int, default=10000, help='number of sine data samples')
    parser.add_argument('--sine_dim', type=int, default=5, help='dimension of sine data')
    # Network parameters (should be optimized for different datasets)
    parser.add_argument('--module', choices=['gru', 'lstm'], default='gru', type=str)
    parser.add_argument('--hidden_dim', type=int, default=24, help='hidden state dimensions')
    parser.add_argument('--num_layer', type=int, default=3, help='number of layers')
    # Model training and testing parameters
    parser.add_argument('--gamma', type=float, default=1, help='gamma weight for G_loss and D_loss')
    parser.add_argument('--lr', type=float, default=0.001, help='initial learning rate for Adam')
    parser.add_argument('--iterations', type=int, default=50000, help='training iterations')
    parser.add_argument('--print_times', type=int, default=10, help='how many times to print progress during training')
    parser.add_argument('--batch_size', type=int, default=128, help='the number of samples in a mini-batch')
    parser.add_argument('--synth_size', type=int, default=0, help='the number of samples in the synthetic data; '
                                                                  '0 means len(ori_data)')
    parser.add_argument('--metric_iteration', type=int, default=10, help='iterations of the metric computation')
    # Save and Load
    parser.add_argument('--data_dir', type=str, default="./data", help='path to stock and energy data')
    parser.add_argument('--networks_dir', type=str, default="./trained_networks", help='path to checkpoints')
    parser.add_argument('--output_dir', type=str, default="./output", help='folder for output metrics and images')
    # Model running parameters (store_true avoids the argparse type=bool pitfall,
    # where any non-empty string, including "False", parses as True)
    parser.add_argument('--is_test', action='store_true', help='skip training and only run testing')
    parser.add_argument('--only_visualize_metric', action='store_true', help='only compute visualization metrics')
    parser.add_argument('--load_checkpoint', action='store_true', help='load pretrained networks')

    # Call main function
    opt = parser.parse_args()
    main(opt)
--------------------------------------------------------------------------------
/metrics/discriminative_metrics.py:
--------------------------------------------------------------------------------
import numpy as np
from sklearn.metrics import accuracy_score
from utils import train_test_divide, extract_time, batch_generator
import torch
import torch.nn as nn


class Discriminator(nn.Module):

    def __init__(self, input_dim, hidden_dim, num_layer):
        super(Discriminator, self).__init__()
        self.rnn = nn.GRU(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layer, batch_first=True)
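        # The final hidden state of the GRU summarizes the whole sequence;
        # the linear head then maps it to a single real-vs-synthetic logit.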
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        d_outputs, d_last_states = self.rnn(X)
        # Use the last layer's final hidden state, shape (batch, hidden_dim)
        y_hat_logit = self.fc(d_last_states[-1])
        y_hat = self.sigmoid(y_hat_logit)
        return y_hat_logit, y_hat


def discriminative_score_metrics(ori_data, generated_data):
    """Report |0.5 - accuracy| of a post-hoc RNN classifying original vs. synthetic data.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - discriminative_score: |0.5 - classification accuracy|
    """
    # Basic parameters
    no, seq_len, dim = np.asarray(ori_data).shape
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Set maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    train_x, train_x_hat, test_x, test_x_hat, train_t, train_t_hat, test_t, test_t_hat = \
        train_test_divide(ori_data, generated_data, ori_time, generated_time)

    # ------ Build a post-hoc RNN discriminator network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 2000
    batch_size = 128

    discriminator = Discriminator(dim, hidden_dim, 1).to(device)
    optim_discriminator = torch.optim.Adam(discriminator.parameters())
    loss_function = nn.BCEWithLogitsLoss()

    # Training step
    for itt in range(iterations):
        discriminator.train()
        optim_discriminator.zero_grad()
        # Batch setting
        X_mb, T_mb = batch_generator(train_x, train_t, batch_size)
        X_hat_mb, T_hat_mb = batch_generator(train_x_hat, train_t_hat, batch_size)
        # Forward
        X_mb = torch.tensor(np.array(X_mb), dtype=torch.float32).to(device)
        X_hat_mb = torch.tensor(np.array(X_hat_mb), dtype=torch.float32).to(device)
        y_logit_real, y_pred_real = discriminator(X_mb)
        y_logit_fake, y_pred_fake = discriminator(X_hat_mb)
        # Loss function (BCEWithLogitsLoss already returns the batch mean)
        d_loss_real = loss_function(y_logit_real, torch.ones_like(y_logit_real))
        d_loss_fake = loss_function(y_logit_fake, torch.zeros_like(y_logit_fake))
        d_loss = d_loss_real + d_loss_fake

        d_loss.backward()
        optim_discriminator.step()

    # ------ Test the performance on the testing set
    test_x = torch.tensor(np.array(test_x), dtype=torch.float32).to(device)
    _, y_pred_real_curr = discriminator(test_x)
    y_pred_real_curr = y_pred_real_curr.cpu().detach().numpy()

    test_x_hat = torch.tensor(np.array(test_x_hat), dtype=torch.float32).to(device)
    _, y_pred_fake_curr = discriminator(test_x_hat)
    y_pred_fake_curr = y_pred_fake_curr.cpu().detach().numpy()

    y_pred_final = np.squeeze(np.concatenate((y_pred_real_curr, y_pred_fake_curr), axis=0))
    y_label_final = np.concatenate((np.ones([len(y_pred_real_curr), ]), np.zeros([len(y_pred_fake_curr), ])),
                                   axis=0)

    # Compute the accuracy
    acc = accuracy_score(y_label_final, (y_pred_final > 0.5))
    discriminative_score = np.abs(0.5 - acc)
    print('discriminative_score: ', discriminative_score)

    return discriminative_score
--------------------------------------------------------------------------------
/metrics/predictive_metrics.py:
--------------------------------------------------------------------------------
"""Time-series Generative Adversarial Networks (TimeGAN) Codebase.
Note: Use a post-hoc RNN to predict the last feature one step ahead.
"""

# Necessary packages
import torch
import torch.nn as nn
import numpy as np
from sklearn.metrics import mean_absolute_error
from utils import extract_time, train_test_divide


class Predictor(nn.Module):
    def __init__(self, input_dim, hidden_dim, num_layer):
        super(Predictor, self).__init__()
        self.rnn = nn.GRU(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layer, batch_first=True)
        self.fc = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        p_outputs, p_last_states = self.rnn(X)
        y_hat_logit = self.fc(p_outputs)
        y_hat = self.sigmoid(y_hat_logit)
        return y_hat


def predictive_score_metrics(ori_data, generated_data):
    """Report the performance of post-hoc RNN one-step-ahead prediction.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data

    Returns:
      - predictive_score: MAE of the predictions on the original data
    """

    # Basic parameters
    no, seq_len, dim = np.asarray(ori_data).shape
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Set maximum sequence length and each sequence length
    ori_time, ori_max_seq_len = extract_time(ori_data)
    generated_time, generated_max_seq_len = extract_time(generated_data)
    max_seq_len = max([ori_max_seq_len, generated_max_seq_len])

    # ------ Build a post-hoc RNN predictive network
    # Network parameters
    hidden_dim = int(dim / 2)
    iterations = 5000
    batch_size = 128

    # The predictor consumes the first (dim - 1) features and predicts the
    # last feature one step ahead, hence the input dimension of dim - 1.
    predictor = Predictor(dim - 1, hidden_dim, 1).to(device)
    optim_predictor = torch.optim.Adam(predictor.parameters())
    loss_function = nn.L1Loss()

    # Training step (train on the synthetic data)
    for itt in range(iterations):
        predictor.train()
        optim_predictor.zero_grad()
        # Batch setting
        idx = np.random.permutation(len(generated_data))
        train_idx = idx[:batch_size]

        X_mb = list(generated_data[i][:-1, :(dim - 1)] for i in train_idx)
        T_mb = list(generated_time[i] - 1 for i in train_idx)
        Y_mb = list(
            np.reshape(generated_data[i][1:, (dim - 1)], [len(generated_data[i][1:, (dim - 1)]), 1]) for i in train_idx)
        # Forward
        X_mb = torch.tensor(np.array(X_mb), dtype=torch.float32).to(device)
        Y_mb = torch.tensor(np.array(Y_mb), dtype=torch.float32).to(device)
        y_pred = predictor(X_mb)
        # Loss for the predictor
        p_loss = loss_function(y_pred, Y_mb)

        p_loss.backward()
        optim_predictor.step()

    # ------ Test on the original data
    idx = np.random.permutation(len(ori_data))
    test_idx = idx[:no]

    X_mb = list(ori_data[i][:-1, :(dim - 1)] for i in test_idx)
    T_mb = list(ori_time[i] - 1 for i in test_idx)
    Y_mb = list(np.reshape(ori_data[i][1:, (dim - 1)], [len(ori_data[i][1:, (dim - 1)]), 1]) for i in test_idx)

    # Prediction
    X_mb = torch.tensor(np.array(X_mb), dtype=torch.float32).to(device)
    pred_Y_curr = predictor(X_mb)
    pred_Y_curr = pred_Y_curr.cpu().detach().numpy()

    # Compute the performance in terms of MAE
    MAE_temp = 0
    for i in range(no):
        MAE_temp = MAE_temp + mean_absolute_error(Y_mb[i], pred_Y_curr[i, :, :])

    predictive_score = MAE_temp / no
    print('predictive_score: ', predictive_score)
    return predictive_score
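
# --- Example (sketch, not part of the original file) ---
# A minimal smoke test for this metric, assuming toy data in the same
# list-of-(seq_len, dim)-arrays layout used across the repo; the sample
# count and shapes below are illustrative only. Lower scores mean a
# predictor trained on the synthetic data transfers better to the
# original data.
if __name__ == '__main__':
    toy_ori = [np.random.rand(24, 6) for _ in range(200)]
    toy_gen = [np.random.rand(24, 6) for _ in range(200)]
    print('toy predictive score:', predictive_score_metrics(toy_ori, toy_gen))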
--------------------------------------------------------------------------------
/metrics/visualization_metrics.py:
--------------------------------------------------------------------------------
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import numpy as np
import os


def visualization(ori_data, generated_data, analysis, outputs_dir):
    """Use PCA or t-SNE to visualize generated and original data.

    Args:
      - ori_data: original data
      - generated_data: generated synthetic data
      - analysis: tsne or pca
      - outputs_dir: path for the output images
    """
    # Analysis sample size (for faster computation)
    anal_sample_no = min([1000, len(ori_data)])
    idx = np.random.permutation(len(ori_data))[:anal_sample_no]

    # Data preprocessing
    ori_data = np.asarray(ori_data)
    generated_data = np.asarray(generated_data)

    ori_data = ori_data[idx]
    generated_data = generated_data[:anal_sample_no]

    no, seq_len, dim = ori_data.shape

    # Represent each sequence by its per-step feature mean: one (seq_len,) row per sample
    for i in range(anal_sample_no):
        if i == 0:
            prep_data = np.reshape(np.mean(ori_data[0, :, :], 1), [1, seq_len])
            prep_data_hat = np.reshape(np.mean(generated_data[0, :, :], 1), [1, seq_len])
        else:
            prep_data = np.concatenate((prep_data,
                                        np.reshape(np.mean(ori_data[i, :, :], 1), [1, seq_len])))
            prep_data_hat = np.concatenate((prep_data_hat,
                                            np.reshape(np.mean(generated_data[i, :, :], 1), [1, seq_len])))

    # Visualization parameters
    colors = ["red" for i in range(anal_sample_no)] + ["blue" for i in range(anal_sample_no)]

    if analysis == 'pca':
        # PCA analysis
        pca = PCA(n_components=2)
        pca.fit(prep_data)
        pca_results = pca.transform(prep_data)
        pca_hat_results = pca.transform(prep_data_hat)

        # Plotting
        f, ax = plt.subplots(1)
        plt.scatter(pca_results[:, 0], pca_results[:, 1],
                    c=colors[:anal_sample_no], alpha=0.2, label="Original")
        plt.scatter(pca_hat_results[:, 0], pca_hat_results[:, 1],
                    c=colors[anal_sample_no:], alpha=0.2, label="Synthetic")

        ax.legend()
        plt.title('PCA plot')
        plt.xlabel('x-pca')
        plt.ylabel('y-pca')
        plt.savefig(os.path.join(outputs_dir, 'PCA.png'), dpi=800)
        plt.show()

    elif analysis == 'tsne':
        # Do t-SNE analysis on original and synthetic data together
        prep_data_final = np.concatenate((prep_data, prep_data_hat), axis=0)

        # t-SNE analysis
        tsne = TSNE(n_components=2, verbose=1, perplexity=40, n_iter=300)
        tsne_results = tsne.fit_transform(prep_data_final)

        # Plotting
        f, ax = plt.subplots(1)
        plt.scatter(tsne_results[:anal_sample_no, 0], tsne_results[:anal_sample_no, 1],
                    c=colors[:anal_sample_no], alpha=0.2, label="Original")
        plt.scatter(tsne_results[anal_sample_no:, 0], tsne_results[anal_sample_no:, 1],
                    c=colors[anal_sample_no:], alpha=0.2, label="Synthetic")

        ax.legend()
        plt.title('t-SNE plot')
        plt.xlabel('x-tsne')
        plt.ylabel('y-tsne')
        plt.savefig(os.path.join(outputs_dir, 't-SNE.png'), dpi=800)
        plt.show()
--------------------------------------------------------------------------------
/networks.py:
--------------------------------------------------------------------------------
import torch.nn as nn


def get_rnn_cell(module_name):
    """Select the basic RNN module.
    Args:
      - module_name: gru or lstm

    Returns:
      - rnn_cell: the corresponding RNN module class (nn.GRU or nn.LSTM)
    """
    assert module_name in ['gru', 'lstm']
    rnn_cell = None
    # GRU
    if module_name == 'gru':
        rnn_cell = nn.GRU
    # LSTM
    elif module_name == 'lstm':
        rnn_cell = nn.LSTM
    return rnn_cell


class Embedder(nn.Module):
    """Embedding network from the original feature space to the latent space.

    Args:
      - X: input time-series features, size (Num, Len, Dim), e.g. (3661, 24, 6) for stock data

    Returns:
      - H: embedded features, size (Num, Len, hidden_dim)
    """

    def __init__(self, para):
        super(Embedder, self).__init__()
        rnn_cell = get_rnn_cell(para['module'])
        self.rnn = rnn_cell(input_size=para['input_dim'], hidden_size=para['hidden_dim'], num_layers=para['num_layer'],
                            batch_first=True)
        self.fc = nn.Linear(para['hidden_dim'], para['hidden_dim'])
        self.sigmoid = nn.Sigmoid()

    def forward(self, X):
        e_outputs, _ = self.rnn(X)
        H = self.fc(e_outputs)
        H = self.sigmoid(H)
        return H


class Recovery(nn.Module):
    """Recovery network from the latent space back to the original feature space.

    Args:
      - H: latent representation

    Returns:
      - X_tilde: recovered data
    """

    def __init__(self, para):
        super(Recovery, self).__init__()
        rnn_cell = get_rnn_cell(para['module'])
        self.rnn = rnn_cell(input_size=para['hidden_dim'], hidden_size=para['input_dim'], num_layers=para['num_layer'],
                            batch_first=True)
        self.fc = nn.Linear(para['input_dim'], para['input_dim'])
        self.sigmoid = nn.Sigmoid()

    def forward(self, H):
        r_outputs, _ = self.rnn(H)
        X_tilde = self.fc(r_outputs)
        X_tilde = self.sigmoid(X_tilde)
        return X_tilde


class Generator(nn.Module):
    """Generator: generate time-series data in the latent space.

    Args:
      - Z: random variables

    Returns:
      - E: generated embedding
    """

    def __init__(self, para):
        super(Generator, self).__init__()
        rnn_cell = get_rnn_cell(para['module'])
        self.rnn = rnn_cell(input_size=para['input_dim'], hidden_size=para['hidden_dim'], num_layers=para['num_layer'],
                            batch_first=True)
        self.fc = nn.Linear(para['hidden_dim'], para['hidden_dim'])
        self.sigmoid = nn.Sigmoid()

    def forward(self, Z):
        g_outputs, _ = self.rnn(Z)
        E = self.fc(g_outputs)
        E = self.sigmoid(E)
        return E


class Supervisor(nn.Module):
    """Generate the next latent sequence from the previous one.

    Args:
      - H: latent representation

    Returns:
      - S: sequence generated from the latent representations produced by the generator
    """

    def __init__(self, para):
        super(Supervisor, self).__init__()
        rnn_cell = get_rnn_cell(para['module'])
        self.rnn = rnn_cell(input_size=para['hidden_dim'], hidden_size=para['hidden_dim'], num_layers=para['num_layer'] - 1,
                            batch_first=True)
        self.fc = nn.Linear(para['hidden_dim'], para['hidden_dim'])
        self.sigmoid = nn.Sigmoid()

    def forward(self, H):
        s_outputs, _ = self.rnn(H)
        S = self.fc(s_outputs)
        S = self.sigmoid(S)
        return S


class Discriminator(nn.Module):
    """Discriminate between original and synthetic time-series data.
    Args:
      - H: latent representation

    Returns:
      - Y: classification results for original vs. synthetic time-series
    """

    def __init__(self, para):
        super(Discriminator, self).__init__()
        rnn_cell = get_rnn_cell(para['module'])
        self.rnn = rnn_cell(input_size=para['hidden_dim'], hidden_size=para['hidden_dim'], num_layers=para['num_layer'],
                            batch_first=True)
        self.fc = nn.Linear(para['hidden_dim'], para['hidden_dim'])
        self.sigmoid = nn.Sigmoid()

    def forward(self, H):
        d_outputs, _ = self.rnn(H)
        Y = self.fc(d_outputs)
        Y = self.sigmoid(Y)
        return Y
--------------------------------------------------------------------------------
/output/PCA.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/output/PCA.png
--------------------------------------------------------------------------------
/output/t-SNE.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/output/t-SNE.png
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
import numpy as np
import timegan
from metrics.discriminative_metrics import discriminative_score_metrics
from metrics.predictive_metrics import predictive_score_metrics
from metrics.visualization_metrics import visualization
from utils import extract_time


def train(opt, ori_data):

    # Model setting
    model = timegan.TimeGAN(opt, ori_data)
    per_print_num = opt.iterations // opt.print_times

    # 1. Embedding network training
    print('Start Embedding Network Training')
    for i in range(opt.iterations):
        model.gen_batch()
        model.batch_forward()
        model.train_embedder()
        if i % per_print_num == 0:
            print('step: ' + str(i) + '/' + str(opt.iterations) +
                  ', e_loss: ' + str(np.round(np.sqrt(model.E_loss_T0.item()), 4)))
    print('Finish Embedding Network Training')

    # 2. Training only with supervised loss
    print('Start Training with Supervised Loss Only')
    for i in range(opt.iterations):
        model.gen_batch()
        model.batch_forward()
        model.train_supervisor()
        if i % per_print_num == 0:
            print('step: ' + str(i) + '/' + str(opt.iterations) +
                  ', s_loss: ' + str(np.round(np.sqrt(model.G_loss_S.item()), 4)))
    print('Finish Training with Supervised Loss Only')

    # 3. Joint training
    print('Start Joint Training')
    for i in range(opt.iterations):
        # Generator training (twice as often as discriminator training)
        for kk in range(2):
            model.gen_batch()
            model.batch_forward()
            model.train_generator(join_train=True)
            model.batch_forward()
            model.train_embedder(join_train=True)
        # Discriminator training
        model.gen_batch()
        model.batch_forward()
        model.train_discriminator()

        # Print progress checkpoints
        if i % per_print_num == 0:
            print('step: ' + str(i) + '/' + str(opt.iterations) +
                  ', d_loss: ' + str(np.round(model.D_loss.item(), 4)) +
                  ', g_loss_u: ' + str(np.round(model.G_loss_U.item(), 4)) +
                  ', g_loss_s: ' + str(np.round(np.sqrt(model.G_loss_S.item()), 4)) +
                  ', g_loss_v: ' + str(np.round(model.G_loss_V.item(), 4)) +
                  ', e_loss_t0: ' + str(np.round(np.sqrt(model.E_loss_T0.item()), 4)))
    print('Finish Joint Training')

    # Save trained networks
    model.save_trained_networks()


def test(opt, ori_data):

    print('Start Testing')
    # Model setting
    model = timegan.TimeGAN(opt, ori_data)
    model.load_trained_networks()

    # Synthetic data generation
    if opt.synth_size != 0:
        synth_size = opt.synth_size
    else:
        synth_size = len(ori_data)
    generated_data = model.gen_synth_data(synth_size)
    generated_data = generated_data.cpu().detach().numpy()
    gen_data = list()
    for i in range(synth_size):
        temp = generated_data[i, :opt.seq_len, :]
        gen_data.append(temp)
    print('Finish Synthetic Data Generation')

    # Performance metrics
    metric_results = dict()
    if not opt.only_visualize_metric:
        # 1. Discriminative score
        discriminative_score = list()
        print('Start discriminative_score_metrics')
        for i in range(opt.metric_iteration):
            print('discriminative_score iteration: ', i)
            temp_disc = discriminative_score_metrics(ori_data, gen_data)
            discriminative_score.append(temp_disc)

        metric_results['discriminative'] = np.mean(discriminative_score)
        print('Finish discriminative_score_metrics compute')

        # 2. Predictive score
        predictive_score = list()
        print('Start predictive_score_metrics')
        for i in range(opt.metric_iteration):
            print('predictive_score iteration: ', i)
            temp_predict = predictive_score_metrics(ori_data, gen_data)
            predictive_score.append(temp_predict)
        metric_results['predictive'] = np.mean(predictive_score)
        print('Finish predictive_score_metrics compute')

    # 3. Visualization (PCA and t-SNE)
    visualization(ori_data, gen_data, 'pca', opt.output_dir)
    visualization(ori_data, gen_data, 'tsne', opt.output_dir)

    # Print discriminative and predictive scores
    print(metric_results)
--------------------------------------------------------------------------------
/timegan.py:
--------------------------------------------------------------------------------
import os
import torch
import numpy as np
from networks import Embedder, Recovery, Generator, Discriminator, Supervisor
from utils import batch_generator, random_generator, MinMaxScaler, extract_time

torch.autograd.set_detect_anomaly(True)


class TimeGAN:
    def __init__(self, opt, ori_data):

        self.opt = opt
        self.ori_data, self.min_val, self.max_val = MinMaxScaler(ori_data)
        self.ori_time, self.max_seq_len = extract_time(self.ori_data)
        self.no, self.seq_len, self.z_dim = np.asarray(ori_data).shape
        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

        # Create and initialize networks.
        self.para = dict()
        self.para['module'] = self.opt.module
        self.para['input_dim'] = self.z_dim
        self.para['hidden_dim'] = self.opt.hidden_dim
        self.para['num_layer'] = self.opt.num_layer
        self.embedder = Embedder(self.para).to(self.device)
        self.recovery = Recovery(self.para).to(self.device)
        self.generator = Generator(self.para).to(self.device)
        self.discriminator = Discriminator(self.para).to(self.device)
        self.supervisor = Supervisor(self.para).to(self.device)

        # Create and initialize optimizers.
        self.optim_embedder = torch.optim.Adam(self.embedder.parameters(), lr=self.opt.lr)
        self.optim_recovery = torch.optim.Adam(self.recovery.parameters(), lr=self.opt.lr)
        self.optim_generator = torch.optim.Adam(self.generator.parameters(), lr=self.opt.lr)
        self.optim_discriminator = torch.optim.Adam(self.discriminator.parameters(), lr=self.opt.lr)
        self.optim_supervisor = torch.optim.Adam(self.supervisor.parameters(), lr=self.opt.lr)

        # Set loss functions
        self.MSELoss = torch.nn.MSELoss()
        self.BCELoss = torch.nn.BCELoss()

        if self.opt.load_checkpoint:
            self.load_trained_networks()

    def gen_batch(self):

        # Set training batch
        self.X, self.T = batch_generator(self.ori_data, self.ori_time, self.opt.batch_size)
        self.X = torch.tensor(np.array(self.X), dtype=torch.float32).to(self.device)
        # Random vector generation
        self.Z = random_generator(self.opt.batch_size, self.para['input_dim'], self.max_seq_len, self.T)
        self.Z = torch.tensor(np.array(self.Z), dtype=torch.float32).to(self.device)

    # Forward pass through all networks
    def batch_forward(self):
        self.H = self.embedder(self.X)
        self.X_tilde = self.recovery(self.H)
        self.H_hat_supervise = self.supervisor(self.H)

        self.E_hat = self.generator(self.Z)
        self.H_hat = self.supervisor(self.E_hat)
        self.X_hat = self.recovery(self.H_hat)

        self.Y_real = self.discriminator(self.H)
        self.Y_fake = self.discriminator(self.H_hat)
        self.Y_fake_e = self.discriminator(self.E_hat)

    def gen_synth_data(self, batch_size):
        self.Z = random_generator(batch_size, self.para['input_dim'], self.max_seq_len, self.ori_time)
        self.Z = torch.tensor(np.array(self.Z), dtype=torch.float32).to(self.device)

        self.E_hat = self.generator(self.Z)
        self.H_hat = self.supervisor(self.E_hat)
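        # Decode back to the feature space: a synthetic sample is produced by
        # the chain Z -> generator (E_hat) -> supervisor (H_hat) -> recovery (X_hat).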
        self.X_hat = self.recovery(self.H_hat)

        return self.X_hat

    def train_embedder(self, join_train=False):
        self.embedder.train()
        self.recovery.train()
        self.optim_embedder.zero_grad()
        self.optim_recovery.zero_grad()
        self.E_loss_T0 = self.MSELoss(self.X, self.X_tilde)
        self.E_loss0 = 10 * torch.sqrt(self.E_loss_T0)
        if not join_train:
            # E0_solver
            self.E_loss0.backward()
        else:
            # E_solver
            self.G_loss_S = self.MSELoss(self.H[:, 1:, :], self.H_hat_supervise[:, :-1, :])
            self.E_loss = self.E_loss0 + 0.1 * self.G_loss_S
            self.E_loss.backward()
        self.optim_embedder.step()
        self.optim_recovery.step()

    def train_supervisor(self):
        # GS_solver
        self.generator.train()
        self.supervisor.train()
        self.optim_generator.zero_grad()
        self.optim_supervisor.zero_grad()
        self.G_loss_S = self.MSELoss(self.H[:, 1:, :], self.H_hat_supervise[:, :-1, :])
        self.G_loss_S.backward()
        self.optim_generator.step()
        self.optim_supervisor.step()

    def train_generator(self, join_train=False):
        # G_solver
        self.optim_generator.zero_grad()
        self.optim_supervisor.zero_grad()
        self.G_loss_U = self.BCELoss(self.Y_fake, torch.ones_like(self.Y_fake))
        self.G_loss_U_e = self.BCELoss(self.Y_fake_e, torch.ones_like(self.Y_fake_e))
        # Moment matching: align the per-feature standard deviation and mean of
        # the generated data with the real data (tf.nn.moments(...)[1] in the
        # original TF code is the variance, hence torch.var here)
        self.G_loss_V1 = torch.mean(torch.abs(torch.sqrt(torch.var(self.X_hat, dim=0) + 1e-6) -
                                              torch.sqrt(torch.var(self.X, dim=0) + 1e-6)))
        self.G_loss_V2 = torch.mean(torch.abs(torch.mean(self.X_hat, dim=0) - torch.mean(self.X, dim=0)))
        self.G_loss_V = self.G_loss_V1 + self.G_loss_V2
        self.G_loss_S = self.MSELoss(self.H_hat_supervise[:, :-1, :], self.H[:, 1:, :])
        self.G_loss = self.G_loss_U + \
                      self.opt.gamma * self.G_loss_U_e + \
                      torch.sqrt(self.G_loss_S) * 100 + \
                      self.G_loss_V * 100
        if not join_train:
            self.G_loss.backward()
        else:
            self.G_loss.backward(retain_graph=True)

        self.optim_generator.step()
        self.optim_supervisor.step()

    def train_discriminator(self):
        # D_solver
        self.discriminator.train()
        self.optim_discriminator.zero_grad()
        self.D_loss_real = self.BCELoss(self.Y_real, torch.ones_like(self.Y_real))
        self.D_loss_fake = self.BCELoss(self.Y_fake, torch.zeros_like(self.Y_fake))
        self.D_loss_fake_e = self.BCELoss(self.Y_fake_e, torch.zeros_like(self.Y_fake_e))
        self.D_loss = self.D_loss_real + \
                      self.D_loss_fake + \
                      self.opt.gamma * self.D_loss_fake_e
        # Update the discriminator only when it does not already work well
        if self.D_loss > 0.15:
            self.D_loss.backward()
            self.optim_discriminator.step()

    def load_trained_networks(self):
        print("Loading trained networks")
        self.embedder.load_state_dict(torch.load(os.path.join(self.opt.networks_dir, 'embedder.pth')))
        self.recovery.load_state_dict(torch.load(os.path.join(self.opt.networks_dir, 'recovery.pth')))
        self.generator.load_state_dict(torch.load(os.path.join(self.opt.networks_dir, 'generator.pth')))
        self.discriminator.load_state_dict(torch.load(os.path.join(self.opt.networks_dir, 'discriminator.pth')))
        self.supervisor.load_state_dict(torch.load(os.path.join(self.opt.networks_dir, 'supervisor.pth')))
        print("Done.")

    def save_trained_networks(self):
        print("Saving trained networks")
        torch.save(self.embedder.state_dict(), os.path.join(self.opt.networks_dir, 'embedder.pth'))
        torch.save(self.recovery.state_dict(), os.path.join(self.opt.networks_dir, 'recovery.pth'))
        torch.save(self.generator.state_dict(), os.path.join(self.opt.networks_dir, 'generator.pth'))
        torch.save(self.discriminator.state_dict(), os.path.join(self.opt.networks_dir, 'discriminator.pth'))
        torch.save(self.supervisor.state_dict(), os.path.join(self.opt.networks_dir, 'supervisor.pth'))
        print("Done.")
--------------------------------------------------------------------------------
/trained_networks/discriminator.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/trained_networks/discriminator.pth
--------------------------------------------------------------------------------
/trained_networks/embedder.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/trained_networks/embedder.pth
--------------------------------------------------------------------------------
/trained_networks/generator.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/trained_networks/generator.pth
--------------------------------------------------------------------------------
/trained_networks/recovery.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/trained_networks/recovery.pth
--------------------------------------------------------------------------------
/trained_networks/supervisor.pth:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlanDongMu/TimeGAN_PytorchRebuild/9cab6ea292cb1a498a1880f79f05a3e1981cef35/trained_networks/supervisor.pth
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import numpy as np


def train_test_divide(data_x, data_x_hat, data_t, data_t_hat, train_rate=0.8):
    """Divide train and test data for both original and synthetic data.
    Args:
      - data_x: original data
      - data_x_hat: generated data
      - data_t: original time
      - data_t_hat: generated time
      - train_rate: ratio of training data from the original data
    """
    # Divide train/test indices (original data)
    no = len(data_x)
    idx = np.random.permutation(no)
    train_idx = idx[:int(no * train_rate)]
    test_idx = idx[int(no * train_rate):]

    train_x = [data_x[i] for i in train_idx]
    test_x = [data_x[i] for i in test_idx]
    train_t = [data_t[i] for i in train_idx]
    test_t = [data_t[i] for i in test_idx]

    # Divide train/test indices (synthetic data)
    no = len(data_x_hat)
    idx = np.random.permutation(no)
    train_idx = idx[:int(no * train_rate)]
    test_idx = idx[int(no * train_rate):]

    train_x_hat = [data_x_hat[i] for i in train_idx]
    test_x_hat = [data_x_hat[i] for i in test_idx]
    train_t_hat = [data_t_hat[i] for i in train_idx]
    test_t_hat = [data_t_hat[i] for i in test_idx]

    return train_x, train_x_hat, test_x, test_x_hat, train_t, train_t_hat, test_t, test_t_hat


def MinMaxScaler(data):
    """Min-Max normalizer.

    Args:
      - data: raw data

    Returns:
      - norm_data: normalized data
      - min_val: minimum values (for renormalization)
      - max_val: maximum values (for renormalization)
    """
    min_val = np.min(np.min(data, axis=0), axis=0)
    data = data - min_val

    max_val = np.max(np.max(data, axis=0), axis=0)
    norm_data = data / (max_val + 1e-7)

    return norm_data, min_val, max_val


def extract_time(data):
    """Return the maximum sequence length and each sequence's length.

    Args:
      - data: original data

    Returns:
      - time: extracted time information
      - max_seq_len: maximum sequence length
    """
    time = list()
    max_seq_len = 0
    for i in range(len(data)):
        max_seq_len = max(max_seq_len, len(data[i][:, 0]))
        time.append(len(data[i][:, 0]))

    return time, max_seq_len


def random_generator(batch_size, z_dim, max_seq_len, *T):
    """Random vector generation.

    Args:
      - batch_size: number of random sequences to generate
      - z_dim: dimension of each random vector
      - max_seq_len: maximum sequence length (used when no time information is given)
      - T: optional time information; if given, T[0][i] is the length of sequence i

    Returns:
      - Z_mb: generated random vectors
    """
    Z_mb = list()
    for i in range(batch_size):
        if not T:
            temp = np.random.uniform(0., 1, [max_seq_len, z_dim])
        else:
            T_mb = T[0]
            temp = np.random.uniform(0., 1, [T_mb[i], z_dim])
        Z_mb.append(temp)
    return Z_mb


def batch_generator(data, time, batch_size):
    """Mini-batch generator.

    Args:
      - data: time-series data
      - time: time information
      - batch_size: the number of samples in each batch

    Returns:
      - X_mb: time-series data in each batch
      - T_mb: time information in each batch
    """
    no = len(data)
    idx = np.random.permutation(no)
    train_idx = idx[:batch_size]

    X_mb = list(data[i] for i in train_idx)
    T_mb = list(time[i] for i in train_idx)

    return X_mb, T_mb
--------------------------------------------------------------------------------
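
Usage (sketch): the commands below illustrate how the entry point is typically
invoked, based on the flags defined in main.py; the hyperparameter values shown
are the script defaults, not tuned recommendations.

    # Train on the stock data, then run the evaluation metrics
    python main.py --data_name stock --seq_len 24 --module gru --hidden_dim 24 --num_layer 3 --iterations 50000 --batch_size 128

    # Evaluate only, reusing the checkpoints saved under ./trained_networks
    python main.py --data_name stock --is_test --metric_iteration 10

    # Train on generated sine data instead of a CSV dataset
    python main.py --data_name sine --sine_no 10000 --sine_dim 5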