├── .gitattributes
├── FT_phase.py
├── README.md
├── data
    ├── args.txt
    ├── data_test
    └── raw_data_train_complete
├── models
    ├── GRU_Decoder.py
    ├── GRU_ED.py
    ├── README.md
    └── VGRU_ED.py
├── training_GRU_Decoder_t1.py
├── training_GRU_ED.py
├── training_VGRU_ED.py
└── utils
    └── utils_ft.py


/.gitattributes:
--------------------------------------------------------------------------------
1 | data/raw_data_train_complete filter=lfs diff=lfs merge=lfs -text
2 | 


--------------------------------------------------------------------------------
/FT_phase.py:
--------------------------------------------------------------------------------
  1 | # Essential imports for PyTorch and data manipulation
  2 | import torch
  3 | import torch.optim as optim
  4 | import torch.nn as nn
  5 | from torch.nn import functional as F
  6 | from torch.utils.data import DataLoader, TensorDataset
  7 | from torch.autograd import Variable
  8 | from torch.nn.modules import ModuleList, normalization
  9 | 
 10 | # Additional utilities
 11 | import argparse
 12 | import datetime 
 13 | import matplotlib.pyplot as plt
 14 | import numpy as np
 15 | import os
 16 | import pandas as pd
 17 | import pickle
 18 | import random
 19 | import time
 20 | import tqdm
 21 | from utils import utils_ft # Import custom utilities for feature transformation
 22 | 
 23 | # Disable warnings (consider reviewing this for better debugging)
 24 | import warnings
 25 | warnings.filterwarnings("ignore")
 26 | 
 27 | # Define a function to parse training arguments
 28 | def training_args():
 29 |     """
 30 |     Parses command line arguments for training configuration.
 31 |     
 32 |     Returns:
 33 |         argparse.Namespace: Parsed arguments with training configurations.
 34 |     """
 35 |     parser = argparse.ArgumentParser(description='fine_tune')
 36 |     # Define arguments
 37 |     parser.add_argument('--path', default='', type=str, help='model path')
 38 |     parser.add_argument('--folder', default='', type=str, help='folder path')
 39 |     parser.add_argument('--reset', default='no', type=str, help='Reset weights?')
 40 |     parser.add_argument('--freeze', default='freeze', type=str, help='Freeze weights')
 41 |     parser.add_argument('--finetune', default=False, type=bool, help='Finetune')
 42 |     parser.add_argument('--cv', default=5, type=int, help='k fold')
 43 |     parser.add_argument('--num_gpus', default=1, type=int, help='number of GPUs')
 44 |     parser.add_argument('--nb_samples', default=10, type=int, help='Number of samples')
 45 |     parser.add_argument('-b', '--batch_size', default=4096, type=int, help='mini-batch size')
 46 |     parser.add_argument('-e', '--epochs', default=10, type=int, help='number of total epochs')
 47 |     parser.add_argument('--device', default=0, type=int, help='which device')
 48 |     parser.add_argument('--maxlen', default=30, type=int, help='Windows length')
 49 |     parser.add_argument('--nb_gauges', default=3, type=int, help='Number of gauges')
 50 |     parser.add_argument('--thinning', default=500, type=int, help='Thinning')
 51 |     parser.add_argument('--lr', default=1e-6, type=float, help='Learning rate')
 52 |     parser.add_argument('--drop', default=0.1, type=float, help='Dropout rate')
 53 | 
 54 |     # Parse and return arguments
 55 |     return parser.parse_args()
 56 | 
 57 | # Define function to create data loaders
 58 | def create_loaders(data, bs=512, jobs=0):
 59 |     """
 60 |     Creates a data loader for the given dataset.
 61 | 
 62 |     Args:
 63 |         data (Dataset): The dataset for which to create the data loader.
 64 |         bs (int): Batch size. Default is 512.
 65 |         jobs (int): Number of worker processes to use. Default is 0.
 66 | 
 67 |     Returns:
 68 |         DataLoader: Data loader for the given dataset.
 69 |     """
 70 |     return DataLoader(data, batch_size=bs, shuffle=True, num_workers=jobs, pin_memory=False)
 71 | 
 72 | # Define a custom GRU Layer class
 73 | class GRU_Layer(nn.Module):
 74 |     """
 75 |     Custom GRU Layer class.
 76 |     """
 77 |     def __init__(self, input_dim, hidden_dim, n_layers, drop_prob):
 78 |         super(GRU_Layer, self).__init__()
 79 |         self.hidden_dim = hidden_dim
 80 |         self.n_layers = n_layers
 81 |         self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob)
 82 | 
 83 |     def forward(self, x, hidden):
 84 |         out, hidden = self.gru(x, hidden)    
 85 |         return F.silu(out), hidden
 86 | 
 87 | # Define a custom sequential class
 88 | class MySequential(nn.Sequential):
 89 |     """
 90 |     Custom Sequential class to handle multiple input formats.
 91 |     """
 92 |     def forward(self, *inputs):
 93 |         for module in self._modules.values():
 94 |             inputs = module(*inputs) if type(inputs) == tuple else module(inputs)
 95 |         return inputs
 96 | 
 97 | # Define a function to update the learning rate
 98 | def update_lr(optimizer, lr):
 99 |     """
100 |     Updates the learning rate for an optimizer.
101 | 
102 |     Args:
103 |         optimizer (Optimizer): The optimizer to update.
104 |         lr (float): The new learning rate.
105 |     """
106 |     for g in optimizer.param_groups:
107 |         g['lr'] = lr
108 | 
109 | 
110 | def configure_model(model, args, trn_std, trn_mean, bias, lr, device):
111 |     """
112 |     Configures the model based on the given arguments.
113 | 
114 |     Args:
115 |         model (torch.nn.Module): The model to configure.
116 |         args (argparse.Namespace): Command-line arguments with 'reset' and 'freeze' options.
117 |         trn_std (torch.Tensor): Standard deviation for normalization.
118 |         trn_mean (torch.Tensor): Mean for normalization.
119 |         bias (float): Bias value to add to the last layer.
120 |         lr (float): Learning rate for the optimizer.
121 |         device (torch.device): The device to use for tensors.
122 | 
123 |     Returns:
124 |         torch.optim.Optimizer: Configured optimizer for the model.
125 |     """
126 |     def reset_weights(m):
127 |         if hasattr(m, 'reset_parameters'):
128 |             m.reset_parameters()
129 | 
130 |     def set_fc_layer(num_features):
131 |         fc_layer = mySequential(
132 |             GRU_Layer(num_features, num_features, 1, 0.1),
133 |             nn.LayerNorm(num_features, elementwise_affine=False),
134 |             nn.Linear(num_features, 1)
135 |         ).to(device)
136 |         nn.init.xavier_normal_(fc_layer[-1].weight.data)
137 |         fc_layer[-1].bias.data += bias
138 |         return fc_layer
139 | 
140 |     if args.reset == 'reset':
141 |         print('Reset weights...')
142 |         model.apply(reset_weights)
143 |         model.std = trn_std.to(device)
144 |         model.mean = trn_mean.to(device)
145 | 
146 |     elif args.freeze == 'freeze':
147 |         print('Freeze pre-trained layers')
148 |         utils_ft.set_parameter_requires_grad(model, True)
149 |     else:
150 |         print('Unfreeze pre-trained layers')
151 |         utils_ft.set_parameter_requires_grad(model, False)
152 | 
153 |     # Set up the fully connected layer
154 |     num_features = model.fc.in_features
155 |     model.fc = set_fc_layer(num_features)
156 |     model.length_seq = 30
157 |     model.ft = True
158 | 
159 |     # Return the configured optimizer
160 |     return model, optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
161 | 
162 | def prepare_data_for_fold(data_train_raw, i, cv, nb_samples, sequence_length, seq_cols_in, seq_cols_out, bs, device):
163 |     """
164 |     Prepares training and validation data for a given fold in cross-validation.
165 | 
166 |     Args:
167 |         data_train_raw (DataFrame): Raw training data.
168 |         i (int): Index of the current fold in cross-validation.
169 |         cv (int): Total number of folds in cross-validation.
170 |         nb_samples (int): Number of samples in the dataset.
171 |         sequence_length (int): Length of the sequence for training.
172 |         seq_cols_in (list): List of column names for input features.
173 |         seq_cols_out (list): List of column names for output labels.
174 |         bs (int): Batch size for data loaders.
175 |         device (torch.device): Device to use for tensors.
176 | 
177 |     Returns:
178 |         DataLoader: DataLoader for training data.
179 |         DataLoader: DataLoader for validation data.
180 |     """
181 |     print('----------------------------------')
182 |     print(f'Fold {i + 1}')
183 |     print('----------------------------------')
184 | 
185 |     # Create indices for training and validation data
186 |     list_train = np.arange(1, nb_samples + 1)
187 |     list_val = list_train[i:i + nb_samples // cv]
188 |     list_train = np.delete(list_train, np.arange(i, i + nb_samples // cv))
189 | 
190 |     # Split data into training and validation sets
191 |     data_train = data_train_raw[data_train_raw.ID.isin(list_train)]
192 |     data_val = data_train_raw[data_train_raw.ID.isin(list_val)]
193 | 
194 |     # Preprocess the data
195 |     X_train, y_train = utils_ft.seq_preprocess(data_train, sequence_length, seq_cols_in, seq_cols_out, type_set='Train')
196 |     X_val, y_val = utils_ft.seq_preprocess(data_val, sequence_length, seq_cols_in, seq_cols_out, type_set='Val')
197 | 
198 |     # Create TensorDatasets
199 |     train_dl = TensorDataset(X_train, torch.log(y_train + 500).to(device))
200 |     val_dl = TensorDataset(X_val, torch.log(y_val + 500).to(device))
201 | 
202 |     # Create DataLoaders
203 |     print(f'Creating data loaders with batch size: {bs}')
204 |     trn_dl = create_loaders(train_dl, bs, jobs=4)
205 |     val_dl = create_loaders(val_dl, bs, jobs=4)
206 | 
207 |     return trn_dl, val_dl
208 | 
209 | if __name__ == "__main__":
210 |     # Parse arguments for training
211 |     args = training_args()
212 |     print(args)
213 | 
214 |     # Set up various training configurations
215 |     nb_gauges = args.nb_gauges
216 |     device = torch.device('cuda') # Consider checking for CUDA availability
217 |     bs = args.batch_size
218 |     epochs = args.epochs
219 |     maxlen = args.maxlen
220 |     nb_samples = args.nb_samples
221 |     thinning = args.thinning
222 |     cv = args.cv
223 |     patience = 500  # Initial patience for early stopping
224 | 
225 |     # Set manual seeds for reproducibility
226 |     torch.manual_seed(0)
227 |     torch.cuda.manual_seed(0)
228 |     torch.backends.cudnn.benchmark = False
229 |     torch.backends.cudnn.deterministic = True
230 |     
231 |     # Change directory to data folder and load data
232 |     os.chdir("/home/anassakrim/FolderThesis/ProjectSSL/")
233 |     fd_data = 'data/'
234 |     data_train_raw = pd.read_pickle(fd_data + 'data_train_ft').reset_index().iloc[:, 1:]
235 |     data_test = pd.read_pickle(fd_data + 'data_test').reset_index().iloc[:, 1:]  
236 |     data_train = data_train_raw[data_train_raw.ID <= nb_samples]
237 | 
238 |     # Data preparation
239 |     seq_cols_in = ['gauge' + str(i + 1) for i in range(nb_gauges)]
240 |     seq_cols_out = ['RUL']
241 |     sequence_length = maxlen
242 | 
243 |     # Preprocess training data
244 |     tmp = data_train[seq_cols_in].values
245 |     trn_mean = tmp.mean(axis=0).reshape(1, -1)
246 |     trn_std = tmp.std(axis=0).reshape(1, -1)
247 |     trn_mean = torch.tensor(trn_mean).float()
248 |     trn_std = torch.tensor(trn_std).float()
249 |     bias = np.log(data_train[seq_cols_out].values + 500).mean()  # Bias for fine-tuning
250 | 
251 |     # Preprocess test set
252 |     X_test, y_test = utils_ft.seq_preprocess(data_test, sequence_length, seq_cols_in, seq_cols_out, type_set='Test')
253 |     y_test = y_test.to(device).reshape(-1)
254 |     X_test = X_test.to(device)
255 | 
256 |     # Prepare the model directory
257 |     fd = args.folder
258 |     dt = f"{datetime.datetime.now():%Y%h%d_%Hh%M}"
259 |     path_model = fd + "/L2_PT_Test_k_fold_" + dt + '_' + f"Finetune{nb_samples}"
260 |     os.makedirs(path_model, exist_ok=True)
261 |     dir_path = path_model + "/"
262 | 
263 |     # Load pre-trained model
264 |     model_path = args.path + 'model.pth'
265 |     it = 0
266 |     lr = args.lr
267 |     model = utils_ft.load_checkpoint(model_path, train=True)
268 |     optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)
269 | 
270 |     # Save model architecture and training configuration
271 |     with open(dir_path + "model_parameters.txt", "a") as f:
272 |         f.write(str(model.state_dict()))
273 | 
274 |     with open(dir_path + "log_loss.txt", "a") as f:
275 |         pass  # Currently empty, consider logging training progress here
276 | 
277 |     with open(dir_path + "args.txt", "w+") as f:
278 |         f.write(str(args))
279 | 
280 |     with open(dir_path + "optim.txt", "w+") as f:
281 |         f.write(str(optimizer))
282 | 
283 |     # Main training and evaluation loop
284 |     t0 = time.time()
285 | 
286 |     scores_val = []
287 |     scores_test = []
288 | 
289 |     for i in range(0, nb_samples, nb_samples // cv):
290 |         patience = 500
291 |         best_mape = float('inf')
292 |         saved_mape_test = float('inf')
293 | 
294 |         model = utils_ft.load_checkpoint(model_path, train=True)
295 |         model, optimizer = configure_model(model, args, trn_std, trn_mean, bias, lr, device)
296 |         model.to(device)
297 |         criterion = nn.MSELoss().to(device)
298 | 
299 | 
300 |         # Prepare DataLoader for both training and validation datasets
301 |         trn_dl, val_dl = prepare_data_for_fold(data_train_raw, it, cv, nb_samples, 
302 |                                                sequence_length, seq_cols_in, seq_cols_out, bs, device)
303 |         trn_mape_track = []
304 |         val_mape_track = []
305 | 
306 |         # Save model information
307 |         infos_model = (
308 |             f'Number of training aircraft components: {len(np.unique(data_train.ID))} '
309 |             f'\\ Sequence length: {maxlen} '
310 |             f'\\ Number of training samples: {X_train.shape[0]} '
311 |             f'\\ Number of epochs: {args.epochs} '
312 |             f'\\ Optimizer learning rate: {args.lr} '
313 |             f'\\ Running time in minutes: {(time.time() - t0) / 60} '
314 |             f'\\ Number of model parameters: {model.number_of_parameters()}'
315 |         )
316 |         with open(os.path.join(dir_path, "training_readme.txt"), "w+") as f:
317 |             f.write(infos_model)
318 |         
319 |         # Move training and validation data to the specified device
320 |         y_train = y_train.to(device).reshape(-1)
321 |         y_val = y_val.to(device).reshape(-1)
322 |         X_train = X_train.to(device)
323 |         X_val = X_val.to(device)
324 |         
325 |         # Iterate over a range of learning rates
326 |         it_lr = 0
327 |         for l_r in [args.lr / (10 ** p) for p in range(3)]:
328 |             patience = 500  # Reset patience variable for early stopping
329 |             if it_lr != 0:  # For second fold and beyond
330 |                 print("Load the model...")
331 |                 with open(os.path.join(dir_path, "log_loss.txt"), "a") as f:
332 |                     f.write("Load the model...\n")
333 |         
334 |                 model = utils_ft.load_checkpoint(os.path.join(dir_path, PATH), train=True)
335 |                 # Freeze or unfreeze model layers based on arguments
336 |                 if args.freeze == 'freeze' and args.reset == 'no':
337 |                     utils_ft.set_parameter_requires_grad(model, True)  # Freeze all weights
338 |                     for param in model.fc.parameters():
339 |                         param.requires_grad = True  # Unfreeze weights of the final layer
340 |         
341 |                 model.to(device)
342 |                 checkpoint = torch.load(os.path.join(dir_path, PATH))
343 |                 optimizer = optim.Adam(model.parameters(), lr=l_r, weight_decay=1e-6)
344 |                 optimizer.load_state_dict(checkpoint['optimizer_dic'])
345 |                 update_lr(optimizer, l_r)
346 |         
347 |             it_lr += 1
348 |         
349 |             print(f'Learning rate adjusted to {optimizer.param_groups[0]["lr"]:.7f}')
350 |             with open(os.path.join(dir_path, "log_loss.txt"), "a") as f:
351 |                 f.write(f"Begin training.\nLearning rate adjusted to {optimizer.param_groups[0]['lr']:.7f}\n")
352 |         
353 |             # Initialize progress bar for training epochs
354 |             pbar = trange(args.epochs, unit="epoch")
355 |             for epoch in range(args.epochs):
356 |                 # Training step
357 |                 time.sleep(0.1)
358 |                 model.train()
359 |                 t1 = time.time()
360 |                 loss = 0
361 |                 mape_loss = 0
362 |                 for i, data in enumerate(trn_dl):   
363 |                     X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
364 |                     optimizer.zero_grad()
365 |                     y_train_pred = model(X_train_batch)[0][:,-1,:].reshape(-1)
366 |                     mse_loss = criterion(y_train_pred, y_train_batch)
367 |                     mse_loss.backward()
368 |                     optimizer.step()
369 | 
370 |                # Model Evaluation
371 |                 model.eval()
372 |                 with torch.no_grad():
373 |                     # Evaluate on training data
374 |                     pred_train = model(X_train)[0][:, -1, :].reshape(-1)
375 |                     pred_train = torch.exp(pred_train) - 500  # Inverse transform
376 |                     train_mape = torch.mean(torch.abs((pred_train - y_train) / y_train).masked_fill(torch.isinf(pred_train - y_train), 0))
377 |                 
378 |                     # Evaluate on validation data
379 |                     pred_val = model(X_val)[0][:, -1, :].reshape(-1)
380 |                     pred_val = torch.exp(pred_val) - 500
381 |                     val_mape = torch.mean(torch.abs((pred_val - y_val) / y_val).masked_fill(torch.isinf(pred_val - y_val), 0))
382 |                 
383 |                     # Evaluate on test data
384 |                     pred_test = model(X_test)[0][:, -1, :].reshape(-1)
385 |                     pred_test = torch.exp(pred_test) - 500
386 |                     test_mape = torch.mean(torch.abs((pred_test - y_test) / y_test))
387 |                 
388 |                 # Update progress bar and log results
389 |                 pbar.set_description(f'Epoch {epoch + 1}/{args.epochs}')
390 |                 pbar.set_postfix_str(f'Train MAPE {train_mape:.2%}, Val MAPE {val_mape:.2%}, Test MAPE {test_mape:.2%}, Best MAPE {best_mape:.2%}, Saved Test MAPE {saved_mape_test:.2%}, Patience {patience}')
391 |                 with open(os.path.join(dir_path, "log_loss.txt"), "a") as f:
392 |                     f.write(f'Epoch {epoch + 1}/{args.epochs}, Train MAPE {train_mape:.2%}, Val MAPE {val_mape:.2%}, Test MAPE {test_mape:.2%}, Best MAPE {best_mape:.2%}, Saved Test MAPE {saved_mape_test:.2%}, Patience {patience}\n')
393 |                 
394 |                 # Checkpointing and Early Stopping
395 |                 if epoch % 100 == 0:
396 |                     torch.cuda.empty_cache()
397 |                 if val_mape < best_mape:
398 |                     best_mape = val_mape
399 |                     saved_mape_test = test_mape
400 |                     patience = 500
401 |                     with open(os.path.join(dir_path, "log_loss.txt"), "a") as f:
402 |                         f.write("Save the model...\n")
403 |                     checkpoint = {'model': model, 'mape': trn_mape_track, 'val_mape': val_mape_track, 'state_dict': model.state_dict(), 'optimizer_dic': optimizer.state_dict(), 'lr': lr}
404 |                     torch.save(checkpoint, os.path.join(dir_path, PATH))
405 |                 else:
406 |                     patience -= 1
407 |                 if patience == 0:
408 |                     break
409 |                 
410 |                 # Plotting
411 |                 plt.figure(figsize=(22, 12))
412 |                 plt.plot(trn_mape_track, label='Training set')
413 |                 plt.plot(val_mape_track, label=f'Val set with best MAPE = {best_mape:.2%}')
414 |                 plt.grid()
415 |                 plt.legend()
416 |                 plt.title(f'{nb_samples} training structures, Fold {it}, Test set MAPE {saved_mape_test:.2%}')
417 |                 plt.xlabel('Epoch')
418 |                 plt.ylabel('MAPE (%)')
419 |                 plt.savefig(os.path.join(dir_path, f'Fold{it}.jpg'))
420 |                 plt.close()
421 |                 
422 |                 # Update scores
423 |                 scores_val.append(best_mape.cpu().numpy())
424 |                 scores_test.append(saved_mape_test)
425 |                 
426 |                 # Clean up
427 |                 del model
428 |                 torch.cuda.empty_cache()
429 | 
430 | 
431 |     scores_val = np.array(scores_val)
432 |     scores_test = np.array(scores_test)
433 |     mn_val, std_val = scores_val.mean(), scores_val.std()
434 |     mn_test, std_test = scores_test.mean(), scores_test.std()
435 | 
436 |     with open(dir_path + "log_loss.txt", "a") as f:
437 |         # Logic for logging fold-wise and overall performance
438 | 
439 |     print("Training completed.")
440 | 
441 |     
442 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # 🚀 Deep Self-Supervised Learning for Remaining Useful Life Prediction
 2 | 
 3 | ## Project Introduction
 4 | This repository showcases a cutting-edge approach to Remaining Useful Life (RUL) prediction, utilizing Deep Self-Supervised Learning. This innovative method represents a paradigm shift in Machine Learning, enabling AI systems to extract meaningful insights from available unlabelled data, without the need for externally provided annotations. This research addresses a critical challenge in predictive maintenance, particularly in environments where labeled data is scarce or difficult/expensive to obtain.
 5 | 
 6 | ## 📖 Detailed Description
 7 | 
 8 | ### Background
 9 | Initiated as part of the PREDICT project, this research represents a collaborative effort between experts at the University of Toulouse, including Institut Clément Ader and ISAE-SUPAERO DISC. The project focuses on leveraging the untapped potential of self-supervised learning in the domain of Prognostics and Health Management (PHM). Specifically, it aims to demonstrate the efficacy of pre-training Deep Learning models on large volumes of unlabeled sensor data and applying them to PHM tasks like RUL estimation, even with minimal labeled data availability.
10 | 
11 | ### Research Focus
12 | The core challenge tackled here is the scarcity of data in fatigue damage prognostics. The project's ambition is to accurately estimate the Remaining Useful Life of critical components, such as aluminum panels commonly used in aerospace structures, which are prone to fatigue cracks. To achieve this, the research utilizes strain gauge data, a type of data that presents unique challenges due to its nature and collection methods.
13 | 
14 | 
15 | ### Dataset Composition
16 | A synthetic dataset forms the backbone of this research. It is strategically divided into two key components:
17 | - A large, unlabeled dataset comprising strain gauge readings from structures prior to failure, used for the initial phase of model pre-training.
18 | - A smaller, labeled dataset containing strain gauge data up to the point of structural failure, utilized for subsequent fine-tuning of the models.
19 | 
20 | ### Contribution and Citation
21 | The findings and methodologies developed in this project could be invaluable to researchers and practitioners in the field. Those who find this repository beneficial for their work are encouraged to cite the published research:
22 | 
23 | ```
24 | @article{akrim2023self,
25 |   title={Self-Supervised Learning for data scarcity in a fatigue damage prognostic problem},
26 |   author={Akrim, Anass and Gogu, Christian and Vingerhoeds, Rob and Sala{\"u}n, Michel},
27 |   journal={Engineering Applications of Artificial Intelligence},
28 |   volume={120},
29 |   pages={105837},
30 |   year={2023},
31 |   publisher={Elsevier}}
32 | ``` 
33 | 
34 | ## 🙏 Acknowledgements
35 | 
36 | ◦ This work was partially funded by Occitanie region under the Predict project. This funding is gratefully acknowledged. 
37 | 
38 | ◦ This work has been carried out on the supercomputers PANDO (ISAE Supaero, Toulouse) and Olympe (CALMIP, Toulouse, project n°21042). Authors are grateful to ISAE Supaero and CALMIP for the hours allocated to this project.
39 | 


--------------------------------------------------------------------------------
/data/args.txt:
--------------------------------------------------------------------------------
1 | {'a0_mean': 0.0005, 'a0_std': 0.00025, 'C_mean': 1e-10, 'C_std': 4.998750156230471e-11, 'm_mean': 3.4, 'm_std': 0.25}


--------------------------------------------------------------------------------
/data/data_test:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ansak95/DeepSSL/bfc73ece3975e85bf14bdb3f97313f1b198b66a1/data/data_test


--------------------------------------------------------------------------------
/data/raw_data_train_complete:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:08ff7c3742949fb1b16c43557408c811bcb0d0813140024da7b486e084f3a349
3 | size 229112426
4 | 


--------------------------------------------------------------------------------
/models/GRU_Decoder.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import math
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import torch.optim as optim
  6 | from torch.nn.modules.normalization import LayerNorm
  7 | #from torchviz import make_dot
  8 | from torch.autograd import Variable
  9 | from torch.nn.modules import ModuleList
 10 | import copy
 11 | 
 12 | 
 13 | import numpy as np
 14 | import os
 15 | from tqdm import tqdm_notebook, trange
 16 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 17 | 
 18 | 
 19 | 
 20 | class GRU_Decoder(nn.Module):
 21 |     def __init__(self,  input_dim, emb_dim, hidden_dim, output_dim, n_layers, drop_prob, mean_val, std_val, criterion, init_bias, length_seq):
 22 |         super(GRU_Decoder, self).__init__()
 23 |         self.hidden_dim = hidden_dim
 24 |         self.n_layers = n_layers
 25 |         self.output_dim = output_dim
 26 |         self.length_seq = length_seq
 27 |         
 28 |         
 29 |         self.encoder = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device)
 30 |         self.emb = nn.Linear(input_dim, emb_dim).to(device)
 31 |         self.fc = nn.Linear(hidden_dim, output_dim).to(device)
 32 |         
 33 |         self.criterion = criterion.to(device)
 34 |         self.factor = -1
 35 |         self.act = nn.SiLU()
 36 |         self.mean = mean_val.to(device)
 37 |         self.std = std_val.to(device)
 38 |         self.ft = False
 39 |         self.drop = nn.Dropout(0)
 40 |         self.norm = nn.LayerNorm(emb_dim, elementwise_affine=False)
 41 |         self.norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False)
 42 |         self.init_bias = init_bias
 43 |         self.init_weights()
 44 |     
 45 |     def init_weights(self):
 46 |         self.apply(self._init_weights)
 47 |         
 48 |     def _init_weights(self, module):
 49 |         if isinstance(module, (nn.Linear)) and (module.bias is not None) :#and ():
 50 |             nn.init.xavier_normal_(module.weight.data)
 51 |             if module.weight.shape[0] == self.output_dim : 
 52 |                 module.bias.data = module.bias.data + self.init_bias
 53 | 
 54 | 
 55 |         elif isinstance(module, nn.GRU)  :
 56 |             for layer_p in module._all_weights:
 57 |                 for p in layer_p:
 58 |                     if 'weight' in p:
 59 |                         nn.init.xavier_normal_(module.__getattr__(p))
 60 |                     
 61 |                          
 62 |     def transform_minmax(self, input) :
 63 |         return (input-self.mean)/self.std
 64 |     
 65 |     def invtransform_minmax(self, input) :
 66 |         return input*self.std+self.mean
 67 |     
 68 | 
 69 |     def forward(self, input, y = None):
 70 | 
 71 |         input = self.transform_minmax(input).to(device)       
 72 |         x = self.emb(input)
 73 |         x = self.norm(x)
 74 |         x = self.drop(x)
 75 |         
 76 |         memory, cn = self.encoder(x) 
 77 |         inp = x + self.norm1(memory)
 78 |         
 79 |         out = self.factor*self.act(inp)
 80 | 
 81 |         if self.ft == False :	
 82 |             out = self.fc(out[:,-self.length_seq:,:])
 83 |         else :
 84 |             out = self.fc(out,cn[-1:])
 85 |         
 86 |        
 87 |        
 88 |         if y != None :
 89 |             y = self.transform_minmax(y)
 90 |             loss = self.criterion(out,y)
 91 |             return out, loss, memory, cn, inp
 92 |         else : 
 93 |             return out, memory, cn,inp
 94 |         
 95 |     def train_model(self, loader, optimizer) :
 96 |         loss = 0   
 97 |         for i, data in enumerate(loader):   
 98 |             X_train_batch, y_train_batch = data[0].cuda(),data[1].cuda().float()  #torch.cuda.device_count()
 99 |             optimizer.zero_grad()
100 |             loss = self.forward(X_train_batch, y_train_batch)[1]#[0].reshape(-1)       
101 |             loss.backward()
102 |             optimizer.step()
103 |         
104 |     def eval_mape(self, loader) :
105 |         metric_mape = 0
106 |         with torch.no_grad() :
107 |             for i, data in enumerate(loader):   
108 |                 x, y = data[0].to(device),data[1].to(device).float()
109 |                 y_out = self.invtransform_minmax(self.forward(x)[0])
110 |                 metric_mape += torch.mean(torch.abs((y_out-y)/y)).item()
111 |         return metric_mape/(i+1)
112 |     
113 |     def number_of_parameters(self):
114 |         return(sum(p.numel() for p in self.parameters() if p.requires_grad))
115 | 
116 | 
117 | 
118 | 
119 | 
120 | 


--------------------------------------------------------------------------------
/models/GRU_ED.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import math
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import torch.optim as optim
  6 | from torch.nn.modules.normalization import LayerNorm
  7 | #from torchviz import make_dot
  8 | from torch.autograd import Variable
  9 | from torch.nn.modules import ModuleList
 10 | import copy
 11 | 
 12 | import numpy as np
 13 | import os
 14 | from tqdm import tqdm_notebook, trange
 15 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 16 | 
 17 | 
 18 | class GRU_ED(nn.Module):
 19 |     def __init__(self,  input_dim, emb_dim, hidden_dim, output_dim, n_layers, drop_prob, mean_val, std_val, criterion, init_bias, length_seq):
 20 |         super(GRU_ED, self).__init__()
 21 |         self.hidden_dim = hidden_dim
 22 |         self.n_layers = n_layers
 23 |         self.output_dim = output_dim
 24 |         self.length_seq = length_seq
 25 |         self.ft = False
 26 |         
 27 |         self.encoder = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device)
 28 |         
 29 |         self.decoder = nn.GRU(hidden_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device)
 30 |         self.emb = nn.Linear(input_dim, emb_dim).to(device)
 31 |         self.fc = nn.Linear(hidden_dim, output_dim).to(device)
 32 |         
 33 |         self.criterion = criterion.to(device)
 34 |         self.factor = -1
 35 |         self.act = nn.SiLU()
 36 |         self.mean = mean_val.to(device)
 37 |         self.std = std_val.to(device)
 38 | 
 39 |         self.drop = nn.Dropout(0)
 40 |         self.norm = nn.LayerNorm(emb_dim, elementwise_affine=False)
 41 |         self.norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False)
 42 |         self.norm2 = nn.LayerNorm(hidden_dim, elementwise_affine=False)
 43 |         self.init_bias = init_bias
 44 |         self.init_weights()
 45 |     
 46 |     def init_weights(self):
 47 |         self.apply(self._init_weights)
 48 |         
 49 |     def _init_weights(self, module):
 50 |         if isinstance(module, (nn.Linear)) and (module.bias is not None) :#and ():
 51 |             nn.init.xavier_normal_(module.weight.data)
 52 |             if module.weight.shape[0] == self.output_dim : 
 53 |                 module.bias.data = module.bias.data + self.init_bias
 54 | 
 55 | 
 56 |         elif isinstance(module, nn.GRU)  :
 57 |             for layer_p in module._all_weights:
 58 |                 for p in layer_p:
 59 |                     if 'weight' in p:
 60 |                         nn.init.xavier_normal_(module.__getattr__(p))
 61 |                     
 62 |                          
 63 |     def transform_minmax(self, input) :
 64 |         return (input-self.mean)/self.std
 65 |     
 66 |     def invtransform_minmax(self, input) :
 67 |         return input*self.std+self.mean
 68 |     
 69 | 
 70 |     def forward(self, input, y = None):
 71 | 
 72 |         input = self.transform_minmax(input).to(device)       
 73 |         
 74 |         #embed
 75 |         x = self.emb(input)
 76 |         x = self.norm(x)
 77 |         #x = self.drop(x)
 78 |         
 79 |         #encode
 80 |         memory, context = self.encoder(x) 
 81 |         z = self.act(x + self.norm1(memory)) #z
 82 |         
 83 |         
 84 |         if self.ft == False :
 85 |           #decode
 86 |           out, cn = self.decoder(z,context)
 87 |           out = self.act(z + self.norm2(out))
 88 |           
 89 |           #linear layer
 90 |           out = self.fc(self.factor*out)
 91 |         else :
 92 |           out = self.fc(z, context[-1:])
 93 |         
 94 |        
 95 |        
 96 |         if y != None :
 97 |             y = self.transform_minmax(y)
 98 |             loss = self.criterion(out,y)
 99 |             return out, loss, memory, context, z
100 |         else : 
101 |             return out, memory,context, z
102 |         
103 |     def train_model(self, loader, optimizer) :
104 |         loss = 0   
105 |         for i, data in enumerate(loader):   
106 |             X_train_batch, y_train_batch = data[0].cuda(),data[1].cuda().float()  #torch.cuda.device_count()
107 |             optimizer.zero_grad()
108 |             loss = self.forward(X_train_batch, y_train_batch)[1]#[0].reshape(-1)       
109 |             loss.backward()
110 |             optimizer.step()
111 |         
112 |     def eval_mape(self, loader) :
113 |         metric_mape = 0
114 |         with torch.no_grad() :
115 |             for i, data in enumerate(loader):   
116 |                 x, y = data[0].to(device),data[1].to(device).float()
117 |                 y_out = self.invtransform_minmax(self.forward(x)[0])
118 |                 metric_mape += torch.mean(torch.abs((y_out-y)/y)).item()
119 |         return metric_mape/(i+1)
120 |     
121 |     def number_of_parameters(self):
122 |         return(sum(p.numel() for p in self.parameters() if p.requires_grad))
123 | 
124 | 
125 | 
126 | 
127 | 
128 | 


--------------------------------------------------------------------------------
/models/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/models/VGRU_ED.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import math
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import torch.optim as optim
  6 | from torch.nn.modules.normalization import LayerNorm
  7 | #from torchviz import make_dot
  8 | from torch.autograd import Variable
  9 | from torch.nn.modules import ModuleList
 10 | import copy
 11 | 
 12 | import numpy as np
 13 | import os
 14 | from tqdm import tqdm_notebook, trange
 15 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 16 | 
 17 | 
 18 | class VGRU_ED(nn.Module):
 19 |     def __init__(self,  input_dim, emb_dim, hidden_dim, output_dim, n_layers, drop_prob, mean_val, std_val, criterion, init_bias, length_seq, weight_kl = 5e-4):
 20 |         super(VGRU_ED, self).__init__()
 21 |         self.hidden_dim = hidden_dim
 22 |         self.n_layers = n_layers
 23 |         self.output_dim = output_dim
 24 |         self.length_seq = length_seq
 25 |         self.ft = False
 26 |         self.w = weight_kl
 27 |         
 28 |         self.encoder = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device)
 29 |         self.fc_enc_mu = nn.Sequential(nn.Linear(64,64),
 30 |               nn.Dropout(0.1),
 31 |               nn.LayerNorm(64, elementwise_affine=False),                      
 32 |               nn.GELU(),
 33 |               nn.Linear(64,64)).to(device)
 34 |         
 35 |         self.fc_enc_logvar = nn.Sequential(nn.Linear(64,64),
 36 |               nn.Dropout(0.1),
 37 |               nn.LayerNorm(64, elementwise_affine=False),                      
 38 |               nn.GELU(),
 39 |               nn.Linear(64,64)).to(device)
 40 |         
 41 |         
 42 |         self.decoder = nn.GRU(hidden_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device)
 43 |         self.emb = nn.Linear(input_dim, emb_dim).to(device)
 44 |         self.fc = nn.Linear(hidden_dim, output_dim).to(device)
 45 |         
 46 |         self.criterion = criterion.to(device)
 47 |         self.factor = -1
 48 |         self.act = nn.SiLU()
 49 |         self.mean = mean_val.to(device)
 50 |         self.std = std_val.to(device)
 51 | 
 52 |         self.drop = nn.Dropout(0)
 53 |         self.norm = nn.LayerNorm(emb_dim, elementwise_affine=False)
 54 |         self.norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False)
 55 |         self.norm2 = nn.LayerNorm(hidden_dim, elementwise_affine=False)
 56 |         self.init_bias = init_bias
 57 |         self.init_weights()
 58 |     
 59 |     def init_weights(self):
 60 |         self.apply(self._init_weights)
 61 |         
 62 |     def _init_weights(self, module):
 63 |         if isinstance(module, (nn.Linear)) and (module.bias is not None) :#and ():
 64 |             nn.init.xavier_normal_(module.weight.data)
 65 |             if module.weight.shape[0] == self.output_dim : 
 66 |                 module.bias.data = module.bias.data + self.init_bias
 67 | 
 68 | 
 69 |         elif isinstance(module, nn.GRU)  :
 70 |             for layer_p in module._all_weights:
 71 |                 for p in layer_p:
 72 |                     if 'weight' in p:
 73 |                         nn.init.xavier_normal_(module.__getattr__(p))
 74 |                     
 75 |                          
 76 |     def transform_minmax(self, input) :
 77 |         return (input-self.mean)/self.std
 78 |     
 79 |     def invtransform_minmax(self, input) :
 80 |         return input*self.std+self.mean
 81 |     
 82 |     def reparameterize(self, mu, log_var):
 83 |         # std can not be negative, thats why we use log variance
 84 |         sigma = torch.exp(0.5 * log_var) + 1e-5
 85 |         eps = torch.randn_like(sigma)
 86 |         return mu + sigma * eps
 87 |         
 88 |     def forward(self, input, y = None):
 89 | 
 90 |         input = self.transform_minmax(input).to(device)       
 91 |         
 92 |         #embed
 93 |         x = self.emb(input)
 94 |         x = self.norm(x)
 95 |         #x = self.drop(x)
 96 |         
 97 |         #encode
 98 |         memory, context = self.encoder(x) 
 99 |         memory = self.act(x + self.norm1(memory)) #z
100 |         
101 |         # Split the result embedding into mu and var components
102 |         # of the latent Gaussian distribution
103 |         mu = self.fc_enc_mu(memory)
104 |         log_var = self.fc_enc_logvar(memory)
105 |         
106 |         #compute the latent embedding
107 |         z = self.reparameterize(mu, log_var)
108 |         
109 |         
110 |         if self.ft == False :
111 |           if self.train :
112 |             #decode
113 |             out, cn = self.decoder(z,context)
114 |             out = self.act(z + self.norm2(out))
115 |             
116 |           else :
117 |             #decode
118 |             out, cn = self.decoder(mu,context)
119 |             out = self.act(mu + self.norm2(out))
120 |           
121 |           #linear layer
122 |           out = self.fc(self.factor*out)
123 |         
124 |         else :
125 |           out = self.fc(mu, context[-1:]) #keeps only the mean, not the latent embedding
126 |         
127 |        
128 |         if y != None :
129 |             y = self.transform_minmax(y)
130 |             reconstruction_error = self.criterion(out,y)
131 |             kl_divergence = (-0.5 * torch.sum(1 + log_var - mu**2 - log_var.exp()))
132 |             loss = (reconstruction_error + self.w*kl_divergence).sum()
133 |             return out, loss, memory, mu, z
134 |         else : 
135 |             return out, memory, mu, z
136 |         
137 |     def train_model(self, loader, optimizer) :
138 |         loss = 0   
139 |         for i, data in enumerate(loader):   
140 |             X_train_batch, y_train_batch = data[0].cuda(),data[1].cuda().float()  #torch.cuda.device_count()
141 |             optimizer.zero_grad()
142 |             loss = self.forward(X_train_batch, y_train_batch)[1]#[0].reshape(-1)       
143 |             loss.backward()
144 |             optimizer.step()
145 |         
146 |     def eval_mape(self, loader) :
147 |         metric_mape = 0
148 |         with torch.no_grad() :
149 |             for i, data in enumerate(loader):   
150 |                 x, y = data[0].to(device),data[1].to(device).float()
151 |                 y_out = self.invtransform_minmax(self.forward(x)[0])
152 |                 metric_mape += torch.mean(torch.abs((y_out-y)/y)).item()
153 |         return metric_mape/(i+1)
154 |     
155 |     def number_of_parameters(self):
156 |         return(sum(p.numel() for p in self.parameters() if p.requires_grad))
157 | 
158 | 
159 | 
160 | 
161 | 
162 | 


--------------------------------------------------------------------------------
/training_GRU_Decoder_t1.py:
--------------------------------------------------------------------------------
  1 | # import deepspeed
  2 | 
  3 | import torch.nn as nn
  4 | 
  5 | import pickle
  6 | 
  7 | from torch.utils.data import TensorDataset, DataLoader
  8 | 
  9 | import pandas as pd
 10 | 
 11 | import datetime 
 12 | 
 13 | import matplotlib.pyplot as plt
 14 | from IPython.display import display
 15 | 
 16 | import os
 17 | import argparse
 18 | import random
 19 | import tqdm
 20 | import time
 21 | import numpy as np
 22 | import torch
 23 | import torch.optim as optim
 24 | from torch.nn import functional as F
 25 | from torch.utils.data import DataLoader, Dataset
 26 | 
 27 | 
 28 | import torch.nn.functional as nnf
 29 | import torch.nn.functional as F
 30 | from torch.nn.modules import ModuleList
 31 | from torch.nn.modules.normalization import LayerNorm
 32 | from torch.cuda.amp import autocast
 33 | from GRU_Decoder import GRU_Decoder
 34 | 
 35 | 
 36 | 
 37 | 
 38 | 
 39 | 
 40 | 
 41 | 
 42 | def training_args():
 43 |     parser=argparse.ArgumentParser(description='GRU')
 44 |     
 45 |     parser.add_argument('--timestep', default=1, type=int,
 46 |                         help='Pred timestep')	
 47 |     parser.add_argument('--num_gpus', default=1, type=int,
 48 |                         help='nb_gpus')
 49 |     parser.add_argument('--nlayers', default=4, type=int,
 50 |                         help='Number of Layers (default: 2)')
 51 |     parser.add_argument('-b', '--batch_size', default=4096, type=int,
 52 |                         help='mini-batch size (default: 4096)')
 53 |     parser.add_argument('-e', '--epochs', default=10, type=int,
 54 |                         help='number of total epochs (default: 30)')
 55 |     parser.add_argument('--hidden_size', default=64, type=int,
 56 |                         help='Nb_neurons (default: 64)')
 57 |     
 58 |     parser.add_argument('--device', default=0, type=int,
 59 |                         help='which device')
 60 |     
 61 |     parser.add_argument('--maxlen', default=30, type=int,
 62 |                         help='Windows length (default : 30)')
 63 |     parser.add_argument('--timestep_pred', default=1, type=int,
 64 |                         help='Pred sequence length (default : 1)')
 65 |     
 66 |     parser.add_argument('--ratio', default=1, type=float,
 67 |                         help='Ratio sequence (default: 1)')
 68 |     parser.add_argument('--drop', default=0.1, type=float,
 69 |                         help='Dropout (default: 0.1)')
 70 |     
 71 |     
 72 |     #     parser = deepspeed.add_config_arguments(parser)
 73 |     args=parser.parse_args()
 74 |     return args
 75 | 
 76 | # constants
 77 | 
 78 | args = training_args()
 79 | print(args)
 80 | # cmd_args = add_argument()
 81 | nb_gauges = 3
 82 | 
 83 | 
 84 | device = torch.device('cuda')#torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 85 | 
 86 | bs = args.batch_size
 87 | epochs = args.epochs
 88 | maxlen = args.maxlen
 89 | 
 90 | 
 91 | 
 92 | import os
 93 | fd_data = os.path.split(os.getcwd())[0] ##+ '/Data_'+ str(codebook_size) +'Clusters' 
 94 | 
 95 | df = pd.read_pickle(fd_data + '/raw_data_train_complete').reset_index().iloc[:,1:]
 96 | data_train = df[(df.ID<=95) & (df.cycle != -1)]
 97 | data_val = df[(df.ID>9995) & (df.cycle != -1)].reset_index()
 98 | 
 99 | 
100 | 
101 | from torch.utils.data import TensorDataset, DataLoader
102 | 
103 | seq_cols =  ['gauge'+ str(i+1) for i in range(3)]
104 | sequence_length = 30
105 | timesteps_pred = args.timestep_pred
106 | 
107 | def gen_sequence_autoregressive(id_df, seq_length, seq_cols,timesteps_pred,h, ratio = 1):
108 |     
109 |     ind_start = 0
110 |     data_array = id_df[seq_cols].values
111 |     th = int(ratio*data_array.shape[0])
112 |     data_array = data_array[:th]
113 |     num_elements = data_array.shape[0]
114 |     
115 |     for start, stop in zip(range(0+ind_start, num_elements-seq_length+1-timesteps_pred), range(seq_length+ind_start, num_elements+1-timesteps_pred)):
116 |         yield data_array[start+h:stop+h, :]#,data_array[start:stop, :])
117 |       
118 | 
119 | def autoregressive_preprocess(data, sequence_length, seq_cols, timestep_pred, type_set = 'float', ratio = 1) :
120 |     
121 |     seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 
122 |                    for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length)
123 |     # generate sequences and convert to numpy array
124 |     dbX = np.concatenate(list(seq_gen))#[:,:,:1]
125 |     
126 |     seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = timestep_pred, ratio = ratio)) 
127 |                    for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length)
128 |     # generate sequences and convert to numpy array
129 |     dbY = np.concatenate(list(seq_gen))#[:,:,:1]
130 |     dbY = dbY[:,-timestep_pred:,:]
131 |     
132 |     print(dbX.shape)
133 |     print(dbY.shape)
134 |     
135 |     
136 |     
137 |     print('Preparing datasets')
138 |     if type_set =='float' :
139 |         X = torch.tensor(dbX, dtype=torch.float)#.to(device)
140 |         Y = torch.tensor(dbY, dtype=torch.float)#.to(device)
141 |     elif type_set =='long' :
142 |         X = torch.tensor(dbX, dtype=torch.long)#.to(device)
143 |         Y = torch.tensor(dbY, dtype=torch.long)#.to(device)
144 | 
145 |     return TensorDataset(X, Y), X, Y#, dbY.mean(0), dbY.std(0)
146 | 
147 | from torch.utils.data import TensorDataset, DataLoader
148 | def create_loaders(data, bs=512, jobs=0):
149 |     data = DataLoader(data, bs, shuffle=True, num_workers=jobs, pin_memory = True)
150 |     return data
151 | for rt in [60, 70, 80, 90] :
152 | 
153 |     
154 |     
155 |     train_dl, X_train, y_train = autoregressive_preprocess(data_train, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 
156 |     val_dl,  X_val,y_val  = autoregressive_preprocess(data_val, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 
157 |     
158 |     X_trn_full = torch.cat([X_train,X_val],0)
159 |     y_trn_full = torch.cat([y_train,y_val],0)
160 |     full_train_dl = TensorDataset(X_trn_full, y_trn_full)
161 |     
162 |     tmp = X_trn_full[:,-1,:]#.values
163 |     #trn_min = tmp.min(axis=0).reshape(1,-1)#[0]
164 |     #trn_max = tmp.max(axis=0).reshape(1,-1)#[0]
165 |     trn_mean = tmp.mean(axis=0).reshape(1,-1)#[0]
166 |     trn_std = tmp.std(axis=0).reshape(1,-1)#[0]
167 |     print(trn_mean)
168 |     print(trn_std)
169 |     
170 |     bs = args.batch_size
171 |     trn_dl = create_loaders(train_dl, bs, jobs=1)
172 |     val_dl = create_loaders(val_dl, 4096, jobs=1)
173 |     
174 |     
175 |     
176 |     def update_lr(optimizer, lr):
177 |         for g in optimizer.param_groups:
178 |             g['lr'] = lr
179 |     
180 |     import time
181 |        
182 |         
183 |     
184 |     
185 |     hidden_size = args.hidden_size
186 |     nlayers = args.nlayers
187 |     embedding_size = args.hidden_size
188 |     dropout = args.drop
189 |     
190 |     def load_checkpoint(filepath, train = False):
191 |         checkpoint = torch.load(filepath)
192 |         model = checkpoint['model']
193 |         model.load_state_dict(checkpoint['state_dict'])
194 |         
195 |         if train :
196 |             for parameter in model.parameters():
197 |                 parameter.requires_grad = True
198 |             model.train()
199 |         else :
200 |             for parameter in model.parameters():
201 |                 parameter.requires_grad = False
202 |             model.eval()
203 |         return model
204 |     
205 |     criterion = nn.MSELoss()
206 |     trn_mean = torch.tensor(trn_mean).float()#.to(device).float()
207 |     trn_std = torch.tensor(trn_std).float()
208 |     bias = torch.tensor([torch.mean((y_train[:,:,k]-trn_mean[0,k])/trn_std[0,k]) for k in range(3)]).to(device)
209 |     model = GRU_Decoder(input_dim=3,emb_dim = hidden_size, hidden_dim=hidden_size, output_dim=3, n_layers=nlayers, drop_prob=dropout, mean_val = trn_mean, std_val = trn_std, criterion = criterion, init_bias = bias, length_seq = args.timestep_pred)
210 |     nb_params = model.number_of_parameters()
211 |     print(nb_params)
212 |     lr = 1e-2
213 |     optimizer = optim.Adam(model.parameters(), lr=lr)#, betas=(0.9, 0.95), eps=1e-08)
214 |     # criterion = nn.MS#nn.CrossEntropyLoss(weight = class_weights).to(device)
215 |       
216 |         
217 |     if torch.cuda.device_count() > 1:
218 |         print("Let's use", torch.cuda.device_count(), "GPUs!")
219 |         model = nn.DataParallel(model)        
220 |     model.to(device)
221 |     
222 |     
223 |     
224 |     #create folder
225 |     dir_path = f"Decoder_t1_{rt}_100"#folder_models + '/'
226 |     os.makedirs(dir_path)
227 |     dir_path = dir_path + '/'
228 |     
229 |     #save the model architecture
230 |     f = open(dir_path+"model_parameters.txt", "a")
231 |     f.write(str(model.state_dict))
232 |     f.close()
233 |     
234 |     # #save the log 
235 |     f = open(dir_path+"log_loss.txt", "a")
236 |     # f.write(str(model.state_dict))
237 |     f.close()
238 |     
239 |     PATH = "model.pth"
240 |     
241 |     #save the args
242 |     f = open(dir_path+"args.txt", "w+")
243 |     f.write(str(args))
244 |     f.close()
245 |     
246 |     #which optimizer
247 |     f = open(dir_path+"optim.txt", "w+")
248 |     f.write(str(optimizer))
249 |     f.close()                    
250 |     
251 |     t0 = time.time()
252 |     
253 |     
254 |     all_trn_mape_track = []
255 |     all_val_mape_track = []
256 |     
257 |     trn_mape_track = []
258 |     val_mape_track = []
259 |     
260 |     
261 |     j = 0  #indicator used to load the model
262 |     k = 0
263 |     best_mape = 10000
264 |     step = 0
265 |     epoch_stop = np.zeros(3)
266 |     #save some useful informations
267 |     infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\  Sequence length : {maxlen} \
268 |     \\  Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \
269 |     \\ Optimizer learning rate : {lr} \\  Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 
270 |     f = open(dir_path+"training_readme.txt", "w+")
271 |     f.write(infos_model)
272 |     f.close()
273 |     
274 |     
275 |     # instantiate model
276 |     torch.manual_seed(7)
277 |     torch.cuda.manual_seed(7)
278 |     for l_r in [lr, 1e-3, 1e-4] :
279 |         
280 |         
281 |         
282 |         if j != 0 :
283 |             f = open(dir_path+"log_loss.txt", "a")
284 |             f.write("Load the model...")
285 |             f.write("\n")
286 |             f.close()
287 |             
288 |             model = load_checkpoint(dir_path+PATH, train = True)
289 |             
290 |             if torch.cuda.device_count() > 1:
291 |                 print("Let's use", torch.cuda.device_count(), "GPUs!")
292 |                 model = nn.DataParallel(model)        
293 |             model.to(device)
294 |             
295 |          
296 |             
297 |             checkpoint = torch.load(dir_path+PATH)
298 |             optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08)
299 |             optimizer.load_state_dict(checkpoint['optimizer_dic'])
300 |             update_lr(optimizer, l_r)
301 |             best_mape = checkpoint['best_mape']   
302 |             trn_mape_track = checkpoint['mape']
303 |             val_mape_track = checkpoint['val_mape']
304 |             epoch_stop = checkpoint['epoch_stop']
305 |     
306 |         j = j+1
307 |         
308 |         # TRAINING    
309 |         f = open(dir_path+"log_loss.txt", "a")
310 |         f.write("Begin training." + "\n")
311 |         f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr']))
312 |         f.write("\n")
313 |         f.close()
314 |          
315 |         
316 |         
317 |         
318 |         for epoch in range(args.epochs):
319 |             model.train()
320 |         #         patience = patience-1
321 |             t1 = time.time()
322 |             loss = 0
323 |             trn_mape = 0
324 |     
325 |     
326 |             for i, data in enumerate(trn_dl):   
327 |          
328 |             
329 |                 X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
330 |                 optimizer.zero_grad()
331 |                 loss = model(X_train_batch, y_train_batch)[1]
332 |                 loss.backward()
333 |                 optimizer.step()
334 |     
335 |     
336 |     
337 |             # Eval phase  
338 |             model.eval()
339 |             with torch.no_grad() :
340 |                 train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
341 |                 trn_mape_track.append(train_mape)
342 |                 all_trn_mape_track.append(train_mape)
343 |                 
344 |                 
345 |                 val_mape = model.eval_mape(val_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
346 |                 val_mape_track.append(val_mape)
347 |                 all_val_mape_track.append(val_mape)
348 |     
349 |     
350 |             f = open(dir_path+"log_loss.txt", "a")
351 |             f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}, val mape : {val_mape:2.2%}')
352 |             f.write("\n")
353 |             f.close()
354 |             
355 |             
356 |             if val_mape < best_mape :
357 |                 #trials = 0
358 |                 best_mape = val_mape#.item()
359 |                 epoch_stop[j:] = k
360 |                 
361 |                 f = open(dir_path+"log_loss.txt", "a")
362 |                 f.write(f'Epoch {epoch+1} best model saved with mape: {val_mape:2.2%}')
363 |                 f.write("Save the model...")
364 |                 f.write("\n")
365 |                 f.close()
366 |         
367 |                 checkpoint = {'model': model, 
368 |                               'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track,
369 |                       'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop,
370 |                       'optimizer_dic' : optimizer.state_dict()}
371 |                 torch.save(checkpoint, dir_path+PATH)
372 |                     
373 |             k = k+1        
374 |     
375 |     
376 |     
377 |     for l_r in [1e-5] :
378 |     
379 |         f = open(dir_path+"log_loss.txt", "a")
380 |         f.write("Load the model...")
381 |         f.write("\n")
382 |         f.close()
383 |         
384 |         model = load_checkpoint(dir_path+PATH, train = True)
385 |         
386 |         if torch.cuda.device_count() > 1:
387 |             print("Let's use", torch.cuda.device_count(), "GPUs!")
388 |             model = nn.DataParallel(model)        
389 |         model.to(device)
390 |         
391 |      
392 |         
393 |         checkpoint = torch.load(dir_path+PATH)
394 |         optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08)
395 |         optimizer.load_state_dict(checkpoint['optimizer_dic'])
396 |         update_lr(optimizer, l_r)
397 |         best_mape = 10000#checkpoint['best_mape']   
398 |         trn_mape_track = checkpoint['mape']
399 |         val_mape_track = checkpoint['val_mape']
400 |         epoch_stop = checkpoint['epoch_stop']
401 |     
402 |         j = j+1
403 |         
404 |         # TRAINING    
405 |         f = open(dir_path+"log_loss.txt", "a")
406 |         f.write("Begin training (full set)." + "\n")
407 |         f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr']))
408 |         f.write("\n")
409 |         f.close()
410 |          
411 |         print(f'Creating data loaders with batch size: {bs}')
412 |         trn_dl = create_loaders(full_train_dl, bs, jobs=1)#4*args.num_gpus)
413 |         #trn_dl_eval = create_loaders(full_train_dl, 4096*4, jobs=1)#4*args.num_gpus)
414 |         torch.cuda.empty_cache()  
415 |             
416 |             
417 |         for epoch in range(args.epochs):
418 |             model.train()
419 |             t1 = time.time()
420 |             loss = 0
421 |             trn_mape = 0
422 |     
423 |     
424 |             for i, data in enumerate(trn_dl):  
425 |                 X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
426 |                 optimizer.zero_grad()
427 |                 loss = model(X_train_batch, y_train_batch)[1]
428 |                 loss.backward()
429 |                 optimizer.step()
430 |             
431 |             model.eval()
432 |             with torch.no_grad() :
433 |                 train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
434 |             trn_mape_track.append(train_mape)
435 |             all_trn_mape_track.append(train_mape)
436 |             
437 |             
438 |             f = open(dir_path+"log_loss.txt", "a")
439 |             f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}')
440 |             f.write("\n")
441 |             f.close()
442 |             
443 |             if train_mape < best_mape :
444 |                 #trials = 0
445 |                 best_mape = train_mape#.item()
446 |                 epoch_stop[j:] = k
447 |                 
448 |                 f = open(dir_path+"log_loss.txt", "a")
449 |                 f.write(f'Epoch {epoch+1} best model saved with mape: {train_mape:2.2%}')
450 |                 f.write("Save the model...")
451 |                 f.write("\n")
452 |                 f.close()
453 |         
454 |                 checkpoint = {'model': model, 
455 |                               'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track,
456 |                       'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop,
457 |                       'optimizer_dic' : optimizer.state_dict()}
458 |                 torch.save(checkpoint, dir_path+PATH)
459 |             
460 |             
461 |     
462 |     
463 |     #save some useful informations
464 |     infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\  Sequence length : {maxlen} \
465 |     \\  Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\  Number of epochs : {k+1} \
466 |     \\ Optimizer learning rate : {lr} \\  Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 
467 |     f = open(dir_path+"training_readme.txt", "w+")
468 |     f.write(infos_model)
469 |     f.close()            
470 |     
471 |     #del model and empty cache
472 |     del(model)
473 |     torch.cuda.empty_cache()       
474 |     
475 |     
476 |     
477 | 
478 | 
479 | 
480 | 
481 | 
482 | 
483 | 
484 | 
485 | 
486 | 
487 | 
488 | 
489 | 
490 | 
491 | 
492 | 
493 | 
494 | 
495 | 
496 | 
497 | 
498 | 
499 | 
500 | 
501 | 
502 |         
503 | 


--------------------------------------------------------------------------------
/training_GRU_ED.py:
--------------------------------------------------------------------------------
  1 | # import deepspeed
  2 | import pickle
  3 | import pandas as pd
  4 | import datetime 
  5 | 
  6 | import matplotlib.pyplot as plt
  7 | from IPython.display import display
  8 | 
  9 | import os
 10 | import argparse
 11 | import random
 12 | import tqdm
 13 | import time
 14 | import numpy as np
 15 | import torch
 16 | import torch.optim as optim
 17 | from torch.nn import functional as F
 18 | from torch.utils.data import TensorDataset, DataLoader, Dataset
 19 | 
 20 | import torch.nn.functional as F
 21 | from torch.nn.modules import ModuleList
 22 | from torch.nn.modules.normalization import LayerNorm
 23 | from torch.cuda.amp import autocast
 24 | from GRU_ED import GRU_ED
 25 | 
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 | def training_args():
 32 |     parser=argparse.ArgumentParser(description='GRU')
 33 |     
 34 |     parser.add_argument('--timestep', default=1, type=int,
 35 |                         help='Pred timestep')	
 36 |     parser.add_argument('--num_gpus', default=1, type=int,
 37 |                         help='nb_gpus')
 38 |     parser.add_argument('--nlayers', default=4, type=int,
 39 |                         help='Number of Layers (default: 2)')
 40 |     parser.add_argument('-b', '--batch_size', default=4096, type=int,
 41 |                         help='mini-batch size (default: 4096)')
 42 |     parser.add_argument('-e', '--epochs', default=10, type=int,
 43 |                         help='number of total epochs (default: 30)')
 44 |     parser.add_argument('--hidden_size', default=64, type=int,
 45 |                         help='Nb_neurons (default: 64)')
 46 |     
 47 |     parser.add_argument('--device', default=0, type=int,
 48 |                         help='which device')
 49 |     
 50 |     parser.add_argument('--maxlen', default=30, type=int,
 51 |                         help='Windows length (default : 30)')
 52 |     parser.add_argument('--timestep_pred', default=1, type=int,
 53 |                         help='Pred sequence length (default : 1)')
 54 |     
 55 |     parser.add_argument('--ratio', default=1, type=float,
 56 |                         help='Ratio sequence (default: 1)')
 57 |     parser.add_argument('--drop', default=0.1, type=float,
 58 |                         help='Dropout (default: 0.1)')
 59 |     
 60 |     
 61 |     #     parser = deepspeed.add_config_arguments(parser)
 62 |     args=parser.parse_args()
 63 |     return args
 64 | 
 65 | # constants
 66 | args = training_args()
 67 | print(args)
 68 | # cmd_args = add_argument()
 69 | nb_gauges = 3
 70 | 
 71 | device = torch.device('cuda')#torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 72 | bs = args.batch_size
 73 | epochs = args.epochs
 74 | maxlen = args.maxlen
 75 | 
 76 | 
 77 | 
 78 | import os
 79 | fd_data = os.path.split(os.getcwd())[0] ##+ '/Data_'+ str(codebook_size) +'Clusters' 
 80 | 
 81 | df = pd.read_pickle(fd_data + '/raw_data_train_complete').reset_index().iloc[:,1:]
 82 | data_train = df[(df.ID<=95) & (df.cycle != -1)]
 83 | data_val = df[(df.ID>9995) & (df.cycle != -1)].reset_index()
 84 | 
 85 | 
 86 | # instantiate model
 87 | torch.manual_seed(7)
 88 | torch.cuda.manual_seed(7)
 89 | 
 90 | seq_cols =  ['gauge'+ str(i+1) for i in range(3)]
 91 | sequence_length = 30
 92 | timesteps_pred = args.timestep_pred
 93 | 
 94 | 
 95 | def gen_sequence_autoregressive(id_df, seq_length, seq_cols,timesteps_pred,h, ratio = 1):
 96 |     
 97 |     ind_start = 0
 98 |     data_array = id_df[seq_cols].values
 99 |     th = int(ratio*data_array.shape[0])
100 |     data_array = data_array[:th]
101 |     num_elements = data_array.shape[0]
102 |     
103 |     for start, stop in zip(range(0+ind_start, num_elements-seq_length+1-timesteps_pred), range(seq_length+ind_start, num_elements+1-timesteps_pred)):
104 |         yield data_array[start+h:stop+h, :]#,data_array[start:stop, :])
105 |       
106 | 
107 | def autoregressive_preprocess(data, sequence_length, seq_cols, timestep_pred, type_set = 'float', ratio = 1) :
108 |     
109 |     seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 
110 |                    for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length)
111 |     # generate sequences and convert to numpy array
112 |     dbX = np.concatenate(list(seq_gen))#[:,:,:1]
113 |     
114 |     seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 
115 |                    for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length)
116 |     # generate sequences and convert to numpy array
117 |     dbY = np.concatenate(list(seq_gen))#[:,:,:1]
118 |     #dbY = dbY[:,-timestep_pred:,:]
119 |     
120 |     print(dbX.shape)
121 |     print(dbY.shape)
122 |     
123 |     
124 |     
125 |     print('Preparing datasets')
126 |     if type_set =='float' :
127 |         X = torch.tensor(dbX, dtype=torch.float)#.to(device)
128 |         Y = torch.tensor(dbY, dtype=torch.float)#.to(device)
129 |     elif type_set =='long' :
130 |         X = torch.tensor(dbX, dtype=torch.long)#.to(device)
131 |         Y = torch.tensor(dbY, dtype=torch.long)#.to(device)
132 | 
133 |     return TensorDataset(X, Y), X, Y#, dbY.mean(0), dbY.std(0)
134 | 
135 | from torch.utils.data import TensorDataset, DataLoader
136 | 
137 | def create_loaders(data, bs=512, jobs=0):
138 |     data = DataLoader(data, bs, shuffle=True, num_workers=jobs, pin_memory = True)
139 |     return data
140 | 
141 | for rt in [60, 70, 80, 90] :
142 | 
143 |     
144 |     timesteps_pred = 0
145 |     train_dl, X_train, y_train = autoregressive_preprocess(data_train, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 
146 |     val_dl,  X_val,y_val  = autoregressive_preprocess(data_val, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 
147 |     
148 |     X_trn_full = torch.cat([X_train,X_val],0)
149 |     y_trn_full = torch.cat([y_train,y_val],0)
150 |     full_train_dl = TensorDataset(X_trn_full, y_trn_full)
151 |     
152 |     tmp = X_trn_full[:,-1,:]#.values
153 |     #trn_min = tmp.min(axis=0).reshape(1,-1)#[0]
154 |     #trn_max = tmp.max(axis=0).reshape(1,-1)#[0]
155 |     trn_mean = tmp.mean(axis=0).reshape(1,-1)#[0]
156 |     trn_std = tmp.std(axis=0).reshape(1,-1)#[0]
157 |     print(trn_mean)
158 |     print(trn_std)
159 |     
160 |     bs = args.batch_size
161 |     trn_dl = create_loaders(train_dl, bs, jobs=1)
162 |     val_dl = create_loaders(val_dl, 4096, jobs=1)
163 |     
164 |     
165 |     
166 |     def update_lr(optimizer, lr):
167 |         for g in optimizer.param_groups:
168 |             g['lr'] = lr
169 |     
170 |     import time
171 |        
172 |         
173 |     hidden_size = args.hidden_size
174 |     nlayers = args.nlayers
175 |     embedding_size = args.hidden_size
176 |     dropout = args.drop
177 |     
178 |     def load_checkpoint(filepath, train = False):
179 |         checkpoint = torch.load(filepath)
180 |         model = checkpoint['model']
181 |         model.load_state_dict(checkpoint['state_dict'])
182 |         
183 |         if train :
184 |             for parameter in model.parameters():
185 |                 parameter.requires_grad = True
186 |             model.train()
187 |         else :
188 |             for parameter in model.parameters():
189 |                 parameter.requires_grad = False
190 |             model.eval()
191 |         return model
192 |     
193 |     criterion = nn.MSELoss()
194 |     trn_mean = torch.tensor(trn_mean).float()#.to(device).float()
195 |     trn_std = torch.tensor(trn_std).float()
196 |     bias = torch.tensor([torch.mean((y_train[:,:,k]-trn_mean[0,k])/trn_std[0,k]) for k in range(3)]).to(device)
197 |     model = GRU_ED(input_dim=3,emb_dim = hidden_size, hidden_dim=hidden_size, output_dim=3, n_layers=nlayers, drop_prob=dropout, mean_val = trn_mean, std_val = trn_std, criterion = criterion, init_bias = bias, length_seq = args.timestep_pred)
198 |     nb_params = model.number_of_parameters()
199 |     print(nb_params)
200 |     lr = 1e-2
201 |     optimizer = optim.Adam(model.parameters(), lr=lr)#, betas=(0.9, 0.95), eps=1e-08)
202 |     # criterion = nn.MS#nn.CrossEntropyLoss(weight = class_weights).to(device)
203 |       
204 |         
205 |     if torch.cuda.device_count() > 1:
206 |         print("Let's use", torch.cuda.device_count(), "GPUs!")
207 |         model = nn.DataParallel(model)        
208 |     model.to(device)
209 |     
210 |     
211 |     
212 |     #create folder
213 |     dir_path = f"Decoder_t1_{rt}_100"#folder_models + '/'
214 |     os.makedirs(dir_path)
215 |     dir_path = dir_path + '/'
216 |     
217 |     #save the model architecture
218 |     f = open(dir_path+"model_parameters.txt", "a")
219 |     f.write(str(model.state_dict))
220 |     f.close()
221 |     
222 |     # #save the log 
223 |     f = open(dir_path+"log_loss.txt", "a")
224 |     # f.write(str(model.state_dict))
225 |     f.close()
226 |     
227 |     PATH = "model.pth"
228 |     
229 |     #save the args
230 |     f = open(dir_path+"args.txt", "w+")
231 |     f.write(str(args))
232 |     f.close()
233 |     
234 |     #which optimizer
235 |     f = open(dir_path+"optim.txt", "w+")
236 |     f.write(str(optimizer))
237 |     f.close()                    
238 |     
239 |     t0 = time.time()
240 |     
241 |     
242 |     all_trn_mape_track = []
243 |     all_val_mape_track = []
244 |     
245 |     trn_mape_track = []
246 |     val_mape_track = []
247 |     
248 |     
249 |     j = 0  #indicator used to load the model
250 |     k = 0
251 |     best_mape = 10000
252 |     step = 0
253 |     epoch_stop = np.zeros(3)
254 |     #save some useful informations
255 |     infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\  Sequence length : {maxlen} \
256 |     \\  Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \
257 |     \\ Optimizer learning rate : {lr} \\  Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 
258 |     f = open(dir_path+"training_readme.txt", "w+")
259 |     f.write(infos_model)
260 |     f.close()
261 |     
262 |     
263 |     
264 |     for l_r in [lr, 1e-3, 1e-4] :
265 |         
266 |         
267 |         if j != 0 :
268 |             f = open(dir_path+"log_loss.txt", "a")
269 |             f.write("Load the model...")
270 |             f.write("\n")
271 |             f.close()
272 |             
273 |             model = load_checkpoint(dir_path+PATH, train = True)
274 |             
275 |             if torch.cuda.device_count() > 1:
276 |                 print("Let's use", torch.cuda.device_count(), "GPUs!")
277 |                 model = nn.DataParallel(model)        
278 |             model.to(device)
279 |             
280 |          
281 |             
282 |             checkpoint = torch.load(dir_path+PATH)
283 |             optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08)
284 |             optimizer.load_state_dict(checkpoint['optimizer_dic'])
285 |             update_lr(optimizer, l_r)
286 |             best_mape = checkpoint['best_mape']   
287 |             trn_mape_track = checkpoint['mape']
288 |             val_mape_track = checkpoint['val_mape']
289 |             epoch_stop = checkpoint['epoch_stop']
290 |     
291 |         j = j+1
292 |         
293 |         # TRAINING    
294 |         f = open(dir_path+"log_loss.txt", "a")
295 |         f.write("Begin training." + "\n")
296 |         f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr']))
297 |         f.write("\n")
298 |         f.close()
299 |          
300 |         
301 |         
302 |         
303 |         for epoch in range(args.epochs):
304 |             model.train()
305 |         #         patience = patience-1
306 |             t1 = time.time()
307 |             loss = 0
308 |             trn_mape = 0
309 |     
310 |     
311 |             for i, data in enumerate(trn_dl):   
312 |          
313 |             
314 |                 X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
315 |                 optimizer.zero_grad()
316 |                 loss = model(X_train_batch, y_train_batch)[1]
317 |                 loss.backward()
318 |                 optimizer.step()
319 |     
320 |     
321 |     
322 |             # Eval phase  
323 |             model.eval()
324 |             with torch.no_grad() :
325 |                 train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
326 |                 trn_mape_track.append(train_mape)
327 |                 all_trn_mape_track.append(train_mape)
328 |                 
329 |                 
330 |                 val_mape = model.eval_mape(val_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
331 |                 val_mape_track.append(val_mape)
332 |                 all_val_mape_track.append(val_mape)
333 |     
334 |     
335 |             f = open(dir_path+"log_loss.txt", "a")
336 |             f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}, val mape : {val_mape:2.2%}')
337 |             f.write("\n")
338 |             f.close()
339 |             
340 |             
341 |             if val_mape < best_mape :
342 |                 #trials = 0
343 |                 best_mape = val_mape#.item()
344 |                 epoch_stop[j:] = k
345 |                 
346 |                 f = open(dir_path+"log_loss.txt", "a")
347 |                 f.write(f'Epoch {epoch+1} best model saved with mape: {val_mape:2.2%}')
348 |                 f.write("Save the model...")
349 |                 f.write("\n")
350 |                 f.close()
351 |         
352 |                 checkpoint = {'model': model, 
353 |                               'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track,
354 |                       'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop,
355 |                       'optimizer_dic' : optimizer.state_dict()}
356 |                 torch.save(checkpoint, dir_path+PATH)
357 |                     
358 |             k = k+1        
359 |     
360 |     
361 |     
362 |     for l_r in [1e-5] :
363 |     
364 |         f = open(dir_path+"log_loss.txt", "a")
365 |         f.write("Load the model...")
366 |         f.write("\n")
367 |         f.close()
368 |         
369 |         model = load_checkpoint(dir_path+PATH, train = True)
370 |         
371 |         if torch.cuda.device_count() > 1:
372 |             print("Let's use", torch.cuda.device_count(), "GPUs!")
373 |             model = nn.DataParallel(model)        
374 |         model.to(device)
375 |         
376 |      
377 |         
378 |         checkpoint = torch.load(dir_path+PATH)
379 |         optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08)
380 |         optimizer.load_state_dict(checkpoint['optimizer_dic'])
381 |         update_lr(optimizer, l_r)
382 |         best_mape = 10000#checkpoint['best_mape']   
383 |         trn_mape_track = checkpoint['mape']
384 |         val_mape_track = checkpoint['val_mape']
385 |         epoch_stop = checkpoint['epoch_stop']
386 |     
387 |         j = j+1
388 |         
389 |         # TRAINING    
390 |         f = open(dir_path+"log_loss.txt", "a")
391 |         f.write("Begin training (full set)." + "\n")
392 |         f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr']))
393 |         f.write("\n")
394 |         f.close()
395 |          
396 |         print(f'Creating data loaders with batch size: {bs}')
397 |         trn_dl = create_loaders(full_train_dl, bs, jobs=1)#4*args.num_gpus)
398 |         #trn_dl_eval = create_loaders(full_train_dl, 4096*4, jobs=1)#4*args.num_gpus)
399 |         torch.cuda.empty_cache()  
400 |             
401 |             
402 |         for epoch in range(args.epochs):
403 |             model.train()
404 |             t1 = time.time()
405 |             loss = 0
406 |             trn_mape = 0
407 |     
408 |     
409 |             for i, data in enumerate(trn_dl):  
410 |                 X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
411 |                 optimizer.zero_grad()
412 |                 loss = model(X_train_batch, y_train_batch)[1]
413 |                 loss.backward()
414 |                 optimizer.step()
415 |             
416 |             model.eval()
417 |             with torch.no_grad() :
418 |                 train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
419 |             trn_mape_track.append(train_mape)
420 |             all_trn_mape_track.append(train_mape)
421 |             
422 |             
423 |             f = open(dir_path+"log_loss.txt", "a")
424 |             f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}')
425 |             f.write("\n")
426 |             f.close()
427 |             
428 |             if train_mape < best_mape :
429 |                 #trials = 0
430 |                 best_mape = train_mape#.item()
431 |                 epoch_stop[j:] = k
432 |                 
433 |                 f = open(dir_path+"log_loss.txt", "a")
434 |                 f.write(f'Epoch {epoch+1} best model saved with mape: {train_mape:2.2%}')
435 |                 f.write("Save the model...")
436 |                 f.write("\n")
437 |                 f.close()
438 |         
439 |                 checkpoint = {'model': model, 
440 |                               'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track,
441 |                       'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop,
442 |                       'optimizer_dic' : optimizer.state_dict()}
443 |                 torch.save(checkpoint, dir_path+PATH)
444 |             
445 |             
446 |     
447 |     
448 |     #save some useful informations
449 |     infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\  Sequence length : {maxlen} \
450 |     \\  Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\  Number of epochs : {k+1} \
451 |     \\ Optimizer learning rate : {lr} \\  Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 
452 |     f = open(dir_path+"training_readme.txt", "w+")
453 |     f.write(infos_model)
454 |     f.close()            
455 |     
456 |     #del model and empty cache
457 |     del(model)
458 |     torch.cuda.empty_cache()       
459 | 


--------------------------------------------------------------------------------
/training_VGRU_ED.py:
--------------------------------------------------------------------------------
  1 | # import deepspeed
  2 | 
  3 | import torch.nn as nn
  4 | 
  5 | import pickle
  6 | 
  7 | from torch.utils.data import TensorDataset, DataLoader
  8 | 
  9 | import pandas as pd
 10 | 
 11 | import datetime 
 12 | 
 13 | import matplotlib.pyplot as plt
 14 | from IPython.display import display
 15 | 
 16 | import os
 17 | import argparse
 18 | import random
 19 | import tqdm
 20 | import time
 21 | import numpy as np
 22 | import torch
 23 | import torch.optim as optim
 24 | from torch.nn import functional as F
 25 | from torch.utils.data import DataLoader, Dataset
 26 | 
 27 | 
 28 | import torch.nn.functional as nnf
 29 | import torch.nn.functional as F
 30 | from torch.nn.modules import ModuleList
 31 | from torch.nn.modules.normalization import LayerNorm
 32 | from torch.cuda.amp import autocast
 33 | from VGRU_ED import VGRU_ED
 34 | 
 35 | 
 36 | 
 37 | 
 38 | 
 39 | 
 40 | 
 41 | 
 42 | def training_args():
 43 |     parser=argparse.ArgumentParser(description='VGRUED')
 44 |     
 45 |     parser.add_argument('--timestep', default=1, type=int,
 46 |                         help='Pred timestep')	
 47 |     parser.add_argument('--num_gpus', default=1, type=int,
 48 |                         help='nb_gpus')
 49 |     parser.add_argument('--nlayers', default=4, type=int,
 50 |                         help='Number of Layers (default: 2)')
 51 |     parser.add_argument('-b', '--batch_size', default=4096, type=int,
 52 |                         help='mini-batch size (default: 4096)')
 53 |     parser.add_argument('-e', '--epochs', default=10, type=int,
 54 |                         help='number of total epochs (default: 30)')
 55 |     parser.add_argument('--hidden_size', default=64, type=int,
 56 |                         help='Nb_neurons (default: 64)')
 57 |     
 58 |     parser.add_argument('--device', default=0, type=int,
 59 |                         help='which device')
 60 |     
 61 |     parser.add_argument('--maxlen', default=30, type=int,
 62 |                         help='Windows length (default : 30)')
 63 |     parser.add_argument('--timestep_pred', default=1, type=int,
 64 |                         help='Pred sequence length (default : 1)')
 65 |     
 66 |     parser.add_argument('--ratio', default=1, type=float,
 67 |                         help='Ratio sequence (default: 1)')
 68 |     parser.add_argument('--drop', default=0.1, type=float,
 69 |                         help='Dropout (default: 0.1)')
 70 |     parser.add_argument('--wgt', default=1e-4, type=float,
 71 |                         help='Weight for loss function')
 72 |   
 73 |     
 74 |     #     parser = deepspeed.add_config_arguments(parser)
 75 |     args=parser.parse_args()
 76 |     return args
 77 | 
 78 | # constants
 79 | 
 80 | args = training_args()
 81 | print(args)
 82 | # cmd_args = add_argument()
 83 | nb_gauges = 3
 84 | 
 85 | 
 86 | device = torch.device('cuda')#torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 87 | 
 88 | bs = args.batch_size
 89 | epochs = args.epochs
 90 | maxlen = args.maxlen
 91 | 
 92 | 
 93 | 
 94 | import os
 95 | fd_data = os.path.split(os.getcwd())[0] ##+ '/Data_'+ str(codebook_size) +'Clusters' 
 96 | 
 97 | df = pd.read_pickle(fd_data + '/raw_data_train_complete').reset_index().iloc[:,1:]
 98 | data_train = df[(df.ID<=9500) & (df.cycle != -1)]
 99 | data_val = df[(df.ID>9500) & (df.cycle != -1)].reset_index()
100 | 
101 | 
102 | 
103 | from torch.utils.data import TensorDataset, DataLoader
104 | # instantiate model
105 | torch.manual_seed(7)
106 | torch.cuda.manual_seed(7)
107 | 
108 | seq_cols =  ['gauge'+ str(i+1) for i in range(3)]
109 | sequence_length = 30
110 | #timesteps_pred = args.timestep_pred
111 | 
112 | def gen_sequence_autoregressive(id_df, seq_length, seq_cols,timesteps_pred,h, ratio = 1):
113 |     
114 |     ind_start = 0
115 |     data_array = id_df[seq_cols].values
116 |     th = int(ratio*data_array.shape[0])
117 |     data_array = data_array[:th]
118 |     num_elements = data_array.shape[0]
119 |     
120 |     for start, stop in zip(range(0+ind_start, num_elements-seq_length+1-timesteps_pred), range(seq_length+ind_start, num_elements+1-timesteps_pred)):
121 |         yield data_array[start+h:stop+h, :]#,data_array[start:stop, :])
122 |       
123 | 
124 | def autoregressive_preprocess(data, sequence_length, seq_cols, timestep_pred, type_set = 'float', ratio = 1) :
125 |     
126 |     seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 
127 |                    for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length)
128 |     # generate sequences and convert to numpy array
129 |     dbX = np.concatenate(list(seq_gen))#[:,:,:1]
130 |     
131 |     seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 
132 |                    for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length)
133 |     # generate sequences and convert to numpy array
134 |     dbY = np.concatenate(list(seq_gen))#[:,:,:1]
135 |     #dbY = dbY[:,-timestep_pred:,:]
136 |     
137 |     print(dbX.shape)
138 |     print(dbY.shape)
139 |     
140 |     
141 |     
142 |     print('Preparing datasets')
143 |     if type_set =='float' :
144 |         X = torch.tensor(dbX, dtype=torch.float)#.to(device)
145 |         Y = torch.tensor(dbY, dtype=torch.float)#.to(device)
146 |     elif type_set =='long' :
147 |         X = torch.tensor(dbX, dtype=torch.long)#.to(device)
148 |         Y = torch.tensor(dbY, dtype=torch.long)#.to(device)
149 | 
150 |     return TensorDataset(X, Y), X, Y#, dbY.mean(0), dbY.std(0)
151 | 
152 | from torch.utils.data import TensorDataset, DataLoader
153 | def create_loaders(data, bs=512, jobs=0):
154 |     data = DataLoader(data, bs, shuffle=True, num_workers=jobs, pin_memory = True)
155 |     return data
156 |     
157 |     
158 | for rt in [60, 70, 80, 90] :
159 | 
160 |     
161 |     timesteps_pred = 0
162 |     train_dl, X_train, y_train = autoregressive_preprocess(data_train, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 
163 |     val_dl,  X_val,y_val  = autoregressive_preprocess(data_val, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 
164 |     
165 |     X_trn_full = torch.cat([X_train,X_val],0)
166 |     y_trn_full = torch.cat([y_train,y_val],0)
167 |     full_train_dl = TensorDataset(X_trn_full, y_trn_full)
168 |     
169 |     tmp = X_trn_full[:,-1,:]#.values
170 |     #trn_min = tmp.min(axis=0).reshape(1,-1)#[0]
171 |     #trn_max = tmp.max(axis=0).reshape(1,-1)#[0]
172 |     trn_mean = tmp.mean(axis=0).reshape(1,-1)#[0]
173 |     trn_std = tmp.std(axis=0).reshape(1,-1)#[0]
174 |     print(trn_mean)
175 |     print(trn_std)
176 |     
177 |     bs = args.batch_size
178 |     trn_dl = create_loaders(train_dl, bs, jobs=1)
179 |     val_dl = create_loaders(val_dl, 4096, jobs=1)
180 |     
181 |     
182 |     
183 |     def update_lr(optimizer, lr):
184 |         for g in optimizer.param_groups:
185 |             g['lr'] = lr
186 |     
187 |     import time
188 |        
189 |         
190 |     
191 |     
192 |     hidden_size = args.hidden_size
193 |     nlayers = args.nlayers
194 |     embedding_size = args.hidden_size
195 |     dropout = args.drop
196 |     
197 |     def load_checkpoint(filepath, train = False):
198 |         checkpoint = torch.load(filepath)
199 |         model = checkpoint['model']
200 |         model.load_state_dict(checkpoint['state_dict'])
201 |         
202 |         if train :
203 |             for parameter in model.parameters():
204 |                 parameter.requires_grad = True
205 |             model.train()
206 |         else :
207 |             for parameter in model.parameters():
208 |                 parameter.requires_grad = False
209 |             model.eval()
210 |         return model
211 |     
212 |     criterion = nn.MSELoss()
213 |     trn_mean = torch.tensor(trn_mean).float()#.to(device).float()
214 |     trn_std = torch.tensor(trn_std).float()
215 |     bias = torch.tensor([torch.mean((y_train[:,:,k]-trn_mean[0,k])/trn_std[0,k]) for k in range(3)]).to(device)
216 |     model = VGRU_ED(input_dim=3,emb_dim = hidden_size, hidden_dim=hidden_size, output_dim=3, n_layers=nlayers, drop_prob=dropout, mean_val = trn_mean, std_val = trn_std, criterion = criterion, init_bias = bias, length_seq = args.timestep_pred, weight_kl = args.wgt)
217 |     nb_params = model.number_of_parameters()
218 |     print(nb_params)
219 |     lr = 1e-2
220 |     optimizer = optim.Adam(model.parameters(), lr=lr)#, betas=(0.9, 0.95), eps=1e-08)
221 |     # criterion = nn.MS#nn.CrossEntropyLoss(weight = class_weights).to(device)
222 |       
223 |         
224 |     if torch.cuda.device_count() > 1:
225 |         print("Let's use", torch.cuda.device_count(), "GPUs!")
226 |         model = nn.DataParallel(model)        
227 |     model.to(device)
228 |     
229 |     
230 |     
231 |     #create folder
232 |     dir_path = f"VAE_{rt}"#folder_models + '/'
233 |     os.makedirs(dir_path)
234 |     dir_path = dir_path + '/'
235 |     
236 |     #save the model architecture
237 |     f = open(dir_path+"model_parameters.txt", "a")
238 |     f.write(str(model.state_dict))
239 |     f.close()
240 |     
241 |     # #save the log 
242 |     f = open(dir_path+"log_loss.txt", "a")
243 |     # f.write(str(model.state_dict))
244 |     f.close()
245 |     
246 |     PATH = "model.pth"
247 |     
248 |     #save the args
249 |     f = open(dir_path+"args.txt", "w+")
250 |     f.write(str(args))
251 |     f.close()
252 |     
253 |     #which optimizer
254 |     f = open(dir_path+"optim.txt", "w+")
255 |     f.write(str(optimizer))
256 |     f.close()                    
257 |     
258 |     t0 = time.time()
259 |     
260 |     
261 |     all_trn_mape_track = []
262 |     all_val_mape_track = []
263 |     
264 |     trn_mape_track = []
265 |     val_mape_track = []
266 |     
267 |     
268 |     j = 0  #indicator used to load the model
269 |     k = 0
270 |     best_mape = 10000
271 |     step = 0
272 |     epoch_stop = np.zeros(3)
273 |     #save some useful informations
274 |     infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\  Sequence length : {maxlen} \
275 |     \\  Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \
276 |     \\ Optimizer learning rate : {lr} \\  Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 
277 |     f = open(dir_path+"training_readme.txt", "w+")
278 |     f.write(infos_model)
279 |     f.close()
280 |     
281 |     
282 |     
283 |     for l_r in [lr, 1e-3, 1e-4] :
284 |         
285 |         
286 |         
287 |         if j != 0 :
288 |             f = open(dir_path+"log_loss.txt", "a")
289 |             f.write("Load the model...")
290 |             f.write("\n")
291 |             f.close()
292 |             
293 |             model = load_checkpoint(dir_path+PATH, train = True)
294 |             
295 |             if torch.cuda.device_count() > 1:
296 |                 print("Let's use", torch.cuda.device_count(), "GPUs!")
297 |                 model = nn.DataParallel(model)        
298 |             model.to(device)
299 |             
300 |          
301 |             
302 |             checkpoint = torch.load(dir_path+PATH)
303 |             optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08)
304 |             optimizer.load_state_dict(checkpoint['optimizer_dic'])
305 |             update_lr(optimizer, l_r)
306 |             best_mape = checkpoint['best_mape']   
307 |             trn_mape_track = checkpoint['mape']
308 |             val_mape_track = checkpoint['val_mape']
309 |             epoch_stop = checkpoint['epoch_stop']
310 |     
311 |         j = j+1
312 |         
313 |         # TRAINING    
314 |         f = open(dir_path+"log_loss.txt", "a")
315 |         f.write("Begin training." + "\n")
316 |         f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr']))
317 |         f.write("\n")
318 |         f.close()
319 |          
320 |         
321 |         
322 |         
323 |         for epoch in range(args.epochs):
324 |             model.train()
325 |         #         patience = patience-1
326 |             t1 = time.time()
327 |             loss = 0
328 |             trn_mape = 0
329 |     
330 |     
331 |             for i, data in enumerate(trn_dl):   
332 |          
333 |             
334 |                 X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
335 |                 optimizer.zero_grad()
336 |                 loss = model(X_train_batch, y_train_batch)[1]
337 |                 loss.backward()
338 |                 optimizer.step()
339 |     
340 |     
341 |     
342 |             # Eval phase  
343 |             model.eval()
344 |             with torch.no_grad() :
345 |                 train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
346 |                 trn_mape_track.append(train_mape)
347 |                 all_trn_mape_track.append(train_mape)
348 |                 
349 |                 
350 |                 val_mape = model.eval_mape(val_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
351 |                 val_mape_track.append(val_mape)
352 |                 all_val_mape_track.append(val_mape)
353 |     
354 |     
355 |             f = open(dir_path+"log_loss.txt", "a")
356 |             f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}, val mape : {val_mape:2.2%}')
357 |             f.write("\n")
358 |             f.close()
359 |             
360 |             
361 |             if val_mape < best_mape :
362 |                 #trials = 0
363 |                 best_mape = val_mape#.item()
364 |                 epoch_stop[j:] = k
365 |                 
366 |                 f = open(dir_path+"log_loss.txt", "a")
367 |                 f.write(f'Epoch {epoch+1} best model saved with mape: {val_mape:2.2%}')
368 |                 f.write("Save the model...")
369 |                 f.write("\n")
370 |                 f.close()
371 |         
372 |                 checkpoint = {'model': model, 
373 |                               'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track,
374 |                       'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop,
375 |                       'optimizer_dic' : optimizer.state_dict()}
376 |                 torch.save(checkpoint, dir_path+PATH)
377 |                     
378 |             k = k+1        
379 |     
380 |     
381 |     
382 |     for l_r in [1e-5] :
383 |     
384 |         f = open(dir_path+"log_loss.txt", "a")
385 |         f.write("Load the model...")
386 |         f.write("\n")
387 |         f.close()
388 |         
389 |         model = load_checkpoint(dir_path+PATH, train = True)
390 |         
391 |         if torch.cuda.device_count() > 1:
392 |             print("Let's use", torch.cuda.device_count(), "GPUs!")
393 |             model = nn.DataParallel(model)        
394 |         model.to(device)
395 |         
396 |      
397 |         
398 |         checkpoint = torch.load(dir_path+PATH)
399 |         optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08)
400 |         optimizer.load_state_dict(checkpoint['optimizer_dic'])
401 |         update_lr(optimizer, l_r)
402 |         best_mape = 10000#checkpoint['best_mape']   
403 |         trn_mape_track = checkpoint['mape']
404 |         val_mape_track = checkpoint['val_mape']
405 |         epoch_stop = checkpoint['epoch_stop']
406 |     
407 |         j = j+1
408 |         
409 |         # TRAINING    
410 |         f = open(dir_path+"log_loss.txt", "a")
411 |         f.write("Begin training (full set)." + "\n")
412 |         f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr']))
413 |         f.write("\n")
414 |         f.close()
415 |          
416 |         print(f'Creating data loaders with batch size: {bs}')
417 |         trn_dl = create_loaders(full_train_dl, bs, jobs=1)#4*args.num_gpus)
418 |         #trn_dl_eval = create_loaders(full_train_dl, 4096*4, jobs=1)#4*args.num_gpus)
419 |         torch.cuda.empty_cache()  
420 |             
421 |             
422 |         for epoch in range(args.epochs):
423 |             model.train()
424 |             t1 = time.time()
425 |             loss = 0
426 |             trn_mape = 0
427 |     
428 |     
429 |             for i, data in enumerate(trn_dl):  
430 |                 X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float()
431 |                 optimizer.zero_grad()
432 |                 loss = model(X_train_batch, y_train_batch)[1]
433 |                 loss.backward()
434 |                 optimizer.step()
435 |             
436 |             model.eval()
437 |             with torch.no_grad() :
438 |                 train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item()
439 |             trn_mape_track.append(train_mape)
440 |             all_trn_mape_track.append(train_mape)
441 |             
442 |             
443 |             f = open(dir_path+"log_loss.txt", "a")
444 |             f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}')
445 |             f.write("\n")
446 |             f.close()
447 |             
448 |             if train_mape < best_mape :
449 |                 #trials = 0
450 |                 best_mape = train_mape#.item()
451 |                 epoch_stop[j:] = k
452 |                 
453 |                 f = open(dir_path+"log_loss.txt", "a")
454 |                 f.write(f'Epoch {epoch+1} best model saved with mape: {train_mape:2.2%}')
455 |                 f.write("Save the model...")
456 |                 f.write("\n")
457 |                 f.close()
458 |         
459 |                 checkpoint = {'model': model, 
460 |                               'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track,
461 |                       'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop,
462 |                       'optimizer_dic' : optimizer.state_dict()}
463 |                 torch.save(checkpoint, dir_path+PATH)
464 |             
465 |             
466 |     
467 |     
468 |     #save some useful informations
469 |     infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\  Sequence length : {maxlen} \
470 |     \\  Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\  Number of epochs : {k+1} \
471 |     \\ Optimizer learning rate : {lr} \\  Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 
472 |     f = open(dir_path+"training_readme.txt", "w+")
473 |     f.write(infos_model)
474 |     f.close()            
475 |     
476 |     #del model and empty cache
477 |     del(model)
478 |     torch.cuda.empty_cache()       
479 |     
480 |     
481 |     
482 | 
483 | 
484 | 
485 | 
486 | 
487 | 
488 | 
489 | 
490 | 
491 | 
492 | 
493 | 
494 | 
495 | 
496 | 
497 | 
498 | 
499 | 
500 | 
501 | 
502 | 
503 | 
504 | 
505 | 
506 | 
507 |         
508 | 


--------------------------------------------------------------------------------
/utils/utils_ft.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.optim as optim
  3 | from torch.nn import functional as F
  4 | from torch.utils.data import DataLoader, Dataset
  5 | from torch.autograd import Variable
  6 | import torch.nn as nn
  7 | from torch.utils.data import TensorDataset, DataLoader
  8 | from torch.nn.modules import ModuleList
  9 | from torch.nn.modules.normalization import LayerNorm
 10 | 
 11 | import pickle
 12 | import pandas as pd
 13 | import datetime 
 14 | import matplotlib.pyplot as plt
 15 | from IPython.display import display
 16 | import os
 17 | import argparse
 18 | import random
 19 | import tqdm
 20 | import time
 21 | import numpy as np
 22 | from time import time
 23 | 
 24 | 
 25 | 
 26 | 
 27 | 
 28 | 
 29 | 
 30 | 
 31 | 
 32 | 
 33 | 
 34 | def load_checkpoint(filepath, train = False):
 35 |     checkpoint = torch.load(filepath)
 36 |     model = checkpoint['model']
 37 |     model.load_state_dict(checkpoint['state_dict'])
 38 |     
 39 |     if train :
 40 |         for parameter in model.parameters():
 41 |             parameter.requires_grad = True
 42 |         model.train()
 43 |     else :
 44 |         for parameter in model.parameters():
 45 |             parameter.requires_grad = False
 46 |         model.eval()
 47 |     return model
 48 | 
 49 | 
 50 | def set_parameter_requires_grad(model, feature_extracting):
 51 |     if feature_extracting:
 52 |         for param in model.parameters():
 53 |             param.requires_grad = False
 54 | 
 55 | 
 56 | 
 57 | #prepare forecasting data
 58 | def gen_RUL_sequence(id_df, seq_length, seq_cols, type_data = 'Input', ind_start = 0):
 59 |     data_array = id_df[seq_cols].values
 60 |     num_elements = data_array.shape[0]
 61 |     if type_data == 'Input' :
 62 |         for start, stop in zip(range(0+ind_start, num_elements-seq_length+1), range(seq_length+ind_start, num_elements+1)):
 63 |             yield data_array[start:stop, :]
 64 |     else :
 65 |         for start, stop in zip(range(0+ind_start, num_elements-seq_length+1), range(seq_length+ind_start, num_elements+1)):
 66 |             yield data_array[stop-1, :]
 67 | 
 68 | 
 69 | 
 70 | def seq_preprocess(data, sequence_length, seq_cols_in, seq_cols_out, type_set = 'Train', num_type = 'float') :
 71 |     
 72 | 
 73 | 	
 74 |     #generate sequences and convert to numpy array
 75 | 
 76 |     if type_set == 'Test' :
 77 |         dbX = [data[data['ID']==id][seq_cols_in].values[-sequence_length:] for id in data['ID'].unique()]
 78 |         dbX = np.asarray(dbX)
 79 |         dbY = [data[data['ID']==id][seq_cols_out].values[-1] for id in data['ID'].unique()]
 80 |         dbY = np.asarray(dbY)
 81 | 
 82 |     else :	
 83 |         seq_gen = (list(gen_RUL_sequence(data[data['ID']==id], sequence_length, seq_cols_in, type_data= 'Input')) for id in data['ID'].unique())
 84 |         dbX = np.concatenate(list(seq_gen))
 85 |         seq_gen = (list(gen_RUL_sequence(data[data['ID']==id], sequence_length, seq_cols_out,  type_data= 'Output')) for id in data['ID'].unique())
 86 |         dbY = np.concatenate(list(seq_gen)).reshape(-1,)
 87 | 
 88 |     print(dbX.shape)
 89 |     print(dbY.shape)
 90 |     
 91 | 
 92 | 
 93 |     
 94 | 
 95 |     
 96 |     print('Preparing datasets')
 97 |     if num_type =='float' :
 98 |         torch_type = torch.float
 99 |         Y = torch.tensor(dbY, dtype=torch.float)#.to(device)       
100 |     elif num_type =='long' :
101 |         torch_type = torch.long
102 |         Y = torch.tensor(dbY, dtype=torch.long)#.to(device)
103 |     
104 |     X = torch.tensor(dbX, dtype=torch.float)#.to(device)
105 |     
106 |     print('Preparing datasets')
107 |     
108 |     X_torch = torch.tensor(X, dtype=torch.float)
109 |     y_torch = torch.tensor(Y, dtype=torch_type)
110 | 
111 | 
112 | 
113 |     return X_torch, y_torch
114 | 


--------------------------------------------------------------------------------