├── .gitattributes
├── FT_phase.py
├── README.md
├── data
│   ├── args.txt
│   ├── data_test
│   └── raw_data_train_complete
├── models
│   ├── GRU_Decoder.py
│   ├── GRU_ED.py
│   ├── README.md
│   └── VGRU_ED.py
├── training_GRU_Decoder_t1.py
├── training_GRU_ED.py
├── training_VGRU_ED.py
└── utils
    └── utils_ft.py

/.gitattributes:
--------------------------------------------------------------------------------
1 | data/raw_data_train_complete filter=lfs diff=lfs merge=lfs -text
2 | 
--------------------------------------------------------------------------------
/FT_phase.py:
--------------------------------------------------------------------------------
1 | # Essential imports for PyTorch and data manipulation
2 | import torch
3 | import torch.optim as optim
4 | import torch.nn as nn
5 | from torch.nn import functional as F
6 | from torch.utils.data import DataLoader, TensorDataset
7 | from torch.autograd import Variable
8 | from torch.nn.modules import ModuleList, normalization
9 | 
10 | # Additional utilities
11 | import argparse
12 | import datetime
13 | import matplotlib.pyplot as plt
14 | import numpy as np
15 | import os
16 | import pandas as pd
17 | import pickle
18 | import random
19 | import time
20 | import tqdm
21 | from utils import utils_ft  # Custom utilities for the fine-tuning phase (preprocessing, checkpoint loading, freezing)
22 | 
23 | # Disable warnings (consider reviewing this for better debugging)
24 | import warnings
25 | warnings.filterwarnings("ignore")
26 | 
27 | # Define a function to parse training arguments
28 | def training_args():
29 |     """
30 |     Parses command line arguments for training configuration.
31 | 
32 |     Returns:
33 |         argparse.Namespace: Parsed arguments with training configurations.
34 |     """
35 |     parser = argparse.ArgumentParser(description='fine_tune')
36 |     # Define arguments
37 |     parser.add_argument('--path', default='', type=str, help='model path')
38 |     parser.add_argument('--folder', default='', type=str, help='folder path')
39 |     parser.add_argument('--reset', default='no', type=str, help='Reset weights?')
40 |     parser.add_argument('--freeze', default='freeze', type=str, help='Freeze weights')
41 |     parser.add_argument('--finetune', default=False, type=bool, help='Finetune')
42 |     parser.add_argument('--cv', default=5, type=int, help='k fold')
43 |     parser.add_argument('--num_gpus', default=1, type=int, help='number of GPUs')
44 |     parser.add_argument('--nb_samples', default=10, type=int, help='Number of samples')
45 |     parser.add_argument('-b', '--batch_size', default=4096, type=int, help='mini-batch size')
46 |     parser.add_argument('-e', '--epochs', default=10, type=int, help='number of total epochs')
47 |     parser.add_argument('--device', default=0, type=int, help='which device')
48 |     parser.add_argument('--maxlen', default=30, type=int, help='Windows length')
49 |     parser.add_argument('--nb_gauges', default=3, type=int, help='Number of gauges')
50 |     parser.add_argument('--thinning', default=500, type=int, help='Thinning')
51 |     parser.add_argument('--lr', default=1e-6, type=float, help='Learning rate')
52 |     parser.add_argument('--drop', default=0.1, type=float, help='Dropout rate')
53 | 
54 |     # Parse and return arguments
55 |     return parser.parse_args()
56 | 
57 | # Define function to create data loaders
58 | def create_loaders(data, bs=512, jobs=0):
59 |     """
60 |     Creates a data loader for the given dataset.
61 | 
62 |     Args:
63 |         data (Dataset): The dataset for which to create the data loader.
64 |         bs (int): Batch size. Default is 512.
65 |         jobs (int): Number of worker processes to use. Default is 0.
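For context, a minimal sketch of how a loader produced by `create_loaders` would typically be consumed; the tensor shapes are illustrative assumptions (windows of length 30 with 3 gauge channels), not values taken from the repository data.

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

# Illustrative shapes only: 100 windows of length 30 with 3 gauge channels.
X = torch.randn(100, 30, 3)
y = torch.randn(100, 1)
loader = DataLoader(TensorDataset(X, y), batch_size=16, shuffle=True, num_workers=0)

for xb, yb in loader:
    print(xb.shape, yb.shape)  # torch.Size([16, 30, 3]) torch.Size([16, 1])
    break
```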
66 | 67 | Returns: 68 | DataLoader: Data loader for the given dataset. 69 | """ 70 | return DataLoader(data, batch_size=bs, shuffle=True, num_workers=jobs, pin_memory=False) 71 | 72 | # Define a custom GRU Layer class 73 | class GRU_Layer(nn.Module): 74 | """ 75 | Custom GRU Layer class. 76 | """ 77 | def __init__(self, input_dim, hidden_dim, n_layers, drop_prob): 78 | super(GRU_Layer, self).__init__() 79 | self.hidden_dim = hidden_dim 80 | self.n_layers = n_layers 81 | self.gru = nn.GRU(input_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob) 82 | 83 | def forward(self, x, hidden): 84 | out, hidden = self.gru(x, hidden) 85 | return F.silu(out), hidden 86 | 87 | # Define a custom sequential class 88 | class MySequential(nn.Sequential): 89 | """ 90 | Custom Sequential class to handle multiple input formats. 91 | """ 92 | def forward(self, *inputs): 93 | for module in self._modules.values(): 94 | inputs = module(*inputs) if type(inputs) == tuple else module(inputs) 95 | return inputs 96 | 97 | # Define a function to update the learning rate 98 | def update_lr(optimizer, lr): 99 | """ 100 | Updates the learning rate for an optimizer. 101 | 102 | Args: 103 | optimizer (Optimizer): The optimizer to update. 104 | lr (float): The new learning rate. 105 | """ 106 | for g in optimizer.param_groups: 107 | g['lr'] = lr 108 | 109 | 110 | def configure_model(model, args, trn_std, trn_mean, bias, lr, device): 111 | """ 112 | Configures the model based on the given arguments. 113 | 114 | Args: 115 | model (torch.nn.Module): The model to configure. 116 | args (argparse.Namespace): Command-line arguments with 'reset' and 'freeze' options. 117 | trn_std (torch.Tensor): Standard deviation for normalization. 118 | trn_mean (torch.Tensor): Mean for normalization. 119 | bias (float): Bias value to add to the last layer. 120 | lr (float): Learning rate for the optimizer. 121 | device (torch.device): The device to use for tensors. 122 | 123 | Returns: 124 | torch.optim.Optimizer: Configured optimizer for the model. 125 | """ 126 | def reset_weights(m): 127 | if hasattr(m, 'reset_parameters'): 128 | m.reset_parameters() 129 | 130 | def set_fc_layer(num_features): 131 | fc_layer = mySequential( 132 | GRU_Layer(num_features, num_features, 1, 0.1), 133 | nn.LayerNorm(num_features, elementwise_affine=False), 134 | nn.Linear(num_features, 1) 135 | ).to(device) 136 | nn.init.xavier_normal_(fc_layer[-1].weight.data) 137 | fc_layer[-1].bias.data += bias 138 | return fc_layer 139 | 140 | if args.reset == 'reset': 141 | print('Reset weights...') 142 | model.apply(reset_weights) 143 | model.std = trn_std.to(device) 144 | model.mean = trn_mean.to(device) 145 | 146 | elif args.freeze == 'freeze': 147 | print('Freeze pre-trained layers') 148 | utils_ft.set_parameter_requires_grad(model, True) 149 | else: 150 | print('Unfreeze pre-trained layers') 151 | utils_ft.set_parameter_requires_grad(model, False) 152 | 153 | # Set up the fully connected layer 154 | num_features = model.fc.in_features 155 | model.fc = set_fc_layer(num_features) 156 | model.length_seq = 30 157 | model.ft = True 158 | 159 | # Return the configured optimizer 160 | return model, optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6) 161 | 162 | def prepare_data_for_fold(data_train_raw, i, cv, nb_samples, sequence_length, seq_cols_in, seq_cols_out, bs, device): 163 | """ 164 | Prepares training and validation data for a given fold in cross-validation. 165 | 166 | Args: 167 | data_train_raw (DataFrame): Raw training data. 
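The following is a minimal sketch of the freeze-and-replace-head pattern that `configure_model` applies. `DummyBackbone` is a hypothetical stand-in for the pre-trained model, the multi-part GRU head built by `set_fc_layer` is simplified to a single `Linear` layer, and the repository's helper `utils_ft.set_parameter_requires_grad` is replaced by an explicit loop with the same intent.

```python
import torch
import torch.nn as nn
import torch.optim as optim

class DummyBackbone(nn.Module):          # hypothetical stand-in for the pre-trained model
    def __init__(self, num_features=64):
        super().__init__()
        self.gru = nn.GRU(3, num_features, batch_first=True)
        self.fc = nn.Linear(num_features, 3)

model = DummyBackbone()

# Freeze every pre-trained parameter, mirroring the script's call to set_parameter_requires_grad.
for p in model.parameters():
    p.requires_grad = False

# Replace the head with a freshly initialised regression layer; its parameters stay trainable.
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, 1)
nn.init.xavier_normal_(model.fc.weight.data)
model.fc.bias.data += 6.0                # illustrative bias offset, cf. the `bias` argument above

optimizer = optim.Adam(model.parameters(), lr=1e-6, weight_decay=1e-6)
```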
168 | i (int): Index of the current fold in cross-validation. 169 | cv (int): Total number of folds in cross-validation. 170 | nb_samples (int): Number of samples in the dataset. 171 | sequence_length (int): Length of the sequence for training. 172 | seq_cols_in (list): List of column names for input features. 173 | seq_cols_out (list): List of column names for output labels. 174 | bs (int): Batch size for data loaders. 175 | device (torch.device): Device to use for tensors. 176 | 177 | Returns: 178 | DataLoader: DataLoader for training data. 179 | DataLoader: DataLoader for validation data. 180 | """ 181 | print('----------------------------------') 182 | print(f'Fold {i + 1}') 183 | print('----------------------------------') 184 | 185 | # Create indices for training and validation data 186 | list_train = np.arange(1, nb_samples + 1) 187 | list_val = list_train[i:i + nb_samples // cv] 188 | list_train = np.delete(list_train, np.arange(i, i + nb_samples // cv)) 189 | 190 | # Split data into training and validation sets 191 | data_train = data_train_raw[data_train_raw.ID.isin(list_train)] 192 | data_val = data_train_raw[data_train_raw.ID.isin(list_val)] 193 | 194 | # Preprocess the data 195 | X_train, y_train = utils_ft.seq_preprocess(data_train, sequence_length, seq_cols_in, seq_cols_out, type_set='Train') 196 | X_val, y_val = utils_ft.seq_preprocess(data_val, sequence_length, seq_cols_in, seq_cols_out, type_set='Val') 197 | 198 | # Create TensorDatasets 199 | train_dl = TensorDataset(X_train, torch.log(y_train + 500).to(device)) 200 | val_dl = TensorDataset(X_val, torch.log(y_val + 500).to(device)) 201 | 202 | # Create DataLoaders 203 | print(f'Creating data loaders with batch size: {bs}') 204 | trn_dl = create_loaders(train_dl, bs, jobs=4) 205 | val_dl = create_loaders(val_dl, bs, jobs=4) 206 | 207 | return trn_dl, val_dl 208 | 209 | if __name__ == "__main__": 210 | # Parse arguments for training 211 | args = training_args() 212 | print(args) 213 | 214 | # Set up various training configurations 215 | nb_gauges = args.nb_gauges 216 | device = torch.device('cuda') # Consider checking for CUDA availability 217 | bs = args.batch_size 218 | epochs = args.epochs 219 | maxlen = args.maxlen 220 | nb_samples = args.nb_samples 221 | thinning = args.thinning 222 | cv = args.cv 223 | patience = 500 # Initial patience for early stopping 224 | 225 | # Set manual seeds for reproducibility 226 | torch.manual_seed(0) 227 | torch.cuda.manual_seed(0) 228 | torch.backends.cudnn.benchmark = False 229 | torch.backends.cudnn.deterministic = True 230 | 231 | # Change directory to data folder and load data 232 | os.chdir("/home/anassakrim/FolderThesis/ProjectSSL/") 233 | fd_data = 'data/' 234 | data_train_raw = pd.read_pickle(fd_data + 'data_train_ft').reset_index().iloc[:, 1:] 235 | data_test = pd.read_pickle(fd_data + 'data_test').reset_index().iloc[:, 1:] 236 | data_train = data_train_raw[data_train_raw.ID <= nb_samples] 237 | 238 | # Data preparation 239 | seq_cols_in = ['gauge' + str(i + 1) for i in range(nb_gauges)] 240 | seq_cols_out = ['RUL'] 241 | sequence_length = maxlen 242 | 243 | # Preprocess training data 244 | tmp = data_train[seq_cols_in].values 245 | trn_mean = tmp.mean(axis=0).reshape(1, -1) 246 | trn_std = tmp.std(axis=0).reshape(1, -1) 247 | trn_mean = torch.tensor(trn_mean).float() 248 | trn_std = torch.tensor(trn_std).float() 249 | bias = np.log(data_train[seq_cols_out].values + 500).mean() # Bias for fine-tuning 250 | 251 | # Preprocess test set 252 | X_test, y_test = 
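As a small numpy-only illustration of the fold indexing used by `prepare_data_for_fold`, with `nb_samples=10` and `cv=5` as in the script defaults, the sketch below prints which structure IDs land in each validation split.

```python
import numpy as np

nb_samples, cv = 10, 5  # defaults from training_args
for i in range(0, nb_samples, nb_samples // cv):
    ids = np.arange(1, nb_samples + 1)
    val_ids = ids[i:i + nb_samples // cv]
    train_ids = np.delete(ids, np.arange(i, i + nb_samples // cv))
    print(f"fold starting at {i}: val={val_ids.tolist()}, train={train_ids.tolist()}")
```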
utils_ft.seq_preprocess(data_test, sequence_length, seq_cols_in, seq_cols_out, type_set='Test') 253 | y_test = y_test.to(device).reshape(-1) 254 | X_test = X_test.to(device) 255 | 256 | # Prepare the model directory 257 | fd = args.folder 258 | dt = f"{datetime.datetime.now():%Y%h%d_%Hh%M}" 259 | path_model = fd + "/L2_PT_Test_k_fold_" + dt + '_' + f"Finetune{nb_samples}" 260 | os.makedirs(path_model, exist_ok=True) 261 | dir_path = path_model + "/" 262 | 263 | # Load pre-trained model 264 | model_path = args.path + 'model.pth' 265 | it = 0 266 | lr = args.lr 267 | model = utils_ft.load_checkpoint(model_path, train=True) 268 | optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6) 269 | 270 | # Save model architecture and training configuration 271 | with open(dir_path + "model_parameters.txt", "a") as f: 272 | f.write(str(model.state_dict())) 273 | 274 | with open(dir_path + "log_loss.txt", "a") as f: 275 | pass # Currently empty, consider logging training progress here 276 | 277 | with open(dir_path + "args.txt", "w+") as f: 278 | f.write(str(args)) 279 | 280 | with open(dir_path + "optim.txt", "w+") as f: 281 | f.write(str(optimizer)) 282 | 283 | # Main training and evaluation loop 284 | t0 = time.time() 285 | 286 | scores_val = [] 287 | scores_test = [] 288 | 289 | for i in range(0, nb_samples, nb_samples // cv): 290 | patience = 500 291 | best_mape = float('inf') 292 | saved_mape_test = float('inf') 293 | 294 | model = utils_ft.load_checkpoint(model_path, train=True) 295 | model, optimizer = configure_model(model, args, trn_std, trn_mean, bias, lr, device) 296 | model.to(device) 297 | criterion = nn.MSELoss().to(device) 298 | 299 | 300 | # Prepare DataLoader for both training and validation datasets 301 | trn_dl, val_dl = prepare_data_for_fold(data_train_raw, it, cv, nb_samples, 302 | sequence_length, seq_cols_in, seq_cols_out, bs, device) 303 | trn_mape_track = [] 304 | val_mape_track = [] 305 | 306 | # Save model information 307 | infos_model = ( 308 | f'Number of training aircraft components: {len(np.unique(data_train.ID))} ' 309 | f'\\ Sequence length: {maxlen} ' 310 | f'\\ Number of training samples: {X_train.shape[0]} ' 311 | f'\\ Number of epochs: {args.epochs} ' 312 | f'\\ Optimizer learning rate: {args.lr} ' 313 | f'\\ Running time in minutes: {(time.time() - t0) / 60} ' 314 | f'\\ Number of model parameters: {model.number_of_parameters()}' 315 | ) 316 | with open(os.path.join(dir_path, "training_readme.txt"), "w+") as f: 317 | f.write(infos_model) 318 | 319 | # Move training and validation data to the specified device 320 | y_train = y_train.to(device).reshape(-1) 321 | y_val = y_val.to(device).reshape(-1) 322 | X_train = X_train.to(device) 323 | X_val = X_val.to(device) 324 | 325 | # Iterate over a range of learning rates 326 | it_lr = 0 327 | for l_r in [args.lr / (10 ** p) for p in range(3)]: 328 | patience = 500 # Reset patience variable for early stopping 329 | if it_lr != 0: # For second fold and beyond 330 | print("Load the model...") 331 | with open(os.path.join(dir_path, "log_loss.txt"), "a") as f: 332 | f.write("Load the model...\n") 333 | 334 | model = utils_ft.load_checkpoint(os.path.join(dir_path, PATH), train=True) 335 | # Freeze or unfreeze model layers based on arguments 336 | if args.freeze == 'freeze' and args.reset == 'no': 337 | utils_ft.set_parameter_requires_grad(model, True) # Freeze all weights 338 | for param in model.fc.parameters(): 339 | param.requires_grad = True # Unfreeze weights of the final layer 340 | 341 | model.to(device) 342 
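A short sketch of the staged learning-rate schedule used below (`args.lr`, then `args.lr/10`, then `args.lr/100`) together with the `update_lr` helper; the single dummy parameter stands in for the model and is an assumption of this example.

```python
import torch
import torch.optim as optim

param = torch.nn.Parameter(torch.zeros(3))          # dummy parameter standing in for the model
optimizer = optim.Adam([param], lr=1e-6, weight_decay=1e-6)

base_lr = 1e-6                                       # args.lr default
for l_r in [base_lr / (10 ** p) for p in range(3)]:  # 1e-6, 1e-7, 1e-8
    for g in optimizer.param_groups:                 # what update_lr() does
        g['lr'] = l_r
    print(optimizer.param_groups[0]['lr'])
```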
| checkpoint = torch.load(os.path.join(dir_path, PATH)) 343 | optimizer = optim.Adam(model.parameters(), lr=l_r, weight_decay=1e-6) 344 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 345 | update_lr(optimizer, l_r) 346 | 347 | it_lr += 1 348 | 349 | print(f'Learning rate adjusted to {optimizer.param_groups[0]["lr"]:.7f}') 350 | with open(os.path.join(dir_path, "log_loss.txt"), "a") as f: 351 | f.write(f"Begin training.\nLearning rate adjusted to {optimizer.param_groups[0]['lr']:.7f}\n") 352 | 353 | # Initialize progress bar for training epochs 354 | pbar = trange(args.epochs, unit="epoch") 355 | for epoch in range(args.epochs): 356 | # Training step 357 | time.sleep(0.1) 358 | model.train() 359 | t1 = time.time() 360 | loss = 0 361 | mape_loss = 0 362 | for i, data in enumerate(trn_dl): 363 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 364 | optimizer.zero_grad() 365 | y_train_pred = model(X_train_batch)[0][:,-1,:].reshape(-1) 366 | mse_loss = criterion(y_train_pred, y_train_batch) 367 | mse_loss.backward() 368 | optimizer.step() 369 | 370 | # Model Evaluation 371 | model.eval() 372 | with torch.no_grad(): 373 | # Evaluate on training data 374 | pred_train = model(X_train)[0][:, -1, :].reshape(-1) 375 | pred_train = torch.exp(pred_train) - 500 # Inverse transform 376 | train_mape = torch.mean(torch.abs((pred_train - y_train) / y_train).masked_fill(torch.isinf(pred_train - y_train), 0)) 377 | 378 | # Evaluate on validation data 379 | pred_val = model(X_val)[0][:, -1, :].reshape(-1) 380 | pred_val = torch.exp(pred_val) - 500 381 | val_mape = torch.mean(torch.abs((pred_val - y_val) / y_val).masked_fill(torch.isinf(pred_val - y_val), 0)) 382 | 383 | # Evaluate on test data 384 | pred_test = model(X_test)[0][:, -1, :].reshape(-1) 385 | pred_test = torch.exp(pred_test) - 500 386 | test_mape = torch.mean(torch.abs((pred_test - y_test) / y_test)) 387 | 388 | # Update progress bar and log results 389 | pbar.set_description(f'Epoch {epoch + 1}/{args.epochs}') 390 | pbar.set_postfix_str(f'Train MAPE {train_mape:.2%}, Val MAPE {val_mape:.2%}, Test MAPE {test_mape:.2%}, Best MAPE {best_mape:.2%}, Saved Test MAPE {saved_mape_test:.2%}, Patience {patience}') 391 | with open(os.path.join(dir_path, "log_loss.txt"), "a") as f: 392 | f.write(f'Epoch {epoch + 1}/{args.epochs}, Train MAPE {train_mape:.2%}, Val MAPE {val_mape:.2%}, Test MAPE {test_mape:.2%}, Best MAPE {best_mape:.2%}, Saved Test MAPE {saved_mape_test:.2%}, Patience {patience}\n') 393 | 394 | # Checkpointing and Early Stopping 395 | if epoch % 100 == 0: 396 | torch.cuda.empty_cache() 397 | if val_mape < best_mape: 398 | best_mape = val_mape 399 | saved_mape_test = test_mape 400 | patience = 500 401 | with open(os.path.join(dir_path, "log_loss.txt"), "a") as f: 402 | f.write("Save the model...\n") 403 | checkpoint = {'model': model, 'mape': trn_mape_track, 'val_mape': val_mape_track, 'state_dict': model.state_dict(), 'optimizer_dic': optimizer.state_dict(), 'lr': lr} 404 | torch.save(checkpoint, os.path.join(dir_path, PATH)) 405 | else: 406 | patience -= 1 407 | if patience == 0: 408 | break 409 | 410 | # Plotting 411 | plt.figure(figsize=(22, 12)) 412 | plt.plot(trn_mape_track, label='Training set') 413 | plt.plot(val_mape_track, label=f'Val set with best MAPE = {best_mape:.2%}') 414 | plt.grid() 415 | plt.legend() 416 | plt.title(f'{nb_samples} training structures, Fold {it}, Test set MAPE {saved_mape_test:.2%}') 417 | plt.xlabel('Epoch') 418 | plt.ylabel('MAPE (%)') 419 | 
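A short sketch of the target transform pair used in this script: targets are trained as `log(RUL + 500)` and predictions are mapped back with `exp(...) - 500` before computing MAPE. The RUL values below are made up.

```python
import torch

rul = torch.tensor([1200.0, 800.0, 50.0])      # made-up RUL values in cycles
target = torch.log(rul + 500)                  # transform applied to training targets
pred_rul = torch.exp(target) - 500             # inverse transform applied to predictions
mape = torch.mean(torch.abs((pred_rul - rul) / rul))
print(mape.item())                             # ~0 here, since pred_rul recovers rul exactly
```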
plt.savefig(os.path.join(dir_path, f'Fold{it}.jpg')) 420 | plt.close() 421 | 422 | # Update scores 423 | scores_val.append(best_mape.cpu().numpy()) 424 | scores_test.append(saved_mape_test) 425 | 426 | # Clean up 427 | del model 428 | torch.cuda.empty_cache() 429 | 430 | 431 | scores_val = np.array(scores_val) 432 | scores_test = np.array(scores_test) 433 | mn_val, std_val = scores_val.mean(), scores_val.std() 434 | mn_test, std_test = scores_test.mean(), scores_test.std() 435 | 436 | with open(dir_path + "log_loss.txt", "a") as f: 437 | # Logic for logging fold-wise and overall performance 438 | 439 | print("Training completed.") 440 | 441 | 442 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 🚀 Deep Self-Supervised Learning for Remaining Useful Life Prediction 2 | 3 | ## Project Introduction 4 | This repository showcases a cutting-edge approach to Remaining Useful Life (RUL) prediction, utilizing Deep Self-Supervised Learning. This innovative method represents a paradigm shift in Machine Learning, enabling AI systems to extract meaningful insights from available unlabelled data, without the need for externally provided annotations. This research addresses a critical challenge in predictive maintenance, particularly in environments where labeled data is scarce or difficult/expensive to obtain. 5 | 6 | ## 📖 Detailed Description 7 | 8 | ### Background 9 | Initiated as part of the PREDICT project, this research represents a collaborative effort between experts at the University of Toulouse, including Institut Clément Ader and ISAE-SUPAERO DISC. The project focuses on leveraging the untapped potential of self-supervised learning in the domain of Prognostics and Health Management (PHM). Specifically, it aims to demonstrate the efficacy of pre-training Deep Learning models on large volumes of unlabeled sensor data and applying them to PHM tasks like RUL estimation, even with minimal labeled data availability. 10 | 11 | ### Research Focus 12 | The core challenge tackled here is the scarcity of data in fatigue damage prognostics. The project's ambition is to accurately estimate the Remaining Useful Life of critical components, such as aluminum panels commonly used in aerospace structures, which are prone to fatigue cracks. To achieve this, the research utilizes strain gauge data, a type of data that presents unique challenges due to its nature and collection methods. 13 | 14 | 15 | ### Dataset Composition 16 | A synthetic dataset forms the backbone of this research. It is strategically divided into two key components: 17 | - A large, unlabeled dataset comprising strain gauge readings from structures prior to failure, used for the initial phase of model pre-training. 18 | - A smaller, labeled dataset containing strain gauge data up to the point of structural failure, utilized for subsequent fine-tuning of the models. 19 | 20 | ### Contribution and Citation 21 | The findings and methodologies developed in this project could be invaluable to researchers and practitioners in the field. 
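To make the dataset layout concrete, a minimal, assumption-laden sketch of how the provided pickles are read by the training scripts: paths are taken relative to the repository root, `raw_data_train_complete` must first be pulled via Git LFS, and the column names are the ones the scripts index into.

```python
import pandas as pd

# Pre-training pool (unlabeled strain-gauge histories); tracked with Git LFS.
raw = pd.read_pickle("data/raw_data_train_complete")

# Labeled split used for evaluating the fine-tuned models.
test = pd.read_pickle("data/data_test")

# The scripts access columns named ID, cycle, gauge1..gauge3 (and RUL for the labeled sets).
print(raw.columns.tolist())
print(test.columns.tolist())
```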
Those who find this repository beneficial for their work are encouraged to cite the published research: 22 | 23 | ``` 24 | @article{akrim2023self, 25 | title={Self-Supervised Learning for data scarcity in a fatigue damage prognostic problem}, 26 | author={Akrim, Anass and Gogu, Christian and Vingerhoeds, Rob and Sala{\"u}n, Michel}, 27 | journal={Engineering Applications of Artificial Intelligence}, 28 | volume={120}, 29 | pages={105837}, 30 | year={2023}, 31 | publisher={Elsevier}} 32 | ``` 33 | 34 | ## 🙏 Acknowledgements 35 | 36 | ◦ This work was partially funded by Occitanie region under the Predict project. This funding is gratefully acknowledged. 37 | 38 | ◦ This work has been carried out on the supercomputers PANDO (ISAE Supaero, Toulouse) and Olympe (CALMIP, Toulouse, project n°21042). Authors are grateful to ISAE Supaero and CALMIP for the hours allocated to this project. 39 | -------------------------------------------------------------------------------- /data/args.txt: -------------------------------------------------------------------------------- 1 | {'a0_mean': 0.0005, 'a0_std': 0.00025, 'C_mean': 1e-10, 'C_std': 4.998750156230471e-11, 'm_mean': 3.4, 'm_std': 0.25} -------------------------------------------------------------------------------- /data/data_test: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ansak95/DeepSSL/bfc73ece3975e85bf14bdb3f97313f1b198b66a1/data/data_test -------------------------------------------------------------------------------- /data/raw_data_train_complete: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:08ff7c3742949fb1b16c43557408c811bcb0d0813140024da7b486e084f3a349 3 | size 229112426 4 | -------------------------------------------------------------------------------- /models/GRU_Decoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.nn.modules.normalization import LayerNorm 7 | #from torchviz import make_dot 8 | from torch.autograd import Variable 9 | from torch.nn.modules import ModuleList 10 | import copy 11 | 12 | 13 | import numpy as np 14 | import os 15 | from tqdm import tqdm_notebook, trange 16 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 17 | 18 | 19 | 20 | class GRU_Decoder(nn.Module): 21 | def __init__(self, input_dim, emb_dim, hidden_dim, output_dim, n_layers, drop_prob, mean_val, std_val, criterion, init_bias, length_seq): 22 | super(GRU_Decoder, self).__init__() 23 | self.hidden_dim = hidden_dim 24 | self.n_layers = n_layers 25 | self.output_dim = output_dim 26 | self.length_seq = length_seq 27 | 28 | 29 | self.encoder = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device) 30 | self.emb = nn.Linear(input_dim, emb_dim).to(device) 31 | self.fc = nn.Linear(hidden_dim, output_dim).to(device) 32 | 33 | self.criterion = criterion.to(device) 34 | self.factor = -1 35 | self.act = nn.SiLU() 36 | self.mean = mean_val.to(device) 37 | self.std = std_val.to(device) 38 | self.ft = False 39 | self.drop = nn.Dropout(0) 40 | self.norm = nn.LayerNorm(emb_dim, elementwise_affine=False) 41 | self.norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False) 42 | self.init_bias = init_bias 43 | self.init_weights() 44 | 45 | def init_weights(self): 46 | 
self.apply(self._init_weights) 47 | 48 | def _init_weights(self, module): 49 | if isinstance(module, (nn.Linear)) and (module.bias is not None) :#and (): 50 | nn.init.xavier_normal_(module.weight.data) 51 | if module.weight.shape[0] == self.output_dim : 52 | module.bias.data = module.bias.data + self.init_bias 53 | 54 | 55 | elif isinstance(module, nn.GRU) : 56 | for layer_p in module._all_weights: 57 | for p in layer_p: 58 | if 'weight' in p: 59 | nn.init.xavier_normal_(module.__getattr__(p)) 60 | 61 | 62 | def transform_minmax(self, input) : 63 | return (input-self.mean)/self.std 64 | 65 | def invtransform_minmax(self, input) : 66 | return input*self.std+self.mean 67 | 68 | 69 | def forward(self, input, y = None): 70 | 71 | input = self.transform_minmax(input).to(device) 72 | x = self.emb(input) 73 | x = self.norm(x) 74 | x = self.drop(x) 75 | 76 | memory, cn = self.encoder(x) 77 | inp = x + self.norm1(memory) 78 | 79 | out = self.factor*self.act(inp) 80 | 81 | if self.ft == False : 82 | out = self.fc(out[:,-self.length_seq:,:]) 83 | else : 84 | out = self.fc(out,cn[-1:]) 85 | 86 | 87 | 88 | if y != None : 89 | y = self.transform_minmax(y) 90 | loss = self.criterion(out,y) 91 | return out, loss, memory, cn, inp 92 | else : 93 | return out, memory, cn,inp 94 | 95 | def train_model(self, loader, optimizer) : 96 | loss = 0 97 | for i, data in enumerate(loader): 98 | X_train_batch, y_train_batch = data[0].cuda(),data[1].cuda().float() #torch.cuda.device_count() 99 | optimizer.zero_grad() 100 | loss = self.forward(X_train_batch, y_train_batch)[1]#[0].reshape(-1) 101 | loss.backward() 102 | optimizer.step() 103 | 104 | def eval_mape(self, loader) : 105 | metric_mape = 0 106 | with torch.no_grad() : 107 | for i, data in enumerate(loader): 108 | x, y = data[0].to(device),data[1].to(device).float() 109 | y_out = self.invtransform_minmax(self.forward(x)[0]) 110 | metric_mape += torch.mean(torch.abs((y_out-y)/y)).item() 111 | return metric_mape/(i+1) 112 | 113 | def number_of_parameters(self): 114 | return(sum(p.numel() for p in self.parameters() if p.requires_grad)) 115 | 116 | 117 | 118 | 119 | 120 | -------------------------------------------------------------------------------- /models/GRU_ED.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.nn.modules.normalization import LayerNorm 7 | #from torchviz import make_dot 8 | from torch.autograd import Variable 9 | from torch.nn.modules import ModuleList 10 | import copy 11 | 12 | import numpy as np 13 | import os 14 | from tqdm import tqdm_notebook, trange 15 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 16 | 17 | 18 | class GRU_ED(nn.Module): 19 | def __init__(self, input_dim, emb_dim, hidden_dim, output_dim, n_layers, drop_prob, mean_val, std_val, criterion, init_bias, length_seq): 20 | super(GRU_ED, self).__init__() 21 | self.hidden_dim = hidden_dim 22 | self.n_layers = n_layers 23 | self.output_dim = output_dim 24 | self.length_seq = length_seq 25 | self.ft = False 26 | 27 | self.encoder = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device) 28 | 29 | self.decoder = nn.GRU(hidden_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device) 30 | self.emb = nn.Linear(input_dim, emb_dim).to(device) 31 | self.fc = nn.Linear(hidden_dim, output_dim).to(device) 32 | 33 | self.criterion = 
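A self-contained sketch of the residual encoder step shared by the GRU models (linear embedding, GRU, LayerNorm, skip connection, SiLU), as in `GRU_ED.forward`; the batch size and hidden dimensions are illustrative.

```python
import torch
import torch.nn as nn

emb_dim = hidden_dim = 64                      # illustrative sizes
emb = nn.Linear(3, emb_dim)
norm = nn.LayerNorm(emb_dim, elementwise_affine=False)
encoder = nn.GRU(emb_dim, hidden_dim, num_layers=1, batch_first=True)
norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False)
act = nn.SiLU()

x = norm(emb(torch.randn(8, 30, 3)))           # (batch, seq_len, gauges) -> embedded and normalised
memory, context = encoder(x)
z = act(x + norm1(memory))                     # residual connection around the GRU
print(z.shape)                                 # torch.Size([8, 30, 64])
```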
criterion.to(device) 34 | self.factor = -1 35 | self.act = nn.SiLU() 36 | self.mean = mean_val.to(device) 37 | self.std = std_val.to(device) 38 | 39 | self.drop = nn.Dropout(0) 40 | self.norm = nn.LayerNorm(emb_dim, elementwise_affine=False) 41 | self.norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False) 42 | self.norm2 = nn.LayerNorm(hidden_dim, elementwise_affine=False) 43 | self.init_bias = init_bias 44 | self.init_weights() 45 | 46 | def init_weights(self): 47 | self.apply(self._init_weights) 48 | 49 | def _init_weights(self, module): 50 | if isinstance(module, (nn.Linear)) and (module.bias is not None) :#and (): 51 | nn.init.xavier_normal_(module.weight.data) 52 | if module.weight.shape[0] == self.output_dim : 53 | module.bias.data = module.bias.data + self.init_bias 54 | 55 | 56 | elif isinstance(module, nn.GRU) : 57 | for layer_p in module._all_weights: 58 | for p in layer_p: 59 | if 'weight' in p: 60 | nn.init.xavier_normal_(module.__getattr__(p)) 61 | 62 | 63 | def transform_minmax(self, input) : 64 | return (input-self.mean)/self.std 65 | 66 | def invtransform_minmax(self, input) : 67 | return input*self.std+self.mean 68 | 69 | 70 | def forward(self, input, y = None): 71 | 72 | input = self.transform_minmax(input).to(device) 73 | 74 | #embed 75 | x = self.emb(input) 76 | x = self.norm(x) 77 | #x = self.drop(x) 78 | 79 | #encode 80 | memory, context = self.encoder(x) 81 | z = self.act(x + self.norm1(memory)) #z 82 | 83 | 84 | if self.ft == False : 85 | #decode 86 | out, cn = self.decoder(z,context) 87 | out = self.act(z + self.norm2(out)) 88 | 89 | #linear layer 90 | out = self.fc(self.factor*out) 91 | else : 92 | out = self.fc(z, context[-1:]) 93 | 94 | 95 | 96 | if y != None : 97 | y = self.transform_minmax(y) 98 | loss = self.criterion(out,y) 99 | return out, loss, memory, context, z 100 | else : 101 | return out, memory,context, z 102 | 103 | def train_model(self, loader, optimizer) : 104 | loss = 0 105 | for i, data in enumerate(loader): 106 | X_train_batch, y_train_batch = data[0].cuda(),data[1].cuda().float() #torch.cuda.device_count() 107 | optimizer.zero_grad() 108 | loss = self.forward(X_train_batch, y_train_batch)[1]#[0].reshape(-1) 109 | loss.backward() 110 | optimizer.step() 111 | 112 | def eval_mape(self, loader) : 113 | metric_mape = 0 114 | with torch.no_grad() : 115 | for i, data in enumerate(loader): 116 | x, y = data[0].to(device),data[1].to(device).float() 117 | y_out = self.invtransform_minmax(self.forward(x)[0]) 118 | metric_mape += torch.mean(torch.abs((y_out-y)/y)).item() 119 | return metric_mape/(i+1) 120 | 121 | def number_of_parameters(self): 122 | return(sum(p.numel() for p in self.parameters() if p.requires_grad)) 123 | 124 | 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /models/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /models/VGRU_ED.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import math 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import torch.optim as optim 6 | from torch.nn.modules.normalization import LayerNorm 7 | #from torchviz import make_dot 8 | from torch.autograd import Variable 9 | from torch.nn.modules import ModuleList 10 | import copy 11 | 12 | import numpy as np 13 | import os 14 | from tqdm import tqdm_notebook, trange 15 | device = torch.device('cuda' 
if torch.cuda.is_available() else 'cpu') 16 | 17 | 18 | class VGRU_ED(nn.Module): 19 | def __init__(self, input_dim, emb_dim, hidden_dim, output_dim, n_layers, drop_prob, mean_val, std_val, criterion, init_bias, length_seq, weight_kl = 5e-4): 20 | super(VGRU_ED, self).__init__() 21 | self.hidden_dim = hidden_dim 22 | self.n_layers = n_layers 23 | self.output_dim = output_dim 24 | self.length_seq = length_seq 25 | self.ft = False 26 | self.w = weight_kl 27 | 28 | self.encoder = nn.GRU(emb_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device) 29 | self.fc_enc_mu = nn.Sequential(nn.Linear(64,64), 30 | nn.Dropout(0.1), 31 | nn.LayerNorm(64, elementwise_affine=False), 32 | nn.GELU(), 33 | nn.Linear(64,64)).to(device) 34 | 35 | self.fc_enc_logvar = nn.Sequential(nn.Linear(64,64), 36 | nn.Dropout(0.1), 37 | nn.LayerNorm(64, elementwise_affine=False), 38 | nn.GELU(), 39 | nn.Linear(64,64)).to(device) 40 | 41 | 42 | self.decoder = nn.GRU(hidden_dim, hidden_dim, n_layers, batch_first=True, dropout=drop_prob).to(device) 43 | self.emb = nn.Linear(input_dim, emb_dim).to(device) 44 | self.fc = nn.Linear(hidden_dim, output_dim).to(device) 45 | 46 | self.criterion = criterion.to(device) 47 | self.factor = -1 48 | self.act = nn.SiLU() 49 | self.mean = mean_val.to(device) 50 | self.std = std_val.to(device) 51 | 52 | self.drop = nn.Dropout(0) 53 | self.norm = nn.LayerNorm(emb_dim, elementwise_affine=False) 54 | self.norm1 = nn.LayerNorm(hidden_dim, elementwise_affine=False) 55 | self.norm2 = nn.LayerNorm(hidden_dim, elementwise_affine=False) 56 | self.init_bias = init_bias 57 | self.init_weights() 58 | 59 | def init_weights(self): 60 | self.apply(self._init_weights) 61 | 62 | def _init_weights(self, module): 63 | if isinstance(module, (nn.Linear)) and (module.bias is not None) :#and (): 64 | nn.init.xavier_normal_(module.weight.data) 65 | if module.weight.shape[0] == self.output_dim : 66 | module.bias.data = module.bias.data + self.init_bias 67 | 68 | 69 | elif isinstance(module, nn.GRU) : 70 | for layer_p in module._all_weights: 71 | for p in layer_p: 72 | if 'weight' in p: 73 | nn.init.xavier_normal_(module.__getattr__(p)) 74 | 75 | 76 | def transform_minmax(self, input) : 77 | return (input-self.mean)/self.std 78 | 79 | def invtransform_minmax(self, input) : 80 | return input*self.std+self.mean 81 | 82 | def reparameterize(self, mu, log_var): 83 | # std can not be negative, thats why we use log variance 84 | sigma = torch.exp(0.5 * log_var) + 1e-5 85 | eps = torch.randn_like(sigma) 86 | return mu + sigma * eps 87 | 88 | def forward(self, input, y = None): 89 | 90 | input = self.transform_minmax(input).to(device) 91 | 92 | #embed 93 | x = self.emb(input) 94 | x = self.norm(x) 95 | #x = self.drop(x) 96 | 97 | #encode 98 | memory, context = self.encoder(x) 99 | memory = self.act(x + self.norm1(memory)) #z 100 | 101 | # Split the result embedding into mu and var components 102 | # of the latent Gaussian distribution 103 | mu = self.fc_enc_mu(memory) 104 | log_var = self.fc_enc_logvar(memory) 105 | 106 | #compute the latent embedding 107 | z = self.reparameterize(mu, log_var) 108 | 109 | 110 | if self.ft == False : 111 | if self.train : 112 | #decode 113 | out, cn = self.decoder(z,context) 114 | out = self.act(z + self.norm2(out)) 115 | 116 | else : 117 | #decode 118 | out, cn = self.decoder(mu,context) 119 | out = self.act(mu + self.norm2(out)) 120 | 121 | #linear layer 122 | out = self.fc(self.factor*out) 123 | 124 | else : 125 | out = self.fc(mu, context[-1:]) #keeps only the mean, not 
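A compact sketch of the reparameterisation and KL-divergence pieces that `VGRU_ED` combines into its loss; the tensors are random placeholders standing in for the outputs of `fc_enc_mu` and `fc_enc_logvar`.

```python
import torch

mu = torch.randn(8, 30, 64)                    # placeholder for fc_enc_mu output
log_var = torch.randn(8, 30, 64)               # placeholder for fc_enc_logvar output

sigma = torch.exp(0.5 * log_var)               # working in log-variance keeps sigma positive
z = mu + sigma * torch.randn_like(sigma)       # reparameterisation trick: differentiable sampling

# KL divergence between N(mu, sigma^2) and N(0, 1), summed over all latent dimensions
kl = -0.5 * torch.sum(1 + log_var - mu ** 2 - log_var.exp())
print(z.shape, kl.item())
```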
the latent embedding 126 | 127 | 128 | if y != None : 129 | y = self.transform_minmax(y) 130 | reconstruction_error = self.criterion(out,y) 131 | kl_divergence = (-0.5 * torch.sum(1 + log_var - mu**2 - log_var.exp())) 132 | loss = (reconstruction_error + self.w*kl_divergence).sum() 133 | return out, loss, memory, mu, z 134 | else : 135 | return out, memory, mu, z 136 | 137 | def train_model(self, loader, optimizer) : 138 | loss = 0 139 | for i, data in enumerate(loader): 140 | X_train_batch, y_train_batch = data[0].cuda(),data[1].cuda().float() #torch.cuda.device_count() 141 | optimizer.zero_grad() 142 | loss = self.forward(X_train_batch, y_train_batch)[1]#[0].reshape(-1) 143 | loss.backward() 144 | optimizer.step() 145 | 146 | def eval_mape(self, loader) : 147 | metric_mape = 0 148 | with torch.no_grad() : 149 | for i, data in enumerate(loader): 150 | x, y = data[0].to(device),data[1].to(device).float() 151 | y_out = self.invtransform_minmax(self.forward(x)[0]) 152 | metric_mape += torch.mean(torch.abs((y_out-y)/y)).item() 153 | return metric_mape/(i+1) 154 | 155 | def number_of_parameters(self): 156 | return(sum(p.numel() for p in self.parameters() if p.requires_grad)) 157 | 158 | 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /training_GRU_Decoder_t1.py: -------------------------------------------------------------------------------- 1 | # import deepspeed 2 | 3 | import torch.nn as nn 4 | 5 | import pickle 6 | 7 | from torch.utils.data import TensorDataset, DataLoader 8 | 9 | import pandas as pd 10 | 11 | import datetime 12 | 13 | import matplotlib.pyplot as plt 14 | from IPython.display import display 15 | 16 | import os 17 | import argparse 18 | import random 19 | import tqdm 20 | import time 21 | import numpy as np 22 | import torch 23 | import torch.optim as optim 24 | from torch.nn import functional as F 25 | from torch.utils.data import DataLoader, Dataset 26 | 27 | 28 | import torch.nn.functional as nnf 29 | import torch.nn.functional as F 30 | from torch.nn.modules import ModuleList 31 | from torch.nn.modules.normalization import LayerNorm 32 | from torch.cuda.amp import autocast 33 | from GRU_Decoder import GRU_Decoder 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | def training_args(): 43 | parser=argparse.ArgumentParser(description='GRU') 44 | 45 | parser.add_argument('--timestep', default=1, type=int, 46 | help='Pred timestep') 47 | parser.add_argument('--num_gpus', default=1, type=int, 48 | help='nb_gpus') 49 | parser.add_argument('--nlayers', default=4, type=int, 50 | help='Number of Layers (default: 2)') 51 | parser.add_argument('-b', '--batch_size', default=4096, type=int, 52 | help='mini-batch size (default: 4096)') 53 | parser.add_argument('-e', '--epochs', default=10, type=int, 54 | help='number of total epochs (default: 30)') 55 | parser.add_argument('--hidden_size', default=64, type=int, 56 | help='Nb_neurons (default: 64)') 57 | 58 | parser.add_argument('--device', default=0, type=int, 59 | help='which device') 60 | 61 | parser.add_argument('--maxlen', default=30, type=int, 62 | help='Windows length (default : 30)') 63 | parser.add_argument('--timestep_pred', default=1, type=int, 64 | help='Pred sequence length (default : 1)') 65 | 66 | parser.add_argument('--ratio', default=1, type=float, 67 | help='Ratio sequence (default: 1)') 68 | parser.add_argument('--drop', default=0.1, type=float, 69 | help='Dropout (default: 0.1)') 70 | 71 | 72 | # parser = deepspeed.add_config_arguments(parser) 73 | 
args=parser.parse_args() 74 | return args 75 | 76 | # constants 77 | 78 | args = training_args() 79 | print(args) 80 | # cmd_args = add_argument() 81 | nb_gauges = 3 82 | 83 | 84 | device = torch.device('cuda')#torch.device('cuda' if torch.cuda.is_available() else 'cpu') 85 | 86 | bs = args.batch_size 87 | epochs = args.epochs 88 | maxlen = args.maxlen 89 | 90 | 91 | 92 | import os 93 | fd_data = os.path.split(os.getcwd())[0] ##+ '/Data_'+ str(codebook_size) +'Clusters' 94 | 95 | df = pd.read_pickle(fd_data + '/raw_data_train_complete').reset_index().iloc[:,1:] 96 | data_train = df[(df.ID<=95) & (df.cycle != -1)] 97 | data_val = df[(df.ID>9995) & (df.cycle != -1)].reset_index() 98 | 99 | 100 | 101 | from torch.utils.data import TensorDataset, DataLoader 102 | 103 | seq_cols = ['gauge'+ str(i+1) for i in range(3)] 104 | sequence_length = 30 105 | timesteps_pred = args.timestep_pred 106 | 107 | def gen_sequence_autoregressive(id_df, seq_length, seq_cols,timesteps_pred,h, ratio = 1): 108 | 109 | ind_start = 0 110 | data_array = id_df[seq_cols].values 111 | th = int(ratio*data_array.shape[0]) 112 | data_array = data_array[:th] 113 | num_elements = data_array.shape[0] 114 | 115 | for start, stop in zip(range(0+ind_start, num_elements-seq_length+1-timesteps_pred), range(seq_length+ind_start, num_elements+1-timesteps_pred)): 116 | yield data_array[start+h:stop+h, :]#,data_array[start:stop, :]) 117 | 118 | 119 | def autoregressive_preprocess(data, sequence_length, seq_cols, timestep_pred, type_set = 'float', ratio = 1) : 120 | 121 | seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 122 | for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length) 123 | # generate sequences and convert to numpy array 124 | dbX = np.concatenate(list(seq_gen))#[:,:,:1] 125 | 126 | seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = timestep_pred, ratio = ratio)) 127 | for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length) 128 | # generate sequences and convert to numpy array 129 | dbY = np.concatenate(list(seq_gen))#[:,:,:1] 130 | dbY = dbY[:,-timestep_pred:,:] 131 | 132 | print(dbX.shape) 133 | print(dbY.shape) 134 | 135 | 136 | 137 | print('Preparing datasets') 138 | if type_set =='float' : 139 | X = torch.tensor(dbX, dtype=torch.float)#.to(device) 140 | Y = torch.tensor(dbY, dtype=torch.float)#.to(device) 141 | elif type_set =='long' : 142 | X = torch.tensor(dbX, dtype=torch.long)#.to(device) 143 | Y = torch.tensor(dbY, dtype=torch.long)#.to(device) 144 | 145 | return TensorDataset(X, Y), X, Y#, dbY.mean(0), dbY.std(0) 146 | 147 | from torch.utils.data import TensorDataset, DataLoader 148 | def create_loaders(data, bs=512, jobs=0): 149 | data = DataLoader(data, bs, shuffle=True, num_workers=jobs, pin_memory = True) 150 | return data 151 | for rt in [60, 70, 80, 90] : 152 | 153 | 154 | 155 | train_dl, X_train, y_train = autoregressive_preprocess(data_train, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 156 | val_dl, X_val,y_val = autoregressive_preprocess(data_val, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 157 | 158 | X_trn_full = torch.cat([X_train,X_val],0) 159 | y_trn_full = torch.cat([y_train,y_val],0) 160 | full_train_dl = TensorDataset(X_trn_full, y_trn_full) 161 | 162 | tmp = X_trn_full[:,-1,:]#.values 163 | #trn_min = 
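A toy numpy illustration of the windowing performed by `gen_sequence_autoregressive`: input windows are taken at offset `h=0` and target windows at `h=timesteps_pred`, so each window of gauge readings is paired with the next value of the signal. The one-gauge ramp signal below is made up.

```python
import numpy as np

signal = np.arange(10).reshape(-1, 1).astype(float)   # made-up 1-gauge history of 10 cycles
seq_length, timesteps_pred = 4, 1

def windows(data, h):
    n = data.shape[0]
    for start, stop in zip(range(0, n - seq_length + 1 - timesteps_pred),
                           range(seq_length, n + 1 - timesteps_pred)):
        yield data[start + h:stop + h, :]

X = np.stack(list(windows(signal, h=0)))
Y = np.stack(list(windows(signal, h=timesteps_pred)))[:, -timesteps_pred:, :]
print(X.shape, Y.shape)   # (6, 4, 1) (6, 1, 1): each input window is paired with the next value
```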
tmp.min(axis=0).reshape(1,-1)#[0] 164 | #trn_max = tmp.max(axis=0).reshape(1,-1)#[0] 165 | trn_mean = tmp.mean(axis=0).reshape(1,-1)#[0] 166 | trn_std = tmp.std(axis=0).reshape(1,-1)#[0] 167 | print(trn_mean) 168 | print(trn_std) 169 | 170 | bs = args.batch_size 171 | trn_dl = create_loaders(train_dl, bs, jobs=1) 172 | val_dl = create_loaders(val_dl, 4096, jobs=1) 173 | 174 | 175 | 176 | def update_lr(optimizer, lr): 177 | for g in optimizer.param_groups: 178 | g['lr'] = lr 179 | 180 | import time 181 | 182 | 183 | 184 | 185 | hidden_size = args.hidden_size 186 | nlayers = args.nlayers 187 | embedding_size = args.hidden_size 188 | dropout = args.drop 189 | 190 | def load_checkpoint(filepath, train = False): 191 | checkpoint = torch.load(filepath) 192 | model = checkpoint['model'] 193 | model.load_state_dict(checkpoint['state_dict']) 194 | 195 | if train : 196 | for parameter in model.parameters(): 197 | parameter.requires_grad = True 198 | model.train() 199 | else : 200 | for parameter in model.parameters(): 201 | parameter.requires_grad = False 202 | model.eval() 203 | return model 204 | 205 | criterion = nn.MSELoss() 206 | trn_mean = torch.tensor(trn_mean).float()#.to(device).float() 207 | trn_std = torch.tensor(trn_std).float() 208 | bias = torch.tensor([torch.mean((y_train[:,:,k]-trn_mean[0,k])/trn_std[0,k]) for k in range(3)]).to(device) 209 | model = GRU_Decoder(input_dim=3,emb_dim = hidden_size, hidden_dim=hidden_size, output_dim=3, n_layers=nlayers, drop_prob=dropout, mean_val = trn_mean, std_val = trn_std, criterion = criterion, init_bias = bias, length_seq = args.timestep_pred) 210 | nb_params = model.number_of_parameters() 211 | print(nb_params) 212 | lr = 1e-2 213 | optimizer = optim.Adam(model.parameters(), lr=lr)#, betas=(0.9, 0.95), eps=1e-08) 214 | # criterion = nn.MS#nn.CrossEntropyLoss(weight = class_weights).to(device) 215 | 216 | 217 | if torch.cuda.device_count() > 1: 218 | print("Let's use", torch.cuda.device_count(), "GPUs!") 219 | model = nn.DataParallel(model) 220 | model.to(device) 221 | 222 | 223 | 224 | #create folder 225 | dir_path = f"Decoder_t1_{rt}_100"#folder_models + '/' 226 | os.makedirs(dir_path) 227 | dir_path = dir_path + '/' 228 | 229 | #save the model architecture 230 | f = open(dir_path+"model_parameters.txt", "a") 231 | f.write(str(model.state_dict)) 232 | f.close() 233 | 234 | # #save the log 235 | f = open(dir_path+"log_loss.txt", "a") 236 | # f.write(str(model.state_dict)) 237 | f.close() 238 | 239 | PATH = "model.pth" 240 | 241 | #save the args 242 | f = open(dir_path+"args.txt", "w+") 243 | f.write(str(args)) 244 | f.close() 245 | 246 | #which optimizer 247 | f = open(dir_path+"optim.txt", "w+") 248 | f.write(str(optimizer)) 249 | f.close() 250 | 251 | t0 = time.time() 252 | 253 | 254 | all_trn_mape_track = [] 255 | all_val_mape_track = [] 256 | 257 | trn_mape_track = [] 258 | val_mape_track = [] 259 | 260 | 261 | j = 0 #indicator used to load the model 262 | k = 0 263 | best_mape = 10000 264 | step = 0 265 | epoch_stop = np.zeros(3) 266 | #save some useful informations 267 | infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\ Sequence length : {maxlen} \ 268 | \\ Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \ 269 | \\ Optimizer learning rate : {lr} \\ Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : 
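A minimal sketch of the checkpoint convention these training scripts share: the whole module and its `state_dict` are stored together with the optimizer state, and reloading for continued training re-enables gradients before overriding the learning rate, as `load_checkpoint` and `update_lr` do. The tiny linear model is a placeholder.

```python
import torch
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(3, 3)                          # placeholder for GRU_Decoder / GRU_ED
optimizer = optim.Adam(model.parameters(), lr=1e-2)

checkpoint = {'model': model,
              'state_dict': model.state_dict(),
              'optimizer_dic': optimizer.state_dict()}
torch.save(checkpoint, 'model.pth')

# Reloading for continued training, mirroring load_checkpoint(..., train=True).
# On recent PyTorch you may need weights_only=False, since the full module is pickled.
ckpt = torch.load('model.pth')
model = ckpt['model']
model.load_state_dict(ckpt['state_dict'])
for p in model.parameters():
    p.requires_grad = True
model.train()

optimizer = optim.Adam(model.parameters(), lr=1e-3)
optimizer.load_state_dict(ckpt['optimizer_dic'])
for g in optimizer.param_groups:                 # update_lr(): override the restored learning rate
    g['lr'] = 1e-3
```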
{model.number_of_parameters()}' 270 | f = open(dir_path+"training_readme.txt", "w+") 271 | f.write(infos_model) 272 | f.close() 273 | 274 | 275 | # instantiate model 276 | torch.manual_seed(7) 277 | torch.cuda.manual_seed(7) 278 | for l_r in [lr, 1e-3, 1e-4] : 279 | 280 | 281 | 282 | if j != 0 : 283 | f = open(dir_path+"log_loss.txt", "a") 284 | f.write("Load the model...") 285 | f.write("\n") 286 | f.close() 287 | 288 | model = load_checkpoint(dir_path+PATH, train = True) 289 | 290 | if torch.cuda.device_count() > 1: 291 | print("Let's use", torch.cuda.device_count(), "GPUs!") 292 | model = nn.DataParallel(model) 293 | model.to(device) 294 | 295 | 296 | 297 | checkpoint = torch.load(dir_path+PATH) 298 | optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08) 299 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 300 | update_lr(optimizer, l_r) 301 | best_mape = checkpoint['best_mape'] 302 | trn_mape_track = checkpoint['mape'] 303 | val_mape_track = checkpoint['val_mape'] 304 | epoch_stop = checkpoint['epoch_stop'] 305 | 306 | j = j+1 307 | 308 | # TRAINING 309 | f = open(dir_path+"log_loss.txt", "a") 310 | f.write("Begin training." + "\n") 311 | f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr'])) 312 | f.write("\n") 313 | f.close() 314 | 315 | 316 | 317 | 318 | for epoch in range(args.epochs): 319 | model.train() 320 | # patience = patience-1 321 | t1 = time.time() 322 | loss = 0 323 | trn_mape = 0 324 | 325 | 326 | for i, data in enumerate(trn_dl): 327 | 328 | 329 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 330 | optimizer.zero_grad() 331 | loss = model(X_train_batch, y_train_batch)[1] 332 | loss.backward() 333 | optimizer.step() 334 | 335 | 336 | 337 | # Eval phase 338 | model.eval() 339 | with torch.no_grad() : 340 | train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 341 | trn_mape_track.append(train_mape) 342 | all_trn_mape_track.append(train_mape) 343 | 344 | 345 | val_mape = model.eval_mape(val_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 346 | val_mape_track.append(val_mape) 347 | all_val_mape_track.append(val_mape) 348 | 349 | 350 | f = open(dir_path+"log_loss.txt", "a") 351 | f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}, val mape : {val_mape:2.2%}') 352 | f.write("\n") 353 | f.close() 354 | 355 | 356 | if val_mape < best_mape : 357 | #trials = 0 358 | best_mape = val_mape#.item() 359 | epoch_stop[j:] = k 360 | 361 | f = open(dir_path+"log_loss.txt", "a") 362 | f.write(f'Epoch {epoch+1} best model saved with mape: {val_mape:2.2%}') 363 | f.write("Save the model...") 364 | f.write("\n") 365 | f.close() 366 | 367 | checkpoint = {'model': model, 368 | 'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track, 369 | 'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop, 370 | 'optimizer_dic' : optimizer.state_dict()} 371 | torch.save(checkpoint, dir_path+PATH) 372 | 373 | k = k+1 374 | 375 | 376 | 377 | for l_r in [1e-5] : 378 | 379 | f = open(dir_path+"log_loss.txt", "a") 380 | f.write("Load the model...") 381 | f.write("\n") 382 | f.close() 383 | 384 | model = load_checkpoint(dir_path+PATH, train = True) 385 | 386 | if torch.cuda.device_count() > 1: 387 | print("Let's use", torch.cuda.device_count(), "GPUs!") 388 | model = 
nn.DataParallel(model) 389 | model.to(device) 390 | 391 | 392 | 393 | checkpoint = torch.load(dir_path+PATH) 394 | optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08) 395 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 396 | update_lr(optimizer, l_r) 397 | best_mape = 10000#checkpoint['best_mape'] 398 | trn_mape_track = checkpoint['mape'] 399 | val_mape_track = checkpoint['val_mape'] 400 | epoch_stop = checkpoint['epoch_stop'] 401 | 402 | j = j+1 403 | 404 | # TRAINING 405 | f = open(dir_path+"log_loss.txt", "a") 406 | f.write("Begin training (full set)." + "\n") 407 | f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr'])) 408 | f.write("\n") 409 | f.close() 410 | 411 | print(f'Creating data loaders with batch size: {bs}') 412 | trn_dl = create_loaders(full_train_dl, bs, jobs=1)#4*args.num_gpus) 413 | #trn_dl_eval = create_loaders(full_train_dl, 4096*4, jobs=1)#4*args.num_gpus) 414 | torch.cuda.empty_cache() 415 | 416 | 417 | for epoch in range(args.epochs): 418 | model.train() 419 | t1 = time.time() 420 | loss = 0 421 | trn_mape = 0 422 | 423 | 424 | for i, data in enumerate(trn_dl): 425 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 426 | optimizer.zero_grad() 427 | loss = model(X_train_batch, y_train_batch)[1] 428 | loss.backward() 429 | optimizer.step() 430 | 431 | model.eval() 432 | with torch.no_grad() : 433 | train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 434 | trn_mape_track.append(train_mape) 435 | all_trn_mape_track.append(train_mape) 436 | 437 | 438 | f = open(dir_path+"log_loss.txt", "a") 439 | f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}') 440 | f.write("\n") 441 | f.close() 442 | 443 | if train_mape < best_mape : 444 | #trials = 0 445 | best_mape = train_mape#.item() 446 | epoch_stop[j:] = k 447 | 448 | f = open(dir_path+"log_loss.txt", "a") 449 | f.write(f'Epoch {epoch+1} best model saved with mape: {train_mape:2.2%}') 450 | f.write("Save the model...") 451 | f.write("\n") 452 | f.close() 453 | 454 | checkpoint = {'model': model, 455 | 'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track, 456 | 'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop, 457 | 'optimizer_dic' : optimizer.state_dict()} 458 | torch.save(checkpoint, dir_path+PATH) 459 | 460 | 461 | 462 | 463 | #save some useful informations 464 | infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\ Sequence length : {maxlen} \ 465 | \\ Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \ 466 | \\ Optimizer learning rate : {lr} \\ Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 467 | f = open(dir_path+"training_readme.txt", "w+") 468 | f.write(infos_model) 469 | f.close() 470 | 471 | #del model and empty cache 472 | del(model) 473 | torch.cuda.empty_cache() 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | -------------------------------------------------------------------------------- /training_GRU_ED.py: 
-------------------------------------------------------------------------------- 1 | # import deepspeed 2 | import pickle 3 | import pandas as pd 4 | import datetime 5 | 6 | import matplotlib.pyplot as plt 7 | from IPython.display import display 8 | 9 | import os 10 | import argparse 11 | import random 12 | import tqdm 13 | import time 14 | import numpy as np 15 | import torch 16 | import torch.optim as optim 17 | from torch.nn import functional as F 18 | from torch.utils.data import TensorDataset, DataLoader, Dataset 19 | 20 | import torch.nn.functional as F 21 | from torch.nn.modules import ModuleList 22 | from torch.nn.modules.normalization import LayerNorm 23 | from torch.cuda.amp import autocast 24 | from GRU_ED import GRU_ED 25 | 26 | 27 | 28 | 29 | 30 | 31 | def training_args(): 32 | parser=argparse.ArgumentParser(description='GRU') 33 | 34 | parser.add_argument('--timestep', default=1, type=int, 35 | help='Pred timestep') 36 | parser.add_argument('--num_gpus', default=1, type=int, 37 | help='nb_gpus') 38 | parser.add_argument('--nlayers', default=4, type=int, 39 | help='Number of Layers (default: 2)') 40 | parser.add_argument('-b', '--batch_size', default=4096, type=int, 41 | help='mini-batch size (default: 4096)') 42 | parser.add_argument('-e', '--epochs', default=10, type=int, 43 | help='number of total epochs (default: 30)') 44 | parser.add_argument('--hidden_size', default=64, type=int, 45 | help='Nb_neurons (default: 64)') 46 | 47 | parser.add_argument('--device', default=0, type=int, 48 | help='which device') 49 | 50 | parser.add_argument('--maxlen', default=30, type=int, 51 | help='Windows length (default : 30)') 52 | parser.add_argument('--timestep_pred', default=1, type=int, 53 | help='Pred sequence length (default : 1)') 54 | 55 | parser.add_argument('--ratio', default=1, type=float, 56 | help='Ratio sequence (default: 1)') 57 | parser.add_argument('--drop', default=0.1, type=float, 58 | help='Dropout (default: 0.1)') 59 | 60 | 61 | # parser = deepspeed.add_config_arguments(parser) 62 | args=parser.parse_args() 63 | return args 64 | 65 | # constants 66 | args = training_args() 67 | print(args) 68 | # cmd_args = add_argument() 69 | nb_gauges = 3 70 | 71 | device = torch.device('cuda')#torch.device('cuda' if torch.cuda.is_available() else 'cpu') 72 | bs = args.batch_size 73 | epochs = args.epochs 74 | maxlen = args.maxlen 75 | 76 | 77 | 78 | import os 79 | fd_data = os.path.split(os.getcwd())[0] ##+ '/Data_'+ str(codebook_size) +'Clusters' 80 | 81 | df = pd.read_pickle(fd_data + '/raw_data_train_complete').reset_index().iloc[:,1:] 82 | data_train = df[(df.ID<=95) & (df.cycle != -1)] 83 | data_val = df[(df.ID>9995) & (df.cycle != -1)].reset_index() 84 | 85 | 86 | # instantiate model 87 | torch.manual_seed(7) 88 | torch.cuda.manual_seed(7) 89 | 90 | seq_cols = ['gauge'+ str(i+1) for i in range(3)] 91 | sequence_length = 30 92 | timesteps_pred = args.timestep_pred 93 | 94 | 95 | def gen_sequence_autoregressive(id_df, seq_length, seq_cols,timesteps_pred,h, ratio = 1): 96 | 97 | ind_start = 0 98 | data_array = id_df[seq_cols].values 99 | th = int(ratio*data_array.shape[0]) 100 | data_array = data_array[:th] 101 | num_elements = data_array.shape[0] 102 | 103 | for start, stop in zip(range(0+ind_start, num_elements-seq_length+1-timesteps_pred), range(seq_length+ind_start, num_elements+1-timesteps_pred)): 104 | yield data_array[start+h:stop+h, :]#,data_array[start:stop, :]) 105 | 106 | 107 | def autoregressive_preprocess(data, sequence_length, seq_cols, timestep_pred, type_set = 
'float', ratio = 1) : 108 | 109 | seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 110 | for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length) 111 | # generate sequences and convert to numpy array 112 | dbX = np.concatenate(list(seq_gen))#[:,:,:1] 113 | 114 | seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 115 | for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length) 116 | # generate sequences and convert to numpy array 117 | dbY = np.concatenate(list(seq_gen))#[:,:,:1] 118 | #dbY = dbY[:,-timestep_pred:,:] 119 | 120 | print(dbX.shape) 121 | print(dbY.shape) 122 | 123 | 124 | 125 | print('Preparing datasets') 126 | if type_set =='float' : 127 | X = torch.tensor(dbX, dtype=torch.float)#.to(device) 128 | Y = torch.tensor(dbY, dtype=torch.float)#.to(device) 129 | elif type_set =='long' : 130 | X = torch.tensor(dbX, dtype=torch.long)#.to(device) 131 | Y = torch.tensor(dbY, dtype=torch.long)#.to(device) 132 | 133 | return TensorDataset(X, Y), X, Y#, dbY.mean(0), dbY.std(0) 134 | 135 | from torch.utils.data import TensorDataset, DataLoader 136 | 137 | def create_loaders(data, bs=512, jobs=0): 138 | data = DataLoader(data, bs, shuffle=True, num_workers=jobs, pin_memory = True) 139 | return data 140 | 141 | for rt in [60, 70, 80, 90] : 142 | 143 | 144 | timesteps_pred = 0 145 | train_dl, X_train, y_train = autoregressive_preprocess(data_train, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 146 | val_dl, X_val,y_val = autoregressive_preprocess(data_val, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 147 | 148 | X_trn_full = torch.cat([X_train,X_val],0) 149 | y_trn_full = torch.cat([y_train,y_val],0) 150 | full_train_dl = TensorDataset(X_trn_full, y_trn_full) 151 | 152 | tmp = X_trn_full[:,-1,:]#.values 153 | #trn_min = tmp.min(axis=0).reshape(1,-1)#[0] 154 | #trn_max = tmp.max(axis=0).reshape(1,-1)#[0] 155 | trn_mean = tmp.mean(axis=0).reshape(1,-1)#[0] 156 | trn_std = tmp.std(axis=0).reshape(1,-1)#[0] 157 | print(trn_mean) 158 | print(trn_std) 159 | 160 | bs = args.batch_size 161 | trn_dl = create_loaders(train_dl, bs, jobs=1) 162 | val_dl = create_loaders(val_dl, 4096, jobs=1) 163 | 164 | 165 | 166 | def update_lr(optimizer, lr): 167 | for g in optimizer.param_groups: 168 | g['lr'] = lr 169 | 170 | import time 171 | 172 | 173 | hidden_size = args.hidden_size 174 | nlayers = args.nlayers 175 | embedding_size = args.hidden_size 176 | dropout = args.drop 177 | 178 | def load_checkpoint(filepath, train = False): 179 | checkpoint = torch.load(filepath) 180 | model = checkpoint['model'] 181 | model.load_state_dict(checkpoint['state_dict']) 182 | 183 | if train : 184 | for parameter in model.parameters(): 185 | parameter.requires_grad = True 186 | model.train() 187 | else : 188 | for parameter in model.parameters(): 189 | parameter.requires_grad = False 190 | model.eval() 191 | return model 192 | 193 | criterion = nn.MSELoss() 194 | trn_mean = torch.tensor(trn_mean).float()#.to(device).float() 195 | trn_std = torch.tensor(trn_std).float() 196 | bias = torch.tensor([torch.mean((y_train[:,:,k]-trn_mean[0,k])/trn_std[0,k]) for k in range(3)]).to(device) 197 | model = GRU_ED(input_dim=3,emb_dim = hidden_size, hidden_dim=hidden_size, output_dim=3, n_layers=nlayers, drop_prob=dropout, mean_val = trn_mean, 
std_val = trn_std, criterion = criterion, init_bias = bias, length_seq = args.timestep_pred) 198 | nb_params = model.number_of_parameters() 199 | print(nb_params) 200 | lr = 1e-2 201 | optimizer = optim.Adam(model.parameters(), lr=lr)#, betas=(0.9, 0.95), eps=1e-08) 202 | # criterion = nn.MS#nn.CrossEntropyLoss(weight = class_weights).to(device) 203 | 204 | 205 | if torch.cuda.device_count() > 1: 206 | print("Let's use", torch.cuda.device_count(), "GPUs!") 207 | model = nn.DataParallel(model) 208 | model.to(device) 209 | 210 | 211 | 212 | #create folder 213 | dir_path = f"Decoder_t1_{rt}_100"#folder_models + '/' 214 | os.makedirs(dir_path) 215 | dir_path = dir_path + '/' 216 | 217 | #save the model architecture 218 | f = open(dir_path+"model_parameters.txt", "a") 219 | f.write(str(model.state_dict)) 220 | f.close() 221 | 222 | # #save the log 223 | f = open(dir_path+"log_loss.txt", "a") 224 | # f.write(str(model.state_dict)) 225 | f.close() 226 | 227 | PATH = "model.pth" 228 | 229 | #save the args 230 | f = open(dir_path+"args.txt", "w+") 231 | f.write(str(args)) 232 | f.close() 233 | 234 | #which optimizer 235 | f = open(dir_path+"optim.txt", "w+") 236 | f.write(str(optimizer)) 237 | f.close() 238 | 239 | t0 = time.time() 240 | 241 | 242 | all_trn_mape_track = [] 243 | all_val_mape_track = [] 244 | 245 | trn_mape_track = [] 246 | val_mape_track = [] 247 | 248 | 249 | j = 0 #indicator used to load the model 250 | k = 0 251 | best_mape = 10000 252 | step = 0 253 | epoch_stop = np.zeros(3) 254 | #save some useful informations 255 | infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\ Sequence length : {maxlen} \ 256 | \\ Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \ 257 | \\ Optimizer learning rate : {lr} \\ Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 258 | f = open(dir_path+"training_readme.txt", "w+") 259 | f.write(infos_model) 260 | f.close() 261 | 262 | 263 | 264 | for l_r in [lr, 1e-3, 1e-4] : 265 | 266 | 267 | if j != 0 : 268 | f = open(dir_path+"log_loss.txt", "a") 269 | f.write("Load the model...") 270 | f.write("\n") 271 | f.close() 272 | 273 | model = load_checkpoint(dir_path+PATH, train = True) 274 | 275 | if torch.cuda.device_count() > 1: 276 | print("Let's use", torch.cuda.device_count(), "GPUs!") 277 | model = nn.DataParallel(model) 278 | model.to(device) 279 | 280 | 281 | 282 | checkpoint = torch.load(dir_path+PATH) 283 | optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08) 284 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 285 | update_lr(optimizer, l_r) 286 | best_mape = checkpoint['best_mape'] 287 | trn_mape_track = checkpoint['mape'] 288 | val_mape_track = checkpoint['val_mape'] 289 | epoch_stop = checkpoint['epoch_stop'] 290 | 291 | j = j+1 292 | 293 | # TRAINING 294 | f = open(dir_path+"log_loss.txt", "a") 295 | f.write("Begin training." 
+ "\n") 296 | f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr'])) 297 | f.write("\n") 298 | f.close() 299 | 300 | 301 | 302 | 303 | for epoch in range(args.epochs): 304 | model.train() 305 | # patience = patience-1 306 | t1 = time.time() 307 | loss = 0 308 | trn_mape = 0 309 | 310 | 311 | for i, data in enumerate(trn_dl): 312 | 313 | 314 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 315 | optimizer.zero_grad() 316 | loss = model(X_train_batch, y_train_batch)[1] 317 | loss.backward() 318 | optimizer.step() 319 | 320 | 321 | 322 | # Eval phase 323 | model.eval() 324 | with torch.no_grad() : 325 | train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 326 | trn_mape_track.append(train_mape) 327 | all_trn_mape_track.append(train_mape) 328 | 329 | 330 | val_mape = model.eval_mape(val_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 331 | val_mape_track.append(val_mape) 332 | all_val_mape_track.append(val_mape) 333 | 334 | 335 | f = open(dir_path+"log_loss.txt", "a") 336 | f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}, val mape : {val_mape:2.2%}') 337 | f.write("\n") 338 | f.close() 339 | 340 | 341 | if val_mape < best_mape : 342 | #trials = 0 343 | best_mape = val_mape#.item() 344 | epoch_stop[j:] = k 345 | 346 | f = open(dir_path+"log_loss.txt", "a") 347 | f.write(f'Epoch {epoch+1} best model saved with mape: {val_mape:2.2%}') 348 | f.write("Save the model...") 349 | f.write("\n") 350 | f.close() 351 | 352 | checkpoint = {'model': model, 353 | 'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track, 354 | 'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop, 355 | 'optimizer_dic' : optimizer.state_dict()} 356 | torch.save(checkpoint, dir_path+PATH) 357 | 358 | k = k+1 359 | 360 | 361 | 362 | for l_r in [1e-5] : 363 | 364 | f = open(dir_path+"log_loss.txt", "a") 365 | f.write("Load the model...") 366 | f.write("\n") 367 | f.close() 368 | 369 | model = load_checkpoint(dir_path+PATH, train = True) 370 | 371 | if torch.cuda.device_count() > 1: 372 | print("Let's use", torch.cuda.device_count(), "GPUs!") 373 | model = nn.DataParallel(model) 374 | model.to(device) 375 | 376 | 377 | 378 | checkpoint = torch.load(dir_path+PATH) 379 | optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08) 380 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 381 | update_lr(optimizer, l_r) 382 | best_mape = 10000#checkpoint['best_mape'] 383 | trn_mape_track = checkpoint['mape'] 384 | val_mape_track = checkpoint['val_mape'] 385 | epoch_stop = checkpoint['epoch_stop'] 386 | 387 | j = j+1 388 | 389 | # TRAINING 390 | f = open(dir_path+"log_loss.txt", "a") 391 | f.write("Begin training (full set)." 
+ "\n") 392 | f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr'])) 393 | f.write("\n") 394 | f.close() 395 | 396 | print(f'Creating data loaders with batch size: {bs}') 397 | trn_dl = create_loaders(full_train_dl, bs, jobs=1)#4*args.num_gpus) 398 | #trn_dl_eval = create_loaders(full_train_dl, 4096*4, jobs=1)#4*args.num_gpus) 399 | torch.cuda.empty_cache() 400 | 401 | 402 | for epoch in range(args.epochs): 403 | model.train() 404 | t1 = time.time() 405 | loss = 0 406 | trn_mape = 0 407 | 408 | 409 | for i, data in enumerate(trn_dl): 410 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 411 | optimizer.zero_grad() 412 | loss = model(X_train_batch, y_train_batch)[1] 413 | loss.backward() 414 | optimizer.step() 415 | 416 | model.eval() 417 | with torch.no_grad() : 418 | train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 419 | trn_mape_track.append(train_mape) 420 | all_trn_mape_track.append(train_mape) 421 | 422 | 423 | f = open(dir_path+"log_loss.txt", "a") 424 | f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}') 425 | f.write("\n") 426 | f.close() 427 | 428 | if train_mape < best_mape : 429 | #trials = 0 430 | best_mape = train_mape#.item() 431 | epoch_stop[j:] = k 432 | 433 | f = open(dir_path+"log_loss.txt", "a") 434 | f.write(f'Epoch {epoch+1} best model saved with mape: {train_mape:2.2%}') 435 | f.write("Save the model...") 436 | f.write("\n") 437 | f.close() 438 | 439 | checkpoint = {'model': model, 440 | 'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track, 441 | 'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop, 442 | 'optimizer_dic' : optimizer.state_dict()} 443 | torch.save(checkpoint, dir_path+PATH) 444 | 445 | 446 | 447 | 448 | #save some useful informations 449 | infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\ Sequence length : {maxlen} \ 450 | \\ Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \ 451 | \\ Optimizer learning rate : {lr} \\ Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 452 | f = open(dir_path+"training_readme.txt", "w+") 453 | f.write(infos_model) 454 | f.close() 455 | 456 | #del model and empty cache 457 | del(model) 458 | torch.cuda.empty_cache() 459 | -------------------------------------------------------------------------------- /training_VGRU_ED.py: -------------------------------------------------------------------------------- 1 | # import deepspeed 2 | 3 | import torch.nn as nn 4 | 5 | import pickle 6 | 7 | from torch.utils.data import TensorDataset, DataLoader 8 | 9 | import pandas as pd 10 | 11 | import datetime 12 | 13 | import matplotlib.pyplot as plt 14 | from IPython.display import display 15 | 16 | import os 17 | import argparse 18 | import random 19 | import tqdm 20 | import time 21 | import numpy as np 22 | import torch 23 | import torch.optim as optim 24 | from torch.nn import functional as F 25 | from torch.utils.data import DataLoader, Dataset 26 | 27 | 28 | import torch.nn.functional as nnf 29 | import torch.nn.functional as F 30 | from torch.nn.modules import ModuleList 31 | from torch.nn.modules.normalization import 
LayerNorm 32 | from torch.cuda.amp import autocast 33 | from VGRU_ED import VGRU_ED 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | def training_args(): 43 | parser=argparse.ArgumentParser(description='VGRUED') 44 | 45 | parser.add_argument('--timestep', default=1, type=int, 46 | help='Pred timestep') 47 | parser.add_argument('--num_gpus', default=1, type=int, 48 | help='nb_gpus') 49 | parser.add_argument('--nlayers', default=4, type=int, 50 | help='Number of Layers (default: 2)') 51 | parser.add_argument('-b', '--batch_size', default=4096, type=int, 52 | help='mini-batch size (default: 4096)') 53 | parser.add_argument('-e', '--epochs', default=10, type=int, 54 | help='number of total epochs (default: 30)') 55 | parser.add_argument('--hidden_size', default=64, type=int, 56 | help='Nb_neurons (default: 64)') 57 | 58 | parser.add_argument('--device', default=0, type=int, 59 | help='which device') 60 | 61 | parser.add_argument('--maxlen', default=30, type=int, 62 | help='Windows length (default : 30)') 63 | parser.add_argument('--timestep_pred', default=1, type=int, 64 | help='Pred sequence length (default : 1)') 65 | 66 | parser.add_argument('--ratio', default=1, type=float, 67 | help='Ratio sequence (default: 1)') 68 | parser.add_argument('--drop', default=0.1, type=float, 69 | help='Dropout (default: 0.1)') 70 | parser.add_argument('--wgt', default=1e-4, type=float, 71 | help='Weight for loss function') 72 | 73 | 74 | # parser = deepspeed.add_config_arguments(parser) 75 | args=parser.parse_args() 76 | return args 77 | 78 | # constants 79 | 80 | args = training_args() 81 | print(args) 82 | # cmd_args = add_argument() 83 | nb_gauges = 3 84 | 85 | 86 | device = torch.device('cuda')#torch.device('cuda' if torch.cuda.is_available() else 'cpu') 87 | 88 | bs = args.batch_size 89 | epochs = args.epochs 90 | maxlen = args.maxlen 91 | 92 | 93 | 94 | import os 95 | fd_data = os.path.split(os.getcwd())[0] ##+ '/Data_'+ str(codebook_size) +'Clusters' 96 | 97 | df = pd.read_pickle(fd_data + '/raw_data_train_complete').reset_index().iloc[:,1:] 98 | data_train = df[(df.ID<=9500) & (df.cycle != -1)] 99 | data_val = df[(df.ID>9500) & (df.cycle != -1)].reset_index() 100 | 101 | 102 | 103 | from torch.utils.data import TensorDataset, DataLoader 104 | # instantiate model 105 | torch.manual_seed(7) 106 | torch.cuda.manual_seed(7) 107 | 108 | seq_cols = ['gauge'+ str(i+1) for i in range(3)] 109 | sequence_length = 30 110 | #timesteps_pred = args.timestep_pred 111 | 112 | def gen_sequence_autoregressive(id_df, seq_length, seq_cols,timesteps_pred,h, ratio = 1): 113 | 114 | ind_start = 0 115 | data_array = id_df[seq_cols].values 116 | th = int(ratio*data_array.shape[0]) 117 | data_array = data_array[:th] 118 | num_elements = data_array.shape[0] 119 | 120 | for start, stop in zip(range(0+ind_start, num_elements-seq_length+1-timesteps_pred), range(seq_length+ind_start, num_elements+1-timesteps_pred)): 121 | yield data_array[start+h:stop+h, :]#,data_array[start:stop, :]) 122 | 123 | 124 | def autoregressive_preprocess(data, sequence_length, seq_cols, timestep_pred, type_set = 'float', ratio = 1) : 125 | 126 | seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 127 | for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length) 128 | # generate sequences and convert to numpy array 129 | dbX = np.concatenate(list(seq_gen))#[:,:,:1] 130 | 131 | seq_gen = (list(gen_sequence_autoregressive(data[data['ID']==id], 
sequence_length, seq_cols, timesteps_pred=timestep_pred, h = 0, ratio = ratio)) 132 | for id in data['ID'].unique() if len(data[data['ID']==id]) >= sequence_length) 133 | # generate sequences and convert to numpy array 134 | dbY = np.concatenate(list(seq_gen))#[:,:,:1] 135 | #dbY = dbY[:,-timestep_pred:,:] 136 | 137 | print(dbX.shape) 138 | print(dbY.shape) 139 | 140 | 141 | 142 | print('Preparing datasets') 143 | if type_set =='float' : 144 | X = torch.tensor(dbX, dtype=torch.float)#.to(device) 145 | Y = torch.tensor(dbY, dtype=torch.float)#.to(device) 146 | elif type_set =='long' : 147 | X = torch.tensor(dbX, dtype=torch.long)#.to(device) 148 | Y = torch.tensor(dbY, dtype=torch.long)#.to(device) 149 | 150 | return TensorDataset(X, Y), X, Y#, dbY.mean(0), dbY.std(0) 151 | 152 | from torch.utils.data import TensorDataset, DataLoader 153 | def create_loaders(data, bs=512, jobs=0): 154 | data = DataLoader(data, bs, shuffle=True, num_workers=jobs, pin_memory = True) 155 | return data 156 | 157 | 158 | for rt in [60, 70, 80, 90] : 159 | 160 | 161 | timesteps_pred = 0 162 | train_dl, X_train, y_train = autoregressive_preprocess(data_train, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 163 | val_dl, X_val,y_val = autoregressive_preprocess(data_val, sequence_length, seq_cols, timesteps_pred, type_set = 'float', ratio = rt/100) 164 | 165 | X_trn_full = torch.cat([X_train,X_val],0) 166 | y_trn_full = torch.cat([y_train,y_val],0) 167 | full_train_dl = TensorDataset(X_trn_full, y_trn_full) 168 | 169 | tmp = X_trn_full[:,-1,:]#.values 170 | #trn_min = tmp.min(axis=0).reshape(1,-1)#[0] 171 | #trn_max = tmp.max(axis=0).reshape(1,-1)#[0] 172 | trn_mean = tmp.mean(axis=0).reshape(1,-1)#[0] 173 | trn_std = tmp.std(axis=0).reshape(1,-1)#[0] 174 | print(trn_mean) 175 | print(trn_std) 176 | 177 | bs = args.batch_size 178 | trn_dl = create_loaders(train_dl, bs, jobs=1) 179 | val_dl = create_loaders(val_dl, 4096, jobs=1) 180 | 181 | 182 | 183 | def update_lr(optimizer, lr): 184 | for g in optimizer.param_groups: 185 | g['lr'] = lr 186 | 187 | import time 188 | 189 | 190 | 191 | 192 | hidden_size = args.hidden_size 193 | nlayers = args.nlayers 194 | embedding_size = args.hidden_size 195 | dropout = args.drop 196 | 197 | def load_checkpoint(filepath, train = False): 198 | checkpoint = torch.load(filepath) 199 | model = checkpoint['model'] 200 | model.load_state_dict(checkpoint['state_dict']) 201 | 202 | if train : 203 | for parameter in model.parameters(): 204 | parameter.requires_grad = True 205 | model.train() 206 | else : 207 | for parameter in model.parameters(): 208 | parameter.requires_grad = False 209 | model.eval() 210 | return model 211 | 212 | criterion = nn.MSELoss() 213 | trn_mean = torch.tensor(trn_mean).float()#.to(device).float() 214 | trn_std = torch.tensor(trn_std).float() 215 | bias = torch.tensor([torch.mean((y_train[:,:,k]-trn_mean[0,k])/trn_std[0,k]) for k in range(3)]).to(device) 216 | model = VGRU_ED(input_dim=3,emb_dim = hidden_size, hidden_dim=hidden_size, output_dim=3, n_layers=nlayers, drop_prob=dropout, mean_val = trn_mean, std_val = trn_std, criterion = criterion, init_bias = bias, length_seq = args.timestep_pred, weight_kl = args.wgt) 217 | nb_params = model.number_of_parameters() 218 | print(nb_params) 219 | lr = 1e-2 220 | optimizer = optim.Adam(model.parameters(), lr=lr)#, betas=(0.9, 0.95), eps=1e-08) 221 | # criterion = nn.MS#nn.CrossEntropyLoss(weight = class_weights).to(device) 222 | 223 | 224 | if torch.cuda.device_count() > 1: 225 | 
print("Let's use", torch.cuda.device_count(), "GPUs!") 226 | model = nn.DataParallel(model) 227 | model.to(device) 228 | 229 | 230 | 231 | #create folder 232 | dir_path = f"VAE_{rt}"#folder_models + '/' 233 | os.makedirs(dir_path) 234 | dir_path = dir_path + '/' 235 | 236 | #save the model architecture 237 | f = open(dir_path+"model_parameters.txt", "a") 238 | f.write(str(model.state_dict)) 239 | f.close() 240 | 241 | # #save the log 242 | f = open(dir_path+"log_loss.txt", "a") 243 | # f.write(str(model.state_dict)) 244 | f.close() 245 | 246 | PATH = "model.pth" 247 | 248 | #save the args 249 | f = open(dir_path+"args.txt", "w+") 250 | f.write(str(args)) 251 | f.close() 252 | 253 | #which optimizer 254 | f = open(dir_path+"optim.txt", "w+") 255 | f.write(str(optimizer)) 256 | f.close() 257 | 258 | t0 = time.time() 259 | 260 | 261 | all_trn_mape_track = [] 262 | all_val_mape_track = [] 263 | 264 | trn_mape_track = [] 265 | val_mape_track = [] 266 | 267 | 268 | j = 0 #indicator used to load the model 269 | k = 0 270 | best_mape = 10000 271 | step = 0 272 | epoch_stop = np.zeros(3) 273 | #save some useful informations 274 | infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\ Sequence length : {maxlen} \ 275 | \\ Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \ 276 | \\ Optimizer learning rate : {lr} \\ Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 277 | f = open(dir_path+"training_readme.txt", "w+") 278 | f.write(infos_model) 279 | f.close() 280 | 281 | 282 | 283 | for l_r in [lr, 1e-3, 1e-4] : 284 | 285 | 286 | 287 | if j != 0 : 288 | f = open(dir_path+"log_loss.txt", "a") 289 | f.write("Load the model...") 290 | f.write("\n") 291 | f.close() 292 | 293 | model = load_checkpoint(dir_path+PATH, train = True) 294 | 295 | if torch.cuda.device_count() > 1: 296 | print("Let's use", torch.cuda.device_count(), "GPUs!") 297 | model = nn.DataParallel(model) 298 | model.to(device) 299 | 300 | 301 | 302 | checkpoint = torch.load(dir_path+PATH) 303 | optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08) 304 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 305 | update_lr(optimizer, l_r) 306 | best_mape = checkpoint['best_mape'] 307 | trn_mape_track = checkpoint['mape'] 308 | val_mape_track = checkpoint['val_mape'] 309 | epoch_stop = checkpoint['epoch_stop'] 310 | 311 | j = j+1 312 | 313 | # TRAINING 314 | f = open(dir_path+"log_loss.txt", "a") 315 | f.write("Begin training." 
+ "\n") 316 | f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr'])) 317 | f.write("\n") 318 | f.close() 319 | 320 | 321 | 322 | 323 | for epoch in range(args.epochs): 324 | model.train() 325 | # patience = patience-1 326 | t1 = time.time() 327 | loss = 0 328 | trn_mape = 0 329 | 330 | 331 | for i, data in enumerate(trn_dl): 332 | 333 | 334 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 335 | optimizer.zero_grad() 336 | loss = model(X_train_batch, y_train_batch)[1] 337 | loss.backward() 338 | optimizer.step() 339 | 340 | 341 | 342 | # Eval phase 343 | model.eval() 344 | with torch.no_grad() : 345 | train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 346 | trn_mape_track.append(train_mape) 347 | all_trn_mape_track.append(train_mape) 348 | 349 | 350 | val_mape = model.eval_mape(val_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 351 | val_mape_track.append(val_mape) 352 | all_val_mape_track.append(val_mape) 353 | 354 | 355 | f = open(dir_path+"log_loss.txt", "a") 356 | f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}, val mape : {val_mape:2.2%}') 357 | f.write("\n") 358 | f.close() 359 | 360 | 361 | if val_mape < best_mape : 362 | #trials = 0 363 | best_mape = val_mape#.item() 364 | epoch_stop[j:] = k 365 | 366 | f = open(dir_path+"log_loss.txt", "a") 367 | f.write(f'Epoch {epoch+1} best model saved with mape: {val_mape:2.2%}') 368 | f.write("Save the model...") 369 | f.write("\n") 370 | f.close() 371 | 372 | checkpoint = {'model': model, 373 | 'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track, 374 | 'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop, 375 | 'optimizer_dic' : optimizer.state_dict()} 376 | torch.save(checkpoint, dir_path+PATH) 377 | 378 | k = k+1 379 | 380 | 381 | 382 | for l_r in [1e-5] : 383 | 384 | f = open(dir_path+"log_loss.txt", "a") 385 | f.write("Load the model...") 386 | f.write("\n") 387 | f.close() 388 | 389 | model = load_checkpoint(dir_path+PATH, train = True) 390 | 391 | if torch.cuda.device_count() > 1: 392 | print("Let's use", torch.cuda.device_count(), "GPUs!") 393 | model = nn.DataParallel(model) 394 | model.to(device) 395 | 396 | 397 | 398 | checkpoint = torch.load(dir_path+PATH) 399 | optimizer = optim.Adam(model.parameters(), lr=l_r)#, betas=(0.9, 0.95), eps=1e-08) 400 | optimizer.load_state_dict(checkpoint['optimizer_dic']) 401 | update_lr(optimizer, l_r) 402 | best_mape = 10000#checkpoint['best_mape'] 403 | trn_mape_track = checkpoint['mape'] 404 | val_mape_track = checkpoint['val_mape'] 405 | epoch_stop = checkpoint['epoch_stop'] 406 | 407 | j = j+1 408 | 409 | # TRAINING 410 | f = open(dir_path+"log_loss.txt", "a") 411 | f.write("Begin training (full set)." 
+ "\n") 412 | f.write('Learning rate adjusted to {:0.7f}'.format(optimizer.param_groups[0]['lr'])) 413 | f.write("\n") 414 | f.close() 415 | 416 | print(f'Creating data loaders with batch size: {bs}') 417 | trn_dl = create_loaders(full_train_dl, bs, jobs=1)#4*args.num_gpus) 418 | #trn_dl_eval = create_loaders(full_train_dl, 4096*4, jobs=1)#4*args.num_gpus) 419 | torch.cuda.empty_cache() 420 | 421 | 422 | for epoch in range(args.epochs): 423 | model.train() 424 | t1 = time.time() 425 | loss = 0 426 | trn_mape = 0 427 | 428 | 429 | for i, data in enumerate(trn_dl): 430 | X_train_batch, y_train_batch = data[0].to(device),data[1].to(device).float() 431 | optimizer.zero_grad() 432 | loss = model(X_train_batch, y_train_batch)[1] 433 | loss.backward() 434 | optimizer.step() 435 | 436 | model.eval() 437 | with torch.no_grad() : 438 | train_mape = model.eval_mape(trn_dl)#torch.mean(torch.abs((pred_train-y_train.to(device))/y_train.to(device)))#.item() 439 | trn_mape_track.append(train_mape) 440 | all_trn_mape_track.append(train_mape) 441 | 442 | 443 | f = open(dir_path+"log_loss.txt", "a") 444 | f.write(f'Epoch {epoch+1}/{args.epochs} in {time.time()-t1}s, mape : {train_mape:2.2%}') 445 | f.write("\n") 446 | f.close() 447 | 448 | if train_mape < best_mape : 449 | #trials = 0 450 | best_mape = train_mape#.item() 451 | epoch_stop[j:] = k 452 | 453 | f = open(dir_path+"log_loss.txt", "a") 454 | f.write(f'Epoch {epoch+1} best model saved with mape: {train_mape:2.2%}') 455 | f.write("Save the model...") 456 | f.write("\n") 457 | f.close() 458 | 459 | checkpoint = {'model': model, 460 | 'mape': trn_mape_track, 'val_mape' : val_mape_track, 'all_mape' : all_trn_mape_track , 'all_val_mape' : all_val_mape_track, 461 | 'state_dict': model.state_dict(), 'best_mape' : best_mape, 'epoch_stop' : epoch_stop, 462 | 'optimizer_dic' : optimizer.state_dict()} 463 | torch.save(checkpoint, dir_path+PATH) 464 | 465 | 466 | 467 | 468 | #save some useful informations 469 | infos_model = f'Number of training aircraft components : {len(np.unique(data_train.ID))} \\ Number of validation aircraft components : {len(np.unique(data_val.ID))} \\ Sequence length : {maxlen} \ 470 | \\ Number of training samples : {X_train.shape[0]} \\ Number of validation samples : {X_val.shape[0]} \\ Number of epochs : {k+1} \ 471 | \\ Optimizer learning rate : {lr} \\ Running time in minutes : {(time.time()-t0)/60} \\ Nb model parameters : {model.number_of_parameters()}' 472 | f = open(dir_path+"training_readme.txt", "w+") 473 | f.write(infos_model) 474 | f.close() 475 | 476 | #del model and empty cache 477 | del(model) 478 | torch.cuda.empty_cache() 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | -------------------------------------------------------------------------------- /utils/utils_ft.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.optim as optim 3 | from torch.nn import functional as F 4 | from torch.utils.data import DataLoader, Dataset 5 | from torch.autograd import Variable 6 | import torch.nn as nn 7 | from torch.utils.data import TensorDataset, DataLoader 8 | from torch.nn.modules import ModuleList 9 | from torch.nn.modules.normalization import LayerNorm 10 | 11 | import pickle 12 | import pandas as pd 13 | import datetime 14 | import matplotlib.pyplot as plt 15 | from IPython.display import display 16 | import os 17 | import argparse 
18 | import random
19 | import tqdm
20 | import time
21 | import numpy as np
22 | 
23 | 
24 | def load_checkpoint(filepath, train=False):
25 |     """Load a saved checkpoint and return the model, ready for training or inference."""
26 |     checkpoint = torch.load(filepath)
27 |     model = checkpoint['model']
28 |     model.load_state_dict(checkpoint['state_dict'])
29 | 
30 |     if train:
31 |         for parameter in model.parameters():
32 |             parameter.requires_grad = True
33 |         model.train()
34 |     else:
35 |         for parameter in model.parameters():
36 |             parameter.requires_grad = False
37 |         model.eval()
38 |     return model
39 | 
40 | 
41 | def set_parameter_requires_grad(model, feature_extracting):
42 |     """Freeze all parameters when the model is used as a fixed feature extractor."""
43 |     if feature_extracting:
44 |         for param in model.parameters():
45 |             param.requires_grad = False
46 | 
47 | 
48 | # prepare forecasting data
49 | def gen_RUL_sequence(id_df, seq_length, seq_cols, type_data='Input', ind_start=0):
50 |     """Yield sliding windows ('Input') or their last-step targets ('Output') for one unit."""
51 |     data_array = id_df[seq_cols].values
52 |     num_elements = data_array.shape[0]
53 |     if type_data == 'Input':
54 |         for start, stop in zip(range(0 + ind_start, num_elements - seq_length + 1), range(seq_length + ind_start, num_elements + 1)):
55 |             yield data_array[start:stop, :]
56 |     else:
57 |         for start, stop in zip(range(0 + ind_start, num_elements - seq_length + 1), range(seq_length + ind_start, num_elements + 1)):
58 |             yield data_array[stop - 1, :]
59 | 
60 | 
61 | def seq_preprocess(data, sequence_length, seq_cols_in, seq_cols_out, type_set='Train', num_type='float'):
62 |     """Build (X, y) tensors of sliding windows and last-step targets, grouped by unit ID."""
63 | 
64 |     # generate sequences and convert to numpy arrays
65 |     if type_set == 'Test':
66 |         dbX = [data[data['ID'] == id][seq_cols_in].values[-sequence_length:] for id in data['ID'].unique()]
67 |         dbX = np.asarray(dbX)
68 |         dbY = [data[data['ID'] == id][seq_cols_out].values[-1] for id in data['ID'].unique()]
69 |         dbY = np.asarray(dbY)
70 |     else:
71 |         seq_gen = (list(gen_RUL_sequence(data[data['ID'] == id], sequence_length, seq_cols_in, type_data='Input')) for id in data['ID'].unique())
72 |         dbX = np.concatenate(list(seq_gen))
73 |         seq_gen = (list(gen_RUL_sequence(data[data['ID'] == id], sequence_length, seq_cols_out, type_data='Output')) for id in data['ID'].unique())
74 |         dbY = np.concatenate(list(seq_gen)).reshape(-1,)
75 | 
76 |     print(dbX.shape)
77 |     print(dbY.shape)
78 | 
79 |     print('Preparing datasets')
80 |     # float targets for regression, long targets for classification-style labels
81 |     torch_type = torch.float if num_type == 'float' else torch.long
82 |     X_torch = torch.tensor(dbX, dtype=torch.float)
83 |     y_torch = torch.tensor(dbY, dtype=torch_type)
84 | 
85 |     return X_torch, y_torch
86 | 
--------------------------------------------------------------------------------
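For reference, a minimal sketch of how the helpers in utils/utils_ft.py can be wired together for the fine-tuning workflow. It is not part of the repository: the gauge and RUL column names, the pickled DataFrame layout, and the checkpoint path are assumptions used only for illustration.

import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

from utils import utils_ft

# Assumed input: a pickled DataFrame with an 'ID' column, three gauge columns
# and a 'RUL' target column (column names are illustrative, not taken from the repo).
df = pd.read_pickle('data/raw_data_train_complete')
seq_cols_in = ['gauge1', 'gauge2', 'gauge3']
seq_cols_out = ['RUL']

# Build sliding windows of length 30 and the matching last-step targets,
# then wrap them in a shuffled DataLoader.
X, y = utils_ft.seq_preprocess(df, sequence_length=30,
                               seq_cols_in=seq_cols_in,
                               seq_cols_out=seq_cols_out,
                               type_set='Train', num_type='float')
loader = DataLoader(TensorDataset(X, y), batch_size=4096, shuffle=True)

# Restore a pre-trained checkpoint in training mode and freeze its backbone,
# leaving only a newly attached head to be updated during fine-tuning.
model = utils_ft.load_checkpoint('Decoder_t1_60_100/model.pth', train=True)  # assumed checkpoint path
utils_ft.set_parameter_requires_grad(model, feature_extracting=True)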