├── models ├── __init__.py └── RULPrediction │ ├── __init__.py │ ├── BiGRU_TSAM.py │ ├── ResBlockModel.py │ ├── experiments.py │ ├── CNN_GRU.py │ ├── SimpleModels.py │ ├── DAMCNN.py │ ├── MLPMixer.py │ ├── ContrastiveModules.py │ └── IMDSSN.py ├── dataset ├── __init__.py ├── utils.py └── cmapss.py ├── train ├── __init__.py ├── functions.py └── trainable.py ├── .assets ├── FSGRI.png ├── layer_structure.png └── model_structure.png ├── LICENSE └── readme.md /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainable import TrainableModule 2 | from .functions import * 3 | -------------------------------------------------------------------------------- /.assets/FSGRI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuen1590/PhmDeepLearningProjects/HEAD/.assets/FSGRI.png -------------------------------------------------------------------------------- /.assets/layer_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuen1590/PhmDeepLearningProjects/HEAD/.assets/layer_structure.png -------------------------------------------------------------------------------- /.assets/model_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuen1590/PhmDeepLearningProjects/HEAD/.assets/model_structure.png -------------------------------------------------------------------------------- /models/RULPrediction/__init__.py: -------------------------------------------------------------------------------- 1 | from .CNN_GRU import CnnGru 2 | from .ContrastiveModules import ContrastiveModel, MSEContrastiveLoss 3 | from .ResBlockModel import ResNet 4 | from .DAMCNN import DAMCNN 5 | -------------------------------------------------------------------------------- /train/functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def precision(): 6 | pass 7 | 8 | 9 | def accuracy(): 10 | pass 11 | 12 | 13 | def recall(): 14 | pass 15 | 16 | 17 | def confusion_matrix(): 18 | pass 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 fuen1590 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/dataset/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sklearn.metrics as me
3 | 
4 | from torch.utils.data import Dataset
5 | 
6 | 
7 | class Sampler:
8 |     """
9 |     This class is used to customize your own sampling method for a dataset.
10 | 
11 |     This class was first used with the cmapss.CMAPSS dataset; sampling for more custom datasets will be supported.
12 | 
13 |     When customizing your own sampler, you should:
14 | 
15 |     1. Override the sample(index) method. The sample(index) method should return the sample and label, similar to
16 |     the torch.utils.data.Dataset class.
17 | 
18 |     2. Make sure your __init__(dataset) method contains the sampling target argument "dataset".
19 |     The argument "dataset" should be a torch.utils.data.Dataset instance.
20 |     Call super().__init__(dataset) on the first line of your __init__ method.
21 |     """
22 |     def __init__(self, dataset: Dataset):
23 |         self.dataset = dataset
24 | 
25 |     def sample(self, index: int):
26 |         raise NotImplementedError("You must define the Sampler.sample(index) method.")
27 | 
28 | 
29 | def compute_metrics(path):
30 |     out = np.load(path+r"/model_test_output_part1.npy")
31 |     label = np.load(path+r"/model_test_labels_part1.npy")
32 |     mse = me.mean_squared_error(label, out)  # sklearn metrics expect (y_true, y_pred)
33 |     mape = me.mean_absolute_percentage_error(label, out)
34 |     print("MSE:{}".format(mse))
35 |     print("MAPE:{}".format(mape))
36 |     print("R2:{}".format(me.r2_score(label, out)))
37 |     return (out, label), mse, mape
38 | 
39 | 
40 | def count_parameters(model):
41 |     return sum(p.numel() for p in model.parameters() if p.requires_grad)
42 | 
43 | 
44 | def gaussian_distribution(x: int or np.ndarray, mean, std):
45 |     l1 = 1/((2*np.pi)**0.5 * std)
46 |     l2 = np.exp(-((x-mean)**2)/(2*std**2))
47 |     return l1*l2
48 | 
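

# A minimal custom sampler sketch following the Sampler docstring above.
# "StrideSampler" is a hypothetical example class for illustration only;
# it is not part of this repository.
class StrideSampler(Sampler):
    def __init__(self, dataset: Dataset, stride: int = 2):
        super().__init__(dataset)  # always forward the target dataset first
        self.stride = stride

    def sample(self, index: int):
        # Return (sample, label) like torch.utils.data.Dataset, but strided.
        return self.dataset[index * self.stride]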
--------------------------------------------------------------------------------
/models/RULPrediction/BiGRU_TSAM.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | from ContrastiveModules import ContrastiveModel, pn_rul_compute
5 | 
6 | """
7 | Implementation of https://doi.org/10.1016/j.ress.2021.108297
8 | """
9 | 
10 | 
11 | class TSAM(nn.Module):
12 |     def __init__(self, window_size, in_features):
13 |         super(TSAM, self).__init__()
14 |         # self.layers = nn.ModuleList()
15 |         # for _ in range(window_size):
16 |         #     self.layers.append(nn.Sequential(
17 |         #         nn.Linear(in_features, 1),
18 |         #         nn.Sigmoid()
19 |         #     ))
20 |         self.layers = nn.Sequential(
21 |             nn.Linear(in_features, 1),
22 |             nn.Sigmoid()
23 |         )
24 |         self.window_size = window_size
25 | 
26 |         self.softmax = nn.Softmax(dim=-1)
27 | 
28 |     def forward(self, x):
29 |         # x.shape = (b, t, f)
30 |         _, t, _ = x.shape
31 |         assert t == self.window_size
32 |         scores = []
33 |         for i in range(t):
34 |             # scores.append(self.layers[i](x[:, i, :]))  # (b, 1)
35 |             scores.append(self.layers(x[:, i, :]))  # (b, 1)
36 |         scores = torch.concat(scores, dim=-1)  # (b, t)
37 |         scores = self.softmax(scores)  # (b, t)
38 |         f = scores.unsqueeze(dim=-1) * x  # rescale each time step by its attention weight
39 |         return f
40 | 
41 | 
42 | class BiGRU_TSAM(ContrastiveModel):
43 |     def __init__(self, window_size, in_features, filter_size,
44 |                  model_flag="BiGRU_TSAM", label_norm=True, device="cuda:0"):
45 |         super(BiGRU_TSAM, self).__init__(model_flag=model_flag, label_norm=label_norm, device=device)
46 |         if filter_size > 0:
47 |             window_size = window_size // filter_size
48 |             self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size)
49 |         else:
50 |             window_size = window_size
51 |             self.MaV = None
52 |         self.tsam = TSAM(window_size=window_size, in_features=in_features)
53 |         self.gru = nn.GRU(input_size=in_features, hidden_size=256, num_layers=3,
54 |                           bidirectional=True, batch_first=True)
55 |         self.linear = nn.Sequential(
56 |             nn.Linear(in_features=512, out_features=64),
57 |             nn.LeakyReLU(),
58 |             nn.Linear(in_features=64, out_features=128),
59 |             nn.LeakyReLU(),
60 |         )
61 |         self.output = nn.Sequential(
62 |             nn.Linear(in_features=128, out_features=1)
63 |         )
64 |         self.to(device)
65 | 
66 |     def feature_extractor(self, x):
67 |         if self.MaV:
68 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
69 |         x = self.tsam(x)
70 |         _, x = self.gru(x)
71 |         x = torch.concat([x[-1], x[-2]], dim=-1)
72 |         return self.linear(x)
73 | 
74 |     def forward(self, x, label=None):
75 |         if len(x.shape) < 4:
76 |             x = self.feature_extractor(x)
77 |             return self.output(x)
78 |         else:
79 |             f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label)
80 |             return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, weight
81 | 
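

if __name__ == '__main__':
    # Minimal shape sanity check for TSAM (an illustrative sketch, not part of
    # the original experiments): the attention weights are computed per time
    # step and rescale the input, so the output shape equals the input shape.
    dummy = torch.randn(4, 30, 14)  # (batch, window_size, in_features)
    attn = TSAM(window_size=30, in_features=14)
    print(attn(dummy).shape)  # expected: torch.Size([4, 30, 14])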
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Supervised Contrastive Learning based Dual-Mixer Model for Remaining Useful Life Prediction
2 | This repository is the implementation of the paper Supervised Contrastive Learning based Dual-Mixer Model for Remaining Useful Life Prediction.
3 | 
4 | ***Our paper has been accepted by *Reliability Engineering & System Safety* and is now available online: https://www.sciencedirect.com/science/article/pii/S0951832024004708***
5 | 
6 | The paper proposes a novel progressive feature fusion model called **Dual-path Mixer Model (Dual-Mixer)** for RUL prediction,
7 | as well as a feature constraint method based on supervised contrastive learning called **Feature Space Global Relationship Invariance (FSGRI)**.
8 | 
9 | ## Dual-Mixer Model
10 | The Dual-Mixer Model is constructed from multi-layer MLPs.
11 | 
12 | The core module of this model is the Dual-path Mixer Layer, which implements a progressive feature fusion process layer by layer:
13 | 
14 | ![Layer Structure](.assets/layer_structure.png "Dual-Mixer Layer")
15 | 
16 | ## FSGRI
17 | FSGRI is a constraint method in the feature space for the RUL prediction task. It expands the degradation relationship
18 | from the original data space to the feature space and is applicable to most deep learning RUL prediction methods:
19 | 
20 | ![FSGRI](.assets/FSGRI.png "FSGRI")
21 | 
22 | ## Quick Start
23 | 
24 | ### Requirements
25 | torch >= 2.0
26 | matplotlib >= 3.7.0
27 | numpy >= 1.24.3
28 | pandas >= 2.0.0
29 | scikit-learn >= 1.3.0
30 | 
31 | ### Dataset
32 | Our experiments are based on the CMAPSS dataset [1].
33 | 
34 | The raw data can be downloaded from: https://github.com/schwxd/LSTM-Keras-CMAPSS/tree/master/C-MAPSS-Data.
35 | 
36 | After downloading, place the data files train_FD00X.txt, test_FD00X.txt,
37 | and RUL_FD00X.txt in the raw_data/ folder (or any other folder you specify).
38 | 
39 | The dataset/cmapss.py file contains all the code for preprocessing the dataset, including the Gaussian Threshold Sampling method proposed
40 | in this paper.
41 | 
42 | ### Train
43 | Simply run:
44 | > python ./models/RULPrediction/experiments.py
45 | 
46 | This will execute the training process of the Dual-Mixer on the FD004 dataset. The training results are stored in
47 | the train/*model_flag* folder, where *model_flag* is generated automatically.
48 | 
49 | If you placed the dataset in a folder other than raw_data/, you need to modify the parameter
50 | *cmapss.DEFAULT_ROOT* on **line 56** in the *experiments.py* file:
51 | > train, test, val, scalar = cmapss.get_data(cmapss.DEFAULT_ROOT, ...
52 | 
53 | to:
54 | > train, test, val, scalar = cmapss.get_data(your_data_path, ...
55 | 
56 | You can edit experiments.py to modify different training parameters, such as the Batch Size (***bs***), ***mixer_layer_num***, etc.
57 | 
58 | The ***net*** object in experiments.py can be changed to the following models:
59 | > MLPMixer / BiGRU_TSAM / IMDSSN et al.
60 | 
61 | More details can be found in their respective code files.
62 | 
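### Evaluation
After training, the test predictions and labels are saved as .npy files in the results folder.
A minimal sketch for computing MSE / MAPE / R2 from them with dataset/utils.py (the path below is a placeholder for your own *model_flag* folder):
```
from dataset.utils import compute_metrics

(out, label), mse, mape = compute_metrics("./train/your_model_flag")
```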
63 | ## Citation
64 | If you find our work useful, please cite our paper as follows:
65 | ```
66 | @article{FU2024110398,
67 | title = {Supervised contrastive learning based dual-mixer model for Remaining Useful Life prediction},
68 | journal = {Reliability Engineering & System Safety},
69 | pages = {110398},
70 | year = {2024},
71 | issn = {0951-8320},
72 | doi = {https://doi.org/10.1016/j.ress.2024.110398},
73 | author = {En Fu and Yanyan Hu and Kaixiang Peng and Yuxin Chu},
74 | }
75 | ```
76 | ## References
77 | 1. Abhinav Saxena, Kai Goebel, Don Simon, and Neil Eklund. Damage propagation modeling for aircraft engine run-to-failure simulation. In
78 | 2008 International Conference on Prognostics and Health Management. IEEE, 2008.
79 | 
--------------------------------------------------------------------------------
/models/RULPrediction/ResBlockModel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from sklearn import manifold
5 | 
6 | import dataset.cmapss as cmapss
7 | from models.RULPrediction.ContrastiveModules import ContrastiveModel, MSEContrastiveLoss, pn_rul_compute
8 | 
9 | 
10 | class ResNet(ContrastiveModel):
11 |     def __init__(self, in_features, window_size,
12 |                  model_flag="ContrastiveResNet", device="cuda"):
13 |         super(ResNet, self).__init__(model_flag=model_flag, device=device, label_norm=True)
14 |         # if window_size > 1000:
15 |         #     window_size = window_size // 32
16 |         #     self.MaV = nn.AvgPool1d(kernel_size=32, stride=32)
17 |         # else:
18 |         #     window_size = window_size
19 |         #     self.MaV = None
20 |         self.tsne = None
21 |         self.visual_samples = None
22 |         self.embedding = []
23 |         self.epoch_num = 0
24 |         self.conv = nn.Conv1d(in_channels=in_features, out_channels=64, kernel_size=3, stride=1, padding=1)
25 |         self.norm1 = nn.BatchNorm1d(64)
26 |         self.norm2 = nn.BatchNorm1d(128)
27 |         self.norm3 = nn.BatchNorm1d(128)
28 |         self.res1 = nn.Sequential(
29 |             nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
30 |             nn.ReLU(),
31 |             nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
32 |         )
33 |         self.res_con1 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=2, stride=2)
34 |         self.res2 = nn.Sequential(
35 |             nn.Conv1d(in_channels=64, out_channels=64, kernel_size=2, stride=2),
36 |             nn.ReLU(),
37 |             nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
38 |         )
39 |         self.res_con2 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=2, stride=2)
40 |         self.res3 = nn.Sequential(
41 |             nn.Conv1d(in_channels=128, out_channels=128, kernel_size=2, stride=2),
42 |             nn.ReLU(),
43 |             nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
44 |         )
45 |         self.flatten = nn.Flatten(start_dim=-2, end_dim=-1)
46 |         self.dense = nn.Sequential(
47 |             nn.Linear(in_features=128 * ((window_size // 2) // 2), out_features=64),
48 |             nn.Dropout(),
49 |             nn.Linear(in_features=64, out_features=1)
50 |         )
51 |         self.to(device)
52 | 
53 |     def forward(self, x, labels=None):
54 |         # x.shape=(batch, num, window, features)
55 |         # labels.shape=(batch, num)
56 |         if len(x.shape) == 4:  # if len(shape) == 4, use the contrastive computing process
57 |             assert labels is not None
58 |             batch, num, w, f = x.shape
59 |             x = x.transpose(-1, -2)  # (batch, num, f, w); transpose instead of view so values stay aligned with their channels
60 |             feature_pos, feature_pos_aug, feature_neg, neg_weights = self.generate_contrastive_samples(x, labels)
61 |             return pn_rul_compute(self.dense, feature_pos, feature_neg), feature_pos, feature_pos_aug, feature_neg, neg_weights
62 |         else:  # if len(shape) == 3, use the regression computing process
63 |             batch, w, f = x.shape
64 |             x = x.transpose(-1, -2)  # (batch, f, w)
65 |             feature = self.feature_extractor(x)
66 |             out = self.dense(feature)
67 |             return out
68 | 
69 |     def feature_extractor(self, x):
70 |         # if self.MaV:
71 |         #     x = self.MaV(x)
72 |         x1 = self.conv(x)
73 |         x2 = self.res1(x1) + x1
74 |         x3 = self.res_con1(x2) + self.res2(x2)
75 |         x4 = self.res_con2(x3) + self.res3(x3)
76 |         flat = self.flatten(x4)
77 |         return flat
78 | 
79 |     def set_visual_samples(self, samples):
80 |         """
81 |         Sets the visualization samples used in epoch_start.
82 | 
83 |         :param samples: (batch, len, features)
84 |         :return:
85 |         """
86 |         self.visual_samples = samples
87 |         self.visual_samples = torch.transpose(self.visual_samples, -1, -2)
88 |         self.tsne = manifold.TSNE(n_components=2, random_state=2023)
--------------------------------------------------------------------------------
/models/RULPrediction/experiments.py:
--------------------------------------------------------------------------------
1 | import dataset.cmapss as cmapss
2 | import models.RULPrediction as rul
3 | from dataset.utils import compute_metrics
4 | 
5 | import torch
6 | import numpy as np
7 | from SimpleModels import *
8 | from MLPMixer import MLPMixer, DualMLPMixer
9 | from BiGRU_TSAM import BiGRU_TSAM
10 | from IMDSSN import IMDSSN
11 | 
12 | 
13 | def train_model(model, train_set, test_set, val_set, model_flag,
14 |                 batch_size, visual_sample=None, contra=True):
15 |     Loss = "InfoNCE" if contra else ""
16 |     model.flag = model_flag
17 |     net = model
18 |     net.set_visual_samples(visual_sample)
19 |     net.prepare_data(train_set, test_set, val_set, batch_size=batch_size, num_workers=0,
20 |                      eval_shuffle=False)
21 |     net.train_model(epoch=100,
22 |                     lr=1e-3,
23 |                     criterion=rul.MSEContrastiveLoss(contrastive=Loss) if contra else torch.nn.MSELoss(),
24 |                     optimizer="adam",
25 |                     # lr_lambda=lambda x: 10 ** -(x // 15),
26 |                     early_stop=5,
27 |                     show_batch_loss=False)
28 |     return net
29 | 
30 | 
31 | def train_cmapss(model: rul.ContrastiveModel,
32 |                  window_size,
33 |                  neg_samples,
34 |                  batch_size,
35 |                  subset: cmapss.Subset,
36 |                  exp_time,
37 |                  contra=True,
38 |                  label_norm=True):
39 |     threshold = 125
40 |     batch_size = batch_size // neg_samples if contra else batch_size
41 |     # batch_size = batch_size
42 |     # batch_size = 256
43 |     Loss = "InfoNCE" if contra else ""
44 |     model_flag = "RUL-{}-norm{}-w{}-batch{}-thresh{}-{}-neg{}-{}". \
45 |         format(model.flag,
46 |                1 if label_norm else 0,
47 |                window_size,
48 |                batch_size,
49 |                threshold,
50 |                subset.value,
51 |                neg_samples - 1 if contra else 0,
52 |                # "GSampler",
53 |                exp_time)
54 |     train, test, val, scalar = cmapss.get_data(cmapss.DEFAULT_ROOT,
55 |                                                subset,
56 |                                                window_size=window_size,
57 |                                                slide_step=1,
58 |                                                sensors=cmapss.DEFAULT_SENSORS,
59 |                                                rul_threshold=threshold,
60 |                                                label_norm=label_norm,
61 |                                                val_ratio=0.2)
62 |     if contra:
63 |         cmapss.CmapssGaussianNegativeSampler(train, neg_samples, thresh=0.5, std=0.3)
64 |     visual_samples = torch.tensor(train.data[np.where(train.ids == 1)], dtype=torch.float32).to(model.device)
65 |     model = train_model(model=model, train_set=train, test_set=test, val_set=val,
66 |                         model_flag=model_flag, batch_size=batch_size, visual_sample=visual_samples, contra=contra)
67 |     return model
68 | 
69 | 
70 | if __name__ == '__main__':
71 |     length = 30
72 |     step_size = 1  # the step size used to construct training samples
73 |     negs = 5  # the number of negative samples when using FSGRI
74 |     bs = 1024
75 |     dataset = cmapss.Subset.FD004  # an enum object; see details in cmapss.Subset.
76 | device = "cuda:0" # which device, 'cpu', 'cuda', 'cuda:*' 77 | exp_ti = 1 # experiment count, using to construct a model_flag 78 | contra_training = False # if using FSGRI 79 | label_norm = True # if True, the RUL label will be in [0, 1], else [0, number of cycles] 80 | # filter_size = 0 81 | 82 | # Dual-Mixer only 83 | mixer_layer_num = 6 84 | hidden_dim = 32 85 | dropout = 0 86 | net = DualMLPMixer(window_size=length, 87 | in_features=len(cmapss.DEFAULT_SENSORS), 88 | hidden_dim=hidden_dim, 89 | num_layers=mixer_layer_num, 90 | dropout=dropout, 91 | device=device, model_flag=f"MLPDualMixer-h{hidden_dim}-{mixer_layer_num}", label_norm=label_norm, 92 | filter_size=0) 93 | 94 | net = train_cmapss(net, length, negs, bs, dataset, exp_time=exp_ti, contra=contra_training, label_norm=label_norm, ) 95 | -------------------------------------------------------------------------------- /models/RULPrediction/CNN_GRU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | import models.RULPrediction.ContrastiveModules 6 | from dataset import cmapss 7 | from ContrastiveModules import ContrastiveModel 8 | 9 | 10 | class CnnGru(ContrastiveModel): 11 | """ 12 | A model proposed by https://doi.org/10.1109/TIM.2022.3227956. 13 | """ 14 | def __init__(self, 15 | in_features, 16 | window_size, 17 | filter_size, 18 | label_norm=False, 19 | model_flag="CnnGru", 20 | device="cuda",): 21 | super(CnnGru, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm) 22 | if filter_size > 0: 23 | window_size = window_size // filter_size 24 | self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size) 25 | else: 26 | window_size = window_size 27 | self.MaV = None 28 | self.convs = nn.Sequential( 29 | nn.Conv1d(in_channels=window_size, out_channels=16, kernel_size=10, stride=1, padding="same"), 30 | nn.ReLU(), 31 | nn.MaxPool1d(kernel_size=2, stride=2), 32 | nn.Conv1d(in_channels=16, out_channels=32, kernel_size=10, stride=1, padding="same"), 33 | nn.ReLU(), 34 | nn.MaxPool1d(kernel_size=2, stride=2), 35 | nn.Conv1d(in_channels=32, out_channels=64, kernel_size=10, stride=1, padding="same"), 36 | nn.ReLU(), 37 | nn.MaxPool1d(kernel_size=2, stride=2), 38 | nn.Flatten(start_dim=-2, end_dim=-1), 39 | nn.Linear(in_features=64 * (((in_features // 2) // 2) // 2), out_features=256) 40 | ) 41 | self.grus = nn.GRU(input_size=in_features, hidden_size=128, num_layers=3, batch_first=True, 42 | bidirectional=True) 43 | self.linears = nn.Sequential( 44 | nn.Linear(in_features=256, out_features=64), 45 | nn.Dropout(), 46 | nn.ReLU(), 47 | nn.Linear(in_features=64, out_features=128), 48 | nn.Dropout(), 49 | nn.Linear(in_features=128, out_features=1) 50 | ) 51 | self.to(device) 52 | 53 | def forward(self, x, label=None): 54 | if len(x.shape)<4: 55 | fea = self.feature_extractor(x) 56 | return self.linears(fea) 57 | else: 58 | assert label is not None 59 | feature_pos, feature_pos_aug, feature_neg, neg_weights = self.generate_contrastive_samples(x, label) 60 | out_all = self.linears(feature_pos) 61 | neg_nums = feature_neg.shape[1] 62 | neg_out = [] 63 | for neg_i in range(neg_nums): 64 | neg_out.append(self.linears(feature_neg[:, neg_i])) 65 | neg_out = torch.concat(neg_out, dim=-1) 66 | return torch.concat([out_all, neg_out], dim=-1), feature_pos, feature_pos_aug, feature_neg, neg_weights 67 | 68 | def feature_extractor(self, x): 69 | # x.shape = (batch, length, features) 70 | # batch, l, f = x.shape 71 | 
--------------------------------------------------------------------------------
/models/RULPrediction/CNN_GRU.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | 
5 | import models.RULPrediction.ContrastiveModules
6 | from dataset import cmapss
7 | from ContrastiveModules import ContrastiveModel
8 | 
9 | 
10 | class CnnGru(ContrastiveModel):
11 |     """
12 |     A model proposed by https://doi.org/10.1109/TIM.2022.3227956.
13 |     """
14 |     def __init__(self,
15 |                  in_features,
16 |                  window_size,
17 |                  filter_size,
18 |                  label_norm=False,
19 |                  model_flag="CnnGru",
20 |                  device="cuda",):
21 |         super(CnnGru, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm)
22 |         if filter_size > 0:
23 |             window_size = window_size // filter_size
24 |             self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size)
25 |         else:
26 |             window_size = window_size
27 |             self.MaV = None
28 |         self.convs = nn.Sequential(
29 |             nn.Conv1d(in_channels=window_size, out_channels=16, kernel_size=10, stride=1, padding="same"),
30 |             nn.ReLU(),
31 |             nn.MaxPool1d(kernel_size=2, stride=2),
32 |             nn.Conv1d(in_channels=16, out_channels=32, kernel_size=10, stride=1, padding="same"),
33 |             nn.ReLU(),
34 |             nn.MaxPool1d(kernel_size=2, stride=2),
35 |             nn.Conv1d(in_channels=32, out_channels=64, kernel_size=10, stride=1, padding="same"),
36 |             nn.ReLU(),
37 |             nn.MaxPool1d(kernel_size=2, stride=2),
38 |             nn.Flatten(start_dim=-2, end_dim=-1),
39 |             nn.Linear(in_features=64 * (((in_features // 2) // 2) // 2), out_features=256)
40 |         )
41 |         self.grus = nn.GRU(input_size=in_features, hidden_size=128, num_layers=3, batch_first=True,
42 |                            bidirectional=True)
43 |         self.linears = nn.Sequential(
44 |             nn.Linear(in_features=256, out_features=64),
45 |             nn.Dropout(),
46 |             nn.ReLU(),
47 |             nn.Linear(in_features=64, out_features=128),
48 |             nn.Dropout(),
49 |             nn.Linear(in_features=128, out_features=1)
50 |         )
51 |         self.to(device)
52 | 
53 |     def forward(self, x, label=None):
54 |         if len(x.shape) < 4:
55 |             fea = self.feature_extractor(x)
56 |             return self.linears(fea)
57 |         else:
58 |             assert label is not None
59 |             feature_pos, feature_pos_aug, feature_neg, neg_weights = self.generate_contrastive_samples(x, label)
60 |             out_all = self.linears(feature_pos)
61 |             neg_nums = feature_neg.shape[1]
62 |             neg_out = []
63 |             for neg_i in range(neg_nums):
64 |                 neg_out.append(self.linears(feature_neg[:, neg_i]))
65 |             neg_out = torch.concat(neg_out, dim=-1)
66 |             return torch.concat([out_all, neg_out], dim=-1), feature_pos, feature_pos_aug, feature_neg, neg_weights
67 | 
68 |     def feature_extractor(self, x):
69 |         # x.shape = (batch, length, features)
70 |         # batch, l, f = x.shape
71 |         if self.MaV:
72 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
73 |         x_conv = x
74 |         x_conv = self.convs(x_conv)
75 |         _, x_grus = self.grus(x)  # (batch, length, 256)
76 |         x_grus = torch.concat([x_grus[-1], x_grus[-2]], dim=-1)
77 |         fea = x_conv + x_grus
78 |         return fea
79 | 
80 | 
81 | if __name__ == '__main__':
82 |     from dataset.cmapss import Cmapss
83 |     window_size = 30
84 |     threshold = 125
85 |     neg_num = 5
86 |     batch_size = 1024 // (neg_num - 1)
87 |     # batch_size = 1024
88 |     subset = cmapss.Subset.FD003
89 |     Loss = "InfoNCE"
90 |     model_flag = "RUL-1DCNN_GRU-w{}-batch{}-thresh{}-{}-neg{}-2-{}". \
91 |         format(window_size,
92 |                batch_size,
93 |                threshold,
94 |                subset.value,
95 |                neg_num - 1,
96 |                Loss)
97 |     train, test, val, scalar = cmapss.get_data(cmapss.DEFAULT_ROOT,
98 |                                                subset,
99 |                                                window_size=window_size,
100 |                                                slide_step=1,
101 |                                                sensors=cmapss.DEFAULT_SENSORS,
102 |                                                rul_threshold=threshold,
103 |                                                label_norm=True,
104 |                                                val_ratio=0.2)
105 |     # filter_size=0 disables the moving-average filter; model_flag must be passed by keyword
106 |     net = CnnGru(len(cmapss.DEFAULT_SENSORS), window_size, filter_size=0, model_flag=model_flag, device="cuda:1")
107 |     visual_samples = torch.tensor(train.data[np.where(train.ids == 1)], dtype=torch.float32, device="cuda:1")
108 |     net.set_visual_samples(visual_samples)
109 |     sampler = cmapss.CmapssPiecewiseNegativeSampler(train, engine_num=1, interval_num=neg_num)
110 |     # sampler = cmapss.CmapssRandomNegtiveSampler(train, neg_num=neg_num, sample_thresh=0.05)
111 |     net.prepare_data(train, test, val, batch_size=batch_size, num_workers=2)
112 |     net.train_model(epoch=100,
113 |                     lr=0.0001,
114 |                     # criterion=nn.MSELoss(),
115 |                     criterion=models.RULPrediction.ContrastiveModules.MSEContrastiveLoss(contrastive=Loss),
116 |                     early_stop=10,
117 |                     lr_lambda=lambda epoch: 10 ** -(epoch // 15))
118 | 
--------------------------------------------------------------------------------
/models/RULPrediction/SimpleModels.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 | from ContrastiveModules import ContrastiveModel, pn_rul_compute
6 | from train.trainable import TrainableModule
7 | 
8 | """
9 | Input shape: (batch, w, f0)
10 | Feature shape: (batch, f1)
11 | Output shape: (batch, 1)
12 | """
13 | 
14 | 
15 | class LSTMNet(ContrastiveModel):
16 |     def __init__(self, window_size,
17 |                  in_features,
18 |                  hidden_dim=256,
19 |                  label_norm=False, model_flag="LSTM", device="cuda:0"):
20 |         super(LSTMNet, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm)
21 |         if window_size > 1000:
22 |             window_size = window_size // 32
23 |             self.MaV = nn.AvgPool1d(kernel_size=32, stride=32)
24 |         else:
25 |             window_size = window_size
26 |             self.MaV = None
27 |         self.lstm = nn.LSTM(input_size=in_features, hidden_size=hidden_dim, num_layers=3,
28 |                             batch_first=True, dropout=0.4)
29 |         # self.lstm1 = LSTM(input_size=in_features, hidden_size=256, dropout=0.4, device=device)
30 |         # self.lstm2 = LSTM(input_size=256, hidden_size=256, dropout=0.4, device=device)
31 |         # self.lstm3 = LSTM(input_size=256, hidden_size=256, dropout=0.4, device=device)
32 |         self.linear = nn.Sequential(nn.Linear(in_features=hidden_dim, out_features=1))
33 |         self.to(device)
34 | 
35 |     def feature_extractor(self, x):
36 |         if self.MaV:
37 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
38 |         _, (ht, _) = self.lstm(x)
39 |         # out, _ = self.lstm1(x)
40 |         # out, _ = self.lstm2(out)
41 |         # _, (ht, _) = self.lstm3(out)
42 |         return ht[-1]
43 |         # return ht
44 | 
45 |     
def forward(self, x, label=None): 46 | if len(x.shape) < 4: 47 | x = self.feature_extractor(x) 48 | return self.linear(x) 49 | else: 50 | f_pos, f_apos, f_neg, weights = self.generate_contrastive_samples(x, label) 51 | return pn_rul_compute(self.linear, f_pos, f_neg), f_pos, f_apos, f_neg, weights 52 | 53 | 54 | class LSTM(nn.Module): 55 | def __init__(self, input_size, hidden_size, dropout=0.4, device="cuda:0"): 56 | super().__init__() 57 | self.wf = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 58 | self.wi = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 59 | self.wc = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 60 | self.wo = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 61 | self.dropout = nn.Dropout(dropout) if dropout > 0 else None 62 | self.hidden_size = hidden_size 63 | self.to(device) 64 | 65 | def forward(self, x): 66 | # x.shape=(batch, l, f) 67 | b, l, f = x.shape 68 | h = torch.zeros((b, self.hidden_size)).to(x.device) 69 | c = torch.zeros((b, self.hidden_size)).to(x.device) 70 | outputs = [] 71 | for i in range(l): 72 | ft = F.sigmoid(self.wf(torch.concat([x[:, i, :], h], dim=-1))) 73 | it = F.sigmoid(self.wi(torch.concat([x[:, i, :], h], dim=-1))) 74 | c_ = F.tanh(self.wc(torch.concat([x[:, i, :], h], dim=-1))) 75 | c = ft * c + it * c_ 76 | ot = F.sigmoid(self.wo(torch.concat([x[:, i, :], h], dim=-1))) 77 | h = ot * F.tanh(c) 78 | h = self.dropout(h) if self.dropout is not None else h 79 | outputs.append(h) 80 | return torch.stack(outputs, dim=1), (h, c) 81 | 82 | 83 | class MLP(ContrastiveModel): 84 | def __init__(self, 85 | window_size, 86 | in_features, 87 | filter_size=0, 88 | hidden_dim=256, 89 | label_norm=False, 90 | model_flag="MLP", device="cuda:0"): 91 | super(MLP, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm) 92 | if filter_size > 0: 93 | window_size = window_size // filter_size 94 | self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size) 95 | else: 96 | window_size = window_size 97 | self.MaV = None 98 | self.features_layer_1 = nn.Sequential( 99 | nn.Linear(in_features, hidden_dim), 100 | nn.GELU(), 101 | nn.Dropout(), 102 | nn.Linear(hidden_dim, hidden_dim), 103 | nn.GELU(), 104 | nn.Dropout(), 105 | ) 106 | self.temporal_layer_1 = nn.Sequential( 107 | nn.Linear(window_size, hidden_dim), 108 | nn.GELU(), 109 | nn.Dropout(), 110 | nn.Linear(hidden_dim, hidden_dim), 111 | nn.GELU(), 112 | nn.Dropout(), 113 | ) 114 | self.features_layer_2 = nn.Sequential( 115 | nn.Linear(hidden_dim, hidden_dim//2), 116 | nn.GELU(), 117 | nn.Dropout(), 118 | nn.Linear(hidden_dim//2, hidden_dim//8), 119 | nn.GELU(), 120 | nn.Dropout(), 121 | ) 122 | self.temporal_layer_2 = nn.Sequential( 123 | nn.Linear(hidden_dim, hidden_dim//2), 124 | nn.GELU(), 125 | nn.Dropout(), 126 | nn.Linear(hidden_dim//2, hidden_dim//8), 127 | nn.GELU(), 128 | nn.Dropout(), 129 | ) 130 | self.linear = nn.Sequential(nn.Dropout(), 131 | nn.Linear(in_features=(hidden_dim//8)**2, out_features=1)) 132 | self.to(device) 133 | 134 | def forward(self, x, label=None): 135 | if len(x.shape) < 4: 136 | x = self.feature_extractor(x) 137 | return self.linear(x) 138 | else: 139 | f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label) 140 | return pn_rul_compute(self.linear, f_pos, f_neg), f_pos, f_apos, f_neg, weight 141 | 142 | def feature_extractor(self, x): 143 | if self.MaV: 144 | x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2) 145 | # 
x.shape = (b, t, f) 146 | ff = self.features_layer_1(x) # (b, t, f) 147 | tf = self.temporal_layer_1(ff.transpose(-1, -2)) 148 | ff = self.features_layer_2(tf.transpose(-1, -2)) 149 | tf = self.temporal_layer_2(ff.transpose(-1, -2)) 150 | f = torch.flatten(tf, -2, -1) 151 | return f 152 | 153 | -------------------------------------------------------------------------------- /models/RULPrediction/DAMCNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from models.RULPrediction.ContrastiveModules import ContrastiveModel 4 | 5 | """ 6 | 10.1109/TIM.2022.3210933 7 | """ 8 | 9 | 10 | class channel_attn(nn.Module): 11 | # input_size:(N, C, H, W) 12 | # output_size:(N, C, 1, 1) 13 | def __init__(self, window_size=8192, features=2): 14 | super().__init__() 15 | self.max_pool = nn.MaxPool2d([1, features]) # (2, 8192, 1, 2) -> (2, 8192, 1, 1) 16 | self.avg_pool = nn.AvgPool2d([1, features]) # (2, 8192, 1, 2) -> (2, 8192, 1, 1) 17 | self.channel_attn_fc1 = nn.Linear(window_size, window_size) # (2, 8192, 1, 1) -> (2, 8192, 1, 1) 18 | self.channel_attn_fc2 = nn.Linear(window_size, window_size) # (2, 8192, 1, 1) -> (2, 8192, 1, 1) 19 | self.window_size = window_size 20 | 21 | def forward(self, x): 22 | max_pool_x = self.max_pool(x).squeeze() 23 | avg_pool_x = self.avg_pool(x).squeeze() 24 | max_pool_x = self.channel_attn_fc1(max_pool_x) 25 | max_pool_x = self.channel_attn_fc2(max_pool_x) 26 | avg_pool_x = self.channel_attn_fc1(avg_pool_x) 27 | avg_pool_x = self.channel_attn_fc2(avg_pool_x) 28 | x = torch.sigmoid(max_pool_x + avg_pool_x) 29 | x = x.reshape(-1, self.window_size, 1, 1) 30 | return x 31 | 32 | 33 | class temp_attn(nn.Module): 34 | # input_size:(N, C, H, W) 35 | # output_size:(N, 1, H, W) 36 | def __init__(self): 37 | super().__init__() 38 | self.conv = nn.Conv2d(2, 1, 3, padding=1) 39 | 40 | def forward(self, x): 41 | max_pool_x, _ = torch.max(x, dim=1, keepdim=True) 42 | avg_pool_x = torch.mean(x, dim=1, keepdim=True) 43 | x = torch.cat((avg_pool_x, max_pool_x), dim=1) 44 | x = torch.sigmoid(self.conv(x)) 45 | return x 46 | 47 | 48 | class CBAM(nn.Module): 49 | # input_size:(N, C, H, W) 50 | # output_size:(N, C, H, W) 51 | def __init__(self, window_size, features): 52 | super().__init__() 53 | self.channel_attn = channel_attn(window_size, features) 54 | self.temp_attn = temp_attn() 55 | 56 | def forward(self, x): 57 | channel_x = self.channel_attn(x) 58 | x = channel_x * x 59 | temp_x = self.temp_attn(x) 60 | x = temp_x * x 61 | return x 62 | 63 | 64 | class MSCNN(nn.Module): 65 | # input_size: (N, window_size, 1, features) 66 | # output_size: (N, 512, 1, 32) 67 | def __init__(self, window_size, features): 68 | super().__init__() 69 | self.window_size = window_size 70 | self.features = features 71 | self.conv1 = nn.Sequential(nn.Conv1d(self.features, 8, 1, 10 if self.window_size % 10 == 0 else 16), 72 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 73 | nn.BatchNorm1d(8)) 74 | self.conv2 = nn.Sequential(nn.Conv1d(self.features, 8, 3, 10 if self.window_size % 10 == 0 else 16), 75 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 76 | nn.BatchNorm1d(8)) 77 | self.conv3 = nn.Sequential(nn.Conv1d(self.features, 8, 5, 10 if self.window_size % 10 == 0 else 16), 78 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 79 | nn.BatchNorm1d(8)) 80 | self.conv4 = nn.Sequential(nn.Conv1d(self.features, 8, 7, 10 if self.window_size % 10 == 0 else 16), 81 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 82 | 
nn.BatchNorm1d(8)) 83 | 84 | def forward(self, x): 85 | x = x.reshape(x.size(0), x.size(-1), -1) 86 | x1 = self.conv1(x) 87 | x2 = self.conv2(x) 88 | x3 = self.conv3(x) 89 | x4 = self.conv4(x) 90 | x = torch.cat((x1, x2, x3, x4), dim=1) 91 | return x 92 | 93 | 94 | class PRED(nn.Module): 95 | def __init__(self): 96 | super().__init__() 97 | self.conv = nn.Conv2d(64, 1, 1) 98 | self.lstm = nn.LSTM(1, 32, batch_first=True) 99 | self.dense = nn.Linear(32, 1) 100 | 101 | def forward(self, x): 102 | x = self.conv(x) # (N, 1, 1, 128) 103 | x = x.contiguous().view(x.size(0), -1, 1) 104 | x, _ = self.lstm(x) 105 | x = x[:, -1, :].contiguous().view(x.size(0), -1) 106 | x = self.dense(x) 107 | return x 108 | 109 | 110 | class DAMCNN(ContrastiveModel): 111 | def __init__(self, window_size, features, 112 | label_norm=False, model_flag="Model", device="cuda:0"): 113 | super().__init__(model_flag=model_flag, device=device, label_norm=label_norm) 114 | self.cbam = CBAM(window_size=window_size, features=features) 115 | self.mscnn = MSCNN(window_size=window_size, features=features) 116 | self.conv1 = nn.Sequential(nn.Conv1d(32, 32, 3, 1, 1), 117 | nn.BatchNorm1d(32), 118 | nn.ReLU(), 119 | nn.Conv1d(32, 32, 3, 1, 1), 120 | nn.BatchNorm1d(32), 121 | nn.ReLU(), 122 | nn.AvgPool1d(2, 2)) 123 | self.conv2 = nn.Sequential(nn.Conv1d(32, 64, 3, 1, 1), 124 | nn.BatchNorm1d(64), 125 | nn.ReLU(), 126 | nn.Conv1d(64, 64, 3, 1, 1), 127 | nn.BatchNorm1d(64), 128 | nn.ReLU(), 129 | nn.AvgPool1d(2, 2)) 130 | self.conv = nn.Conv2d(64, 1, 1) 131 | # prediction layers 132 | self.conv_2 = nn.Conv2d(64, 1, 1) 133 | self.lstm_2 = nn.LSTM(1, 32, batch_first=True) 134 | self.dense = nn.Linear(32, 1) 135 | self.to(device) 136 | 137 | def forward(self, x, label=None): 138 | if len(x.shape) < 4: 139 | feature = self.feature_extractor(x) 140 | out = self.dense(feature) # (N, 1) 141 | return out 142 | else: 143 | assert label is not None 144 | pos, pos_aug, neg, weights = self.generate_contrastive_samples(x, label) 145 | out_all = self.dense(pos) 146 | neg_nums = neg.shape[1] 147 | neg_out = [] 148 | for neg_i in range(neg_nums): 149 | neg_out.append(self.dense(neg[:, neg_i])) 150 | neg_out = torch.concat(neg_out, dim=-1) 151 | return torch.concat([out_all, neg_out], dim=-1), pos, pos_aug, neg, weights 152 | 153 | def feature_extractor(self, x): 154 | x = torch.unsqueeze(x, -2) 155 | x = self.cbam(x) # (N, 8192, 1, 2) 156 | x = self.mscnn(x) # (N, 32, 512) 157 | x = self.conv1(x) # (N, 32, 256) 158 | x = self.conv2(x) # (N, 64, 128) 159 | x = x.unsqueeze(2) 160 | 161 | x = self.conv_2(x) # (N, 1, 1, 128) 162 | x = x.contiguous().view(x.size(0), -1, 1) 163 | x, _ = self.lstm_2(x) 164 | x = x[:, -1, :].contiguous().view(x.size(0), -1) 165 | return x 166 | 167 | def epoch_start(self): 168 | super(DAMCNN, self).epoch_start() 169 | -------------------------------------------------------------------------------- /models/RULPrediction/MLPMixer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from ContrastiveModules import ContrastiveModel, pn_rul_compute 5 | 6 | 7 | class MLPBlock(nn.Module): 8 | def __init__(self, 9 | in_features, 10 | hidden_dim, 11 | out_features, 12 | dropout=0.5, 13 | device="cuda:0"): 14 | super(MLPBlock, self).__init__() 15 | self.block = nn.Sequential( 16 | nn.Linear(in_features=in_features, out_features=hidden_dim), 17 | nn.GELU(), 18 | nn.Dropout(dropout), 19 | nn.Linear(in_features=hidden_dim, 
out_features=out_features), 20 | nn.Dropout(dropout) 21 | ) 22 | self.to(device) 23 | 24 | def forward(self, x): 25 | return self.block(x) 26 | 27 | 28 | class GatedAttention(nn.Module): 29 | def __init__(self, hidden_dim, dim=-1, device="cuda:0"): 30 | super(GatedAttention, self).__init__() 31 | self.encoder = nn.Sequential( 32 | nn.Linear(in_features=hidden_dim, out_features=hidden_dim), 33 | # nn.Softmax(dim=dim), 34 | nn.Sigmoid() 35 | ) 36 | self.weights = None 37 | self.to(device) 38 | 39 | def forward(self, x): 40 | weights = self.encoder(x) 41 | self.weights = weights 42 | return torch.mul(weights, x) 43 | 44 | 45 | class MLPLayer(nn.Module): 46 | def __init__(self, in_features, hidden_dim, out_features, device="cuda:0"): 47 | super(MLPLayer, self).__init__() 48 | self.mlp = MLPBlock(in_features=in_features, hidden_dim=hidden_dim, out_features=out_features, device=device) 49 | self.gat = GatedAttention(hidden_dim=out_features, device=device) 50 | self.to(device) 51 | 52 | def forward(self, x): 53 | f = self.mlp(x) 54 | f = self.gat(f) 55 | return f 56 | 57 | 58 | class MixerLayer(nn.Module): 59 | def __init__(self, in_features, hidden_dim, device="cuda:0"): 60 | super(MixerLayer, self).__init__() 61 | self.time_mixer = MLPLayer(in_features=hidden_dim, hidden_dim=hidden_dim*2, out_features=hidden_dim, 62 | device=device) 63 | self.feature_mixer = MLPLayer(in_features=in_features, hidden_dim=in_features*2, out_features=in_features, 64 | device=device) 65 | self.to(device) 66 | 67 | def forward(self, x): 68 | # x.shape = (b, h, f) 69 | x = x.transpose(-1, -2) 70 | f = self.time_mixer(x) + x # (b, f, h) 71 | f = f.transpose(-1, -2) 72 | f = self.feature_mixer(f) + f # (b, h, f) 73 | return f 74 | 75 | 76 | class MLPMixer(ContrastiveModel): 77 | def __init__(self, window_size, in_features, hidden_dim, num_layers, filter_size=0, 78 | device="cuda:0", model_flag="TSMixer", label_norm=True): 79 | super(MLPMixer, self).__init__(device=device, label_norm=label_norm, model_flag=model_flag) 80 | if filter_size > 0: 81 | window_size = window_size // filter_size 82 | self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size) 83 | else: 84 | window_size = window_size 85 | self.MaV = None 86 | self.window_size = window_size 87 | self.in_features = in_features 88 | self.input_embedding = nn.Linear(in_features=window_size, out_features=hidden_dim) 89 | self.layers = nn.Sequential() 90 | for _ in range(num_layers): 91 | self.layers.append(MixerLayer(in_features=in_features, hidden_dim=hidden_dim, device=device)) 92 | self.output = nn.Sequential( 93 | nn.Dropout(), 94 | nn.Linear(in_features=in_features*hidden_dim, out_features=1) 95 | ) 96 | self.to(device) 97 | 98 | def feature_extractor(self, x): 99 | if self.MaV: 100 | x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2) 101 | # x.shape = (b, w, f) 102 | emb = self.input_embedding(x.transpose(-1, -2)) # (b, f, h) 103 | f = self.layers(emb.transpose(-1, -2)) 104 | return torch.flatten(f, start_dim=-2, end_dim=-1) 105 | 106 | def forward(self, x, label=None): 107 | if len(x.shape) < 4: 108 | x = self.feature_extractor(x) 109 | return self.output(x) 110 | else: 111 | f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label) 112 | return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, weight 113 | 114 | 115 | class DualMLPLayer(nn.Module): 116 | def __init__(self, window_size, hidden_dim, dropout=0.5): 117 | super(DualMLPLayer, self).__init__() 118 | self.block1 = nn.Sequential( 119 | # 
nn.LayerNorm(normalized_shape=window_size, elementwise_affine=False), 120 | nn.Linear(in_features=window_size, out_features=window_size * 2), 121 | nn.GELU(), 122 | nn.Dropout(dropout), 123 | nn.Linear(in_features=window_size * 2, out_features=window_size), 124 | nn.Dropout(dropout), 125 | ) 126 | self.block2 = nn.Sequential( 127 | # nn.LayerNorm(normalized_shape=hidden_dim, elementwise_affine=False), 128 | nn.Linear(in_features=hidden_dim, out_features=hidden_dim * 2), 129 | nn.GELU(), 130 | nn.Dropout(dropout), 131 | nn.Linear(in_features=hidden_dim * 2, out_features=hidden_dim), 132 | nn.Dropout(dropout) 133 | ) 134 | self.ln1 = nn.LayerNorm(normalized_shape=window_size, elementwise_affine=True) 135 | self.ln2 = nn.LayerNorm(normalized_shape=hidden_dim, elementwise_affine=True) 136 | self.ln3 = nn.LayerNorm(normalized_shape=window_size, elementwise_affine=True) 137 | self.ln4 = nn.LayerNorm(normalized_shape=hidden_dim, elementwise_affine=True) 138 | self.gat_weights_1 = None 139 | self.gat_weights_2 = None 140 | self.gat1 = GatedAttention(hidden_dim=window_size, dim=-1) 141 | self.gat2 = GatedAttention(hidden_dim=hidden_dim, dim=-2) 142 | 143 | def forward(self, x1, x2): 144 | # x1.shape = (b, w, f), x2.shape = (b, w, f) 145 | x1 = x1.transpose(-1, -2) # x1.shape = (b, f, w) 146 | x1 = self.ln1(self.block1(x1) + x1) # x1.shape = (b, f, w) 147 | x2 = self.ln2(self.block2(x2) + x2) # x2.shape = (b, w, f) 148 | x1 = self.ln3(x1 + self.gat2(x2).transpose(-1, -2)) 149 | x2 = self.ln4(x2 + self.gat1(x1).transpose(-1, -2)) # x2.shape = (b, f, w) 150 | self.gat_weights_1 = self.gat1.weights 151 | self.gat_weights_2 = self.gat2.weights 152 | return x1.transpose(-1, -2), x2 153 | 154 | 155 | class DualMLPMixer(ContrastiveModel): 156 | def __init__(self, 157 | window_size, 158 | in_features, 159 | hidden_dim, 160 | num_layers, 161 | dropout=0.5, 162 | model_flag="MLPDualMixer", device="cuda:0", label_norm=True, 163 | filter_size=0): 164 | super(DualMLPMixer, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm) 165 | if filter_size > 0: 166 | window_size = window_size // filter_size 167 | self.MaV = nn.Conv1d(in_channels=in_features, out_channels=hidden_dim, kernel_size=filter_size, 168 | stride=filter_size) 169 | self.input_embedding = None 170 | else: 171 | window_size = window_size 172 | self.MaV = None 173 | self.input_embedding = nn.Linear(in_features=in_features, out_features=hidden_dim) 174 | 175 | self.layers = nn.ModuleList() 176 | for _ in range(num_layers): 177 | self.layers.append(DualMLPLayer(window_size=window_size, hidden_dim=hidden_dim, dropout=dropout)) 178 | self.out_gat1 = GatedAttention(hidden_dim=window_size) 179 | self.out_gat2 = GatedAttention(hidden_dim=hidden_dim, dim=-2) 180 | # self.fuse = nn.Linear(in_features=in_features*hidden_dim, out_features=768) 181 | self.output = nn.Sequential( 182 | # nn.Dropout(dropout), 183 | nn.Linear(in_features=hidden_dim*window_size, out_features=1) 184 | ) 185 | self.to(device) 186 | 187 | def feature_extractor(self, x): 188 | if self.MaV: 189 | x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2) 190 | self.hidden_out_1 = [] 191 | self.hidden_out_2 = [] 192 | # x.shape = (b, w, f) 193 | x = self.input_embedding(x) if self.input_embedding is not None else x # x.shape = (b, w, h) 194 | f1 = x 195 | f2 = x 196 | for l in self.layers: 197 | f1, f2 = l(f1, f2) 198 | f1 = self.out_gat1(f1.transpose(-1, -2)) 199 | f2 = self.out_gat2(f2) 200 | f = torch.flatten(f1.transpose(-1, -2) + f2, start_dim=-2, end_dim=-1) 201 | 
return f
202 | 
203 |     def forward(self, x, label=None):
204 |         if len(x.shape) < 4:
205 |             x = self.feature_extractor(x)
206 |             return self.output(x)
207 |         else:
208 |             f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label)
209 |             return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, weight
210 | 
211 |     def compute_loss(self,
212 |                      x: torch.Tensor,
213 |                      label: torch.Tensor,
214 |                      criterion) -> [torch.Tensor, torch.Tensor]:
215 |         [loss, rul] = super(DualMLPMixer, self).compute_loss(x, label, criterion)
216 |         return loss, rul
217 | 
--------------------------------------------------------------------------------
/models/RULPrediction/ContrastiveModules.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import matplotlib.pyplot as plt
4 | import sklearn.manifold as manifold
5 | from train.trainable import TrainableModule
6 | from functools import wraps
7 | 
8 | 
9 | def pn_rul_compute(predictor, f_pos, f_neg):
10 |     """
11 |     Computes the RUL of the positive and negative samples, because the Weighted
12 |     InfoNCE loss needs all the positive and negative RULs to compute the final loss.
13 | 
14 |     :param predictor: The predictor layer
15 |     :param f_pos: The positive samples with shape (batch, features)
16 |     :param f_neg: The negative samples with shape (batch, nums, features), where nums indicates
17 |         the number of negative samples.
18 |     :return: All the RULs with shape (batch, nums+1)
19 |     """
20 |     out_all = predictor(f_pos)
21 |     neg_nums = f_neg.shape[1]
22 |     neg_out = []
23 |     for neg_i in range(neg_nums):
24 |         neg_out.append(predictor(f_neg[:, neg_i]))
25 |     neg_out = torch.concat(neg_out, dim=-1)
26 |     return torch.concat([out_all, neg_out], dim=-1)
27 | 
28 | 
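# Shape sketch for pn_rul_compute (illustrative only, not from the original
# repo): with predictor = nn.Linear(8, 1), f_pos of shape (4, 8) and f_neg of
# shape (4, 3, 8), pn_rul_compute(predictor, f_pos, f_neg) returns a (4, 4)
# tensor: one RUL for the positive sample plus one per negative sample.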
29 | class WeightedInfoNCELoss(nn.Module):
30 |     def __init__(self, temperature=0.2):
31 |         super(WeightedInfoNCELoss, self).__init__()
32 |         self.temperature = temperature
33 | 
34 |     def forward(self, x, pos, neg, neg_weight=None):
35 |         """
36 |         :param x: The input of the network with shape (batch, length, feature) or (batch, feature)
37 |         :param pos: The positive samples of x with shape (batch, num_p, length, feature) or (batch, num_p, feature),
38 |             where num_p is the number of positive samples.
39 |         :param neg: The negative samples of x with shape (batch, num_n, length, feature)
40 |         :param neg_weight: The weight used for different negative samples with shape (batch, num_n).
41 | 
42 |         :return: A scalar of the contrastive loss.
43 |         """
44 |         if len(x.shape) > 2:
45 |             x = torch.flatten(x, 1)  # (batch, feature)
46 |         if len(pos.shape) > 2:
47 |             pos = torch.flatten(pos, 2)  # (batch, num_p, feature)
48 |         if len(neg.shape) > 2:
49 |             neg = torch.flatten(neg, 2)  # (batch, num_n, feature)
50 |         x = x.unsqueeze(dim=1)  # (batch, 1, feature)
51 |         pos_sim = torch.cosine_similarity(x, pos, dim=2)  # positive samples similarity (batch, num_p)
52 |         neg_sim = torch.cosine_similarity(x, neg, dim=2)  # negative samples similarity (batch, num_n)
53 |         if neg_weight is not None:
54 |             neg_sim = torch.mul(neg_sim, neg_weight)
55 |         numerator = torch.exp((torch.div(pos_sim, self.temperature)))  # (batch, num_p)
56 |         denominator = torch.exp(
57 |             torch.div(torch.concat([pos_sim, neg_sim], dim=1), self.temperature)  # (batch, num_p + num_n)
58 |         )
59 |         numerator = numerator.sum(dim=-1)  # (batch, )
60 |         denominator = denominator.sum(dim=-1)  # (batch, )
61 |         loss = -torch.log(torch.mean(numerator / denominator))
62 |         return loss
63 | 
64 | 
65 | class MSEContrastiveLoss(nn.Module):
66 |     def __init__(self, contrastive="InfoNCE"):
67 |         super(MSEContrastiveLoss, self).__init__()
68 |         self.mse = torch.nn.MSELoss()
69 |         assert contrastive in ["InfoNCE", "Triplet"]
70 |         if contrastive == "InfoNCE":
71 |             self.contrastive = WeightedInfoNCELoss(0.2)
72 |         elif contrastive == "Triplet":
73 |             self.contrastive = TripletLoss()
74 | 
75 |     def forward(self, predict, label, x=None, pos=None, neg=None, neg_weight=None):
76 |         if x is not None and pos is not None and neg is not None:
77 |             # print(f"MSE:{self.mse(predict, label)}")
78 |             # print(f"Contra:{self.contrastive(x, pos, neg, neg_weight)}")
79 |             loss = self.mse(predict, label) + self.contrastive(x, pos, neg, neg_weight)
80 |         else:
81 |             loss = self.mse(predict, label)
82 |         return loss
83 | 
84 | 
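# Illustrative usage sketch (an assumption-based example, not part of the
# original training scripts): without contrastive features the criterion
# reduces to plain MSE.
#     criterion = MSEContrastiveLoss(contrastive="InfoNCE")
#     mse_only = criterion(predict, label)
#     combined = criterion(predict, label, x=f_pos, pos=f_pos_aug,
#                          neg=f_neg, neg_weight=neg_weights)
# where f_pos, f_pos_aug, f_neg and neg_weights come from
# ContrastiveModel.generate_contrastive_samples(x, labels) defined below.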
85 | class TripletLoss(nn.Module):
86 |     def __init__(self):
87 |         super(TripletLoss, self).__init__()
88 | 
89 |     def forward(self, x, pos, neg, neg_weight):
90 |         """
91 | 
92 |         :param x: Anchor samples with shape (b, f)
93 |         :param pos: Positive sample with shape (b, f)
94 |         :param neg: Negative sample with shape (b, n_n, f)
95 |         :param neg_weight: Alpha (margin) for each negative sample, with shape (b, n_n)
96 |         :return: A scalar value of the Triplet Loss.
97 |         """
98 |         if neg_weight is None:
99 |             raise RuntimeError("The neg_weight could not be None when using Triplet Loss.")
100 |         x = torch.unsqueeze(x, dim=1)  # (b, 1, f)
101 |         pos = torch.unsqueeze(pos, dim=1)  # (b, 1, f)
102 |         pos_dis = torch.sum(torch.square(torch.subtract(x, pos)), 2)  # (b, 1)
103 |         neg_dis = torch.sum(torch.square(torch.subtract(x, neg)), 2)  # (b, n)
104 |         basic_loss = torch.add(torch.subtract(pos_dis, neg_dis), neg_weight)
105 |         loss = torch.mean(torch.max(basic_loss, torch.zeros_like(basic_loss)))
106 |         return loss
107 | 
108 | 
109 | class ContrastiveModel(TrainableModule):
110 |     def __init__(self, label_norm, model_flag, device):
111 |         super(ContrastiveModel, self).__init__(model_flag=model_flag, device=device)
112 |         self.tsne = None
113 |         self.visual_samples = None
114 |         self.embedding = []
115 |         self.epoch_num = 0
116 |         self.label_norm = label_norm
117 | 
118 |     def compute_loss(self,
119 |                      x: torch.Tensor,
120 |                      label: torch.Tensor,
121 |                      criterion) -> [torch.Tensor, torch.Tensor]:
122 |         if len(x.shape) == 4:
123 |             rul, f_pos, f_posa, f_neg, weights = self(x, label)
124 |             loss = criterion(rul.to(self.device), label.to(self.device), f_pos, f_posa, f_neg, weights)
125 |         else:
126 |             rul = self(x)
127 |             loss = criterion(rul.to(self.device), label.to(self.device))
128 |         return [loss, rul]
129 | 
130 |     def generate_contrastive_samples(self, x, labels):
131 |         """
132 |         This method provides the arguments needed to compute the contrastive loss.
133 | 
134 |         Note
135 |         ----
136 |         This method is only used for ContrastiveModules.MSEContrastiveLoss(), and you must override the
137 |         feature_extractor() method to define the feature extracting process.
138 | 
139 |         :param x: x.shape = (batch, num, length, feature)
140 |         :return: feature_pos, feature_pos_aug, feature_neg, neg_weights
141 |         """
142 |         assert len(x.shape) == 4
143 |         assert labels is not None
144 |         batch, num, w, f = x.shape
145 | 
146 |         x_ = x.view(batch * num, w, f)
147 |         pos = x[:, 0, :, :]
148 |         mask = torch.normal(0, 0.15, (batch, w, f), device=pos.device)  # random noise
149 |         pos_aug = mask + pos
150 |         all_features = self.feature_extractor(x_)
151 |         feature_pos_aug = self.feature_extractor(pos_aug)
152 |         features = all_features.view(batch, num, -1)
153 |         feature_pos = features[:, 0]
154 |         feature_neg = features[:, 1:]
155 |         neg_weights = torch.abs(labels[:, 1:] - labels[:, 0:1]) * 2
156 | 
157 |         return feature_pos, feature_pos_aug, feature_neg, neg_weights
158 | 
159 |     def feature_extractor(self, x):
160 |         """
161 |         Note
162 |         ----
163 |         This method must be overridden to customize your own feature extracting process when you compute the
164 |         contrastive loss by
165 | 
166 |         >>> self.generate_contrastive_samples(x, label)
167 | 
168 |         :param x: Input
169 |         :return: tensors of feature
170 |         """
171 |         raise NotImplementedError("The feature_extractor method must be implemented.")
172 | 
173 |     def forward(self, x, label=None):
174 |         """
175 |         The forward method in contrastive models must have two branches: one is the normal
176 |         forward process, and the other is the forward process with negative samples.
177 | 
178 |         Base Implementation
179 |         ----
180 | 
181 |         >>> if len(x.shape) < 4:  # the normal forward, default shape with (b, l, f)
182 |         >>>     x = self.feature_extractor(x)
183 |         >>>     return self.predictor(x)
184 |         >>> else:  # the forward with negative samples, default shape with (b, num, l, f)
185 |         >>>     f_pos, f_apos, f_neg, w = self.generate_contrastive_samples(x, label)
186 |         >>>     return pn_rul_compute(self.predictor, f_pos, f_neg), f_pos, f_apos, f_neg, w
187 |         :return: rul, f_pos, f_apos, f_neg, w
188 |         """
189 |         raise NotImplementedError("The forward method must be implemented.")
190 | 
191 |     def set_visual_samples(self, samples):
192 |         """
193 |         Sets the visualization samples used in epoch_start.
194 | 
195 |         :param samples: (batch, len, features)
196 |         :return:
197 |         """
198 |         self.visual_samples = samples
199 |         self.tsne = manifold.TSNE(n_components=2, random_state=2023)
200 | 
201 |     def epoch_start(self):
202 |         if self.visual_samples is not None:
203 |             print("Visualizing samples processing...")
204 |             features = self.feature_extractor(self.visual_samples)
205 |             features = features.cpu().detach().numpy().squeeze()
206 |             embedding = self.tsne.fit_transform(features)
207 |             self.embedding.append(embedding)
208 |             # plt.figure(dpi=600)
209 |             # plt.scatter(embedding[:, 0], embedding[:, 1], c=plt.cm.Spectral(range(len(embedding))))
210 |             # plt.title("Epoch:{}".format(self.epoch_num))
211 |             # plt.savefig(self.get_model_result_path()+"visual_embedding_{}.png".format(self.epoch_num))
212 |             self.epoch_num += 1
213 |         else:
214 |             print("Visualizing samples is None, ignored.")
215 | 
216 |     def train_end(self):
217 |         plt.figure(dpi=600)
218 |         plt.title("Total")
219 |         index = 0
220 |         emd_index = [0, len(self.embedding) // 2, len(self.embedding) - 1]
221 |         for i in emd_index:
222 |             plt.scatter(self.embedding[i][:, 0], self.embedding[i][:, 1],
223 |                         c=plt.cm.tab20(index),
224 |                         edgecolors=plt.cm.Wistia(range(len(self.embedding[i][:, 0]))),
225 |                         label="epoch: {}".format(i))
226 |             index += 1
227 |         plt.legend()
228 |         plt.savefig(self.get_model_result_path() + "total_embedding.png")
229 | 
--------------------------------------------------------------------------------
/train/trainable.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import matplotlib
4 | import matplotlib.pyplot as plt
5 | import torch
6 | import torch.nn as nn
7 | from torch.utils.data import Dataset, DataLoader
8 | import numpy as np
9 | 
10 | import time
11 | 
12 | root = os.path.dirname(__file__)
13 | 
14 | 
15 | def _check_path(path: str):
16 |     if not os.path.exists(path):
17 |         os.makedirs(path)
18 | 
19 | 
20 | class TrainableModule(nn.Module):
21 |     """
22 |     The base module of trainable models. "Trainable" means the models can be trained easily
23 |     with the following methods:
24 | 
25 |     >>> model.prepare_data(...)
26 |     >>> model.train_model(...)
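
    A minimal end-to-end sketch (illustrative only; the subclass name, dataset
    objects and hyper-parameter values below are assumptions, not fixtures of
    this repository):

    >>> model = SomeTrainableSubclass(model_flag="demo", device="cpu")
    >>> model.prepare_data(train_set, test_set, eval_set, batch_size=256)
    >>> model.train_model(epoch=10, lr=1e-3, criterion=torch.nn.MSELoss())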
27 |     """
28 | 
29 |     def __init__(self, model_flag="model", device="cuda"):
30 |         super(TrainableModule, self).__init__()
31 |         self.eval_losses = None
32 |         self.train_losses = None
33 | 
34 |         self.eval_loader = None
35 |         self.test_loader = None
36 |         self.train_loader = None
37 | 
38 |         self.optimizer = None
39 |         self.criterion = None
40 |         self.lr_scheduler = None
41 | 
42 |         self.flag = model_flag
43 |         self.device = device
44 | 
45 |     def prepare_data(self,
46 |                      train_set: Dataset,
47 |                      test_set: Dataset,
48 |                      eval_set: Dataset = None,
49 |                      batch_size: int = 256,
50 |                      num_workers: int = 8,
51 |                      eval_shuffle=True):
52 |         self.train_loader = DataLoader(train_set, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
53 |         self.test_loader = DataLoader(test_set, batch_size, shuffle=False, num_workers=num_workers)
54 |         if eval_set is not None:
55 |             self.eval_loader = DataLoader(eval_set, batch_size, shuffle=eval_shuffle, num_workers=num_workers)
56 |         print("train size:{}".format(len(train_set)))
57 |         print("test size:{}".format(len(test_set)))
58 |         print("validate size:{}".format(len(eval_set) if eval_set is not None else 0))
59 |         _check_path(self.get_model_result_path())
60 | 
61 |     def train_model(self,
62 |                     epoch: int,
63 |                     lr: float,
64 |                     criterion,
65 |                     optimizer: str = "adam",
66 |                     lr_lambda=None,
67 |                     early_stop=2,
68 |                     show_batch_loss=False):
69 |         if self.train_loader is None:
70 |             raise RuntimeError("The train_loader is None! Use "
71 |                                "model.prepare_data(Dataset, batch_size, num_workers) to provide the "
72 |                                "training data first.")
73 |         if optimizer == "adam":
74 |             self.optimizer = torch.optim.Adam(lr=lr, params=self.parameters())
75 |         elif optimizer == "rms":
76 |             self.optimizer = torch.optim.RMSprop(lr=lr, params=self.parameters())
77 |         elif optimizer == "sgd":
78 |             self.optimizer = torch.optim.SGD(lr=lr, params=self.parameters())
79 |         else:
80 |             raise RuntimeError("Unknown optimizer {}.".format(optimizer))
81 |         if lr_lambda is not None:
82 |             self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda, last_epoch=-1)
83 |         if early_stop is not None and early_stop > 0:
84 |             mini_eval_loss = None
85 |             patience = early_stop
86 |             now_patience = 0
87 |         self.criterion = criterion
88 |         self.train_losses = []
89 |         self.eval_losses = []
90 |         print("Model flag: {}".format(self.flag))
91 |         print("Start training epoch {}".format(epoch))
92 | 
93 |         # training
94 |         start_time = time.time()
95 |         self.train_start()  # callback function
96 |         for e in range(epoch):
97 |             self.epoch_start()  # callback function
98 |             print("epoch: {}/{}".format(e + 1, epoch))
99 |             epoch_start_time = time.time()
100 |             self.train()
101 |             batch_losses = []
102 |             for step, (x, y) in enumerate(self.train_loader):
103 |                 x = x.to(torch.float32).to(self.device)
104 |                 y = y.to(torch.float32).to(self.device)
105 |                 loss, out = self.compute_loss(x, y, self.criterion)
106 |                 if step == 0 and e == epoch-1:
107 |                     np.save(self.get_model_result_path() + "train_x_batch{}".format(step), x.cpu().detach().numpy())
108 |                     np.save(self.get_model_result_path() + "train_y_batch{}".format(step), y.cpu().detach().numpy())
109 |                 if show_batch_loss:
110 |                     print("\tbatch: {}/{}, loss:{:.4f}".format(step + 1, len(self.train_loader), loss.item()))
111 |                 self.optimizer.zero_grad()
112 |                 loss.backward()
113 |                 self.optimizer.step()
114 |                 batch_losses.append(loss.item())
115 | 
116 |             if self.lr_scheduler is not None:
117 |                 self.lr_scheduler.step()
118 |             batch_loss = np.average(batch_losses)
119 |             self.train_losses.append(batch_loss)
120 |             
121 | 
122 |             # evaluation
123 |             if self.eval_loader is not None:
124 |                 self.eval()
125 |                 eval_losses = []
126 | 
127 |                 with torch.no_grad():
128 |                     for step, (e_x, e_y) in enumerate(self.eval_loader):
129 |                         e_x = e_x.to(torch.float32).to(self.device)
130 |                         e_y = e_y.to(torch.float32)
131 |                         loss, _ = self.compute_loss(e_x, e_y, self.criterion)
132 |                         eval_losses.append(loss.item())
133 |                 eval_loss = np.average(eval_losses)
134 |                 self.eval_losses.append(eval_loss)
135 | 
136 |                 print("\ttraining loss: {:.4}\n \teval loss: {:.4} \tCurrent learning rate: {}".
137 |                       format(batch_loss, eval_loss, self.optimizer.state_dict()['param_groups'][0]['lr']))
138 |             else:
139 |                 print("\ttraining loss: {:.4}\n \tCurrent learning rate: {}".
140 |                       format(batch_loss, self.optimizer.state_dict()['param_groups'][0]['lr']))
141 | 
142 |             print("\tEpoch time spent: %s s" % (time.time() - epoch_start_time))
143 |             # early stop (only possible when a validation set exists)
144 |             if early_stop is not None and early_stop > 0 and self.eval_loader is not None:
145 |                 if mini_eval_loss is None:
146 |                     mini_eval_loss = eval_loss
147 |                     torch.save(self.state_dict(), self.get_model_result_path() + 'check_point.pt')
148 |                     continue
149 |                 if eval_loss >= mini_eval_loss:
150 |                     now_patience = now_patience + 1
151 |                     print("\tEarly Stopping Monitor: eval loss did not improve, patience {}/{}"
152 |                           .format(now_patience, patience))
153 |                 else:
154 |                     now_patience = 0
155 |                     mini_eval_loss = eval_loss
156 |                     print("\tEarly Stopping Monitor: smaller eval loss achieved, saving model...")
157 |                     torch.save(self.state_dict(), self.get_model_result_path() + 'check_point.pt')
158 |                 if now_patience >= patience:
159 |                     print("\tEarly Stopping in epoch {}".format(e))
160 |                     self.load_state_dict(torch.load(self.get_model_result_path() + 'check_point.pt'))
161 |                     break
162 |             self.epoch_end()  # callback function
163 |         end_time = time.time()
164 |         self.train_end()  # callback function
165 |         print("Total time spent: %s s" % round(end_time - start_time, 2))
166 |         self.plot_losses()
167 |         torch.save(self.state_dict(), self.get_model_result_path() + 'model.pt')
168 |         self.test_model()
169 | 
170 |     def test_model(self):
171 |         """
172 |         TODO: Rework the logic that splits and saves the results; it is too crude. Ideally the chunk size should be decided from the device's physical memory.
173 |         """
174 |         self.test_start()  # callback function
175 |         output = None
176 |         labels = None
177 |         losses = []
178 |         self.eval()
179 |         with torch.no_grad():
180 |             index = 1
181 |             for step, (x, y) in enumerate(self.test_loader):
182 |                 x = x.to(torch.float32).to(self.device)
183 |                 y = y.to(torch.float32).to(self.device)
184 |                 loss, model_out = self.compute_loss(x, y, self.criterion)
185 |                 model_out = model_out.detach().cpu()
186 |                 y = y.detach().cpu()
187 |                 losses.append(loss.item())
188 |                 output = torch.cat([output, model_out], dim=0) if output is not None else model_out
189 |                 labels = torch.cat([labels, y], dim=0) if labels is not None else y
190 |                 # Split and save the predictions and labels here. To reduce the memory
191 |                 # footprint when loading, the results must be split into several parts
192 |                 # and saved separately.
193 |                 if output.numel() >= 40000000:  # result cut
194 |                     print(output.numel())
195 |                     print(labels.numel())
196 |                     np.save(self.get_model_result_path() + "model_test_output_part{}".format(index), output.cpu().detach().numpy())
197 |                     np.save(self.get_model_result_path() + "model_test_labels_part{}".format(index), labels.cpu().detach().numpy())
198 |                     output, labels = None, None
199 |                     index += 1
200 |             # output = torch.cat(output, dim=0)
201 |             # labels = torch.cat(labels, dim=0)
202 |             if output is not None:
203 |                 np.save(self.get_model_result_path() + "model_test_output_part{}".format(index), output.cpu().detach().numpy())
204 |                 np.save(self.get_model_result_path() + "model_test_labels_part{}".format(index), labels.cpu().detach().numpy())
+ "model_test_labels_part{}".format(index), labels.cpu().detach().numpy()) 205 | np.save(self.get_model_result_path() + "model_test_loss_part{}".format(index), np.average(losses)) 206 | self.test_end() # callback function 207 | 208 | def set_criterion(self, 209 | criterion): 210 | self.criterion = criterion 211 | 212 | def plot_losses(self, show=False): 213 | if self.train_losses is None or self.eval_losses is None: 214 | raise RuntimeWarning("The model is not trained by internal training method. " 215 | "You could call plot_losses(show=False) after training the model by:" 216 | ">>> model.prepare_data(...)" 217 | ">>> model.train_model(...)." 218 | "Tips: plot_losses(show=False) will not work if you train your model manually" 219 | "but not the above process.") 220 | if show: 221 | matplotlib.use("QtAgg") 222 | else: 223 | matplotlib.use("Agg") 224 | plt.suptitle("Model Loss") 225 | plt.plot(self.train_losses, label="training loss") 226 | plt.plot(self.eval_losses, label="evalidate loss") 227 | plt.xlabel("epoch") 228 | plt.ylabel("criterion loss") 229 | plt.legend() 230 | _check_path(self.get_model_result_path()) 231 | plt.savefig(self.get_model_result_path() + "train_eval_losses.png") 232 | plt.cla() 233 | if show: 234 | plt.show(block=True) 235 | 236 | def get_model_result_path(self): 237 | return root + "/model_result/" + self.flag + "/" 238 | 239 | def _criterion(self, y, label): 240 | if label.device.type == 'cpu': 241 | y = y.detach().cpu() 242 | label = label.detach().cpu() 243 | elif label.device.type != y.device.type: 244 | y = y.to(label.device) 245 | return self.criterion(y, label) 246 | 247 | def compute_loss(self, 248 | x: torch.Tensor, 249 | label: torch.Tensor, 250 | criterion) -> [torch.Tensor, torch.Tensor]: 251 | """ 252 | An overridable method for different process of loss computation. The default process is simple 253 | single output computation. This method should only be overrideen if custom loss computation is 254 | required when training the model by: 255 | >>> self.prepare_data(...) 256 | >>> self.train_model(...) 257 | 258 | return: must be a list containing [ loss, model_out ]. 259 | """ 260 | model_out = self(x) 261 | loss = criterion(model_out.to(self.device), label.to(self.device)) 262 | return [loss, model_out] 263 | 264 | def epoch_start(self): 265 | """ 266 | A callback function called before every training epoch starting. 267 | """ 268 | return 269 | 270 | def epoch_end(self): 271 | """ 272 | A callback function called after every training epoch finished. 273 | """ 274 | return 275 | 276 | def train_start(self): 277 | """ 278 | A callback function called before training process starting. 279 | """ 280 | return 281 | 282 | def train_end(self): 283 | """ 284 | A callback function called after training process finished. 285 | """ 286 | return 287 | 288 | def test_start(self): 289 | """ 290 | A callback function called before testing process starting. 291 | """ 292 | return 293 | 294 | def test_end(self): 295 | """ 296 | A callback function called after testing process finished. 
297 |         """
298 |         return
299 | 
--------------------------------------------------------------------------------
/dataset/cmapss.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | import matplotlib.pyplot as plt
4 | import pandas as pd
5 | import numpy as np
6 | import sklearn.base
7 | import sklearn.preprocessing as pre
8 | import torch.utils.data
9 | from torch.utils.data import Dataset
10 | from enum import Enum
11 | from dataset.utils import Sampler, gaussian_distribution
12 | 
13 | DEFAULT_ROOT = r"./raw_data/"
14 | 
15 | DEFAULT_SENSORS = ["s_2", "s_3", "s_4", "s_7", "s_8", "s_9", "s_11", "s_12", "s_13",
16 |                    "s_14", "s_15", "s_17", "s_20", "s_21"]
17 | 
18 | 
19 | class Subset(Enum):
20 |     FD001 = "FD001"
21 |     FD002 = "FD002"
22 |     FD003 = "FD003"
23 |     FD004 = "FD004"
24 | 
25 | 
26 | class Cmapss(Dataset):
27 |     """
28 |     The C-MAPSS dataset used for DataLoader.
29 | 
30 |     Notes
31 |     -----
32 |     This class supports using dataset.utils.Sampler to customize your own sampling method. If you do so, the
33 |     sampling behaviour used when the DataLoader calls __getitem__(index) is replaced. The sampler can be passed
34 |     when you initialize Cmapss(..., sampler=...) or set later via set_sampler(sampler=...).
35 |     """
36 | 
37 |     def __init__(self, data: np.ndarray, ids: np.ndarray, labels: np.ndarray, sampler: Sampler = None):
38 |         """
39 |         The dataset class used for DataLoader.
40 | 
41 |         :param data: The CMAPSS dataset samples.
42 |         :param ids: The engine id of every sample.
43 |         :param labels: The RUL label of every sample.
44 |         """
45 |         self.data = data
46 |         self.ids = ids
47 |         self.labels = labels
48 |         assert self.data.shape[0] == self.ids.shape[0] == self.labels.shape[0]
49 |         self.__sampler = sampler
50 | 
51 |     def __getitem__(self, item):
52 |         if self.__sampler is not None:
53 |             return self.__sampler.sample(item)
54 |         else:
55 |             return self.data[item], self.labels[item:item + 1]
56 | 
57 |     def __len__(self):
58 |         return self.data.shape[0]
59 | 
60 |     def set_sampler(self, sampler: Sampler):
61 |         self.__sampler = sampler
62 | 
63 |     def clear_sampler(self):
64 |         self.__sampler = None
65 | 
66 |     def get_data_by_engine_id(self, engine_id):
67 |         pass
68 | 
69 | 
70 | class CmapssPiecewiseNegativeSampler(Sampler):
71 |     """
72 |     A Sampler used to construct positive-negative pairs for training a contrastive neural network.
73 |     """
74 | 
75 |     def __init__(self, dataset: Cmapss, engine_num=1, interval_num=4):
76 |         """
77 |         :param dataset: The target dataset.
78 |         :param engine_num: The number of engines to sample from, should be >= 1. The engine containing 'index' is always among them.
79 |         :param interval_num: The number of intervals each engine's run is split into. This argument determines
80 |                              the number of negative samples drawn per engine.
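        Each sample(index) call returns an ndarray of shape
        (engine_num * interval_num, window, features) plus the matching labels:
        position 0 holds the positive sample at `index`, and the remaining
        positions hold negatives drawn from the other intervals/engines.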
81 |         """
82 |         super(CmapssPiecewiseNegativeSampler, self).__init__(dataset)
83 |         dataset.set_sampler(self)
84 |         self.ids = dataset.ids
85 |         self.data = dataset.data
86 |         self.labels = dataset.labels
87 |         self.interval_nums = interval_num
88 |         self.engine_num = engine_num
89 | 
90 |     def sample(self, index: int):
91 |         engine_id = self.ids[index]
92 |         engine_ids = np.random.choice(a=np.unique(self.ids),
93 |                                       size=self.engine_num,
94 |                                       replace=False)
95 |         if engine_id not in engine_ids:
96 |             engine_ids[0] = engine_id  # make sure the engine that contains `index` is sampled
97 |         neg_samples = [0] * (self.interval_nums * self.engine_num)
98 |         neg_labels = [0] * (self.interval_nums * self.engine_num)
99 |         neg_ids = [0] * (self.interval_nums * self.engine_num)
100 |         j = 1  # write index for the negative arrays; negatives start at position 1 because position 0 is reserved for the positive sample
101 |         # start sampling
102 |         for engine in engine_ids:
103 |             sample_indexes = np.argwhere(self.ids == engine)
104 |             gap = sample_indexes.shape[0] // self.interval_nums
105 |             for i in range(self.interval_nums):
106 |                 random_range_start = sample_indexes[0][0] + i * gap
107 |                 # in the last iteration, extend the sampling boundary to the last index of this engine's samples to avoid missing any
108 |                 random_range_end = random_range_start + gap \
109 |                     if i != self.interval_nums - 1 else sample_indexes[-1][0] + 1
110 |                 if random_range_start <= index < random_range_end and engine == engine_id:
111 |                     continue  # skip the interval that contains the positive sample
112 |                 sample_index = np.random.choice(range(random_range_start, random_range_end), 1, replace=True)
113 |                 neg_samples[j] = self.data[sample_index[0]]
114 |                 neg_labels[j] = self.labels[sample_index[0]]  # the n:n+1 slicing style keeps the label's last dimension
115 |                 neg_ids[j] = self.ids[sample_index[0]]
116 |                 j += 1
117 |         # put the positive sample at the head of the final arrays
118 |         neg_samples[0] = self.data[index]
119 |         neg_labels[0] = self.labels[index]  # the n:n+1 slicing style keeps the label's last dimension
120 |         neg_ids[0] = engine_id  # used for testing, to check whether all negative samples come from the same engine as the positive one
121 |         return np.stack(neg_samples), np.array(neg_labels)
122 | 
123 | 
124 | class CmapssGaussianNegativeSampler(Sampler):
125 |     def __init__(self, dataset: Cmapss, neg_num=5, thresh=0.2, std=1.):
126 |         super(CmapssGaussianNegativeSampler, self).__init__(dataset)
127 |         dataset.set_sampler(self)
128 |         self.neg_num = neg_num - 1
129 |         self.thresh = thresh
130 |         self.std = std
131 |         self.ids = dataset.ids
132 |         self.data = dataset.data
133 |         self.labels = dataset.labels
134 | 
135 | 
136 |     def sample(self, index: int):
137 |         engine_id = self.ids[index]
138 |         sample_indexes = np.argwhere(self.ids == engine_id).squeeze()
139 | 
140 |         # locate the sampling point inside the [-4, 4] range of the Gaussian; the distribution's centre moves with the sampling point
141 |         # first map the sampling point to [0, 1], then shift it into [-4, 4] via * 8 - 4
142 |         sample_mean = (index - sample_indexes.min()) / (sample_indexes.max() - sample_indexes.min())
143 |         sample_mean = sample_mean * 8 - 4
144 | 
145 |         # exclude a band of `thresh` points around the sampling point from being sampled
146 |         thresh_up = index + self.thresh / 2 * len(sample_indexes)
147 |         thresh_down = index - self.thresh / 2 * len(sample_indexes)
148 |         cut_sample_indexes = np.concatenate([sample_indexes[sample_indexes < thresh_down],
149 |                                              sample_indexes[sample_indexes > thresh_up]])
150 |         length = len(cut_sample_indexes)  # final sample indexes
151 |         prob = gaussian_distribution(np.linspace(-4, 4, length), sample_mean, self.std)
152 |         prob = torch.softmax(torch.tensor(prob), dim=0).numpy()
153 |         results = np.random.choice(cut_sample_indexes, self.neg_num, replace=False, p=prob)
154 |         neg_samples = [self.data[i] for i in results]
155 |         neg_labels = [self.labels[i] for i in results]
156 |         # prob_all = np.zeros(len(sample_indexes))
157 |         # for i in range(len(cut_sample_indexes)):
158 |         #     prob_all[sample_indexes == cut_sample_indexes[i]] = prob[i]
plt.title("index:{}".format(index)) 160 | # plt.plot(sample_indexes, prob_all) 161 | # plt.scatter(results, np.zeros(len(results)), c="red") 162 | # plt.grid() 163 | # plt.show() 164 | return np.stack(neg_samples), np.array(neg_labels) 165 | 166 | 167 | class CmapssRandomNegtiveSampler(Sampler): 168 | def __init__(self, dataset: Cmapss, neg_num=10, sample_thresh=0.2): 169 | super(CmapssRandomNegtiveSampler, self).__init__(dataset) 170 | dataset.set_sampler(self) 171 | self.neg_num = neg_num 172 | self.labels = dataset.labels 173 | self.data = dataset.data 174 | self.thresh = sample_thresh 175 | 176 | def sample(self, index: int): 177 | indexes = np.squeeze(np.argwhere(np.abs(self.labels - self.labels[index]) > self.thresh)) 178 | indexes = np.random.choice(a=indexes, size=self.neg_num + 1, replace=False) 179 | indexes[0] = index 180 | return self.data[indexes], self.labels[indexes] 181 | 182 | 183 | def generate_rul(df: pd.DataFrame, y_test: pd.DataFrame = None, normalize=False, threshold=0) -> pd.DataFrame: 184 | """ 185 | Generating RUL labels for original DataFrame. 186 | 187 | :param df: The CMAPSS DataFrame generated by get_data() methods. 188 | :param y_test: The DataFrame from RUL_FD00N.txt file. If not None, this method will process the df as training data, 189 | else this method will process the df as test data. 190 | :param normalize: Weather normalizing the RUL label to [0, 1]. 191 | :param threshold: Weather drop the RUL which bigger than the threshold. This argument will be processed earlier than 192 | normalize argument. Thus, if normalize = True, the dropped RUL will be 1. 193 | :return: A DataFrame contains RUL column with name "rul" and the maximum life cycle column with name "max_cycles". 194 | """ 195 | grouped = df.groupby(by="unit_nr") 196 | RUL_max = grouped["time_cycles"].max() 197 | if y_test is not None: 198 | y_test.index = RUL_max.index 199 | RUL_max = RUL_max + y_test[y_test.columns[0]] 200 | result = pd.merge(df, RUL_max.to_frame(name="max_cycles"), on="unit_nr") 201 | result["rul"] = result["max_cycles"] - result["time_cycles"] 202 | if threshold > 0: 203 | result.loc[result["rul"] > threshold, "rul"] = threshold 204 | result.loc[result["max_cycles"] > threshold, "max_cycles"] = threshold + 1 205 | if normalize: 206 | result["rul"] = (result["rul"] + 1) / result["max_cycles"] 207 | # result.drop("max_cycles", axis=1) 208 | return result 209 | 210 | 211 | def generate_window_sample(df: pd.DataFrame, window_size, slide_step, sensors): 212 | """ 213 | Transform the RULed DataFrame to window samples. 214 | 215 | :param df: The RULed DataFrame. 216 | :param window_size: The sample length. 217 | :param slide_step: Sampling step size. 218 | :param sensors: The sensors' data will be returned. If None, will return all the sensors' data. 219 | :return: [ndarray with window samples; ndarray with engine id for every window samples; ndarray with 220 | RUL labels for every window samples] 221 | """ 222 | engine_grouped = df.groupby(by="unit_nr") 223 | result = [] # engine sensor data 224 | engine_ids = [] # engine id 225 | labels = [] # rul labels 226 | for _, engine in list(engine_grouped): 227 | data = engine[sensors].values # shape = (n, f) 228 | if data.shape[0] < window_size: 229 | warnings.warn("The engine id {} with total length {} is shorter than window_size {}. 
" 230 | "Hence, these samples were dropped!".format(_, data.shape[0], window_size)) 231 | continue 232 | sample_nums = (data.shape[0] - window_size) // slide_step + 1 233 | s = [0] * sample_nums # temporal sensor data 234 | e = [0] * sample_nums # temporal engine data. To correspond with each sample. 235 | rul = [0] * sample_nums # temporal rul data. To correspond with each sample. 236 | engine_id = engine["unit_nr"].iloc[0] 237 | for j in range(len(s)): 238 | s[j] = data[j * slide_step:j * slide_step + window_size] 239 | e[j] = engine_id 240 | rul[j] = engine["rul"].iloc[ 241 | j * slide_step + window_size - 1] # The label is set to the last time stamp of the sample window. 242 | result.append(s) 243 | engine_ids.append(e) 244 | labels.append(rul) 245 | return np.concatenate(result, dtype=np.float64), \ 246 | np.concatenate(engine_ids, dtype=np.float64), \ 247 | np.concatenate(labels, dtype=np.float64) 248 | 249 | 250 | def get_data(path: str, subset: Subset, window_size: int, slide_step: int = 1, sensors: list = None, 251 | scaler: sklearn.base.TransformerMixin = pre.MinMaxScaler((-1, 1)), rul_threshold=0, label_norm=False, 252 | val_ratio=0.2): 253 | """ 254 | Return the training data, test data and validation data of C-MAPSS dataset. 255 | 256 | :param path: The root path of the C-MAPSS dataset. The cmapss.DEAFULT_ROOT is the default root path in server. 257 | :param subset: A enum indicated the subset. Should be the element of follows: [FD001, FD002, FD003, FD004] 258 | :param window_size: The sample length. 259 | :param slide_step: The sampling gap length, default 1. 260 | :param sensors: The sensor data will be returned. It should be from [s_1 ~ s_21]. If None, selecting all the 261 | sensor data. 262 | :param scaler: Used for normalizing the train and test data. It should be a sklearn scaler. 263 | :param rul_threshold: The rul threshold is applied to a piecewise linear RUL label function. If 0, will applied 264 | non-piecewise linear RUL label function. 265 | :param label_norm: Weather normalizing the RUL label to [0, 1]. 266 | :param val_ratio: The ratio of validation dataset. 267 | 268 | :return: train data set class (torch.utils.data.Dataset), 269 | test data set class (torch.utils.data.Dataset), 270 | val data set class (torch.utils.data.Dataset), 271 | Scaler (maybe) used to inverse transform the train data. 
272 | 
273 |     Notes
274 |     -----
275 |     This method can only process the original C-MAPSS data files, which are named:
276 |     RUL_FD00X.txt / train_FD00X.txt / test_FD00X.txt
277 |     """
278 |     # files
279 |     train_file = 'train_' + subset.value + '.txt'
280 |     test_file = 'test_' + subset.value + '.txt'
281 |     # columns
282 |     index_names = ['unit_nr', 'time_cycles']
283 |     setting_names = ['setting_1', 'setting_2', 'setting_3']
284 |     sensor_names = ['s_{}'.format(i + 1) for i in range(0, 21)]
285 |     col_names = index_names + setting_names + sensor_names
286 |     # data readout
287 |     train = pd.read_csv((path + train_file), sep=r'\s+', header=None,
288 |                         names=col_names)
289 |     test = pd.read_csv((path + test_file), sep=r'\s+', header=None,
290 |                        names=col_names)
291 |     y_test = pd.read_csv((path + 'RUL_' + subset.value + '.txt'), sep=r'\s+', header=None,
292 |                          names=['RUL'])
293 |     # generate rul label
294 |     train = generate_rul(train, threshold=rul_threshold, normalize=label_norm)
295 |     test = generate_rul(test, y_test, threshold=rul_threshold, normalize=label_norm)
296 |     # split the val dataset from the train set
297 |     train, val = split_val_set(train, val_ratio)
298 |     # normalization uses the train set (all normalization factors come from the train set)
299 |     assert isinstance(scaler, (pre.StandardScaler, pre.MinMaxScaler, pre.RobustScaler, pre.MaxAbsScaler))
300 |     sensors = sensor_names if sensors is None else sensors
301 |     scaler.fit(train[sensors])
302 |     train[sensors] = scaler.transform(train[sensors])
303 |     test[sensors] = scaler.transform(test[sensors])
304 |     val[sensors] = scaler.transform(val[sensors])
305 | 
306 |     if sensors is None or sensors == []:
307 |         sensors = train.columns
308 |     [train_data, train_ids, train_label] = generate_window_sample(train, window_size, slide_step, sensors)
309 |     [val_data, val_ids, val_label] = generate_window_sample(val, window_size, slide_step, sensors)
310 |     [test_data, test_ids, test_label] = generate_window_sample(test, window_size, slide_step, sensors)
311 |     train_data = Cmapss(train_data, train_ids, train_label)
312 |     test_data = Cmapss(test_data, test_ids, test_label)
313 |     val_data = Cmapss(val_data, val_ids, val_label)
314 |     return train_data, test_data, val_data, scaler
315 | 
316 | 
317 | def split_val_set(train_set: pd.DataFrame, val_size=0.2):
318 |     """
319 |     Split train_set into training data and validation data. The validation data is later
320 |     normalized (in get_data) with the normalizing factors computed from the training data.
321 | 
322 |     :param train_set: The data to be split.
323 |     :param val_size: The validation data set ratio of train_set (default 0.2).
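
    Note: the split is engine-wise, so whole engines go to one split or the other and
    windows from a single engine never appear in both. The random choice uses a fixed
    seed (2023), and engine 1 is always kept in the training split.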
324 |     :return: train_data, val_data
325 |     """
326 |     grouped = train_set.groupby(by="unit_nr")
327 |     train_set_result = []
328 |     val_set_result = []
329 |     np.random.seed(2023)
330 |     val_index = np.random.choice(range(1, len(grouped) + 1), int(len(grouped) * val_size), replace=False)
331 |     if 1 in val_index:  # engine 1 is always kept in the training split
332 |         val_index = np.delete(val_index, np.argwhere(val_index == 1))
333 |     print(f"val_index:{val_index}")
334 |     for i in range(1, len(grouped) + 1):
335 |         data = train_set[train_set["unit_nr"] == i]
336 |         if i in val_index:
337 |             val_set_result.append(data)
338 |         else:
339 |             train_set_result.append(data)
340 |     return pd.concat(train_set_result), pd.concat(val_set_result)
341 | 
342 | 
343 | if __name__ == '__main__':
344 |     train1, test1, val1, scaler = get_data(DEFAULT_ROOT,
345 |                                            Subset.FD004,
346 |                                            window_size=40,
347 |                                            slide_step=1,
348 |                                            sensors=None,
349 |                                            rul_threshold=0,
350 |                                            label_norm=True,
351 |                                            scaler=pre.MinMaxScaler(),
352 |                                            val_ratio=0.1)
353 |     sampler = CmapssGaussianNegativeSampler(train1, 5, std=0.3)
354 |     loader = torch.utils.data.DataLoader(train1, 40, True)
355 |     for _, (x, y) in enumerate(loader):
356 |         print(x.shape)
357 |         print(y.shape)
358 |         break
359 | 
--------------------------------------------------------------------------------
/models/RULPrediction/IMDSSN.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of https://doi.org/10.1016/j.ress.2023.109096
3 | """
4 | 
5 | 
6 | import numpy as np
7 | import torch
8 | import torch.nn as nn
9 | import math
10 | import copy
11 | from torch.nn.parameter import Parameter
12 | from typing import Dict
13 | from math import sqrt
14 | from ContrastiveModules import ContrastiveModel, pn_rul_compute
15 | 
16 | 
17 | 
18 | 
19 | class PositionEmbedding(nn.Module):
20 |     def __init__(self, dim, window_size, dropout=0.5, device="cuda:0"):
21 |         super(PositionEmbedding, self).__init__()
22 |         self.pe = torch.zeros(window_size, dim)
23 |         position = torch.arange(0, window_size).unsqueeze(1)
24 |         div_term = torch.exp(torch.arange(0, dim, 2) *
25 |                              -(np.log(10000.0) / dim))
26 |         self.pe[:, 0::2] = torch.sin(position * div_term)
27 |         self.pe[:, 1::2] = torch.cos(position * div_term)
28 |         self.pe = self.pe.unsqueeze(0).to(device)
29 |         self.dropout = nn.Dropout(dropout)
30 | 
31 |     def forward(self, x):
32 |         x = x + self.pe
33 |         return self.dropout(x)
34 | 
35 | 
36 | class TriangularCausalMask:
37 |     def __init__(self, B, L, device="cpu"):
38 |         mask_shape = [B, 1, L, L]
39 |         with torch.no_grad():
40 |             self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
41 | 
42 |     @property
43 |     def mask(self):
44 |         return self._mask
45 | 
46 | 
47 | class ProbMask:
48 |     def __init__(self, B, H, L, index, scores, device="cpu"):
49 |         _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
50 |         _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
51 |         indicator = _mask_ex[torch.arange(B)[:, None, None],
52 |                              torch.arange(H)[None, :, None],
53 |                              index, :].to(device)
54 |         self._mask = indicator.view(scores.shape).to(device)
55 | 
56 |     @property
57 |     def mask(self):
58 |         return self._mask
59 | 
60 | 
61 | def gelu(x):
62 |     return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
63 | 
64 | 
65 | def swish(x):
66 |     return x * torch.sigmoid(x)
67 | 
68 | 
69 | ACT_FNS = {
70 |     'relu': nn.ReLU(),
71 |     'swish': swish,
72 |     'gelu': gelu
73 | }
74 | 
75 | 
76 | class LogSparseAttention(nn.Module):
77 |     """
78 |     Args:
79 |         n_head: Number of heads in the multi-head attention mechanism
80 |         n_embd: The embedding dimension of the input (also the per-head dimension)
81 |         win_len: The length of the input window (sequence length)
82 |         scale: Whether to scale the attention scores by 1/sqrt(d)
83 |         q_len: Kernel size of the causal convolution that produces queries and keys
84 |         sub_len: sub_len of the sparse attention (controls the local window and restart spacing)
85 |         sparse: Whether to use the log-sparse attention mask instead of a full causal mask
86 |         attn_pdrop: Dropout rate applied to the attention weights
87 |         resid_pdrop: Dropout rate applied to the output projection
88 |     """
89 | 
90 |     def __init__(self, n_head, n_embd, win_len, scale: bool, q_len: int, sub_len, sparse=True, attn_pdrop=0.1,
91 |                  resid_pdrop=0.1):
92 |         super(LogSparseAttention, self).__init__()
93 | 
94 |         if sparse:
95 |             print('Activate log sparse!')
96 |             mask = self.log_mask(win_len, sub_len)
97 |         else:
98 |             mask = torch.tril(torch.ones(win_len, win_len)).view(1, 1, win_len, win_len)
99 | 
100 |         self.register_buffer('mask_tri', mask)
101 |         self.n_head = n_head
102 |         self.split_size = n_embd * self.n_head
103 |         self.scale = scale
104 |         self.q_len = q_len
105 |         self.query_key = nn.Conv1d(n_embd, n_embd * n_head * 2, self.q_len)
106 |         self.value = Conv1D(n_embd * n_head, 1, n_embd)
107 |         self.c_proj = Conv1D(n_embd, 1, n_embd * self.n_head)
108 |         self.attn_dropout = nn.Dropout(attn_pdrop)
109 |         self.resid_dropout = nn.Dropout(resid_pdrop)
110 | 
111 |     def log_mask(self, win_len, sub_len):
112 |         mask = torch.zeros((win_len, win_len), dtype=torch.float)
113 |         for i in range(win_len):
114 |             mask[i] = self.row_mask(i, sub_len, win_len)
115 |         return mask.view(1, 1, mask.size(0), mask.size(1))
116 | 
117 |     def row_mask(self, index, sub_len, win_len):
118 |         """
119 |         Remark:
120 |         1. Currently, dense matrices with sparse multiplication are not supported by PyTorch. An efficient
121 |         implementation would require a custom CUDA kernel, which we haven't implemented yet.
122 | 
123 |         2. Our default setting here uses local attention and restart attention.
124 | 
125 |         3. For the index-th row, if its past is smaller than the number of cells the last
126 |         cell can attend to, we allow the current cell to attend to all past cells, to fully
127 |         utilize parallel computing in dense matrices with sparse multiplication."""
128 |         log_l = math.ceil(np.log2(sub_len))
129 |         mask = torch.zeros((win_len), dtype=torch.float)
130 |         if ((win_len // sub_len) * 2 * (log_l) > index):
131 |             mask[:(index + 1)] = 1
132 |         else:
133 |             while (index >= 0):
134 |                 if ((index - log_l + 1) < 0):
135 |                     mask[:index] = 1
136 |                     break
137 |                 mask[index - log_l + 1:(index + 1)] = 1  # Local attention
138 |                 for i in range(0, log_l):
139 |                     new_index = index - log_l + 1 - 2 ** i
140 |                     if ((index - new_index) <= sub_len and new_index >= 0):
141 |                         mask[new_index] = 1
142 |                 index -= sub_len
143 |         return mask
144 | 
145 |     def attn(self, query: torch.Tensor, key, value: torch.Tensor):
146 |         activation = nn.Softmax(dim=-1)
147 |         pre_att = torch.matmul(query, key)
148 |         if self.scale:
149 |             pre_att = pre_att / math.sqrt(value.size(-1))
150 |         mask = self.mask_tri[:, :, :pre_att.size(-2), :pre_att.size(-1)]
151 |         pre_att = pre_att * mask + -1e9 * (1 - mask)
152 |         pre_att = activation(pre_att)
153 |         pre_att = self.attn_dropout(pre_att)
154 |         attn = torch.matmul(pre_att, value)
155 | 
156 |         return attn
157 | 
158 |     def merge_heads(self, x):
159 |         x = x.permute(0, 2, 1, 3).contiguous()
160 |         new_x_shape = x.size()[:-2] + (x.size(-2) * x.size(-1),)
161 |         return x.view(*new_x_shape)
162 | 
163 |     def split_heads(self, x, k=False):
164 |         new_x_shape = x.size()[:-1] + (self.n_head, x.size(-1) // self.n_head)
165 |         x = x.view(*new_x_shape)
166 |         if k:
167 |             return x.permute(0, 2, 3, 1)
168 |         else:
169 |             return x.permute(0, 2, 1, 3)
170 | 
171 |     def forward(self, x):
172 | 
173 |         value = self.value(x)
174 |         qk_x = nn.functional.pad(x.permute(0, 2, 1), pad=(self.q_len - 1, 0))
175 |         query_key = self.query_key(qk_x).permute(0, 2, 1)
176 |         query, key = query_key.split(self.split_size, dim=2)
177 |         query = self.split_heads(query)
178 |         key = self.split_heads(key, k=True)
179 |         value = self.split_heads(value)
180 |         attn = self.attn(query, key, value)
181 |         attn = self.merge_heads(attn)
182 |         attn = self.c_proj(attn)
183 |         attn = self.resid_dropout(attn)
184 |         return attn
185 | 
186 | 
187 | class Conv1D(nn.Module):
188 |     def __init__(self, out_dim, rf, in_dim):
189 |         super(Conv1D, self).__init__()
190 |         self.rf = rf
191 |         self.out_dim = out_dim
192 |         if rf == 1:
193 |             w = torch.empty(in_dim, out_dim)
194 |             nn.init.normal_(w, std=0.02)
195 |             self.w = Parameter(w)
196 |             self.b = Parameter(torch.zeros(out_dim))
197 |         else:
198 |             raise NotImplementedError
199 | 
200 |     def forward(self, x):
201 |         if self.rf == 1:
202 |             size_out = x.size()[:-1] + (self.out_dim,)
203 |             x = torch.addmm(self.b, x.view(-1, x.size(-1)), self.w)
204 |             x = x.view(*size_out)
205 |         else:
206 |             raise NotImplementedError
207 |         return x
208 | 
209 | 
210 | class ProbAttention(nn.Module):
211 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
212 |         super(ProbAttention, self).__init__()
213 |         self.factor = factor
214 |         self.scale = scale
215 |         self.mask_flag = mask_flag
216 |         self.output_attention = output_attention
217 |         self.dropout = nn.Dropout(attention_dropout)
218 | 
219 |     def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
220 |         # Q [B, H, L, D]
221 |         B, H, L_K, E = K.shape
222 |         _, _, L_Q, _ = Q.shape
223 | 
224 |         # calculate the sampled Q_K
225 |         K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
226 |         index_sample = torch.randint(L_K, (L_Q, sample_k))  # real U = U_part(factor*ln(L_k))*L_q
227 |         K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
228 |         Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)
229 | 
230 |         # find the Top_k query with sparsity measurement
231 |         M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
232 |         M_top = M.topk(n_top, sorted=False)[1]
233 | 
234 |         # use the reduced Q to calculate Q_K
235 |         Q_reduce = Q[torch.arange(B)[:, None, None],
236 |                      torch.arange(H)[None, :, None],
237 |                      M_top, :]  # factor*ln(L_q)
238 |         Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k
239 | 
240 |         return Q_K, M_top
241 | 
242 |     def _get_initial_context(self, V, L_Q):
243 |         B, H, L_V, D = V.shape
244 |         if not self.mask_flag:
245 |             # V_sum = V.sum(dim=-2)
246 |             V_sum = V.mean(dim=-2)
247 |             contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
248 |         else:  # use mask
249 |             assert (L_Q == L_V)  # requires that L_Q == L_V, i.e. for self-attention only
250 |             contex = V.cumsum(dim=-2)
251 |         return contex
252 | 
253 |     def _update_context(self, context_in, V, scores, index, L_Q):
254 |         B, H, L_V, D = V.shape
255 | 
256 |         if self.mask_flag:
257 |             attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
258 |             scores.masked_fill_(attn_mask.mask, -np.inf)
259 | 
260 |         attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)
261 | 
262 |         context_in[torch.arange(B)[:, None, None],
263 |                    torch.arange(H)[None, :, None],
264 |                    index, :] = torch.matmul(attn, V).type_as(context_in)
265 |         if self.output_attention:
266 |             attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
267 |             attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
268 |             return (context_in, attns)
269 |         else:
270 |             return (context_in, None)
271 | 
272 |     def forward(self, queries, keys, values):
273 |         B, L_Q, H, D = queries.shape
274 |         _, L_K, _, _ = keys.shape
275 | 
276 |         queries = queries.transpose(2, 1)
277 |         keys = keys.transpose(2, 1)
278 |         values = values.transpose(2, 1)
279 | 
280 |         U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
281 |         u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)
282 | 
283 |         U_part = U_part if U_part < L_K else L_K
284 |         u = u if u < L_Q else L_Q
285 | 
286 |         scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)
287 | 
288 |         # add scale factor
289 |         scale = self.scale or 1. / sqrt(D)
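        # Note (ProbSparse self-attention): only the top u = factor*ln(L_Q) queries,
        # ranked by the sparsity measure M in _prob_QK, receive exact attention below;
        # all other rows keep the initial context (the cumulative sum of V when masked,
        # the mean of V otherwise).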
290 |         # scale is never None here (it is self.scale, or the 1/sqrt(D) default)
291 |         scores_top = scores_top * scale
292 |         # get the context
293 |         context = self._get_initial_context(values, L_Q)
294 |         # update the context with selected top_k queries
295 |         context, attn = self._update_context(context, values, scores_top, index, L_Q)
296 | 
297 |         return context.transpose(2, 1).contiguous(), attn
298 | 
299 | 
300 | class ProbAttentionLayer(nn.Module):
301 |     def __init__(self, d_model, n_heads,
302 |                  d_keys=None, d_values=None, mix=False):
303 |         super(ProbAttentionLayer, self).__init__()
304 | 
305 |         d_keys = d_keys or (d_model // n_heads)
306 |         d_values = d_values or (d_model // n_heads)
307 | 
308 |         self.inner_attention = ProbAttention(True, attention_dropout=0.1, output_attention=False)
309 |         self.query_projection = nn.Linear(d_model, d_keys * n_heads)
310 |         self.key_projection = nn.Linear(d_model, d_keys * n_heads)
311 |         self.value_projection = nn.Linear(d_model, d_values * n_heads)
312 |         self.out_projection = nn.Linear(d_values * n_heads, d_model)
313 |         self.n_heads = n_heads
314 |         self.mix = mix
315 | 
316 |     def forward(self, x):
317 |         B, L, _ = x.shape
318 |         H = self.n_heads
319 | 
320 |         queries = self.query_projection(x).view(B, L, H, -1)
321 |         keys = self.key_projection(x).view(B, L, H, -1)
322 |         values = self.value_projection(x).view(B, L, H, -1)
323 | 
324 |         out, _ = self.inner_attention(
325 |             queries,
326 |             keys,
327 |             values
328 |         )
329 |         if self.mix:
330 |             out = out.transpose(2, 1).contiguous()
331 |         out = out.view(B, L, -1)
332 | 
333 |         return self.out_projection(out)
334 | 
335 | 
336 | class Encoder(nn.Module):
337 |     def __init__(self, window_size, hidden_dim, attention):
338 |         super(Encoder, self).__init__()
339 |         self.attention = attention
340 |         self.window_size = window_size
341 |         self.ln1 = nn.LayerNorm(normalized_shape=hidden_dim)
342 |         self.ln2 = nn.LayerNorm(normalized_shape=hidden_dim)
343 |         self.ffl = nn.Sequential(
344 |             nn.Linear(in_features=hidden_dim, out_features=hidden_dim),
345 |             nn.GELU(),
346 |             nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
347 |         )
348 | 
349 |     def forward(self, x):
350 |         # x.shape = (b, w, h)
351 |         att_x = self.attention(x)
352 |         att_x = self.ln1(att_x + x)
353 |         f_x = self.ln2(self.ffl(att_x) + att_x)
354 |         return f_x
355 | 
356 | 
357 | class IMDSSN(ContrastiveModel):
358 |     def __init__(self,
359 |                  window_size,
360 |                  in_features,
361 |                  hidden_dim,
362 |                  encoder_nums,
363 |                  n_heads,
364 |                  pe=True,
365 |                  label_norm=True, model_flag="IMDSSN", device="cuda:0", filter_size=0):
366 |         super(IMDSSN, self).__init__(label_norm=label_norm, model_flag=model_flag, device=device)
367 |         if filter_size > 0:
368 |             self.window_size = window_size // filter_size
369 |             self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size)
370 |         else:
371 |             self.window_size = window_size
372 |             self.MaV = None
373 | 
374 |         self.input_mapper = nn.Linear(in_features=in_features, out_features=hidden_dim)
375 |         self.pe = PositionEmbedding(dim=hidden_dim,
376 |                                     window_size=self.window_size,
377 |                                     dropout=0,
378 |                                     device=device) if pe else None
379 |         self.in_features = in_features
380 |         self.hidden_dim = hidden_dim
381 |         self.MLSNEncoders = nn.Sequential()
382 |         self.MPSNEncoders = nn.Sequential()
383 |         for _ in range(encoder_nums):
384 |             self.MLSNEncoders.append(Encoder(
385 |                 self.window_size, hidden_dim, attention=LogSparseAttention(
386 |                     n_head=n_heads,
387 |                     n_embd=hidden_dim,
388 |                     win_len=self.window_size,
389 |                     q_len=5,
390 |                     sub_len=10,
391 |                     scale=True,
392 |                 )
393 |             ))
394 |             self.MPSNEncoders.append(Encoder(
395 |                 self.window_size, hidden_dim, attention=ProbAttentionLayer(
396 |                     n_heads=n_heads,
397 |                     d_model=hidden_dim
398 |                 )
399 |             ))
400 |         self.fuse = nn.Linear(in_features=hidden_dim * 2,
401 |                               out_features=hidden_dim,
402 |                               bias=False)
403 |         self.output = nn.Sequential(
404 |             nn.Linear(in_features=self.window_size * hidden_dim, out_features=1)
405 |         )
406 |         self.to(device)
407 | 
408 |     def feature_extractor(self, x):
409 |         # x.shape = (b, w, f)
410 |         if self.MaV is not None:
411 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
412 |         x = self.input_mapper(x)
413 |         x = self.pe(x) if self.pe is not None else x
414 |         f1 = self.MLSNEncoders(x)  # (b, w, h)
415 |         f2 = self.MPSNEncoders(x)  # (b, w, h)
416 |         f = torch.concat([f1, f2], dim=-1)  # (b, w, 2*h)
417 |         f = self.fuse(f)  # (b, w, h)
418 |         return torch.flatten(f, start_dim=-2, end_dim=-1)
419 | 
420 |     def forward(self, x, label=None):
421 |         if len(x.shape) < 4:  # the normal forward, default shape with (b, l, f)
422 |             x = self.feature_extractor(x)
423 |             return self.output(x)
424 |         else:  # the forward with negative samples, default shape with (b, num, l, f)
425 |             f_pos, f_apos, f_neg, w = self.generate_contrastive_samples(x, label)
426 |             return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, w
427 | 
--------------------------------------------------------------------------------
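A minimal usage sketch for IMDSSN (illustrative hyper-parameters; it assumes the
models/RULPrediction directory is on the import path, that ContrastiveModel accepts
the constructor arguments exactly as called above, and it exercises only the plain,
non-contrastive forward path):

    import torch
    from IMDSSN import IMDSSN

    # 30-step windows over 14 sensors, two encoders per sub-network (all illustrative)
    model = IMDSSN(window_size=30, in_features=14, hidden_dim=32,
                   encoder_nums=2, n_heads=2, pe=True, device="cpu")
    x = torch.randn(8, 30, 14)   # (batch, window, features)
    rul = model(x)               # forward without negative samples
    print(rul.shape)             # expected: torch.Size([8, 1])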