├── models ├── __init__.py └── RULPrediction │ ├── __init__.py │ ├── BiGRU_TSAM.py │ ├── ResBlockModel.py │ ├── experiments.py │ ├── CNN_GRU.py │ ├── SimpleModels.py │ ├── DAMCNN.py │ ├── MLPMixer.py │ ├── ContrastiveModules.py │ └── IMDSSN.py ├── dataset ├── __init__.py ├── utils.py └── cmapss.py ├── train ├── __init__.py ├── functions.py └── trainable.py ├── .assets ├── FSGRI.png ├── layer_structure.png └── model_structure.png ├── LICENSE └── readme.md /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dataset/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | from .trainable import TrainableModule 2 | from .functions import * 3 | -------------------------------------------------------------------------------- /.assets/FSGRI.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuen1590/PhmDeepLearningProjects/HEAD/.assets/FSGRI.png -------------------------------------------------------------------------------- /.assets/layer_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuen1590/PhmDeepLearningProjects/HEAD/.assets/layer_structure.png -------------------------------------------------------------------------------- /.assets/model_structure.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fuen1590/PhmDeepLearningProjects/HEAD/.assets/model_structure.png -------------------------------------------------------------------------------- /models/RULPrediction/__init__.py: -------------------------------------------------------------------------------- 1 | from .CNN_GRU import CnnGru 2 | from .ContrastiveModules import ContrastiveModel, MSEContrastiveLoss 3 | from .ResBlockModel import ResNet 4 | from .DAMCNN import DAMCNN 5 | -------------------------------------------------------------------------------- /train/functions.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | 5 | def precision(): 6 | pass 7 | 8 | 9 | def accuracy(): 10 | pass 11 | 12 | 13 | def recall(): 14 | pass 15 | 16 | 17 | def confusion_matrix(): 18 | pass 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 fuen1590 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/dataset/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import sklearn.metrics as me
3 | 
4 | from torch.utils.data import Dataset
5 | 
6 | 
7 | class Sampler:
8 |     """
9 |     This class is used to customize your own sampling method for a dataset.
10 | 
11 |     This class was first used with the cmapss.CMAPSS dataset; sampling for more custom datasets will be supported.
12 | 
13 |     When customizing your own sampler, you should:
14 | 
15 |     1. Override the sample(index) method. The sample(index) method should return the sample and label, similar to
16 |     the torch.utils.data.Dataset class.
17 | 
18 |     2. Make sure your __init__(dataset) method contains the sampling target argument "dataset".
19 |     The argument "dataset" should be a torch.utils.data.Dataset instance.
20 |     Call super().__init__(dataset) on the first line of your __init__ method.
21 |     """
22 |     def __init__(self, dataset: Dataset):
23 |         self.dataset = dataset
24 | 
25 |     def sample(self, index: int):
26 |         raise NotImplementedError("You must define the Sampler.sample(index) method.")
27 | 
28 | 
29 | def compute_metrics(path):
30 |     out = np.load(path+r"/model_test_output_part1.npy")
31 |     label = np.load(path+r"/model_test_labels_part1.npy")
32 |     mse = me.mean_squared_error(label, out)  # sklearn metrics expect (y_true, y_pred)
33 |     mape = me.mean_absolute_percentage_error(label, out)
34 |     print("MSE:{}".format(mse))
35 |     print("MAPE:{}".format(mape))
36 |     print("R2:{}".format(me.r2_score(label, out)))
37 |     return (out, label), mse, mape
38 | 
39 | 
40 | def count_parameters(model):
41 |     return sum(p.numel() for p in model.parameters() if p.requires_grad)
42 | 
43 | 
44 | def gaussian_distribution(x: int or np.ndarray, mean, std):
45 |     l1 = 1/((2*np.pi)**0.5 * std)
46 |     l2 = np.exp(-((x-mean)**2)/(2*std**2))
47 |     return l1*l2
48 | 
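

# A minimal custom sampler sketch following the Sampler docstring above.
# "StrideSampler" is a hypothetical example class for illustration only;
# it is not part of this repository.
class StrideSampler(Sampler):
    def __init__(self, dataset: Dataset, stride: int = 2):
        super().__init__(dataset)  # always forward the target dataset first
        self.stride = stride

    def sample(self, index: int):
        # Return (sample, label) like torch.utils.data.Dataset, but strided.
        return self.dataset[index * self.stride]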
--------------------------------------------------------------------------------
/models/RULPrediction/BiGRU_TSAM.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | 
4 | from ContrastiveModules import ContrastiveModel, pn_rul_compute
5 | 
6 | """
7 | Implementation of https://doi.org/10.1016/j.ress.2021.108297
8 | """
9 | 
10 | 
11 | class TSAM(nn.Module):
12 |     def __init__(self, window_size, in_features):
13 |         super(TSAM, self).__init__()
14 |         # self.layers = nn.ModuleList()
15 |         # for _ in range(window_size):
16 |         #     self.layers.append(nn.Sequential(
17 |         #         nn.Linear(in_features, 1),
18 |         #         nn.Sigmoid()
19 |         #     ))
20 |         self.layers = nn.Sequential(
21 |             nn.Linear(in_features, 1),
22 |             nn.Sigmoid()
23 |         )
24 |         self.window_size = window_size
25 | 
26 |         self.softmax = nn.Softmax(dim=-1)
27 | 
28 |     def forward(self, x):
29 |         # x.shape = (b, t, f)
30 |         _, t, _ = x.shape
31 |         assert t == self.window_size
32 |         scores = []
33 |         for i in range(t):
34 |             # scores.append(self.layers[i](x[:, i, :]))  # (b, 1)
35 |             scores.append(self.layers(x[:, i, :]))  # (b, 1)
36 |         scores = torch.concat(scores, dim=-1)  # (b, t)
37 |         scores = self.softmax(scores)  # (b, t)
38 |         f = scores.unsqueeze(dim=-1) * x  # rescale each time step by its attention weight
39 |         return f
40 | 
41 | 
42 | class BiGRU_TSAM(ContrastiveModel):
43 |     def __init__(self, window_size, in_features, filter_size,
44 |                  model_flag="BiGRU_TSAM", label_norm=True, device="cuda:0"):
45 |         super(BiGRU_TSAM, self).__init__(model_flag=model_flag, label_norm=label_norm, device=device)
46 |         if filter_size > 0:
47 |             window_size = window_size // filter_size
48 |             self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size)
49 |         else:
50 |             window_size = window_size
51 |             self.MaV = None
52 |         self.tsam = TSAM(window_size=window_size, in_features=in_features)
53 |         self.gru = nn.GRU(input_size=in_features, hidden_size=256, num_layers=3,
54 |                           bidirectional=True, batch_first=True)
55 |         self.linear = nn.Sequential(
56 |             nn.Linear(in_features=512, out_features=64),
57 |             nn.LeakyReLU(),
58 |             nn.Linear(in_features=64, out_features=128),
59 |             nn.LeakyReLU(),
60 |         )
61 |         self.output = nn.Sequential(
62 |             nn.Linear(in_features=128, out_features=1)
63 |         )
64 |         self.to(device)
65 | 
66 |     def feature_extractor(self, x):
67 |         if self.MaV:
68 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
69 |         x = self.tsam(x)
70 |         _, x = self.gru(x)
71 |         x = torch.concat([x[-1], x[-2]], dim=-1)
72 |         return self.linear(x)
73 | 
74 |     def forward(self, x, label=None):
75 |         if len(x.shape) < 4:
76 |             x = self.feature_extractor(x)
77 |             return self.output(x)
78 |         else:
79 |             f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label)
80 |             return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, weight
81 | 
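

if __name__ == '__main__':
    # Minimal shape sanity check for TSAM (an illustrative sketch, not part of
    # the original experiments): the attention weights are computed per time
    # step and rescale the input, so the output shape equals the input shape.
    dummy = torch.randn(4, 30, 14)  # (batch, window_size, in_features)
    attn = TSAM(window_size=30, in_features=14)
    print(attn(dummy).shape)  # expected: torch.Size([4, 30, 14])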
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # Supervised Contrastive Learning based Dual-Mixer Model for Remaining Useful Life Prediction
2 | This repository is the implementation of the paper Supervised Contrastive Learning based Dual-Mixer Model for Remaining Useful Life Prediction.
3 | 
4 | ***Our paper has been accepted by *Reliability Engineering & System Safety* and is now available online: https://www.sciencedirect.com/science/article/pii/S0951832024004708***
5 | 
6 | The paper proposes a novel progressive feature fusion model called **Dual-path Mixer Model (Dual-Mixer)** for RUL prediction,
7 | as well as a feature constraint method based on supervised contrastive learning called **Feature Space Global Relationship Invariance (FSGRI)**.
8 | 
9 | ## Dual-Mixer Model
10 | The Dual-Mixer Model is constructed from multi-layer MLPs.
11 | 
12 | The core module of this model is the Dual-path Mixer Layer, which implements a progressive feature fusion process layer by layer:
13 | 
14 | ![Layer Structure](.assets/layer_structure.png "Dual-Mixer Layer")
15 | 
16 | ## FSGRI
17 | FSGRI is a constraint method in the feature space for the RUL prediction task. It expands the degradation relationship
18 | from the original data space to the feature space and is applicable to most deep learning RUL prediction methods:
19 | 
20 | ![FSGRI](.assets/FSGRI.png "FSGRI")
21 | 
22 | ## Quick Start
23 | 
24 | ### Requirements
25 | torch >= 2.0
26 | matplotlib >= 3.7.0
27 | numpy >= 1.24.3
28 | pandas >= 2.0.0
29 | scikit-learn >= 1.3.0
30 | 
31 | ### Dataset
32 | Our experiments are based on the CMAPSS dataset [1].
33 | 
34 | The raw data can be downloaded from: https://github.com/schwxd/LSTM-Keras-CMAPSS/tree/master/C-MAPSS-Data.
35 | 
36 | After downloading, place the data files train_FD00X.txt, test_FD00X.txt,
37 | and RUL_FD00X.txt in the raw_data/ folder (or any other folder you specify).
38 | 
39 | The dataset/cmapss.py file contains all the code for preprocessing the dataset, including the Gaussian Threshold Sampling method proposed
40 | in this paper.
41 | 
42 | ### Train
43 | Simply run:
44 | > python ./models/RULPrediction/experiments.py
45 | 
46 | This will execute the training process of the Dual-Mixer on the FD004 dataset. The training results are stored in
47 | the train/*model_flag* folder, where *model_flag* is generated automatically.
48 | 
49 | If you placed the dataset in a folder other than raw_data/, you need to modify the parameter
50 | *cmapss.DEFAULT_ROOT* on **line 56** in the *experiments.py* file:
51 | > train, test, val, scalar = cmapss.get_data(cmapss.DEFAULT_ROOT, ...
52 | 
53 | to:
54 | > train, test, val, scalar = cmapss.get_data(your_data_path, ...
55 | 
56 | You can edit experiments.py to modify different training parameters, such as the Batch Size (***bs***), ***mixer_layer_num***, etc.
57 | 
58 | The ***net*** object in experiments.py can be changed to the following models:
59 | > MLPMixer / BiGRU_TSAM / IMDSSN et al.
60 | 
61 | More details can be found in their respective code files.
62 | 
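### Evaluation
After training, the test predictions and labels are saved as .npy files in the results folder.
A minimal sketch for computing MSE / MAPE / R2 from them with dataset/utils.py (the path below is a placeholder for your own *model_flag* folder):
```
from dataset.utils import compute_metrics

(out, label), mse, mape = compute_metrics("./train/your_model_flag")
```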
63 | ## Citation
64 | If you find our work useful, please cite our paper as follows:
65 | ```
66 | @article{FU2024110398,
67 | title = {Supervised contrastive learning based dual-mixer model for Remaining Useful Life prediction},
68 | journal = {Reliability Engineering & System Safety},
69 | pages = {110398},
70 | year = {2024},
71 | issn = {0951-8320},
72 | doi = {https://doi.org/10.1016/j.ress.2024.110398},
73 | author = {En Fu and Yanyan Hu and Kaixiang Peng and Yuxin Chu},
74 | }
75 | ```
76 | ## References
77 | 1. Abhinav Saxena, Kai Goebel, Don Simon, and Neil Eklund. Damage propagation modeling for aircraft engine run-to-failure simulation. In
78 | 2008 International Conference on Prognostics and Health Management. IEEE, 2008.
79 | 
--------------------------------------------------------------------------------
/models/RULPrediction/ResBlockModel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from sklearn import manifold
5 | 
6 | import dataset.cmapss as cmapss
7 | from models.RULPrediction.ContrastiveModules import ContrastiveModel, MSEContrastiveLoss, pn_rul_compute
8 | 
9 | 
10 | class ResNet(ContrastiveModel):
11 |     def __init__(self, in_features, window_size,
12 |                  model_flag="ContrastiveResNet", device="cuda"):
13 |         super(ResNet, self).__init__(model_flag=model_flag, device=device, label_norm=True)
14 |         # if window_size > 1000:
15 |         #     window_size = window_size // 32
16 |         #     self.MaV = nn.AvgPool1d(kernel_size=32, stride=32)
17 |         # else:
18 |         #     window_size = window_size
19 |         #     self.MaV = None
20 |         self.tsne = None
21 |         self.visual_samples = None
22 |         self.embedding = []
23 |         self.epoch_num = 0
24 |         self.conv = nn.Conv1d(in_channels=in_features, out_channels=64, kernel_size=3, stride=1, padding=1)
25 |         self.norm1 = nn.BatchNorm1d(64)
26 |         self.norm2 = nn.BatchNorm1d(128)
27 |         self.norm3 = nn.BatchNorm1d(128)
28 |         self.res1 = nn.Sequential(
29 |             nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
30 |             nn.ReLU(),
31 |             nn.Conv1d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
32 |         )
33 |         self.res_con1 = nn.Conv1d(in_channels=64, out_channels=128, kernel_size=2, stride=2)
34 |         self.res2 = nn.Sequential(
35 |             nn.Conv1d(in_channels=64, out_channels=64, kernel_size=2, stride=2),
36 |             nn.ReLU(),
37 |             nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
38 |         )
39 |         self.res_con2 = nn.Conv1d(in_channels=128, out_channels=128, kernel_size=2, stride=2)
40 |         self.res3 = nn.Sequential(
41 |             nn.Conv1d(in_channels=128, out_channels=128, kernel_size=2, stride=2),
42 |             nn.ReLU(),
43 |             nn.Conv1d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
44 |         )
45 |         self.flatten = nn.Flatten(start_dim=-2, end_dim=-1)
46 |         self.dense = nn.Sequential(
47 |             nn.Linear(in_features=128 * ((window_size // 2) // 2), out_features=64),
48 |             nn.Dropout(),
49 |             nn.Linear(in_features=64, out_features=1)
50 |         )
51 |         self.to(device)
52 | 
53 |     def forward(self, x, labels=None):
54 |         # x.shape=(batch, num, window, features)
55 |         # labels.shape=(batch, num)
56 |         if len(x.shape) == 4:  # if len(shape) == 4, use the contrastive computing process
57 |             assert labels is not None
58 |             batch, num, w, f = x.shape
59 |             x = x.transpose(-1, -2)  # (batch, num, f, w); transpose instead of view so values stay aligned with their channels
60 |             feature_pos, feature_pos_aug, feature_neg, neg_weights = self.generate_contrastive_samples(x, labels)
61 |             return pn_rul_compute(self.dense, feature_pos, feature_neg), feature_pos, feature_pos_aug, feature_neg, neg_weights
62 |         else:  # if len(shape) == 3, use the regression computing process
63 |             batch, w, f = x.shape
64 |             x = x.transpose(-1, -2)  # (batch, f, w)
65 |             feature = self.feature_extractor(x)
66 |             out = self.dense(feature)
67 |             return out
68 | 
69 |     def feature_extractor(self, x):
70 |         # if self.MaV:
71 |         #     x = self.MaV(x)
72 |         x1 = self.conv(x)
73 |         x2 = self.res1(x1) + x1
74 |         x3 = self.res_con1(x2) + self.res2(x2)
75 |         x4 = self.res_con2(x3) + self.res3(x3)
76 |         flat = self.flatten(x4)
77 |         return flat
78 | 
79 |     def set_visual_samples(self, samples):
80 |         """
81 |         Sets the visualization samples used in epoch_start.
82 | 
83 |         :param samples: (batch, len, features)
84 |         :return:
85 |         """
86 |         self.visual_samples = samples
87 |         self.visual_samples = torch.transpose(self.visual_samples, -1, -2)
88 |         self.tsne = manifold.TSNE(n_components=2, random_state=2023)
--------------------------------------------------------------------------------
/models/RULPrediction/experiments.py:
--------------------------------------------------------------------------------
1 | import dataset.cmapss as cmapss
2 | import models.RULPrediction as rul
3 | from dataset.utils import compute_metrics
4 | 
5 | import torch
6 | import numpy as np
7 | from SimpleModels import *
8 | from MLPMixer import MLPMixer, DualMLPMixer
9 | from BiGRU_TSAM import BiGRU_TSAM
10 | from IMDSSN import IMDSSN
11 | 
12 | 
13 | def train_model(model, train_set, test_set, val_set, model_flag,
14 |                 batch_size, visual_sample=None, contra=True):
15 |     Loss = "InfoNCE" if contra else ""
16 |     model.flag = model_flag
17 |     net = model
18 |     net.set_visual_samples(visual_sample)
19 |     net.prepare_data(train_set, test_set, val_set, batch_size=batch_size, num_workers=0,
20 |                      eval_shuffle=False)
21 |     net.train_model(epoch=100,
22 |                     lr=1e-3,
23 |                     criterion=rul.MSEContrastiveLoss(contrastive=Loss) if contra else torch.nn.MSELoss(),
24 |                     optimizer="adam",
25 |                     # lr_lambda=lambda x: 10 ** -(x // 15),
26 |                     early_stop=5,
27 |                     show_batch_loss=False)
28 |     return net
29 | 
30 | 
31 | def train_cmapss(model: rul.ContrastiveModel,
32 |                  window_size,
33 |                  neg_samples,
34 |                  batch_size,
35 |                  subset: cmapss.Subset,
36 |                  exp_time,
37 |                  contra=True,
38 |                  label_norm=True):
39 |     threshold = 125
40 |     batch_size = batch_size // neg_samples if contra else batch_size
41 |     # batch_size = batch_size
42 |     # batch_size = 256
43 |     Loss = "InfoNCE" if contra else ""
44 |     model_flag = "RUL-{}-norm{}-w{}-batch{}-thresh{}-{}-neg{}-{}". \
45 |         format(model.flag,
46 |                1 if label_norm else 0,
47 |                window_size,
48 |                batch_size,
49 |                threshold,
50 |                subset.value,
51 |                neg_samples - 1 if contra else 0,
52 |                # "GSampler",
53 |                exp_time)
54 |     train, test, val, scalar = cmapss.get_data(cmapss.DEFAULT_ROOT,
55 |                                                subset,
56 |                                                window_size=window_size,
57 |                                                slide_step=1,
58 |                                                sensors=cmapss.DEFAULT_SENSORS,
59 |                                                rul_threshold=threshold,
60 |                                                label_norm=label_norm,
61 |                                                val_ratio=0.2)
62 |     if contra:
63 |         cmapss.CmapssGaussianNegativeSampler(train, neg_samples, thresh=0.5, std=0.3)
64 |     visual_samples = torch.tensor(train.data[np.where(train.ids == 1)], dtype=torch.float32).to(model.device)
65 |     model = train_model(model=model, train_set=train, test_set=test, val_set=val,
66 |                         model_flag=model_flag, batch_size=batch_size, visual_sample=visual_samples, contra=contra)
67 |     return model
68 | 
69 | 
70 | if __name__ == '__main__':
71 |     length = 30
72 |     step_size = 1  # the step size used to construct training samples
73 |     negs = 5  # the number of negative samples when using FSGRI
74 |     bs = 1024
75 |     dataset = cmapss.Subset.FD004  # an enum object; see details in cmapss.Subset.
76 | device = "cuda:0" # which device, 'cpu', 'cuda', 'cuda:*' 77 | exp_ti = 1 # experiment count, using to construct a model_flag 78 | contra_training = False # if using FSGRI 79 | label_norm = True # if True, the RUL label will be in [0, 1], else [0, number of cycles] 80 | # filter_size = 0 81 | 82 | # Dual-Mixer only 83 | mixer_layer_num = 6 84 | hidden_dim = 32 85 | dropout = 0 86 | net = DualMLPMixer(window_size=length, 87 | in_features=len(cmapss.DEFAULT_SENSORS), 88 | hidden_dim=hidden_dim, 89 | num_layers=mixer_layer_num, 90 | dropout=dropout, 91 | device=device, model_flag=f"MLPDualMixer-h{hidden_dim}-{mixer_layer_num}", label_norm=label_norm, 92 | filter_size=0) 93 | 94 | net = train_cmapss(net, length, negs, bs, dataset, exp_time=exp_ti, contra=contra_training, label_norm=label_norm, ) 95 | -------------------------------------------------------------------------------- /models/RULPrediction/CNN_GRU.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | 5 | import models.RULPrediction.ContrastiveModules 6 | from dataset import cmapss 7 | from ContrastiveModules import ContrastiveModel 8 | 9 | 10 | class CnnGru(ContrastiveModel): 11 | """ 12 | A model proposed by https://doi.org/10.1109/TIM.2022.3227956. 13 | """ 14 | def __init__(self, 15 | in_features, 16 | window_size, 17 | filter_size, 18 | label_norm=False, 19 | model_flag="CnnGru", 20 | device="cuda",): 21 | super(CnnGru, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm) 22 | if filter_size > 0: 23 | window_size = window_size // filter_size 24 | self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size) 25 | else: 26 | window_size = window_size 27 | self.MaV = None 28 | self.convs = nn.Sequential( 29 | nn.Conv1d(in_channels=window_size, out_channels=16, kernel_size=10, stride=1, padding="same"), 30 | nn.ReLU(), 31 | nn.MaxPool1d(kernel_size=2, stride=2), 32 | nn.Conv1d(in_channels=16, out_channels=32, kernel_size=10, stride=1, padding="same"), 33 | nn.ReLU(), 34 | nn.MaxPool1d(kernel_size=2, stride=2), 35 | nn.Conv1d(in_channels=32, out_channels=64, kernel_size=10, stride=1, padding="same"), 36 | nn.ReLU(), 37 | nn.MaxPool1d(kernel_size=2, stride=2), 38 | nn.Flatten(start_dim=-2, end_dim=-1), 39 | nn.Linear(in_features=64 * (((in_features // 2) // 2) // 2), out_features=256) 40 | ) 41 | self.grus = nn.GRU(input_size=in_features, hidden_size=128, num_layers=3, batch_first=True, 42 | bidirectional=True) 43 | self.linears = nn.Sequential( 44 | nn.Linear(in_features=256, out_features=64), 45 | nn.Dropout(), 46 | nn.ReLU(), 47 | nn.Linear(in_features=64, out_features=128), 48 | nn.Dropout(), 49 | nn.Linear(in_features=128, out_features=1) 50 | ) 51 | self.to(device) 52 | 53 | def forward(self, x, label=None): 54 | if len(x.shape)<4: 55 | fea = self.feature_extractor(x) 56 | return self.linears(fea) 57 | else: 58 | assert label is not None 59 | feature_pos, feature_pos_aug, feature_neg, neg_weights = self.generate_contrastive_samples(x, label) 60 | out_all = self.linears(feature_pos) 61 | neg_nums = feature_neg.shape[1] 62 | neg_out = [] 63 | for neg_i in range(neg_nums): 64 | neg_out.append(self.linears(feature_neg[:, neg_i])) 65 | neg_out = torch.concat(neg_out, dim=-1) 66 | return torch.concat([out_all, neg_out], dim=-1), feature_pos, feature_pos_aug, feature_neg, neg_weights 67 | 68 | def feature_extractor(self, x): 69 | # x.shape = (batch, length, features) 70 | # batch, l, f = x.shape 71 | 
--------------------------------------------------------------------------------
/models/RULPrediction/CNN_GRU.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | 
5 | import models.RULPrediction.ContrastiveModules
6 | from dataset import cmapss
7 | from ContrastiveModules import ContrastiveModel
8 | 
9 | 
10 | class CnnGru(ContrastiveModel):
11 |     """
12 |     A model proposed by https://doi.org/10.1109/TIM.2022.3227956.
13 |     """
14 |     def __init__(self,
15 |                  in_features,
16 |                  window_size,
17 |                  filter_size,
18 |                  label_norm=False,
19 |                  model_flag="CnnGru",
20 |                  device="cuda",):
21 |         super(CnnGru, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm)
22 |         if filter_size > 0:
23 |             window_size = window_size // filter_size
24 |             self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size)
25 |         else:
26 |             window_size = window_size
27 |             self.MaV = None
28 |         self.convs = nn.Sequential(
29 |             nn.Conv1d(in_channels=window_size, out_channels=16, kernel_size=10, stride=1, padding="same"),
30 |             nn.ReLU(),
31 |             nn.MaxPool1d(kernel_size=2, stride=2),
32 |             nn.Conv1d(in_channels=16, out_channels=32, kernel_size=10, stride=1, padding="same"),
33 |             nn.ReLU(),
34 |             nn.MaxPool1d(kernel_size=2, stride=2),
35 |             nn.Conv1d(in_channels=32, out_channels=64, kernel_size=10, stride=1, padding="same"),
36 |             nn.ReLU(),
37 |             nn.MaxPool1d(kernel_size=2, stride=2),
38 |             nn.Flatten(start_dim=-2, end_dim=-1),
39 |             nn.Linear(in_features=64 * (((in_features // 2) // 2) // 2), out_features=256)
40 |         )
41 |         self.grus = nn.GRU(input_size=in_features, hidden_size=128, num_layers=3, batch_first=True,
42 |                            bidirectional=True)
43 |         self.linears = nn.Sequential(
44 |             nn.Linear(in_features=256, out_features=64),
45 |             nn.Dropout(),
46 |             nn.ReLU(),
47 |             nn.Linear(in_features=64, out_features=128),
48 |             nn.Dropout(),
49 |             nn.Linear(in_features=128, out_features=1)
50 |         )
51 |         self.to(device)
52 | 
53 |     def forward(self, x, label=None):
54 |         if len(x.shape) < 4:
55 |             fea = self.feature_extractor(x)
56 |             return self.linears(fea)
57 |         else:
58 |             assert label is not None
59 |             feature_pos, feature_pos_aug, feature_neg, neg_weights = self.generate_contrastive_samples(x, label)
60 |             out_all = self.linears(feature_pos)
61 |             neg_nums = feature_neg.shape[1]
62 |             neg_out = []
63 |             for neg_i in range(neg_nums):
64 |                 neg_out.append(self.linears(feature_neg[:, neg_i]))
65 |             neg_out = torch.concat(neg_out, dim=-1)
66 |             return torch.concat([out_all, neg_out], dim=-1), feature_pos, feature_pos_aug, feature_neg, neg_weights
67 | 
68 |     def feature_extractor(self, x):
69 |         # x.shape = (batch, length, features)
70 |         # batch, l, f = x.shape
71 |         if self.MaV:
72 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
73 |         x_conv = x
74 |         x_conv = self.convs(x_conv)
75 |         _, x_grus = self.grus(x)  # (batch, length, 256)
76 |         x_grus = torch.concat([x_grus[-1], x_grus[-2]], dim=-1)
77 |         fea = x_conv + x_grus
78 |         return fea
79 | 
80 | 
81 | if __name__ == '__main__':
82 |     from dataset.cmapss import Cmapss
83 |     window_size = 30
84 |     threshold = 125
85 |     neg_num = 5
86 |     batch_size = 1024 // (neg_num - 1)
87 |     # batch_size = 1024
88 |     subset = cmapss.Subset.FD003
89 |     Loss = "InfoNCE"
90 |     model_flag = "RUL-1DCNN_GRU-w{}-batch{}-thresh{}-{}-neg{}-2-{}". \
91 |         format(window_size,
92 |                batch_size,
93 |                threshold,
94 |                subset.value,
95 |                neg_num - 1,
96 |                Loss)
97 |     train, test, val, scalar = cmapss.get_data(cmapss.DEFAULT_ROOT,
98 |                                                subset,
99 |                                                window_size=window_size,
100 |                                                slide_step=1,
101 |                                                sensors=cmapss.DEFAULT_SENSORS,
102 |                                                rul_threshold=threshold,
103 |                                                label_norm=True,
104 |                                                val_ratio=0.2)
105 |     # filter_size=0 disables the moving-average filter; model_flag must be passed by keyword
106 |     net = CnnGru(len(cmapss.DEFAULT_SENSORS), window_size, filter_size=0, model_flag=model_flag, device="cuda:1")
107 |     visual_samples = torch.tensor(train.data[np.where(train.ids == 1)], dtype=torch.float32, device="cuda:1")
108 |     net.set_visual_samples(visual_samples)
109 |     sampler = cmapss.CmapssPiecewiseNegativeSampler(train, engine_num=1, interval_num=neg_num)
110 |     # sampler = cmapss.CmapssRandomNegtiveSampler(train, neg_num=neg_num, sample_thresh=0.05)
111 |     net.prepare_data(train, test, val, batch_size=batch_size, num_workers=2)
112 |     net.train_model(epoch=100,
113 |                     lr=0.0001,
114 |                     # criterion=nn.MSELoss(),
115 |                     criterion=models.RULPrediction.ContrastiveModules.MSEContrastiveLoss(contrastive=Loss),
116 |                     early_stop=10,
117 |                     lr_lambda=lambda epoch: 10 ** -(epoch // 15))
118 | 
--------------------------------------------------------------------------------
/models/RULPrediction/SimpleModels.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import numpy as np
5 | from ContrastiveModules import ContrastiveModel, pn_rul_compute
6 | from train.trainable import TrainableModule
7 | 
8 | """
9 | Input shape: (batch, w, f0)
10 | Feature shape: (batch, f1)
11 | Output shape: (batch, 1)
12 | """
13 | 
14 | 
15 | class LSTMNet(ContrastiveModel):
16 |     def __init__(self, window_size,
17 |                  in_features,
18 |                  hidden_dim=256,
19 |                  label_norm=False, model_flag="LSTM", device="cuda:0"):
20 |         super(LSTMNet, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm)
21 |         if window_size > 1000:
22 |             window_size = window_size // 32
23 |             self.MaV = nn.AvgPool1d(kernel_size=32, stride=32)
24 |         else:
25 |             window_size = window_size
26 |             self.MaV = None
27 |         self.lstm = nn.LSTM(input_size=in_features, hidden_size=hidden_dim, num_layers=3,
28 |                             batch_first=True, dropout=0.4)
29 |         # self.lstm1 = LSTM(input_size=in_features, hidden_size=256, dropout=0.4, device=device)
30 |         # self.lstm2 = LSTM(input_size=256, hidden_size=256, dropout=0.4, device=device)
31 |         # self.lstm3 = LSTM(input_size=256, hidden_size=256, dropout=0.4, device=device)
32 |         self.linear = nn.Sequential(nn.Linear(in_features=hidden_dim, out_features=1))
33 |         self.to(device)
34 | 
35 |     def feature_extractor(self, x):
36 |         if self.MaV:
37 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
38 |         _, (ht, _) = self.lstm(x)
39 |         # out, _ = self.lstm1(x)
40 |         # out, _ = self.lstm2(out)
41 |         # _, (ht, _) = self.lstm3(out)
42 |         return ht[-1]
43 |         # return ht
44 | 
45 |     
def forward(self, x, label=None): 46 | if len(x.shape) < 4: 47 | x = self.feature_extractor(x) 48 | return self.linear(x) 49 | else: 50 | f_pos, f_apos, f_neg, weights = self.generate_contrastive_samples(x, label) 51 | return pn_rul_compute(self.linear, f_pos, f_neg), f_pos, f_apos, f_neg, weights 52 | 53 | 54 | class LSTM(nn.Module): 55 | def __init__(self, input_size, hidden_size, dropout=0.4, device="cuda:0"): 56 | super().__init__() 57 | self.wf = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 58 | self.wi = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 59 | self.wc = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 60 | self.wo = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size) 61 | self.dropout = nn.Dropout(dropout) if dropout > 0 else None 62 | self.hidden_size = hidden_size 63 | self.to(device) 64 | 65 | def forward(self, x): 66 | # x.shape=(batch, l, f) 67 | b, l, f = x.shape 68 | h = torch.zeros((b, self.hidden_size)).to(x.device) 69 | c = torch.zeros((b, self.hidden_size)).to(x.device) 70 | outputs = [] 71 | for i in range(l): 72 | ft = F.sigmoid(self.wf(torch.concat([x[:, i, :], h], dim=-1))) 73 | it = F.sigmoid(self.wi(torch.concat([x[:, i, :], h], dim=-1))) 74 | c_ = F.tanh(self.wc(torch.concat([x[:, i, :], h], dim=-1))) 75 | c = ft * c + it * c_ 76 | ot = F.sigmoid(self.wo(torch.concat([x[:, i, :], h], dim=-1))) 77 | h = ot * F.tanh(c) 78 | h = self.dropout(h) if self.dropout is not None else h 79 | outputs.append(h) 80 | return torch.stack(outputs, dim=1), (h, c) 81 | 82 | 83 | class MLP(ContrastiveModel): 84 | def __init__(self, 85 | window_size, 86 | in_features, 87 | filter_size=0, 88 | hidden_dim=256, 89 | label_norm=False, 90 | model_flag="MLP", device="cuda:0"): 91 | super(MLP, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm) 92 | if filter_size > 0: 93 | window_size = window_size // filter_size 94 | self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size) 95 | else: 96 | window_size = window_size 97 | self.MaV = None 98 | self.features_layer_1 = nn.Sequential( 99 | nn.Linear(in_features, hidden_dim), 100 | nn.GELU(), 101 | nn.Dropout(), 102 | nn.Linear(hidden_dim, hidden_dim), 103 | nn.GELU(), 104 | nn.Dropout(), 105 | ) 106 | self.temporal_layer_1 = nn.Sequential( 107 | nn.Linear(window_size, hidden_dim), 108 | nn.GELU(), 109 | nn.Dropout(), 110 | nn.Linear(hidden_dim, hidden_dim), 111 | nn.GELU(), 112 | nn.Dropout(), 113 | ) 114 | self.features_layer_2 = nn.Sequential( 115 | nn.Linear(hidden_dim, hidden_dim//2), 116 | nn.GELU(), 117 | nn.Dropout(), 118 | nn.Linear(hidden_dim//2, hidden_dim//8), 119 | nn.GELU(), 120 | nn.Dropout(), 121 | ) 122 | self.temporal_layer_2 = nn.Sequential( 123 | nn.Linear(hidden_dim, hidden_dim//2), 124 | nn.GELU(), 125 | nn.Dropout(), 126 | nn.Linear(hidden_dim//2, hidden_dim//8), 127 | nn.GELU(), 128 | nn.Dropout(), 129 | ) 130 | self.linear = nn.Sequential(nn.Dropout(), 131 | nn.Linear(in_features=(hidden_dim//8)**2, out_features=1)) 132 | self.to(device) 133 | 134 | def forward(self, x, label=None): 135 | if len(x.shape) < 4: 136 | x = self.feature_extractor(x) 137 | return self.linear(x) 138 | else: 139 | f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label) 140 | return pn_rul_compute(self.linear, f_pos, f_neg), f_pos, f_apos, f_neg, weight 141 | 142 | def feature_extractor(self, x): 143 | if self.MaV: 144 | x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2) 145 | # 
x.shape = (b, t, f) 146 | ff = self.features_layer_1(x) # (b, t, f) 147 | tf = self.temporal_layer_1(ff.transpose(-1, -2)) 148 | ff = self.features_layer_2(tf.transpose(-1, -2)) 149 | tf = self.temporal_layer_2(ff.transpose(-1, -2)) 150 | f = torch.flatten(tf, -2, -1) 151 | return f 152 | 153 | -------------------------------------------------------------------------------- /models/RULPrediction/DAMCNN.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from models.RULPrediction.ContrastiveModules import ContrastiveModel 4 | 5 | """ 6 | 10.1109/TIM.2022.3210933 7 | """ 8 | 9 | 10 | class channel_attn(nn.Module): 11 | # input_size:(N, C, H, W) 12 | # output_size:(N, C, 1, 1) 13 | def __init__(self, window_size=8192, features=2): 14 | super().__init__() 15 | self.max_pool = nn.MaxPool2d([1, features]) # (2, 8192, 1, 2) -> (2, 8192, 1, 1) 16 | self.avg_pool = nn.AvgPool2d([1, features]) # (2, 8192, 1, 2) -> (2, 8192, 1, 1) 17 | self.channel_attn_fc1 = nn.Linear(window_size, window_size) # (2, 8192, 1, 1) -> (2, 8192, 1, 1) 18 | self.channel_attn_fc2 = nn.Linear(window_size, window_size) # (2, 8192, 1, 1) -> (2, 8192, 1, 1) 19 | self.window_size = window_size 20 | 21 | def forward(self, x): 22 | max_pool_x = self.max_pool(x).squeeze() 23 | avg_pool_x = self.avg_pool(x).squeeze() 24 | max_pool_x = self.channel_attn_fc1(max_pool_x) 25 | max_pool_x = self.channel_attn_fc2(max_pool_x) 26 | avg_pool_x = self.channel_attn_fc1(avg_pool_x) 27 | avg_pool_x = self.channel_attn_fc2(avg_pool_x) 28 | x = torch.sigmoid(max_pool_x + avg_pool_x) 29 | x = x.reshape(-1, self.window_size, 1, 1) 30 | return x 31 | 32 | 33 | class temp_attn(nn.Module): 34 | # input_size:(N, C, H, W) 35 | # output_size:(N, 1, H, W) 36 | def __init__(self): 37 | super().__init__() 38 | self.conv = nn.Conv2d(2, 1, 3, padding=1) 39 | 40 | def forward(self, x): 41 | max_pool_x, _ = torch.max(x, dim=1, keepdim=True) 42 | avg_pool_x = torch.mean(x, dim=1, keepdim=True) 43 | x = torch.cat((avg_pool_x, max_pool_x), dim=1) 44 | x = torch.sigmoid(self.conv(x)) 45 | return x 46 | 47 | 48 | class CBAM(nn.Module): 49 | # input_size:(N, C, H, W) 50 | # output_size:(N, C, H, W) 51 | def __init__(self, window_size, features): 52 | super().__init__() 53 | self.channel_attn = channel_attn(window_size, features) 54 | self.temp_attn = temp_attn() 55 | 56 | def forward(self, x): 57 | channel_x = self.channel_attn(x) 58 | x = channel_x * x 59 | temp_x = self.temp_attn(x) 60 | x = temp_x * x 61 | return x 62 | 63 | 64 | class MSCNN(nn.Module): 65 | # input_size: (N, window_size, 1, features) 66 | # output_size: (N, 512, 1, 32) 67 | def __init__(self, window_size, features): 68 | super().__init__() 69 | self.window_size = window_size 70 | self.features = features 71 | self.conv1 = nn.Sequential(nn.Conv1d(self.features, 8, 1, 10 if self.window_size % 10 == 0 else 16), 72 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 73 | nn.BatchNorm1d(8)) 74 | self.conv2 = nn.Sequential(nn.Conv1d(self.features, 8, 3, 10 if self.window_size % 10 == 0 else 16), 75 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 76 | nn.BatchNorm1d(8)) 77 | self.conv3 = nn.Sequential(nn.Conv1d(self.features, 8, 5, 10 if self.window_size % 10 == 0 else 16), 78 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 79 | nn.BatchNorm1d(8)) 80 | self.conv4 = nn.Sequential(nn.Conv1d(self.features, 8, 7, 10 if self.window_size % 10 == 0 else 16), 81 | nn.AvgPool2d(kernel_size=3, stride=1, padding=1), 82 | 
nn.BatchNorm1d(8)) 83 | 84 | def forward(self, x): 85 | x = x.reshape(x.size(0), x.size(-1), -1) 86 | x1 = self.conv1(x) 87 | x2 = self.conv2(x) 88 | x3 = self.conv3(x) 89 | x4 = self.conv4(x) 90 | x = torch.cat((x1, x2, x3, x4), dim=1) 91 | return x 92 | 93 | 94 | class PRED(nn.Module): 95 | def __init__(self): 96 | super().__init__() 97 | self.conv = nn.Conv2d(64, 1, 1) 98 | self.lstm = nn.LSTM(1, 32, batch_first=True) 99 | self.dense = nn.Linear(32, 1) 100 | 101 | def forward(self, x): 102 | x = self.conv(x) # (N, 1, 1, 128) 103 | x = x.contiguous().view(x.size(0), -1, 1) 104 | x, _ = self.lstm(x) 105 | x = x[:, -1, :].contiguous().view(x.size(0), -1) 106 | x = self.dense(x) 107 | return x 108 | 109 | 110 | class DAMCNN(ContrastiveModel): 111 | def __init__(self, window_size, features, 112 | label_norm=False, model_flag="Model", device="cuda:0"): 113 | super().__init__(model_flag=model_flag, device=device, label_norm=label_norm) 114 | self.cbam = CBAM(window_size=window_size, features=features) 115 | self.mscnn = MSCNN(window_size=window_size, features=features) 116 | self.conv1 = nn.Sequential(nn.Conv1d(32, 32, 3, 1, 1), 117 | nn.BatchNorm1d(32), 118 | nn.ReLU(), 119 | nn.Conv1d(32, 32, 3, 1, 1), 120 | nn.BatchNorm1d(32), 121 | nn.ReLU(), 122 | nn.AvgPool1d(2, 2)) 123 | self.conv2 = nn.Sequential(nn.Conv1d(32, 64, 3, 1, 1), 124 | nn.BatchNorm1d(64), 125 | nn.ReLU(), 126 | nn.Conv1d(64, 64, 3, 1, 1), 127 | nn.BatchNorm1d(64), 128 | nn.ReLU(), 129 | nn.AvgPool1d(2, 2)) 130 | self.conv = nn.Conv2d(64, 1, 1) 131 | # prediction layers 132 | self.conv_2 = nn.Conv2d(64, 1, 1) 133 | self.lstm_2 = nn.LSTM(1, 32, batch_first=True) 134 | self.dense = nn.Linear(32, 1) 135 | self.to(device) 136 | 137 | def forward(self, x, label=None): 138 | if len(x.shape) < 4: 139 | feature = self.feature_extractor(x) 140 | out = self.dense(feature) # (N, 1) 141 | return out 142 | else: 143 | assert label is not None 144 | pos, pos_aug, neg, weights = self.generate_contrastive_samples(x, label) 145 | out_all = self.dense(pos) 146 | neg_nums = neg.shape[1] 147 | neg_out = [] 148 | for neg_i in range(neg_nums): 149 | neg_out.append(self.dense(neg[:, neg_i])) 150 | neg_out = torch.concat(neg_out, dim=-1) 151 | return torch.concat([out_all, neg_out], dim=-1), pos, pos_aug, neg, weights 152 | 153 | def feature_extractor(self, x): 154 | x = torch.unsqueeze(x, -2) 155 | x = self.cbam(x) # (N, 8192, 1, 2) 156 | x = self.mscnn(x) # (N, 32, 512) 157 | x = self.conv1(x) # (N, 32, 256) 158 | x = self.conv2(x) # (N, 64, 128) 159 | x = x.unsqueeze(2) 160 | 161 | x = self.conv_2(x) # (N, 1, 1, 128) 162 | x = x.contiguous().view(x.size(0), -1, 1) 163 | x, _ = self.lstm_2(x) 164 | x = x[:, -1, :].contiguous().view(x.size(0), -1) 165 | return x 166 | 167 | def epoch_start(self): 168 | super(DAMCNN, self).epoch_start() 169 | -------------------------------------------------------------------------------- /models/RULPrediction/MLPMixer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from ContrastiveModules import ContrastiveModel, pn_rul_compute 5 | 6 | 7 | class MLPBlock(nn.Module): 8 | def __init__(self, 9 | in_features, 10 | hidden_dim, 11 | out_features, 12 | dropout=0.5, 13 | device="cuda:0"): 14 | super(MLPBlock, self).__init__() 15 | self.block = nn.Sequential( 16 | nn.Linear(in_features=in_features, out_features=hidden_dim), 17 | nn.GELU(), 18 | nn.Dropout(dropout), 19 | nn.Linear(in_features=hidden_dim, 
out_features=out_features), 20 | nn.Dropout(dropout) 21 | ) 22 | self.to(device) 23 | 24 | def forward(self, x): 25 | return self.block(x) 26 | 27 | 28 | class GatedAttention(nn.Module): 29 | def __init__(self, hidden_dim, dim=-1, device="cuda:0"): 30 | super(GatedAttention, self).__init__() 31 | self.encoder = nn.Sequential( 32 | nn.Linear(in_features=hidden_dim, out_features=hidden_dim), 33 | # nn.Softmax(dim=dim), 34 | nn.Sigmoid() 35 | ) 36 | self.weights = None 37 | self.to(device) 38 | 39 | def forward(self, x): 40 | weights = self.encoder(x) 41 | self.weights = weights 42 | return torch.mul(weights, x) 43 | 44 | 45 | class MLPLayer(nn.Module): 46 | def __init__(self, in_features, hidden_dim, out_features, device="cuda:0"): 47 | super(MLPLayer, self).__init__() 48 | self.mlp = MLPBlock(in_features=in_features, hidden_dim=hidden_dim, out_features=out_features, device=device) 49 | self.gat = GatedAttention(hidden_dim=out_features, device=device) 50 | self.to(device) 51 | 52 | def forward(self, x): 53 | f = self.mlp(x) 54 | f = self.gat(f) 55 | return f 56 | 57 | 58 | class MixerLayer(nn.Module): 59 | def __init__(self, in_features, hidden_dim, device="cuda:0"): 60 | super(MixerLayer, self).__init__() 61 | self.time_mixer = MLPLayer(in_features=hidden_dim, hidden_dim=hidden_dim*2, out_features=hidden_dim, 62 | device=device) 63 | self.feature_mixer = MLPLayer(in_features=in_features, hidden_dim=in_features*2, out_features=in_features, 64 | device=device) 65 | self.to(device) 66 | 67 | def forward(self, x): 68 | # x.shape = (b, h, f) 69 | x = x.transpose(-1, -2) 70 | f = self.time_mixer(x) + x # (b, f, h) 71 | f = f.transpose(-1, -2) 72 | f = self.feature_mixer(f) + f # (b, h, f) 73 | return f 74 | 75 | 76 | class MLPMixer(ContrastiveModel): 77 | def __init__(self, window_size, in_features, hidden_dim, num_layers, filter_size=0, 78 | device="cuda:0", model_flag="TSMixer", label_norm=True): 79 | super(MLPMixer, self).__init__(device=device, label_norm=label_norm, model_flag=model_flag) 80 | if filter_size > 0: 81 | window_size = window_size // filter_size 82 | self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size) 83 | else: 84 | window_size = window_size 85 | self.MaV = None 86 | self.window_size = window_size 87 | self.in_features = in_features 88 | self.input_embedding = nn.Linear(in_features=window_size, out_features=hidden_dim) 89 | self.layers = nn.Sequential() 90 | for _ in range(num_layers): 91 | self.layers.append(MixerLayer(in_features=in_features, hidden_dim=hidden_dim, device=device)) 92 | self.output = nn.Sequential( 93 | nn.Dropout(), 94 | nn.Linear(in_features=in_features*hidden_dim, out_features=1) 95 | ) 96 | self.to(device) 97 | 98 | def feature_extractor(self, x): 99 | if self.MaV: 100 | x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2) 101 | # x.shape = (b, w, f) 102 | emb = self.input_embedding(x.transpose(-1, -2)) # (b, f, h) 103 | f = self.layers(emb.transpose(-1, -2)) 104 | return torch.flatten(f, start_dim=-2, end_dim=-1) 105 | 106 | def forward(self, x, label=None): 107 | if len(x.shape) < 4: 108 | x = self.feature_extractor(x) 109 | return self.output(x) 110 | else: 111 | f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label) 112 | return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, weight 113 | 114 | 115 | class DualMLPLayer(nn.Module): 116 | def __init__(self, window_size, hidden_dim, dropout=0.5): 117 | super(DualMLPLayer, self).__init__() 118 | self.block1 = nn.Sequential( 119 | # 
nn.LayerNorm(normalized_shape=window_size, elementwise_affine=False), 120 | nn.Linear(in_features=window_size, out_features=window_size * 2), 121 | nn.GELU(), 122 | nn.Dropout(dropout), 123 | nn.Linear(in_features=window_size * 2, out_features=window_size), 124 | nn.Dropout(dropout), 125 | ) 126 | self.block2 = nn.Sequential( 127 | # nn.LayerNorm(normalized_shape=hidden_dim, elementwise_affine=False), 128 | nn.Linear(in_features=hidden_dim, out_features=hidden_dim * 2), 129 | nn.GELU(), 130 | nn.Dropout(dropout), 131 | nn.Linear(in_features=hidden_dim * 2, out_features=hidden_dim), 132 | nn.Dropout(dropout) 133 | ) 134 | self.ln1 = nn.LayerNorm(normalized_shape=window_size, elementwise_affine=True) 135 | self.ln2 = nn.LayerNorm(normalized_shape=hidden_dim, elementwise_affine=True) 136 | self.ln3 = nn.LayerNorm(normalized_shape=window_size, elementwise_affine=True) 137 | self.ln4 = nn.LayerNorm(normalized_shape=hidden_dim, elementwise_affine=True) 138 | self.gat_weights_1 = None 139 | self.gat_weights_2 = None 140 | self.gat1 = GatedAttention(hidden_dim=window_size, dim=-1) 141 | self.gat2 = GatedAttention(hidden_dim=hidden_dim, dim=-2) 142 | 143 | def forward(self, x1, x2): 144 | # x1.shape = (b, w, f), x2.shape = (b, w, f) 145 | x1 = x1.transpose(-1, -2) # x1.shape = (b, f, w) 146 | x1 = self.ln1(self.block1(x1) + x1) # x1.shape = (b, f, w) 147 | x2 = self.ln2(self.block2(x2) + x2) # x2.shape = (b, w, f) 148 | x1 = self.ln3(x1 + self.gat2(x2).transpose(-1, -2)) 149 | x2 = self.ln4(x2 + self.gat1(x1).transpose(-1, -2)) # x2.shape = (b, f, w) 150 | self.gat_weights_1 = self.gat1.weights 151 | self.gat_weights_2 = self.gat2.weights 152 | return x1.transpose(-1, -2), x2 153 | 154 | 155 | class DualMLPMixer(ContrastiveModel): 156 | def __init__(self, 157 | window_size, 158 | in_features, 159 | hidden_dim, 160 | num_layers, 161 | dropout=0.5, 162 | model_flag="MLPDualMixer", device="cuda:0", label_norm=True, 163 | filter_size=0): 164 | super(DualMLPMixer, self).__init__(model_flag=model_flag, device=device, label_norm=label_norm) 165 | if filter_size > 0: 166 | window_size = window_size // filter_size 167 | self.MaV = nn.Conv1d(in_channels=in_features, out_channels=hidden_dim, kernel_size=filter_size, 168 | stride=filter_size) 169 | self.input_embedding = None 170 | else: 171 | window_size = window_size 172 | self.MaV = None 173 | self.input_embedding = nn.Linear(in_features=in_features, out_features=hidden_dim) 174 | 175 | self.layers = nn.ModuleList() 176 | for _ in range(num_layers): 177 | self.layers.append(DualMLPLayer(window_size=window_size, hidden_dim=hidden_dim, dropout=dropout)) 178 | self.out_gat1 = GatedAttention(hidden_dim=window_size) 179 | self.out_gat2 = GatedAttention(hidden_dim=hidden_dim, dim=-2) 180 | # self.fuse = nn.Linear(in_features=in_features*hidden_dim, out_features=768) 181 | self.output = nn.Sequential( 182 | # nn.Dropout(dropout), 183 | nn.Linear(in_features=hidden_dim*window_size, out_features=1) 184 | ) 185 | self.to(device) 186 | 187 | def feature_extractor(self, x): 188 | if self.MaV: 189 | x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2) 190 | self.hidden_out_1 = [] 191 | self.hidden_out_2 = [] 192 | # x.shape = (b, w, f) 193 | x = self.input_embedding(x) if self.input_embedding is not None else x # x.shape = (b, w, h) 194 | f1 = x 195 | f2 = x 196 | for l in self.layers: 197 | f1, f2 = l(f1, f2) 198 | f1 = self.out_gat1(f1.transpose(-1, -2)) 199 | f2 = self.out_gat2(f2) 200 | f = torch.flatten(f1.transpose(-1, -2) + f2, start_dim=-2, end_dim=-1) 201 | 
return f
202 | 
203 |     def forward(self, x, label=None):
204 |         if len(x.shape) < 4:
205 |             x = self.feature_extractor(x)
206 |             return self.output(x)
207 |         else:
208 |             f_pos, f_apos, f_neg, weight = self.generate_contrastive_samples(x, label)
209 |             return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, weight
210 | 
211 |     def compute_loss(self,
212 |                      x: torch.Tensor,
213 |                      label: torch.Tensor,
214 |                      criterion) -> [torch.Tensor, torch.Tensor]:
215 |         [loss, rul] = super(DualMLPMixer, self).compute_loss(x, label, criterion)
216 |         return loss, rul
217 | 
--------------------------------------------------------------------------------
/models/RULPrediction/ContrastiveModules.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import matplotlib.pyplot as plt
4 | import sklearn.manifold as manifold
5 | from train.trainable import TrainableModule
6 | from functools import wraps
7 | 
8 | 
9 | def pn_rul_compute(predictor, f_pos, f_neg):
10 |     """
11 |     Computes the RUL of the positive and negative samples, because the Weighted
12 |     InfoNCE loss needs all the positive and negative RULs to compute the final loss.
13 | 
14 |     :param predictor: The predictor layer
15 |     :param f_pos: The positive samples with shape (batch, features)
16 |     :param f_neg: The negative samples with shape (batch, nums, features), where nums indicates
17 |         the number of negative samples.
18 |     :return: All the RULs with shape (batch, nums+1)
19 |     """
20 |     out_all = predictor(f_pos)
21 |     neg_nums = f_neg.shape[1]
22 |     neg_out = []
23 |     for neg_i in range(neg_nums):
24 |         neg_out.append(predictor(f_neg[:, neg_i]))
25 |     neg_out = torch.concat(neg_out, dim=-1)
26 |     return torch.concat([out_all, neg_out], dim=-1)
27 | 
28 | 
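# Shape sketch for pn_rul_compute (illustrative only, not from the original
# repo): with predictor = nn.Linear(8, 1), f_pos of shape (4, 8) and f_neg of
# shape (4, 3, 8), pn_rul_compute(predictor, f_pos, f_neg) returns a (4, 4)
# tensor: one RUL for the positive sample plus one per negative sample.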
29 | class WeightedInfoNCELoss(nn.Module):
30 |     def __init__(self, temperature=0.2):
31 |         super(WeightedInfoNCELoss, self).__init__()
32 |         self.temperature = temperature
33 | 
34 |     def forward(self, x, pos, neg, neg_weight=None):
35 |         """
36 |         :param x: The input of the network with shape (batch, length, feature) or (batch, feature)
37 |         :param pos: The positive samples of x with shape (batch, num_p, length, feature) or (batch, num_p, feature),
38 |             where num_p is the number of positive samples.
39 |         :param neg: The negative samples of x with shape (batch, num_n, length, feature)
40 |         :param neg_weight: The weight used for different negative samples with shape (batch, num_n).
41 | 
42 |         :return: A scalar of the contrastive loss.
43 |         """
44 |         if len(x.shape) > 2:
45 |             x = torch.flatten(x, 1)  # (batch, feature)
46 |         if len(pos.shape) > 2:
47 |             pos = torch.flatten(pos, 2)  # (batch, num_p, feature)
48 |         if len(neg.shape) > 2:
49 |             neg = torch.flatten(neg, 2)  # (batch, num_n, feature)
50 |         x = x.unsqueeze(dim=1)  # (batch, 1, feature)
51 |         pos_sim = torch.cosine_similarity(x, pos, dim=2)  # positive samples similarity (batch, num_p)
52 |         neg_sim = torch.cosine_similarity(x, neg, dim=2)  # negative samples similarity (batch, num_n)
53 |         if neg_weight is not None:
54 |             neg_sim = torch.mul(neg_sim, neg_weight)
55 |         numerator = torch.exp((torch.div(pos_sim, self.temperature)))  # (batch, num_p)
56 |         denominator = torch.exp(
57 |             torch.div(torch.concat([pos_sim, neg_sim], dim=1), self.temperature)  # (batch, num_p + num_n)
58 |         )
59 |         numerator = numerator.sum(dim=-1)  # (batch, )
60 |         denominator = denominator.sum(dim=-1)  # (batch, )
61 |         loss = -torch.log(torch.mean(numerator / denominator))
62 |         return loss
63 | 
64 | 
65 | class MSEContrastiveLoss(nn.Module):
66 |     def __init__(self, contrastive="InfoNCE"):
67 |         super(MSEContrastiveLoss, self).__init__()
68 |         self.mse = torch.nn.MSELoss()
69 |         assert contrastive in ["InfoNCE", "Triplet"]
70 |         if contrastive == "InfoNCE":
71 |             self.contrastive = WeightedInfoNCELoss(0.2)
72 |         elif contrastive == "Triplet":
73 |             self.contrastive = TripletLoss()
74 | 
75 |     def forward(self, predict, label, x=None, pos=None, neg=None, neg_weight=None):
76 |         if x is not None and pos is not None and neg is not None:
77 |             # print(f"MSE:{self.mse(predict, label)}")
78 |             # print(f"Contra:{self.contrastive(x, pos, neg, neg_weight)}")
79 |             loss = self.mse(predict, label) + self.contrastive(x, pos, neg, neg_weight)
80 |         else:
81 |             loss = self.mse(predict, label)
82 |         return loss
83 | 
84 | 
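# Illustrative usage sketch (an assumption-based example, not part of the
# original training scripts): without contrastive features the criterion
# reduces to plain MSE.
#     criterion = MSEContrastiveLoss(contrastive="InfoNCE")
#     mse_only = criterion(predict, label)
#     combined = criterion(predict, label, x=f_pos, pos=f_pos_aug,
#                          neg=f_neg, neg_weight=neg_weights)
# where f_pos, f_pos_aug, f_neg and neg_weights come from
# ContrastiveModel.generate_contrastive_samples(x, labels) defined below.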
85 | class TripletLoss(nn.Module):
86 |     def __init__(self):
87 |         super(TripletLoss, self).__init__()
88 | 
89 |     def forward(self, x, pos, neg, neg_weight):
90 |         """
91 | 
92 |         :param x: Anchor samples with shape (b, f)
93 |         :param pos: Positive sample with shape (b, f)
94 |         :param neg: Negative sample with shape (b, n_n, f)
95 |         :param neg_weight: Alpha (margin) for each negative sample, with shape (b, n_n)
96 |         :return: A scalar value of the Triplet Loss.
97 |         """
98 |         if neg_weight is None:
99 |             raise RuntimeError("The neg_weight could not be None when using Triplet Loss.")
100 |         x = torch.unsqueeze(x, dim=1)  # (b, 1, f)
101 |         pos = torch.unsqueeze(pos, dim=1)  # (b, 1, f)
102 |         pos_dis = torch.sum(torch.square(torch.subtract(x, pos)), 2)  # (b, 1)
103 |         neg_dis = torch.sum(torch.square(torch.subtract(x, neg)), 2)  # (b, n)
104 |         basic_loss = torch.add(torch.subtract(pos_dis, neg_dis), neg_weight)
105 |         loss = torch.mean(torch.max(basic_loss, torch.zeros_like(basic_loss)))
106 |         return loss
107 | 
108 | 
109 | class ContrastiveModel(TrainableModule):
110 |     def __init__(self, label_norm, model_flag, device):
111 |         super(ContrastiveModel, self).__init__(model_flag=model_flag, device=device)
112 |         self.tsne = None
113 |         self.visual_samples = None
114 |         self.embedding = []
115 |         self.epoch_num = 0
116 |         self.label_norm = label_norm
117 | 
118 |     def compute_loss(self,
119 |                      x: torch.Tensor,
120 |                      label: torch.Tensor,
121 |                      criterion) -> [torch.Tensor, torch.Tensor]:
122 |         if len(x.shape) == 4:
123 |             rul, f_pos, f_posa, f_neg, weights = self(x, label)
124 |             loss = criterion(rul.to(self.device), label.to(self.device), f_pos, f_posa, f_neg, weights)
125 |         else:
126 |             rul = self(x)
127 |             loss = criterion(rul.to(self.device), label.to(self.device))
128 |         return [loss, rul]
129 | 
130 |     def generate_contrastive_samples(self, x, labels):
131 |         """
132 |         This method provides the arguments needed to compute the contrastive loss.
133 | 
134 |         Note
135 |         ----
136 |         This method is only used for ContrastiveModules.MSEContrastiveLoss(), and you must override the
137 |         feature_extractor() method to define the feature extracting process.
138 | 
139 |         :param x: x.shape = (batch, num, length, feature)
140 |         :return: feature_pos, feature_pos_aug, feature_neg, neg_weights
141 |         """
142 |         assert len(x.shape) == 4
143 |         assert labels is not None
144 |         batch, num, w, f = x.shape
145 | 
146 |         x_ = x.view(batch * num, w, f)
147 |         pos = x[:, 0, :, :]
148 |         mask = torch.normal(0, 0.15, (batch, w, f), device=pos.device)  # random noise
149 |         pos_aug = mask + pos
150 |         all_features = self.feature_extractor(x_)
151 |         feature_pos_aug = self.feature_extractor(pos_aug)
152 |         features = all_features.view(batch, num, -1)
153 |         feature_pos = features[:, 0]
154 |         feature_neg = features[:, 1:]
155 |         neg_weights = torch.abs(labels[:, 1:] - labels[:, 0:1]) * 2
156 | 
157 |         return feature_pos, feature_pos_aug, feature_neg, neg_weights
158 | 
159 |     def feature_extractor(self, x):
160 |         """
161 |         Note
162 |         ----
163 |         This method must be overridden to customize your own feature extracting process when you compute the
164 |         contrastive loss by
165 | 
166 |         >>> self.generate_contrastive_samples(x, label)
167 | 
168 |         :param x: Input
169 |         :return: tensors of feature
170 |         """
171 |         raise NotImplementedError("The feature_extractor method must be implemented.")
172 | 
173 |     def forward(self, x, label=None):
174 |         """
175 |         The forward method in contrastive models must have two branches: one is the normal
176 |         forward process, and the other is the forward process with negative samples.
177 | 
178 |         Base Implementation
179 |         ----
180 | 
181 |         >>> if len(x.shape) < 4:  # the normal forward, default shape with (b, l, f)
182 |         >>>     x = self.feature_extractor(x)
183 |         >>>     return self.predictor(x)
184 |         >>> else:  # the forward with negative samples, default shape with (b, num, l, f)
185 |         >>>     f_pos, f_apos, f_neg, w = self.generate_contrastive_samples(x, label)
186 |         >>>     return pn_rul_compute(self.predictor, f_pos, f_neg), f_pos, f_apos, f_neg, w
187 |         :return: rul, f_pos, f_apos, f_neg, w
188 |         """
189 |         raise NotImplementedError("The forward method must be implemented.")
190 | 
191 |     def set_visual_samples(self, samples):
192 |         """
193 |         Sets the visualization samples used in epoch_start.
194 | 
195 |         :param samples: (batch, len, features)
196 |         :return:
197 |         """
198 |         self.visual_samples = samples
199 |         self.tsne = manifold.TSNE(n_components=2, random_state=2023)
200 | 
201 |     def epoch_start(self):
202 |         if self.visual_samples is not None:
203 |             print("Visualizing samples processing...")
204 |             features = self.feature_extractor(self.visual_samples)
205 |             features = features.cpu().detach().numpy().squeeze()
206 |             embedding = self.tsne.fit_transform(features)
207 |             self.embedding.append(embedding)
208 |             # plt.figure(dpi=600)
209 |             # plt.scatter(embedding[:, 0], embedding[:, 1], c=plt.cm.Spectral(range(len(embedding))))
210 |             # plt.title("Epoch:{}".format(self.epoch_num))
211 |             # plt.savefig(self.get_model_result_path()+"visual_embedding_{}.png".format(self.epoch_num))
212 |             self.epoch_num += 1
213 |         else:
214 |             print("Visualizing samples is None, ignored.")
215 | 
216 |     def train_end(self):
217 |         plt.figure(dpi=600)
218 |         plt.title("Total")
219 |         index = 0
220 |         emd_index = [0, len(self.embedding) // 2, len(self.embedding) - 1]
221 |         for i in emd_index:
222 |             plt.scatter(self.embedding[i][:, 0], self.embedding[i][:, 1],
223 |                         c=plt.cm.tab20(index),
224 |                         edgecolors=plt.cm.Wistia(range(len(self.embedding[i][:, 0]))),
225 |                         label="epoch: {}".format(i))
226 |             index += 1
227 |         plt.legend()
228 |         plt.savefig(self.get_model_result_path() + "total_embedding.png")
229 | 
--------------------------------------------------------------------------------
/train/trainable.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import matplotlib
4 | import matplotlib.pyplot as plt
5 | import torch
6 | import torch.nn as nn
7 | from torch.utils.data import Dataset, DataLoader
8 | import numpy as np
9 | 
10 | import time
11 | 
12 | root = os.path.dirname(__file__)
13 | 
14 | 
15 | def _check_path(path: str):
16 |     if not os.path.exists(path):
17 |         os.makedirs(path)
18 | 
19 | 
20 | class TrainableModule(nn.Module):
21 |     """
22 |     The base module of trainable models. "Trainable" means the models can be trained easily
23 |     with the following methods:
24 | 
25 |     >>> model.prepare_data(...)
26 |     >>> model.train_model(...)
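
    A minimal end-to-end sketch (illustrative only; the subclass name, dataset
    objects and hyper-parameter values below are assumptions, not fixtures of
    this repository):

    >>> model = SomeTrainableSubclass(model_flag="demo", device="cpu")
    >>> model.prepare_data(train_set, test_set, eval_set, batch_size=256)
    >>> model.train_model(epoch=10, lr=1e-3, criterion=torch.nn.MSELoss())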
27 |     """
28 | 
29 |     def __init__(self, model_flag="model", device="cuda"):
30 |         super(TrainableModule, self).__init__()
31 |         self.eval_losses = None
32 |         self.train_losses = None
33 | 
34 |         self.eval_loader = None
35 |         self.test_loader = None
36 |         self.train_loader = None
37 | 
38 |         self.optimizer = None
39 |         self.criterion = None
40 |         self.lr_scheduler = None
41 | 
42 |         self.flag = model_flag
43 |         self.device = device
44 | 
45 |     def prepare_data(self,
46 |                      train_set: Dataset,
47 |                      test_set: Dataset,
48 |                      eval_set: Dataset = None,
49 |                      batch_size: int = 256,
50 |                      num_workers: int = 8,
51 |                      eval_shuffle=True):
52 |         self.train_loader = DataLoader(train_set, batch_size, shuffle=True, num_workers=num_workers, pin_memory=True)
53 |         self.test_loader = DataLoader(test_set, batch_size, shuffle=False, num_workers=num_workers)
54 |         if eval_set is not None:
55 |             self.eval_loader = DataLoader(eval_set, batch_size, shuffle=eval_shuffle, num_workers=num_workers)
56 |         print("train size:{}".format(len(train_set)))
57 |         print("test size:{}".format(len(test_set)))
58 |         print("validate size:{}".format(len(eval_set) if eval_set is not None else 0))
59 |         _check_path(self.get_model_result_path())
60 | 
61 |     def train_model(self,
62 |                     epoch: int,
63 |                     lr: float,
64 |                     criterion,
65 |                     optimizer: str = "adam",
66 |                     lr_lambda=None,
67 |                     early_stop=2,
68 |                     show_batch_loss=False):
69 |         if self.train_loader is None:
70 |             raise RuntimeError("The train_loader is None! Use "
71 |                                "model.prepare_data(Dataset, batch_size, num_workers) to provide the "
72 |                                "training data first.")
73 |         if optimizer == "adam":
74 |             self.optimizer = torch.optim.Adam(lr=lr, params=self.parameters())
75 |         elif optimizer == "rms":
76 |             self.optimizer = torch.optim.RMSprop(lr=lr, params=self.parameters())
77 |         elif optimizer == "sgd":
78 |             self.optimizer = torch.optim.SGD(lr=lr, params=self.parameters())
79 |         else:
80 |             raise RuntimeError("Unknown optimizer {}.".format(optimizer))
81 |         if lr_lambda is not None:
82 |             self.lr_scheduler = torch.optim.lr_scheduler.LambdaLR(self.optimizer, lr_lambda, last_epoch=-1)
83 |         if early_stop is not None and early_stop > 0:
84 |             mini_eval_loss = None
85 |             patience = early_stop
86 |             now_patience = 0
87 |         self.criterion = criterion
88 |         self.train_losses = []
89 |         self.eval_losses = []
90 |         print("Model flag: {}".format(self.flag))
91 |         print("Start training epoch {}".format(epoch))
92 | 
93 |         # training
94 |         start_time = time.time()
95 |         self.train_start()  # callback function
96 |         for e in range(epoch):
97 |             self.epoch_start()  # callback function
98 |             print("epoch: {}/{}".format(e + 1, epoch))
99 |             epoch_start_time = time.time()
100 |             self.train()
101 |             batch_losses = []
102 |             for step, (x, y) in enumerate(self.train_loader):
103 |                 x = x.to(torch.float32).to(self.device)
104 |                 y = y.to(torch.float32).to(self.device)
105 |                 loss, out = self.compute_loss(x, y, self.criterion)
106 |                 if step == 0 and e == epoch-1:
107 |                     np.save(self.get_model_result_path() + "train_x_batch{}".format(step), x.cpu().detach().numpy())
108 |                     np.save(self.get_model_result_path() + "train_y_batch{}".format(step), y.cpu().detach().numpy())
109 |                 if show_batch_loss:
110 |                     print("\tbatch: {}/{}, loss:{:.4f}".format(step + 1, len(self.train_loader), loss.item()))
111 |                 self.optimizer.zero_grad()
112 |                 loss.backward()
113 |                 self.optimizer.step()
114 |                 batch_losses.append(loss.item())
115 | 
116 |             if self.lr_scheduler is not None:
117 |                 self.lr_scheduler.step()
118 |             batch_loss = np.average(batch_losses)
119 |             self.train_losses.append(batch_loss)
120 |             
121 | 
122 |             # evaluation
123 |             if self.eval_loader is not None:
124 |                 self.eval()
125 |                 eval_losses = []
126 | 
127 |                 with torch.no_grad():
128 |                     for step, (e_x, e_y) in enumerate(self.eval_loader):
129 |                         e_x = e_x.to(torch.float32).to(self.device)
130 |                         e_y = e_y.to(torch.float32)
131 |                         loss, _ = self.compute_loss(e_x, e_y, self.criterion)
132 |                         eval_losses.append(loss.item())
133 |                 eval_loss = np.average(eval_losses)
134 |                 self.eval_losses.append(eval_loss)
135 | 
136 |                 print("\ttraining loss: {:.4}\n \teval loss: {:.4} \tCurrent learning rate: {}".
137 |                       format(batch_loss, eval_loss, self.optimizer.state_dict()['param_groups'][0]['lr']))
138 |             else:
139 |                 print("\ttraining loss: {:.4}\n \tCurrent learning rate: {}".
140 |                       format(batch_loss, self.optimizer.state_dict()['param_groups'][0]['lr']))
141 | 
142 |             print("\tEpoch time spent: %s s" % (time.time() - epoch_start_time))
143 |             # early stop (only possible when a validation set exists)
144 |             if early_stop is not None and early_stop > 0 and self.eval_loader is not None:
145 |                 if mini_eval_loss is None:
146 |                     mini_eval_loss = eval_loss
147 |                     torch.save(self.state_dict(), self.get_model_result_path() + 'check_point.pt')
148 |                     continue
149 |                 if eval_loss >= mini_eval_loss:
150 |                     now_patience = now_patience + 1
151 |                     print("\tEarly Stopping Monitor: eval loss did not improve, patience {}/{}"
152 |                           .format(now_patience, patience))
153 |                 else:
154 |                     now_patience = 0
155 |                     mini_eval_loss = eval_loss
156 |                     print("\tEarly Stopping Monitor: smaller eval loss achieved, saving model...")
157 |                     torch.save(self.state_dict(), self.get_model_result_path() + 'check_point.pt')
158 |                 if now_patience >= patience:
159 |                     print("\tEarly Stopping in epoch {}".format(e))
160 |                     self.load_state_dict(torch.load(self.get_model_result_path() + 'check_point.pt'))
161 |                     break
162 |             self.epoch_end()  # callback function
163 |         end_time = time.time()
164 |         self.train_end()  # callback function
165 |         print("Total time spent: %s s" % round(end_time - start_time, 2))
166 |         self.plot_losses()
167 |         torch.save(self.state_dict(), self.get_model_result_path() + 'model.pt')
168 |         self.test_model()
169 | 
170 |     def test_model(self):
171 |         """
172 |         TODO: Rework the logic that splits and saves the results; it is too crude. Ideally the chunk size should be decided from the device's physical memory.
173 |         """
174 |         self.test_start()  # callback function
175 |         output = None
176 |         labels = None
177 |         losses = []
178 |         self.eval()
179 |         with torch.no_grad():
180 |             index = 1
181 |             for step, (x, y) in enumerate(self.test_loader):
182 |                 x = x.to(torch.float32).to(self.device)
183 |                 y = y.to(torch.float32).to(self.device)
184 |                 loss, model_out = self.compute_loss(x, y, self.criterion)
185 |                 model_out = model_out.detach().cpu()
186 |                 y = y.detach().cpu()
187 |                 losses.append(loss.item())
188 |                 output = torch.cat([output, model_out], dim=0) if output is not None else model_out
189 |                 labels = torch.cat([labels, y], dim=0) if labels is not None else y
190 |                 # Split and save the predictions and labels here. To reduce the memory
191 |                 # footprint when loading, the results must be split into several parts
192 |                 # and saved separately.
193 |                 if output.numel() >= 40000000:  # result cut
194 |                     print(output.numel())
195 |                     print(labels.numel())
196 |                     np.save(self.get_model_result_path() + "model_test_output_part{}".format(index), output.cpu().detach().numpy())
197 |                     np.save(self.get_model_result_path() + "model_test_labels_part{}".format(index), labels.cpu().detach().numpy())
198 |                     output, labels = None, None
199 |                     index += 1
200 |             # output = torch.cat(output, dim=0)
201 |             # labels = torch.cat(labels, dim=0)
202 |             if output is not None:
203 |                 np.save(self.get_model_result_path() + "model_test_output_part{}".format(index), output.cpu().detach().numpy())
204 |                 np.save(self.get_model_result_path() + "model_test_labels_part{}".format(index), labels.cpu().detach().numpy())
+ "model_test_labels_part{}".format(index), labels.cpu().detach().numpy()) 205 | np.save(self.get_model_result_path() + "model_test_loss_part{}".format(index), np.average(losses)) 206 | self.test_end() # callback function 207 | 208 | def set_criterion(self, 209 | criterion): 210 | self.criterion = criterion 211 | 212 | def plot_losses(self, show=False): 213 | if self.train_losses is None or self.eval_losses is None: 214 | raise RuntimeWarning("The model is not trained by internal training method. " 215 | "You could call plot_losses(show=False) after training the model by:" 216 | ">>> model.prepare_data(...)" 217 | ">>> model.train_model(...)." 218 | "Tips: plot_losses(show=False) will not work if you train your model manually" 219 | "but not the above process.") 220 | if show: 221 | matplotlib.use("QtAgg") 222 | else: 223 | matplotlib.use("Agg") 224 | plt.suptitle("Model Loss") 225 | plt.plot(self.train_losses, label="training loss") 226 | plt.plot(self.eval_losses, label="evalidate loss") 227 | plt.xlabel("epoch") 228 | plt.ylabel("criterion loss") 229 | plt.legend() 230 | _check_path(self.get_model_result_path()) 231 | plt.savefig(self.get_model_result_path() + "train_eval_losses.png") 232 | plt.cla() 233 | if show: 234 | plt.show(block=True) 235 | 236 | def get_model_result_path(self): 237 | return root + "/model_result/" + self.flag + "/" 238 | 239 | def _criterion(self, y, label): 240 | if label.device.type == 'cpu': 241 | y = y.detach().cpu() 242 | label = label.detach().cpu() 243 | elif label.device.type != y.device.type: 244 | y = y.to(label.device) 245 | return self.criterion(y, label) 246 | 247 | def compute_loss(self, 248 | x: torch.Tensor, 249 | label: torch.Tensor, 250 | criterion) -> [torch.Tensor, torch.Tensor]: 251 | """ 252 | An overridable method for different process of loss computation. The default process is simple 253 | single output computation. This method should only be overrideen if custom loss computation is 254 | required when training the model by: 255 | >>> self.prepare_data(...) 256 | >>> self.train_model(...) 257 | 258 | return: must be a list containing [ loss, model_out ]. 259 | """ 260 | model_out = self(x) 261 | loss = criterion(model_out.to(self.device), label.to(self.device)) 262 | return [loss, model_out] 263 | 264 | def epoch_start(self): 265 | """ 266 | A callback function called before every training epoch starting. 267 | """ 268 | return 269 | 270 | def epoch_end(self): 271 | """ 272 | A callback function called after every training epoch finished. 273 | """ 274 | return 275 | 276 | def train_start(self): 277 | """ 278 | A callback function called before training process starting. 279 | """ 280 | return 281 | 282 | def train_end(self): 283 | """ 284 | A callback function called after training process finished. 285 | """ 286 | return 287 | 288 | def test_start(self): 289 | """ 290 | A callback function called before testing process starting. 291 | """ 292 | return 293 | 294 | def test_end(self): 295 | """ 296 | A callback function called after testing process finished. 
297 |         """
298 |         return
299 | 
--------------------------------------------------------------------------------
/dataset/cmapss.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | 
3 | import matplotlib.pyplot as plt
4 | import pandas as pd
5 | import numpy as np
6 | import sklearn.base
7 | import sklearn.preprocessing as pre
8 | import torch.utils.data
9 | from torch.utils.data import Dataset
10 | from enum import Enum
11 | from dataset.utils import Sampler, gaussian_distribution
12 | 
13 | DEFAULT_ROOT = r"./raw_data/"
14 | 
15 | DEFAULT_SENSORS = ["s_2", "s_3", "s_4", "s_7", "s_8", "s_9", "s_11", "s_12", "s_13",
16 |                    "s_14", "s_15", "s_17", "s_20", "s_21"]
17 | 
18 | 
19 | class Subset(Enum):
20 |     FD001 = "FD001"
21 |     FD002 = "FD002"
22 |     FD003 = "FD003"
23 |     FD004 = "FD004"
24 | 
25 | 
26 | class Cmapss(Dataset):
27 |     """
28 |     The C-MAPSS dataset used for DataLoader.
29 | 
30 |     Notes
31 |     -----
32 |     This class supports using dataset.utils.Sampler to customize your own sampling method. If you do so, the
33 |     sampling behaviour used when the DataLoader calls __getitem__(index) is replaced. The sampler can be passed
34 |     when you initialize Cmapss(..., sampler=...) or set later via set_sampler(sampler=...).
35 |     """
36 | 
37 |     def __init__(self, data: np.ndarray, ids: np.ndarray, labels: np.ndarray, sampler: Sampler = None):
38 |         """
39 |         The dataset class used for DataLoader.
40 | 
41 |         :param data: The CMAPSS dataset samples.
42 |         :param ids: The engine id of every sample.
43 |         :param labels: The RUL label of every sample.
44 |         """
45 |         self.data = data
46 |         self.ids = ids
47 |         self.labels = labels
48 |         assert self.data.shape[0] == self.ids.shape[0] == self.labels.shape[0]
49 |         self.__sampler = sampler
50 | 
51 |     def __getitem__(self, item):
52 |         if self.__sampler is not None:
53 |             return self.__sampler.sample(item)
54 |         else:
55 |             return self.data[item], self.labels[item:item + 1]
56 | 
57 |     def __len__(self):
58 |         return self.data.shape[0]
59 | 
60 |     def set_sampler(self, sampler: Sampler):
61 |         self.__sampler = sampler
62 | 
63 |     def clear_sampler(self):
64 |         self.__sampler = None
65 | 
66 |     def get_data_by_engine_id(self, engine_id):
67 |         pass
68 | 
69 | 
70 | class CmapssPiecewiseNegativeSampler(Sampler):
71 |     """
72 |     A Sampler used to construct positive-negative pairs for training a contrastive neural network.
73 |     """
74 | 
75 |     def __init__(self, dataset: Cmapss, engine_num=1, interval_num=4):
76 |         """
77 |         :param dataset: The target dataset.
78 |         :param engine_num: The number of engines to sample from, should be >= 1. The engine containing 'index' is always among them.
79 |         :param interval_num: The number of intervals each engine's run is split into. This argument determines
80 |                              the number of negative samples drawn per engine.
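        Each sample(index) call returns an ndarray of shape
        (engine_num * interval_num, window, features) plus the matching labels:
        position 0 holds the positive sample at `index`, and the remaining
        positions hold negatives drawn from the other intervals/engines.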
81 |         """
82 |         super(CmapssPiecewiseNegativeSampler, self).__init__(dataset)
83 |         dataset.set_sampler(self)
84 |         self.ids = dataset.ids
85 |         self.data = dataset.data
86 |         self.labels = dataset.labels
87 |         self.interval_nums = interval_num
88 |         self.engine_num = engine_num
89 | 
90 |     def sample(self, index: int):
91 |         engine_id = self.ids[index]
92 |         engine_ids = np.random.choice(a=np.unique(self.ids),
93 |                                       size=self.engine_num,
94 |                                       replace=False)
95 |         if engine_id not in engine_ids:
96 |             engine_ids[0] = engine_id  # make sure the engine that contains `index` is sampled
97 |         neg_samples = [0] * (self.interval_nums * self.engine_num)
98 |         neg_labels = [0] * (self.interval_nums * self.engine_num)
99 |         neg_ids = [0] * (self.interval_nums * self.engine_num)
100 |         j = 1  # write index for the negative arrays; negatives start at position 1 because position 0 is reserved for the positive sample
101 |         # start sampling
102 |         for engine in engine_ids:
103 |             sample_indexes = np.argwhere(self.ids == engine)
104 |             gap = sample_indexes.shape[0] // self.interval_nums
105 |             for i in range(self.interval_nums):
106 |                 random_range_start = sample_indexes[0][0] + i * gap
107 |                 # in the last iteration, extend the sampling boundary to the last index of this engine's samples to avoid missing any
108 |                 random_range_end = random_range_start + gap \
109 |                     if i != self.interval_nums - 1 else sample_indexes[-1][0] + 1
110 |                 if random_range_start <= index < random_range_end and engine == engine_id:
111 |                     continue  # skip the interval that contains the positive sample
112 |                 sample_index = np.random.choice(range(random_range_start, random_range_end), 1, replace=True)
113 |                 neg_samples[j] = self.data[sample_index[0]]
114 |                 neg_labels[j] = self.labels[sample_index[0]]  # the n:n+1 slicing style keeps the label's last dimension
115 |                 neg_ids[j] = self.ids[sample_index[0]]
116 |                 j += 1
117 |         # put the positive sample at the head of the final arrays
118 |         neg_samples[0] = self.data[index]
119 |         neg_labels[0] = self.labels[index]  # the n:n+1 slicing style keeps the label's last dimension
120 |         neg_ids[0] = engine_id  # used for testing, to check whether all negative samples come from the same engine as the positive one
121 |         return np.stack(neg_samples), np.array(neg_labels)
122 | 
123 | 
124 | class CmapssGaussianNegativeSampler(Sampler):
125 |     def __init__(self, dataset: Cmapss, neg_num=5, thresh=0.2, std=1.):
126 |         super(CmapssGaussianNegativeSampler, self).__init__(dataset)
127 |         dataset.set_sampler(self)
128 |         self.neg_num = neg_num - 1
129 |         self.thresh = thresh
130 |         self.std = std
131 |         self.ids = dataset.ids
132 |         self.data = dataset.data
133 |         self.labels = dataset.labels
134 | 
135 | 
136 |     def sample(self, index: int):
137 |         engine_id = self.ids[index]
138 |         sample_indexes = np.argwhere(self.ids == engine_id).squeeze()
139 | 
140 |         # locate the sampling point inside the [-4, 4] range of the Gaussian; the distribution's centre moves with the sampling point
141 |         # first map the sampling point to [0, 1], then shift it into [-4, 4] via * 8 - 4
142 |         sample_mean = (index - sample_indexes.min()) / (sample_indexes.max() - sample_indexes.min())
143 |         sample_mean = sample_mean * 8 - 4
144 | 
145 |         # exclude a band of `thresh` points around the sampling point from being sampled
146 |         thresh_up = index + self.thresh / 2 * len(sample_indexes)
147 |         thresh_down = index - self.thresh / 2 * len(sample_indexes)
148 |         cut_sample_indexes = np.concatenate([sample_indexes[sample_indexes < thresh_down],
149 |                                              sample_indexes[sample_indexes > thresh_up]])
150 |         length = len(cut_sample_indexes)  # final sample indexes
151 |         prob = gaussian_distribution(np.linspace(-4, 4, length), sample_mean, self.std)
152 |         prob = torch.softmax(torch.tensor(prob), dim=0).numpy()
153 |         results = np.random.choice(cut_sample_indexes, self.neg_num, replace=False, p=prob)
154 |         neg_samples = [self.data[i] for i in results]
155 |         neg_labels = [self.labels[i] for i in results]
156 |         # prob_all = np.zeros(len(sample_indexes))
157 |         # for i in range(len(cut_sample_indexes)):
158 |         #     prob_all[sample_indexes == cut_sample_indexes[i]] = prob[i]
plt.title("index:{}".format(index)) 160 | # plt.plot(sample_indexes, prob_all) 161 | # plt.scatter(results, np.zeros(len(results)), c="red") 162 | # plt.grid() 163 | # plt.show() 164 | return np.stack(neg_samples), np.array(neg_labels) 165 | 166 | 167 | class CmapssRandomNegtiveSampler(Sampler): 168 | def __init__(self, dataset: Cmapss, neg_num=10, sample_thresh=0.2): 169 | super(CmapssRandomNegtiveSampler, self).__init__(dataset) 170 | dataset.set_sampler(self) 171 | self.neg_num = neg_num 172 | self.labels = dataset.labels 173 | self.data = dataset.data 174 | self.thresh = sample_thresh 175 | 176 | def sample(self, index: int): 177 | indexes = np.squeeze(np.argwhere(np.abs(self.labels - self.labels[index]) > self.thresh)) 178 | indexes = np.random.choice(a=indexes, size=self.neg_num + 1, replace=False) 179 | indexes[0] = index 180 | return self.data[indexes], self.labels[indexes] 181 | 182 | 183 | def generate_rul(df: pd.DataFrame, y_test: pd.DataFrame = None, normalize=False, threshold=0) -> pd.DataFrame: 184 | """ 185 | Generating RUL labels for original DataFrame. 186 | 187 | :param df: The CMAPSS DataFrame generated by get_data() methods. 188 | :param y_test: The DataFrame from RUL_FD00N.txt file. If not None, this method will process the df as training data, 189 | else this method will process the df as test data. 190 | :param normalize: Weather normalizing the RUL label to [0, 1]. 191 | :param threshold: Weather drop the RUL which bigger than the threshold. This argument will be processed earlier than 192 | normalize argument. Thus, if normalize = True, the dropped RUL will be 1. 193 | :return: A DataFrame contains RUL column with name "rul" and the maximum life cycle column with name "max_cycles". 194 | """ 195 | grouped = df.groupby(by="unit_nr") 196 | RUL_max = grouped["time_cycles"].max() 197 | if y_test is not None: 198 | y_test.index = RUL_max.index 199 | RUL_max = RUL_max + y_test[y_test.columns[0]] 200 | result = pd.merge(df, RUL_max.to_frame(name="max_cycles"), on="unit_nr") 201 | result["rul"] = result["max_cycles"] - result["time_cycles"] 202 | if threshold > 0: 203 | result.loc[result["rul"] > threshold, "rul"] = threshold 204 | result.loc[result["max_cycles"] > threshold, "max_cycles"] = threshold + 1 205 | if normalize: 206 | result["rul"] = (result["rul"] + 1) / result["max_cycles"] 207 | # result.drop("max_cycles", axis=1) 208 | return result 209 | 210 | 211 | def generate_window_sample(df: pd.DataFrame, window_size, slide_step, sensors): 212 | """ 213 | Transform the RULed DataFrame to window samples. 214 | 215 | :param df: The RULed DataFrame. 216 | :param window_size: The sample length. 217 | :param slide_step: Sampling step size. 218 | :param sensors: The sensors' data will be returned. If None, will return all the sensors' data. 219 | :return: [ndarray with window samples; ndarray with engine id for every window samples; ndarray with 220 | RUL labels for every window samples] 221 | """ 222 | engine_grouped = df.groupby(by="unit_nr") 223 | result = [] # engine sensor data 224 | engine_ids = [] # engine id 225 | labels = [] # rul labels 226 | for _, engine in list(engine_grouped): 227 | data = engine[sensors].values # shape = (n, f) 228 | if data.shape[0] < window_size: 229 | warnings.warn("The engine id {} with total length {} is shorter than window_size {}. 
" 230 | "Hence, these samples were dropped!".format(_, data.shape[0], window_size)) 231 | continue 232 | sample_nums = (data.shape[0] - window_size) // slide_step + 1 233 | s = [0] * sample_nums # temporal sensor data 234 | e = [0] * sample_nums # temporal engine data. To correspond with each sample. 235 | rul = [0] * sample_nums # temporal rul data. To correspond with each sample. 236 | engine_id = engine["unit_nr"].iloc[0] 237 | for j in range(len(s)): 238 | s[j] = data[j * slide_step:j * slide_step + window_size] 239 | e[j] = engine_id 240 | rul[j] = engine["rul"].iloc[ 241 | j * slide_step + window_size - 1] # The label is set to the last time stamp of the sample window. 242 | result.append(s) 243 | engine_ids.append(e) 244 | labels.append(rul) 245 | return np.concatenate(result, dtype=np.float64), \ 246 | np.concatenate(engine_ids, dtype=np.float64), \ 247 | np.concatenate(labels, dtype=np.float64) 248 | 249 | 250 | def get_data(path: str, subset: Subset, window_size: int, slide_step: int = 1, sensors: list = None, 251 | scaler: sklearn.base.TransformerMixin = pre.MinMaxScaler((-1, 1)), rul_threshold=0, label_norm=False, 252 | val_ratio=0.2): 253 | """ 254 | Return the training data, test data and validation data of C-MAPSS dataset. 255 | 256 | :param path: The root path of the C-MAPSS dataset. The cmapss.DEAFULT_ROOT is the default root path in server. 257 | :param subset: A enum indicated the subset. Should be the element of follows: [FD001, FD002, FD003, FD004] 258 | :param window_size: The sample length. 259 | :param slide_step: The sampling gap length, default 1. 260 | :param sensors: The sensor data will be returned. It should be from [s_1 ~ s_21]. If None, selecting all the 261 | sensor data. 262 | :param scaler: Used for normalizing the train and test data. It should be a sklearn scaler. 263 | :param rul_threshold: The rul threshold is applied to a piecewise linear RUL label function. If 0, will applied 264 | non-piecewise linear RUL label function. 265 | :param label_norm: Weather normalizing the RUL label to [0, 1]. 266 | :param val_ratio: The ratio of validation dataset. 267 | 268 | :return: train data set class (torch.utils.data.Dataset), 269 | test data set class (torch.utils.data.Dataset), 270 | val data set class (torch.utils.data.Dataset), 271 | Scaler (maybe) used to inverse transform the train data. 
272 | 
273 |     Notes
274 |     -----
275 |     This method can only process the original C-MAPSS data files, which are named:
276 |     RUL_FD00X.txt / train_FD00X.txt / test_FD00X.txt
277 |     """
278 |     # files
279 |     train_file = 'train_' + subset.value + '.txt'
280 |     test_file = 'test_' + subset.value + '.txt'
281 |     # columns
282 |     index_names = ['unit_nr', 'time_cycles']
283 |     setting_names = ['setting_1', 'setting_2', 'setting_3']
284 |     sensor_names = ['s_{}'.format(i + 1) for i in range(0, 21)]
285 |     col_names = index_names + setting_names + sensor_names
286 |     # data readout
287 |     train = pd.read_csv((path + train_file), sep=r'\s+', header=None,
288 |                         names=col_names)
289 |     test = pd.read_csv((path + test_file), sep=r'\s+', header=None,
290 |                        names=col_names)
291 |     y_test = pd.read_csv((path + 'RUL_' + subset.value + '.txt'), sep=r'\s+', header=None,
292 |                          names=['RUL'])
293 |     # generate rul label
294 |     train = generate_rul(train, threshold=rul_threshold, normalize=label_norm)
295 |     test = generate_rul(test, y_test, threshold=rul_threshold, normalize=label_norm)
296 |     # split the val dataset from the train set
297 |     train, val = split_val_set(train, val_ratio)
298 |     # normalization uses the train set (all normalization factors come from the train set)
299 |     assert isinstance(scaler, (pre.StandardScaler, pre.MinMaxScaler, pre.RobustScaler, pre.MaxAbsScaler))
300 |     sensors = sensor_names if sensors is None else sensors
301 |     scaler.fit(train[sensors])
302 |     train[sensors] = scaler.transform(train[sensors])
303 |     test[sensors] = scaler.transform(test[sensors])
304 |     val[sensors] = scaler.transform(val[sensors])
305 | 
306 |     if sensors is None or sensors == []:
307 |         sensors = train.columns
308 |     [train_data, train_ids, train_label] = generate_window_sample(train, window_size, slide_step, sensors)
309 |     [val_data, val_ids, val_label] = generate_window_sample(val, window_size, slide_step, sensors)
310 |     [test_data, test_ids, test_label] = generate_window_sample(test, window_size, slide_step, sensors)
311 |     train_data = Cmapss(train_data, train_ids, train_label)
312 |     test_data = Cmapss(test_data, test_ids, test_label)
313 |     val_data = Cmapss(val_data, val_ids, val_label)
314 |     return train_data, test_data, val_data, scaler
315 | 
316 | 
317 | def split_val_set(train_set: pd.DataFrame, val_size=0.2):
318 |     """
319 |     Split train_set into training data and validation data. The validation data is later
320 |     normalized (in get_data) with the normalizing factors computed from the training data.
321 | 
322 |     :param train_set: The data to be split.
323 |     :param val_size: The validation data set ratio of train_set (default 0.2).
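
    Note: the split is engine-wise, so whole engines go to one split or the other and
    windows from a single engine never appear in both. The random choice uses a fixed
    seed (2023), and engine 1 is always kept in the training split.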
324 |     :return: train_data, val_data
325 |     """
326 |     grouped = train_set.groupby(by="unit_nr")
327 |     train_set_result = []
328 |     val_set_result = []
329 |     np.random.seed(2023)
330 |     val_index = np.random.choice(range(1, len(grouped) + 1), int(len(grouped) * val_size), replace=False)
331 |     if 1 in val_index:  # engine 1 is always kept in the training split
332 |         val_index = np.delete(val_index, np.argwhere(val_index == 1))
333 |     print(f"val_index:{val_index}")
334 |     for i in range(1, len(grouped) + 1):
335 |         data = train_set[train_set["unit_nr"] == i]
336 |         if i in val_index:
337 |             val_set_result.append(data)
338 |         else:
339 |             train_set_result.append(data)
340 |     return pd.concat(train_set_result), pd.concat(val_set_result)
341 | 
342 | 
343 | if __name__ == '__main__':
344 |     train1, test1, val1, scaler = get_data(DEFAULT_ROOT,
345 |                                            Subset.FD004,
346 |                                            window_size=40,
347 |                                            slide_step=1,
348 |                                            sensors=None,
349 |                                            rul_threshold=0,
350 |                                            label_norm=True,
351 |                                            scaler=pre.MinMaxScaler(),
352 |                                            val_ratio=0.1)
353 |     sampler = CmapssGaussianNegativeSampler(train1, 5, std=0.3)
354 |     loader = torch.utils.data.DataLoader(train1, 40, True)
355 |     for _, (x, y) in enumerate(loader):
356 |         print(x.shape)
357 |         print(y.shape)
358 |         break
359 | 
--------------------------------------------------------------------------------
/models/RULPrediction/IMDSSN.py:
--------------------------------------------------------------------------------
1 | """
2 | Implementation of https://doi.org/10.1016/j.ress.2023.109096
3 | """
4 | 
5 | 
6 | import numpy as np
7 | import torch
8 | import torch.nn as nn
9 | import math
10 | import copy
11 | from torch.nn.parameter import Parameter
12 | from typing import Dict
13 | from math import sqrt
14 | from ContrastiveModules import ContrastiveModel, pn_rul_compute
15 | 
16 | 
17 | 
18 | 
19 | class PositionEmbedding(nn.Module):
20 |     def __init__(self, dim, window_size, dropout=0.5, device="cuda:0"):
21 |         super(PositionEmbedding, self).__init__()
22 |         self.pe = torch.zeros(window_size, dim)
23 |         position = torch.arange(0, window_size).unsqueeze(1)
24 |         div_term = torch.exp(torch.arange(0, dim, 2) *
25 |                              -(np.log(10000.0) / dim))
26 |         self.pe[:, 0::2] = torch.sin(position * div_term)
27 |         self.pe[:, 1::2] = torch.cos(position * div_term)
28 |         self.pe = self.pe.unsqueeze(0).to(device)
29 |         self.dropout = nn.Dropout(dropout)
30 | 
31 |     def forward(self, x):
32 |         x = x + self.pe
33 |         return self.dropout(x)
34 | 
35 | 
36 | class TriangularCausalMask:
37 |     def __init__(self, B, L, device="cpu"):
38 |         mask_shape = [B, 1, L, L]
39 |         with torch.no_grad():
40 |             self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
41 | 
42 |     @property
43 |     def mask(self):
44 |         return self._mask
45 | 
46 | 
47 | class ProbMask:
48 |     def __init__(self, B, H, L, index, scores, device="cpu"):
49 |         _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
50 |         _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
51 |         indicator = _mask_ex[torch.arange(B)[:, None, None],
52 |                              torch.arange(H)[None, :, None],
53 |                              index, :].to(device)
54 |         self._mask = indicator.view(scores.shape).to(device)
55 | 
56 |     @property
57 |     def mask(self):
58 |         return self._mask
59 | 
60 | 
61 | def gelu(x):
62 |     return 0.5 * x * (1 + torch.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * torch.pow(x, 3))))
63 | 
64 | 
65 | def swish(x):
66 |     return x * torch.sigmoid(x)
67 | 
68 | 
69 | ACT_FNS = {
70 |     'relu': nn.ReLU(),
71 |     'swish': swish,
72 |     'gelu': gelu
73 | }
74 | 
75 | 
76 | class LogSparseAttention(nn.Module):
77 |     """
78 |     Args:
79 |         n_head: Number of heads in the multi-head attention mechanism
80 |         n_embd: The embedding dimension of the input (also the per-head dimension)
81 |         win_len: The length of the input window (sequence length)
82 |         scale: Whether to scale the attention scores by 1/sqrt(d)
83 |         q_len: Kernel size of the causal convolution that produces queries and keys
84 |         sub_len: sub_len of the sparse attention (controls the local window and restart spacing)
85 |         sparse: Whether to use the log-sparse attention mask instead of a full causal mask
86 |         attn_pdrop: Dropout rate applied to the attention weights
87 |         resid_pdrop: Dropout rate applied to the output projection
88 |     """
89 | 
90 |     def __init__(self, n_head, n_embd, win_len, scale: bool, q_len: int, sub_len, sparse=True, attn_pdrop=0.1,
91 |                  resid_pdrop=0.1):
92 |         super(LogSparseAttention, self).__init__()
93 | 
94 |         if sparse:
95 |             print('Activate log sparse!')
96 |             mask = self.log_mask(win_len, sub_len)
97 |         else:
98 |             mask = torch.tril(torch.ones(win_len, win_len)).view(1, 1, win_len, win_len)
99 | 
100 |         self.register_buffer('mask_tri', mask)
101 |         self.n_head = n_head
102 |         self.split_size = n_embd * self.n_head
103 |         self.scale = scale
104 |         self.q_len = q_len
105 |         self.query_key = nn.Conv1d(n_embd, n_embd * n_head * 2, self.q_len)
106 |         self.value = Conv1D(n_embd * n_head, 1, n_embd)
107 |         self.c_proj = Conv1D(n_embd, 1, n_embd * self.n_head)
108 |         self.attn_dropout = nn.Dropout(attn_pdrop)
109 |         self.resid_dropout = nn.Dropout(resid_pdrop)
110 | 
111 |     def log_mask(self, win_len, sub_len):
112 |         mask = torch.zeros((win_len, win_len), dtype=torch.float)
113 |         for i in range(win_len):
114 |             mask[i] = self.row_mask(i, sub_len, win_len)
115 |         return mask.view(1, 1, mask.size(0), mask.size(1))
116 | 
117 |     def row_mask(self, index, sub_len, win_len):
118 |         """
119 |         Remark:
120 |         1. Currently, dense matrices with sparse multiplication are not supported by PyTorch. An efficient
121 |         implementation would require a custom CUDA kernel, which we haven't implemented yet.
122 | 
123 |         2. Our default setting here uses local attention and restart attention.
124 | 
125 |         3. For the index-th row, if its past is smaller than the number of cells the last
126 |         cell can attend to, we allow the current cell to attend to all past cells, to fully
127 |         utilize parallel computing in dense matrices with sparse multiplication."""
128 |         log_l = math.ceil(np.log2(sub_len))
129 |         mask = torch.zeros((win_len), dtype=torch.float)
130 |         if ((win_len // sub_len) * 2 * (log_l) > index):
131 |             mask[:(index + 1)] = 1
132 |         else:
133 |             while (index >= 0):
134 |                 if ((index - log_l + 1) < 0):
135 |                     mask[:index] = 1
136 |                     break
137 |                 mask[index - log_l + 1:(index + 1)] = 1  # Local attention
138 |                 for i in range(0, log_l):
139 |                     new_index = index - log_l + 1 - 2 ** i
140 |                     if ((index - new_index) <= sub_len and new_index >= 0):
141 |                         mask[new_index] = 1
142 |                 index -= sub_len
143 |         return mask
144 | 
145 |     def attn(self, query: torch.Tensor, key, value: torch.Tensor):
146 |         activation = nn.Softmax(dim=-1)
147 |         pre_att = torch.matmul(query, key)
148 |         if self.scale:
149 |             pre_att = pre_att / math.sqrt(value.size(-1))
150 |         mask = self.mask_tri[:, :, :pre_att.size(-2), :pre_att.size(-1)]
151 |         pre_att = pre_att * mask + -1e9 * (1 - mask)
152 |         pre_att = activation(pre_att)
153 |         pre_att = self.attn_dropout(pre_att)
154 |         attn = torch.matmul(pre_att, value)
155 | 
156 |         return attn
157 | 
158 |     def merge_heads(self, x):
159 |         x = x.permute(0, 2, 1, 3).contiguous()
160 |         new_x_shape = x.size()[:-2] + (x.size(-2) * x.size(-1),)
161 |         return x.view(*new_x_shape)
162 | 
163 |     def split_heads(self, x, k=False):
164 |         new_x_shape = x.size()[:-1] + (self.n_head, x.size(-1) // self.n_head)
165 |         x = x.view(*new_x_shape)
166 |         if k:
167 |             return x.permute(0, 2, 3, 1)
168 |         else:
169 |             return x.permute(0, 2, 1, 3)
170 | 
171 |     def forward(self, x):
172 | 
173 |         value = self.value(x)
174 |         qk_x = nn.functional.pad(x.permute(0, 2, 1), pad=(self.q_len - 1, 0))
175 |         query_key = self.query_key(qk_x).permute(0, 2, 1)
176 |         query, key = query_key.split(self.split_size, dim=2)
177 |         query = self.split_heads(query)
178 |         key = self.split_heads(key, k=True)
179 |         value = self.split_heads(value)
180 |         attn = self.attn(query, key, value)
181 |         attn = self.merge_heads(attn)
182 |         attn = self.c_proj(attn)
183 |         attn = self.resid_dropout(attn)
184 |         return attn
185 | 
186 | 
187 | class Conv1D(nn.Module):
188 |     def __init__(self, out_dim, rf, in_dim):
189 |         super(Conv1D, self).__init__()
190 |         self.rf = rf
191 |         self.out_dim = out_dim
192 |         if rf == 1:
193 |             w = torch.empty(in_dim, out_dim)
194 |             nn.init.normal_(w, std=0.02)
195 |             self.w = Parameter(w)
196 |             self.b = Parameter(torch.zeros(out_dim))
197 |         else:
198 |             raise NotImplementedError
199 | 
200 |     def forward(self, x):
201 |         if self.rf == 1:
202 |             size_out = x.size()[:-1] + (self.out_dim,)
203 |             x = torch.addmm(self.b, x.view(-1, x.size(-1)), self.w)
204 |             x = x.view(*size_out)
205 |         else:
206 |             raise NotImplementedError
207 |         return x
208 | 
209 | 
210 | class ProbAttention(nn.Module):
211 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
212 |         super(ProbAttention, self).__init__()
213 |         self.factor = factor
214 |         self.scale = scale
215 |         self.mask_flag = mask_flag
216 |         self.output_attention = output_attention
217 |         self.dropout = nn.Dropout(attention_dropout)
218 | 
219 |     def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
220 |         # Q [B, H, L, D]
221 |         B, H, L_K, E = K.shape
222 |         _, _, L_Q, _ = Q.shape
223 | 
224 |         # calculate the sampled Q_K
225 |         K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
226 |         index_sample = torch.randint(L_K, (L_Q, sample_k))  # real U = U_part(factor*ln(L_k))*L_q
227 |         K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
228 |         Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)
229 | 
230 |         # find the Top_k query with sparsity measurement
231 |         M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
232 |         M_top = M.topk(n_top, sorted=False)[1]
233 | 
234 |         # use the reduced Q to calculate Q_K
235 |         Q_reduce = Q[torch.arange(B)[:, None, None],
236 |                      torch.arange(H)[None, :, None],
237 |                      M_top, :]  # factor*ln(L_q)
238 |         Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k
239 | 
240 |         return Q_K, M_top
241 | 
242 |     def _get_initial_context(self, V, L_Q):
243 |         B, H, L_V, D = V.shape
244 |         if not self.mask_flag:
245 |             # V_sum = V.sum(dim=-2)
246 |             V_sum = V.mean(dim=-2)
247 |             contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
248 |         else:  # use mask
249 |             assert (L_Q == L_V)  # requires that L_Q == L_V, i.e. for self-attention only
250 |             contex = V.cumsum(dim=-2)
251 |         return contex
252 | 
253 |     def _update_context(self, context_in, V, scores, index, L_Q):
254 |         B, H, L_V, D = V.shape
255 | 
256 |         if self.mask_flag:
257 |             attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
258 |             scores.masked_fill_(attn_mask.mask, -np.inf)
259 | 
260 |         attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)
261 | 
262 |         context_in[torch.arange(B)[:, None, None],
263 |                    torch.arange(H)[None, :, None],
264 |                    index, :] = torch.matmul(attn, V).type_as(context_in)
265 |         if self.output_attention:
266 |             attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device)
267 |             attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
268 |             return (context_in, attns)
269 |         else:
270 |             return (context_in, None)
271 | 
272 |     def forward(self, queries, keys, values):
273 |         B, L_Q, H, D = queries.shape
274 |         _, L_K, _, _ = keys.shape
275 | 
276 |         queries = queries.transpose(2, 1)
277 |         keys = keys.transpose(2, 1)
278 |         values = values.transpose(2, 1)
279 | 
280 |         U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
281 |         u = self.factor * np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)
282 | 
283 |         U_part = U_part if U_part < L_K else L_K
284 |         u = u if u < L_Q else L_Q
285 | 
286 |         scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u)
287 | 
288 |         # add scale factor
289 |         scale = self.scale or 1. / sqrt(D)
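        # Note (ProbSparse self-attention): only the top u = factor*ln(L_Q) queries,
        # ranked by the sparsity measure M in _prob_QK, receive exact attention below;
        # all other rows keep the initial context (the cumulative sum of V when masked,
        # the mean of V otherwise).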
290 |         # scale is never None here (it is self.scale, or the 1/sqrt(D) default)
291 |         scores_top = scores_top * scale
292 |         # get the context
293 |         context = self._get_initial_context(values, L_Q)
294 |         # update the context with selected top_k queries
295 |         context, attn = self._update_context(context, values, scores_top, index, L_Q)
296 | 
297 |         return context.transpose(2, 1).contiguous(), attn
298 | 
299 | 
300 | class ProbAttentionLayer(nn.Module):
301 |     def __init__(self, d_model, n_heads,
302 |                  d_keys=None, d_values=None, mix=False):
303 |         super(ProbAttentionLayer, self).__init__()
304 | 
305 |         d_keys = d_keys or (d_model // n_heads)
306 |         d_values = d_values or (d_model // n_heads)
307 | 
308 |         self.inner_attention = ProbAttention(True, attention_dropout=0.1, output_attention=False)
309 |         self.query_projection = nn.Linear(d_model, d_keys * n_heads)
310 |         self.key_projection = nn.Linear(d_model, d_keys * n_heads)
311 |         self.value_projection = nn.Linear(d_model, d_values * n_heads)
312 |         self.out_projection = nn.Linear(d_values * n_heads, d_model)
313 |         self.n_heads = n_heads
314 |         self.mix = mix
315 | 
316 |     def forward(self, x):
317 |         B, L, _ = x.shape
318 |         H = self.n_heads
319 | 
320 |         queries = self.query_projection(x).view(B, L, H, -1)
321 |         keys = self.key_projection(x).view(B, L, H, -1)
322 |         values = self.value_projection(x).view(B, L, H, -1)
323 | 
324 |         out, _ = self.inner_attention(
325 |             queries,
326 |             keys,
327 |             values
328 |         )
329 |         if self.mix:
330 |             out = out.transpose(2, 1).contiguous()
331 |         out = out.view(B, L, -1)
332 | 
333 |         return self.out_projection(out)
334 | 
335 | 
336 | class Encoder(nn.Module):
337 |     def __init__(self, window_size, hidden_dim, attention):
338 |         super(Encoder, self).__init__()
339 |         self.attention = attention
340 |         self.window_size = window_size
341 |         self.ln1 = nn.LayerNorm(normalized_shape=hidden_dim)
342 |         self.ln2 = nn.LayerNorm(normalized_shape=hidden_dim)
343 |         self.ffl = nn.Sequential(
344 |             nn.Linear(in_features=hidden_dim, out_features=hidden_dim),
345 |             nn.GELU(),
346 |             nn.Linear(in_features=hidden_dim, out_features=hidden_dim)
347 |         )
348 | 
349 |     def forward(self, x):
350 |         # x.shape = (b, w, h)
351 |         att_x = self.attention(x)
352 |         att_x = self.ln1(att_x + x)
353 |         f_x = self.ln2(self.ffl(att_x) + att_x)
354 |         return f_x
355 | 
356 | 
357 | class IMDSSN(ContrastiveModel):
358 |     def __init__(self,
359 |                  window_size,
360 |                  in_features,
361 |                  hidden_dim,
362 |                  encoder_nums,
363 |                  n_heads,
364 |                  pe=True,
365 |                  label_norm=True, model_flag="IMDSSN", device="cuda:0", filter_size=0):
366 |         super(IMDSSN, self).__init__(label_norm=label_norm, model_flag=model_flag, device=device)
367 |         if filter_size > 0:
368 |             self.window_size = window_size // filter_size
369 |             self.MaV = nn.AvgPool1d(kernel_size=filter_size, stride=filter_size)
370 |         else:
371 |             self.window_size = window_size
372 |             self.MaV = None
373 | 
374 |         self.input_mapper = nn.Linear(in_features=in_features, out_features=hidden_dim)
375 |         self.pe = PositionEmbedding(dim=hidden_dim,
376 |                                     window_size=self.window_size,
377 |                                     dropout=0,
378 |                                     device=device) if pe else None
379 |         self.in_features = in_features
380 |         self.hidden_dim = hidden_dim
381 |         self.MLSNEncoders = nn.Sequential()
382 |         self.MPSNEncoders = nn.Sequential()
383 |         for _ in range(encoder_nums):
384 |             self.MLSNEncoders.append(Encoder(
385 |                 self.window_size, hidden_dim, attention=LogSparseAttention(
386 |                     n_head=n_heads,
387 |                     n_embd=hidden_dim,
388 |                     win_len=self.window_size,
389 |                     q_len=5,
390 |                     sub_len=10,
391 |                     scale=True,
392 |                 )
393 |             ))
394 |             self.MPSNEncoders.append(Encoder(
395 |                 self.window_size, hidden_dim, attention=ProbAttentionLayer(
396 |                     n_heads=n_heads,
397 |                     d_model=hidden_dim
398 |                 )
399 |             ))
400 |         self.fuse = nn.Linear(in_features=hidden_dim * 2,
401 |                               out_features=hidden_dim,
402 |                               bias=False)
403 |         self.output = nn.Sequential(
404 |             nn.Linear(in_features=self.window_size * hidden_dim, out_features=1)
405 |         )
406 |         self.to(device)
407 | 
408 |     def feature_extractor(self, x):
409 |         # x.shape = (b, w, f)
410 |         if self.MaV is not None:
411 |             x = self.MaV(x.transpose(-1, -2)).transpose(-1, -2)
412 |         x = self.input_mapper(x)
413 |         x = self.pe(x) if self.pe is not None else x
414 |         f1 = self.MLSNEncoders(x)  # (b, w, h)
415 |         f2 = self.MPSNEncoders(x)  # (b, w, h)
416 |         f = torch.concat([f1, f2], dim=-1)  # (b, w, 2*h)
417 |         f = self.fuse(f)  # (b, w, h)
418 |         return torch.flatten(f, start_dim=-2, end_dim=-1)
419 | 
420 |     def forward(self, x, label=None):
421 |         if len(x.shape) < 4:  # the normal forward, default shape with (b, l, f)
422 |             x = self.feature_extractor(x)
423 |             return self.output(x)
424 |         else:  # the forward with negative samples, default shape with (b, num, l, f)
425 |             f_pos, f_apos, f_neg, w = self.generate_contrastive_samples(x, label)
426 |             return pn_rul_compute(self.output, f_pos, f_neg), f_pos, f_apos, f_neg, w
427 | 
--------------------------------------------------------------------------------
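A minimal usage sketch for IMDSSN (illustrative hyper-parameters; it assumes the
models/RULPrediction directory is on the import path, that ContrastiveModel accepts
the constructor arguments exactly as called above, and it exercises only the plain,
non-contrastive forward path):

    import torch
    from IMDSSN import IMDSSN

    # 30-step windows over 14 sensors, two encoders per sub-network (all illustrative)
    model = IMDSSN(window_size=30, in_features=14, hidden_dim=32,
                   encoder_nums=2, n_heads=2, pe=True, device="cpu")
    x = torch.randn(8, 30, 14)   # (batch, window, features)
    rul = model(x)               # forward without negative samples
    print(rul.shape)             # expected: torch.Size([8, 1])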