├── README.md
├── data_provider
    ├── __init__.py
    ├── data_factory.py
    ├── data_loader.py
    ├── m4.py
    └── uea.py
├── exp
    ├── __init__.py
    ├── exp_anomaly_detection.py
    ├── exp_basic.py
    ├── exp_classification.py
    ├── exp_imputation.py
    ├── exp_long_term_forecasting.py
    └── exp_short_term_forecasting.py
├── layers
    ├── AutoCorrelation.py
    ├── Autoformer_EncDec.py
    ├── Conv_Blocks.py
    ├── Crossformer_EncDec.py
    ├── ETSformer_EncDec.py
    ├── Embed.py
    ├── FourierCorrelation.py
    ├── Invertible.py
    ├── LiftingScheme.py
    ├── LiftingSchemeLinear.py
    ├── MultiWaveletCorrelation.py
    ├── Pyraformer_EncDec.py
    ├── SelfAttention_Family.py
    ├── Transformer_EncDec.py
    └── __init__.py
├── models
    ├── AdaWaveNet.py
    ├── Autoformer.py
    ├── Crossformer.py
    ├── DLinear.py
    ├── ETSformer.py
    ├── FEDformer.py
    ├── FiLM.py
    ├── FreTS.py
    ├── Informer.py
    ├── Koopa.py
    ├── LightTS.py
    ├── MICN.py
    ├── Nonstationary_Transformer.py
    ├── PatchTST.py
    ├── Pyraformer.py
    ├── Reformer.py
    ├── TiDE.py
    ├── TimesNet.py
    ├── Transformer.py
    ├── __init__.py
    └── iTransformer.py
├── requirements.txt
├── run.py
├── scripts
    └── long_term_forecast
    │   ├── ECL_script
    │       └── AdaWaveNet.sh
    │   ├── ETT_script
    │       ├── AdaWaveNet_ETTh1.sh
    │       └── AdaWaveNet_ETTm1.sh
    │   ├── Exchange_script
    │       └── AdaWaveNet.sh
    │   ├── ILI_script
    │       └── AdaWaveNet.sh
    │   ├── Solar
    │       └── AdaWaveNet.sh
    │   ├── Traffic_script
    │       └── AdaWaveNet.sh
    │   └── Weather_script
    │       └── AdaWaveNet.sh
└── utils
    ├── __init__.py
    ├── losses.py
    ├── m4_summary.py
    ├── masking.py
    ├── metrics.py
    ├── print_args.py
    ├── timefeatures.py
    └── tools.py


/README.md:
--------------------------------------------------------------------------------
 1 | # AdaWaveNet
 2 | 
 3 | AdaWaveNet is a comprehensive framework for time series forecasting, imputation, and super-resolution tasks. 
 4 | 
 5 | Please refer to the paper for more details.
 6 | 
 7 | https://openreview.net/forum?id=m4bE9Y9FlX
 8 | 
 9 | ```
10 | @article{yu2025adawavenet,
11 |   title={AdaWaveNet: Adaptive Wavelet Network for Time Series Analysis},
12 |   author={Yu, Han and Guo, Peikun and Sano, Akane},
13 |   journal={Transactions on Machine Learning Research},
14 |   year={2025}
15 | }
16 | ```
17 | 
18 | ## Features
19 | 
20 | - **Long-term and Short-term Forecasting**: Supports models like Autoformer, Transformer, TimesNet, and more.
21 | - **Imputation**: Handles missing data in time series.
22 | - **Super Resolution**: Enhances the resolution of time series data.
23 | 
24 | ## Requirements
25 | 
 26 | The project's Python dependencies are listed in `requirements.txt`; install them with `pip install -r requirements.txt`.
27 | 
28 | 
29 | ## Usage
30 | 
31 | The main entry point for running experiments is the `run.py` script. It supports various command-line arguments to configure the experiments. Here is an example of how to run a long-term forecasting task:
32 | 
33 | ```
34 | python -u run.py \
35 | --task_name long_term_forecast \
36 | --is_training 1 \
37 | --root_path ./dataset/weather/ \
38 | --data_path weather.csv \
39 | --model_id weather_96_96 \
40 | --model AdaWaveNet \
41 | --data custom \
42 | --features M \
43 | --seq_len 96 \
44 | --label_len 48 \
45 | --pred_len 96 \
46 | --e_layers 3 \
47 | --d_layers 1 \
48 | --factor 3 \
49 | --enc_in 21 \
50 | --dec_in 21 \
51 | --c_out 21 \
52 | --des 'Exp' \
53 | --d_model 512 \
54 | --d_ff 512 \
55 | --itr 1 \
56 | --lifting_levels 3 \
57 | --lifting_kernel_size 7 \
58 | --n_cluster 4 \
59 | --learning_rate 0.0005 \
60 | --batch_size 16
61 | ```
62 | 
63 | ## Configuration
64 | 
65 | The `run.py` script accepts various arguments to configure the experiment:
66 | 
67 | - `--task_name`: The name of the task (e.g., long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection).
68 | - `--is_training`: Whether to train the model (1 for training, 0 for testing).
 69 | - `--model`: The model to use (e.g., AdaWaveNet, Autoformer, Transformer, TimesNet).
70 | - `--seq_len`, `--label_len`, `--pred_len`: Sequence lengths for input, label, and prediction.
71 | - `--e_layers`, `--d_layers`: Number of encoder and decoder layers.
72 | - `--learning_rate`: Learning rate for the optimizer.
73 | - `--batch_size`: Batch size for training.
74 | 
75 | For a full list of arguments, refer to the `run.py` script.
76 | 
77 | 
78 | ## Acknowledgments
79 | 
80 | This project is based on the Time-Series-Library Repository <https://github.com/thuml/Time-Series-Library> and other state-of-the-art time series models.
81 | 


--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/data_provider/data_factory.py:
--------------------------------------------------------------------------------
 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \
 2 |     MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_Solar
 3 | from data_provider.uea import collate_fn
 4 | from torch.utils.data import DataLoader
 5 | 
# Registry mapping the value of `args.data` to the dataset class that
# `data_provider` instantiates for it. Several keys share one class
# (e.g. both ETTh1 and ETTh2 use Dataset_ETT_hour).
data_dict = {
    'ETTh1': Dataset_ETT_hour,
    'ETTh2': Dataset_ETT_hour,
    'ETTm1': Dataset_ETT_minute,
    'ETTm2': Dataset_ETT_minute,
    'custom': Dataset_Custom,
    'm4': Dataset_M4,
    'PSM': PSMSegLoader,
    'MSL': MSLSegLoader,
    'SMAP': SMAPSegLoader,
    'SMD': SMDSegLoader,
    'SWAT': SWATSegLoader,
    'UEA': UEAloader,
    'Solar': Dataset_Solar
}
21 | 
22 | 
def data_provider(args, flag):
    """Create the dataset and DataLoader for one split of an experiment.

    Args:
        args: experiment configuration; reads `data`, `embed`, `task_name`,
            `batch_size`, `freq`, `root_path`, `num_workers` and, depending on
            the task, `seq_len`, `label_len`, `pred_len`, `data_path`,
            `features`, `target` and `seasonal_patterns`.
        flag: split identifier forwarded to the dataset class. Only the exact
            value 'test' disables shuffling.

    Returns:
        Tuple `(data_set, data_loader)`.
    """
    dataset_cls = data_dict[args.data]
    timeenc = 1 if args.embed == 'timeF' else 0

    is_test = flag == 'test'
    shuffle_flag = not is_test
    drop_last = True
    if is_test and args.task_name not in ('anomaly_detection', 'classification'):
        batch_size = 1  # bsz=1 for evaluation
    else:
        batch_size = args.batch_size  # bsz for train and valid
    freq = args.freq

    if args.task_name == 'anomaly_detection':
        # Anomaly datasets are windowed by seq_len and always keep the last batch.
        data_set = dataset_cls(
            root_path=args.root_path,
            win_size=args.seq_len,
            flag=flag,
        )
        print(flag, len(data_set))
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=False)
        return data_set, data_loader

    if args.task_name == 'classification':
        data_set = dataset_cls(
            root_path=args.root_path,
            flag=flag,
        )
        # args.seq_len is read lazily at collate time, not when the loader is built.
        data_loader = DataLoader(
            data_set,
            batch_size=batch_size,
            shuffle=shuffle_flag,
            num_workers=args.num_workers,
            drop_last=False,
            collate_fn=lambda batch: collate_fn(batch, max_len=args.seq_len)
        )
        return data_set, data_loader

    # Remaining tasks; m4 keeps the last incomplete batch.
    if args.data == 'm4':
        drop_last = False
    data_set = dataset_cls(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq,
        seasonal_patterns=args.seasonal_patterns
    )
    print(flag, len(data_set))
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=drop_last)
    return data_set, data_loader
94 | 


--------------------------------------------------------------------------------
/data_provider/m4.py:
--------------------------------------------------------------------------------
  1 | # This source code is provided for the purposes of scientific reproducibility
  2 | # under the following limited license from Element AI Inc. The code is an
  3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
  4 | # expansion analysis for interpretable time series forecasting,
  5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
  6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
  7 | # International license (CC BY-NC 4.0):
  8 | # https://creativecommons.org/licenses/by-nc/4.0/.  Any commercial use (whether
  9 | # for the benefit of third parties or internally in production) requires an
 10 | # explicit license. The subject-matter of the N-BEATS model and associated
 11 | # materials are the property of Element AI Inc. and may be subject to patent
 12 | # protection. No license to patents is granted hereunder (whether express or
 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
 14 | 
 15 | """
 16 | M4 Dataset
 17 | """
 18 | import logging
 19 | import os
 20 | from collections import OrderedDict
 21 | from dataclasses import dataclass
 22 | from glob import glob
 23 | 
 24 | import numpy as np
 25 | import pandas as pd
 26 | import patoolib
 27 | from tqdm import tqdm
 28 | import logging
 29 | import os
 30 | import pathlib
 31 | import sys
 32 | from urllib import request
 33 | 
 34 | 
 35 | def url_file_name(url: str) -> str:
 36 |     """
 37 |     Extract file name from url.
 38 | 
 39 |     :param url: URL to extract file name from.
 40 |     :return: File name.
 41 |     """
 42 |     return url.split('/')[-1] if len(url) > 0 else ''
 43 | 
 44 | 
 45 | def download(url: str, file_path: str) -> None:
 46 |     """
 47 |     Download a file to the given path.
 48 | 
 49 |     :param url: URL to download
 50 |     :param file_path: Where to download the content.
 51 |     """
 52 | 
 53 |     def progress(count, block_size, total_size):
 54 |         progress_pct = float(count * block_size) / float(total_size) * 100.0
 55 |         sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct))
 56 |         sys.stdout.flush()
 57 | 
 58 |     if not os.path.isfile(file_path):
 59 |         opener = request.build_opener()
 60 |         opener.addheaders = [('User-agent', 'Mozilla/5.0')]
 61 |         request.install_opener(opener)
 62 |         pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True)
 63 |         f, _ = request.urlretrieve(url, file_path, progress)
 64 |         sys.stdout.write('\n')
 65 |         sys.stdout.flush()
 66 |         file_info = os.stat(f)
 67 |         logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.')
 68 |     else:
 69 |         file_info = os.stat(file_path)
 70 |         logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.')
 71 | 
 72 | 
 73 | @dataclass()
 74 | class M4Dataset:
 75 |     ids: np.ndarray
 76 |     groups: np.ndarray
 77 |     frequencies: np.ndarray
 78 |     horizons: np.ndarray
 79 |     values: np.ndarray
 80 | 
 81 |     @staticmethod
 82 |     def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset':
 83 |         """
 84 |         Load cached dataset.
 85 | 
 86 |         :param training: Load training part if training is True, test part otherwise.
 87 |         """
 88 |         info_file = os.path.join(dataset_file, 'M4-info.csv')
 89 |         train_cache_file = os.path.join(dataset_file, 'training.npz')
 90 |         test_cache_file = os.path.join(dataset_file, 'test.npz')
 91 |         m4_info = pd.read_csv(info_file)
 92 |         return M4Dataset(ids=m4_info.M4id.values,
 93 |                          groups=m4_info.SP.values,
 94 |                          frequencies=m4_info.Frequency.values,
 95 |                          horizons=m4_info.Horizon.values,
 96 |                          values=np.load(
 97 |                              train_cache_file if training else test_cache_file,
 98 |                              allow_pickle=True))
 99 | 
100 | 
@dataclass()
class M4Meta:
    # Parallel lists: position i in each list describes seasonal_patterns[i].
    seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly']
    horizons = [6, 8, 18, 13, 14, 48]
    frequencies = [1, 4, 12, 1, 1, 24]

    # Per-pattern lookups built from the lists above.
    horizons_map = dict(zip(seasonal_patterns, horizons))  # different predict length
    frequency_map = dict(zip(seasonal_patterns, frequencies))
    history_size = dict(zip(seasonal_patterns, [1.5, 1.5, 1.5, 10, 10, 10]))  # from interpretable.gin
130 | 
131 | 
def load_m4_info(info_file_path=None) -> pd.DataFrame:
    """
    Load M4Info file.

    :param info_file_path: Path to M4-info.csv. When omitted, falls back to
        '../dataset/m4/M4-info.csv', the same default location used by
        M4Dataset.load.
    :return: Pandas DataFrame of M4Info.
    """
    # The module never defined the INFO_FILE_PATH constant this function used
    # to read, so the path is now resolved here.
    if info_file_path is None:
        info_file_path = os.path.join('../dataset/m4', 'M4-info.csv')
    return pd.read_csv(info_file_path)
139 | 


--------------------------------------------------------------------------------
/data_provider/uea.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | import pandas as pd
  4 | import torch
  5 | 
  6 | 
  7 | def collate_fn(data, max_len=None):
  8 |     """Build mini-batch tensors from a list of (X, mask) tuples. Mask input. Create
  9 |     Args:
 10 |         data: len(batch_size) list of tuples (X, y).
 11 |             - X: torch tensor of shape (seq_length, feat_dim); variable seq_length.
 12 |             - y: torch tensor of shape (num_labels,) : class indices or numerical targets
 13 |                 (for classification or regression, respectively). num_labels > 1 for multi-task models
 14 |         max_len: global fixed sequence length. Used for architectures requiring fixed length input,
 15 |             where the batch length cannot vary dynamically. Longer sequences are clipped, shorter are padded with 0s
 16 |     Returns:
 17 |         X: (batch_size, padded_length, feat_dim) torch tensor of masked features (input)
 18 |         targets: (batch_size, padded_length, feat_dim) torch tensor of unmasked features (output)
 19 |         target_masks: (batch_size, padded_length, feat_dim) boolean torch tensor
 20 |             0 indicates masked values to be predicted, 1 indicates unaffected/"active" feature values
 21 |         padding_masks: (batch_size, padded_length) boolean tensor, 1 means keep vector at this position, 0 means padding
 22 |     """
 23 | 
 24 |     batch_size = len(data)
 25 |     features, labels = zip(*data)
 26 | 
 27 |     # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension)
 28 |     lengths = [X.shape[0] for X in features]  # original sequence length for each time series
 29 |     if max_len is None:
 30 |         max_len = max(lengths)
 31 | 
 32 |     X = torch.zeros(batch_size, max_len, features[0].shape[-1])  # (batch_size, padded_length, feat_dim)
 33 |     for i in range(batch_size):
 34 |         end = min(lengths[i], max_len)
 35 |         X[i, :end, :] = features[i][:end, :]
 36 | 
 37 |     targets = torch.stack(labels, dim=0)  # (batch_size, num_labels)
 38 | 
 39 |     padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16),
 40 |                                  max_len=max_len)  # (batch_size, padded_length) boolean tensor, "1" means keep
 41 | 
 42 |     return X, targets, padding_masks
 43 | 
 44 | 
 45 | def padding_mask(lengths, max_len=None):
 46 |     """
 47 |     Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths,
 48 |     where 1 means keep element at this position (time step)
 49 |     """
 50 |     batch_size = lengths.numel()
 51 |     max_len = max_len or lengths.max_val()  # trick works because of overloading of 'or' operator for non-boolean types
 52 |     return (torch.arange(0, max_len, device=lengths.device)
 53 |             .type_as(lengths)
 54 |             .repeat(batch_size, 1)
 55 |             .lt(lengths.unsqueeze(1)))
 56 | 
 57 | 
 58 | class Normalizer(object):
 59 |     """
 60 |     Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization.
 61 |     """
 62 | 
 63 |     def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
 64 |         """
 65 |         Args:
 66 |             norm_type: choose from:
 67 |                 "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
 68 |                 "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. across only its own rows)
 69 |             mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values
 70 |         """
 71 | 
 72 |         self.norm_type = norm_type
 73 |         self.mean = mean
 74 |         self.std = std
 75 |         self.min_val = min_val
 76 |         self.max_val = max_val
 77 | 
 78 |     def normalize(self, df):
 79 |         """
 80 |         Args:
 81 |             df: input dataframe
 82 |         Returns:
 83 |             df: normalized dataframe
 84 |         """
 85 |         if self.norm_type == "standardization":
 86 |             if self.mean is None:
 87 |                 self.mean = df.mean()
 88 |                 self.std = df.std()
 89 |             return (df - self.mean) / (self.std + np.finfo(float).eps)
 90 | 
 91 |         elif self.norm_type == "minmax":
 92 |             if self.max_val is None:
 93 |                 self.max_val = df.max()
 94 |                 self.min_val = df.min()
 95 |             return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps)
 96 | 
 97 |         elif self.norm_type == "per_sample_std":
 98 |             grouped = df.groupby(by=df.index)
 99 |             return (df - grouped.transform('mean')) / grouped.transform('std')
100 | 
101 |         elif self.norm_type == "per_sample_minmax":
102 |             grouped = df.groupby(by=df.index)
103 |             min_vals = grouped.transform('min')
104 |             return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps)
105 | 
106 |         else:
107 |             raise (NameError(f'Normalize method "{self.norm_type}" not implemented'))
108 | 
109 | 
def interpolate_missing(y):
    """
    Fill NaN entries of pd.Series `y` by linear interpolation (in both
    directions); a series without NaNs is returned as-is.
    """
    has_gaps = y.isna().any()
    return y.interpolate(method='linear', limit_direction='both') if has_gaps else y
117 | 
118 | 
def subsample(y, limit=256, factor=2):
    """
    Keep every `factor`-th element of Series `y` (with a fresh 0-based index)
    when it is longer than `limit`; otherwise return `y` unchanged.
    """
    if len(y) <= limit:
        return y
    thinned = y[::factor]
    return thinned.reset_index(drop=True)
126 | 


--------------------------------------------------------------------------------
/exp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/comp-well-org/AdaWaveNet/a03f080c96af3420c1c5006c8d4c6f3e78449ee6/exp/__init__.py


--------------------------------------------------------------------------------
/exp/exp_anomaly_detection.py:
--------------------------------------------------------------------------------
  1 | from data_provider.data_factory import data_provider
  2 | from exp.exp_basic import Exp_Basic
  3 | from utils.tools import EarlyStopping, adjust_learning_rate, adjustment
  4 | from sklearn.metrics import precision_recall_fscore_support
  5 | from sklearn.metrics import accuracy_score
  6 | import torch.multiprocessing
  7 | 
  8 | torch.multiprocessing.set_sharing_strategy('file_system')
  9 | import torch
 10 | import torch.nn as nn
 11 | from torch import optim
 12 | import os
 13 | import time
 14 | import warnings
 15 | import numpy as np
 16 | 
 17 | warnings.filterwarnings('ignore')
 18 | 
 19 | 
 20 | class Exp_Anomaly_Detection(Exp_Basic):
 21 |     def __init__(self, args):
 22 |         super(Exp_Anomaly_Detection, self).__init__(args)
 23 | 
 24 |     def _build_model(self):
 25 |         model = self.model_dict[self.args.model].Model(self.args).float()
 26 | 
 27 |         if self.args.use_multi_gpu and self.args.use_gpu:
 28 |             model = nn.DataParallel(model, device_ids=self.args.device_ids)
 29 |         return model
 30 | 
 31 |     def _get_data(self, flag):
 32 |         data_set, data_loader = data_provider(self.args, flag)
 33 |         return data_set, data_loader
 34 | 
 35 |     def _select_optimizer(self):
 36 |         model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
 37 |         return model_optim
 38 | 
 39 |     def _select_criterion(self):
 40 |         criterion = nn.MSELoss()
 41 |         return criterion
 42 | 
 43 |     def vali(self, vali_data, vali_loader, criterion):
 44 |         total_loss = []
 45 |         self.model.eval()
 46 |         with torch.no_grad():
 47 |             for i, (batch_x, _) in enumerate(vali_loader):
 48 |                 batch_x = batch_x.float().to(self.device)
 49 | 
 50 |                 outputs = self.model(batch_x, None, None, None)
 51 | 
 52 |                 f_dim = -1 if self.args.features == 'MS' else 0
 53 |                 outputs = outputs[:, :, f_dim:]
 54 |                 pred = outputs.detach().cpu()
 55 |                 true = batch_x.detach().cpu()
 56 | 
 57 |                 loss = criterion(pred, true)
 58 |                 total_loss.append(loss)
 59 |         total_loss = np.average(total_loss)
 60 |         self.model.train()
 61 |         return total_loss
 62 | 
 63 |     def train(self, setting):
 64 |         train_data, train_loader = self._get_data(flag='train')
 65 |         vali_data, vali_loader = self._get_data(flag='val')
 66 |         test_data, test_loader = self._get_data(flag='test')
 67 | 
 68 |         path = os.path.join(self.args.checkpoints, setting)
 69 |         if not os.path.exists(path):
 70 |             os.makedirs(path)
 71 | 
 72 |         time_now = time.time()
 73 | 
 74 |         train_steps = len(train_loader)
 75 |         early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
 76 | 
 77 |         model_optim = self._select_optimizer()
 78 |         criterion = self._select_criterion()
 79 | 
 80 |         for epoch in range(self.args.train_epochs):
 81 |             iter_count = 0
 82 |             train_loss = []
 83 | 
 84 |             self.model.train()
 85 |             epoch_time = time.time()
 86 |             for i, (batch_x, batch_y) in enumerate(train_loader):
 87 |                 iter_count += 1
 88 |                 model_optim.zero_grad()
 89 | 
 90 |                 batch_x = batch_x.float().to(self.device)
 91 | 
 92 |                 outputs = self.model(batch_x, None, None, None)
 93 | 
 94 |                 f_dim = -1 if self.args.features == 'MS' else 0
 95 |                 outputs = outputs[:, :, f_dim:]
 96 |                 loss = criterion(outputs, batch_x)
 97 |                 train_loss.append(loss.item())
 98 | 
 99 |                 if (i + 1) % 100 == 0:
100 |                     print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
101 |                     speed = (time.time() - time_now) / iter_count
102 |                     left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
103 |                     print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
104 |                     iter_count = 0
105 |                     time_now = time.time()
106 | 
107 |                 loss.backward()
108 |                 model_optim.step()
109 | 
110 |             print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
111 |             train_loss = np.average(train_loss)
112 |             vali_loss = self.vali(vali_data, vali_loader, criterion)
113 |             test_loss = self.vali(test_data, test_loader, criterion)
114 | 
115 |             print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
116 |                 epoch + 1, train_steps, train_loss, vali_loss, test_loss))
117 |             early_stopping(vali_loss, self.model, path)
118 |             if early_stopping.early_stop:
119 |                 print("Early stopping")
120 |                 break
121 |             adjust_learning_rate(model_optim, epoch + 1, self.args)
122 | 
123 |         best_model_path = path + '/' + 'checkpoint.pth'
124 |         self.model.load_state_dict(torch.load(best_model_path))
125 | 
126 |         return self.model
127 | 
128 |     def test(self, setting, test=0):
129 |         test_data, test_loader = self._get_data(flag='test')
130 |         train_data, train_loader = self._get_data(flag='train')
131 |         if test:
132 |             print('loading model')
133 |             self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
134 | 
135 |         attens_energy = []
136 |         folder_path = './test_results/' + setting + '/'
137 |         if not os.path.exists(folder_path):
138 |             os.makedirs(folder_path)
139 | 
140 |         self.model.eval()
141 |         self.anomaly_criterion = nn.MSELoss(reduce=False)
142 | 
143 |         # (1) stastic on the train set
144 |         with torch.no_grad():
145 |             for i, (batch_x, batch_y) in enumerate(train_loader):
146 |                 batch_x = batch_x.float().to(self.device)
147 |                 # reconstruction
148 |                 outputs = self.model(batch_x, None, None, None)
149 |                 # criterion
150 |                 score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
151 |                 score = score.detach().cpu().numpy()
152 |                 attens_energy.append(score)
153 | 
154 |         attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1)
155 |         train_energy = np.array(attens_energy)
156 | 
157 |         # (2) find the threshold
158 |         attens_energy = []
159 |         test_labels = []
160 |         for i, (batch_x, batch_y) in enumerate(test_loader):
161 |             batch_x = batch_x.float().to(self.device)
162 |             # reconstruction
163 |             outputs = self.model(batch_x, None, None, None)
164 |             # criterion
165 |             score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
166 |             score = score.detach().cpu().numpy()
167 |             attens_energy.append(score)
168 |             test_labels.append(batch_y)
169 | 
170 |         attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1)
171 |         test_energy = np.array(attens_energy)
172 |         combined_energy = np.concatenate([train_energy, test_energy], axis=0)
173 |         threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio)
174 |         print("Threshold :", threshold)
175 | 
176 |         # (3) evaluation on the test set
177 |         pred = (test_energy > threshold).astype(int)
178 |         test_labels = np.concatenate(test_labels, axis=0).reshape(-1)
179 |         test_labels = np.array(test_labels)
180 |         gt = test_labels.astype(int)
181 | 
182 |         print("pred:   ", pred.shape)
183 |         print("gt:     ", gt.shape)
184 | 
185 |         # (4) detection adjustment
186 |         gt, pred = adjustment(gt, pred)
187 | 
188 |         pred = np.array(pred)
189 |         gt = np.array(gt)
190 |         print("pred: ", pred.shape)
191 |         print("gt:   ", gt.shape)
192 | 
193 |         accuracy = accuracy_score(gt, pred)
194 |         precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary')
195 |         print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
196 |             accuracy, precision,
197 |             recall, f_score))
198 | 
199 |         f = open("result_anomaly_detection.txt", 'a')
200 |         f.write(setting + "  \n")
201 |         f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
202 |             accuracy, precision,
203 |             recall, f_score))
204 |         f.write('\n')
205 |         f.write('\n')
206 |         f.close()
207 |         return
208 | 


--------------------------------------------------------------------------------
/exp/exp_basic.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | from models import Autoformer, LSWaveNet, Transformer, TimesNet, Nonstationary_Transformer, DLinear, FEDformer, \
 4 |     Informer, LightTS, Reformer, ETSformer, Pyraformer, PatchTST, MICN, Crossformer, FiLM, Koopa, TiDE, FreTS, AdaWaveNet, iTransformer
 5 | 
 6 | 
 7 | class Exp_Basic(object):
 8 |     def __init__(self, args):
 9 |         self.args = args
10 |         self.model_dict = {
11 |             'TimesNet': TimesNet,
12 |             'Autoformer': Autoformer,
13 |             'Transformer': Transformer,
14 |             'Nonstationary_Transformer': Nonstationary_Transformer,
15 |             'DLinear': DLinear,
16 |             'FEDformer': FEDformer,
17 |             'Informer': Informer,
18 |             'LightTS': LightTS,
19 |             'Reformer': Reformer,
20 |             'ETSformer': ETSformer,
21 |             'PatchTST': PatchTST,
22 |             'Pyraformer': Pyraformer,
23 |             'MICN': MICN,
24 |             'Crossformer': Crossformer,
25 |             'FiLM': FiLM,
26 |             'LSWaveNet': LSWaveNet,
27 |             'Koopa': Koopa,
28 |             'TiDE': TiDE,
29 |             'FreTS': FreTS,
30 |             'AdaWaveNet':AdaWaveNet,
31 |             'iTransformer': iTransformer
32 |         }
33 |         self.device = self._acquire_device()
34 |         self.model = self._build_model().to(self.device)
35 | 
36 |     def _build_model(self):
37 |         raise NotImplementedError
38 |         return None
39 | 
40 |     def _acquire_device(self):
41 |         if self.args.use_gpu:
42 |             os.environ["CUDA_VISIBLE_DEVICES"] = str(
43 |                 self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
44 |             device = torch.device('cuda:{}'.format(self.args.gpu))
45 |             print('Use GPU: cuda:{}'.format(self.args.gpu))
46 |         else:
47 |             device = torch.device('cpu')
48 |             print('Use CPU')
49 |         return device
50 | 
51 |     def _get_data(self):
52 |         pass
53 | 
54 |     def vali(self):
55 |         pass
56 | 
57 |     def train(self):
58 |         pass
59 | 
60 |     def test(self):
61 |         pass
62 | 


--------------------------------------------------------------------------------
/exp/exp_classification.py:
--------------------------------------------------------------------------------
  1 | from data_provider.data_factory import data_provider
  2 | from exp.exp_basic import Exp_Basic
  3 | from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy
  4 | import torch
  5 | import torch.nn as nn
  6 | from torch import optim
  7 | import os
  8 | import time
  9 | import warnings
 10 | import numpy as np
 11 | import pdb
 12 | 
 13 | warnings.filterwarnings('ignore')
 14 | 
 15 | 
 16 | class Exp_Classification(Exp_Basic):
 17 |     def __init__(self, args):
 18 |         super(Exp_Classification, self).__init__(args)
 19 | 
 20 |     def _build_model(self):
 21 |         # model input depends on data
 22 |         train_data, train_loader = self._get_data(flag='TRAIN')
 23 |         test_data, test_loader = self._get_data(flag='TEST')
 24 |         self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len)
 25 |         self.args.pred_len = 0
 26 |         self.args.enc_in = train_data.feature_df.shape[1]
 27 |         self.args.num_class = len(train_data.class_names)
 28 |         # model init
 29 |         model = self.model_dict[self.args.model].Model(self.args).float()
 30 |         if self.args.use_multi_gpu and self.args.use_gpu:
 31 |             model = nn.DataParallel(model, device_ids=self.args.device_ids)
 32 |         return model
 33 | 
 34 |     def _get_data(self, flag):
 35 |         data_set, data_loader = data_provider(self.args, flag)
 36 |         return data_set, data_loader
 37 | 
 38 |     def _select_optimizer(self):
 39 |         model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
 40 |         return model_optim
 41 | 
 42 |     def _select_criterion(self):
 43 |         criterion = nn.CrossEntropyLoss()
 44 |         return criterion
 45 | 
 46 |     def vali(self, vali_data, vali_loader, criterion):
 47 |         total_loss = []
 48 |         preds = []
 49 |         trues = []
 50 |         self.model.eval()
 51 |         with torch.no_grad():
 52 |             for i, (batch_x, label, padding_mask) in enumerate(vali_loader):
 53 |                 batch_x = batch_x.float().to(self.device)
 54 |                 padding_mask = padding_mask.float().to(self.device)
 55 |                 label = label.to(self.device)
 56 | 
 57 |                 outputs = self.model(batch_x, padding_mask, None, None)
 58 | 
 59 |                 pred = outputs.detach().cpu()
 60 |                 loss = criterion(pred, label.long().squeeze().cpu())
 61 |                 total_loss.append(loss)
 62 | 
 63 |                 preds.append(outputs.detach())
 64 |                 trues.append(label)
 65 | 
 66 |         total_loss = np.average(total_loss)
 67 | 
 68 |         preds = torch.cat(preds, 0)
 69 |         trues = torch.cat(trues, 0)
 70 |         probs = torch.nn.functional.softmax(preds)  # (total_samples, num_classes) est. prob. for each class and sample
 71 |         predictions = torch.argmax(probs, dim=1).cpu().numpy()  # (total_samples,) int class index for each sample
 72 |         trues = trues.flatten().cpu().numpy()
 73 |         accuracy = cal_accuracy(predictions, trues)
 74 | 
 75 |         self.model.train()
 76 |         return total_loss, accuracy
 77 | 
 78 |     def train(self, setting):
 79 |         train_data, train_loader = self._get_data(flag='TRAIN')
 80 |         vali_data, vali_loader = self._get_data(flag='TEST')
 81 |         test_data, test_loader = self._get_data(flag='TEST')
 82 | 
 83 |         path = os.path.join(self.args.checkpoints, setting)
 84 |         if not os.path.exists(path):
 85 |             os.makedirs(path)
 86 | 
 87 |         time_now = time.time()
 88 | 
 89 |         train_steps = len(train_loader)
 90 |         early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
 91 | 
 92 |         model_optim = self._select_optimizer()
 93 |         criterion = self._select_criterion()
 94 | 
 95 |         for epoch in range(self.args.train_epochs):
 96 |             iter_count = 0
 97 |             train_loss = []
 98 | 
 99 |             self.model.train()
100 |             epoch_time = time.time()
101 | 
102 |             for i, (batch_x, label, padding_mask) in enumerate(train_loader):
103 |                 iter_count += 1
104 |                 model_optim.zero_grad()
105 | 
106 |                 batch_x = batch_x.float().to(self.device)
107 |                 padding_mask = padding_mask.float().to(self.device)
108 |                 label = label.to(self.device)
109 | 
110 |                 outputs = self.model(batch_x, padding_mask, None, None)
111 |                 loss = criterion(outputs, label.long().squeeze(-1))
112 |                 train_loss.append(loss.item())
113 | 
114 |                 if (i + 1) % 100 == 0:
115 |                     print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
116 |                     speed = (time.time() - time_now) / iter_count
117 |                     left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
118 |                     print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
119 |                     iter_count = 0
120 |                     time_now = time.time()
121 | 
122 |                 loss.backward()
123 |                 nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0)
124 |                 model_optim.step()
125 | 
126 |             print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
127 |             train_loss = np.average(train_loss)
128 |             vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion)
129 |             test_loss, test_accuracy = self.vali(test_data, test_loader, criterion)
130 | 
131 |             print(
132 |                 "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}"
133 |                 .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy))
134 |             early_stopping(-val_accuracy, self.model, path)
135 |             if early_stopping.early_stop:
136 |                 print("Early stopping")
137 |                 break
138 |             if (epoch + 1) % 5 == 0:
139 |                 adjust_learning_rate(model_optim, epoch + 1, self.args)
140 | 
141 |         best_model_path = path + '/' + 'checkpoint.pth'
142 |         self.model.load_state_dict(torch.load(best_model_path))
143 | 
144 |         return self.model
145 | 
146 |     def test(self, setting, test=0):
147 |         test_data, test_loader = self._get_data(flag='TEST')
148 |         if test:
149 |             print('loading model')
150 |             self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
151 | 
152 |         preds = []
153 |         trues = []
154 |         folder_path = './test_results/' + setting + '/'
155 |         if not os.path.exists(folder_path):
156 |             os.makedirs(folder_path)
157 | 
158 |         self.model.eval()
159 |         with torch.no_grad():
160 |             for i, (batch_x, label, padding_mask) in enumerate(test_loader):
161 |                 batch_x = batch_x.float().to(self.device)
162 |                 padding_mask = padding_mask.float().to(self.device)
163 |                 label = label.to(self.device)
164 | 
165 |                 outputs = self.model(batch_x, padding_mask, None, None)
166 | 
167 |                 preds.append(outputs.detach())
168 |                 trues.append(label)
169 | 
170 |         preds = torch.cat(preds, 0)
171 |         trues = torch.cat(trues, 0)
172 |         print('test shape:', preds.shape, trues.shape)
173 | 
174 |         probs = torch.nn.functional.softmax(preds)  # (total_samples, num_classes) est. prob. for each class and sample
175 |         predictions = torch.argmax(probs, dim=1).cpu().numpy()  # (total_samples,) int class index for each sample
176 |         trues = trues.flatten().cpu().numpy()
177 |         accuracy = cal_accuracy(predictions, trues)
178 | 
179 |         # result save
180 |         folder_path = './results/' + setting + '/'
181 |         if not os.path.exists(folder_path):
182 |             os.makedirs(folder_path)
183 | 
184 |         print('accuracy:{}'.format(accuracy))
185 |         file_name='result_classification.txt'
186 |         f = open(os.path.join(folder_path,file_name), 'a')
187 |         f.write(setting + "  \n")
188 |         f.write('accuracy:{}'.format(accuracy))
189 |         f.write('\n')
190 |         f.write('\n')
191 |         f.close()
192 |         return
193 | 


--------------------------------------------------------------------------------
/layers/AutoCorrelation.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import matplotlib.pyplot as plt
  5 | import numpy as np
  6 | import math
  7 | from math import sqrt
  8 | import os
  9 | 
 10 | 
 11 | class AutoCorrelation(nn.Module):
 12 |     """
 13 |     AutoCorrelation Mechanism with the following two phases:
 14 |     (1) period-based dependencies discovery
 15 |     (2) time delay aggregation
 16 |     This block can replace the self-attention family mechanism seamlessly.
 17 |     """
 18 | 
 19 |     def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
 20 |         super(AutoCorrelation, self).__init__()
 21 |         self.factor = factor
 22 |         self.scale = scale
 23 |         self.mask_flag = mask_flag
 24 |         self.output_attention = output_attention
 25 |         self.dropout = nn.Dropout(attention_dropout)
 26 | 
 27 |     def time_delay_agg_training(self, values, corr):
 28 |         """
 29 |         SpeedUp version of Autocorrelation (a batch-normalization style design)
 30 |         This is for the training phase.
 31 |         """
 32 |         head = values.shape[1]
 33 |         channel = values.shape[2]
 34 |         length = values.shape[3]
 35 |         # find top k
 36 |         top_k = int(self.factor * math.log(length))
 37 |         mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
 38 |         index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
 39 |         weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
 40 |         # update corr
 41 |         tmp_corr = torch.softmax(weights, dim=-1)
 42 |         # aggregation
 43 |         tmp_values = values
 44 |         delays_agg = torch.zeros_like(values).float()
 45 |         for i in range(top_k):
 46 |             pattern = torch.roll(tmp_values, -int(index[i]), -1)
 47 |             delays_agg = delays_agg + pattern * \
 48 |                          (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
 49 |         return delays_agg
 50 | 
 51 |     def time_delay_agg_inference(self, values, corr):
 52 |         """
 53 |         SpeedUp version of Autocorrelation (a batch-normalization style design)
 54 |         This is for the inference phase.
 55 |         """
 56 |         batch = values.shape[0]
 57 |         head = values.shape[1]
 58 |         channel = values.shape[2]
 59 |         length = values.shape[3]
 60 |         # index init
 61 |         init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
 62 |         # find top k
 63 |         top_k = int(self.factor * math.log(length))
 64 |         mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
 65 |         weights, delay = torch.topk(mean_value, top_k, dim=-1)
 66 |         # update corr
 67 |         tmp_corr = torch.softmax(weights, dim=-1)
 68 |         # aggregation
 69 |         tmp_values = values.repeat(1, 1, 1, 2)
 70 |         delays_agg = torch.zeros_like(values).float()
 71 |         for i in range(top_k):
 72 |             tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
 73 |             pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
 74 |             delays_agg = delays_agg + pattern * \
 75 |                          (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
 76 |         return delays_agg
 77 | 
 78 |     def time_delay_agg_full(self, values, corr):
 79 |         """
 80 |         Standard version of Autocorrelation
 81 |         """
 82 |         batch = values.shape[0]
 83 |         head = values.shape[1]
 84 |         channel = values.shape[2]
 85 |         length = values.shape[3]
 86 |         # index init
 87 |         init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda()
 88 |         # find top k
 89 |         top_k = int(self.factor * math.log(length))
 90 |         weights, delay = torch.topk(corr, top_k, dim=-1)
 91 |         # update corr
 92 |         tmp_corr = torch.softmax(weights, dim=-1)
 93 |         # aggregation
 94 |         tmp_values = values.repeat(1, 1, 1, 2)
 95 |         delays_agg = torch.zeros_like(values).float()
 96 |         for i in range(top_k):
 97 |             tmp_delay = init_index + delay[..., i].unsqueeze(-1)
 98 |             pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
 99 |             delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
100 |         return delays_agg
101 | 
102 |     def forward(self, queries, keys, values, attn_mask):
103 |         B, L, H, E = queries.shape
104 |         _, S, _, D = values.shape
105 |         if L > S:
106 |             zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
107 |             values = torch.cat([values, zeros], dim=1)
108 |             keys = torch.cat([keys, zeros], dim=1)
109 |         else:
110 |             values = values[:, :L, :, :]
111 |             keys = keys[:, :L, :, :]
112 | 
113 |         # period-based dependencies
114 |         q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
115 |         k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
116 |         res = q_fft * torch.conj(k_fft)
117 |         corr = torch.fft.irfft(res, dim=-1)
118 | 
119 |         # time delay agg
120 |         if self.training:
121 |             V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
122 |         else:
123 |             V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
124 | 
125 |         if self.output_attention:
126 |             return (V.contiguous(), corr.permute(0, 3, 1, 2))
127 |         else:
128 |             return (V.contiguous(), None)
129 | 
130 | 
class AutoCorrelationLayer(nn.Module):
    """Multi-head wrapper: linear projections around an inner correlation module."""

    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AutoCorrelationLayer, self).__init__()

        # Per-head widths fall back to an even split of d_model.
        if not d_keys:
            d_keys = d_model // n_heads
        if not d_values:
            d_values = d_model // n_heads

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        """Project inputs per head, run the inner correlation, merge the heads.

        Returns the output projection of the merged heads together with
        whatever the inner module returned as its second value.
        """
        batch, q_len, _ = queries.shape
        kv_len = keys.shape[1]
        heads = self.n_heads

        q = self.query_projection(queries).view(batch, q_len, heads, -1)
        k = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v = self.value_projection(values).view(batch, kv_len, heads, -1)

        merged, attn = self.inner_correlation(q, k, v, attn_mask)
        merged = merged.view(batch, q_len, -1)

        return self.out_projection(merged), attn
164 | 


--------------------------------------------------------------------------------
/layers/Autoformer_EncDec.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class my_Layernorm(nn.Module):
  7 |     """
  8 |     Special designed layernorm for the seasonal part
  9 |     """
 10 | 
 11 |     def __init__(self, channels):
 12 |         super(my_Layernorm, self).__init__()
 13 |         self.layernorm = nn.LayerNorm(channels)
 14 | 
 15 |     def forward(self, x):
 16 |         x_hat = self.layernorm(x)
 17 |         bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
 18 |         return x_hat - bias
 19 | 
 20 | 
 21 | class moving_avg(nn.Module):
 22 |     """
 23 |     Moving average block to highlight the trend of time series
 24 |     """
 25 | 
 26 |     def __init__(self, kernel_size, stride):
 27 |         super(moving_avg, self).__init__()
 28 |         self.kernel_size = kernel_size
 29 |         self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
 30 | 
 31 |     def forward(self, x):
 32 |         # padding on the both ends of time series
 33 |         front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
 34 |         end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
 35 |         x = torch.cat([front, x, end], dim=1)
 36 |         x = self.avg(x.permute(0, 2, 1))
 37 |         x = x.permute(0, 2, 1)
 38 |         return x
 39 | 
 40 | 
 41 | class series_decomp(nn.Module):
 42 |     """
 43 |     Series decomposition block
 44 |     """
 45 | 
 46 |     def __init__(self, kernel_size):
 47 |         super(series_decomp, self).__init__()
 48 |         self.moving_avg = moving_avg(kernel_size, stride=1)
 49 | 
 50 |     def forward(self, x):
 51 |         moving_mean = self.moving_avg(x)
 52 |         res = x - moving_mean
 53 |         return res, moving_mean
 54 | 
 55 | 
 56 | class series_decomp_multi(nn.Module):
 57 |     """
 58 |     Multiple Series decomposition block from FEDformer
 59 |     """
 60 | 
 61 |     def __init__(self, kernel_size):
 62 |         super(series_decomp_multi, self).__init__()
 63 |         self.kernel_size = kernel_size
 64 |         self.series_decomp = [series_decomp(kernel) for kernel in kernel_size]
 65 | 
 66 |     def forward(self, x):
 67 |         moving_mean = []
 68 |         res = []
 69 |         for func in self.series_decomp:
 70 |             sea, moving_avg = func(x)
 71 |             moving_mean.append(moving_avg)
 72 |             res.append(sea)
 73 | 
 74 |         sea = sum(res) / len(res)
 75 |         moving_mean = sum(moving_mean) / len(moving_mean)
 76 |         return sea, moving_mean
 77 | 
 78 | 
 79 | class EncoderLayer(nn.Module):
 80 |     """
 81 |     Autoformer encoder layer with the progressive decomposition architecture
 82 |     """
 83 | 
 84 |     def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
 85 |         super(EncoderLayer, self).__init__()
 86 |         d_ff = d_ff or 4 * d_model
 87 |         self.attention = attention
 88 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
 89 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
 90 |         self.decomp1 = series_decomp(moving_avg)
 91 |         self.decomp2 = series_decomp(moving_avg)
 92 |         self.dropout = nn.Dropout(dropout)
 93 |         self.activation = F.relu if activation == "relu" else F.gelu
 94 | 
 95 |     def forward(self, x, attn_mask=None):
 96 |         new_x, attn = self.attention(
 97 |             x, x, x,
 98 |             attn_mask=attn_mask
 99 |         )
100 |         x = x + self.dropout(new_x)
101 |         x, _ = self.decomp1(x)
102 |         y = x
103 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
104 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
105 |         res, _ = self.decomp2(x + y)
106 |         return res, attn
107 | 
108 | 
class Encoder(nn.Module):
    """
    Autoformer encoder: a stack of attention layers, optionally interleaved
    with conv layers, followed by an optional norm.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        if conv_layers is None:
            self.conv_layers = None
        else:
            self.conv_layers = nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Return ``(x, attns)`` with one attention entry per applied layer."""
        attns = []
        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, attn = layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            # Paired attention/conv layers first ...
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, attn = layer(x, attn_mask=attn_mask)
                x = conv(x)
                attns.append(attn)
            # ... then the last attention layer, called without a mask.
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
138 | 
139 | 
class DecoderLayer(nn.Module):
    """
    Autoformer decoder layer: self-attention, cross-attention and a 1x1-conv
    feed-forward, each followed by a series decomposition whose trend parts
    are summed and projected to ``c_out`` channels.
    """

    def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
                 moving_avg=25, dropout=0.1, activation="relu"):
        super(DecoderLayer, self).__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
        self.decomp1 = series_decomp(moving_avg)
        self.decomp2 = series_decomp(moving_avg)
        self.decomp3 = series_decomp(moving_avg)
        self.dropout = nn.Dropout(dropout)
        self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
                                    padding_mode='circular', bias=False)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Return ``(x, residual_trend)`` for one decoder step."""
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x = x + self.dropout(self_out)
        x, trend1 = self.decomp1(x)

        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x = x + self.dropout(cross_out)
        x, trend2 = self.decomp2(x)

        # Feed-forward: Conv1d wants channels on axis 1, hence the transposes.
        hidden = self.dropout(self.activation(self.conv1(x.transpose(-1, 1))))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))
        x, trend3 = self.decomp3(x + hidden)

        trend_sum = trend1 + trend2 + trend3
        residual_trend = self.projection(trend_sum.permute(0, 2, 1)).transpose(1, 2)
        return x, residual_trend
180 | 
181 | 
class Decoder(nn.Module):
    """
    Autoformer decoder: runs the layers in order, accumulating each layer's
    residual trend into ``trend``, then applies the optional norm and projection.
    """

    def __init__(self, layers, norm_layer=None, projection=None):
        super(Decoder, self).__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
        """Return ``(x, trend)`` after all layers have been applied."""
        for decoder_layer in self.layers:
            x, layer_trend = decoder_layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
            trend = trend + layer_trend

        for post in (self.norm, self.projection):
            if post is not None:
                x = post(x)
        return x, trend
204 | 


--------------------------------------------------------------------------------
/layers/Conv_Blocks.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class Inception_Block_V1(nn.Module):
 6 |     def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
 7 |         super(Inception_Block_V1, self).__init__()
 8 |         self.in_channels = in_channels
 9 |         self.out_channels = out_channels
10 |         self.num_kernels = num_kernels
11 |         kernels = []
12 |         for i in range(self.num_kernels):
13 |             kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=2 * i + 1, padding=i))
14 |         self.kernels = nn.ModuleList(kernels)
15 |         if init_weight:
16 |             self._initialize_weights()
17 | 
18 |     def _initialize_weights(self):
19 |         for m in self.modules():
20 |             if isinstance(m, nn.Conv2d):
21 |                 nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
22 |                 if m.bias is not None:
23 |                     nn.init.constant_(m.bias, 0)
24 | 
25 |     def forward(self, x):
26 |         res_list = []
27 |         for i in range(self.num_kernels):
28 |             res_list.append(self.kernels[i](x))
29 |         res = torch.stack(res_list, dim=-1).mean(-1)
30 |         return res
31 | 
32 | 
class Inception_Block_V2(nn.Module):
    """Averages the outputs of paired 1xK / Kx1 Conv2d branches plus a 1x1 branch.

    ``num_kernels // 2`` pairs are created (K = 3, 5, ...), followed by one
    1x1 convolution, i.e. ``2 * (num_kernels // 2) + 1`` branches in total.
    """

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        super(Inception_Block_V2, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        kernels = []
        for i in range(self.num_kernels // 2):
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[1, 2 * i + 3], padding=[0, i + 1]))
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[2 * i + 3, 1], padding=[i + 1, 0]))
        kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
        self.kernels = nn.ModuleList(kernels)
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal init for every Conv2d weight; biases set to zero."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        # Iterate over the branches that actually exist: indexing with
        # range(num_kernels + 1) overruns the list when num_kernels is odd.
        res_list = [kernel(x) for kernel in self.kernels]
        res = torch.stack(res_list, dim=-1).mean(-1)
        return res
61 | 


--------------------------------------------------------------------------------
/layers/Crossformer_EncDec.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from einops import rearrange, repeat
  4 | from layers.SelfAttention_Family import TwoStageAttentionLayer
  5 | 
  6 | 
  7 | class SegMerging(nn.Module):
  8 |     def __init__(self, d_model, win_size, norm_layer=nn.LayerNorm):
  9 |         super().__init__()
 10 |         self.d_model = d_model
 11 |         self.win_size = win_size
 12 |         self.linear_trans = nn.Linear(win_size * d_model, d_model)
 13 |         self.norm = norm_layer(win_size * d_model)
 14 | 
 15 |     def forward(self, x):
 16 |         batch_size, ts_d, seg_num, d_model = x.shape
 17 |         pad_num = seg_num % self.win_size
 18 |         if pad_num != 0:
 19 |             pad_num = self.win_size - pad_num
 20 |             x = torch.cat((x, x[:, :, -pad_num:, :]), dim=-2)
 21 | 
 22 |         seg_to_merge = []
 23 |         for i in range(self.win_size):
 24 |             seg_to_merge.append(x[:, :, i::self.win_size, :])
 25 |         x = torch.cat(seg_to_merge, -1)
 26 | 
 27 |         x = self.norm(x)
 28 |         x = self.linear_trans(x)
 29 | 
 30 |         return x
 31 | 
 32 | 
 33 | class scale_block(nn.Module):
 34 |     def __init__(self, configs, win_size, d_model, n_heads, d_ff, depth, dropout, \
 35 |                  seg_num=10, factor=10):
 36 |         super(scale_block, self).__init__()
 37 | 
 38 |         if win_size > 1:
 39 |             self.merge_layer = SegMerging(d_model, win_size, nn.LayerNorm)
 40 |         else:
 41 |             self.merge_layer = None
 42 | 
 43 |         self.encode_layers = nn.ModuleList()
 44 | 
 45 |         for i in range(depth):
 46 |             self.encode_layers.append(TwoStageAttentionLayer(configs, seg_num, factor, d_model, n_heads, \
 47 |                                                              d_ff, dropout))
 48 | 
 49 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
 50 |         _, ts_dim, _, _ = x.shape
 51 | 
 52 |         if self.merge_layer is not None:
 53 |             x = self.merge_layer(x)
 54 | 
 55 |         for layer in self.encode_layers:
 56 |             x = layer(x)
 57 | 
 58 |         return x, None
 59 | 
 60 | 
 61 | class Encoder(nn.Module):
 62 |     def __init__(self, attn_layers):
 63 |         super(Encoder, self).__init__()
 64 |         self.encode_blocks = nn.ModuleList(attn_layers)
 65 | 
 66 |     def forward(self, x):
 67 |         encode_x = []
 68 |         encode_x.append(x)
 69 | 
 70 |         for block in self.encode_blocks:
 71 |             x, attns = block(x)
 72 |             encode_x.append(x)
 73 | 
 74 |         return encode_x, None
 75 | 
 76 | 
 77 | class DecoderLayer(nn.Module):
 78 |     def __init__(self, self_attention, cross_attention, seg_len, d_model, d_ff=None, dropout=0.1):
 79 |         super(DecoderLayer, self).__init__()
 80 |         self.self_attention = self_attention
 81 |         self.cross_attention = cross_attention
 82 |         self.norm1 = nn.LayerNorm(d_model)
 83 |         self.norm2 = nn.LayerNorm(d_model)
 84 |         self.dropout = nn.Dropout(dropout)
 85 |         self.MLP1 = nn.Sequential(nn.Linear(d_model, d_model),
 86 |                                   nn.GELU(),
 87 |                                   nn.Linear(d_model, d_model))
 88 |         self.linear_pred = nn.Linear(d_model, seg_len)
 89 | 
 90 |     def forward(self, x, cross):
 91 |         batch = x.shape[0]
 92 |         x = self.self_attention(x)
 93 |         x = rearrange(x, 'b ts_d out_seg_num d_model -> (b ts_d) out_seg_num d_model')
 94 | 
 95 |         cross = rearrange(cross, 'b ts_d in_seg_num d_model -> (b ts_d) in_seg_num d_model')
 96 |         tmp, attn = self.cross_attention(x, cross, cross, None, None, None,)
 97 |         x = x + self.dropout(tmp)
 98 |         y = x = self.norm1(x)
 99 |         y = self.MLP1(y)
100 |         dec_output = self.norm2(x + y)
101 | 
102 |         dec_output = rearrange(dec_output, '(b ts_d) seg_dec_num d_model -> b ts_d seg_dec_num d_model', b=batch)
103 |         layer_predict = self.linear_pred(dec_output)
104 |         layer_predict = rearrange(layer_predict, 'b out_d seg_num seg_len -> b (out_d seg_num) seg_len')
105 | 
106 |         return dec_output, layer_predict
107 | 
108 | 
class Decoder(nn.Module):
    """Crossformer decoder: sums the per-layer predictions; layer ``i`` reads ``cross[i]``."""

    def __init__(self, layers):
        super(Decoder, self).__init__()
        self.decode_layers = nn.ModuleList(layers)

    def forward(self, x, cross):
        """Return the summed prediction rearranged to ``b (seg_num seg_len) out_d``."""
        ts_d = x.shape[1]
        final_predict = None

        for idx, decode_layer in enumerate(self.decode_layers):
            x, layer_predict = decode_layer(x, cross[idx])
            if final_predict is not None:
                final_predict = final_predict + layer_predict
            else:
                final_predict = layer_predict

        final_predict = rearrange(final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d=ts_d)

        return final_predict
132 | 


--------------------------------------------------------------------------------
/layers/Embed.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.nn.utils import weight_norm
  5 | import math
  6 | 
  7 | 
  8 | class PositionalEmbedding(nn.Module):
  9 |     def __init__(self, d_model, max_len=5000):
 10 |         super(PositionalEmbedding, self).__init__()
 11 |         # Compute the positional encodings once in log space.
 12 |         pe = torch.zeros(max_len, d_model).float()
 13 |         pe.require_grad = False
 14 | 
 15 |         position = torch.arange(0, max_len).float().unsqueeze(1)
 16 |         div_term = (torch.arange(0, d_model, 2).float()
 17 |                     * -(math.log(10000.0) / d_model)).exp()
 18 | 
 19 |         pe[:, 0::2] = torch.sin(position * div_term)
 20 |         pe[:, 1::2] = torch.cos(position * div_term)
 21 | 
 22 |         pe = pe.unsqueeze(0)
 23 |         self.register_buffer('pe', pe)
 24 | 
 25 |     def forward(self, x):
 26 |         return self.pe[:, :x.size(1)]
 27 | 
 28 | 
 29 | class TokenEmbedding(nn.Module):
 30 |     def __init__(self, c_in, d_model):
 31 |         super(TokenEmbedding, self).__init__()
 32 |         padding = 1 if torch.__version__ >= '1.5.0' else 2
 33 |         self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
 34 |                                    kernel_size=3, padding=padding, padding_mode='circular', bias=False)
 35 |         for m in self.modules():
 36 |             if isinstance(m, nn.Conv1d):
 37 |                 nn.init.kaiming_normal_(
 38 |                     m.weight, mode='fan_in', nonlinearity='leaky_relu')
 39 | 
 40 |     def forward(self, x):
 41 |         x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
 42 |         return x
 43 | 
 44 | 
 45 | class FixedEmbedding(nn.Module):
 46 |     def __init__(self, c_in, d_model):
 47 |         super(FixedEmbedding, self).__init__()
 48 | 
 49 |         w = torch.zeros(c_in, d_model).float()
 50 |         w.require_grad = False
 51 | 
 52 |         position = torch.arange(0, c_in).float().unsqueeze(1)
 53 |         div_term = (torch.arange(0, d_model, 2).float()
 54 |                     * -(math.log(10000.0) / d_model)).exp()
 55 | 
 56 |         w[:, 0::2] = torch.sin(position * div_term)
 57 |         w[:, 1::2] = torch.cos(position * div_term)
 58 | 
 59 |         self.emb = nn.Embedding(c_in, d_model)
 60 |         self.emb.weight = nn.Parameter(w, requires_grad=False)
 61 | 
 62 |     def forward(self, x):
 63 |         return self.emb(x).detach()
 64 | 
 65 | 
 66 | class TemporalEmbedding(nn.Module):
 67 |     def __init__(self, d_model, embed_type='fixed', freq='h'):
 68 |         super(TemporalEmbedding, self).__init__()
 69 | 
 70 |         minute_size = 4
 71 |         hour_size = 24
 72 |         weekday_size = 7
 73 |         day_size = 32
 74 |         month_size = 13
 75 | 
 76 |         Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
 77 |         if freq == 't':
 78 |             self.minute_embed = Embed(minute_size, d_model)
 79 |         self.hour_embed = Embed(hour_size, d_model)
 80 |         self.weekday_embed = Embed(weekday_size, d_model)
 81 |         self.day_embed = Embed(day_size, d_model)
 82 |         self.month_embed = Embed(month_size, d_model)
 83 | 
 84 |     def forward(self, x):
 85 |         x = x.long()
 86 |         minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
 87 |             self, 'minute_embed') else 0.
 88 |         hour_x = self.hour_embed(x[:, :, 3])
 89 |         weekday_x = self.weekday_embed(x[:, :, 2])
 90 |         day_x = self.day_embed(x[:, :, 1])
 91 |         month_x = self.month_embed(x[:, :, 0])
 92 | 
 93 |         return hour_x + weekday_x + day_x + month_x + minute_x
 94 | 
 95 | 
 96 | class TimeFeatureEmbedding(nn.Module):
 97 |     def __init__(self, d_model, embed_type='timeF', freq='h'):
 98 |         super(TimeFeatureEmbedding, self).__init__()
 99 | 
100 |         freq_map = {'h': 4, 't': 5, 's': 6,
101 |                     'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
102 |         d_inp = freq_map[freq]
103 |         self.embed = nn.Linear(d_inp, d_model, bias=False)
104 | 
105 |     def forward(self, x):
106 |         return self.embed(x)
107 | 
108 | 
class DataEmbedding(nn.Module):
    """Value + (optional) temporal + positional embedding, followed by dropout.

    The temporal branch is ``TimeFeatureEmbedding`` when
    ``embed_type == 'timeF'`` and ``TemporalEmbedding`` otherwise.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super().__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Embed ``x``; the temporal term is added only when ``x_mark`` is not None."""
        embedded = self.value_embedding(x)
        if x_mark is not None:
            embedded = embedded + self.temporal_embedding(x_mark)
        embedded = embedded + self.position_embedding(x)
        return self.dropout(embedded)
127 | 
128 | 
class DataEmbedding_inverted(nn.Module):
    """Embed whole series: each variate's time axis is projected to ``d_model``.

    ``c_in`` is the length of the axis that ends up last after the permute.
    ``embed_type`` and ``freq`` are accepted but unused.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super().__init__()
        self.value_embedding = nn.Linear(c_in, d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Swap the last two axes of ``x``, optionally append the swapped ``x_mark``
        rows along dim 1, project, and apply dropout."""
        # tokens: [Batch Variate Time]
        tokens = x.permute(0, 2, 1)
        if x_mark is not None:
            # Time-mark channels are treated as extra variates.
            tokens = torch.cat([tokens, x_mark.permute(0, 2, 1)], 1)
        # result: [Batch Variate d_model]
        return self.dropout(self.value_embedding(tokens))
144 | 
145 | 
class DataEmbedding_wo_pos(nn.Module):
    """Value + (optional) temporal embedding with dropout; no positional term is added.

    A ``PositionalEmbedding`` is still constructed and registered as
    ``position_embedding`` but ``forward`` never uses it.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super().__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type == 'timeF':
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Embed ``x``; the temporal term is added only when ``x_mark`` is not None."""
        embedded = self.value_embedding(x)
        if x_mark is not None:
            embedded = embedded + self.temporal_embedding(x_mark)
        return self.dropout(embedded)
163 | 
164 | 
class PatchEmbedding(nn.Module):
    """Cut each series into patches and embed every patch into ``d_model`` dims."""

    def __init__(self, d_model, patch_len, stride, padding, dropout):
        super().__init__()
        # Patching parameters; the series is right-padded by replication.
        self.patch_len = patch_len
        self.stride = stride
        self.padding_patch_layer = nn.ReplicationPad1d((0, padding))

        # Bias-free projection of each patch onto a d_model-dim vector.
        self.value_embedding = nn.Linear(patch_len, d_model, bias=False)

        # Positional embedding over the patch axis.
        self.position_embedding = PositionalEmbedding(d_model)

        # Residual dropout.
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Return ``(embedded_patches, n_vars)`` where ``n_vars = x.shape[1]``.

        After padding and unfolding, the first two axes are merged so the
        result has shape ``(x.shape[0] * n_vars, num_patches, d_model)``.
        """
        n_vars = x.shape[1]

        patches = self.padding_patch_layer(x).unfold(
            dimension=-1, size=self.patch_len, step=self.stride)
        patches = torch.reshape(
            patches,
            (patches.shape[0] * patches.shape[1], patches.shape[2], patches.shape[3]),
        )

        embedded = self.value_embedding(patches) + self.position_embedding(patches)
        return self.dropout(embedded), n_vars
191 | 


--------------------------------------------------------------------------------
/layers/FourierCorrelation.py:
--------------------------------------------------------------------------------
  1 | # coding=utf-8
  2 | # author=maziqing
  3 | # email=maziqing.mzq@alibaba-inc.com
  4 | 
  5 | import numpy as np
  6 | import torch
  7 | import torch.nn as nn
  8 | 
  9 | 
 10 | def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
 11 |     """
 12 |     get modes on frequency domain:
 13 |     'random' means sampling randomly;
 14 |     'else' means sampling the lowest modes;
 15 |     """
 16 |     modes = min(modes, seq_len // 2)
 17 |     if mode_select_method == 'random':
 18 |         index = list(range(0, seq_len // 2))
 19 |         np.random.shuffle(index)
 20 |         index = index[:modes]
 21 |     else:
 22 |         index = list(range(0, modes))
 23 |     index.sort()
 24 |     return index
 25 | 
 26 | 
 27 | # ########## fourier layer #############
 28 | class FourierBlock(nn.Module):
 29 |     def __init__(self, in_channels, out_channels, seq_len, modes=0, mode_select_method='random'):
 30 |         super(FourierBlock, self).__init__()
 31 |         print('fourier enhanced block used!')
 32 |         """
 33 |         1D Fourier block. It performs representation learning on frequency domain, 
 34 |         it does FFT, linear transform, and Inverse FFT.    
 35 |         """
 36 |         # get modes on frequency domain
 37 |         self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method)
 38 |         print('modes={}, index={}'.format(modes, self.index))
 39 | 
 40 |         self.scale = (1 / (in_channels * out_channels))
 41 |         self.weights1 = nn.Parameter(
 42 |             self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index), dtype=torch.float))
 43 |         self.weights2 = nn.Parameter(
 44 |             self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index), dtype=torch.float))
 45 | 
 46 |     # Complex multiplication
 47 |     def compl_mul1d(self, order, x, weights):
 48 |         x_flag = True
 49 |         w_flag = True
 50 |         if not torch.is_complex(x):
 51 |             x_flag = False
 52 |             x = torch.complex(x, torch.zeros_like(x).to(x.device))
 53 |         if not torch.is_complex(weights):
 54 |             w_flag = False
 55 |             weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))
 56 |         if x_flag or w_flag:
 57 |             return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag),
 58 |                                  torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real))
 59 |         else:
 60 |             return torch.einsum(order, x.real, weights.real)
 61 | 
 62 |     def forward(self, q, k, v, mask):
 63 |         # size = [B, L, H, E]
 64 |         B, L, H, E = q.shape
 65 |         x = q.permute(0, 2, 3, 1)
 66 |         # Compute Fourier coefficients
 67 |         x_ft = torch.fft.rfft(x, dim=-1)
 68 |         # Perform Fourier neural operations
 69 |         out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat)
 70 |         for wi, i in enumerate(self.index):
 71 |             if i >= x_ft.shape[3] or wi >= out_ft.shape[3]:
 72 |                 continue
 73 |             out_ft[:, :, :, wi] = self.compl_mul1d("bhi,hio->bho", x_ft[:, :, :, i],
 74 |                                                    torch.complex(self.weights1, self.weights2)[:, :, :, wi])
 75 |         # Return to time domain
 76 |         x = torch.fft.irfft(out_ft, n=x.size(-1))
 77 |         return (x, None)
 78 | 
 79 | 
 80 | # ########## Fourier Cross Former ####################
class FourierCrossAttention(nn.Module):
    """Cross attention computed on a selected subset of Fourier modes.

    Queries and keys are transformed with a real FFT, a subset of modes is
    gathered for each, an attention-like product is formed between them, the
    result is mixed with learned complex weights, scattered back into a full
    spectrum and transformed back with an inverse FFT.

    ``policy`` is accepted but not used in this class. The complex weights
    are stored as two real parameters (``weights1`` real part, ``weights2``
    imaginary part) sized by the number of selected query modes.
    """

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
                 activation='tanh', policy=0, num_heads=8):
        super(FourierCrossAttention, self).__init__()
        print(' fourier enhanced cross attention used!')
        """
        1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.    
        """
        self.activation = activation
        self.in_channels = in_channels
        self.out_channels = out_channels
        # get modes for queries and keys (& values) on frequency domain
        self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
        self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)

        print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q))
        print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv))

        # Weights are drawn uniformly from [0, scale); shape is
        # (num_heads, in_channels // num_heads, out_channels // num_heads, len(index_q)).
        self.scale = (1 / (in_channels * out_channels))
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q), dtype=torch.float))
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q), dtype=torch.float))

    # Complex multiplication
    def compl_mul1d(self, order, x, weights):
        """Einsum ``x`` with ``weights`` following ``order``, treating both as complex.

        Real operands are promoted to complex with a zero imaginary part. If
        neither operand was complex on entry the real einsum is returned;
        otherwise the complex product is assembled from four real einsums.
        """
        x_flag = True
        w_flag = True
        if not torch.is_complex(x):
            x_flag = False
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not torch.is_complex(weights):
            w_flag = False
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))
        if x_flag or w_flag:
            return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag),
                                 torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real))
        else:
            return torch.einsum(order, x.real, weights.real)

    def forward(self, q, k, v, mask):
        """Return ``(out, None)`` with ``out`` of shape ``[B, H, E, L]`` (L from ``q``).

        ``mask`` is not used. ``v`` is permuted but never read afterwards:
        the key spectrum is used in the place where values would be applied.
        Raises ``Exception`` when ``self.activation`` is neither ``'tanh'``
        nor ``'softmax'``.
        """
        # size = [B, L, H, E]
        B, L, H, E = q.shape
        xq = q.permute(0, 2, 3, 1)  # size = [B, H, E, L]
        xk = k.permute(0, 2, 3, 1)
        xv = v.permute(0, 2, 3, 1)  # NOTE(review): xv is unused below — confirm this is intended

        # Compute Fourier coefficients
        # Gather the selected query modes; indices beyond the spectrum stay zero.
        xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat)
        xq_ft = torch.fft.rfft(xq, dim=-1)
        for i, j in enumerate(self.index_q):
            if j >= xq_ft.shape[3]:
                continue
            xq_ft_[:, :, :, i] = xq_ft[:, :, :, j]
        # Same gathering for the key modes.
        xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat)
        xk_ft = torch.fft.rfft(xk, dim=-1)
        for i, j in enumerate(self.index_kv):
            if j >= xk_ft.shape[3]:
                continue
            xk_ft_[:, :, :, i] = xk_ft[:, :, :, j]

        # perform attention mechanism on frequency domain
        xqk_ft = (self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_))
        if self.activation == 'tanh':
            # tanh is applied to the real and imaginary parts separately.
            xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh())
        elif self.activation == 'softmax':
            # softmax over the magnitude; the result has zero imaginary part.
            xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
            xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
        else:
            raise Exception('{} actiation function is not implemented'.format(self.activation))
        # The key spectrum (xk_ft_) is applied here, not a value spectrum.
        xqkv_ft = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_)
        xqkvw = self.compl_mul1d("bhex,heox->bhox", xqkv_ft, torch.complex(self.weights1, self.weights2))
        # Scatter the mixed modes back to their original frequency positions.
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
        for i, j in enumerate(self.index_q):
            if i >= xqkvw.shape[3] or j >= out_ft.shape[3]:
                continue
            out_ft[:, :, :, j] = xqkvw[:, :, :, i]
        # Return to time domain
        out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
        return (out, None)
161 | 


--------------------------------------------------------------------------------
/layers/Invertible.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | class RevIN(nn.Module):
 5 |     def __init__(self, num_features: int, eps=1e-5, affine=True):
 6 |         """
 7 |         :param num_features: the number of features or channels
 8 |         :param eps: a value added for numerical stability
 9 |         :param affine: if True, RevIN has learnable affine parameters
10 |         """
11 |         super(RevIN, self).__init__()
12 | 
13 |         self.num_features = num_features
14 |         self.eps = eps
15 |         self.affine = affine
16 |         
17 |         if self.affine:
18 |             self._init_params()
19 | 
20 |     def forward(self, x, mode:str):
21 |         if mode == 'norm':
22 |             self._get_statistics(x)
23 |             x = self._normalize(x)
24 |         
25 |         elif mode == 'denorm':
26 |             x = self._denormalize(x)
27 |         
28 |         else: raise NotImplementedError
29 | 
30 |         return x
31 | 
32 |     def _init_params(self):
33 |         # initialize RevIN params: (C,)
34 |         self.affine_weight = nn.Parameter(torch.ones(self.num_features))
35 |         self.affine_bias = nn.Parameter(torch.zeros(self.num_features))
36 | 
37 |     def _get_statistics(self, x):
38 |         dim2reduce = tuple(range(1, x.ndim-1))
39 |         self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
40 |         self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()
41 | 
42 |     def _normalize(self, x):
43 |         x = x - self.mean
44 |         x = x / self.stdev
45 |         if self.affine:
46 |             x = x * self.affine_weight
47 |             x = x + self.affine_bias
48 | 
49 |         return x
50 | 
51 |     def _denormalize(self, x):
52 |         if self.affine:
53 |             x = x - self.affine_bias
54 |             x = x / (self.affine_weight + self.eps*self.eps)
55 |         x = x * self.stdev
56 |         x = x + self.mean
57 |         
58 |         return x


--------------------------------------------------------------------------------
/layers/LiftingScheme.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | class Splitting(nn.Module):
  6 |     def __init__(self, channel_first):
  7 |         super(Splitting, self).__init__()
  8 |         # Deciding the stride base on the direction
  9 |         self.channel_first = channel_first
 10 |         if(channel_first):
 11 |             self.conv_even = lambda x: x[:, :, ::2]
 12 |             self.conv_odd = lambda x: x[:, :, 1::2]
 13 |         else:
 14 |             self.conv_even = lambda x: x[:, ::2, :]
 15 |             self.conv_odd = lambda x: x[:, 1::2, :]
 16 | 
 17 |     def forward(self, x):
 18 |         '''Returns the odd and even part'''
 19 |         return (self.conv_even(x), self.conv_odd(x))
 20 | 
 21 | class LiftingScheme(nn.Module):
 22 |     def __init__(self, in_channels, input_size, modified=True, splitting=True, k_size=4, simple_lifting=True):
 23 |         super(LiftingScheme, self).__init__()
 24 |         self.modified = modified
 25 |         kernel_size = k_size
 26 |         pad = (k_size // 2, k_size - 1 - k_size // 2)
 27 | 
 28 |         self.splitting = splitting
 29 |         self.split = Splitting(channel_first=True)
 30 | 
 31 |         # Dynamic build sequential network
 32 |         modules_P = []
 33 |         modules_U = []
 34 |         prev_size = 1
 35 | 
 36 |         # HARD CODED Architecture
 37 |         if simple_lifting:            
 38 |             modules_P += [
 39 |                 nn.ReflectionPad1d(pad),
 40 |                 nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1, groups=in_channels),
 41 |                 nn.GELU(),
 42 |                 nn.LayerNorm([in_channels, input_size // 2])
 43 |             ]
 44 |             modules_U += [
 45 |                 nn.ReflectionPad1d(pad),
 46 |                 nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1, groups=in_channels),
 47 |                 nn.GELU(),
 48 |                 nn.LayerNorm([in_channels, input_size // 2])
 49 |             ]
 50 |         else:
 51 |             size_hidden = 2
 52 |             
 53 |             modules_P += [
 54 |                 nn.ReflectionPad1d(pad),
 55 |                 nn.Conv1d(in_channels*prev_size, in_channels*size_hidden, kernel_size=kernel_size, stride=1, groups=in_channels),
 56 |                 nn.Tanh()
 57 |             ]
 58 |             modules_U += [
 59 |                 nn.ReflectionPad1d(pad),
 60 |                 nn.Conv1d(in_channels*prev_size, in_channels*size_hidden, kernel_size=kernel_size, stride=1, groups=in_channels),
 61 |                 nn.Tanh()
 62 |             ]
 63 |             prev_size = size_hidden
 64 | 
 65 |             # Final dense
 66 |             modules_P += [
 67 |                 nn.Conv1d(in_channels*prev_size, in_channels, kernel_size=1, stride=1, groups=in_channels),
 68 |                 nn.Tanh()
 69 |             ]
 70 |             modules_U += [
 71 |                 nn.Conv1d(in_channels*prev_size, in_channels, kernel_size=1, stride=1, groups=in_channels),
 72 |                 nn.Tanh()
 73 |             ]
 74 | 
 75 |         self.P = nn.Sequential(*modules_P)
 76 |         self.U = nn.Sequential(*modules_U)
 77 | 
 78 |     def forward(self, x):
 79 |         if self.splitting:
 80 |             (x_even, x_odd) = self.split(x)
 81 |         else:
 82 |             (x_even, x_odd) = x
 83 | 
 84 |         if self.modified:
 85 |             c = x_even + self.U(x_odd)
 86 |             d = x_odd - self.P(c)
 87 |             return (c, d)
 88 |         else:
 89 |             d = x_odd - self.P(x_even)
 90 |             c = x_even + self.U(d)
 91 |             return (c, d)
 92 |         
 93 |         
 94 | class InverseLiftingScheme(nn.Module):
 95 |     def __init__(self, in_channels, input_size, kernel_size=4, simple_lifting=False):
 96 |         super(InverseLiftingScheme, self).__init__()
 97 |         self.wavelet = LiftingScheme(in_channels, k_size=kernel_size, simple_lifting=simple_lifting, input_size=input_size * 2)
 98 | 
 99 |     def forward(self, c, d):
100 |         if self.wavelet.modified:
101 |             x_even = c - self.wavelet.U(d)
102 |             x_odd = d + self.wavelet.P(x_even)
103 |         else:
104 |             x_even = c - self.wavelet.U(d)
105 |             x_odd = d + self.wavelet.P(x_even)
106 | 
107 |         # Merge the even and odd components to reconstruct the original signal
108 |         B, C, L = c.size()  # or c.shape
109 |         x = torch.zeros((B, C, 2 * L), dtype=c.dtype, device=c.device)
110 |         x[..., ::2] = x_even
111 |         x[..., 1::2] = x_odd
112 | 
113 |         return x


--------------------------------------------------------------------------------
/layers/LiftingSchemeLinear.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | class Splitting(nn.Module):
  6 |     def __init__(self, channel_first):
  7 |         super(Splitting, self).__init__()
  8 |         # Deciding the stride base on the direction
  9 |         self.channel_first = channel_first
 10 |         if(channel_first):
 11 |             self.conv_even = lambda x: x[:, :, ::2]
 12 |             self.conv_odd = lambda x: x[:, :, 1::2]
 13 |         else:
 14 |             self.conv_even = lambda x: x[:, ::2, :]
 15 |             self.conv_odd = lambda x: x[:, 1::2, :]
 16 | 
 17 |     def forward(self, x):
 18 |         '''Returns the odd and even part'''
 19 |         return (self.conv_even(x), self.conv_odd(x))
 20 | 
 21 | class LiftingScheme(nn.Module):
 22 |     def __init__(self, in_channels, input_size, modified=True, splitting=True, k_size=4, simple_lifting=False):
 23 |         super(LiftingScheme, self).__init__()
 24 |         self.modified = modified
 25 |         kernel_size = k_size
 26 |         pad = (k_size // 2, k_size - 1 - k_size // 2)
 27 | 
 28 |         self.splitting = splitting
 29 |         self.split = Splitting(channel_first=True)
 30 | 
 31 |         # Dynamic build sequential network
 32 |         modules_P = []
 33 |         modules_U = []
 34 |         prev_size = 1
 35 | 
 36 |         # HARD CODED Architecture
 37 |         if simple_lifting:            
 38 |             modules_P += [
 39 |                 nn.ReflectionPad1d(pad),
 40 |                 nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1),
 41 |                 nn.GELU(),
 42 |                 nn.LayerNorm([in_channels, input_size // 2])
 43 |             ]
 44 |             modules_U += [
 45 |                 nn.ReflectionPad1d(pad),
 46 |                 nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1),
 47 |                 nn.GELU(),
 48 |                 nn.LayerNorm([in_channels, input_size // 2])
 49 |             ]
 50 |         else:
 51 |             size_hidden = 2
 52 |             
 53 |             modules_P += [
 54 |                 nn.ReflectionPad1d(pad),
 55 |                 nn.Conv1d(in_channels*prev_size, in_channels*size_hidden, kernel_size=kernel_size, stride=1),
 56 |                 nn.ReLU()
 57 |             ]
 58 |             modules_U += [
 59 |                 nn.ReflectionPad1d(pad),
 60 |                 nn.Conv1d(in_channels*prev_size, in_channels*size_hidden, kernel_size=kernel_size, stride=1),
 61 |                 nn.ReLU()
 62 |             ]
 63 |             prev_size = size_hidden
 64 | 
 65 |             # Final dense
 66 |             modules_P += [
 67 |                 nn.Conv1d(in_channels*prev_size, in_channels, kernel_size=1, stride=1),
 68 |                 nn.Tanh()
 69 |             ]
 70 |             modules_U += [
 71 |                 nn.Conv1d(in_channels*prev_size, in_channels, kernel_size=1, stride=1),
 72 |                 nn.Tanh()
 73 |             ]
 74 | 
 75 |         self.P = nn.Sequential(*modules_P)
 76 |         self.U = nn.Sequential(*modules_U)
 77 | 
 78 |     def forward(self, x):
 79 |         if self.splitting:
 80 |             (x_even, x_odd) = self.split(x)
 81 |         else:
 82 |             (x_even, x_odd) = x
 83 | 
 84 |         if self.modified:
 85 |             c = x_even + self.U(x_odd)
 86 |             d = x_odd - self.P(c)
 87 |             return (c, d)
 88 |         else:
 89 |             d = x_odd - self.P(x_even)
 90 |             c = x_even + self.U(d)
 91 |             return (c, d)
 92 |         
 93 |         
 94 | class InverseLiftingScheme(nn.Module):
 95 |     def __init__(self, in_channels, input_size, kernel_size=4, simple_lifting=False):
 96 |         super(InverseLiftingScheme, self).__init__()
 97 |         self.wavelet = LiftingScheme(in_channels, k_size=kernel_size, simple_lifting=simple_lifting, input_size=input_size * 2)
 98 | 
 99 |     def forward(self, c, d):
100 |         if self.wavelet.modified:
101 |             x_even = c - self.wavelet.U(d)
102 |             x_odd = d + self.wavelet.P(x_even)
103 |         else:
104 |             x_even = c - self.wavelet.U(d)
105 |             x_odd = d + self.wavelet.P(x_even)
106 | 
107 |         # Merge the even and odd components to reconstruct the original signal
108 |         B, C, L = c.size()  # or c.shape
109 |         x = torch.zeros((B, C, 2 * L), dtype=c.dtype, device=c.device)
110 |         x[..., ::2] = x_even
111 |         x[..., 1::2] = x_odd
112 | 
113 |         return x


--------------------------------------------------------------------------------
/layers/Pyraformer_EncDec.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from torch.nn.modules.linear import Linear
  5 | from layers.SelfAttention_Family import AttentionLayer, FullAttention
  6 | from layers.Embed import DataEmbedding
  7 | import math
  8 | 
  9 | 
 10 | def get_mask(input_size, window_size, inner_size):
 11 |     """Get the attention mask of PAM-Naive"""
 12 |     # Get the size of all layers
 13 |     all_size = []
 14 |     all_size.append(input_size)
 15 |     for i in range(len(window_size)):
 16 |         layer_size = math.floor(all_size[i] / window_size[i])
 17 |         all_size.append(layer_size)
 18 | 
 19 |     seq_length = sum(all_size)
 20 |     mask = torch.zeros(seq_length, seq_length)
 21 | 
 22 |     # get intra-scale mask
 23 |     inner_window = inner_size // 2
 24 |     for layer_idx in range(len(all_size)):
 25 |         start = sum(all_size[:layer_idx])
 26 |         for i in range(start, start + all_size[layer_idx]):
 27 |             left_side = max(i - inner_window, start)
 28 |             right_side = min(i + inner_window + 1, start + all_size[layer_idx])
 29 |             mask[i, left_side:right_side] = 1
 30 | 
 31 |     # get inter-scale mask
 32 |     for layer_idx in range(1, len(all_size)):
 33 |         start = sum(all_size[:layer_idx])
 34 |         for i in range(start, start + all_size[layer_idx]):
 35 |             left_side = (start - all_size[layer_idx - 1]) + \
 36 |                 (i - start) * window_size[layer_idx - 1]
 37 |             if i == (start + all_size[layer_idx] - 1):
 38 |                 right_side = start
 39 |             else:
 40 |                 right_side = (
 41 |                     start - all_size[layer_idx - 1]) + (i - start + 1) * window_size[layer_idx - 1]
 42 |             mask[i, left_side:right_side] = 1
 43 |             mask[left_side:right_side, i] = 1
 44 | 
 45 |     mask = (1 - mask).bool()
 46 | 
 47 |     return mask, all_size
 48 | 
 49 | 
 50 | def refer_points(all_sizes, window_size):
 51 |     """Gather features from PAM's pyramid sequences"""
 52 |     input_size = all_sizes[0]
 53 |     indexes = torch.zeros(input_size, len(all_sizes))
 54 | 
 55 |     for i in range(input_size):
 56 |         indexes[i][0] = i
 57 |         former_index = i
 58 |         for j in range(1, len(all_sizes)):
 59 |             start = sum(all_sizes[:j])
 60 |             inner_layer_idx = former_index - (start - all_sizes[j - 1])
 61 |             former_index = start + \
 62 |                 min(inner_layer_idx // window_size[j - 1], all_sizes[j] - 1)
 63 |             indexes[i][j] = former_index
 64 | 
 65 |     indexes = indexes.unsqueeze(0).unsqueeze(3)
 66 | 
 67 |     return indexes.long()
 68 | 
 69 | 
 70 | class RegularMask():
 71 |     def __init__(self, mask):
 72 |         self._mask = mask.unsqueeze(1)
 73 | 
 74 |     @property
 75 |     def mask(self):
 76 |         return self._mask
 77 | 
 78 | 
 79 | class EncoderLayer(nn.Module):
 80 |     """ Compose with two layers """
 81 | 
 82 |     def __init__(self, d_model, d_inner, n_head, dropout=0.1, normalize_before=True):
 83 |         super(EncoderLayer, self).__init__()
 84 | 
 85 |         self.slf_attn = AttentionLayer(
 86 |             FullAttention(mask_flag=True, factor=0,
 87 |                           attention_dropout=dropout, output_attention=False),
 88 |             d_model, n_head)
 89 |         self.pos_ffn = PositionwiseFeedForward(
 90 |             d_model, d_inner, dropout=dropout, normalize_before=normalize_before)
 91 | 
 92 |     def forward(self, enc_input, slf_attn_mask=None):
 93 |         attn_mask = RegularMask(slf_attn_mask)
 94 |         enc_output, _ = self.slf_attn(
 95 |             enc_input, enc_input, enc_input, attn_mask=attn_mask)
 96 |         enc_output = self.pos_ffn(enc_output)
 97 |         return enc_output
 98 | 
 99 | 
class Encoder(nn.Module):
    """Pyramidal self-attention encoder.

    Embeds the input, extends it with coarser levels built by
    ``Bottleneck_Construct``, runs the masked attention layers over the
    concatenated levels and finally gathers, for every input position, the
    features of its node at each level.
    """

    def __init__(self, configs, window_size, inner_size):
        super().__init__()

        # Static pyramid structure: attention mask and per-position node indexes.
        self.mask, self.all_size = get_mask(
            configs.seq_len, window_size, inner_size)
        self.indexes = refer_points(self.all_size, window_size)

        attention_blocks = [
            EncoderLayer(
                configs.d_model,
                configs.d_ff,
                configs.n_heads,
                dropout=configs.dropout,
                normalize_before=False,
            )
            for _ in range(configs.e_layers)
        ]
        self.layers = nn.ModuleList(attention_blocks)  # naive pyramid attention

        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.dropout)
        # The coarser levels are built in a bottleneck of a quarter of d_model.
        self.conv_layers = Bottleneck_Construct(
            configs.d_model, window_size, configs.d_model // 4)

    def forward(self, x_enc, x_mark_enc):
        """Encode `x_enc` and return the per-position multi-level features."""
        seq_enc = self.enc_embedding(x_enc, x_mark_enc)

        # One copy of the mask per batch element, on the input's device.
        mask = self.mask.repeat(len(seq_enc), 1, 1).to(x_enc.device)
        seq_enc = self.conv_layers(seq_enc)

        for layer in self.layers:
            seq_enc = layer(seq_enc, mask)

        batch_size = seq_enc.size(0)
        feature_dim = seq_enc.size(2)
        # Expand the node indexes over batch and feature axes for torch.gather.
        indexes = self.indexes.repeat(
            batch_size, 1, 1, feature_dim).to(seq_enc.device)
        indexes = indexes.view(batch_size, -1, feature_dim)
        gathered = torch.gather(seq_enc, 1, indexes)

        # Concatenate the features of all levels for each input position.
        return gathered.view(batch_size, self.all_size[0], -1)
137 | 
138 | 
class ConvLayer(nn.Module):
    """Strided 1-D convolution followed by batch norm and ELU.

    Kernel size and stride both equal `window_size`, so the windows do not
    overlap; the channel count is left unchanged.
    """

    def __init__(self, c_in, window_size):
        super().__init__()
        self.downConv = nn.Conv1d(
            c_in, c_in, kernel_size=window_size, stride=window_size)
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()

    def forward(self, x):
        """Apply convolution, normalisation and activation in that order."""
        return self.activation(self.norm(self.downConv(x)))
154 | 
155 | 
class Bottleneck_Construct(nn.Module):
    """Bottleneck convolution CSCM.

    Projects the input down to `d_inner` channels, builds successively
    coarser levels with strided convolutions, projects them back to
    `d_model` and appends them to the original sequence.
    """

    def __init__(self, d_model, window_size, d_inner):
        super().__init__()
        # A non-list window size means three levels sharing that size.
        if isinstance(window_size, list):
            windows = window_size
        else:
            windows = [window_size] * 3
        self.conv_layers = nn.ModuleList(
            [ConvLayer(d_inner, window) for window in windows])
        self.up = Linear(d_inner, d_model)
        self.down = Linear(d_model, d_inner)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, enc_input):
        """Return `enc_input` extended along dim 1 with the coarser levels."""
        # The convolutions expect channels on dim 1.
        level = self.down(enc_input).permute(0, 2, 1)
        coarse_levels = []
        for conv in self.conv_layers:
            level = conv(level)
            coarse_levels.append(level)

        # Join all levels along the length axis and restore channels-last layout.
        coarse = torch.cat(coarse_levels, dim=2).transpose(1, 2)
        coarse = self.up(coarse)
        pyramid = torch.cat([enc_input, coarse], dim=1)

        return self.norm(pyramid)
189 | 
190 | 
class PositionwiseFeedForward(nn.Module):
    """Two-layer position-wise feed-forward network with a residual connection.

    Layer normalisation is applied to the input when `normalize_before` is
    true, otherwise to the output after the residual addition.
    """

    def __init__(self, d_in, d_hid, dropout=0.1, normalize_before=True):
        super().__init__()

        self.normalize_before = normalize_before

        self.w_1 = nn.Linear(d_in, d_hid)
        self.w_2 = nn.Linear(d_hid, d_in)

        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply the feed-forward block to `x` and add the residual."""
        residual = x
        hidden = self.layer_norm(x) if self.normalize_before else x

        hidden = self.dropout(F.gelu(self.w_1(hidden)))
        hidden = self.dropout(self.w_2(hidden))
        out = hidden + residual

        if self.normalize_before:
            return out
        return self.layer_norm(out)
219 | 


--------------------------------------------------------------------------------
/layers/Transformer_EncDec.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class ConvLayer(nn.Module):
  7 |     def __init__(self, c_in):
  8 |         super(ConvLayer, self).__init__()
  9 |         self.downConv = nn.Conv1d(in_channels=c_in,
 10 |                                   out_channels=c_in,
 11 |                                   kernel_size=3,
 12 |                                   padding=2,
 13 |                                   padding_mode='circular')
 14 |         self.norm = nn.BatchNorm1d(c_in)
 15 |         self.activation = nn.ELU()
 16 |         self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
 17 | 
 18 |     def forward(self, x):
 19 |         x = self.downConv(x.permute(0, 2, 1))
 20 |         x = self.norm(x)
 21 |         x = self.activation(x)
 22 |         x = self.maxPool(x)
 23 |         x = x.transpose(1, 2)
 24 |         return x
 25 | 
 26 | 
 27 | class EncoderLayer(nn.Module):
 28 |     def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
 29 |         super(EncoderLayer, self).__init__()
 30 |         d_ff = d_ff or 4 * d_model
 31 |         self.attention = attention
 32 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
 33 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
 34 |         self.norm1 = nn.LayerNorm(d_model)
 35 |         self.norm2 = nn.LayerNorm(d_model)
 36 |         self.dropout = nn.Dropout(dropout)
 37 |         self.activation = F.relu if activation == "relu" else F.gelu
 38 | 
 39 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
 40 |         new_x, attn = self.attention(
 41 |             x, x, x,
 42 |             attn_mask=attn_mask,
 43 |             tau=tau, delta=delta
 44 |         )
 45 |         x = x + self.dropout(new_x)
 46 | 
 47 |         y = x = self.norm1(x)
 48 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
 49 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
 50 | 
 51 |         return self.norm2(x + y), attn
 52 | 
 53 | 
 54 | class Encoder(nn.Module):
 55 |     def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
 56 |         super(Encoder, self).__init__()
 57 |         self.attn_layers = nn.ModuleList(attn_layers)
 58 |         self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
 59 |         self.norm = norm_layer
 60 | 
 61 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
 62 |         # x [B, L, D]
 63 |         attns = []
 64 |         if self.conv_layers is not None:
 65 |             for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
 66 |                 delta = delta if i == 0 else None
 67 |                 x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
 68 |                 x = conv_layer(x)
 69 |                 attns.append(attn)
 70 |             x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
 71 |             attns.append(attn)
 72 |         else:
 73 |             for attn_layer in self.attn_layers:
 74 |                 x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
 75 |                 attns.append(attn)
 76 | 
 77 |         if self.norm is not None:
 78 |             x = self.norm(x)
 79 | 
 80 |         return x, attns
 81 | 
 82 | 
 83 | class DecoderLayer(nn.Module):
 84 |     def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
 85 |                  dropout=0.1, activation="relu"):
 86 |         super(DecoderLayer, self).__init__()
 87 |         d_ff = d_ff or 4 * d_model
 88 |         self.self_attention = self_attention
 89 |         self.cross_attention = cross_attention
 90 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
 91 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
 92 |         self.norm1 = nn.LayerNorm(d_model)
 93 |         self.norm2 = nn.LayerNorm(d_model)
 94 |         self.norm3 = nn.LayerNorm(d_model)
 95 |         self.dropout = nn.Dropout(dropout)
 96 |         self.activation = F.relu if activation == "relu" else F.gelu
 97 | 
 98 |     def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
 99 |         x = x + self.dropout(self.self_attention(
100 |             x, x, x,
101 |             attn_mask=x_mask,
102 |             tau=tau, delta=None
103 |         )[0])
104 |         x = self.norm1(x)
105 | 
106 |         x = x + self.dropout(self.cross_attention(
107 |             x, cross, cross,
108 |             attn_mask=cross_mask,
109 |             tau=tau, delta=delta
110 |         )[0])
111 | 
112 |         y = x = self.norm2(x)
113 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
114 |         y = self.dropout(self.conv2(y).transpose(-1, 1))
115 | 
116 |         return self.norm3(x + y)
117 | 
118 | 
class Decoder(nn.Module):
    """Sequential stack of decoder layers with optional norm and projection."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        """Pass `x` through every layer, then the optional norm and projection."""
        for decoder_layer in self.layers:
            x = decoder_layer(
                x,
                cross,
                x_mask=x_mask,
                cross_mask=cross_mask,
                tau=tau,
                delta=delta,
            )

        # Both post-processing steps are skipped when not configured.
        for post_step in (self.norm, self.projection):
            if post_step is not None:
                x = post_step(x)
        return x
136 | 


--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/comp-well-org/AdaWaveNet/a03f080c96af3420c1c5006c8d4c6f3e78449ee6/layers/__init__.py


--------------------------------------------------------------------------------
/models/Autoformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Embed import DataEmbedding, DataEmbedding_wo_pos
  5 | from layers.AutoCorrelation import AutoCorrelation, AutoCorrelationLayer
  6 | from layers.Autoformer_EncDec import Encoder, Decoder, EncoderLayer, DecoderLayer, my_Layernorm, series_decomp
  7 | import math
  8 | import numpy as np
  9 | 
 10 | 
 11 | class Model(nn.Module):
 12 |     """
 13 |     Autoformer is the first method to achieve the series-wise connection,
 14 |     with inherent O(LlogL) complexity
 15 |     Paper link: https://openreview.net/pdf?id=I55UqU-M11y
 16 |     """
 17 | 
 18 |     def __init__(self, configs):
 19 |         super(Model, self).__init__()
 20 |         self.task_name = configs.task_name
 21 |         self.seq_len = configs.seq_len
 22 |         self.label_len = configs.label_len
 23 |         self.pred_len = configs.pred_len
 24 |         self.output_attention = configs.output_attention
 25 | 
 26 |         # Decomp
 27 |         kernel_size = configs.moving_avg
 28 |         self.decomp = series_decomp(kernel_size)
 29 | 
 30 |         # Embedding
 31 |         self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
 32 |                                                   configs.dropout)
 33 |         # Encoder
 34 |         self.encoder = Encoder(
 35 |             [
 36 |                 EncoderLayer(
 37 |                     AutoCorrelationLayer(
 38 |                         AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
 39 |                                         output_attention=configs.output_attention),
 40 |                         configs.d_model, configs.n_heads),
 41 |                     configs.d_model,
 42 |                     configs.d_ff,
 43 |                     moving_avg=configs.moving_avg,
 44 |                     dropout=configs.dropout,
 45 |                     activation=configs.activation
 46 |                 ) for l in range(configs.e_layers)
 47 |             ],
 48 |             norm_layer=my_Layernorm(configs.d_model)
 49 |         )
 50 |         # Decoder
 51 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 52 |             self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
 53 |                                                       configs.dropout)
 54 |             self.decoder = Decoder(
 55 |                 [
 56 |                     DecoderLayer(
 57 |                         AutoCorrelationLayer(
 58 |                             AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout,
 59 |                                             output_attention=False),
 60 |                             configs.d_model, configs.n_heads),
 61 |                         AutoCorrelationLayer(
 62 |                             AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout,
 63 |                                             output_attention=False),
 64 |                             configs.d_model, configs.n_heads),
 65 |                         configs.d_model,
 66 |                         configs.c_out,
 67 |                         configs.d_ff,
 68 |                         moving_avg=configs.moving_avg,
 69 |                         dropout=configs.dropout,
 70 |                         activation=configs.activation,
 71 |                     )
 72 |                     for l in range(configs.d_layers)
 73 |                 ],
 74 |                 norm_layer=my_Layernorm(configs.d_model),
 75 |                 projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
 76 |             )
 77 |         if self.task_name == 'imputation':
 78 |             self.projection = nn.Linear(
 79 |                 configs.d_model, configs.c_out, bias=True)
 80 |         if self.task_name == 'anomaly_detection':
 81 |             self.projection = nn.Linear(
 82 |                 configs.d_model, configs.c_out, bias=True)
 83 |         if self.task_name == 'classification':
 84 |             self.act = F.gelu
 85 |             self.dropout = nn.Dropout(configs.dropout)
 86 |             self.projection = nn.Linear(
 87 |                 configs.d_model * configs.seq_len, configs.num_class)
 88 | 
 89 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 90 |         # decomp init
 91 |         mean = torch.mean(x_enc, dim=1).unsqueeze(
 92 |             1).repeat(1, self.pred_len, 1)
 93 |         zeros = torch.zeros([x_dec.shape[0], self.pred_len,
 94 |                              x_dec.shape[2]], device=x_enc.device)
 95 |         seasonal_init, trend_init = self.decomp(x_enc)
 96 |         # decoder input
 97 |         trend_init = torch.cat(
 98 |             [trend_init[:, -self.label_len:, :], mean], dim=1)
 99 |         seasonal_init = torch.cat(
100 |             [seasonal_init[:, -self.label_len:, :], zeros], dim=1)
101 |         # enc
102 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
103 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
104 |         # dec
105 |         dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
106 |         seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None,
107 |                                                  trend=trend_init)
108 |         # final
109 |         dec_out = trend_part + seasonal_part
110 |         return dec_out
111 | 
112 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
113 |         # enc
114 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
115 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
116 |         # final
117 |         dec_out = self.projection(enc_out)
118 |         return dec_out
119 | 
120 |     def anomaly_detection(self, x_enc):
121 |         # enc
122 |         enc_out = self.enc_embedding(x_enc, None)
123 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
124 |         # final
125 |         dec_out = self.projection(enc_out)
126 |         return dec_out
127 | 
128 |     def classification(self, x_enc, x_mark_enc):
129 |         # enc
130 |         enc_out = self.enc_embedding(x_enc, None)
131 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
132 | 
133 |         # Output
134 |         # the output transformer encoder/decoder embeddings don't include non-linearity
135 |         output = self.act(enc_out)
136 |         output = self.dropout(output)
137 |         # zero-out padding embeddings
138 |         output = output * x_mark_enc.unsqueeze(-1)
139 |         # (batch_size, seq_length * d_model)
140 |         output = output.reshape(output.shape[0], -1)
141 |         output = self.projection(output)  # (batch_size, num_classes)
142 |         return output
143 | 
144 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
145 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
146 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
147 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
148 |         if self.task_name == 'imputation':
149 |             dec_out = self.imputation(
150 |                 x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
151 |             return dec_out  # [B, L, D]
152 |         if self.task_name == 'anomaly_detection':
153 |             dec_out = self.anomaly_detection(x_enc)
154 |             return dec_out  # [B, L, D]
155 |         if self.task_name == 'classification':
156 |             dec_out = self.classification(x_enc, x_mark_enc)
157 |             return dec_out  # [B, N]
158 |         return None
159 | 


--------------------------------------------------------------------------------
/models/Crossformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from einops import rearrange, repeat
  5 | from layers.Crossformer_EncDec import scale_block, Encoder, Decoder, DecoderLayer
  6 | from layers.Embed import PatchEmbedding
  7 | from layers.SelfAttention_Family import AttentionLayer, FullAttention, TwoStageAttentionLayer
  8 | from models.PatchTST import FlattenHead
  9 | 
 10 | 
 11 | from math import ceil
 12 | 
 13 | 
 14 | class Model(nn.Module):
 15 |     """
 16 |     Paper link: https://openreview.net/pdf?id=vSVLM2j9eie
 17 |     """
 18 |     def __init__(self, configs):
 19 |         super(Model, self).__init__()
 20 |         self.enc_in = configs.enc_in
 21 |         self.seq_len = configs.seq_len
 22 |         self.pred_len = configs.pred_len
 23 |         self.seg_len = 12
 24 |         self.win_size = 2
 25 |         self.task_name = configs.task_name
 26 | 
 27 |         # The padding operation to handle invisible sgemnet length
 28 |         self.pad_in_len = ceil(1.0 * configs.seq_len / self.seg_len) * self.seg_len
 29 |         self.pad_out_len = ceil(1.0 * configs.pred_len / self.seg_len) * self.seg_len
 30 |         self.in_seg_num = self.pad_in_len // self.seg_len
 31 |         self.out_seg_num = ceil(self.in_seg_num / (self.win_size ** (configs.e_layers - 1)))
 32 |         self.head_nf = configs.d_model * self.out_seg_num
 33 | 
 34 |         # Embedding
 35 |         self.enc_value_embedding = PatchEmbedding(configs.d_model, self.seg_len, self.seg_len, self.pad_in_len - configs.seq_len, 0)
 36 |         self.enc_pos_embedding = nn.Parameter(
 37 |             torch.randn(1, configs.enc_in, self.in_seg_num, configs.d_model))
 38 |         self.pre_norm = nn.LayerNorm(configs.d_model)
 39 | 
 40 |         # Encoder
 41 |         self.encoder = Encoder(
 42 |             [
 43 |                 scale_block(configs, 1 if l is 0 else self.win_size, configs.d_model, configs.n_heads, configs.d_ff,
 44 |                             1, configs.dropout,
 45 |                             self.in_seg_num if l is 0 else ceil(self.in_seg_num / self.win_size ** l), configs.factor
 46 |                             ) for l in range(configs.e_layers)
 47 |             ]
 48 |         )
 49 |         # Decoder
 50 |         self.dec_pos_embedding = nn.Parameter(
 51 |             torch.randn(1, configs.enc_in, (self.pad_out_len // self.seg_len), configs.d_model))
 52 | 
 53 |         self.decoder = Decoder(
 54 |             [
 55 |                 DecoderLayer(
 56 |                     TwoStageAttentionLayer(configs, (self.pad_out_len // self.seg_len), configs.factor, configs.d_model, configs.n_heads,
 57 |                                            configs.d_ff, configs.dropout),
 58 |                     AttentionLayer(
 59 |                         FullAttention(False, configs.factor, attention_dropout=configs.dropout,
 60 |                                       output_attention=False),
 61 |                         configs.d_model, configs.n_heads),
 62 |                     self.seg_len,
 63 |                     configs.d_model,
 64 |                     configs.d_ff,
 65 |                     dropout=configs.dropout,
 66 |                     # activation=configs.activation,
 67 |                 )
 68 |                 for l in range(configs.e_layers + 1)
 69 |             ],
 70 |         )
 71 |         if self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
 72 |             self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len,
 73 |                                     head_dropout=configs.dropout)
 74 |         elif self.task_name == 'classification':
 75 |             self.flatten = nn.Flatten(start_dim=-2)
 76 |             self.dropout = nn.Dropout(configs.dropout)
 77 |             self.projection = nn.Linear(
 78 |                 self.head_nf * configs.enc_in, configs.num_class)
 79 | 
 80 | 
 81 | 
 82 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 83 |         # embedding
 84 |         x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1))
 85 |         x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d = n_vars)
 86 |         x_enc += self.enc_pos_embedding
 87 |         x_enc = self.pre_norm(x_enc)
 88 |         enc_out, attns = self.encoder(x_enc)
 89 | 
 90 |         dec_in = repeat(self.dec_pos_embedding, 'b ts_d l d -> (repeat b) ts_d l d', repeat=x_enc.shape[0])
 91 |         dec_out = self.decoder(dec_in, enc_out)
 92 |         return dec_out
 93 | 
 94 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
 95 |         # embedding
 96 |         x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1))
 97 |         x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars)
 98 |         x_enc += self.enc_pos_embedding
 99 |         x_enc = self.pre_norm(x_enc)
100 |         enc_out, attns = self.encoder(x_enc)
101 | 
102 |         dec_out = self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1)
103 | 
104 |         return dec_out
105 | 
106 |     def anomaly_detection(self, x_enc):
107 |         # embedding
108 |         x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1))
109 |         x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars)
110 |         x_enc += self.enc_pos_embedding
111 |         x_enc = self.pre_norm(x_enc)
112 |         enc_out, attns = self.encoder(x_enc)
113 | 
114 |         dec_out = self.head(enc_out[-1].permute(0, 1, 3, 2)).permute(0, 2, 1)
115 |         return dec_out
116 | 
117 |     def classification(self, x_enc, x_mark_enc):
118 |         # embedding
119 |         x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1))
120 | 
121 |         x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars)
122 |         x_enc += self.enc_pos_embedding
123 |         x_enc = self.pre_norm(x_enc)
124 |         enc_out, attns = self.encoder(x_enc)
125 |         # Output from Non-stationary Transformer
126 |         output = self.flatten(enc_out[-1].permute(0, 1, 3, 2))
127 |         output = self.dropout(output)
128 |         output = output.reshape(output.shape[0], -1)
129 |         output = self.projection(output)
130 |         return output
131 | 
132 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
133 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
134 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
135 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
136 |         if self.task_name == 'imputation':
137 |             dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
138 |             return dec_out  # [B, L, D]
139 |         if self.task_name == 'anomaly_detection':
140 |             dec_out = self.anomaly_detection(x_enc)
141 |             return dec_out  # [B, L, D]
142 |         if self.task_name == 'classification':
143 |             dec_out = self.classification(x_enc, x_mark_enc)
144 |             return dec_out  # [B, N]
145 |         return None


--------------------------------------------------------------------------------
/models/DLinear.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Autoformer_EncDec import series_decomp
  5 | 
  6 | 
  7 | class Model(nn.Module):
  8 |     """
  9 |     Paper link: https://arxiv.org/pdf/2205.13504.pdf
 10 |     """
 11 | 
 12 |     def __init__(self, configs, individual=False):
 13 |         """
 14 |         individual: Bool, whether shared model among different variates.
 15 |         """
 16 |         super(Model, self).__init__()
 17 |         self.task_name = configs.task_name
 18 |         self.seq_len = configs.seq_len
 19 |         if self.task_name == "super_resolution":
 20 |             self.seq_len = configs.seq_len // configs.sr_ratio
 21 |         
 22 |         if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
 23 |             self.pred_len = configs.seq_len
 24 |         else:
 25 |             self.pred_len = configs.pred_len
 26 |         # Series decomposition block from Autoformer
 27 |         self.decompsition = series_decomp(configs.moving_avg)
 28 |         self.individual = individual
 29 |         self.channels = configs.enc_in
 30 | 
 31 |         if self.individual:
 32 |             self.Linear_Seasonal = nn.ModuleList()
 33 |             self.Linear_Trend = nn.ModuleList()
 34 | 
 35 |             for i in range(self.channels):
 36 |                 self.Linear_Seasonal.append(
 37 |                     nn.Linear(self.seq_len, self.pred_len))
 38 |                 self.Linear_Trend.append(
 39 |                     nn.Linear(self.seq_len, self.pred_len))
 40 | 
 41 |                 self.Linear_Seasonal[i].weight = nn.Parameter(
 42 |                     (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 43 |                 self.Linear_Trend[i].weight = nn.Parameter(
 44 |                     (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 45 |         else:
 46 |             self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
 47 |             self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)
 48 | 
 49 |             self.Linear_Seasonal.weight = nn.Parameter(
 50 |                 (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 51 |             self.Linear_Trend.weight = nn.Parameter(
 52 |                 (1 / self.seq_len) * torch.ones([self.pred_len, self.seq_len]))
 53 | 
 54 |         if self.task_name == 'classification':
 55 |             self.act = F.gelu
 56 |             self.dropout = nn.Dropout(configs.dropout)
 57 |             self.projection = nn.Linear(
 58 |                 configs.enc_in * configs.seq_len, configs.num_class)
 59 | 
 60 |     def encoder(self, x):
 61 |         seasonal_init, trend_init = self.decompsition(x)
 62 |         seasonal_init, trend_init = seasonal_init.permute(
 63 |             0, 2, 1), trend_init.permute(0, 2, 1)
 64 |         if self.individual:
 65 |             seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
 66 |                                           dtype=seasonal_init.dtype).to(seasonal_init.device)
 67 |             trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len],
 68 |                                        dtype=trend_init.dtype).to(trend_init.device)
 69 |             for i in range(self.channels):
 70 |                 seasonal_output[:, i, :] = self.Linear_Seasonal[i](
 71 |                     seasonal_init[:, i, :])
 72 |                 trend_output[:, i, :] = self.Linear_Trend[i](
 73 |                     trend_init[:, i, :])
 74 |         else:
 75 |             seasonal_output = self.Linear_Seasonal(seasonal_init)
 76 |             trend_output = self.Linear_Trend(trend_init)
 77 |         x = seasonal_output + trend_output
 78 |         return x.permute(0, 2, 1)
 79 | 
 80 |     def forecast(self, x_enc):
 81 |         # Encoder
 82 |         return self.encoder(x_enc)
 83 | 
 84 |     def imputation(self, x_enc):
 85 |         # Encoder
 86 |         return self.encoder(x_enc)
 87 | 
 88 |     def anomaly_detection(self, x_enc):
 89 |         # Encoder
 90 |         return self.encoder(x_enc)
 91 | 
 92 |     def classification(self, x_enc):
 93 |         # Encoder
 94 |         enc_out = self.encoder(x_enc)
 95 |         # Output
 96 |         # (batch_size, seq_length * d_model)
 97 |         output = enc_out.reshape(enc_out.shape[0], -1)
 98 |         # (batch_size, num_classes)
 99 |         output = self.projection(output)
100 |         return output
101 | 
102 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
103 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast' or self.task_name == 'super_resolution':
104 |             dec_out = self.forecast(x_enc)
105 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
106 |         if self.task_name == 'imputation':
107 |             dec_out = self.imputation(x_enc)
108 |             return dec_out  # [B, L, D]
109 |         if self.task_name == 'anomaly_detection':
110 |             dec_out = self.anomaly_detection(x_enc)
111 |             return dec_out  # [B, L, D]
112 |         if self.task_name == 'classification':
113 |             dec_out = self.classification(x_enc)
114 |             return dec_out  # [B, N]
115 |         return None
116 | 


--------------------------------------------------------------------------------
/models/ETSformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from layers.Embed import DataEmbedding
  4 | from layers.ETSformer_EncDec import EncoderLayer, Encoder, DecoderLayer, Decoder, Transform
  5 | 
  6 | 
  7 | class Model(nn.Module):
  8 |     """
  9 |     Paper link: https://arxiv.org/abs/2202.01381
 10 |     """
 11 | 
 12 |     def __init__(self, configs):
 13 |         super(Model, self).__init__()
 14 |         self.task_name = configs.task_name
 15 |         self.seq_len = configs.seq_len
 16 |         self.label_len = configs.label_len
 17 |         if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
 18 |             self.pred_len = configs.seq_len
 19 |         else:
 20 |             self.pred_len = configs.pred_len
 21 | 
 22 |         assert configs.e_layers == configs.d_layers, "Encoder and decoder layers must be equal"
 23 | 
 24 |         # Embedding
 25 |         self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
 26 |                                            configs.dropout)
 27 | 
 28 |         # Encoder
 29 |         self.encoder = Encoder(
 30 |             [
 31 |                 EncoderLayer(
 32 |                     configs.d_model, configs.n_heads, configs.enc_in, configs.seq_len, self.pred_len, configs.top_k,
 33 |                     dim_feedforward=configs.d_ff,
 34 |                     dropout=configs.dropout,
 35 |                     activation=configs.activation,
 36 |                 ) for _ in range(configs.e_layers)
 37 |             ]
 38 |         )
 39 |         # Decoder
 40 |         self.decoder = Decoder(
 41 |             [
 42 |                 DecoderLayer(
 43 |                     configs.d_model, configs.n_heads, configs.c_out, self.pred_len,
 44 |                     dropout=configs.dropout,
 45 |                 ) for _ in range(configs.d_layers)
 46 |             ],
 47 |         )
 48 |         self.transform = Transform(sigma=0.2)
 49 | 
 50 |         if self.task_name == 'classification':
 51 |             self.act = torch.nn.functional.gelu
 52 |             self.dropout = nn.Dropout(configs.dropout)
 53 |             self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)
 54 | 
 55 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 56 |         with torch.no_grad():
 57 |             if self.training:
 58 |                 x_enc = self.transform.transform(x_enc)
 59 |         res = self.enc_embedding(x_enc, x_mark_enc)
 60 |         level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
 61 | 
 62 |         growth, season = self.decoder(growths, seasons)
 63 |         preds = level[:, -1:] + growth + season
 64 |         return preds
 65 | 
 66 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
 67 |         res = self.enc_embedding(x_enc, x_mark_enc)
 68 |         level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
 69 |         growth, season = self.decoder(growths, seasons)
 70 |         preds = level[:, -1:] + growth + season
 71 |         return preds
 72 | 
 73 |     def anomaly_detection(self, x_enc):
 74 |         res = self.enc_embedding(x_enc, None)
 75 |         level, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
 76 |         growth, season = self.decoder(growths, seasons)
 77 |         preds = level[:, -1:] + growth + season
 78 |         return preds
 79 | 
 80 |     def classification(self, x_enc, x_mark_enc):
 81 |         res = self.enc_embedding(x_enc, None)
 82 |         _, growths, seasons = self.encoder(res, x_enc, attn_mask=None)
 83 | 
 84 |         growths = torch.sum(torch.stack(growths, 0), 0)[:, :self.seq_len, :]
 85 |         seasons = torch.sum(torch.stack(seasons, 0), 0)[:, :self.seq_len, :]
 86 | 
 87 |         enc_out = growths + seasons
 88 |         output = self.act(enc_out)  # the output transformer encoder/decoder embeddings don't include non-linearity
 89 |         output = self.dropout(output)
 90 | 
 91 |         # Output
 92 |         output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
 93 |         output = output.reshape(output.shape[0], -1)  # (batch_size, seq_length * d_model)
 94 |         output = self.projection(output)  # (batch_size, num_classes)
 95 |         return output
 96 | 
 97 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
 98 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 99 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
100 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
101 |         if self.task_name == 'imputation':
102 |             dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
103 |             return dec_out  # [B, L, D]
104 |         if self.task_name == 'anomaly_detection':
105 |             dec_out = self.anomaly_detection(x_enc)
106 |             return dec_out  # [B, L, D]
107 |         if self.task_name == 'classification':
108 |             dec_out = self.classification(x_enc, x_mark_enc)
109 |             return dec_out  # [B, N]
110 |         return None
111 | 


--------------------------------------------------------------------------------
/models/FreTS.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | import numpy as np
  5 | 
  6 | class Model(nn.Module):
  7 |     """
  8 |     Paper link: https://arxiv.org/pdf/2311.06184.pdf
  9 |     """
 10 |     def __init__(self, configs):
 11 |         super(Model, self).__init__()
 12 |         self.task_name = configs.task_name
 13 |         if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
 14 |             self.pred_len = configs.seq_len
 15 |         else:
 16 |             self.pred_len = configs.pred_len
 17 |         self.embed_size = 128 #embed_size
 18 |         self.hidden_size = 256 #hidden_size
 19 |         self.pred_len = configs.pred_len
 20 |         self.feature_size = configs.enc_in #channels
 21 |         self.seq_len = configs.seq_len
 22 |         if self.task_name == 'super_resolution':
 23 |             self.seq_len = self.seq_len // configs.sr_ratio
 24 |         self.channel_independence = configs.channel_independence
 25 |         self.sparsity_threshold = 0.01
 26 |         self.scale = 0.02
 27 |         self.embeddings = nn.Parameter(torch.randn(1, self.embed_size))
 28 |         self.r1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
 29 |         self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
 30 |         self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
 31 |         self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
 32 |         self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
 33 |         self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
 34 |         self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
 35 |         self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
 36 | 
 37 |         self.fc = nn.Sequential(
 38 |             nn.Linear(self.seq_len * self.embed_size, self.hidden_size),
 39 |             nn.LeakyReLU(),
 40 |             nn.Linear(self.hidden_size, self.pred_len)
 41 |         )
 42 | 
 43 |     # dimension extension
 44 |     def tokenEmb(self, x):
 45 |         # x: [Batch, Input length, Channel]
 46 |         x = x.permute(0, 2, 1)
 47 |         x = x.unsqueeze(3)
 48 |         # N*T*1 x 1*D = N*T*D
 49 |         y = self.embeddings
 50 |         return x * y
 51 | 
 52 |     # frequency temporal learner
 53 |     def MLP_temporal(self, x, B, N, L):
 54 |         # [B, N, T, D]
 55 |         x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on L dimension
 56 |         y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2)
 57 |         x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho")
 58 |         return x
 59 | 
 60 |     # frequency channel learner
 61 |     def MLP_channel(self, x, B, N, L):
 62 |         # [B, N, T, D]
 63 |         x = x.permute(0, 2, 1, 3)
 64 |         # [B, T, N, D]
 65 |         x = torch.fft.rfft(x, dim=2, norm='ortho') # FFT on N dimension
 66 |         y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1)
 67 |         x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho")
 68 |         x = x.permute(0, 2, 1, 3)
 69 |         # [B, N, T, D]
 70 |         return x
 71 | 
 72 |     # frequency-domain MLPs
 73 |     # dimension: FFT along the dimension, r: the real part of weights, i: the imaginary part of weights
 74 |     # rb: the real part of bias, ib: the imaginary part of bias
 75 |     def FreMLP(self, B, nd, dimension, x, r, i, rb, ib):
 76 |         o1_real = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size],
 77 |                               device=x.device)
 78 |         o1_imag = torch.zeros([B, nd, dimension // 2 + 1, self.embed_size],
 79 |                               device=x.device)
 80 | 
 81 |         o1_real = F.relu(
 82 |             torch.einsum('bijd,dd->bijd', x.real, r) - \
 83 |             torch.einsum('bijd,dd->bijd', x.imag, i) + \
 84 |             rb
 85 |         )
 86 | 
 87 |         o1_imag = F.relu(
 88 |             torch.einsum('bijd,dd->bijd', x.imag, r) + \
 89 |             torch.einsum('bijd,dd->bijd', x.real, i) + \
 90 |             ib
 91 |         )
 92 | 
 93 |         y = torch.stack([o1_real, o1_imag], dim=-1)
 94 |         y = F.softshrink(y, lambd=self.sparsity_threshold)
 95 |         y = torch.view_as_complex(y)
 96 |         return y
 97 | 
 98 |     def forecast(self, x_enc):
 99 |         # x: [Batch, Input length, Channel]
100 |         B, T, N = x_enc.shape
101 |         # embedding x: [B, N, T, D]
102 |         x = self.tokenEmb(x_enc)
103 |         bias = x
104 |         # [B, N, T, D]
105 |         if self.channel_independence == '1':
106 |             x = self.MLP_channel(x, B, N, T)
107 |         # [B, N, T, D]
108 |         x = self.MLP_temporal(x, B, N, T)
109 |         x = x + bias
110 |         x = self.fc(x.reshape(B, N, -1)).permute(0, 2, 1)
111 |         return x
112 | 
113 |     def super_resolution(self, x_enc):
114 |         # x: [Batch, Input length, Channel]
115 |         B, T, N = x_enc.shape
116 |         # embedding x: [B, N, T, D]
117 |         x = self.tokenEmb(x_enc)
118 |         bias = x
119 |         # [B, N, T, D]
120 |         if self.channel_independence == '1':
121 |             x = self.MLP_channel(x, B, N, T)
122 |         # [B, N, T, D]
123 |         x = self.MLP_temporal(x, B, N, T)
124 |         x = x + bias
125 |         x = self.fc(x.reshape(B, N, -1)).permute(0, 2, 1)
126 |         return x
127 |     
128 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
129 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast' or self.task_name == 'imputation':
130 |             dec_out = self.forecast(x_enc)
131 |             return dec_out[:, :, :]  # [B, L, D]
132 |         elif self.task_name == 'super_resolution':
133 |             dec_out = self.super_resolution(x_enc)
134 |             return dec_out[:, :, :]
135 |         else:
136 |             raise ValueError('Only forecast tasks implemented yet')
137 | 
138 | 


--------------------------------------------------------------------------------
/models/Informer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
  5 | from layers.SelfAttention_Family import ProbAttention, AttentionLayer
  6 | from layers.Embed import DataEmbedding
  7 | 
  8 | 
  9 | class Model(nn.Module):
 10 |     """
 11 |     Informer with Propspare attention in O(LlogL) complexity
 12 |     Paper link: https://ojs.aaai.org/index.php/AAAI/article/view/17325/17132
 13 |     """
 14 | 
 15 |     def __init__(self, configs):
 16 |         super(Model, self).__init__()
 17 |         self.task_name = configs.task_name
 18 |         self.pred_len = configs.pred_len
 19 |         self.label_len = configs.label_len
 20 | 
 21 |         # Embedding
 22 |         self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
 23 |                                            configs.dropout)
 24 |         self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
 25 |                                            configs.dropout)
 26 | 
 27 |         # Encoder
 28 |         self.encoder = Encoder(
 29 |             [
 30 |                 EncoderLayer(
 31 |                     AttentionLayer(
 32 |                         ProbAttention(False, configs.factor, attention_dropout=configs.dropout,
 33 |                                       output_attention=configs.output_attention),
 34 |                         configs.d_model, configs.n_heads),
 35 |                     configs.d_model,
 36 |                     configs.d_ff,
 37 |                     dropout=configs.dropout,
 38 |                     activation=configs.activation
 39 |                 ) for l in range(configs.e_layers)
 40 |             ],
 41 |             [
 42 |                 ConvLayer(
 43 |                     configs.d_model
 44 |                 ) for l in range(configs.e_layers - 1)
 45 |             ] if configs.distil and ('forecast' in configs.task_name) else None,
 46 |             norm_layer=torch.nn.LayerNorm(configs.d_model)
 47 |         )
 48 |         # Decoder
 49 |         self.decoder = Decoder(
 50 |             [
 51 |                 DecoderLayer(
 52 |                     AttentionLayer(
 53 |                         ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
 54 |                         configs.d_model, configs.n_heads),
 55 |                     AttentionLayer(
 56 |                         ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
 57 |                         configs.d_model, configs.n_heads),
 58 |                     configs.d_model,
 59 |                     configs.d_ff,
 60 |                     dropout=configs.dropout,
 61 |                     activation=configs.activation,
 62 |                 )
 63 |                 for l in range(configs.d_layers)
 64 |             ],
 65 |             norm_layer=torch.nn.LayerNorm(configs.d_model),
 66 |             projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
 67 |         )
 68 |         if self.task_name == 'imputation':
 69 |             self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
 70 |         if self.task_name == 'anomaly_detection':
 71 |             self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
 72 |         if self.task_name == 'classification':
 73 |             self.act = F.gelu
 74 |             self.dropout = nn.Dropout(configs.dropout)
 75 |             self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)
 76 | 
 77 |     def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 78 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 79 |         dec_out = self.dec_embedding(x_dec, x_mark_dec)
 80 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 81 | 
 82 |         dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)
 83 | 
 84 |         return dec_out  # [B, L, D]
 85 |     
 86 |     def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 87 |         # Normalization
 88 |         mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
 89 |         x_enc = x_enc - mean_enc
 90 |         std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
 91 |         x_enc = x_enc / std_enc
 92 | 
 93 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 94 |         dec_out = self.dec_embedding(x_dec, x_mark_dec)
 95 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 96 | 
 97 |         dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)
 98 | 
 99 |         dec_out = dec_out * std_enc + mean_enc
100 |         return dec_out  # [B, L, D]
101 | 
102 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
103 |         # enc
104 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
105 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
106 |         # final
107 |         dec_out = self.projection(enc_out)
108 |         return dec_out
109 | 
110 |     def anomaly_detection(self, x_enc):
111 |         # enc
112 |         enc_out = self.enc_embedding(x_enc, None)
113 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
114 |         # final
115 |         dec_out = self.projection(enc_out)
116 |         return dec_out
117 | 
118 |     def classification(self, x_enc, x_mark_enc):
119 |         # enc
120 |         enc_out = self.enc_embedding(x_enc, None)
121 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
122 | 
123 |         # Output
124 |         output = self.act(enc_out)  # the output transformer encoder/decoder embeddings don't include non-linearity
125 |         output = self.dropout(output)
126 |         output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
127 |         output = output.reshape(output.shape[0], -1)  # (batch_size, seq_length * d_model)
128 |         output = self.projection(output)  # (batch_size, num_classes)
129 |         return output
130 | 
131 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
132 |         if self.task_name == 'long_term_forecast':
133 |             dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
134 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
135 |         if self.task_name == 'short_term_forecast':
136 |             dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
137 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
138 |         if self.task_name == 'imputation':
139 |             dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
140 |             return dec_out  # [B, L, D]
141 |         if self.task_name == 'anomaly_detection':
142 |             dec_out = self.anomaly_detection(x_enc)
143 |             return dec_out  # [B, L, D]
144 |         if self.task_name == 'classification':
145 |             dec_out = self.classification(x_enc, x_mark_enc)
146 |             return dec_out  # [B, N]
147 |         return None
148 | 


--------------------------------------------------------------------------------
/models/LightTS.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class IEBlock(nn.Module):
  7 |     def __init__(self, input_dim, hid_dim, output_dim, num_node):
  8 |         super(IEBlock, self).__init__()
  9 | 
 10 |         self.input_dim = input_dim
 11 |         self.hid_dim = hid_dim
 12 |         self.output_dim = output_dim
 13 |         self.num_node = num_node
 14 | 
 15 |         self._build()
 16 | 
 17 |     def _build(self):
 18 |         self.spatial_proj = nn.Sequential(
 19 |             nn.Linear(self.input_dim, self.hid_dim),
 20 |             nn.LeakyReLU(),
 21 |             nn.Linear(self.hid_dim, self.hid_dim // 4)
 22 |         )
 23 | 
 24 |         self.channel_proj = nn.Linear(self.num_node, self.num_node)
 25 |         torch.nn.init.eye_(self.channel_proj.weight)
 26 | 
 27 |         self.output_proj = nn.Linear(self.hid_dim // 4, self.output_dim)
 28 | 
 29 |     def forward(self, x):
 30 |         x = self.spatial_proj(x.permute(0, 2, 1))
 31 |         x = x.permute(0, 2, 1) + self.channel_proj(x.permute(0, 2, 1))
 32 |         x = self.output_proj(x.permute(0, 2, 1))
 33 | 
 34 |         x = x.permute(0, 2, 1)
 35 | 
 36 |         return x
 37 | 
 38 | 
 39 | class Model(nn.Module):
 40 |     """
 41 |     Paper link: https://arxiv.org/abs/2207.01186
 42 |     """
 43 | 
 44 |     def __init__(self, configs, chunk_size=24):
 45 |         """
 46 |         chunk_size: int, reshape T into [num_chunks, chunk_size]
 47 |         """
 48 |         super(Model, self).__init__()
 49 |         self.task_name = configs.task_name
 50 |         self.seq_len = configs.seq_len
 51 |         if self.task_name == 'classification' or self.task_name == 'anomaly_detection' or self.task_name == 'imputation':
 52 |             self.pred_len = configs.seq_len
 53 |         else:
 54 |             self.pred_len = configs.pred_len
 55 | 
 56 |         if configs.task_name == 'long_term_forecast' or configs.task_name == 'short_term_forecast':
 57 |             self.chunk_size = min(configs.pred_len, configs.seq_len, chunk_size)
 58 |         else:
 59 |             self.chunk_size = min(configs.seq_len, chunk_size)
 60 |         assert (self.seq_len % self.chunk_size == 0)
 61 |         self.num_chunks = self.seq_len // self.chunk_size
 62 | 
 63 |         self.d_model = configs.d_model
 64 |         self.enc_in = configs.enc_in
 65 |         self.dropout = configs.dropout
 66 |         if self.task_name == 'classification':
 67 |             self.act = F.gelu
 68 |             self.dropout = nn.Dropout(configs.dropout)
 69 |             self.projection = nn.Linear(configs.enc_in * configs.seq_len, configs.num_class)
 70 |         self._build()
 71 | 
 72 |     def _build(self):
 73 |         self.layer_1 = IEBlock(
 74 |             input_dim=self.chunk_size,
 75 |             hid_dim=self.d_model // 4,
 76 |             output_dim=self.d_model // 4,
 77 |             num_node=self.num_chunks
 78 |         )
 79 | 
 80 |         self.chunk_proj_1 = nn.Linear(self.num_chunks, 1)
 81 | 
 82 |         self.layer_2 = IEBlock(
 83 |             input_dim=self.chunk_size,
 84 |             hid_dim=self.d_model // 4,
 85 |             output_dim=self.d_model // 4,
 86 |             num_node=self.num_chunks
 87 |         )
 88 | 
 89 |         self.chunk_proj_2 = nn.Linear(self.num_chunks, 1)
 90 | 
 91 |         self.layer_3 = IEBlock(
 92 |             input_dim=self.d_model // 2,
 93 |             hid_dim=self.d_model // 2,
 94 |             output_dim=self.pred_len,
 95 |             num_node=self.enc_in
 96 |         )
 97 | 
 98 |         self.ar = nn.Linear(self.seq_len, self.pred_len)
 99 | 
100 |     def encoder(self, x):
101 |         B, T, N = x.size()
102 | 
103 |         highway = self.ar(x.permute(0, 2, 1))
104 |         highway = highway.permute(0, 2, 1)
105 | 
106 |         # continuous sampling
107 |         x1 = x.reshape(B, self.num_chunks, self.chunk_size, N)
108 |         x1 = x1.permute(0, 3, 2, 1)
109 |         x1 = x1.reshape(-1, self.chunk_size, self.num_chunks)
110 |         x1 = self.layer_1(x1)
111 |         x1 = self.chunk_proj_1(x1).squeeze(dim=-1)
112 | 
113 |         # interval sampling
114 |         x2 = x.reshape(B, self.chunk_size, self.num_chunks, N)
115 |         x2 = x2.permute(0, 3, 1, 2)
116 |         x2 = x2.reshape(-1, self.chunk_size, self.num_chunks)
117 |         x2 = self.layer_2(x2)
118 |         x2 = self.chunk_proj_2(x2).squeeze(dim=-1)
119 | 
120 |         x3 = torch.cat([x1, x2], dim=-1)
121 | 
122 |         x3 = x3.reshape(B, N, -1)
123 |         x3 = x3.permute(0, 2, 1)
124 | 
125 |         out = self.layer_3(x3)
126 | 
127 |         out = out + highway
128 |         return out
129 | 
130 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
131 |         return self.encoder(x_enc)
132 | 
133 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
134 |         return self.encoder(x_enc)
135 | 
136 |     def anomaly_detection(self, x_enc):
137 |         return self.encoder(x_enc)
138 | 
139 |     def classification(self, x_enc, x_mark_enc):
140 |         enc_out = self.encoder(x_enc)
141 | 
142 |         # Output
143 |         output = enc_out.reshape(enc_out.shape[0], -1)  # (batch_size, seq_length * d_model)
144 |         output = self.projection(output)  # (batch_size, num_classes)
145 |         return output
146 | 
147 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
148 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
149 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
150 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
151 |         if self.task_name == 'imputation':
152 |             dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
153 |             return dec_out  # [B, L, D]
154 |         if self.task_name == 'anomaly_detection':
155 |             dec_out = self.anomaly_detection(x_enc)
156 |             return dec_out  # [B, L, D]
157 |         if self.task_name == 'classification':
158 |             dec_out = self.classification(x_enc, x_mark_enc)
159 |             return dec_out  # [B, N]
160 |         return None
161 | 


--------------------------------------------------------------------------------
/models/Pyraformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from layers.Pyraformer_EncDec import Encoder
  4 | 
  5 | 
  6 | class Model(nn.Module):
  7 |     """ 
  8 |     Pyraformer: Pyramidal attention to reduce complexity
  9 |     Paper link: https://openreview.net/pdf?id=0EXmFzUn5I
 10 |     """
 11 | 
 12 |     def __init__(self, configs, window_size=[4,4], inner_size=5):
 13 |         """
 14 |         window_size: list, the downsample window size in pyramidal attention.
 15 |         inner_size: int, the size of neighbour attention
 16 |         """
 17 |         super().__init__()
 18 |         self.task_name = configs.task_name
 19 |         self.pred_len = configs.pred_len
 20 |         self.d_model = configs.d_model
 21 | 
 22 |         if self.task_name == 'short_term_forecast':
 23 |             window_size = [2,2]
 24 |         self.encoder = Encoder(configs, window_size, inner_size)
 25 | 
 26 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 27 |             self.projection = nn.Linear(
 28 |                 (len(window_size)+1)*self.d_model, self.pred_len * configs.enc_in)
 29 |         elif self.task_name == 'imputation' or self.task_name == 'anomaly_detection':
 30 |             self.projection = nn.Linear(
 31 |                 (len(window_size)+1)*self.d_model, configs.enc_in, bias=True)
 32 |         elif self.task_name == 'classification':
 33 |             self.act = torch.nn.functional.gelu
 34 |             self.dropout = nn.Dropout(configs.dropout)
 35 |             self.projection = nn.Linear(
 36 |                 (len(window_size)+1)*self.d_model * configs.seq_len, configs.num_class)
 37 | 
 38 |     def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
 39 |         enc_out = self.encoder(x_enc, x_mark_enc)[:, -1, :]
 40 |         dec_out = self.projection(enc_out).view(
 41 |             enc_out.size(0), self.pred_len, -1)
 42 |         return dec_out
 43 |     
 44 |     def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
 45 |         # Normalization
 46 |         mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
 47 |         x_enc = x_enc - mean_enc
 48 |         std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
 49 |         x_enc = x_enc / std_enc
 50 | 
 51 |         enc_out = self.encoder(x_enc, x_mark_enc)[:, -1, :]
 52 |         dec_out = self.projection(enc_out).view(
 53 |             enc_out.size(0), self.pred_len, -1)
 54 |         
 55 |         dec_out = dec_out * std_enc + mean_enc
 56 |         return dec_out
 57 | 
 58 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
 59 |         enc_out = self.encoder(x_enc, x_mark_enc)
 60 |         dec_out = self.projection(enc_out)
 61 |         return dec_out
 62 | 
 63 |     def anomaly_detection(self, x_enc, x_mark_enc):
 64 |         enc_out = self.encoder(x_enc, x_mark_enc)
 65 |         dec_out = self.projection(enc_out)
 66 |         return dec_out
 67 | 
 68 |     def classification(self, x_enc, x_mark_enc):
 69 |         # enc
 70 |         enc_out = self.encoder(x_enc, x_mark_enc=None)
 71 | 
 72 |         # Output
 73 |         # the output transformer encoder/decoder embeddings don't include non-linearity
 74 |         output = self.act(enc_out)
 75 |         output = self.dropout(output)
 76 |         # zero-out padding embeddings
 77 |         output = output * x_mark_enc.unsqueeze(-1)
 78 |         # (batch_size, seq_length * d_model)
 79 |         output = output.reshape(output.shape[0], -1)
 80 |         output = self.projection(output)  # (batch_size, num_classes)
 81 | 
 82 |         return output
 83 | 
 84 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
 85 |         if self.task_name == 'long_term_forecast':
 86 |             dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
 87 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
 88 |         if self.task_name == 'short_term_forecast':
 89 |             dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
 90 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
 91 |         if self.task_name == 'imputation':
 92 |             dec_out = self.imputation(
 93 |                 x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
 94 |             return dec_out  # [B, L, D]
 95 |         if self.task_name == 'anomaly_detection':
 96 |             dec_out = self.anomaly_detection(x_enc, x_mark_enc)
 97 |             return dec_out  # [B, L, D]
 98 |         if self.task_name == 'classification':
 99 |             dec_out = self.classification(x_enc, x_mark_enc)
100 |             return dec_out  # [B, N]
101 |         return None
102 | 


--------------------------------------------------------------------------------
/models/Reformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Transformer_EncDec import Encoder, EncoderLayer
  5 | from layers.SelfAttention_Family import ReformerLayer
  6 | from layers.Embed import DataEmbedding
  7 | 
  8 | 
  9 | class Model(nn.Module):
 10 |     """
 11 |     Reformer with O(LlogL) complexity
 12 |     Paper link: https://openreview.net/forum?id=rkgNKkHtvB
 13 |     """
 14 | 
 15 |     def __init__(self, configs, bucket_size=4, n_hashes=4):
 16 |         """
 17 |         bucket_size: int, 
 18 |         n_hashes: int, 
 19 |         """
 20 |         super(Model, self).__init__()
 21 |         self.task_name = configs.task_name
 22 |         self.pred_len = configs.pred_len
 23 |         self.seq_len = configs.seq_len
 24 | 
 25 |         self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
 26 |                                            configs.dropout)
 27 |         # Encoder
 28 |         self.encoder = Encoder(
 29 |             [
 30 |                 EncoderLayer(
 31 |                     ReformerLayer(None, configs.d_model, configs.n_heads,
 32 |                                   bucket_size=bucket_size, n_hashes=n_hashes),
 33 |                     configs.d_model,
 34 |                     configs.d_ff,
 35 |                     dropout=configs.dropout,
 36 |                     activation=configs.activation
 37 |                 ) for l in range(configs.e_layers)
 38 |             ],
 39 |             norm_layer=torch.nn.LayerNorm(configs.d_model)
 40 |         )
 41 | 
 42 |         if self.task_name == 'classification':
 43 |             self.act = F.gelu
 44 |             self.dropout = nn.Dropout(configs.dropout)
 45 |             self.projection = nn.Linear(
 46 |                 configs.d_model * configs.seq_len, configs.num_class)
 47 |         else:
 48 |             self.projection = nn.Linear(
 49 |                 configs.d_model, configs.c_out, bias=True)
 50 | 
 51 |     def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 52 |         # add placeholder
 53 |         x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1)
 54 |         if x_mark_enc is not None:
 55 |             x_mark_enc = torch.cat(
 56 |                 [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1)
 57 | 
 58 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
 59 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 60 |         dec_out = self.projection(enc_out)
 61 | 
 62 |         return dec_out  # [B, L, D]
 63 |     
 64 |     def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 65 |         # Normalization
 66 |         mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
 67 |         x_enc = x_enc - mean_enc
 68 |         std_enc = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5).detach()  # B x 1 x E
 69 |         x_enc = x_enc / std_enc
 70 | 
 71 |         # add placeholder
 72 |         x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1)
 73 |         if x_mark_enc is not None:
 74 |             x_mark_enc = torch.cat(
 75 |                 [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1)
 76 | 
 77 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
 78 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 79 |         dec_out = self.projection(enc_out)
 80 | 
 81 |         dec_out = dec_out * std_enc + mean_enc
 82 |         return dec_out  # [B, L, D]
 83 | 
 84 |     def imputation(self, x_enc, x_mark_enc):
 85 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
 86 | 
 87 |         enc_out, attns = self.encoder(enc_out)
 88 |         enc_out = self.projection(enc_out)
 89 | 
 90 |         return enc_out  # [B, L, D]
 91 | 
 92 |     def anomaly_detection(self, x_enc):
 93 |         enc_out = self.enc_embedding(x_enc, None)  # [B,T,C]
 94 | 
 95 |         enc_out, attns = self.encoder(enc_out)
 96 |         enc_out = self.projection(enc_out)
 97 | 
 98 |         return enc_out  # [B, L, D]
 99 | 
100 |     def classification(self, x_enc, x_mark_enc):
101 |         # enc
102 |         enc_out = self.enc_embedding(x_enc, None)
103 |         enc_out, attns = self.encoder(enc_out)
104 | 
105 |         # Output
106 |         # the output transformer encoder/decoder embeddings don't include non-linearity
107 |         output = self.act(enc_out)
108 |         output = self.dropout(output)
109 |         # zero-out padding embeddings
110 |         output = output * x_mark_enc.unsqueeze(-1)
111 |         # (batch_size, seq_length * d_model)
112 |         output = output.reshape(output.shape[0], -1)
113 |         output = self.projection(output)  # (batch_size, num_classes)
114 |         return output
115 | 
116 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
117 |         if self.task_name == 'long_term_forecast':
118 |             dec_out = self.long_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
119 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
120 |         if self.task_name == 'short_term_forecast':
121 |             dec_out = self.short_forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
122 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
123 |         if self.task_name == 'imputation':
124 |             dec_out = self.imputation(x_enc, x_mark_enc)
125 |             return dec_out  # [B, L, D]
126 |         if self.task_name == 'anomaly_detection':
127 |             dec_out = self.anomaly_detection(x_enc)
128 |             return dec_out  # [B, L, D]
129 |         if self.task_name == 'classification':
130 |             dec_out = self.classification(x_enc, x_mark_enc)
131 |             return dec_out  # [B, N]
132 |         return None
133 | 


--------------------------------------------------------------------------------
/models/TiDE.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | 
  6 | class LayerNorm(nn.Module):
  7 |     """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """
  8 | 
  9 |     def __init__(self, ndim, bias):
 10 |         super().__init__()
 11 |         self.weight = nn.Parameter(torch.ones(ndim))
 12 |         self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None
 13 | 
 14 |     def forward(self, input):
 15 |         return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)
 16 | 
 17 | 
 18 | 
 19 | class ResBlock(nn.Module):
 20 |     def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.1, bias=True): 
 21 |         super().__init__()
 22 | 
 23 |         self.fc1 = nn.Linear(input_dim, hidden_dim, bias=bias) 
 24 |         self.fc2 = nn.Linear(hidden_dim, output_dim, bias=bias)
 25 |         self.fc3 = nn.Linear(input_dim, output_dim, bias=bias)
 26 |         self.dropout = nn.Dropout(dropout)
 27 |         self.relu = nn.ReLU()
 28 |         self.ln = LayerNorm(output_dim, bias=bias)
 29 |         
 30 |     def forward(self, x):
 31 | 
 32 |         out = self.fc1(x)
 33 |         out = self.relu(out)
 34 |         out = self.fc2(out)
 35 |         out = self.dropout(out)
 36 |         out = out + self.fc3(x)
 37 |         out = self.ln(out)
 38 |         return out
 39 | 
 40 | 
 41 | #TiDE
 42 | class Model(nn.Module):  
 43 |     """
 44 |     paper: https://arxiv.org/pdf/2304.08424.pdf 
 45 |     """
 46 |     def __init__(self, configs, bias=True, feature_encode_dim=2): 
 47 |         super(Model, self).__init__()
 48 |         self.configs = configs
 49 |         self.task_name = configs.task_name
 50 |         self.seq_len = configs.seq_len  #L 
 51 |         self.label_len = configs.label_len
 52 |         self.pred_len = configs.pred_len  #H 
 53 |         self.hidden_dim=configs.d_model
 54 |         self.res_hidden=configs.d_model 
 55 |         self.encoder_num=configs.e_layers
 56 |         self.decoder_num=configs.d_layers
 57 |         self.freq=configs.freq
 58 |         self.feature_encode_dim=feature_encode_dim
 59 |         self.decode_dim = configs.c_out
 60 |         self.temporalDecoderHidden=configs.d_ff
 61 |         dropout=configs.dropout
 62 | 
 63 |         
 64 |         freq_map = {'h': 4, 't': 5, 's': 6,
 65 |                     'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
 66 |         
 67 |         self.feature_dim=freq_map[self.freq]
 68 | 
 69 | 
 70 |         flatten_dim = self.seq_len + (self.seq_len + self.pred_len) * self.feature_encode_dim
 71 | 
 72 |         self.feature_encoder = ResBlock(self.feature_dim, self.res_hidden, self.feature_encode_dim, dropout, bias)
 73 |         self.encoders = nn.Sequential(ResBlock(flatten_dim, self.res_hidden, self.hidden_dim, dropout, bias),*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.encoder_num-1)))
 74 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 75 |             self.decoders = nn.Sequential(*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.decoder_num-1)),ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * self.pred_len, dropout, bias))
 76 |             self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias)
 77 |             self.residual_proj = nn.Linear(self.seq_len, self.pred_len, bias=bias)
 78 |         if self.task_name == 'imputation':
 79 |             self.decoders = nn.Sequential(*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.decoder_num-1)),ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * self.seq_len, dropout, bias))
 80 |             self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias)
 81 |             self.residual_proj = nn.Linear(self.seq_len, self.seq_len, bias=bias)
 82 |         if self.task_name == 'anomaly_detection':
 83 |             self.decoders = nn.Sequential(*([ ResBlock(self.hidden_dim, self.res_hidden, self.hidden_dim, dropout, bias)]*(self.decoder_num-1)),ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * self.seq_len, dropout, bias))
 84 |             self.temporalDecoder = ResBlock(self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias)
 85 |             self.residual_proj = nn.Linear(self.seq_len, self.seq_len, bias=bias)
 86 |             
 87 |         
 88 |     def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark):
 89 |         # Normalization
 90 |         means = x_enc.mean(1, keepdim=True).detach()
 91 |         x_enc = x_enc - means
 92 |         stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
 93 |         x_enc /= stdev
 94 |         
 95 |         feature = self.feature_encoder(batch_y_mark)
 96 |         hidden = self.encoders(torch.cat([x_enc, feature.reshape(feature.shape[0], -1)], dim=-1))
 97 |         decoded = self.decoders(hidden).reshape(hidden.shape[0], self.pred_len, self.decode_dim)
 98 |         dec_out = self.temporalDecoder(torch.cat([feature[:,self.seq_len:], decoded], dim=-1)).squeeze(-1) + self.residual_proj(x_enc)
 99 |         
100 |         
101 |         # De-Normalization 
102 |         dec_out = dec_out * (stdev[:, 0].unsqueeze(1).repeat(1, self.pred_len))
103 |         dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.pred_len))
104 |         return dec_out
105 |     
106 |     def imputation(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask):
107 |         # Normalization
108 |         means = x_enc.mean(1, keepdim=True).detach()
109 |         x_enc = x_enc - means
110 |         stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
111 |         x_enc /= stdev
112 | 
113 |         feature = self.feature_encoder(x_mark_enc)
114 |         hidden = self.encoders(torch.cat([x_enc, feature.reshape(feature.shape[0], -1)], dim=-1))
115 |         decoded = self.decoders(hidden).reshape(hidden.shape[0], self.seq_len, self.decode_dim)
116 |         dec_out = self.temporalDecoder(torch.cat([feature[:,:self.seq_len], decoded], dim=-1)).squeeze(-1) + self.residual_proj(x_enc)
117 |     
118 |         # De-Normalization 
119 |         dec_out = dec_out * (stdev[:, 0].unsqueeze(1).repeat(1, self.seq_len))
120 |         dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, self.seq_len))
121 |         return dec_out
122 |     
123 |     
124 |     def forward(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask=None):
125 |         '''x_mark_enc is the exogenous dynamic feature described in the original paper'''
126 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
127 |             batch_y_mark=torch.concat([x_mark_enc, batch_y_mark[:, -self.pred_len:, :]],dim=1)
128 |             dec_out = torch.stack([self.forecast(x_enc[:, :, feature], x_mark_enc, x_dec, batch_y_mark) for feature in range(x_enc.shape[-1])],dim=-1)
129 |             return dec_out # [B, L, D]
130 |         if self.task_name == 'imputation':
131 |             dec_out = torch.stack([self.imputation(x_enc[:, :, feature], x_mark_enc, x_dec, batch_y_mark, mask) for feature in range(x_enc.shape[-1])],dim=-1)
132 |             return dec_out  # [B, L, D]
133 |         if self.task_name == 'anomaly_detection':
134 |             raise NotImplementedError("Task anomaly_detection for Tide is temporarily not supported")
135 |         if self.task_name == 'classification':
136 |             raise NotImplementedError("Task classification for Tide is temporarily not supported")
137 |         return None
138 |     
139 |     
140 | 
141 | 
142 | 
143 | 


--------------------------------------------------------------------------------
/models/Transformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Transformer_EncDec import Decoder, DecoderLayer, Encoder, EncoderLayer, ConvLayer
  5 | from layers.SelfAttention_Family import FullAttention, AttentionLayer
  6 | from layers.Embed import DataEmbedding
  7 | import numpy as np
  8 | 
  9 | 
 10 | class Model(nn.Module):
 11 |     """
 12 |     Vanilla Transformer
 13 |     with O(L^2) complexity
 14 |     Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf
 15 |     """
 16 | 
 17 |     def __init__(self, configs):
 18 |         super(Model, self).__init__()
 19 |         self.task_name = configs.task_name
 20 |         self.pred_len = configs.pred_len
 21 |         self.output_attention = configs.output_attention
 22 |         # Embedding
 23 |         self.enc_embedding = DataEmbedding(configs.enc_in, configs.d_model, configs.embed, configs.freq,
 24 |                                            configs.dropout)
 25 |         # Encoder
 26 |         self.encoder = Encoder(
 27 |             [
 28 |                 EncoderLayer(
 29 |                     AttentionLayer(
 30 |                         FullAttention(False, configs.factor, attention_dropout=configs.dropout,
 31 |                                       output_attention=configs.output_attention), configs.d_model, configs.n_heads),
 32 |                     configs.d_model,
 33 |                     configs.d_ff,
 34 |                     dropout=configs.dropout,
 35 |                     activation=configs.activation
 36 |                 ) for l in range(configs.e_layers)
 37 |             ],
 38 |             norm_layer=torch.nn.LayerNorm(configs.d_model)
 39 |         )
 40 |         # Decoder
 41 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 42 |             self.dec_embedding = DataEmbedding(configs.dec_in, configs.d_model, configs.embed, configs.freq,
 43 |                                                configs.dropout)
 44 |             self.decoder = Decoder(
 45 |                 [
 46 |                     DecoderLayer(
 47 |                         AttentionLayer(
 48 |                             FullAttention(True, configs.factor, attention_dropout=configs.dropout,
 49 |                                           output_attention=False),
 50 |                             configs.d_model, configs.n_heads),
 51 |                         AttentionLayer(
 52 |                             FullAttention(False, configs.factor, attention_dropout=configs.dropout,
 53 |                                           output_attention=False),
 54 |                             configs.d_model, configs.n_heads),
 55 |                         configs.d_model,
 56 |                         configs.d_ff,
 57 |                         dropout=configs.dropout,
 58 |                         activation=configs.activation,
 59 |                     )
 60 |                     for l in range(configs.d_layers)
 61 |                 ],
 62 |                 norm_layer=torch.nn.LayerNorm(configs.d_model),
 63 |                 projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
 64 |             )
 65 |         if self.task_name == 'imputation':
 66 |             self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
 67 |         if self.task_name == 'anomaly_detection':
 68 |             self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
 69 |         if self.task_name == 'classification':
 70 |             self.act = F.gelu
 71 |             self.dropout = nn.Dropout(configs.dropout)
 72 |             self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)
 73 | 
 74 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 75 |         # Embedding
 76 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 77 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 78 | 
 79 |         dec_out = self.dec_embedding(x_dec, x_mark_dec)
 80 |         dec_out = self.decoder(dec_out, enc_out, x_mask=None, cross_mask=None)
 81 |         return dec_out
 82 | 
 83 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
 84 |         # Embedding
 85 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 86 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 87 | 
 88 |         dec_out = self.projection(enc_out)
 89 |         return dec_out
 90 | 
 91 |     def anomaly_detection(self, x_enc):
 92 |         # Embedding
 93 |         enc_out = self.enc_embedding(x_enc, None)
 94 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 95 | 
 96 |         dec_out = self.projection(enc_out)
 97 |         return dec_out
 98 | 
 99 |     def classification(self, x_enc, x_mark_enc):
100 |         # Embedding
101 |         enc_out = self.enc_embedding(x_enc, None)
102 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
103 | 
104 |         # Output
105 |         output = self.act(enc_out)  # the output transformer encoder/decoder embeddings don't include non-linearity
106 |         output = self.dropout(output)
107 |         output = output * x_mark_enc.unsqueeze(-1)  # zero-out padding embeddings
108 |         output = output.reshape(output.shape[0], -1)  # (batch_size, seq_length * d_model)
109 |         output = self.projection(output)  # (batch_size, num_classes)
110 |         return output
111 | 
112 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
113 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
114 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
115 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
116 |         if self.task_name == 'imputation':
117 |             dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
118 |             return dec_out  # [B, L, D]
119 |         if self.task_name == 'anomaly_detection':
120 |             dec_out = self.anomaly_detection(x_enc)
121 |             return dec_out  # [B, L, D]
122 |         if self.task_name == 'classification':
123 |             dec_out = self.classification(x_enc, x_mark_enc)
124 |             return dec_out  # [B, N]
125 |         return None
126 | 


--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/comp-well-org/AdaWaveNet/a03f080c96af3420c1c5006c8d4c6f3e78449ee6/models/__init__.py


--------------------------------------------------------------------------------
/models/iTransformer.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | from layers.Transformer_EncDec import Encoder, EncoderLayer
  5 | from layers.SelfAttention_Family import FullAttention, AttentionLayer
  6 | from layers.Embed import DataEmbedding_inverted
  7 | import numpy as np
  8 | 
  9 | class Model(nn.Module):
 10 |     """
 11 |     Paper link: https://arxiv.org/abs/2310.06625
 12 |     """
 13 | 
 14 |     def __init__(self, configs):
 15 |         super(Model, self).__init__()
 16 |         self.task_name = configs.task_name
 17 |         self.seq_len = configs.seq_len
 18 |         if self.task_name == 'super_resolution':
 19 |             self.sr_ratio = configs.sr_ratio
 20 |             self.seq_len = self.seq_len // configs.sr_ratio
 21 |         self.pred_len = configs.pred_len
 22 |         self.output_attention = configs.output_attention
 23 |         # Embedding
 24 |         self.enc_embedding = DataEmbedding_inverted(self.seq_len, configs.d_model, configs.embed, configs.freq,
 25 |                                                     configs.dropout)
 26 |         # Encoder
 27 |         self.encoder = Encoder(
 28 |             [
 29 |                 EncoderLayer(
 30 |                     AttentionLayer(
 31 |                         FullAttention(False, configs.factor, attention_dropout=configs.dropout,
 32 |                                       output_attention=configs.output_attention), configs.d_model, configs.n_heads),
 33 |                     configs.d_model,
 34 |                     configs.d_ff,
 35 |                     dropout=configs.dropout,
 36 |                     activation=configs.activation
 37 |                 ) for l in range(configs.e_layers)
 38 |             ],
 39 |             norm_layer=torch.nn.LayerNorm(configs.d_model)
 40 |         )
 41 |         # Decoder
 42 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
 43 |             self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True)
 44 |         if self.task_name == 'super_resolution':
 45 |             self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True)
 46 |         if self.task_name == 'imputation':
 47 |             self.projection = nn.Linear(configs.d_model, self.seq_len, bias=True)
 48 |         if self.task_name == 'anomaly_detection':
 49 |             self.projection = nn.Linear(configs.d_model, self.seq_len, bias=True)
 50 |         if self.task_name == 'classification':
 51 |             self.act = F.gelu
 52 |             self.dropout = nn.Dropout(configs.dropout)
 53 |             self.projection = nn.Linear(configs.d_model * configs.enc_in, configs.num_class)
 54 | 
 55 |     def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
 56 |         # Normalization from Non-stationary Transformer
 57 |         means = x_enc.mean(1, keepdim=True).detach()
 58 |         x_enc = x_enc - means
 59 |         stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
 60 |         x_enc /= stdev
 61 | 
 62 |         _, _, N = x_enc.shape
 63 | 
 64 |         # Embedding
 65 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 66 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 67 | 
 68 |         dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
 69 |         # De-Normalization from Non-stationary Transformer
 70 |         dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
 71 |         dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
 72 |         return dec_out
 73 | 
 74 |     def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
 75 |         # Normalization from Non-stationary Transformer
 76 |         means = x_enc.mean(1, keepdim=True).detach()
 77 |         x_enc = x_enc - means
 78 |         stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
 79 |         x_enc /= stdev
 80 | 
 81 |         _, L, N = x_enc.shape
 82 | 
 83 |         # Embedding
 84 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 85 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
 86 | 
 87 |         dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
 88 |         # De-Normalization from Non-stationary Transformer
 89 |         dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1))
 90 |         dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1))
 91 |         return dec_out
 92 | 
 93 |     def anomaly_detection(self, x_enc):
 94 |         # Normalization from Non-stationary Transformer
 95 |         means = x_enc.mean(1, keepdim=True).detach()
 96 |         x_enc = x_enc - means
 97 |         stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
 98 |         x_enc /= stdev
 99 | 
100 |         _, L, N = x_enc.shape
101 | 
102 |         # Embedding
103 |         enc_out = self.enc_embedding(x_enc, None)
104 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
105 | 
106 |         dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
107 |         # De-Normalization from Non-stationary Transformer
108 |         dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, L, 1))
109 |         dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, L, 1))
110 |         return dec_out
111 | 
112 |     def classification(self, x_enc, x_mark_enc):
113 |         # Embedding
114 |         enc_out = self.enc_embedding(x_enc, None)
115 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
116 | 
117 |         # Output
118 |         output = self.act(enc_out)  # the output transformer encoder/decoder embeddings don't include non-linearity
119 |         output = self.dropout(output)
120 |         output = output.reshape(output.shape[0], -1)  # (batch_size, c_in * d_model)
121 |         output = self.projection(output)  # (batch_size, num_classes)
122 |         return output
123 | 
124 |     def super_resolution(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
125 |         # Normalization from Non-stationary Transformer
126 |         means = x_enc.mean(1, keepdim=True).detach()
127 |         x_enc = x_enc - means
128 |         stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
129 |         x_enc /= stdev
130 | 
131 |         _, _, N = x_enc.shape
132 | 
133 |         # Embedding
134 |         enc_out = self.enc_embedding(x_enc, x_mark_enc[:, ::self.sr_ratio, :])
135 |         enc_out, attns = self.encoder(enc_out, attn_mask=None)
136 | 
137 |         dec_out = self.projection(enc_out).permute(0, 2, 1)[:, :, :N]
138 |         # De-Normalization from Non-stationary Transformer
139 |         dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
140 |         dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, self.pred_len, 1))
141 |         return dec_out
142 |     
143 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
144 |         if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
145 |             dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
146 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
147 |         if self.task_name == 'imputation':
148 |             dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
149 |             return dec_out  # [B, L, D]
150 |         if self.task_name == 'anomaly_detection':
151 |             dec_out = self.anomaly_detection(x_enc)
152 |             return dec_out  # [B, L, D]
153 |         if self.task_name == 'classification':
154 |             dec_out = self.classification(x_enc, x_mark_enc)
155 |             return dec_out  # [B, N]
156 |         if self.task_name == 'super_resolution':
157 |             dec_out = self.super_resolution(x_enc, x_mark_enc, x_dec, x_mark_dec)
158 |             return dec_out[:, -self.pred_len:, :]  # [B, L, D]
159 |         return None
160 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | einops==0.4.0
 2 | matplotlib==3.7.0
 3 | numpy==1.23.5
 4 | pandas==1.5.3
 5 | patool==1.12
 6 | reformer-pytorch==1.4.4
 7 | scikit-learn==1.2.2
 8 | scipy==1.10.1
 9 | sktime==0.16.1
10 | sympy==1.11.1
11 | torch==1.7.1
12 | tqdm==4.64.1
13 | 


--------------------------------------------------------------------------------
/scripts/long_term_forecast/ECL_script/AdaWaveNet.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=AdaWaveNet
  4 | 
  5 | python -u run.py \
  6 |   --task_name long_term_forecast \
  7 |   --is_training 1 \
  8 |   --root_path ./dataset/electricity/ \
  9 |   --data_path electricity.csv \
 10 |   --model_id ECL_96_96 \
 11 |   --model $model_name \
 12 |   --data custom \
 13 |   --features M \
 14 |   --seq_len 96 \
 15 |   --label_len 48 \
 16 |   --pred_len 96 \
 17 |   --e_layers 2 \
 18 |   --d_layers 1 \
 19 |   --factor 3 \
 20 |   --enc_in 321 \
 21 |   --dec_in 321 \
 22 |   --c_out 321 \
 23 |   --d_model 256\
 24 |   --d_ff 256\
 25 |   --batch_size 16\
 26 |   --learning_rate 0.0005\
 27 |   --itr 1\
 28 |   --lifting_levels 3\
 29 |   --lifting_kernel_size 7\
 30 |   --n_cluster=4
 31 | 
 32 | python -u run.py \
 33 |   --task_name long_term_forecast \
 34 |   --is_training 1 \
 35 |   --root_path ./dataset/electricity/ \
 36 |   --data_path electricity.csv \
 37 |   --model_id ECL_192_192 \
 38 |   --model $model_name \
 39 |   --data custom \
 40 |   --features M \
 41 |   --seq_len 192 \
 42 |   --label_len 48 \
 43 |   --pred_len 192 \
 44 |   --e_layers 2 \
 45 |   --d_layers 1 \
 46 |   --factor 3 \
 47 |   --enc_in 321 \
 48 |   --dec_in 321 \
 49 |   --c_out 321 \
 50 |   --d_model 256\
 51 |   --d_ff 256\
 52 |   --batch_size 16\
 53 |   --learning_rate 0.0005\
 54 |   --itr 1\
 55 |   --lifting_levels 3\
 56 |   --lifting_kernel_size 7\
 57 |   --n_cluster=4
 58 | 
 59 | python -u run.py \
 60 |   --task_name long_term_forecast \
 61 |   --is_training 1 \
 62 |   --root_path ./dataset/electricity/ \
 63 |   --data_path electricity.csv \
 64 |   --model_id ECL_336_336 \
 65 |   --model $model_name \
 66 |   --data custom \
 67 |   --features M \
 68 |   --seq_len 336 \
 69 |   --label_len 48 \
 70 |   --pred_len 336 \
 71 |   --e_layers 2 \
 72 |   --d_layers 1 \
 73 |   --factor 3 \
 74 |   --enc_in 321 \
 75 |   --dec_in 321 \
 76 |   --c_out 321 \
 77 |   --d_model 256\
 78 |   --d_ff 256\
 79 |   --batch_size 16\
 80 |   --learning_rate 0.0005\
 81 |   --itr 1\
 82 |   --lifting_levels 3\
 83 |   --lifting_kernel_size 7\
 84 |   --n_cluster=4
 85 | 
 86 | python -u run.py \
 87 |   --task_name long_term_forecast \
 88 |   --is_training 1 \
 89 |   --root_path ./dataset/electricity/ \
 90 |   --data_path electricity.csv \
 91 |   --model_id ECL_720_720 \
 92 |   --model $model_name \
 93 |   --data custom \
 94 |   --features M \
 95 |   --seq_len 720 \
 96 |   --label_len 48 \
 97 |   --pred_len 720 \
 98 |   --e_layers 2 \
 99 |   --d_layers 1 \
100 |   --factor 3 \
101 |   --enc_in 321 \
102 |   --dec_in 321 \
103 |   --c_out 321 \
104 |   --d_model 256\
105 |   --d_ff 256\
106 |   --batch_size 16\
107 |   --learning_rate 0.0005\
108 |   --itr 1\
109 |   --lifting_levels 3\
110 |   --lifting_kernel_size 7\
111 |   --n_cluster=4


--------------------------------------------------------------------------------
/scripts/long_term_forecast/ETT_script/AdaWaveNet_ETTh1.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py \
  6 |   --task_name long_term_forecast \
  7 |   --is_training 1 \
  8 |   --root_path ./dataset/ETT-small/ \
  9 |   --data_path ETTh1.csv \
 10 |   --model_id ETTh1_96_96 \
 11 |   --model $model_name \
 12 |   --data ETTh1 \
 13 |   --features M \
 14 |   --seq_len 96 \
 15 |   --label_len 48 \
 16 |   --pred_len 96 \
 17 |   --e_layers 3 \
 18 |   --d_layers 1 \
 19 |   --factor 3 \
 20 |   --enc_in 7 \
 21 |   --dec_in 7 \
 22 |   --c_out 7 \
 23 |   --des 'Exp' \
 24 |   --d_model 512\
 25 |   --d_ff 512\
 26 |   --itr 1 \
 27 |   --lifting_levels 4\
 28 |   --lifting_kernel_size 7\
 29 |   --n_cluster 4\
 30 |   --learning_rate 0.0005\
 31 |   --batch_size 16
 32 | 
 33 | python -u run.py \
 34 |   --task_name long_term_forecast \
 35 |   --is_training 1 \
 36 |   --root_path ./dataset/ETT-small/ \
 37 |   --data_path ETTh1.csv \
 38 |   --model_id ETTh1_192_192 \
 39 |   --model $model_name \
 40 |   --data ETTh1 \
 41 |   --features M \
 42 |   --seq_len 192 \
 43 |   --label_len 48 \
 44 |   --pred_len 192 \
 45 |   --e_layers 3 \
 46 |   --d_layers 1 \
 47 |   --factor 3 \
 48 |   --enc_in 7 \
 49 |   --dec_in 7 \
 50 |   --c_out 7 \
 51 |   --des 'Exp' \
 52 |   --d_model 512\
 53 |   --d_ff 512\
 54 |   --itr 1 \
 55 |   --lifting_levels 4\
 56 |   --lifting_kernel_size 7\
 57 |   --n_cluster 4\
 58 |   --learning_rate 0.0005\
 59 |   --batch_size 16
 60 | 
 61 | python -u run.py \
 62 |   --task_name long_term_forecast \
 63 |   --is_training 1 \
 64 |   --root_path ./dataset/ETT-small/ \
 65 |   --data_path ETTh1.csv \
 66 |   --model_id ETTh1_336_336 \
 67 |   --model $model_name \
 68 |   --data ETTh1 \
 69 |   --features M \
 70 |   --seq_len 336 \
 71 |   --label_len 48 \
 72 |   --pred_len 336 \
 73 |   --e_layers 3 \
 74 |   --d_layers 1 \
 75 |   --factor 3 \
 76 |   --enc_in 7 \
 77 |   --dec_in 7 \
 78 |   --c_out 7 \
 79 |   --des 'Exp' \
 80 |   --d_model 512\
 81 |   --d_ff 512\
 82 |   --itr 1 \
 83 |   --lifting_levels 4\
 84 |   --lifting_kernel_size 7\
 85 |   --n_cluster 4\
 86 |   --learning_rate 0.0005\
 87 |   --batch_size 16
 88 | 
 89 | python -u run.py \
 90 |   --task_name long_term_forecast \
 91 |   --is_training 1 \
 92 |   --root_path ./dataset/ETT-small/ \
 93 |   --data_path ETTh1.csv \
 94 |   --model_id ETTh1_720_720 \
 95 |   --model $model_name \
 96 |   --data ETTh1 \
 97 |   --features M \
 98 |   --seq_len 720 \
 99 |   --label_len 48 \
100 |   --pred_len 720 \
101 |   --e_layers 3 \
102 |   --d_layers 1 \
103 |   --factor 3 \
104 |   --enc_in 7 \
105 |   --dec_in 7 \
106 |   --c_out 7 \
107 |   --des 'Exp' \
108 |   --d_model 512\
109 |   --d_ff 512\
110 |   --itr 1 \
111 |   --lifting_levels 4\
112 |   --lifting_kernel_size 7\
113 |   --n_cluster 4\
114 |   --learning_rate 0.0005\
115 |   --batch_size 16


--------------------------------------------------------------------------------
/scripts/long_term_forecast/ETT_script/AdaWaveNet_ETTm1.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py \
  6 |   --task_name long_term_forecast \
  7 |   --is_training 1 \
  8 |   --root_path ./dataset/ETT-small/ \
  9 |   --data_path ETTm1.csv \
 10 |   --model_id ETTm1_96_96 \
 11 |   --model $model_name \
 12 |   --data ETTm1 \
 13 |   --features M \
 14 |   --seq_len 96 \
 15 |   --label_len 48 \
 16 |   --pred_len 96 \
 17 |   --e_layers 3 \
 18 |   --d_layers 1 \
 19 |   --factor 3 \
 20 |   --enc_in 7 \
 21 |   --dec_in 7 \
 22 |   --c_out 7 \
 23 |   --des 'Exp' \
 24 |   --d_model 512\
 25 |   --d_ff 512\
 26 |   --itr 1 \
 27 |   --lifting_levels 4\
 28 |   --lifting_kernel_size 7\
 29 |   --n_cluster 2\
 30 |   --learning_rate 0.0005\
 31 |   --batch_size 16
 32 | 
 33 | python -u run.py \
 34 |   --task_name long_term_forecast \
 35 |   --is_training 1 \
 36 |   --root_path ./dataset/ETT-small/ \
 37 |   --data_path ETTm1.csv \
 38 |   --model_id ETTm1_192_192 \
 39 |   --model $model_name \
 40 |   --data ETTm1 \
 41 |   --features M \
 42 |   --seq_len 192 \
 43 |   --label_len 48 \
 44 |   --pred_len 192 \
 45 |   --e_layers 3 \
 46 |   --d_layers 1 \
 47 |   --factor 3 \
 48 |   --enc_in 7 \
 49 |   --dec_in 7 \
 50 |   --c_out 7 \
 51 |   --des 'Exp' \
 52 |   --d_model 512\
 53 |   --d_ff 512\
 54 |   --itr 1 \
 55 |   --lifting_levels 4\
 56 |   --lifting_kernel_size 7\
 57 |   --n_cluster 2\
 58 |   --learning_rate 0.0005\
 59 |   --batch_size 16
 60 | 
 61 | python -u run.py \
 62 |   --task_name long_term_forecast \
 63 |   --is_training 1 \
 64 |   --root_path ./dataset/ETT-small/ \
 65 |   --data_path ETTm1.csv \
 66 |   --model_id ETTm1_336_336 \
 67 |   --model $model_name \
 68 |   --data ETTm1 \
 69 |   --features M \
 70 |   --seq_len 336 \
 71 |   --label_len 48 \
 72 |   --pred_len 336 \
 73 |   --e_layers 3 \
 74 |   --d_layers 1 \
 75 |   --factor 3 \
 76 |   --enc_in 7 \
 77 |   --dec_in 7 \
 78 |   --c_out 7 \
 79 |   --des 'Exp' \
 80 |   --d_model 512\
 81 |   --d_ff 512\
 82 |   --itr 1 \
 83 |   --lifting_levels 2\
 84 |   --lifting_kernel_size 7\
 85 |   --n_cluster 2\
 86 |   --learning_rate 0.0005\
 87 |   --batch_size 16
 88 | 
 89 | python -u run.py \
 90 |   --task_name long_term_forecast \
 91 |   --is_training 1 \
 92 |   --root_path ./dataset/ETT-small/ \
 93 |   --data_path ETTm1.csv \
 94 |   --model_id ETTm1_96_96 \
 95 |   --model $model_name \
 96 |   --data ETTm1 \
 97 |   --features M \
 98 |   --seq_len 96 \
 99 |   --label_len 48 \
100 |   --pred_len 96 \
101 |   --e_layers 3 \
102 |   --d_layers 1 \
103 |   --factor 3 \
104 |   --enc_in 7 \
105 |   --dec_in 7 \
106 |   --c_out 7 \
107 |   --des 'Exp' \
108 |   --d_model 512\
109 |   --d_ff 512\
110 |   --itr 1 \
111 |   --lifting_levels 3\
112 |   --lifting_kernel_size 7\
113 |   --n_cluster 4\
114 |   --learning_rate 0.0005\
115 |   --batch_size 16
116 | 
117 | python -u run.py \
118 |   --task_name long_term_forecast \
119 |   --is_training 1 \
120 |   --root_path ./dataset/ETT-small/ \
121 |   --data_path ETTm1.csv \
122 |   --model_id ETTm1_96_96 \
123 |   --model $model_name \
124 |   --data ETTm1 \
125 |   --features M \
126 |   --seq_len 96 \
127 |   --label_len 48 \
128 |   --pred_len 96 \
129 |   --e_layers 3 \
130 |   --d_layers 1 \
131 |   --factor 3 \
132 |   --enc_in 7 \
133 |   --dec_in 7 \
134 |   --c_out 7 \
135 |   --des 'Exp' \
136 |   --d_model 512\
137 |   --d_ff 512\
138 |   --itr 1 \
139 |   --lifting_levels 3\
140 |   --lifting_kernel_size 7\
141 |   --n_cluster 5\
142 |   --learning_rate 0.0005\
143 |   --batch_size 16
144 | 
145 | python -u run.py \
146 |   --task_name long_term_forecast \
147 |   --is_training 1 \
148 |   --root_path ./dataset/ETT-small/ \
149 |   --data_path ETTm1.csv \
150 |   --model_id ETTm1_96_96 \
151 |   --model $model_name \
152 |   --data ETTm1 \
153 |   --features M \
154 |   --seq_len 96 \
155 |   --label_len 48 \
156 |   --pred_len 96 \
157 |   --e_layers 3 \
158 |   --d_layers 1 \
159 |   --factor 3 \
160 |   --enc_in 7 \
161 |   --dec_in 7 \
162 |   --c_out 7 \
163 |   --des 'Exp' \
164 |   --d_model 512\
165 |   --d_ff 512\
166 |   --itr 1 \
167 |   --lifting_levels 3\
168 |   --lifting_kernel_size 7\
169 |   --n_cluster 6\
170 |   --learning_rate 0.0005\
171 |   --batch_size 16
172 | 
173 | python -u run.py \
174 |   --task_name long_term_forecast \
175 |   --is_training 1 \
176 |   --root_path ./dataset/ETT-small/ \
177 |   --data_path ETTm1.csv \
178 |   --model_id ETTm1_96_96 \
179 |   --model $model_name \
180 |   --data ETTm1 \
181 |   --features M \
182 |   --seq_len 96 \
183 |   --label_len 48 \
184 |   --pred_len 96 \
185 |   --e_layers 3 \
186 |   --d_layers 1 \
187 |   --factor 3 \
188 |   --enc_in 7 \
189 |   --dec_in 7 \
190 |   --c_out 7 \
191 |   --des 'Exp' \
192 |   --d_model 512\
193 |   --d_ff 512\
194 |   --itr 1 \
195 |   --lifting_levels 3\
196 |   --lifting_kernel_size 7\
197 |   --n_cluster 7\
198 |   --learning_rate 0.0005\
199 |   --batch_size 16


--------------------------------------------------------------------------------
/scripts/long_term_forecast/Exchange_script/AdaWaveNet.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py\
  6 |    --task_name long_term_forecast   \
  7 |    --is_training 1   \
  8 |    --root_path ./dataset/exchange_rate/   \
  9 |    --data_path exchange_rate.csv   \
 10 |    --model_id exchange_96_96   \
 11 |    --model $model_name   \
 12 |    --data custom   \
 13 |    --features M   \
 14 |    --seq_len 96   \
 15 |    --label_len 48   \
 16 |    --pred_len 96   \
 17 |    --e_layers 3   \
 18 |    --d_layers 1   \
 19 |    --factor 3   \
 20 |    --enc_in 8   \
 21 |    --dec_in 8   \
 22 |    --c_out 8   \
 23 |    --des 'Exp'   \
 24 |    --d_model 512  \
 25 |    --d_ff 512  \
 26 |    --itr 1   \
 27 |    --lifting_levels 4  \
 28 |    --lifting_kernel_size 7  \
 29 |    --n_cluster 1  \
 30 |    --learning_rate 0.0005  \
 31 |    --batch_size 32 \
 32 |    --adjust_lr True
 33 | 
 34 | 
 35 | python -u run.py\
 36 |    --task_name long_term_forecast\
 37 |    --is_training 1   \
 38 |    --root_path ./dataset/exchange_rate/   \
 39 |    --data_path exchange_rate.csv   \
 40 |    --model_id exchange_192_192   \
 41 |    --model $model_name   \
 42 |    --data custom   \
 43 |    --features M   \
 44 |    --seq_len 192   \
 45 |    --label_len 48   \
 46 |    --pred_len 192   \
 47 |    --e_layers 3   \
 48 |    --d_layers 1   \
 49 |    --factor 3   \
 50 |    --enc_in 8   \
 51 |    --dec_in 8   \
 52 |    --c_out 8   \
 53 |    --des 'Exp'   \
 54 |    --d_model 512  \
 55 |    --d_ff 512  \
 56 |    --itr 1   \
 57 |    --lifting_levels 5  \
 58 |    --lifting_kernel_size 7  \
 59 |    --n_cluster 1  \
 60 |    --learning_rate 0.0005  \
 61 |    --batch_size 16 \
 62 |    --adjust_lr True
 63 | 
 64 | 
 65 | python -u run.py \
 66 |   --task_name long_term_forecast \
 67 |   --is_training 1 \
 68 |   --root_path ./dataset/exchange_rate/   \
 69 |   --data_path exchange_rate.csv   \
 70 |   --model_id exchange_336_336   \
 71 |   --model $model_name   \
 72 |   --data custom   \
 73 |   --features M   \
 74 |   --seq_len 336   \
 75 |   --label_len 48   \
 76 |   --pred_len 336   \
 77 |   --e_layers 3   \
 78 |   --d_layers 1   \
 79 |   --factor 3   \
 80 |   --enc_in 8   \
 81 |   --dec_in 8   \
 82 |   --c_out 8   \
 83 |   --des 'Exp'   \
 84 |   --d_model 512  \
 85 |   --d_ff 512  \
 86 |   --itr 1   \
 87 |   --lifting_levels 4  \
 88 |   --lifting_kernel_size 7  \
 89 |   --n_cluster 1  \
 90 |   --learning_rate 0.0005  \
 91 |   --batch_size 16 \
 92 |   --adjust_lr True
 93 | 
 94 | 
 95 | python -u run.py \
 96 |   --task_name long_term_forecast \
 97 |   --is_training 1 \
 98 |   --root_path ./dataset/exchange_rate/   \
 99 |   --data_path exchange_rate.csv   \
100 |   --model_id exchange_720_720   \
101 |   --model $model_name \
102 |   --data custom \
103 |   --features M \
104 |   --seq_len 720 \
105 |   --label_len 48 \
106 |   --pred_len 720 \
107 |   --e_layers 3   \
108 |   --d_layers 1   \
109 |   --factor 3   \
110 |   --enc_in 8   \
111 |   --dec_in 8   \
112 |   --c_out 8   \
113 |   --des 'Exp'   \
114 |   --d_model 512  \
115 |   --d_ff 512  \
116 |   --itr 1   \
117 |   --lifting_levels 1  \
118 |   --lifting_kernel_size 7  \
119 |   --n_cluster 1  \
120 |   --learning_rate 0.0005  \
121 |   --batch_size 32 \
122 |   --adjust_lr True


--------------------------------------------------------------------------------
/scripts/long_term_forecast/ILI_script/AdaWaveNet.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py\
  6 |    --task_name long_term_forecast   \
  7 |    --is_training 1   \
  8 |    --root_path ./dataset/illness/ \
  9 |    --data_path national_illness.csv \
 10 |    --model_id ili_36_24 \
 11 |    --model $model_name \
 12 |    --data custom \
 13 |    --features M \
 14 |    --seq_len 36 \
 15 |    --label_len 18 \
 16 |    --pred_len 36 \
 17 |    --e_layers 3   \
 18 |    --d_layers 1   \
 19 |    --factor 3   \
 20 |    --enc_in 7   \
 21 |    --dec_in 7   \
 22 |    --c_out 7   \
 23 |    --des 'Exp'   \
 24 |    --d_model 512  \
 25 |    --d_ff 512  \
 26 |    --itr 1   \
 27 |    --lifting_levels 4  \
 28 |    --lifting_kernel_size 7  \
 29 |    --n_cluster 1  \
 30 |    --learning_rate 0.0005  \
 31 |    --batch_size 32 \
 32 |    --adjust_lr True
 33 | 
 34 | 
 35 | python -u run.py\
 36 |    --task_name long_term_forecast\
 37 |    --is_training 1   \
 38 |    --root_path ./dataset/exchange_rate/   \
 39 |    --data_path exchange_rate.csv   \
 40 |    --model_id exchange_192_192   \
 41 |    --model $model_name   \
 42 |    --data custom   \
 43 |    --features M   \
 44 |    --seq_len 192   \
 45 |    --label_len 48   \
 46 |    --pred_len 192   \
 47 |    --e_layers 3   \
 48 |    --d_layers 1   \
 49 |    --factor 3   \
 50 |    --enc_in 8   \
 51 |    --dec_in 8   \
 52 |    --c_out 8   \
 53 |    --des 'Exp'   \
 54 |    --d_model 512  \
 55 |    --d_ff 512  \
 56 |    --itr 1   \
 57 |    --lifting_levels 5  \
 58 |    --lifting_kernel_size 7  \
 59 |    --n_cluster 1  \
 60 |    --learning_rate 0.0005  \
 61 |    --batch_size 16 \
 62 |    --adjust_lr True
 63 | 
 64 | 
 65 | python -u run.py \
 66 |   --task_name long_term_forecast \
 67 |   --is_training 1 \
 68 |   --root_path ./dataset/exchange_rate/   \
 69 |   --data_path exchange_rate.csv   \
 70 |   --model_id exchange_336_336   \
 71 |   --model $model_name   \
 72 |   --data custom   \
 73 |   --features M   \
 74 |   --seq_len 336   \
 75 |   --label_len 48   \
 76 |   --pred_len 336   \
 77 |   --e_layers 3   \
 78 |   --d_layers 1   \
 79 |   --factor 3   \
 80 |   --enc_in 8   \
 81 |   --dec_in 8   \
 82 |   --c_out 8   \
 83 |   --des 'Exp'   \
 84 |   --d_model 512  \
 85 |   --d_ff 512  \
 86 |   --itr 1   \
 87 |   --lifting_levels 4  \
 88 |   --lifting_kernel_size 7  \
 89 |   --n_cluster 1  \
 90 |   --learning_rate 0.0005  \
 91 |   --batch_size 16 \
 92 |   --adjust_lr True
 93 | 
 94 | 
 95 | python -u run.py \
 96 |   --task_name long_term_forecast \
 97 |   --is_training 1 \
 98 |   --root_path ./dataset/exchange_rate/   \
 99 |   --data_path exchange_rate.csv   \
100 |   --model_id exchange_720_720   \
101 |   --model $model_name \
102 |   --data custom \
103 |   --features M \
104 |   --seq_len 720 \
105 |   --label_len 48 \
106 |   --pred_len 720 \
107 |   --e_layers 3   \
108 |   --d_layers 1   \
109 |   --factor 3   \
110 |   --enc_in 8   \
111 |   --dec_in 8   \
112 |   --c_out 8   \
113 |   --des 'Exp'   \
114 |   --d_model 512  \
115 |   --d_ff 512  \
116 |   --itr 1   \
117 |   --lifting_levels 1  \
118 |   --lifting_kernel_size 7  \
119 |   --n_cluster 1  \
120 |   --learning_rate 0.0005  \
121 |   --batch_size 32 \
122 |   --adjust_lr True


--------------------------------------------------------------------------------
/scripts/long_term_forecast/Solar/AdaWaveNet.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py \
  6 |   --task_name long_term_forecast   \
  7 |   --is_training 1 \
  8 |   --root_path ./dataset/Solar/ \
  9 |   --data_path solar_AL.txt \
 10 |   --model_id solar_96_96 \
 11 |   --model $model_name \
 12 |   --data Solar \
 13 |   --features M \
 14 |   --seq_len 96 \
 15 |   --pred_len 96 \
 16 |   --e_layers 2 \
 17 |   --enc_in 137 \
 18 |   --dec_in 137 \
 19 |   --c_out 137 \
 20 |   --des 'Exp' \
 21 |   --d_model 512 \
 22 |   --d_ff 512 \
 23 |   --learning_rate 0.0005 \
 24 |   --itr 1\
 25 |   --lifting_levels 1  \
 26 |   --lifting_kernel_size 7  \
 27 |   --n_cluster 1  \
 28 |   --learning_rate 0.0005  \
 29 |   --batch_size 16 \
 30 |   --adjust_lr True
 31 | 
 32 | python -u run.py \
 33 |   --task_name long_term_forecast   \
 34 |   --is_training 1 \
 35 |   --root_path ./dataset/Solar/ \
 36 |   --data_path solar_AL.txt \
 37 |   --model_id solar_192_192 \
 38 |   --model $model_name \
 39 |   --data Solar \
 40 |   --features M \
 41 |   --seq_len 192 \
 42 |   --pred_len 192 \
 43 |   --e_layers 2 \
 44 |   --enc_in 137 \
 45 |   --dec_in 137 \
 46 |   --c_out 137 \
 47 |   --des 'Exp' \
 48 |   --d_model 512 \
 49 |   --d_ff 512 \
 50 |   --learning_rate 0.0005 \
 51 |   --itr 1\
 52 |   --lifting_levels 1  \
 53 |   --lifting_kernel_size 7  \
 54 |   --n_cluster 1  \
 55 |   --learning_rate 0.0005  \
 56 |   --batch_size 16 \
 57 |   --adjust_lr True
 58 | 
 59 | python -u run.py \
 60 |   --task_name long_term_forecast   \
 61 |   --is_training 1 \
 62 |   --root_path ./dataset/Solar/ \
 63 |   --data_path solar_AL.txt \
 64 |   --model_id solar_336_336 \
 65 |   --model $model_name \
 66 |   --data Solar \
 67 |   --features M \
 68 |   --seq_len 336 \
 69 |   --pred_len 336 \
 70 |   --e_layers 2 \
 71 |   --enc_in 137 \
 72 |   --dec_in 137 \
 73 |   --c_out 137 \
 74 |   --des 'Exp' \
 75 |   --d_model 512 \
 76 |   --d_ff 512 \
 77 |   --learning_rate 0.0005 \
 78 |   --itr 1\
 79 |   --lifting_levels 1  \
 80 |   --lifting_kernel_size 7  \
 81 |   --n_cluster 1  \
 82 |   --learning_rate 0.0005  \
 83 |   --batch_size 16 \
 84 |   --adjust_lr True
 85 | 
 86 | python -u run.py \
 87 |   --task_name long_term_forecast   \
 88 |   --is_training 1 \
 89 |   --root_path ./dataset/Solar/ \
 90 |   --data_path solar_AL.txt \
 91 |   --model_id solar_720_720 \
 92 |   --model $model_name \
 93 |   --data Solar \
 94 |   --features M \
 95 |   --seq_len 720 \
 96 |   --pred_len 720 \
 97 |   --e_layers 2 \
 98 |   --enc_in 137 \
 99 |   --dec_in 137 \
100 |   --c_out 137 \
101 |   --des 'Exp' \
102 |   --d_model 512 \
103 |   --d_ff 512 \
104 |   --learning_rate 0.0005 \
105 |   --itr 1\
106 |   --lifting_levels 1  \
107 |   --lifting_kernel_size 7  \
108 |   --n_cluster 1  \
109 |   --learning_rate 0.0005  \
110 |   --batch_size 16 \
111 |   --adjust_lr True


--------------------------------------------------------------------------------
/scripts/long_term_forecast/Traffic_script/AdaWaveNet.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py \
  6 |   --task_name long_term_forecast \
  7 |   --is_training 1 \
  8 |   --root_path ./dataset/traffic/ \
  9 |   --data_path traffic.csv \
 10 |   --model_id traffic_96_96 \
 11 |   --model $model_name \
 12 |   --data custom \
 13 |   --features M \
 14 |   --seq_len 96 \
 15 |   --label_len 48 \
 16 |   --pred_len 96 \
 17 |   --e_layers 4 \
 18 |   --d_layers 1 \
 19 |   --factor 3 \
 20 |   --enc_in 862 \
 21 |   --dec_in 862 \
 22 |   --c_out 862 \
 23 |   --des 'Exp' \
 24 |   --d_model 512 \
 25 |   --d_ff 512 \
 26 |   --batch_size 32 \
 27 |   --learning_rate 0.001 \
 28 |   --itr 1 \
 29 |   --lifting_levels 1\
 30 |   --lifting_kernel_size 7\
 31 |   --n_cluster 9 \
 32 |   --train_epochs 20
 33 | 
 34 | 
 35 | python -u run.py \
 36 |   --task_name long_term_forecast \
 37 |   --is_training 1 \
 38 |   --root_path ./dataset/traffic/ \
 39 |   --data_path traffic.csv \
 40 |   --model_id traffic_192_192 \
 41 |   --model $model_name \
 42 |   --data custom \
 43 |   --features M \
 44 |   --seq_len 192 \
 45 |   --label_len 48 \
 46 |   --pred_len 192 \
 47 |   --e_layers 4 \
 48 |   --d_layers 1 \
 49 |   --factor 3 \
 50 |   --enc_in 862 \
 51 |   --dec_in 862 \
 52 |   --c_out 862 \
 53 |   --des 'Exp' \
 54 |   --d_model 512 \
 55 |   --d_ff 512 \
 56 |   --batch_size 32 \
 57 |   --learning_rate 0.001 \
 58 |   --itr 1 \
 59 |   --lifting_levels 1\
 60 |   --lifting_kernel_size 7\
 61 |   --n_cluster 9 \
 62 |   --train_epochs 20
 63 | 
 64 | 
 65 | python -u run.py \
 66 |   --task_name long_term_forecast \
 67 |   --is_training 1 \
 68 |   --root_path ./dataset/traffic/ \
 69 |   --data_path traffic.csv \
 70 |   --model_id traffic_336_336 \
 71 |   --model $model_name \
 72 |   --data custom \
 73 |   --features M \
 74 |   --seq_len 336 \
 75 |   --label_len 48 \
 76 |   --pred_len 336 \
 77 |   --e_layers 4 \
 78 |   --d_layers 1 \
 79 |   --factor 3 \
 80 |   --enc_in 862 \
 81 |   --dec_in 862 \
 82 |   --c_out 862 \
 83 |   --des 'Exp' \
 84 |   --d_model 512 \
 85 |   --d_ff 512 \
 86 |   --batch_size 32 \
 87 |   --learning_rate 0.001 \
 88 |   --itr 1 \
 89 |   --lifting_levels 1\
 90 |   --lifting_kernel_size 7\
 91 |   --n_cluster 9 \
 92 |   --train_epochs 20
 93 | 
 94 | 
 95 | python -u run.py \
 96 |   --task_name long_term_forecast \
 97 |   --is_training 1 \
 98 |   --root_path ./dataset/traffic/ \
 99 |   --data_path traffic.csv \
100 |   --model_id traffic_720_720 \
101 |   --model $model_name \
102 |   --data custom \
103 |   --features M \
104 |   --seq_len 720 \
105 |   --label_len 48 \
106 |   --pred_len 720 \
107 |   --e_layers 4 \
108 |   --d_layers 1 \
109 |   --factor 3 \
110 |   --enc_in 862 \
111 |   --dec_in 862 \
112 |   --c_out 862 \
113 |   --des 'Exp' \
114 |   --d_model 512 \
115 |   --d_ff 512 \
116 |   --batch_size 32 \
117 |   --learning_rate 0.001 \
118 |   --itr 1 \
119 |   --lifting_levels 1\
120 |   --lifting_kernel_size 7\
121 |   --n_cluster 9 \
122 |   --train_epochs 20


--------------------------------------------------------------------------------
/scripts/long_term_forecast/Weather_script/AdaWaveNet.sh:
--------------------------------------------------------------------------------
  1 | export CUDA_VISIBLE_DEVICES=0
  2 | 
  3 | model_name=LSWaveNet
  4 | 
  5 | python -u run.py \
  6 |   --task_name long_term_forecast \
  7 |   --is_training 1 \
  8 |   --root_path ./dataset/weather/ \
  9 |   --data_path weather.csv \
 10 |   --model_id weather_96_96 \
 11 |   --model $model_name \
 12 |   --data custom \
 13 |   --features M \
 14 |   --seq_len 96 \
 15 |   --label_len 48 \
 16 |   --pred_len 96 \
 17 |   --e_layers 3 \
 18 |   --d_layers 1 \
 19 |   --factor 3 \
 20 |   --enc_in 21 \
 21 |   --dec_in 21 \
 22 |   --c_out 21 \
 23 |   --des 'Exp' \
 24 |   --d_model 512\
 25 |   --d_ff 512\
 26 |   --itr 1 \
 27 |   --lifting_levels 3\
 28 |   --lifting_kernel_size 7\
 29 |   --n_cluster 4\
 30 |   --learning_rate 0.0005\
 31 |   --batch_size 16
 32 | 
 33 | 
 34 | python -u run.py \
 35 |   --task_name long_term_forecast \
 36 |   --is_training 1 \
 37 |   --root_path ./dataset/weather/ \
 38 |   --data_path weather.csv \
 39 |   --model_id weather_192_192 \
 40 |   --model $model_name \
 41 |   --data custom \
 42 |   --features M \
 43 |   --seq_len 192 \
 44 |   --label_len 48 \
 45 |   --pred_len 192 \
 46 |   --e_layers 3 \
 47 |   --d_layers 1 \
 48 |   --factor 3 \
 49 |   --enc_in 21 \
 50 |   --dec_in 21 \
 51 |   --c_out 21 \
 52 |   --des 'Exp' \
 53 |   --d_model 512\
 54 |   --d_ff 512\
 55 |   --itr 1 \
 56 |   --lifting_levels 3\
 57 |   --lifting_kernel_size 7\
 58 |   --n_cluster 4\
 59 |   --learning_rate 0.0005\
 60 |   --batch_size 16
 61 | 
 62 | 
 63 | python -u run.py \
 64 |   --task_name long_term_forecast \
 65 |   --is_training 1 \
 66 |   --root_path ./dataset/weather/ \
 67 |   --data_path weather.csv \
 68 |   --model_id weather_336_336 \
 69 |   --model $model_name \
 70 |   --data custom \
 71 |   --features M \
 72 |   --seq_len 336 \
 73 |   --label_len 48 \
 74 |   --pred_len 336 \
 75 |   --e_layers 3 \
 76 |   --d_layers 1 \
 77 |   --factor 3 \
 78 |   --enc_in 21 \
 79 |   --dec_in 21 \
 80 |   --c_out 21 \
 81 |   --des 'Exp' \
 82 |   --d_model 512\
 83 |   --d_ff 512\
 84 |   --itr 1 \
 85 |   --lifting_levels 3\
 86 |   --lifting_kernel_size 7\
 87 |   --n_cluster 4\
 88 |   --learning_rate 0.0005\
 89 |   --batch_size 16
 90 | 
 91 | 
 92 | python -u run.py \
 93 |   --task_name long_term_forecast \
 94 |   --is_training 1 \
 95 |   --root_path ./dataset/weather/ \
 96 |   --data_path weather.csv \
 97 |   --model_id weather_720_720 \
 98 |   --model $model_name \
 99 |   --data custom \
100 |   --features M \
101 |   --seq_len 720 \
102 |   --label_len 48 \
103 |   --pred_len 720 \
104 |   --e_layers 3 \
105 |   --d_layers 1 \
106 |   --factor 3 \
107 |   --enc_in 21 \
108 |   --dec_in 21 \
109 |   --c_out 21 \
110 |   --des 'Exp' \
111 |   --d_model 512\
112 |   --d_ff 512\
113 |   --itr 1 \
114 |   --lifting_levels 3\
115 |   --lifting_kernel_size 7\
116 |   --n_cluster 4\
117 |   --learning_rate 0.0005\
118 |   --batch_size 16


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/comp-well-org/AdaWaveNet/a03f080c96af3420c1c5006c8d4c6f3e78449ee6/utils/__init__.py


--------------------------------------------------------------------------------
/utils/losses.py:
--------------------------------------------------------------------------------
 1 | # This source code is provided for the purposes of scientific reproducibility
 2 | # under the following limited license from Element AI Inc. The code is an
 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
 4 | # expansion analysis for interpretable time series forecasting,
 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
 7 | # International license (CC BY-NC 4.0):
 8 | # https://creativecommons.org/licenses/by-nc/4.0/.  Any commercial use (whether
 9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
14 | 
15 | """
16 | Loss functions for PyTorch.
17 | """
18 | 
19 | import torch as t
20 | import torch.nn as nn
21 | import numpy as np
22 | import pdb
23 | 
24 | 
def divide_no_nan(a, b):
    """
    Element-wise a/b where every non-finite result is replaced by 0.

    Division by zero produces NaN (0/0), +Inf (x/0 with x > 0) or -Inf
    (x/0 with x < 0); all three are mapped to 0.

    :param a: Numerator tensor.
    :param b: Denominator tensor.
    :return: Tensor holding a/b with NaN, +Inf and -Inf entries set to 0.
    """
    result = a / b
    # Negative infinity must be cleared as well, otherwise a single -Inf
    # entry poisons any mean taken over the result.
    result[t.isnan(result) | t.isinf(result)] = .0
    return result
33 | 
34 | 
class mape_loss(nn.Module):
    """Masked mean absolute percentage error criterion."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        MAPE loss, see https://en.wikipedia.org/wiki/Mean_absolute_percentage_error

        :param insample: Not used by this loss.
        :param freq: Not used by this loss.
        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        # mask / target acts as a per-element weight; entries where the
        # division is undefined are zeroed by divide_no_nan.
        inverse_target = divide_no_nan(mask, target)
        weighted_error = (forecast - target) * inverse_target
        return weighted_error.abs().mean()
51 | 
52 | 
class smape_loss(nn.Module):
    """Masked symmetric mean absolute percentage error criterion."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        sMAPE loss, see https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)

        :param insample: Not used by this loss.
        :param freq: Not used by this loss.
        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        abs_error = t.abs(forecast - target)
        # The denominator is built from .data, so it carries no gradient
        # history; gradients flow through the numerator only.
        scale = t.abs(forecast.data) + t.abs(target.data)
        ratio = divide_no_nan(abs_error, scale)
        return 200 * t.mean(ratio * mask)
69 | 
70 | 
class mase_loss(nn.Module):
    """Masked mean absolute scaled error criterion."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
        """
        MASE loss, see "Scaled Errors" https://robjhyndman.com/papers/mase.pdf

        :param insample: Insample values. Shape: batch, time_i
        :param freq: Frequency value
        :param forecast: Forecast values. Shape: batch, time_o
        :param target: Target values. Shape: batch, time_o
        :param mask: 0/1 mask. Shape: batch, time_o
        :return: Loss value
        """
        # Per-series scale: mean absolute difference between in-sample values
        # that lie `freq` steps apart.
        lagged_diff = insample[:, freq:] - insample[:, :-freq]
        masep = lagged_diff.abs().mean(dim=1)
        # Add a trailing axis so the per-series scale lines up with the
        # (batch, time_o) mask.
        inverse_scale = divide_no_nan(mask, masep.unsqueeze(1))
        return (t.abs(target - forecast) * inverse_scale).mean()
90 | 


--------------------------------------------------------------------------------
/utils/m4_summary.py:
--------------------------------------------------------------------------------
  1 | # This source code is provided for the purposes of scientific reproducibility
  2 | # under the following limited license from Element AI Inc. The code is an
  3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
  4 | # expansion analysis for interpretable time series forecasting,
  5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
  6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
  7 | # International license (CC BY-NC 4.0):
  8 | # https://creativecommons.org/licenses/by-nc/4.0/.  Any commercial use (whether
  9 | # for the benefit of third parties or internally in production) requires an
 10 | # explicit license. The subject-matter of the N-BEATS model and associated
 11 | # materials are the property of Element AI Inc. and may be subject to patent
 12 | # protection. No license to patents is granted hereunder (whether express or
 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved.
 14 | 
 15 | """
 16 | M4 Summary
 17 | """
 18 | from collections import OrderedDict
 19 | 
 20 | import numpy as np
 21 | import pandas as pd
 22 | 
 23 | from data_provider.m4 import M4Dataset
 24 | from data_provider.m4 import M4Meta
 25 | import os
 26 | 
 27 | 
 28 | def group_values(values, groups, group_name):
 29 |     return np.array([v[~np.isnan(v)] for v in values[groups == group_name]])
 30 | 
 31 | 
 32 | def mase(forecast, insample, outsample, frequency):
 33 |     return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
 34 | 
 35 | 
 36 | def smape_2(forecast, target):
 37 |     denom = np.abs(target) + np.abs(forecast)
 38 |     # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway.
 39 |     denom[denom == 0.0] = 1.0
 40 |     return 200 * np.abs(forecast - target) / denom
 41 | 
 42 | 
 43 | def mape(forecast, target):
 44 |     denom = np.abs(target)
 45 |     # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway.
 46 |     denom[denom == 0.0] = 1.0
 47 |     return 100 * np.abs(forecast - target) / denom
 48 | 
 49 | 
 50 | class M4Summary:
 51 |     def __init__(self, file_path, root_path):
 52 |         self.file_path = file_path
 53 |         self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
 54 |         self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
 55 |         self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')
 56 | 
 57 |     def evaluate(self):
 58 |         """
 59 |         Evaluate forecasts using M4 test dataset.
 60 | 
 61 |         :param forecast: Forecasts. Shape: timeseries, time.
 62 |         :return: sMAPE and OWA grouped by seasonal patterns.
 63 |         """
 64 |         grouped_owa = OrderedDict()
 65 | 
 66 |         naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
 67 |         naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts])
 68 | 
 69 |         model_mases = {}
 70 |         naive2_smapes = {}
 71 |         naive2_mases = {}
 72 |         grouped_smapes = {}
 73 |         grouped_mapes = {}
 74 |         for group_name in M4Meta.seasonal_patterns:
 75 |             file_name = self.file_path + group_name + "_forecast.csv"
 76 |             if os.path.exists(file_name):
 77 |                 model_forecast = pd.read_csv(file_name).values
 78 | 
 79 |             naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
 80 |             target = group_values(self.test_set.values, self.test_set.groups, group_name)
 81 |             # all timeseries within group have same frequency
 82 |             frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
 83 |             insample = group_values(self.training_set.values, self.test_set.groups, group_name)
 84 | 
 85 |             model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
 86 |                                                     insample=insample[i],
 87 |                                                     outsample=target[i],
 88 |                                                     frequency=frequency) for i in range(len(model_forecast))])
 89 |             naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
 90 |                                                      insample=insample[i],
 91 |                                                      outsample=target[i],
 92 |                                                      frequency=frequency) for i in range(len(model_forecast))])
 93 | 
 94 |             naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
 95 |             grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
 96 |             grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))
 97 | 
 98 |         grouped_smapes = self.summarize_groups(grouped_smapes)
 99 |         grouped_mapes = self.summarize_groups(grouped_mapes)
100 |         grouped_model_mases = self.summarize_groups(model_mases)
101 |         grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
102 |         grouped_naive2_mases = self.summarize_groups(naive2_mases)
103 |         for k in grouped_model_mases.keys():
104 |             grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
105 |                               grouped_smapes[k] / grouped_naive2_smapes[k]) / 2
106 | 
107 |         def round_all(d):
108 |             return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
109 | 
110 |         return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all(
111 |             grouped_model_mases)
112 | 
113 |     def summarize_groups(self, scores):
114 |         """
115 |         Re-group scores respecting M4 rules.
116 |         :param scores: Scores per group.
117 |         :return: Grouped scores.
118 |         """
119 |         scores_summary = OrderedDict()
120 | 
121 |         def group_count(group_name):
122 |             return len(np.where(self.test_set.groups == group_name)[0])
123 | 
124 |         weighted_score = {}
125 |         for g in ['Yearly', 'Quarterly', 'Monthly']:
126 |             weighted_score[g] = scores[g] * group_count(g)
127 |             scores_summary[g] = scores[g]
128 | 
129 |         others_score = 0
130 |         others_count = 0
131 |         for g in ['Weekly', 'Daily', 'Hourly']:
132 |             others_score += scores[g] * group_count(g)
133 |             others_count += group_count(g)
134 |         weighted_score['Others'] = others_score
135 |         scores_summary['Others'] = others_score / others_count
136 | 
137 |         average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
138 |         scores_summary['Average'] = average
139 | 
140 |         return scores_summary
141 | 


--------------------------------------------------------------------------------
/utils/masking.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | 
 4 | class TriangularCausalMask():
 5 |     def __init__(self, B, L, device="cpu"):
 6 |         mask_shape = [B, 1, L, L]
 7 |         with torch.no_grad():
 8 |             self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
 9 | 
10 |     @property
11 |     def mask(self):
12 |         return self._mask
13 | 
14 | 
class ProbMask():
    """
    Boolean mask shaped like ``scores``: for every (batch, head) pair, the
    rows of an upper-triangular (diagonal excluded) L x scores.shape[-1]
    mask are gathered at the positions given by ``index``.
    """

    def __init__(self, B, H, L, index, scores, device="cpu"):
        n_keys = scores.shape[-1]
        upper = torch.ones(L, n_keys, dtype=torch.bool).to(device).triu(1)
        upper_per_head = upper[None, None, :].expand(B, H, L, n_keys)
        batch_idx = torch.arange(B)[:, None, None]
        head_idx = torch.arange(H)[None, :, None]
        # Advanced indexing picks, per batch and head, the mask rows in `index`.
        selected = upper_per_head[batch_idx, head_idx, index, :].to(device)
        self._mask = selected.view(scores.shape).to(device)

    @property
    def mask(self):
        """The mask tensor built in the constructor."""
        return self._mask
27 | 


--------------------------------------------------------------------------------
/utils/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def RSE(pred, true):
 5 |     return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))
 6 | 
 7 | 
 8 | def CORR(pred, true):
 9 |     u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
10 |     d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
11 |     return (u / d).mean(-1)
12 | 
13 | 
def MAE(pred, true):
    """Mean absolute error between ``pred`` and ``true``."""
    abs_error = np.abs(pred - true)
    return np.mean(abs_error)
16 | 
17 | 
def MSE(pred, true):
    """Mean squared error between ``pred`` and ``true``."""
    squared_error = (pred - true) ** 2
    return np.mean(squared_error)
20 | 
21 | 
def RMSE(pred, true):
    """Root mean squared error: the square root of ``MSE(pred, true)``."""
    mean_squared = MSE(pred, true)
    return np.sqrt(mean_squared)
24 | 
25 | 
def MAPE(pred, true):
    """Mean absolute error relative to ``true`` (as a fraction, not percent); no guard against zeros in ``true``."""
    relative_error = (pred - true) / true
    return np.mean(np.abs(relative_error))
28 | 
29 | 
def MSPE(pred, true):
    """Mean squared error relative to ``true`` (as a fraction, not percent); no guard against zeros in ``true``."""
    relative_error = (pred - true) / true
    return np.mean(np.square(relative_error))
32 | 
33 | 
def metric(pred, true):
    """
    Compute the standard error metrics in one call.

    :return: Tuple ``(mae, mse, rmse, mape, mspe)``.
    """
    scorers = (MAE, MSE, RMSE, MAPE, MSPE)
    return tuple(score(pred, true) for score in scorers)
42 | 


--------------------------------------------------------------------------------
/utils/print_args.py:
--------------------------------------------------------------------------------
 1 | def print_args(args):
 2 |     print("\033[1m" + "Basic Config" + "\033[0m")
 3 |     print(f'  {"Task Name:":<20}{args.task_name:<20}{"Is Training:":<20}{args.is_training:<20}')
 4 |     print(f'  {"Model ID:":<20}{args.model_id:<20}{"Model:":<20}{args.model:<20}')
 5 |     print()
 6 | 
 7 |     print("\033[1m" + "Data Loader" + "\033[0m")
 8 |     print(f'  {"Data:":<20}{args.data:<20}{"Root Path:":<20}{args.root_path:<20}')
 9 |     print(f'  {"Data Path:":<20}{args.data_path:<20}{"Features:":<20}{args.features:<20}')
10 |     print(f'  {"Target:":<20}{args.target:<20}{"Freq:":<20}{args.freq:<20}')
11 |     print(f'  {"Checkpoints:":<20}{args.checkpoints:<20}')
12 |     print()
13 | 
14 |     if args.task_name in ['long_term_forecast', 'short_term_forecast']:
15 |         print("\033[1m" + "Forecasting Task" + "\033[0m")
16 |         print(f'  {"Seq Len:":<20}{args.seq_len:<20}{"Label Len:":<20}{args.label_len:<20}')
17 |         print(f'  {"Pred Len:":<20}{args.pred_len:<20}{"Seasonal Patterns:":<20}{args.seasonal_patterns:<20}')
18 |         print(f'  {"Inverse:":<20}{args.inverse:<20}')
19 |         print()
20 | 
21 |     if args.task_name == 'imputation':
22 |         print("\033[1m" + "Imputation Task" + "\033[0m")
23 |         print(f'  {"Mask Rate:":<20}{args.mask_rate:<20}')
24 |         print()
25 | 
26 |     if args.task_name == 'anomaly_detection':
27 |         print("\033[1m" + "Anomaly Detection Task" + "\033[0m")
28 |         print(f'  {"Anomaly Ratio:":<20}{args.anomaly_ratio:<20}')
29 |         print()
30 | 
31 |     print("\033[1m" + "Model Parameters" + "\033[0m")
32 |     print(f'  {"Top k:":<20}{args.top_k:<20}{"Num Kernels:":<20}{args.num_kernels:<20}')
33 |     print(f'  {"Enc In:":<20}{args.enc_in:<20}{"Dec In:":<20}{args.dec_in:<20}')
34 |     print(f'  {"C Out:":<20}{args.c_out:<20}{"d model:":<20}{args.d_model:<20}')
35 |     print(f'  {"n heads:":<20}{args.n_heads:<20}{"e layers:":<20}{args.e_layers:<20}')
36 |     print(f'  {"d layers:":<20}{args.d_layers:<20}{"d FF:":<20}{args.d_ff:<20}')
37 |     print(f'  {"Moving Avg:":<20}{args.moving_avg:<20}{"Factor:":<20}{args.factor:<20}')
38 |     print(f'  {"Distil:":<20}{args.distil:<20}{"Dropout:":<20}{args.dropout:<20}')
39 |     print(f'  {"Embed:":<20}{args.embed:<20}{"Activation:":<20}{args.activation:<20}')
40 |     print(f'  {"Output Attention:":<20}{args.output_attention:<20}')
41 |     print()
42 | 
43 |     print("\033[1m" + "Run Parameters" + "\033[0m")
44 |     print(f'  {"Num Workers:":<20}{args.num_workers:<20}{"Itr:":<20}{args.itr:<20}')
45 |     print(f'  {"Train Epochs:":<20}{args.train_epochs:<20}{"Batch Size:":<20}{args.batch_size:<20}')
46 |     print(f'  {"Patience:":<20}{args.patience:<20}{"Learning Rate:":<20}{args.learning_rate:<20}')
47 |     print(f'  {"Des:":<20}{args.des:<20}{"Loss:":<20}{args.loss:<20}')
48 |     print(f'  {"Lradj:":<20}{args.lradj:<20}{"Use Amp:":<20}{args.use_amp:<20}')
49 |     print()
50 | 
51 |     print("\033[1m" + "GPU" + "\033[0m")
52 |     print(f'  {"Use GPU:":<20}{args.use_gpu:<20}{"GPU:":<20}{args.gpu:<20}')
53 |     print(f'  {"Use Multi GPU:":<20}{args.use_multi_gpu:<20}{"Devices:":<20}{args.devices:<20}')
54 |     print()
55 | 
56 |     print("\033[1m" + "De-stationary Projector Params" + "\033[0m")
57 |     p_hidden_dims_str = ', '.join(map(str, args.p_hidden_dims))
58 |     print(f'  {"P Hidden Dims:":<20}{p_hidden_dims_str:<20}{"P Hidden Layers:":<20}{args.p_hidden_layers:<20}') 
59 |     print()
60 | 


--------------------------------------------------------------------------------
/utils/timefeatures.py:
--------------------------------------------------------------------------------
  1 | # From: gluonts/src/gluonts/time_feature/_base.py
  2 | # Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved.
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License").
  5 | # You may not use this file except in compliance with the License.
  6 | # A copy of the License is located at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # or in the "license" file accompanying this file. This file is distributed
 11 | # on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 12 | # express or implied. See the License for the specific language governing
 13 | # permissions and limitations under the License.
 14 | 
 15 | from typing import List
 16 | 
 17 | import numpy as np
 18 | import pandas as pd
 19 | from pandas.tseries import offsets
 20 | from pandas.tseries.frequencies import to_offset
 21 | 
 22 | 
 23 | class TimeFeature:
 24 |     def __init__(self):
 25 |         pass
 26 | 
 27 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 28 |         pass
 29 | 
 30 |     def __repr__(self):
 31 |         return self.__class__.__name__ + "()"
 32 | 
 33 | 
 34 | class SecondOfMinute(TimeFeature):
 35 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
 36 | 
 37 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 38 |         return index.second / 59.0 - 0.5
 39 | 
 40 | 
 41 | class MinuteOfHour(TimeFeature):
 42 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
 43 | 
 44 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 45 |         return index.minute / 59.0 - 0.5
 46 | 
 47 | 
 48 | class HourOfDay(TimeFeature):
 49 |     """Hour of day encoded as value between [-0.5, 0.5]"""
 50 | 
 51 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 52 |         return index.hour / 23.0 - 0.5
 53 | 
 54 | 
 55 | class DayOfWeek(TimeFeature):
 56 |     """Hour of day encoded as value between [-0.5, 0.5]"""
 57 | 
 58 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 59 |         return index.dayofweek / 6.0 - 0.5
 60 | 
 61 | 
 62 | class DayOfMonth(TimeFeature):
 63 |     """Day of month encoded as value between [-0.5, 0.5]"""
 64 | 
 65 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 66 |         return (index.day - 1) / 30.0 - 0.5
 67 | 
 68 | 
 69 | class DayOfYear(TimeFeature):
 70 |     """Day of year encoded as value between [-0.5, 0.5]"""
 71 | 
 72 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 73 |         return (index.dayofyear - 1) / 365.0 - 0.5
 74 | 
 75 | 
 76 | class MonthOfYear(TimeFeature):
 77 |     """Month of year encoded as value between [-0.5, 0.5]"""
 78 | 
 79 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 80 |         return (index.month - 1) / 11.0 - 0.5
 81 | 
 82 | 
 83 | class WeekOfYear(TimeFeature):
 84 |     """Week of year encoded as value between [-0.5, 0.5]"""
 85 | 
 86 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 87 |         return (index.isocalendar().week - 1) / 52.0 - 0.5
 88 | 
 89 | 
 90 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
 91 |     """
 92 |     Returns a list of time features that will be appropriate for the given frequency string.
 93 |     Parameters
 94 |     ----------
 95 |     freq_str
 96 |         Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
 97 |     """
 98 | 
 99 |     features_by_offsets = {
100 |         offsets.YearEnd: [],
101 |         offsets.QuarterEnd: [MonthOfYear],
102 |         offsets.MonthEnd: [MonthOfYear],
103 |         offsets.Week: [DayOfMonth, WeekOfYear],
104 |         offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
105 |         offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
106 |         offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
107 |         offsets.Minute: [
108 |             MinuteOfHour,
109 |             HourOfDay,
110 |             DayOfWeek,
111 |             DayOfMonth,
112 |             DayOfYear,
113 |         ],
114 |         offsets.Second: [
115 |             SecondOfMinute,
116 |             MinuteOfHour,
117 |             HourOfDay,
118 |             DayOfWeek,
119 |             DayOfMonth,
120 |             DayOfYear,
121 |         ],
122 |     }
123 | 
124 |     offset = to_offset(freq_str)
125 | 
126 |     for offset_type, feature_classes in features_by_offsets.items():
127 |         if isinstance(offset, offset_type):
128 |             return [cls() for cls in feature_classes]
129 | 
130 |     supported_freq_msg = f"""
131 |     Unsupported frequency {freq_str}
132 |     The following frequencies are supported:
133 |         Y   - yearly
134 |             alias: A
135 |         M   - monthly
136 |         W   - weekly
137 |         D   - daily
138 |         B   - business days
139 |         H   - hourly
140 |         T   - minutely
141 |             alias: min
142 |         S   - secondly
143 |     """
144 |     raise RuntimeError(supported_freq_msg)
145 | 
146 | 
def time_features(dates, freq='h'):
    """Stack the features selected for ``freq`` into one array, one row per feature evaluated on ``dates``."""
    feature_rows = []
    for feature in time_features_from_frequency_str(freq):
        feature_rows.append(feature(dates))
    return np.vstack(feature_rows)
149 | 


--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | 
  3 | import numpy as np
  4 | import torch
  5 | import matplotlib.pyplot as plt
  6 | import pandas as pd
  7 | import math
  8 | 
  9 | plt.switch_backend('agg')
 10 | 
 11 | 
 12 | def adjust_learning_rate(optimizer, epoch, args):
 13 |     # lr = args.learning_rate * (0.2 ** (epoch // 2))
 14 |     if args.lradj == 'type1':
 15 |         lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
 16 |     elif args.lradj == 'type2':
 17 |         lr_adjust = {
 18 |             2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
 19 |             10: 5e-7, 15: 1e-7, 20: 5e-8
 20 |         }
 21 |     elif args.lradj == "cosine":
 22 |         lr_adjust = {epoch: args.learning_rate /2 * (1 + math.cos(epoch / args.train_epochs * math.pi))}
 23 |     if epoch in lr_adjust.keys():
 24 |         lr = lr_adjust[epoch]
 25 |         for param_group in optimizer.param_groups:
 26 |             param_group['lr'] = lr
 27 |         print('Updating learning rate to {}'.format(lr))
 28 | 
 29 | 
 30 | class EarlyStopping:
 31 |     def __init__(self, patience=7, verbose=False, delta=0):
 32 |         self.patience = patience
 33 |         self.verbose = verbose
 34 |         self.counter = 0
 35 |         self.best_score = None
 36 |         self.early_stop = False
 37 |         self.val_loss_min = np.Inf
 38 |         self.delta = delta
 39 | 
 40 |     def __call__(self, val_loss, model, path):
 41 |         score = -val_loss
 42 |         if self.best_score is None:
 43 |             self.best_score = score
 44 |             self.save_checkpoint(val_loss, model, path)
 45 |         elif score < self.best_score + self.delta:
 46 |             self.counter += 1
 47 |             print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
 48 |             if self.counter >= self.patience:
 49 |                 self.early_stop = True
 50 |         else:
 51 |             self.best_score = score
 52 |             self.save_checkpoint(val_loss, model, path)
 53 |             self.counter = 0
 54 | 
 55 |     def save_checkpoint(self, val_loss, model, path):
 56 |         if self.verbose:
 57 |             print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
 58 |         torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
 59 |         self.val_loss_min = val_loss
 60 | 
 61 | 
 62 | class dotdict(dict):
 63 |     """dot.notation access to dictionary attributes"""
 64 |     __getattr__ = dict.get
 65 |     __setattr__ = dict.__setitem__
 66 |     __delattr__ = dict.__delitem__
 67 | 
 68 | 
 69 | class StandardScaler():
 70 |     def __init__(self, mean, std):
 71 |         self.mean = mean
 72 |         self.std = std
 73 | 
 74 |     def transform(self, data):
 75 |         return (data - self.mean) / self.std
 76 | 
 77 |     def inverse_transform(self, data):
 78 |         return (data * self.std) + self.mean
 79 | 
 80 | 
 81 | def visual(true, preds=None, name='./pic/test.pdf'):
 82 |     """
 83 |     Results visualization
 84 |     """
 85 |     plt.figure()
 86 |     plt.plot(true, label='GroundTruth', linewidth=2)
 87 |     if preds is not None:
 88 |         plt.plot(preds, label='Prediction', linewidth=2)
 89 |     plt.legend()
 90 |     plt.savefig(name, bbox_inches='tight')
 91 | 
 92 | 
 93 | def adjustment(gt, pred):
 94 |     anomaly_state = False
 95 |     for i in range(len(gt)):
 96 |         if gt[i] == 1 and pred[i] == 1 and not anomaly_state:
 97 |             anomaly_state = True
 98 |             for j in range(i, 0, -1):
 99 |                 if gt[j] == 0:
100 |                     break
101 |                 else:
102 |                     if pred[j] == 0:
103 |                         pred[j] = 1
104 |             for j in range(i, len(gt)):
105 |                 if gt[j] == 0:
106 |                     break
107 |                 else:
108 |                     if pred[j] == 0:
109 |                         pred[j] = 1
110 |         elif gt[i] == 0:
111 |             anomaly_state = False
112 |         if anomaly_state:
113 |             pred[i] = 1
114 |     return gt, pred
115 | 
116 | 
def cal_accuracy(y_pred, y_true):
    """Fraction of positions where ``y_pred`` equals ``y_true``."""
    matches = y_pred == y_true
    return np.mean(matches)


--------------------------------------------------------------------------------