├── README.md ├── data_provider ├── __init__.py ├── data_factory.py ├── data_loader.py ├── m4.py └── uea.py ├── exp ├── __init__.py ├── exp_anomaly_detection.py ├── exp_basic.py ├── exp_classification.py ├── exp_imputation.py ├── exp_long_term_forecasting.py └── exp_short_term_forecasting.py ├── layers ├── AutoCorrelation.py ├── Autoformer_EncDec.py ├── Conv_Blocks.py ├── Crossformer_EncDec.py ├── ETSformer_EncDec.py ├── Embed.py ├── FourierCorrelation.py ├── Invertible.py ├── LiftingScheme.py ├── LiftingSchemeLinear.py ├── MultiWaveletCorrelation.py ├── Pyraformer_EncDec.py ├── SelfAttention_Family.py ├── Transformer_EncDec.py └── __init__.py ├── models ├── AdaWaveNet.py ├── Autoformer.py ├── Crossformer.py ├── DLinear.py ├── ETSformer.py ├── FEDformer.py ├── FiLM.py ├── FreTS.py ├── Informer.py ├── Koopa.py ├── LightTS.py ├── MICN.py ├── Nonstationary_Transformer.py ├── PatchTST.py ├── Pyraformer.py ├── Reformer.py ├── TiDE.py ├── TimesNet.py ├── Transformer.py ├── __init__.py └── iTransformer.py ├── requirements.txt ├── run.py ├── scripts └── long_term_forecast │ ├── ECL_script │ └── AdaWaveNet.sh │ ├── ETT_script │ ├── AdaWaveNet_ETTh1.sh │ └── AdaWaveNet_ETTm1.sh │ ├── Exchange_script │ └── AdaWaveNet.sh │ ├── ILI_script │ └── AdaWaveNet.sh │ ├── Solar │ └── AdaWaveNet.sh │ ├── Traffic_script │ └── AdaWaveNet.sh │ └── Weather_script │ └── AdaWaveNet.sh └── utils ├── __init__.py ├── losses.py ├── m4_summary.py ├── masking.py ├── metrics.py ├── print_args.py ├── timefeatures.py └── tools.py /README.md: -------------------------------------------------------------------------------- 1 | # AdaWaveNet 2 | 3 | AdaWaveNet is a comprehensive framework for time series forecasting, imputation, and super-resolution tasks. 4 | 5 | Please refer to the paper for more details. 
6 | 7 | https://openreview.net/forum?id=m4bE9Y9FlX 8 | 9 | ``` 10 | @article{yu2025adawavenet, 11 | title={AdaWaveNet: Adaptive Wavelet Network for Time Series Analysis}, 12 | author={Yu, Han and Guo, Peikun and Sano, Akane}, 13 | journal={Transactions on Machine Learning Research}, 14 | year={2025} 15 | } 16 | ``` 17 | 18 | ## Features 19 | 20 | - **Long-term and Short-term Forecasting**: Supports models like Autoformer, Transformer, TimesNet, and more. 21 | - **Imputation**: Handles missing data in time series. 22 | - **Super Resolution**: Enhances the resolution of time series data. 23 | 24 | ## Requirements 25 | 26 | The required Python packages are listed in the `requirements.txt` file and can be installed with `pip install -r requirements.txt`. 27 | 28 | 29 | ## Usage 30 | 31 | The main entry point for running experiments is the `run.py` script. It supports various command-line arguments to configure the experiments. Here is an example of how to run a long-term forecasting task: 32 | 33 | ``` 34 | python -u run.py \ 35 | --task_name long_term_forecast \ 36 | --is_training 1 \ 37 | --root_path ./dataset/weather/ \ 38 | --data_path weather.csv \ 39 | --model_id weather_96_96 \ 40 | --model AdaWaveNet \ 41 | --data custom \ 42 | --features M \ 43 | --seq_len 96 \ 44 | --label_len 48 \ 45 | --pred_len 96 \ 46 | --e_layers 3 \ 47 | --d_layers 1 \ 48 | --factor 3 \ 49 | --enc_in 21 \ 50 | --dec_in 21 \ 51 | --c_out 21 \ 52 | --des 'Exp' \ 53 | --d_model 512 \ 54 | --d_ff 512 \ 55 | --itr 1 \ 56 | --lifting_levels 3 \ 57 | --lifting_kernel_size 7 \ 58 | --n_cluster 4 \ 59 | --learning_rate 0.0005 \ 60 | --batch_size 16 61 | ``` 62 | 63 | ## Configuration 64 | 65 | The `run.py` script accepts various arguments to configure the experiment: 66 | 67 | - `--task_name`: The name of the task (e.g., long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection). 68 | - `--is_training`: Whether to train the model (1 for training, 0 for testing). 
69 | - `--model`: The model to use (e.g., Autoformer, Transformer, TimesNet). 70 | - `--seq_len`, `--label_len`, `--pred_len`: Sequence lengths for input, label, and prediction. 71 | - `--e_layers`, `--d_layers`: Number of encoder and decoder layers. 72 | - `--learning_rate`: Learning rate for the optimizer. 73 | - `--batch_size`: Batch size for training. 74 | 75 | For a full list of arguments, refer to the `run.py` script. 76 | 77 | 78 | ## Acknowledgments 79 | 80 | This project is based on the Time-Series-Library Repository and other state-of-the-art time series models. 81 | -------------------------------------------------------------------------------- /data_provider/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ 2 | MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_Solar 3 | from data_provider.uea import collate_fn 4 | from torch.utils.data import DataLoader 5 | 6 | data_dict = { 7 | 'ETTh1': Dataset_ETT_hour, 8 | 'ETTh2': Dataset_ETT_hour, 9 | 'ETTm1': Dataset_ETT_minute, 10 | 'ETTm2': Dataset_ETT_minute, 11 | 'custom': Dataset_Custom, 12 | 'm4': Dataset_M4, 13 | 'PSM': PSMSegLoader, 14 | 'MSL': MSLSegLoader, 15 | 'SMAP': SMAPSegLoader, 16 | 'SMD': SMDSegLoader, 17 | 'SWAT': SWATSegLoader, 18 | 'UEA': UEAloader, 19 | 'Solar': Dataset_Solar 20 | } 21 | 22 | 23 | def data_provider(args, flag): 24 | Data = data_dict[args.data] 25 | timeenc = 0 if args.embed != 'timeF' else 1 26 | 27 | if flag == 'test': 28 | shuffle_flag = False 29 | drop_last = True 30 | if args.task_name == 'anomaly_detection' or args.task_name == 'classification': 31 | batch_size = args.batch_size 32 
| else: 33 | batch_size = 1 # bsz=1 for evaluation 34 | freq = args.freq 35 | else: 36 | shuffle_flag = True 37 | drop_last = True 38 | batch_size = args.batch_size # bsz for train and valid 39 | freq = args.freq 40 | 41 | if args.task_name == 'anomaly_detection': 42 | drop_last = False 43 | data_set = Data( 44 | root_path=args.root_path, 45 | win_size=args.seq_len, 46 | flag=flag, 47 | ) 48 | print(flag, len(data_set)) 49 | data_loader = DataLoader( 50 | data_set, 51 | batch_size=batch_size, 52 | shuffle=shuffle_flag, 53 | num_workers=args.num_workers, 54 | drop_last=drop_last) 55 | return data_set, data_loader 56 | elif args.task_name == 'classification': 57 | drop_last = False 58 | data_set = Data( 59 | root_path=args.root_path, 60 | flag=flag, 61 | ) 62 | 63 | data_loader = DataLoader( 64 | data_set, 65 | batch_size=batch_size, 66 | shuffle=shuffle_flag, 67 | num_workers=args.num_workers, 68 | drop_last=drop_last, 69 | collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) 70 | ) 71 | return data_set, data_loader 72 | else: 73 | if args.data == 'm4': 74 | drop_last = False 75 | data_set = Data( 76 | root_path=args.root_path, 77 | data_path=args.data_path, 78 | flag=flag, 79 | size=[args.seq_len, args.label_len, args.pred_len], 80 | features=args.features, 81 | target=args.target, 82 | timeenc=timeenc, 83 | freq=freq, 84 | seasonal_patterns=args.seasonal_patterns 85 | ) 86 | print(flag, len(data_set)) 87 | data_loader = DataLoader( 88 | data_set, 89 | batch_size=batch_size, 90 | shuffle=shuffle_flag, 91 | num_workers=args.num_workers, 92 | drop_last=drop_last) 93 | return data_set, data_loader 94 | -------------------------------------------------------------------------------- /data_provider/m4.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. 
The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | M4 Dataset 17 | """ 18 | import logging 19 | import os 20 | from collections import OrderedDict 21 | from dataclasses import dataclass 22 | from glob import glob 23 | 24 | import numpy as np 25 | import pandas as pd 26 | import patoolib 27 | from tqdm import tqdm 28 | import logging 29 | import os 30 | import pathlib 31 | import sys 32 | from urllib import request 33 | 34 | 35 | def url_file_name(url: str) -> str: 36 | """ 37 | Extract file name from url. 38 | 39 | :param url: URL to extract file name from. 40 | :return: File name. 41 | """ 42 | return url.split('/')[-1] if len(url) > 0 else '' 43 | 44 | 45 | def download(url: str, file_path: str) -> None: 46 | """ 47 | Download a file to the given path. 48 | 49 | :param url: URL to download 50 | :param file_path: Where to download the content. 
51 | """ 52 | 53 | def progress(count, block_size, total_size): 54 | progress_pct = float(count * block_size) / float(total_size) * 100.0 55 | sys.stdout.write('\rDownloading {} to {} {:.1f}%'.format(url, file_path, progress_pct)) 56 | sys.stdout.flush() 57 | 58 | if not os.path.isfile(file_path): 59 | opener = request.build_opener() 60 | opener.addheaders = [('User-agent', 'Mozilla/5.0')] 61 | request.install_opener(opener) 62 | pathlib.Path(os.path.dirname(file_path)).mkdir(parents=True, exist_ok=True) 63 | f, _ = request.urlretrieve(url, file_path, progress) 64 | sys.stdout.write('\n') 65 | sys.stdout.flush() 66 | file_info = os.stat(f) 67 | logging.info(f'Successfully downloaded {os.path.basename(file_path)} {file_info.st_size} bytes.') 68 | else: 69 | file_info = os.stat(file_path) 70 | logging.info(f'File already exists: {file_path} {file_info.st_size} bytes.') 71 | 72 | 73 | @dataclass() 74 | class M4Dataset: 75 | ids: np.ndarray 76 | groups: np.ndarray 77 | frequencies: np.ndarray 78 | horizons: np.ndarray 79 | values: np.ndarray 80 | 81 | @staticmethod 82 | def load(training: bool = True, dataset_file: str = '../dataset/m4') -> 'M4Dataset': 83 | """ 84 | Load cached dataset. 85 | 86 | :param training: Load training part if training is True, test part otherwise. 
87 | """ 88 | info_file = os.path.join(dataset_file, 'M4-info.csv') 89 | train_cache_file = os.path.join(dataset_file, 'training.npz') 90 | test_cache_file = os.path.join(dataset_file, 'test.npz') 91 | m4_info = pd.read_csv(info_file) 92 | return M4Dataset(ids=m4_info.M4id.values, 93 | groups=m4_info.SP.values, 94 | frequencies=m4_info.Frequency.values, 95 | horizons=m4_info.Horizon.values, 96 | values=np.load( 97 | train_cache_file if training else test_cache_file, 98 | allow_pickle=True)) 99 | 100 | 101 | @dataclass() 102 | class M4Meta: 103 | seasonal_patterns = ['Yearly', 'Quarterly', 'Monthly', 'Weekly', 'Daily', 'Hourly'] 104 | horizons = [6, 8, 18, 13, 14, 48] 105 | frequencies = [1, 4, 12, 1, 1, 24] 106 | horizons_map = { 107 | 'Yearly': 6, 108 | 'Quarterly': 8, 109 | 'Monthly': 18, 110 | 'Weekly': 13, 111 | 'Daily': 14, 112 | 'Hourly': 48 113 | } # different predict length 114 | frequency_map = { 115 | 'Yearly': 1, 116 | 'Quarterly': 4, 117 | 'Monthly': 12, 118 | 'Weekly': 1, 119 | 'Daily': 1, 120 | 'Hourly': 24 121 | } 122 | history_size = { 123 | 'Yearly': 1.5, 124 | 'Quarterly': 1.5, 125 | 'Monthly': 1.5, 126 | 'Weekly': 10, 127 | 'Daily': 10, 128 | 'Hourly': 10 129 | } # from interpretable.gin 130 | 131 | 132 | def load_m4_info() -> pd.DataFrame: 133 | """ 134 | Load M4Info file. 135 | 136 | :return: Pandas DataFrame of M4Info. 137 | """ 138 | return pd.read_csv(INFO_FILE_PATH) 139 | -------------------------------------------------------------------------------- /data_provider/uea.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import torch 5 | 6 | 7 | def collate_fn(data, max_len=None): 8 | """Build mini-batch tensors from a list of (X, mask) tuples. Mask input. Create 9 | Args: 10 | data: len(batch_size) list of tuples (X, y). 11 | - X: torch tensor of shape (seq_length, feat_dim); variable seq_length. 
12 | - y: torch tensor of shape (num_labels,) : class indices or numerical targets 13 | (for classification or regression, respectively). num_labels > 1 for multi-task models 14 | max_len: global fixed sequence length. Used for architectures requiring fixed length input, 15 | where the batch length cannot vary dynamically. Longer sequences are clipped, shorter are padded with 0s 16 | Returns: 17 | X: (batch_size, padded_length, feat_dim) torch tensor of masked features (input) 18 | targets: (batch_size, padded_length, feat_dim) torch tensor of unmasked features (output) 19 | target_masks: (batch_size, padded_length, feat_dim) boolean torch tensor 20 | 0 indicates masked values to be predicted, 1 indicates unaffected/"active" feature values 21 | padding_masks: (batch_size, padded_length) boolean tensor, 1 means keep vector at this position, 0 means padding 22 | """ 23 | 24 | batch_size = len(data) 25 | features, labels = zip(*data) 26 | 27 | # Stack and pad features and masks (convert 2D to 3D tensors, i.e. 
add batch dimension) 28 | lengths = [X.shape[0] for X in features] # original sequence length for each time series 29 | if max_len is None: 30 | max_len = max(lengths) 31 | 32 | X = torch.zeros(batch_size, max_len, features[0].shape[-1]) # (batch_size, padded_length, feat_dim) 33 | for i in range(batch_size): 34 | end = min(lengths[i], max_len) 35 | X[i, :end, :] = features[i][:end, :] 36 | 37 | targets = torch.stack(labels, dim=0) # (batch_size, num_labels) 38 | 39 | padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16), 40 | max_len=max_len) # (batch_size, padded_length) boolean tensor, "1" means keep 41 | 42 | return X, targets, padding_masks 43 | 44 | 45 | def padding_mask(lengths, max_len=None): 46 | """ 47 | Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths, 48 | where 1 means keep element at this position (time step) 49 | """ 50 | batch_size = lengths.numel() 51 | max_len = max_len or lengths.max_val() # trick works because of overloading of 'or' operator for non-boolean types 52 | return (torch.arange(0, max_len, device=lengths.device) 53 | .type_as(lengths) 54 | .repeat(batch_size, 1) 55 | .lt(lengths.unsqueeze(1))) 56 | 57 | 58 | class Normalizer(object): 59 | """ 60 | Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization. 61 | """ 62 | 63 | def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None): 64 | """ 65 | Args: 66 | norm_type: choose from: 67 | "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps) 68 | "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. 
across only its own rows) 69 | mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values 70 | """ 71 | 72 | self.norm_type = norm_type 73 | self.mean = mean 74 | self.std = std 75 | self.min_val = min_val 76 | self.max_val = max_val 77 | 78 | def normalize(self, df): 79 | """ 80 | Args: 81 | df: input dataframe 82 | Returns: 83 | df: normalized dataframe 84 | """ 85 | if self.norm_type == "standardization": 86 | if self.mean is None: 87 | self.mean = df.mean() 88 | self.std = df.std() 89 | return (df - self.mean) / (self.std + np.finfo(float).eps) 90 | 91 | elif self.norm_type == "minmax": 92 | if self.max_val is None: 93 | self.max_val = df.max() 94 | self.min_val = df.min() 95 | return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps) 96 | 97 | elif self.norm_type == "per_sample_std": 98 | grouped = df.groupby(by=df.index) 99 | return (df - grouped.transform('mean')) / grouped.transform('std') 100 | 101 | elif self.norm_type == "per_sample_minmax": 102 | grouped = df.groupby(by=df.index) 103 | min_vals = grouped.transform('min') 104 | return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps) 105 | 106 | else: 107 | raise (NameError(f'Normalize method "{self.norm_type}" not implemented')) 108 | 109 | 110 | def interpolate_missing(y): 111 | """ 112 | Replaces NaN values in pd.Series `y` using linear interpolation 113 | """ 114 | if y.isna().any(): 115 | y = y.interpolate(method='linear', limit_direction='both') 116 | return y 117 | 118 | 119 | def subsample(y, limit=256, factor=2): 120 | """ 121 | If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor 122 | """ 123 | if len(y) > limit: 124 | return y[::factor].reset_index(drop=True) 125 | return y 126 | -------------------------------------------------------------------------------- /exp/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/comp-well-org/AdaWaveNet/a03f080c96af3420c1c5006c8d4c6f3e78449ee6/exp/__init__.py -------------------------------------------------------------------------------- /exp/exp_anomaly_detection.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_factory import data_provider 2 | from exp.exp_basic import Exp_Basic 3 | from utils.tools import EarlyStopping, adjust_learning_rate, adjustment 4 | from sklearn.metrics import precision_recall_fscore_support 5 | from sklearn.metrics import accuracy_score 6 | import torch.multiprocessing 7 | 8 | torch.multiprocessing.set_sharing_strategy('file_system') 9 | import torch 10 | import torch.nn as nn 11 | from torch import optim 12 | import os 13 | import time 14 | import warnings 15 | import numpy as np 16 | 17 | warnings.filterwarnings('ignore') 18 | 19 | 20 | class Exp_Anomaly_Detection(Exp_Basic): 21 | def __init__(self, args): 22 | super(Exp_Anomaly_Detection, self).__init__(args) 23 | 24 | def _build_model(self): 25 | model = self.model_dict[self.args.model].Model(self.args).float() 26 | 27 | if self.args.use_multi_gpu and self.args.use_gpu: 28 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 29 | return model 30 | 31 | def _get_data(self, flag): 32 | data_set, data_loader = data_provider(self.args, flag) 33 | return data_set, data_loader 34 | 35 | def _select_optimizer(self): 36 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 37 | return model_optim 38 | 39 | def _select_criterion(self): 40 | criterion = nn.MSELoss() 41 | return criterion 42 | 43 | def vali(self, vali_data, vali_loader, criterion): 44 | total_loss = [] 45 | self.model.eval() 46 | with torch.no_grad(): 47 | for i, (batch_x, _) in enumerate(vali_loader): 48 | batch_x = batch_x.float().to(self.device) 49 | 50 | outputs = self.model(batch_x, None, None, None) 51 | 52 | f_dim = -1 if self.args.features == 'MS' else 0 53 | 
outputs = outputs[:, :, f_dim:] 54 | pred = outputs.detach().cpu() 55 | true = batch_x.detach().cpu() 56 | 57 | loss = criterion(pred, true) 58 | total_loss.append(loss) 59 | total_loss = np.average(total_loss) 60 | self.model.train() 61 | return total_loss 62 | 63 | def train(self, setting): 64 | train_data, train_loader = self._get_data(flag='train') 65 | vali_data, vali_loader = self._get_data(flag='val') 66 | test_data, test_loader = self._get_data(flag='test') 67 | 68 | path = os.path.join(self.args.checkpoints, setting) 69 | if not os.path.exists(path): 70 | os.makedirs(path) 71 | 72 | time_now = time.time() 73 | 74 | train_steps = len(train_loader) 75 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 76 | 77 | model_optim = self._select_optimizer() 78 | criterion = self._select_criterion() 79 | 80 | for epoch in range(self.args.train_epochs): 81 | iter_count = 0 82 | train_loss = [] 83 | 84 | self.model.train() 85 | epoch_time = time.time() 86 | for i, (batch_x, batch_y) in enumerate(train_loader): 87 | iter_count += 1 88 | model_optim.zero_grad() 89 | 90 | batch_x = batch_x.float().to(self.device) 91 | 92 | outputs = self.model(batch_x, None, None, None) 93 | 94 | f_dim = -1 if self.args.features == 'MS' else 0 95 | outputs = outputs[:, :, f_dim:] 96 | loss = criterion(outputs, batch_x) 97 | train_loss.append(loss.item()) 98 | 99 | if (i + 1) % 100 == 0: 100 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 101 | speed = (time.time() - time_now) / iter_count 102 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 103 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 104 | iter_count = 0 105 | time_now = time.time() 106 | 107 | loss.backward() 108 | model_optim.step() 109 | 110 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 111 | train_loss = np.average(train_loss) 112 | vali_loss = self.vali(vali_data, 
vali_loader, criterion) 113 | test_loss = self.vali(test_data, test_loader, criterion) 114 | 115 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 116 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 117 | early_stopping(vali_loss, self.model, path) 118 | if early_stopping.early_stop: 119 | print("Early stopping") 120 | break 121 | adjust_learning_rate(model_optim, epoch + 1, self.args) 122 | 123 | best_model_path = path + '/' + 'checkpoint.pth' 124 | self.model.load_state_dict(torch.load(best_model_path)) 125 | 126 | return self.model 127 | 128 | def test(self, setting, test=0): 129 | test_data, test_loader = self._get_data(flag='test') 130 | train_data, train_loader = self._get_data(flag='train') 131 | if test: 132 | print('loading model') 133 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 134 | 135 | attens_energy = [] 136 | folder_path = './test_results/' + setting + '/' 137 | if not os.path.exists(folder_path): 138 | os.makedirs(folder_path) 139 | 140 | self.model.eval() 141 | self.anomaly_criterion = nn.MSELoss(reduce=False) 142 | 143 | # (1) stastic on the train set 144 | with torch.no_grad(): 145 | for i, (batch_x, batch_y) in enumerate(train_loader): 146 | batch_x = batch_x.float().to(self.device) 147 | # reconstruction 148 | outputs = self.model(batch_x, None, None, None) 149 | # criterion 150 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) 151 | score = score.detach().cpu().numpy() 152 | attens_energy.append(score) 153 | 154 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) 155 | train_energy = np.array(attens_energy) 156 | 157 | # (2) find the threshold 158 | attens_energy = [] 159 | test_labels = [] 160 | for i, (batch_x, batch_y) in enumerate(test_loader): 161 | batch_x = batch_x.float().to(self.device) 162 | # reconstruction 163 | outputs = self.model(batch_x, None, None, None) 164 | # criterion 
165 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) 166 | score = score.detach().cpu().numpy() 167 | attens_energy.append(score) 168 | test_labels.append(batch_y) 169 | 170 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) 171 | test_energy = np.array(attens_energy) 172 | combined_energy = np.concatenate([train_energy, test_energy], axis=0) 173 | threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio) 174 | print("Threshold :", threshold) 175 | 176 | # (3) evaluation on the test set 177 | pred = (test_energy > threshold).astype(int) 178 | test_labels = np.concatenate(test_labels, axis=0).reshape(-1) 179 | test_labels = np.array(test_labels) 180 | gt = test_labels.astype(int) 181 | 182 | print("pred: ", pred.shape) 183 | print("gt: ", gt.shape) 184 | 185 | # (4) detection adjustment 186 | gt, pred = adjustment(gt, pred) 187 | 188 | pred = np.array(pred) 189 | gt = np.array(gt) 190 | print("pred: ", pred.shape) 191 | print("gt: ", gt.shape) 192 | 193 | accuracy = accuracy_score(gt, pred) 194 | precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary') 195 | print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( 196 | accuracy, precision, 197 | recall, f_score)) 198 | 199 | f = open("result_anomaly_detection.txt", 'a') 200 | f.write(setting + " \n") 201 | f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( 202 | accuracy, precision, 203 | recall, f_score)) 204 | f.write('\n') 205 | f.write('\n') 206 | f.close() 207 | return 208 | -------------------------------------------------------------------------------- /exp/exp_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from models import Autoformer, LSWaveNet, Transformer, TimesNet, Nonstationary_Transformer, DLinear, FEDformer, \ 4 | Informer, LightTS, Reformer, 
ETSformer, Pyraformer, PatchTST, MICN, Crossformer, FiLM, Koopa, TiDE, FreTS, AdaWaveNet, iTransformer 5 | 6 | 7 | class Exp_Basic(object): 8 | def __init__(self, args): 9 | self.args = args 10 | self.model_dict = { 11 | 'TimesNet': TimesNet, 12 | 'Autoformer': Autoformer, 13 | 'Transformer': Transformer, 14 | 'Nonstationary_Transformer': Nonstationary_Transformer, 15 | 'DLinear': DLinear, 16 | 'FEDformer': FEDformer, 17 | 'Informer': Informer, 18 | 'LightTS': LightTS, 19 | 'Reformer': Reformer, 20 | 'ETSformer': ETSformer, 21 | 'PatchTST': PatchTST, 22 | 'Pyraformer': Pyraformer, 23 | 'MICN': MICN, 24 | 'Crossformer': Crossformer, 25 | 'FiLM': FiLM, 26 | 'LSWaveNet': LSWaveNet, 27 | 'Koopa': Koopa, 28 | 'TiDE': TiDE, 29 | 'FreTS': FreTS, 30 | 'AdaWaveNet':AdaWaveNet, 31 | 'iTransformer': iTransformer 32 | } 33 | self.device = self._acquire_device() 34 | self.model = self._build_model().to(self.device) 35 | 36 | def _build_model(self): 37 | raise NotImplementedError 38 | return None 39 | 40 | def _acquire_device(self): 41 | if self.args.use_gpu: 42 | os.environ["CUDA_VISIBLE_DEVICES"] = str( 43 | self.args.gpu) if not self.args.use_multi_gpu else self.args.devices 44 | device = torch.device('cuda:{}'.format(self.args.gpu)) 45 | print('Use GPU: cuda:{}'.format(self.args.gpu)) 46 | else: 47 | device = torch.device('cpu') 48 | print('Use CPU') 49 | return device 50 | 51 | def _get_data(self): 52 | pass 53 | 54 | def vali(self): 55 | pass 56 | 57 | def train(self): 58 | pass 59 | 60 | def test(self): 61 | pass 62 | -------------------------------------------------------------------------------- /exp/exp_classification.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_factory import data_provider 2 | from exp.exp_basic import Exp_Basic 3 | from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy 4 | import torch 5 | import torch.nn as nn 6 | from torch import optim 7 | import os 8 | import 
time 9 | import warnings 10 | import numpy as np 11 | import pdb 12 | 13 | warnings.filterwarnings('ignore') 14 | 15 | 16 | class Exp_Classification(Exp_Basic): 17 | def __init__(self, args): 18 | super(Exp_Classification, self).__init__(args) 19 | 20 | def _build_model(self): 21 | # model input depends on data 22 | train_data, train_loader = self._get_data(flag='TRAIN') 23 | test_data, test_loader = self._get_data(flag='TEST') 24 | self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len) 25 | self.args.pred_len = 0 26 | self.args.enc_in = train_data.feature_df.shape[1] 27 | self.args.num_class = len(train_data.class_names) 28 | # model init 29 | model = self.model_dict[self.args.model].Model(self.args).float() 30 | if self.args.use_multi_gpu and self.args.use_gpu: 31 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 32 | return model 33 | 34 | def _get_data(self, flag): 35 | data_set, data_loader = data_provider(self.args, flag) 36 | return data_set, data_loader 37 | 38 | def _select_optimizer(self): 39 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 40 | return model_optim 41 | 42 | def _select_criterion(self): 43 | criterion = nn.CrossEntropyLoss() 44 | return criterion 45 | 46 | def vali(self, vali_data, vali_loader, criterion): 47 | total_loss = [] 48 | preds = [] 49 | trues = [] 50 | self.model.eval() 51 | with torch.no_grad(): 52 | for i, (batch_x, label, padding_mask) in enumerate(vali_loader): 53 | batch_x = batch_x.float().to(self.device) 54 | padding_mask = padding_mask.float().to(self.device) 55 | label = label.to(self.device) 56 | 57 | outputs = self.model(batch_x, padding_mask, None, None) 58 | 59 | pred = outputs.detach().cpu() 60 | loss = criterion(pred, label.long().squeeze().cpu()) 61 | total_loss.append(loss) 62 | 63 | preds.append(outputs.detach()) 64 | trues.append(label) 65 | 66 | total_loss = np.average(total_loss) 67 | 68 | preds = torch.cat(preds, 0) 69 | trues = torch.cat(trues, 
0) 70 | probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. for each class and sample 71 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample 72 | trues = trues.flatten().cpu().numpy() 73 | accuracy = cal_accuracy(predictions, trues) 74 | 75 | self.model.train() 76 | return total_loss, accuracy 77 | 78 | def train(self, setting): 79 | train_data, train_loader = self._get_data(flag='TRAIN') 80 | vali_data, vali_loader = self._get_data(flag='TEST') 81 | test_data, test_loader = self._get_data(flag='TEST') 82 | 83 | path = os.path.join(self.args.checkpoints, setting) 84 | if not os.path.exists(path): 85 | os.makedirs(path) 86 | 87 | time_now = time.time() 88 | 89 | train_steps = len(train_loader) 90 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 91 | 92 | model_optim = self._select_optimizer() 93 | criterion = self._select_criterion() 94 | 95 | for epoch in range(self.args.train_epochs): 96 | iter_count = 0 97 | train_loss = [] 98 | 99 | self.model.train() 100 | epoch_time = time.time() 101 | 102 | for i, (batch_x, label, padding_mask) in enumerate(train_loader): 103 | iter_count += 1 104 | model_optim.zero_grad() 105 | 106 | batch_x = batch_x.float().to(self.device) 107 | padding_mask = padding_mask.float().to(self.device) 108 | label = label.to(self.device) 109 | 110 | outputs = self.model(batch_x, padding_mask, None, None) 111 | loss = criterion(outputs, label.long().squeeze(-1)) 112 | train_loss.append(loss.item()) 113 | 114 | if (i + 1) % 100 == 0: 115 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 116 | speed = (time.time() - time_now) / iter_count 117 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 118 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 119 | iter_count = 0 120 | time_now = time.time() 121 | 122 | loss.backward() 123 | 
nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0) 124 | model_optim.step() 125 | 126 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 127 | train_loss = np.average(train_loss) 128 | vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion) 129 | test_loss, test_accuracy = self.vali(test_data, test_loader, criterion) 130 | 131 | print( 132 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}" 133 | .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy)) 134 | early_stopping(-val_accuracy, self.model, path) 135 | if early_stopping.early_stop: 136 | print("Early stopping") 137 | break 138 | if (epoch + 1) % 5 == 0: 139 | adjust_learning_rate(model_optim, epoch + 1, self.args) 140 | 141 | best_model_path = path + '/' + 'checkpoint.pth' 142 | self.model.load_state_dict(torch.load(best_model_path)) 143 | 144 | return self.model 145 | 146 | def test(self, setting, test=0): 147 | test_data, test_loader = self._get_data(flag='TEST') 148 | if test: 149 | print('loading model') 150 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 151 | 152 | preds = [] 153 | trues = [] 154 | folder_path = './test_results/' + setting + '/' 155 | if not os.path.exists(folder_path): 156 | os.makedirs(folder_path) 157 | 158 | self.model.eval() 159 | with torch.no_grad(): 160 | for i, (batch_x, label, padding_mask) in enumerate(test_loader): 161 | batch_x = batch_x.float().to(self.device) 162 | padding_mask = padding_mask.float().to(self.device) 163 | label = label.to(self.device) 164 | 165 | outputs = self.model(batch_x, padding_mask, None, None) 166 | 167 | preds.append(outputs.detach()) 168 | trues.append(label) 169 | 170 | preds = torch.cat(preds, 0) 171 | trues = torch.cat(trues, 0) 172 | print('test shape:', preds.shape, trues.shape) 173 | 174 | probs = 
class AutoCorrelation(nn.Module):
    """
    AutoCorrelation Mechanism with the following two phases:
    (1) period-based dependencies discovery
    (2) time delay aggregation
    This block can replace the self-attention family mechanism seamlessly.

    All index tensors are created on the device of the incoming values, so the
    module works on CPU as well as on any accelerator.
    """

    def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
        super(AutoCorrelation, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    @staticmethod
    def _base_index(values):
        """Return a [B, H, C, L] tensor holding 0..L-1 along the last axis, on values' device."""
        batch, head, channel, length = values.shape
        positions = torch.arange(length, device=values.device)
        return positions.view(1, 1, 1, -1).repeat(batch, head, channel, 1)

    def time_delay_agg_training(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the training phase.

        The top-k delays are selected once from the correlation averaged over
        batch, heads and channels, so every sample shares the same delays.

        Args:
            values: tensor indexed as [B, H, C, L].
            corr: correlation tensor with the delay axis last.

        Returns:
            Tensor with the shape of ``values``: softmax-weighted sum of the
            values rolled by each selected delay.
        """
        length = values.shape[3]
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
        # Per-sample correlation at the shared delays; plain indexing also
        # handles top_k == 0, where stacking an empty list would raise.
        weights = mean_value[:, index]
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            pattern = torch.roll(values, -int(index[i]), -1)
            delays_agg = delays_agg + pattern * tmp_corr[:, i][:, None, None, None]
        return delays_agg

    def time_delay_agg_inference(self, values, corr):
        """
        SpeedUp version of Autocorrelation (a batch-normalization style design)
        This is for the inference phase.

        Delays are selected per sample from the correlation averaged over heads
        and channels.

        Args:
            values: tensor indexed as [B, H, C, L].
            corr: correlation tensor with the delay axis last.

        Returns:
            Tensor with the shape of ``values``.
        """
        length = values.shape[3]
        # index init (on the same device as the values instead of a forced .cuda())
        init_index = self._base_index(values)
        # find top k
        top_k = int(self.factor * math.log(length))
        mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
        weights, delay = torch.topk(mean_value, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation: the series is doubled so that index + delay never wraps
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[:, i][:, None, None, None]
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * tmp_corr[:, i][:, None, None, None]
        return delays_agg

    def time_delay_agg_full(self, values, corr):
        """
        Standard version of Autocorrelation

        Delays are selected independently for every batch/head/channel entry.

        Args:
            values: tensor indexed as [B, H, C, L].
            corr: correlation tensor with the delay axis last.

        Returns:
            Tensor with the shape of ``values``.
        """
        length = values.shape[3]
        # index init (on the same device as the values instead of a forced .cuda())
        init_index = self._base_index(values)
        # find top k
        top_k = int(self.factor * math.log(length))
        weights, delay = torch.topk(corr, top_k, dim=-1)
        # update corr
        tmp_corr = torch.softmax(weights, dim=-1)
        # aggregation
        tmp_values = values.repeat(1, 1, 1, 2)
        delays_agg = torch.zeros_like(values).float()
        for i in range(top_k):
            tmp_delay = init_index + delay[..., i].unsqueeze(-1)
            pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
            delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
        return delays_agg

    def forward(self, queries, keys, values, attn_mask):
        """Aggregate ``values`` along the delays where queries and keys correlate most.

        Args:
            queries: tensor of shape [B, L, H, E].
            keys: tensor of shape [B, S, H, E].
            values: tensor of shape [B, S, H, D].
            attn_mask: accepted for interface compatibility; not used here.

        Returns:
            Tuple ``(V, corr)``: ``V`` has shape [B, L, H, D]; ``corr`` is the
            correlation permuted to [B, L, H, E] when ``output_attention`` is
            set, otherwise ``None``.
        """
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        if L > S:
            # Zero-pad keys/values along time up to the query length.
            zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
            values = torch.cat([values, zeros], dim=1)
            keys = torch.cat([keys, zeros], dim=1)
        else:
            values = values[:, :L, :, :]
            keys = keys[:, :L, :, :]

        # period-based dependencies
        q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
        k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
        res = q_fft * torch.conj(k_fft)
        # n=L keeps the correlation at the input length; without it an odd L
        # would be reconstructed with L - 1 samples.
        corr = torch.fft.irfft(res, n=L, dim=-1)

        # time delay agg
        if self.training:
            V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
        else:
            V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)

        if self.output_attention:
            return (V.contiguous(), corr.permute(0, 3, 1, 2))
        else:
            return (V.contiguous(), None)
class AutoCorrelationLayer(nn.Module):
    """Multi-head wrapper around an inner correlation/attention module.

    Projects queries, keys and values with linear layers, splits the result
    into ``n_heads`` heads, delegates to ``correlation`` and projects the
    merged heads back to ``d_model``.
    """

    def __init__(self, correlation, d_model, n_heads, d_keys=None,
                 d_values=None):
        super().__init__()

        # A falsy size (None or 0) falls back to an even split of d_model.
        per_head = d_model // n_heads
        key_dim = d_keys if d_keys else per_head
        value_dim = d_values if d_values else per_head

        self.inner_correlation = correlation
        self.query_projection = nn.Linear(d_model, key_dim * n_heads)
        self.key_projection = nn.Linear(d_model, key_dim * n_heads)
        self.value_projection = nn.Linear(d_model, value_dim * n_heads)
        self.out_projection = nn.Linear(value_dim * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask):
        """Return ``(output, attn)`` where output has the shape [B, L, d_model].

        ``queries`` is [B, L, d_model]; ``keys`` and ``values`` are
        [B, S, d_model]. ``attn`` is whatever the inner module returns as its
        second element.
        """
        batch, q_len, _ = queries.shape
        _, kv_len, _ = keys.shape
        heads = self.n_heads

        q_heads = self.query_projection(queries).view(batch, q_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, kv_len, heads, -1)

        mixed, attn = self.inner_correlation(q_heads, k_heads, v_heads, attn_mask)
        # Merge the heads back into a single feature axis before projecting.
        merged = mixed.view(batch, q_len, -1)

        return self.out_projection(merged), attn
class series_decomp_multi(nn.Module):
    """
    Multiple Series decomposition block from FEDformer

    Runs one ``series_decomp`` per kernel size and averages the resulting
    seasonal parts and moving means.
    """

    def __init__(self, kernel_size):
        """
        Args:
            kernel_size: iterable of moving-average window sizes, one
                decomposition is built for each entry.
        """
        super(series_decomp_multi, self).__init__()
        self.kernel_size = kernel_size
        # ModuleList (instead of a plain list) registers the sub-modules, so
        # they follow .to()/.eval()/.train() calls made on the parent.
        self.series_decomp = nn.ModuleList([series_decomp(kernel) for kernel in kernel_size])

    def forward(self, x):
        """Return ``(seasonal, moving_mean)``, each averaged over all kernel sizes."""
        trend_parts = []
        seasonal_parts = []
        for decomp in self.series_decomp:
            # Local names deliberately avoid shadowing the `moving_avg` class.
            seasonal, trend = decomp(x)
            trend_parts.append(trend)
            seasonal_parts.append(seasonal)

        sea = sum(seasonal_parts) / len(seasonal_parts)
        moving_mean = sum(trend_parts) / len(trend_parts)
        return sea, moving_mean
class Encoder(nn.Module):
    """
    Autoformer encoder: a stack of attention layers, optionally interleaved
    with convolution layers and followed by an optional normalisation module.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Return ``(x, attns)`` where ``attns`` collects each layer's second output."""
        attns = []
        if self.conv_layers is None:
            for layer in self.attn_layers:
                x, attn = layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            # Each (attention, conv) pair runs in turn; the final attention
            # layer is then applied on its own, without the mask.
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, attn = layer(x, attn_mask=attn_mask)
                x = conv(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
self.self_attention = self_attention 150 | self.cross_attention = cross_attention 151 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 152 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 153 | self.decomp1 = series_decomp(moving_avg) 154 | self.decomp2 = series_decomp(moving_avg) 155 | self.decomp3 = series_decomp(moving_avg) 156 | self.dropout = nn.Dropout(dropout) 157 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, 158 | padding_mode='circular', bias=False) 159 | self.activation = F.relu if activation == "relu" else F.gelu 160 | 161 | def forward(self, x, cross, x_mask=None, cross_mask=None): 162 | x = x + self.dropout(self.self_attention( 163 | x, x, x, 164 | attn_mask=x_mask 165 | )[0]) 166 | x, trend1 = self.decomp1(x) 167 | x = x + self.dropout(self.cross_attention( 168 | x, cross, cross, 169 | attn_mask=cross_mask 170 | )[0]) 171 | x, trend2 = self.decomp2(x) 172 | y = x 173 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 174 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 175 | x, trend3 = self.decomp3(x + y) 176 | 177 | residual_trend = trend1 + trend2 + trend3 178 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) 179 | return x, residual_trend 180 | 181 | 182 | class Decoder(nn.Module): 183 | """ 184 | Autoformer encoder 185 | """ 186 | 187 | def __init__(self, layers, norm_layer=None, projection=None): 188 | super(Decoder, self).__init__() 189 | self.layers = nn.ModuleList(layers) 190 | self.norm = norm_layer 191 | self.projection = projection 192 | 193 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): 194 | for layer in self.layers: 195 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 196 | trend = trend + residual_trend 197 | 198 | if self.norm is not None: 199 | x = self.norm(x) 200 | 201 | if 
class Inception_Block_V2(nn.Module):
    """Average of parallel 2D convolutions with row-only and column-only kernels.

    For every ``i`` in ``range(num_kernels // 2)`` a ``1 x (2i+3)`` and a
    ``(2i+3) x 1`` convolution are created, plus one final ``1 x 1``
    convolution. All of them preserve the spatial size of the input.
    """

    def __init__(self, in_channels, out_channels, num_kernels=6, init_weight=True):
        """
        Args:
            in_channels: channels of the input feature map.
            out_channels: channels produced by every convolution.
            num_kernels: controls how many kernel pairs are built
                (``num_kernels // 2`` pairs plus the 1x1 convolution).
            init_weight: apply Kaiming-normal initialisation when True.
        """
        super(Inception_Block_V2, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.num_kernels = num_kernels
        kernels = []
        for i in range(self.num_kernels // 2):
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[1, 2 * i + 3], padding=[0, i + 1]))
            kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=[2 * i + 3, 1], padding=[i + 1, 0]))
        kernels.append(nn.Conv2d(in_channels, out_channels, kernel_size=1))
        self.kernels = nn.ModuleList(kernels)
        if init_weight:
            self._initialize_weights()

    def _initialize_weights(self):
        """Kaiming-normal weights and zero biases for every Conv2d in the block."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Apply every convolution to ``x`` and return the element-wise mean."""
        # Iterate over the kernels that actually exist: for an odd
        # num_kernels there are only num_kernels of them, so indexing up to
        # num_kernels + 1 would run past the end of the list.
        res_list = [conv(x) for conv in self.kernels]
        res = torch.stack(res_list, dim=-1).mean(-1)
        return res
class Encoder(nn.Module):
    """Crossformer encoder: applies the blocks in order and keeps every intermediate output."""

    def __init__(self, attn_layers):
        super().__init__()
        self.encode_blocks = nn.ModuleList(attn_layers)

    def forward(self, x):
        """Return ``(outputs, None)``; ``outputs[0]`` is the input, ``outputs[i]`` the output of block ``i-1``."""
        encode_x = [x]
        for block in self.encode_blocks:
            # Each block returns a pair; only the first element is propagated.
            x, _ = block(x)
            encode_x.append(x)

        return encode_x, None
class Decoder(nn.Module):
    """Crossformer decoder: runs the layers in order and sums their per-layer predictions."""

    def __init__(self, layers):
        super().__init__()
        self.decode_layers = nn.ModuleList(layers)

    def forward(self, x, cross):
        """Return the summed layer predictions rearranged to ``b (seg_num seg_len) out_d``.

        ``cross`` is indexed by layer position: layer ``i`` receives ``cross[i]``.
        ``out_d`` is taken from ``x.shape[1]`` before any layer is applied.
        """
        ts_d = x.shape[1]
        final_predict = None

        for idx, layer in enumerate(self.decode_layers):
            x, layer_predict = layer(x, cross[idx])
            # The first prediction seeds the accumulator; later ones are added.
            final_predict = layer_predict if final_predict is None else final_predict + layer_predict

        final_predict = rearrange(final_predict, 'b (out_d seg_num) seg_len -> b (seg_num seg_len) out_d', out_d=ts_d)

        return final_predict
class TokenEmbedding(nn.Module):
    """Embed each time step with a kernel-3 circular 1D convolution over time."""

    def __init__(self, c_in, d_model):
        """
        Args:
            c_in: number of input channels (features per time step).
            d_model: number of output channels of the convolution.
        """
        super(TokenEmbedding, self).__init__()
        # Compare the version numerically: as strings, '1.10.0' sorts before
        # '1.5.0', which would select padding 2 and lengthen the output.
        try:
            version = tuple(int(part) for part in torch.__version__.split('.')[:2])
        except ValueError:
            # Unparsable version string: assume a release newer than 1.5.
            version = (1, 5)
        padding = 1 if version >= (1, 5) else 2
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular', bias=False)
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(
                    m.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        """Apply the convolution along axis 1 of ``x`` ([B, L, c_in]) and return [B, L', d_model]."""
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
        return x
class TimeFeatureEmbedding(nn.Module):
    """Bias-free linear projection of time features to ``d_model``.

    The number of input features is looked up from the frequency code
    (``'h'`` -> 4, ``'t'`` -> 5, ``'s'`` -> 6, ``'m'``/``'a'`` -> 1,
    ``'w'`` -> 2, ``'d'``/``'b'`` -> 3); an unknown code raises ``KeyError``.
    ``embed_type`` is accepted but not used.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super().__init__()

        features_per_freq = dict(h=4, t=5, s=6, m=1, a=1, w=2, d=3, b=3)
        self.embed = nn.Linear(features_per_freq[freq], d_model, bias=False)

    def forward(self, x):
        """Project the last axis of ``x`` to ``d_model``."""
        return self.embed(x)
class DataEmbedding_inverted(nn.Module):
    """Embed each variate's whole series as one token (inverted layout).

    The input is transposed so the time axis becomes the feature axis, then a
    linear layer maps ``c_in`` to ``d_model``. ``embed_type`` and ``freq`` are
    accepted but not used.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super().__init__()
        self.value_embedding = nn.Linear(c_in, d_model)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Return the embedded tokens, [Batch Variate d_model].

        When ``x_mark`` is given, its transposed channels are appended as
        additional tokens after the variates of ``x``.
        """
        # tokens: [Batch Variate Time]
        tokens = x.permute(0, 2, 1)
        if x_mark is not None:
            tokens = torch.cat([tokens, x_mark.permute(0, 2, 1)], 1)
        embedded = self.value_embedding(tokens)
        # embedded: [Batch Variate d_model]
        return self.dropout(embedded)
def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
    """
    Select frequency-mode indices for a sequence of length ``seq_len``.

    At most ``seq_len // 2`` modes are returned.
    'random' draws the modes at random (via ``np.random.shuffle``) from
    ``0 .. seq_len // 2 - 1``; any other value takes the lowest modes.
    The returned list is sorted in ascending order.
    """
    half = seq_len // 2
    n_modes = min(modes, half)
    if mode_select_method != 'random':
        # The lowest modes are already in ascending order.
        return list(range(0, n_modes))
    candidates = list(range(0, half))
    np.random.shuffle(candidates)
    return sorted(candidates[:n_modes])
35 | """ 36 | # get modes on frequency domain 37 | self.index = get_frequency_modes(seq_len, modes=modes, mode_select_method=mode_select_method) 38 | print('modes={}, index={}'.format(modes, self.index)) 39 | 40 | self.scale = (1 / (in_channels * out_channels)) 41 | self.weights1 = nn.Parameter( 42 | self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index), dtype=torch.float)) 43 | self.weights2 = nn.Parameter( 44 | self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index), dtype=torch.float)) 45 | 46 | # Complex multiplication 47 | def compl_mul1d(self, order, x, weights): 48 | x_flag = True 49 | w_flag = True 50 | if not torch.is_complex(x): 51 | x_flag = False 52 | x = torch.complex(x, torch.zeros_like(x).to(x.device)) 53 | if not torch.is_complex(weights): 54 | w_flag = False 55 | weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device)) 56 | if x_flag or w_flag: 57 | return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag), 58 | torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real)) 59 | else: 60 | return torch.einsum(order, x.real, weights.real) 61 | 62 | def forward(self, q, k, v, mask): 63 | # size = [B, L, H, E] 64 | B, L, H, E = q.shape 65 | x = q.permute(0, 2, 3, 1) 66 | # Compute Fourier coefficients 67 | x_ft = torch.fft.rfft(x, dim=-1) 68 | # Perform Fourier neural operations 69 | out_ft = torch.zeros(B, H, E, L // 2 + 1, device=x.device, dtype=torch.cfloat) 70 | for wi, i in enumerate(self.index): 71 | if i >= x_ft.shape[3] or wi >= out_ft.shape[3]: 72 | continue 73 | out_ft[:, :, :, wi] = self.compl_mul1d("bhi,hio->bho", x_ft[:, :, :, i], 74 | torch.complex(self.weights1, self.weights2)[:, :, :, wi]) 75 | # Return to time domain 76 | x = torch.fft.irfft(out_ft, n=x.size(-1)) 77 | return (x, None) 78 | 79 | 80 | # ########## Fourier Cross Former #################### 81 | class 
class FourierCrossAttention(nn.Module):
    """Cross attention evaluated on a selected subset of Fourier modes.

    Queries and keys are moved to the frequency domain with a real FFT over
    the sequence axis, a fixed set of mode indices is kept for each, attention
    scores are computed between the kept modes, the result is multiplied by
    learned complex weights and transformed back with an inverse real FFT.
    """

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
                 activation='tanh', policy=0, num_heads=8):
        super(FourierCrossAttention, self).__init__()
        print(' fourier enhanced cross attention used!')
        """
        1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.
        """
        self.activation = activation
        self.in_channels = in_channels
        self.out_channels = out_channels
        # get modes for queries and keys (& values) on frequency domain
        self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
        self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)

        print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q))
        print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv))

        self.scale = (1 / (in_channels * out_channels))
        # weights1 / weights2 are combined in forward() as the real and
        # imaginary parts of one complex weight tensor, one slice per query mode.
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q), dtype=torch.float))
        self.weights2 = nn.Parameter(
            self.scale * torch.rand(num_heads, in_channels // num_heads, out_channels // num_heads, len(self.index_q), dtype=torch.float))

    # Complex multiplication
    def compl_mul1d(self, order, x, weights):
        """Evaluate the einsum ``order`` on ``x`` and ``weights`` with complex arithmetic.

        Real inputs are promoted to complex with a zero imaginary part. When
        at least one input was complex the complex product is returned;
        when both were real, only the real einsum is returned.
        """
        x_flag = True
        w_flag = True
        if not torch.is_complex(x):
            x_flag = False
            x = torch.complex(x, torch.zeros_like(x).to(x.device))
        if not torch.is_complex(weights):
            w_flag = False
            weights = torch.complex(weights, torch.zeros_like(weights).to(weights.device))
        if x_flag or w_flag:
            # (a + ib)(c + id) = (ac - bd) + i(ad + bc), evaluated per einsum term.
            return torch.complex(torch.einsum(order, x.real, weights.real) - torch.einsum(order, x.imag, weights.imag),
                                 torch.einsum(order, x.real, weights.imag) + torch.einsum(order, x.imag, weights.real))
        else:
            return torch.einsum(order, x.real, weights.real)

    def forward(self, q, k, v, mask):
        """Return ``(out, None)`` where ``out`` is indexed as [B, H, E, L].

        ``mask`` is not used. ``v`` is permuted into ``xv`` but does not enter
        the computation below: the aggregation step multiplies the attention
        scores with the key spectrum.
        """
        # size = [B, L, H, E]
        B, L, H, E = q.shape
        xq = q.permute(0, 2, 3, 1)  # size = [B, H, E, L]
        xk = k.permute(0, 2, 3, 1)
        xv = v.permute(0, 2, 3, 1)

        # Compute Fourier coefficients
        # Keep only the selected query modes; indices beyond the spectrum stay zero.
        xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat)
        xq_ft = torch.fft.rfft(xq, dim=-1)
        for i, j in enumerate(self.index_q):
            if j >= xq_ft.shape[3]:
                continue
            xq_ft_[:, :, :, i] = xq_ft[:, :, :, j]
        # Same selection for the keys, using the key/value mode indices.
        xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat)
        xk_ft = torch.fft.rfft(xk, dim=-1)
        for i, j in enumerate(self.index_kv):
            if j >= xk_ft.shape[3]:
                continue
            xk_ft_[:, :, :, i] = xk_ft[:, :, :, j]

        # perform attention mechanism on frequency domain
        xqk_ft = (self.compl_mul1d("bhex,bhey->bhxy", xq_ft_, xk_ft_))
        if self.activation == 'tanh':
            # tanh is applied to the real and imaginary parts separately.
            xqk_ft = torch.complex(xqk_ft.real.tanh(), xqk_ft.imag.tanh())
        elif self.activation == 'softmax':
            # softmax over the magnitudes; the result is real, re-wrapped as complex.
            xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
            xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
        else:
            raise Exception('{} actiation function is not implemented'.format(self.activation))
        xqkv_ft = self.compl_mul1d("bhxy,bhey->bhex", xqk_ft, xk_ft_)
        xqkvw = self.compl_mul1d("bhex,heox->bhox", xqkv_ft, torch.complex(self.weights1, self.weights2))
        # Scatter the processed modes back to their positions in a full spectrum.
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
        for i, j in enumerate(self.index_q):
            if i >= xqkvw.shape[3] or j >= out_ft.shape[3]:
                continue
            out_ft[:, :, :, j] = xqkvw[:, :, :, i]
        # Return to time domain
        out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
        return (out, None)
class RevIN(nn.Module):
    """Reversible instance normalization.

    ``forward(x, 'norm')`` records the mean and standard deviation of ``x``
    and returns the standardized (optionally affine-transformed) tensor.
    ``forward(y, 'denorm')`` undoes that transformation using the statistics
    recorded by the most recent ``'norm'`` call.
    """

    def __init__(self, num_features: int, eps=1e-5, affine=True):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        """
        super(RevIN, self).__init__()

        self.num_features = num_features
        self.eps = eps
        self.affine = affine

        # Filled in by the first 'norm' call; None means "no statistics yet".
        self.mean = None
        self.stdev = None

        if self.affine:
            self._init_params()

    def forward(self, x, mode: str):
        """Apply ``'norm'`` or ``'denorm'`` to ``x``.

        :raises NotImplementedError: if ``mode`` is neither value
        :raises RuntimeError: if ``'denorm'`` is requested before any ``'norm'``
        """
        if mode == 'norm':
            self._get_statistics(x)
            return self._normalize(x)
        if mode == 'denorm':
            return self._denormalize(x)
        raise NotImplementedError(
            "RevIN mode must be 'norm' or 'denorm', got {!r}".format(mode))

    def _init_params(self):
        # initialize RevIN params: (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        # Reduce over every axis except the first and the last one.
        dim2reduce = tuple(range(1, x.ndim - 1))
        # Statistics are detached so no gradient flows through them.
        self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
        self.stdev = torch.sqrt(
            torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()

    def _normalize(self, x):
        x = x - self.mean
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.mean is None or self.stdev is None:
            # Previously this surfaced as an AttributeError on self.mean.
            raise RuntimeError(
                "RevIN 'denorm' was called before 'norm': no statistics have been recorded")
        if self.affine:
            x = x - self.affine_bias
            # eps**2 keeps the division finite if a learned weight reaches zero.
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        x = x + self.mean
        return x
class Splitting(nn.Module):
    """Split a sequence into its even-indexed and odd-indexed samples."""

    def __init__(self, channel_first):
        super(Splitting, self).__init__()
        # Deciding the stride base on the direction
        self.channel_first = channel_first

    # conv_even / conv_odd are methods rather than lambdas stored on the
    # instance: lambda attributes cannot be pickled, which broke pickling of
    # any model that contains this module.
    def conv_even(self, x):
        """Return the samples at even positions along the sequence axis."""
        return x[:, :, ::2] if self.channel_first else x[:, ::2, :]

    def conv_odd(self, x):
        """Return the samples at odd positions along the sequence axis."""
        return x[:, :, 1::2] if self.channel_first else x[:, 1::2, :]

    def forward(self, x):
        '''Returns the odd and even part'''
        return (self.conv_even(x), self.conv_odd(x))


class LiftingScheme(nn.Module):
    """Learnable lifting step built from a predictor ``P`` and an updater ``U``.

    Both are per-channel (grouped) 1-D convolution stacks. ``forward`` returns
    the pair ``(c, d)`` computed from the even and odd halves of the input.

    :param in_channels: number of channels of the input
    :param input_size: length of the input; only used to size the LayerNorm
        of the ``simple_lifting`` variant (``input_size // 2``)
    :param modified: if True, update first and predict second; otherwise
        predict first and update second
    :param splitting: if True, ``forward`` splits its input itself; otherwise
        it expects an ``(x_even, x_odd)`` pair
    :param k_size: kernel size of the first convolution of ``P`` and ``U``
    :param simple_lifting: selects the single-convolution GELU/LayerNorm
        variant instead of the two-convolution Tanh variant
    """

    def __init__(self, in_channels, input_size, modified=True, splitting=True, k_size=4, simple_lifting=True):
        super(LiftingScheme, self).__init__()
        self.modified = modified
        kernel_size = k_size
        # Left/right padding adds k_size - 1 samples, so the stride-1
        # convolution below preserves the sequence length.
        pad = (k_size // 2, k_size - 1 - k_size // 2)

        self.splitting = splitting
        self.split = Splitting(channel_first=True)

        # Layers are created in P, U, P, U order to keep parameter
        # initialisation order stable.
        modules_P = []
        modules_U = []

        if simple_lifting:
            modules_P += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1, groups=in_channels),
                nn.GELU(),
                nn.LayerNorm([in_channels, input_size // 2])
            ]
            modules_U += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1, groups=in_channels),
                nn.GELU(),
                nn.LayerNorm([in_channels, input_size // 2])
            ]
        else:
            size_hidden = 2

            modules_P += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels * size_hidden, kernel_size=kernel_size, stride=1,
                          groups=in_channels),
                nn.Tanh()
            ]
            modules_U += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels * size_hidden, kernel_size=kernel_size, stride=1,
                          groups=in_channels),
                nn.Tanh()
            ]

            # Final dense
            modules_P += [
                nn.Conv1d(in_channels * size_hidden, in_channels, kernel_size=1, stride=1, groups=in_channels),
                nn.Tanh()
            ]
            modules_U += [
                nn.Conv1d(in_channels * size_hidden, in_channels, kernel_size=1, stride=1, groups=in_channels),
                nn.Tanh()
            ]

        self.P = nn.Sequential(*modules_P)
        self.U = nn.Sequential(*modules_U)

    def forward(self, x):
        """Return ``(c, d)`` for ``x`` (a tensor, or an ``(even, odd)`` pair)."""
        if self.splitting:
            (x_even, x_odd) = self.split(x)
        else:
            (x_even, x_odd) = x

        if self.modified:
            c = x_even + self.U(x_odd)
            d = x_odd - self.P(c)
        else:
            d = x_odd - self.P(x_even)
            c = x_even + self.U(d)
        return (c, d)


class InverseLiftingScheme(nn.Module):
    """Rebuild a sequence of length ``2 * L`` from ``(c, d)`` of length ``L``.

    The module owns its own ``LiftingScheme`` (``self.wavelet``); its ``P``
    and ``U`` networks are separate parameters created here.
    """

    def __init__(self, in_channels, input_size, kernel_size=4, simple_lifting=False):
        super(InverseLiftingScheme, self).__init__()
        # input_size is the length of c/d, i.e. half of the rebuilt sequence.
        self.wavelet = LiftingScheme(in_channels, k_size=kernel_size, simple_lifting=simple_lifting,
                                     input_size=input_size * 2)

    def forward(self, c, d):
        # The original branched on self.wavelet.modified but both arms were
        # identical, so the same update is applied regardless of the flag.
        # NOTE(review): confirm a distinct update for modified=True was not intended.
        x_even = c - self.wavelet.U(d)
        x_odd = d + self.wavelet.P(x_even)

        # Merge the even and odd components to reconstruct the original signal
        B, C, L = c.size()
        x = torch.zeros((B, C, 2 * L), dtype=c.dtype, device=c.device)
        x[..., ::2] = x_even
        x[..., 1::2] = x_odd

        return x
class Splitting(nn.Module):
    """Split a sequence into its even-indexed and odd-indexed samples."""

    def __init__(self, channel_first):
        super(Splitting, self).__init__()
        # Deciding the stride base on the direction
        self.channel_first = channel_first

    # conv_even / conv_odd are methods rather than lambdas stored on the
    # instance: lambda attributes cannot be pickled, which broke pickling of
    # any model that contains this module.
    def conv_even(self, x):
        """Return the samples at even positions along the sequence axis."""
        return x[:, :, ::2] if self.channel_first else x[:, ::2, :]

    def conv_odd(self, x):
        """Return the samples at odd positions along the sequence axis."""
        return x[:, :, 1::2] if self.channel_first else x[:, 1::2, :]

    def forward(self, x):
        '''Returns the odd and even part'''
        return (self.conv_even(x), self.conv_odd(x))


class LiftingScheme(nn.Module):
    """Learnable lifting step built from a predictor ``P`` and an updater ``U``.

    This variant uses ordinary (non-grouped) convolutions, so every output
    channel sees all input channels. ``forward`` returns the pair ``(c, d)``
    computed from the even and odd halves of the input.

    :param in_channels: number of channels of the input
    :param input_size: length of the input; only used to size the LayerNorm
        of the ``simple_lifting`` variant (``input_size // 2``)
    :param modified: if True, update first and predict second; otherwise
        predict first and update second
    :param splitting: if True, ``forward`` splits its input itself; otherwise
        it expects an ``(x_even, x_odd)`` pair
    :param k_size: kernel size of the first convolution of ``P`` and ``U``
    :param simple_lifting: selects the single-convolution GELU/LayerNorm
        variant instead of the two-convolution ReLU/Tanh variant
    """

    def __init__(self, in_channels, input_size, modified=True, splitting=True, k_size=4, simple_lifting=False):
        super(LiftingScheme, self).__init__()
        self.modified = modified
        kernel_size = k_size
        # Left/right padding adds k_size - 1 samples, so the stride-1
        # convolution below preserves the sequence length.
        pad = (k_size // 2, k_size - 1 - k_size // 2)

        self.splitting = splitting
        self.split = Splitting(channel_first=True)

        # Layers are created in P, U, P, U order to keep parameter
        # initialisation order stable.
        modules_P = []
        modules_U = []

        if simple_lifting:
            modules_P += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1),
                nn.GELU(),
                nn.LayerNorm([in_channels, input_size // 2])
            ]
            modules_U += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels, kernel_size=kernel_size, stride=1),
                nn.GELU(),
                nn.LayerNorm([in_channels, input_size // 2])
            ]
        else:
            size_hidden = 2

            modules_P += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels * size_hidden, kernel_size=kernel_size, stride=1),
                nn.ReLU()
            ]
            modules_U += [
                nn.ReflectionPad1d(pad),
                nn.Conv1d(in_channels, in_channels * size_hidden, kernel_size=kernel_size, stride=1),
                nn.ReLU()
            ]

            # Final dense
            modules_P += [
                nn.Conv1d(in_channels * size_hidden, in_channels, kernel_size=1, stride=1),
                nn.Tanh()
            ]
            modules_U += [
                nn.Conv1d(in_channels * size_hidden, in_channels, kernel_size=1, stride=1),
                nn.Tanh()
            ]

        self.P = nn.Sequential(*modules_P)
        self.U = nn.Sequential(*modules_U)

    def forward(self, x):
        """Return ``(c, d)`` for ``x`` (a tensor, or an ``(even, odd)`` pair)."""
        if self.splitting:
            (x_even, x_odd) = self.split(x)
        else:
            (x_even, x_odd) = x

        if self.modified:
            c = x_even + self.U(x_odd)
            d = x_odd - self.P(c)
        else:
            d = x_odd - self.P(x_even)
            c = x_even + self.U(d)
        return (c, d)


class InverseLiftingScheme(nn.Module):
    """Rebuild a sequence of length ``2 * L`` from ``(c, d)`` of length ``L``.

    The module owns its own ``LiftingScheme`` (``self.wavelet``); its ``P``
    and ``U`` networks are separate parameters created here.
    """

    def __init__(self, in_channels, input_size, kernel_size=4, simple_lifting=False):
        super(InverseLiftingScheme, self).__init__()
        # input_size is the length of c/d, i.e. half of the rebuilt sequence.
        self.wavelet = LiftingScheme(in_channels, k_size=kernel_size, simple_lifting=simple_lifting,
                                     input_size=input_size * 2)

    def forward(self, c, d):
        # The original branched on self.wavelet.modified but both arms were
        # identical, so the same update is applied regardless of the flag.
        # NOTE(review): confirm a distinct update for modified=True was not intended.
        x_even = c - self.wavelet.U(d)
        x_odd = d + self.wavelet.P(x_even)

        # Merge the even and odd components to reconstruct the original signal
        B, C, L = c.size()
        x = torch.zeros((B, C, 2 * L), dtype=c.dtype, device=c.device)
        x[..., ::2] = x_even
        x[..., 1::2] = x_odd

        return x
def get_mask(input_size, window_size, inner_size):
    """Get the attention mask of PAM-Naive

    Returns ``(mask, all_size)``. ``all_size`` lists the node count of every
    pyramid scale, starting with ``input_size``. ``mask`` is a square boolean
    tensor over all nodes of all scales; connected pairs are ``False`` and
    every other pair is ``True``.
    """
    # Get the size of all layers
    all_size = [input_size]
    for window in window_size:
        all_size.append(math.floor(all_size[-1] / window))

    # Offset of the first node of every scale in the flattened sequence.
    offsets = [sum(all_size[:level]) for level in range(len(all_size))]

    seq_length = sum(all_size)
    mask = torch.zeros(seq_length, seq_length)

    # get intra-scale mask
    half_width = inner_size // 2
    for first, count in zip(offsets, all_size):
        stop = first + count
        for node in range(first, stop):
            mask[node, max(node - half_width, first):min(node + half_width + 1, stop)] = 1

    # get inter-scale mask
    for level in range(1, len(all_size)):
        first = offsets[level]
        child_first = first - all_size[level - 1]
        window = window_size[level - 1]
        last = first + all_size[level] - 1
        for node in range(first, last + 1):
            left_side = child_first + (node - first) * window
            if node == last:
                # The last parent absorbs every remaining child node.
                right_side = first
            else:
                right_side = child_first + (node - first + 1) * window
            mask[node, left_side:right_side] = 1
            mask[left_side:right_side, node] = 1

    mask = (1 - mask).bool()

    return mask, all_size


def refer_points(all_sizes, window_size):
    """Gather features from PAM's pyramid sequences

    For every node of the finest scale, record the flattened index of the
    node it maps to at each scale. The result is a ``long`` tensor of shape
    ``(1, all_sizes[0], len(all_sizes), 1)``.
    """
    input_size = all_sizes[0]
    n_levels = len(all_sizes)
    indexes = torch.zeros(input_size, n_levels)

    for leaf in range(input_size):
        indexes[leaf][0] = leaf
        current = leaf
        for level in range(1, n_levels):
            level_start = sum(all_sizes[:level])
            # Position of `current` inside the previous scale.
            position = current - (level_start - all_sizes[level - 1])
            parent = min(position // window_size[level - 1], all_sizes[level] - 1)
            current = level_start + parent
            indexes[leaf][level] = current

    return indexes.unsqueeze(0).unsqueeze(3).long()


class RegularMask:
    """Wrap a mask tensor, exposing it with an extra axis inserted at dim 1."""

    def __init__(self, mask):
        self._mask = mask.unsqueeze(1)

    @property
    def mask(self):
        return self._mask
class EncoderLayer(nn.Module):
    """ Compose with two layers """

    def __init__(self, d_model, d_inner, n_head, dropout=0.1, normalize_before=True):
        super().__init__()

        inner_attention = FullAttention(mask_flag=True, factor=0,
                                        attention_dropout=dropout, output_attention=False)
        self.slf_attn = AttentionLayer(inner_attention, d_model, n_head)
        self.pos_ffn = PositionwiseFeedForward(
            d_model, d_inner, dropout=dropout, normalize_before=normalize_before)

    def forward(self, enc_input, slf_attn_mask=None):
        # Self-attention (query = key = value) followed by the feed-forward block.
        attended, _ = self.slf_attn(
            enc_input, enc_input, enc_input, attn_mask=RegularMask(slf_attn_mask))
        return self.pos_ffn(attended)


class Encoder(nn.Module):
    """ A encoder model with self attention mechanism. """

    def __init__(self, configs, window_size, inner_size):
        super().__init__()

        d_bottleneck = configs.d_model // 4

        # The mask and the gather indexes depend only on the configuration,
        # so they are computed once here.
        self.mask, self.all_size = get_mask(
            configs.seq_len, window_size, inner_size)
        self.indexes = refer_points(self.all_size, window_size)

        # naive pyramid attention
        self.layers = nn.ModuleList([
            EncoderLayer(configs.d_model, configs.d_ff, configs.n_heads,
                         dropout=configs.dropout, normalize_before=False)
            for _ in range(configs.e_layers)
        ])

        # NOTE(review): configs.dropout is passed as the third positional
        # argument; confirm that matches DataEmbedding's parameter order.
        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.dropout)
        self.conv_layers = Bottleneck_Construct(
            configs.d_model, window_size, d_bottleneck)

    def forward(self, x_enc, x_mark_enc):
        seq_enc = self.enc_embedding(x_enc, x_mark_enc)

        # One copy of the mask per batch element, on the input's device.
        mask = self.mask.repeat(len(seq_enc), 1, 1).to(x_enc.device)
        seq_enc = self.conv_layers(seq_enc)

        for layer in self.layers:
            seq_enc = layer(seq_enc, mask)

        batch, width = seq_enc.size(0), seq_enc.size(2)
        indexes = self.indexes.repeat(batch, 1, 1, width).to(seq_enc.device)
        indexes = indexes.view(batch, -1, width)
        # Pick the selected rows and lay them out side by side per position.
        all_enc = torch.gather(seq_enc, 1, indexes)
        return all_enc.view(batch, self.all_size[0], -1)
class ConvLayer(nn.Module):
    """Strided 1-D convolution followed by batch norm and ELU.

    Kernel size and stride both equal ``window_size``, so windows do not
    overlap.
    """

    def __init__(self, c_in, window_size):
        super().__init__()
        self.downConv = nn.Conv1d(c_in, c_in, kernel_size=window_size, stride=window_size)
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()

    def forward(self, x):
        return self.activation(self.norm(self.downConv(x)))


class Bottleneck_Construct(nn.Module):
    """Bottleneck convolution CSCM"""

    def __init__(self, d_model, window_size, d_inner):
        super().__init__()
        # A scalar window size means three scales sharing that window.
        windows = window_size if isinstance(window_size, list) else [window_size] * 3
        self.conv_layers = nn.ModuleList([ConvLayer(d_inner, window) for window in windows])
        self.up = Linear(d_inner, d_model)
        self.down = Linear(d_model, d_inner)
        self.norm = nn.LayerNorm(d_model)

    def forward(self, enc_input):
        # Project down, then move features to dim 1 for Conv1d.
        current = self.down(enc_input).permute(0, 2, 1)
        scales = []
        for conv in self.conv_layers:
            current = conv(current)
            scales.append(current)

        # Concatenate all scales along the sequence axis and project back up.
        coarse = self.up(torch.cat(scales, dim=2).transpose(1, 2))
        stacked = torch.cat([enc_input, coarse], dim=1)
        return self.norm(stacked)


class PositionwiseFeedForward(nn.Module):
    """ Two-layer position-wise feed-forward neural network. """

    def __init__(self, d_in, d_hid, dropout=0.1, normalize_before=True):
        super().__init__()

        self.normalize_before = normalize_before

        self.w_1 = nn.Linear(d_in, d_hid)
        self.w_2 = nn.Linear(d_hid, d_in)

        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        # LayerNorm is applied before the block or after the residual sum,
        # depending on normalize_before.
        hidden = self.layer_norm(x) if self.normalize_before else x
        hidden = self.dropout(F.gelu(self.w_1(hidden)))
        hidden = self.dropout(self.w_2(hidden))
        out = hidden + x
        return out if self.normalize_before else self.layer_norm(out)
class ConvLayer(nn.Module):
    """Downsampling block: circular conv, batch norm, ELU, stride-2 max pool.

    Takes and returns tensors laid out as ``[B, L, D]``; the pooling stage
    shortens the ``L`` axis.
    """

    def __init__(self, c_in):
        super().__init__()
        self.downConv = nn.Conv1d(c_in, c_in, kernel_size=3, padding=2, padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        # Conv1d works on [B, D, L]; convert on the way in and back on the way out.
        feats = x.permute(0, 2, 1)
        for stage in (self.downConv, self.norm, self.activation, self.maxPool):
            feats = stage(feats)
        return feats.transpose(1, 2)


class EncoderLayer(nn.Module):
    """Self-attention followed by a two-layer position-wise 1x1 convolution.

    ``attention`` is called as ``attention(x, x, x, attn_mask=, tau=, delta=)``
    and must return ``(output, attention_weights)``. ``d_ff`` defaults to
    ``4 * d_model``; any ``activation`` other than ``"relu"`` selects GELU.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask, tau=tau, delta=delta)
        # Residual connection around attention, then normalisation.
        x = self.norm1(x + self.dropout(attended))

        # Feed-forward block; 1x1 convolutions act on the transposed layout.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + hidden), attn
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    ``forward`` returns ``(x, attns)`` where ``attns`` holds the second
    return value of every attention layer, in call order.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = None if conv_layers is None else nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None, tau=None, delta=None):
        # x [B, L, D]
        attns = []
        if self.conv_layers is None:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
                attns.append(attn)
        else:
            paired = zip(self.attn_layers, self.conv_layers)
            for position, (attn_layer, conv_layer) in enumerate(paired):
                # Only the first layer receives delta.
                layer_delta = delta if position == 0 else None
                x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=layer_delta)
                x = conv_layer(x)
                attns.append(attn)
            # The final attention layer runs without a mask and without delta.
            x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns


class DecoderLayer(nn.Module):
    """Self-attention, cross-attention and a position-wise 1x1 conv block.

    Both attention modules are called as ``attn(q, k, v, attn_mask=, tau=,
    delta=)`` and only the first element of their return value is used.
    ``d_ff`` defaults to ``4 * d_model``; any ``activation`` other than
    ``"relu"`` selects GELU.
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        if not d_ff:
            d_ff = 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        # Self-attention never receives delta.
        self_out = self.self_attention(x, x, x, attn_mask=x_mask, tau=tau, delta=None)[0]
        x = self.norm1(x + self.dropout(self_out))

        # Cross-attention: queries from the decoder, keys/values from `cross`.
        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask, tau=tau, delta=delta)[0]
        x = self.norm2(x + self.dropout(cross_out))

        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        hidden = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm3(x + hidden)
class Decoder(nn.Module):
    """Apply decoder layers in order, then optional norm and projection."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
        # Every layer receives the same encoder output and the same masks.
        shared = dict(x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)
        for layer in self.layers:
            x = layer(x, cross, **shared)

        # Norm and projection are skipped when they were not supplied.
        for stage in (self.norm, self.projection):
            if stage is not None:
                x = stage(x)
        return x
class Model(nn.Module):
    """
    Autoformer is the first method to achieve the series-wise connection,
    with inherent O(LlogL) complexity
    Paper link: https://openreview.net/pdf?id=I55UqU-M11y
    """

    def __init__(self, configs):
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Decomp
        self.decomp = series_decomp(configs.moving_avg)

        def correlation(mask_flag, output_attention):
            # One auto-correlation block wrapped in its projection layer.
            return AutoCorrelationLayer(
                AutoCorrelation(mask_flag, configs.factor, attention_dropout=configs.dropout,
                                output_attention=output_attention),
                configs.d_model, configs.n_heads)

        # Embedding
        self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                                  configs.dropout)
        # Encoder
        encoder_layers = [
            EncoderLayer(
                correlation(False, configs.output_attention),
                configs.d_model,
                configs.d_ff,
                moving_avg=configs.moving_avg,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.e_layers)
        ]
        self.encoder = Encoder(encoder_layers, norm_layer=my_Layernorm(configs.d_model))

        # Decoder (forecasting tasks only)
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                                      configs.dropout)
            decoder_layers = [
                DecoderLayer(
                    correlation(True, False),
                    correlation(False, False),
                    configs.d_model,
                    configs.c_out,
                    configs.d_ff,
                    moving_avg=configs.moving_avg,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for _ in range(configs.d_layers)
            ]
            self.decoder = Decoder(
                decoder_layers,
                norm_layer=my_Layernorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
            )
        # Imputation and anomaly detection share the same output head.
        if self.task_name in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.d_model * configs.seq_len, configs.num_class)

    def _encode(self, x_enc, x_mark_enc):
        # Embed the input and run the encoder; attention maps are discarded.
        encoded, _ = self.encoder(self.enc_embedding(x_enc, x_mark_enc), attn_mask=None)
        return encoded

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        # decomp init
        horizon_mean = x_enc.mean(dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        horizon_zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
        seasonal_init, trend_init = self.decomp(x_enc)
        # decoder input: last label_len steps followed by a pred_len placeholder
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], horizon_mean], dim=1)
        seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], horizon_zeros], dim=1)
        # enc
        enc_out = self._encode(x_enc, x_mark_enc)
        # dec
        dec_in = self.dec_embedding(seasonal_init, x_mark_dec)
        seasonal_part, trend_part = self.decoder(dec_in, enc_out, x_mask=None, cross_mask=None,
                                                 trend=trend_init)
        # final
        return trend_part + seasonal_part

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        # Only x_enc and x_mark_enc are used.
        return self.projection(self._encode(x_enc, x_mark_enc))

    def anomaly_detection(self, x_enc):
        return self.projection(self._encode(x_enc, None))

    def classification(self, x_enc, x_mark_enc):
        enc_out = self._encode(x_enc, None)

        # Output
        # the output transformer encoder/decoder embeddings don't include non-linearity
        output = self.dropout(self.act(enc_out))
        # zero-out padding embeddings
        output = output * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        output = output.reshape(output.shape[0], -1)
        return self.projection(output)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        task = self.task_name
        if task == 'long_term_forecast' or task == 'short_term_forecast':
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        # Unknown task names fall through and yield None.
        return None
-------------------------------------------------------------------------------- /models/Crossformer.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from einops import rearrange, repeat 5 | from layers.Crossformer_EncDec import scale_block, Encoder, Decoder, DecoderLayer 6 | from layers.Embed import PatchEmbedding 7 | from layers.SelfAttention_Family import AttentionLayer, FullAttention, TwoStageAttentionLayer 8 | from models.PatchTST import FlattenHead 9 | 10 | 11 | from math import ceil 12 | 13 | 14 | class Model(nn.Module): 15 | """ 16 | Paper link: https://openreview.net/pdf?id=vSVLM2j9eie 17 | """ 18 | def __init__(self, configs): 19 | super(Model, self).__init__() 20 | self.enc_in = configs.enc_in 21 | self.seq_len = configs.seq_len 22 | self.pred_len = configs.pred_len 23 | self.seg_len = 12 24 | self.win_size = 2 25 | self.task_name = configs.task_name 26 | 27 | # The padding operation to handle invisible sgemnet length 28 | self.pad_in_len = ceil(1.0 * configs.seq_len / self.seg_len) * self.seg_len 29 | self.pad_out_len = ceil(1.0 * configs.pred_len / self.seg_len) * self.seg_len 30 | self.in_seg_num = self.pad_in_len // self.seg_len 31 | self.out_seg_num = ceil(self.in_seg_num / (self.win_size ** (configs.e_layers - 1))) 32 | self.head_nf = configs.d_model * self.out_seg_num 33 | 34 | # Embedding 35 | self.enc_value_embedding = PatchEmbedding(configs.d_model, self.seg_len, self.seg_len, self.pad_in_len - configs.seq_len, 0) 36 | self.enc_pos_embedding = nn.Parameter( 37 | torch.randn(1, configs.enc_in, self.in_seg_num, configs.d_model)) 38 | self.pre_norm = nn.LayerNorm(configs.d_model) 39 | 40 | # Encoder 41 | self.encoder = Encoder( 42 | [ 43 | scale_block(configs, 1 if l is 0 else self.win_size, configs.d_model, configs.n_heads, configs.d_ff, 44 | 1, configs.dropout, 45 | self.in_seg_num if l is 0 else ceil(self.in_seg_num / 
self.win_size ** l), configs.factor 46 | ) for l in range(configs.e_layers) 47 | ] 48 | ) 49 | # Decoder 50 | self.dec_pos_embedding = nn.Parameter( 51 | torch.randn(1, configs.enc_in, (self.pad_out_len // self.seg_len), configs.d_model)) 52 | 53 | self.decoder = Decoder( 54 | [ 55 | DecoderLayer( 56 | TwoStageAttentionLayer(configs, (self.pad_out_len // self.seg_len), configs.factor, configs.d_model, configs.n_heads, 57 | configs.d_ff, configs.dropout), 58 | AttentionLayer( 59 | FullAttention(False, configs.factor, attention_dropout=configs.dropout, 60 | output_attention=False), 61 | configs.d_model, configs.n_heads), 62 | self.seg_len, 63 | configs.d_model, 64 | configs.d_ff, 65 | dropout=configs.dropout, 66 | # activation=configs.activation, 67 | ) 68 | for l in range(configs.e_layers + 1) 69 | ], 70 | ) 71 | if self.task_name == 'imputation' or self.task_name == 'anomaly_detection': 72 | self.head = FlattenHead(configs.enc_in, self.head_nf, configs.seq_len, 73 | head_dropout=configs.dropout) 74 | elif self.task_name == 'classification': 75 | self.flatten = nn.Flatten(start_dim=-2) 76 | self.dropout = nn.Dropout(configs.dropout) 77 | self.projection = nn.Linear( 78 | self.head_nf * configs.enc_in, configs.num_class) 79 | 80 | 81 | 82 | def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec): 83 | # embedding 84 | x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) 85 | x_enc = rearrange(x_enc, '(b d) seg_num d_model -> b d seg_num d_model', d = n_vars) 86 | x_enc += self.enc_pos_embedding 87 | x_enc = self.pre_norm(x_enc) 88 | enc_out, attns = self.encoder(x_enc) 89 | 90 | dec_in = repeat(self.dec_pos_embedding, 'b ts_d l d -> (repeat b) ts_d l d', repeat=x_enc.shape[0]) 91 | dec_out = self.decoder(dec_in, enc_out) 92 | return dec_out 93 | 94 | def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask): 95 | # embedding 96 | x_enc, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1)) 97 | x_enc = rearrange(x_enc, '(b d) seg_num d_model 
def classification(self, x_enc, x_mark_enc):
    """Encode the input series and project it to class logits.

    ``x_mark_enc`` is accepted for interface parity with the other task
    methods and is not used here.
    """
    # Patch embedding returns the embedded segments plus a variable count.
    patches, n_vars = self.enc_value_embedding(x_enc.permute(0, 2, 1))
    patches = rearrange(
        patches, '(b d) seg_num d_model -> b d seg_num d_model', d=n_vars)
    patches += self.enc_pos_embedding
    enc_out, _ = self.encoder(self.pre_norm(patches))

    # Only the last entry of the encoder output feeds the classifier head.
    feats = self.dropout(self.flatten(enc_out[-1].permute(0, 1, 3, 2)))
    return self.projection(feats.reshape(feats.shape[0], -1))
class Model(nn.Module):
    """
    DLinear: decompose the input into two components and map each one from
    ``seq_len`` to ``pred_len`` with a linear layer over the time axis.

    Paper link: https://arxiv.org/pdf/2205.13504.pdf
    """

    def __init__(self, configs, individual=False):
        """
        Args:
            configs: experiment namespace; reads ``task_name``, ``seq_len``,
                ``pred_len``, ``moving_avg``, ``enc_in`` and, depending on the
                task, ``sr_ratio``, ``dropout`` and ``num_class``.
            individual: Bool. When True every variate gets its own pair of
                linear layers; otherwise one pair is shared by all variates.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        if self.task_name == "super_resolution":
            # The model input is the sequence shortened by sr_ratio.
            self.seq_len = configs.seq_len // configs.sr_ratio

        if self.task_name in ('classification', 'anomaly_detection', 'imputation'):
            # These tasks produce an output as long as the input.
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len
        # Series decomposition block from Autoformer
        self.decompsition = series_decomp(configs.moving_avg)
        self.individual = individual
        self.channels = configs.enc_in

        if self.individual:
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()
            for _ in range(self.channels):
                self.Linear_Seasonal.append(
                    self._averaging_linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(
                    self._averaging_linear(self.seq_len, self.pred_len))
        else:
            self.Linear_Seasonal = self._averaging_linear(self.seq_len, self.pred_len)
            self.Linear_Trend = self._averaging_linear(self.seq_len, self.pred_len)

        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.enc_in * configs.seq_len, configs.num_class)

    @staticmethod
    def _averaging_linear(in_len, out_len):
        """Return ``nn.Linear(in_len, out_len)`` whose weight is ``1 / in_len`` everywhere."""
        layer = nn.Linear(in_len, out_len)
        layer.weight = nn.Parameter(
            (1 / in_len) * torch.ones([out_len, in_len]))
        return layer

    def encoder(self, x):
        """Decompose ``x`` and apply the linear maps along the time axis.

        The input is permuted so time is the last axis before the linear
        layers and permuted back afterwards, so for a ``[B, seq_len, C]``
        input the result is ``[B, pred_len, C]``.
        """
        seasonal_init, trend_init = self.decompsition(x)
        seasonal_init = seasonal_init.permute(0, 2, 1)
        trend_init = trend_init.permute(0, 2, 1)
        if self.individual:
            # new_zeros allocates directly with the input's dtype and device,
            # avoiding the CPU allocation + device copy of zeros(...).to(...).
            seasonal_output = seasonal_init.new_zeros(
                [seasonal_init.size(0), seasonal_init.size(1), self.pred_len])
            trend_output = trend_init.new_zeros(
                [trend_init.size(0), trend_init.size(1), self.pred_len])
            for i in range(self.channels):
                seasonal_output[:, i, :] = self.Linear_Seasonal[i](
                    seasonal_init[:, i, :])
                trend_output[:, i, :] = self.Linear_Trend[i](
                    trend_init[:, i, :])
        else:
            seasonal_output = self.Linear_Seasonal(seasonal_init)
            trend_output = self.Linear_Trend(trend_init)
        x = seasonal_output + trend_output
        return x.permute(0, 2, 1)

    def forecast(self, x_enc):
        """Forecasting is the plain encoder pass."""
        return self.encoder(x_enc)

    def imputation(self, x_enc):
        """Imputation is the plain encoder pass."""
        return self.encoder(x_enc)

    def anomaly_detection(self, x_enc):
        """Anomaly detection is the plain encoder pass."""
        return self.encoder(x_enc)

    def classification(self, x_enc):
        """Flatten the encoder output per sample and project to class logits."""
        enc_out = self.encoder(x_enc)
        # (batch_size, seq_length * channels)
        output = enc_out.reshape(enc_out.shape[0], -1)
        # (batch_size, num_classes)
        output = self.projection(output)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; only ``x_enc`` is used by this model.

        Returns None for an unrecognised task name.
        """
        if self.task_name in ('long_term_forecast', 'short_term_forecast', 'super_resolution'):
            dec_out = self.forecast(x_enc)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc)
            return dec_out  # [B, N]
        return None
def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
    """Run the encoder/decoder and combine level, growth and season.

    ``x_dec`` and ``x_mark_dec`` are accepted for interface parity and are
    not used.
    """
    if self.training:
        # The input transform is applied in training mode only, and never
        # tracked by autograd. NOTE(review): presumably a data augmentation
        # -- confirm against layers.ETSformer_EncDec.Transform.
        with torch.no_grad():
            x_enc = self.transform.transform(x_enc)

    embedded = self.enc_embedding(x_enc, x_mark_enc)
    level, growths, seasons = self.encoder(embedded, x_enc, attn_mask=None)
    growth, season = self.decoder(growths, seasons)

    # The last level step is broadcast against the decoded components.
    return level[:, -1:] + growth + season
class Model(nn.Module):
    """
    FreTS: frequency-domain MLPs applied along the channel and time axes.

    Paper link: https://arxiv.org/pdf/2311.06184.pdf
    """

    def __init__(self, configs):
        """
        Args:
            configs: experiment namespace; reads ``task_name``, ``seq_len``,
                ``pred_len``, ``enc_in``, ``channel_independence`` and, for
                super resolution, ``sr_ratio``.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        # Tasks that reconstruct the input emit seq_len steps. This value
        # used to be overwritten unconditionally by configs.pred_len a few
        # lines later, which made the branch below dead code.
        if self.task_name in ('classification', 'anomaly_detection', 'imputation'):
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len
        self.embed_size = 128  # embed_size
        self.hidden_size = 256  # hidden_size
        self.feature_size = configs.enc_in  # channels
        self.seq_len = configs.seq_len
        if self.task_name == 'super_resolution':
            # The model input is the sequence shortened by sr_ratio.
            self.seq_len = self.seq_len // configs.sr_ratio
        # NOTE(review): compared against the *string* '1' in _predict; if
        # the argument parser yields an int the channel learner never runs.
        # Confirm the intended type in run.py.
        self.channel_independence = configs.channel_independence
        self.sparsity_threshold = 0.01
        self.scale = 0.02
        self.embeddings = nn.Parameter(torch.randn(1, self.embed_size))
        # r*/i*: real/imaginary weights, rb*/ib*: real/imaginary biases.
        # Suffix 1 is used by the channel learner, suffix 2 by the temporal one.
        self.r1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.i1 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.rb1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.ib1 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.r2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.i2 = nn.Parameter(self.scale * torch.randn(self.embed_size, self.embed_size))
        self.rb2 = nn.Parameter(self.scale * torch.randn(self.embed_size))
        self.ib2 = nn.Parameter(self.scale * torch.randn(self.embed_size))

        self.fc = nn.Sequential(
            nn.Linear(self.seq_len * self.embed_size, self.hidden_size),
            nn.LeakyReLU(),
            nn.Linear(self.hidden_size, self.pred_len)
        )

    # dimension extension
    def tokenEmb(self, x):
        """Lift ``x`` [Batch, Input length, Channel] to [B, N, T, D]."""
        x = x.permute(0, 2, 1)
        x = x.unsqueeze(3)
        # N*T*1 x 1*D = N*T*D
        y = self.embeddings
        return x * y

    # frequency temporal learner
    def MLP_temporal(self, x, B, N, L):
        """Apply the frequency MLP along the time axis of ``x`` [B, N, T, D]."""
        x = torch.fft.rfft(x, dim=2, norm='ortho')  # FFT on L dimension
        y = self.FreMLP(B, N, L, x, self.r2, self.i2, self.rb2, self.ib2)
        x = torch.fft.irfft(y, n=self.seq_len, dim=2, norm="ortho")
        return x

    # frequency channel learner
    def MLP_channel(self, x, B, N, L):
        """Apply the frequency MLP along the channel axis of ``x`` [B, N, T, D]."""
        x = x.permute(0, 2, 1, 3)
        # [B, T, N, D]
        x = torch.fft.rfft(x, dim=2, norm='ortho')  # FFT on N dimension
        y = self.FreMLP(B, L, N, x, self.r1, self.i1, self.rb1, self.ib1)
        x = torch.fft.irfft(y, n=self.feature_size, dim=2, norm="ortho")
        x = x.permute(0, 2, 1, 3)
        # [B, N, T, D]
        return x

    # frequency-domain MLPs
    def FreMLP(self, B, nd, dimension, x, r, i, rb, ib):
        """Complex-valued layer applied to the spectrum ``x``.

        Args:
            B, nd, dimension: kept for call compatibility; they were only
                used to size two placeholder tensors that were overwritten
                before being read, so they are no longer needed.
            x: complex tensor whose last axis has ``embed_size`` entries.
            r, i: real / imaginary part of the weights.
            rb, ib: real / imaginary part of the bias.

        Returns:
            Complex tensor with the same shape as ``x``.
        """
        # NOTE: the repeated subscript in 'dd' makes einsum read only the
        # diagonal of the weight, so each feature is scaled independently.
        o1_real = F.relu(
            torch.einsum('bijd,dd->bijd', x.real, r)
            - torch.einsum('bijd,dd->bijd', x.imag, i)
            + rb
        )
        o1_imag = F.relu(
            torch.einsum('bijd,dd->bijd', x.imag, r)
            + torch.einsum('bijd,dd->bijd', x.real, i)
            + ib
        )

        y = torch.stack([o1_real, o1_imag], dim=-1)
        y = F.softshrink(y, lambd=self.sparsity_threshold)
        y = torch.view_as_complex(y)
        return y

    def _predict(self, x_enc):
        """Shared pipeline: embed, frequency learners, residual, projection."""
        # x: [Batch, Input length, Channel]
        B, T, N = x_enc.shape
        # embedding x: [B, N, T, D]
        x = self.tokenEmb(x_enc)
        bias = x
        if self.channel_independence == '1':
            x = self.MLP_channel(x, B, N, T)
        x = self.MLP_temporal(x, B, N, T)
        x = x + bias
        # Flatten (T, D) per channel, project to pred_len, return [B, pred_len, N].
        x = self.fc(x.reshape(B, N, -1)).permute(0, 2, 1)
        return x

    def forecast(self, x_enc):
        """Map ``x_enc`` [B, seq_len, N] to [B, pred_len, N]."""
        return self._predict(x_enc)

    def super_resolution(self, x_enc):
        """Same pipeline as ``forecast``; kept as a separate entry point."""
        return self._predict(x_enc)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; only ``x_enc`` is used.

        ``mask`` is accepted (and ignored) so this model can be called with
        the same arguments as the other models in this package.

        Raises:
            ValueError: for task names other than the forecasting tasks,
                imputation and super resolution.
        """
        if self.task_name in ('long_term_forecast', 'short_term_forecast', 'imputation'):
            return self.forecast(x_enc)  # [B, L, D]
        if self.task_name == 'super_resolution':
            return self.super_resolution(x_enc)
        raise ValueError('Only forecast tasks implemented yet')
def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
    """Forecast on a per-series standardised input and undo the scaling.

    The encoder input is centred and scaled along dim 1; the statistics are
    detached so no gradient flows through them. The decoder output is
    mapped back with the same statistics.
    """
    # Per-series statistics, B x 1 x E
    mu = x_enc.mean(1, keepdim=True).detach()
    centred = x_enc - mu
    sigma = torch.sqrt(
        torch.var(centred, dim=1, keepdim=True, unbiased=False) + 1e-5
    ).detach()
    scaled = centred / sigma

    # Both embeddings run before the encoder, in this order.
    enc_tokens = self.enc_embedding(scaled, x_mark_enc)
    dec_tokens = self.dec_embedding(x_dec, x_mark_dec)
    memory, _ = self.encoder(enc_tokens, attn_mask=None)
    decoded = self.decoder(dec_tokens, memory, x_mask=None, cross_mask=None)

    # De-normalisation
    return decoded * sigma + mu  # [B, L, D]
class IEBlock(nn.Module):
    """Block that mixes along the feature axis, then across nodes, then projects."""

    def __init__(self, input_dim, hid_dim, output_dim, num_node):
        """
        Args:
            input_dim: size of axis 1 of the block input.
            hid_dim: hidden width; the feature MLP ends at ``hid_dim // 4``.
            output_dim: size of axis 1 of the block output.
            num_node: size of axis 2 of the input (unchanged by the block).
        """
        super(IEBlock, self).__init__()

        self.input_dim = input_dim
        self.hid_dim = hid_dim
        self.output_dim = output_dim
        self.num_node = num_node

        self._build()

    def _build(self):
        """Create the three projections used by ``forward``."""
        self.spatial_proj = nn.Sequential(
            nn.Linear(self.input_dim, self.hid_dim),
            nn.LeakyReLU(),
            nn.Linear(self.hid_dim, self.hid_dim // 4)
        )

        # Node mixing starts as the identity (bias keeps its default init).
        self.channel_proj = nn.Linear(self.num_node, self.num_node)
        torch.nn.init.eye_(self.channel_proj.weight)

        self.output_proj = nn.Linear(self.hid_dim // 4, self.output_dim)

    def forward(self, x):
        """Map ``[batch, input_dim, num_node]`` to ``[batch, output_dim, num_node]``."""
        feats = self.spatial_proj(x.permute(0, 2, 1))   # [batch, num_node, hid_dim // 4]
        feats = feats.permute(0, 2, 1)                  # [batch, hid_dim // 4, num_node]
        feats = feats + self.channel_proj(feats)        # residual mixing across nodes
        out = self.output_proj(feats.permute(0, 2, 1))  # [batch, num_node, output_dim]
        return out.permute(0, 2, 1)


class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2207.01186
    """

    def __init__(self, configs, chunk_size=24):
        """
        Args:
            configs: experiment namespace; reads ``task_name``, ``seq_len``,
                ``pred_len``, ``d_model``, ``enc_in``, ``dropout`` and, for
                classification, ``num_class``.
            chunk_size: int, reshape T into [num_chunks, chunk_size]

        Raises:
            AssertionError: if ``seq_len`` is not a multiple of the effective
                chunk size.
        """
        super(Model, self).__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        if self.task_name in ('classification', 'anomaly_detection', 'imputation'):
            self.pred_len = configs.seq_len
        else:
            self.pred_len = configs.pred_len

        if configs.task_name in ('long_term_forecast', 'short_term_forecast'):
            self.chunk_size = min(configs.pred_len, configs.seq_len, chunk_size)
        else:
            self.chunk_size = min(configs.seq_len, chunk_size)
        if self.seq_len % self.chunk_size != 0:
            # Raised explicitly rather than via `assert`, so the check is not
            # stripped by `python -O`; the exception type is unchanged.
            raise AssertionError(
                f"seq_len ({self.seq_len}) must be divisible by "
                f"chunk_size ({self.chunk_size})")
        self.num_chunks = self.seq_len // self.chunk_size

        self.d_model = configs.d_model
        self.enc_in = configs.enc_in
        self.dropout = configs.dropout
        if self.task_name == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(configs.enc_in * configs.seq_len, configs.num_class)
        self._build()

    def _build(self):
        """Create the two sampling branches, the fusion block and the linear highway."""
        self.layer_1 = IEBlock(
            input_dim=self.chunk_size,
            hid_dim=self.d_model // 4,
            output_dim=self.d_model // 4,
            num_node=self.num_chunks
        )

        self.chunk_proj_1 = nn.Linear(self.num_chunks, 1)

        self.layer_2 = IEBlock(
            input_dim=self.chunk_size,
            hid_dim=self.d_model // 4,
            output_dim=self.d_model // 4,
            num_node=self.num_chunks
        )

        self.chunk_proj_2 = nn.Linear(self.num_chunks, 1)

        self.layer_3 = IEBlock(
            input_dim=self.d_model // 2,
            hid_dim=self.d_model // 2,
            output_dim=self.pred_len,
            num_node=self.enc_in
        )

        self.ar = nn.Linear(self.seq_len, self.pred_len)

    def encoder(self, x):
        """Map ``x`` [B, seq_len, N] to [B, pred_len, N]."""
        B, T, N = x.size()

        # Linear highway over the time axis, added to the block output.
        highway = self.ar(x.permute(0, 2, 1))
        highway = highway.permute(0, 2, 1)

        # continuous sampling
        x1 = x.reshape(B, self.num_chunks, self.chunk_size, N)
        x1 = x1.permute(0, 3, 2, 1)
        x1 = x1.reshape(-1, self.chunk_size, self.num_chunks)
        x1 = self.layer_1(x1)
        x1 = self.chunk_proj_1(x1).squeeze(dim=-1)

        # interval sampling
        x2 = x.reshape(B, self.chunk_size, self.num_chunks, N)
        x2 = x2.permute(0, 3, 1, 2)
        x2 = x2.reshape(-1, self.chunk_size, self.num_chunks)
        x2 = self.layer_2(x2)
        x2 = self.chunk_proj_2(x2).squeeze(dim=-1)

        # Concatenate both branches per series, then mix across variates.
        x3 = torch.cat([x1, x2], dim=-1)
        x3 = x3.reshape(B, N, -1)
        x3 = x3.permute(0, 2, 1)

        out = self.layer_3(x3)

        out = out + highway
        return out

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Forecasting is the plain encoder pass on ``x_enc``."""
        return self.encoder(x_enc)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Imputation is the plain encoder pass on ``x_enc``."""
        return self.encoder(x_enc)

    def anomaly_detection(self, x_enc):
        """Anomaly detection is the plain encoder pass on ``x_enc``."""
        return self.encoder(x_enc)

    def classification(self, x_enc, x_mark_enc):
        """Flatten the encoder output per sample and project to class logits."""
        enc_out = self.encoder(x_enc)

        # Output
        output = enc_out.reshape(enc_out.shape[0], -1)  # (batch_size, seq_length * enc_in)
        output = self.projection(output)  # (batch_size, num_classes)
        return output

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``task_name``; returns None for an unrecognised task."""
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            dec_out = self.anomaly_detection(x_enc)
            return dec_out  # [B, L, D]
        if self.task_name == 'classification':
            dec_out = self.classification(x_enc, x_mark_enc)
            return dec_out  # [B, N]
        return None
def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """Forecast from the last encoder position on a standardised input.

    The input is centred and scaled along dim 1 with detached statistics,
    the final encoder position is projected and reshaped to
    ``[B, pred_len, -1]``, and the scaling is undone on the result.
    ``x_dec``, ``x_mark_dec`` and ``mask`` are not used.
    """
    # Per-series statistics, B x 1 x E
    mu = x_enc.mean(1, keepdim=True).detach()
    centred = x_enc - mu
    sigma = torch.sqrt(
        torch.var(centred, dim=1, keepdim=True, unbiased=False) + 1e-5
    ).detach()

    last_state = self.encoder(centred / sigma, x_mark_enc)[:, -1, :]
    flat = self.projection(last_state)
    prediction = flat.view(last_state.size(0), self.pred_len, -1)

    # De-normalisation
    return prediction * sigma + mu
def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
    """Route the call to the method matching ``task_name``.

    Forecast outputs are cut to their last ``pred_len`` steps; the other
    tasks return the method result unchanged. An unrecognised task name
    yields None.
    """
    task = self.task_name

    if task in ('long_term_forecast', 'short_term_forecast'):
        predict = self.long_forecast if task == 'long_term_forecast' else self.short_forecast
        dec_out = predict(x_enc, x_mark_enc, x_dec, x_mark_dec)
        return dec_out[:, -self.pred_len:, :]  # [B, L, D]

    if task == 'imputation':
        return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]

    if task == 'anomaly_detection':
        return self.anomaly_detection(x_enc, x_mark_enc)  # [B, L, D]

    if task == 'classification':
        return self.classification(x_enc, x_mark_enc)  # [B, N]

    return None
class Model(nn.Module):
    """
    Reformer with O(LlogL) complexity
    Paper link: https://openreview.net/forum?id=rkgNKkHtvB

    Encoder-only network: the input is embedded, passed through a stack of
    ``ReformerLayer``-based encoder layers, and mapped by a linear head whose
    shape depends on ``configs.task_name``.
    """

    def __init__(self, configs, bucket_size=4, n_hashes=4):
        """
        configs: experiment settings object (task_name, pred_len, seq_len,
            enc_in, d_model, embed, freq, dropout, n_heads, d_ff, activation,
            e_layers, plus c_out or num_class depending on the task).
        bucket_size: int, forwarded to every ReformerLayer.
        n_hashes: int, forwarded to every ReformerLayer.
        """
        super().__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len

        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq,
            configs.dropout)

        # Encoder: one attention + feed-forward layer per configs.e_layers
        enc_layers = []
        for _ in range(configs.e_layers):
            attention = ReformerLayer(None, configs.d_model, configs.n_heads,
                                      bucket_size=bucket_size, n_hashes=n_hashes)
            enc_layers.append(
                EncoderLayer(
                    attention,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(
            enc_layers, norm_layer=torch.nn.LayerNorm(configs.d_model))

        if self.task_name != 'classification':
            # Per-time-step head shared by forecasting / imputation / anomaly detection
            self.projection = nn.Linear(
                configs.d_model, configs.c_out, bias=True)
        else:
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def _append_placeholder(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Concatenate the last ``pred_len`` decoder steps (and marks) after the encoder input."""
        x_enc = torch.cat([x_enc, x_dec[:, -self.pred_len:, :]], dim=1)
        if x_mark_enc is not None:
            x_mark_enc = torch.cat(
                [x_mark_enc, x_mark_dec[:, -self.pred_len:, :]], dim=1)
        return x_enc, x_mark_enc

    def long_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encode history plus placeholder steps and project every position to c_out."""
        # add placeholder
        x_enc, x_mark_enc = self._append_placeholder(
            x_enc, x_mark_enc, x_dec, x_mark_dec)

        hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        hidden, _ = self.encoder(hidden, attn_mask=None)
        return self.projection(hidden)  # [B, L, D]

    def short_forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Same as long_forecast, with per-series standardisation before and re-scaling after."""
        # Normalization (statistics are detached, so no gradient flows through them)
        mean_enc = x_enc.mean(1, keepdim=True).detach()  # B x 1 x E
        centered = x_enc - mean_enc
        std_enc = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5
        ).detach()  # B x 1 x E
        x_enc = centered / std_enc

        # add placeholder
        x_enc, x_mark_enc = self._append_placeholder(
            x_enc, x_mark_enc, x_dec, x_mark_dec)

        hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        hidden, _ = self.encoder(hidden, attn_mask=None)
        dec_out = self.projection(hidden)

        return dec_out * std_enc + mean_enc  # [B, L, D]

    def imputation(self, x_enc, x_mark_enc):
        """Embed (with time marks), encode and project back to c_out."""
        hidden = self.enc_embedding(x_enc, x_mark_enc)  # [B,T,C]
        hidden, _ = self.encoder(hidden)
        return self.projection(hidden)  # [B, L, D]

    def anomaly_detection(self, x_enc):
        """Embed (no time marks), encode and project back to c_out."""
        hidden = self.enc_embedding(x_enc, None)  # [B,T,C]
        hidden, _ = self.encoder(hidden)
        return self.projection(hidden)  # [B, L, D]

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` is used as a multiplicative padding mask."""
        hidden = self.enc_embedding(x_enc, None)
        hidden, _ = self.encoder(hidden)

        # the encoder output carries no non-linearity, so add one here
        feats = self.dropout(self.act(hidden))
        # zero-out padding embeddings
        feats = feats * x_mark_enc.unsqueeze(-1)
        # (batch_size, seq_length * d_model)
        feats = feats.reshape(feats.shape[0], -1)
        return self.projection(feats)  # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; returns None for an unrecognised task."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            predict = (self.long_forecast if task == 'long_term_forecast'
                       else self.short_forecast)
            dec_out = predict(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class LayerNorm(nn.Module):
    """ LayerNorm but with an optional bias. PyTorch doesn't support simply bias=False """

    def __init__(self, ndim, bias):
        super().__init__()
        self.weight = nn.Parameter(torch.ones(ndim))
        self.bias = nn.Parameter(torch.zeros(ndim)) if bias else None

    def forward(self, input):
        # Normalise over the trailing dimension(s) given by the weight shape.
        return F.layer_norm(input, self.weight.shape, self.weight, self.bias, 1e-5)


class ResBlock(nn.Module):
    """Two-layer MLP with a linear skip connection, followed by LayerNorm.

    Computes ``ln(dropout(fc2(relu(fc1(x)))) + fc3(x))`` where ``fc3`` maps
    the input straight to ``output_dim``.
    """

    def __init__(self, input_dim, hidden_dim, output_dim, dropout=0.1, bias=True):
        super().__init__()

        self.fc1 = nn.Linear(input_dim, hidden_dim, bias=bias)
        self.fc2 = nn.Linear(hidden_dim, output_dim, bias=bias)
        self.fc3 = nn.Linear(input_dim, output_dim, bias=bias)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()
        self.ln = LayerNorm(output_dim, bias=bias)

    def forward(self, x):
        out = self.fc1(x)
        out = self.relu(out)
        out = self.fc2(out)
        out = self.dropout(out)
        out = out + self.fc3(x)
        out = self.ln(out)
        return out


def _res_stack(count, dim, hidden, dropout, bias):
    """Return ``count`` independently initialised ``dim -> dim`` ResBlocks (empty if count <= 0)."""
    # A fresh ResBlock per iteration: ``[ResBlock(...)] * count`` would repeat
    # ONE module object, silently tying the weights of every layer together.
    return [ResBlock(dim, hidden, dim, dropout, bias) for _ in range(count)]


# TiDE
class Model(nn.Module):
    """
    paper: https://arxiv.org/pdf/2304.08424.pdf

    Each channel of ``x_enc`` is processed on its own by the same network
    (see ``forward``) and the per-channel outputs are stacked on the last axis.
    """

    def __init__(self, configs, bias=True, feature_encode_dim=2):
        """
        configs: settings object; reads task_name, seq_len, label_len,
            pred_len, d_model, e_layers, d_layers, freq, c_out, d_ff, dropout.
        bias: whether Linear / LayerNorm modules carry a bias term.
        feature_encode_dim: width each time-mark vector is projected to.

        Raises KeyError if ``configs.freq`` is not a key of the frequency map.
        """
        super().__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len  # L
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len  # H
        self.hidden_dim = configs.d_model
        self.res_hidden = configs.d_model
        self.encoder_num = configs.e_layers
        self.decoder_num = configs.d_layers
        self.freq = configs.freq
        self.feature_encode_dim = feature_encode_dim
        self.decode_dim = configs.c_out
        self.temporalDecoderHidden = configs.d_ff
        dropout = configs.dropout

        # Width of the time-mark vector for each frequency code.
        freq_map = {'h': 4, 't': 5, 's': 6,
                    'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
        self.feature_dim = freq_map[self.freq]

        # One channel's history concatenated with the encoded marks of the
        # look-back window plus the horizon.
        flatten_dim = self.seq_len + (self.seq_len + self.pred_len) * self.feature_encode_dim

        self.feature_encoder = ResBlock(
            self.feature_dim, self.res_hidden, self.feature_encode_dim, dropout, bias)
        self.encoders = nn.Sequential(
            ResBlock(flatten_dim, self.res_hidden, self.hidden_dim, dropout, bias),
            *_res_stack(self.encoder_num - 1, self.hidden_dim, self.res_hidden, dropout, bias))

        # Forecasting emits pred_len steps; imputation / anomaly detection
        # reconstruct the seq_len input window. Other tasks get no decoder.
        if self.task_name in ('long_term_forecast', 'short_term_forecast'):
            out_len = self.pred_len
        elif self.task_name in ('imputation', 'anomaly_detection'):
            out_len = self.seq_len
        else:
            out_len = None

        if out_len is not None:
            self.decoders = nn.Sequential(
                *_res_stack(self.decoder_num - 1, self.hidden_dim, self.res_hidden, dropout, bias),
                ResBlock(self.hidden_dim, self.res_hidden, self.decode_dim * out_len, dropout, bias))
            self.temporalDecoder = ResBlock(
                self.decode_dim + self.feature_encode_dim, self.temporalDecoderHidden, 1, dropout, bias)
            self.residual_proj = nn.Linear(self.seq_len, out_len, bias=bias)

    def _predict(self, x_enc, marks, out_len, window):
        """Shared path: normalise one channel, encode/decode, de-normalise.

        x_enc: single-channel series indexed as [batch, time].
        marks: time marks fed to the feature encoder.
        out_len: number of output steps.
        window: slice selecting which encoded marks accompany the output steps.
        """
        # Normalization
        means = x_enc.mean(1, keepdim=True).detach()
        x_enc = x_enc - means
        stdev = torch.sqrt(torch.var(x_enc, dim=1, keepdim=True, unbiased=False) + 1e-5)
        x_enc = x_enc / stdev

        feature = self.feature_encoder(marks)
        hidden = self.encoders(
            torch.cat([x_enc, feature.reshape(feature.shape[0], -1)], dim=-1))
        decoded = self.decoders(hidden).reshape(hidden.shape[0], out_len, self.decode_dim)
        dec_out = self.temporalDecoder(
            torch.cat([feature[:, window], decoded], dim=-1)).squeeze(-1)
        # Linear shortcut straight from the (normalised) history.
        dec_out = dec_out + self.residual_proj(x_enc)

        # De-Normalization
        dec_out = dec_out * (stdev[:, 0].unsqueeze(1).repeat(1, out_len))
        dec_out = dec_out + (means[:, 0].unsqueeze(1).repeat(1, out_len))
        return dec_out

    def forecast(self, x_enc, x_mark_enc, x_dec, batch_y_mark):
        """Predict ``pred_len`` steps for one channel; ``batch_y_mark`` covers history + horizon."""
        return self._predict(x_enc, batch_y_mark, self.pred_len, slice(self.seq_len, None))

    def imputation(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask):
        """Reconstruct the ``seq_len`` window for one channel; ``mask`` is accepted but unused."""
        return self._predict(x_enc, x_mark_enc, self.seq_len, slice(None, self.seq_len))

    def forward(self, x_enc, x_mark_enc, x_dec, batch_y_mark, mask=None):
        '''x_mark_enc is the exogenous dynamic feature described in the original paper'''
        n_channels = x_enc.shape[-1]
        if self.task_name == 'long_term_forecast' or self.task_name == 'short_term_forecast':
            # torch.cat rather than torch.concat: the alias is missing from the
            # torch release pinned in requirements.txt.
            batch_y_mark = torch.cat(
                [x_mark_enc, batch_y_mark[:, -self.pred_len:, :]], dim=1)
            dec_out = torch.stack(
                [self.forecast(x_enc[:, :, idx], x_mark_enc, x_dec, batch_y_mark)
                 for idx in range(n_channels)], dim=-1)
            return dec_out  # [B, L, D]
        if self.task_name == 'imputation':
            dec_out = torch.stack(
                [self.imputation(x_enc[:, :, idx], x_mark_enc, x_dec, batch_y_mark, mask)
                 for idx in range(n_channels)], dim=-1)
            return dec_out  # [B, L, D]
        if self.task_name == 'anomaly_detection':
            raise NotImplementedError("Task anomaly_detection for Tide is temporarily not supported")
        if self.task_name == 'classification':
            raise NotImplementedError("Task classification for Tide is temporarily not supported")
        return None
class Model(nn.Module):
    """
    Vanilla Transformer
    with O(L^2) complexity
    Paper link: https://proceedings.neurips.cc/paper/2017/file/3f5ee243547dee91fbd053c1c4a845aa-Paper.pdf

    The encoder is always built; a decoder is added only for the two
    forecasting tasks, while the remaining tasks use a linear head on the
    encoder output.
    """

    def __init__(self, configs):
        """Build the embedding, encoder and the task-specific decoder or head from ``configs``."""
        super().__init__()
        self.task_name = configs.task_name
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        def attention(mask_flag, output_attention):
            # One full-attention block wrapped in its multi-head projection layer.
            return AttentionLayer(
                FullAttention(mask_flag, configs.factor,
                              attention_dropout=configs.dropout,
                              output_attention=output_attention),
                configs.d_model, configs.n_heads)

        # Embedding
        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq,
            configs.dropout)

        # Encoder
        enc_layers = []
        for _ in range(configs.e_layers):
            enc_layers.append(
                EncoderLayer(
                    attention(False, configs.output_attention),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(
            enc_layers, norm_layer=torch.nn.LayerNorm(configs.d_model))

        task = self.task_name
        # Decoder
        if task in ('long_term_forecast', 'short_term_forecast'):
            self.dec_embedding = DataEmbedding(
                configs.dec_in, configs.d_model, configs.embed, configs.freq,
                configs.dropout)
            dec_layers = []
            for _ in range(configs.d_layers):
                self_attn = attention(True, False)    # first attention: mask_flag=True
                cross_attn = attention(False, False)  # second attention: mask_flag=False
                dec_layers.append(
                    DecoderLayer(
                        self_attn,
                        cross_attn,
                        configs.d_model,
                        configs.d_ff,
                        dropout=configs.dropout,
                        activation=configs.activation,
                    )
                )
            self.decoder = Decoder(
                dec_layers,
                norm_layer=torch.nn.LayerNorm(configs.d_model),
                projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
            )
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, configs.c_out, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.seq_len, configs.num_class)

    def _encode(self, x_enc, x_mark_enc):
        """Embed the input and return the encoder output (attention maps are discarded)."""
        embedded = self.enc_embedding(x_enc, x_mark_enc)
        encoded, _ = self.encoder(embedded, attn_mask=None)
        return encoded

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Encode the history, then decode ``x_dec`` against the encoder output."""
        memory = self._encode(x_enc, x_mark_enc)
        target = self.dec_embedding(x_dec, x_mark_dec)
        return self.decoder(target, memory, x_mask=None, cross_mask=None)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Project the encoder output back to c_out; ``mask`` is accepted but unused here."""
        return self.projection(self._encode(x_enc, x_mark_enc))

    def anomaly_detection(self, x_enc):
        """Project the encoder output (no time marks) back to c_out."""
        return self.projection(self._encode(x_enc, None))

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` is used as a multiplicative padding mask."""
        encoded = self._encode(x_enc, None)

        # the encoder output carries no non-linearity, so add one here
        feats = self.dropout(self.act(encoded))
        feats = feats * x_mark_enc.unsqueeze(-1)   # zero-out padding embeddings
        feats = feats.reshape(feats.shape[0], -1)  # (batch_size, seq_length * d_model)
        return self.projection(feats)              # (batch_size, num_classes)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; returns None for an unrecognised task."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        return None
class Model(nn.Module):
    """
    Paper link: https://arxiv.org/abs/2310.06625

    Encoder-only model built on ``DataEmbedding_inverted``; a linear head maps
    each encoded token to the output length required by the task.
    """

    def __init__(self, configs):
        """Build embedding, encoder and the task-specific linear head from ``configs``."""
        super().__init__()
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        if self.task_name == 'super_resolution':
            # The model sees the input sub-sampled by sr_ratio.
            self.sr_ratio = configs.sr_ratio
            self.seq_len = self.seq_len // configs.sr_ratio
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Embedding
        self.enc_embedding = DataEmbedding_inverted(
            self.seq_len, configs.d_model, configs.embed, configs.freq,
            configs.dropout)

        # Encoder
        enc_layers = []
        for _ in range(configs.e_layers):
            attn = AttentionLayer(
                FullAttention(False, configs.factor,
                              attention_dropout=configs.dropout,
                              output_attention=configs.output_attention),
                configs.d_model, configs.n_heads)
            enc_layers.append(
                EncoderLayer(
                    attn,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.encoder = Encoder(
            enc_layers, norm_layer=torch.nn.LayerNorm(configs.d_model))

        # Decoder (a single linear head)
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast', 'super_resolution'):
            self.projection = nn.Linear(configs.d_model, configs.pred_len, bias=True)
        if task in ('imputation', 'anomaly_detection'):
            self.projection = nn.Linear(configs.d_model, self.seq_len, bias=True)
        if task == 'classification':
            self.act = F.gelu
            self.dropout = nn.Dropout(configs.dropout)
            self.projection = nn.Linear(
                configs.d_model * configs.enc_in, configs.num_class)

    @staticmethod
    def _normalize(x_enc):
        """Standardise along dim 1; returns (normalised, means, stdev). Only the mean is detached."""
        means = x_enc.mean(1, keepdim=True).detach()
        centered = x_enc - means
        stdev = torch.sqrt(
            torch.var(centered, dim=1, keepdim=True, unbiased=False) + 1e-5)
        return centered / stdev, means, stdev

    @staticmethod
    def _denormalize(dec_out, means, stdev, length):
        """Undo ``_normalize`` on an output with ``length`` time steps."""
        dec_out = dec_out * (stdev[:, 0, :].unsqueeze(1).repeat(1, length, 1))
        dec_out = dec_out + (means[:, 0, :].unsqueeze(1).repeat(1, length, 1))
        return dec_out

    def _encode_project(self, x_enc, x_mark_enc):
        """Embed, encode, project, and keep only the first N (= x_enc.shape[-1]) variates."""
        n_vars = x_enc.shape[-1]
        tokens = self.enc_embedding(x_enc, x_mark_enc)
        tokens, _ = self.encoder(tokens, attn_mask=None)
        return self.projection(tokens).permute(0, 2, 1)[:, :, :n_vars]

    def forecast(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Predict ``pred_len`` steps from the normalised history."""
        # Normalization from Non-stationary Transformer
        x_enc, means, stdev = self._normalize(x_enc)
        dec_out = self._encode_project(x_enc, x_mark_enc)
        # De-Normalization from Non-stationary Transformer
        return self._denormalize(dec_out, means, stdev, self.pred_len)

    def imputation(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask):
        """Reconstruct the input window; ``mask`` is accepted but unused here."""
        x_enc, means, stdev = self._normalize(x_enc)
        window = x_enc.shape[1]
        dec_out = self._encode_project(x_enc, x_mark_enc)
        return self._denormalize(dec_out, means, stdev, window)

    def anomaly_detection(self, x_enc):
        """Reconstruct the input window without time marks."""
        x_enc, means, stdev = self._normalize(x_enc)
        window = x_enc.shape[1]
        dec_out = self._encode_project(x_enc, None)
        return self._denormalize(dec_out, means, stdev, window)

    def classification(self, x_enc, x_mark_enc):
        """Return class logits; ``x_mark_enc`` is not used on this path."""
        # Embedding
        tokens = self.enc_embedding(x_enc, None)
        tokens, _ = self.encoder(tokens, attn_mask=None)

        # the encoder output carries no non-linearity, so add one here
        feats = self.dropout(self.act(tokens))
        feats = feats.reshape(feats.shape[0], -1)  # (batch_size, c_in * d_model)
        return self.projection(feats)              # (batch_size, num_classes)

    def super_resolution(self, x_enc, x_mark_enc, x_dec, x_mark_dec):
        """Like forecast, but the time marks are sub-sampled with stride ``sr_ratio``."""
        x_enc, means, stdev = self._normalize(x_enc)
        dec_out = self._encode_project(x_enc, x_mark_enc[:, ::self.sr_ratio, :])
        return self._denormalize(dec_out, means, stdev, self.pred_len)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, mask=None):
        """Dispatch on ``self.task_name``; returns None for an unrecognised task."""
        task = self.task_name
        if task in ('long_term_forecast', 'short_term_forecast'):
            dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        if task == 'imputation':
            return self.imputation(x_enc, x_mark_enc, x_dec, x_mark_dec, mask)  # [B, L, D]
        if task == 'anomaly_detection':
            return self.anomaly_detection(x_enc)  # [B, L, D]
        if task == 'classification':
            return self.classification(x_enc, x_mark_enc)  # [B, N]
        if task == 'super_resolution':
            dec_out = self.super_resolution(x_enc, x_mark_enc, x_dec, x_mark_dec)
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
        return None
export CUDA_VISIBLE_DEVICES=0

model_name=AdaWaveNet

# One training run per horizon, in ascending order.
# The look-back window always equals the prediction length.
for len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/electricity/ \
    --data_path electricity.csv \
    --model_id "ECL_${len}_${len}" \
    --model $model_name \
    --data custom \
    --features M \
    --seq_len "$len" \
    --label_len 48 \
    --pred_len "$len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 321 \
    --dec_in 321 \
    --c_out 321 \
    --d_model 256 \
    --d_ff 256 \
    --batch_size 16 \
    --learning_rate 0.0005 \
    --itr 1 \
    --lifting_levels 3 \
    --lifting_kernel_size 7 \
    --n_cluster=4
done
export CUDA_VISIBLE_DEVICES=0

# NOTE(review): other scripts and the README pass "AdaWaveNet" as --model;
# confirm that "LSWaveNet" is still a name run.py accepts.
model_name=LSWaveNet

# One training run per horizon, in ascending order.
# The look-back window always equals the prediction length.
for len in 96 192 336 720; do
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTh1.csv \
    --model_id "ETTh1_${len}_${len}" \
    --model $model_name \
    --data ETTh1 \
    --features M \
    --seq_len "$len" \
    --label_len 48 \
    --pred_len "$len" \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_model 512 \
    --d_ff 512 \
    --itr 1 \
    --lifting_levels 4 \
    --lifting_kernel_size 7 \
    --n_cluster 4 \
    --learning_rate 0.0005 \
    --batch_size 16
done
export CUDA_VISIBLE_DEVICES=0

# NOTE(review): other scripts and the README pass "AdaWaveNet" as --model;
# confirm that "LSWaveNet" is still a name run.py accepts.
model_name=LSWaveNet

# run_ettm1 <len> <lifting_levels> <n_cluster>
# Launches one ETTm1 run whose look-back window and horizon both equal <len>.
run_ettm1() {
  python -u run.py \
    --task_name long_term_forecast \
    --is_training 1 \
    --root_path ./dataset/ETT-small/ \
    --data_path ETTm1.csv \
    --model_id "ETTm1_${1}_${1}" \
    --model $model_name \
    --data ETTm1 \
    --features M \
    --seq_len "$1" \
    --label_len 48 \
    --pred_len "$1" \
    --e_layers 3 \
    --d_layers 1 \
    --factor 3 \
    --enc_in 7 \
    --dec_in 7 \
    --c_out 7 \
    --des 'Exp' \
    --d_model 512 \
    --d_ff 512 \
    --itr 1 \
    --lifting_levels "$2" \
    --lifting_kernel_size 7 \
    --n_cluster "$3" \
    --learning_rate 0.0005 \
    --batch_size 16
}

# Horizon runs.
run_ettm1 96 4 2
run_ettm1 192 4 2
run_ettm1 336 2 2

# Same 96-step setting with n_cluster varied from 4 to 7.
# NOTE(review): these reuse model_id ETTm1_96_96 from the first run -- verify
# that their outputs do not overwrite each other.
run_ettm1 96 3 4
run_ettm1 96 3 5
run_ettm1 96 3 6
run_ettm1 96 3 7
-u run.py \ 174 | --task_name long_term_forecast \ 175 | --is_training 1 \ 176 | --root_path ./dataset/ETT-small/ \ 177 | --data_path ETTm1.csv \ 178 | --model_id ETTm1_96_96 \ 179 | --model $model_name \ 180 | --data ETTm1 \ 181 | --features M \ 182 | --seq_len 96 \ 183 | --label_len 48 \ 184 | --pred_len 96 \ 185 | --e_layers 3 \ 186 | --d_layers 1 \ 187 | --factor 3 \ 188 | --enc_in 7 \ 189 | --dec_in 7 \ 190 | --c_out 7 \ 191 | --des 'Exp' \ 192 | --d_model 512\ 193 | --d_ff 512\ 194 | --itr 1 \ 195 | --lifting_levels 3\ 196 | --lifting_kernel_size 7\ 197 | --n_cluster 7\ 198 | --learning_rate 0.0005\ 199 | --batch_size 16 -------------------------------------------------------------------------------- /scripts/long_term_forecast/Exchange_script/AdaWaveNet.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | model_name=LSWaveNet 4 | 5 | python -u run.py\ 6 | --task_name long_term_forecast \ 7 | --is_training 1 \ 8 | --root_path ./dataset/exchange_rate/ \ 9 | --data_path exchange_rate.csv \ 10 | --model_id exchange_96_96 \ 11 | --model $model_name \ 12 | --data custom \ 13 | --features M \ 14 | --seq_len 96 \ 15 | --label_len 48 \ 16 | --pred_len 96 \ 17 | --e_layers 3 \ 18 | --d_layers 1 \ 19 | --factor 3 \ 20 | --enc_in 8 \ 21 | --dec_in 8 \ 22 | --c_out 8 \ 23 | --des 'Exp' \ 24 | --d_model 512 \ 25 | --d_ff 512 \ 26 | --itr 1 \ 27 | --lifting_levels 4 \ 28 | --lifting_kernel_size 7 \ 29 | --n_cluster 1 \ 30 | --learning_rate 0.0005 \ 31 | --batch_size 32 \ 32 | --adjust_lr True 33 | 34 | 35 | python -u run.py\ 36 | --task_name long_term_forecast\ 37 | --is_training 1 \ 38 | --root_path ./dataset/exchange_rate/ \ 39 | --data_path exchange_rate.csv \ 40 | --model_id exchange_192_192 \ 41 | --model $model_name \ 42 | --data custom \ 43 | --features M \ 44 | --seq_len 192 \ 45 | --label_len 48 \ 46 | --pred_len 192 \ 47 | --e_layers 3 \ 48 | --d_layers 1 \ 49 | --factor 3 \ 50 | --enc_in 8 
\ 51 | --dec_in 8 \ 52 | --c_out 8 \ 53 | --des 'Exp' \ 54 | --d_model 512 \ 55 | --d_ff 512 \ 56 | --itr 1 \ 57 | --lifting_levels 5 \ 58 | --lifting_kernel_size 7 \ 59 | --n_cluster 1 \ 60 | --learning_rate 0.0005 \ 61 | --batch_size 16 \ 62 | --adjust_lr True 63 | 64 | 65 | python -u run.py \ 66 | --task_name long_term_forecast \ 67 | --is_training 1 \ 68 | --root_path ./dataset/exchange_rate/ \ 69 | --data_path exchange_rate.csv \ 70 | --model_id exchange_336_336 \ 71 | --model $model_name \ 72 | --data custom \ 73 | --features M \ 74 | --seq_len 336 \ 75 | --label_len 48 \ 76 | --pred_len 336 \ 77 | --e_layers 3 \ 78 | --d_layers 1 \ 79 | --factor 3 \ 80 | --enc_in 8 \ 81 | --dec_in 8 \ 82 | --c_out 8 \ 83 | --des 'Exp' \ 84 | --d_model 512 \ 85 | --d_ff 512 \ 86 | --itr 1 \ 87 | --lifting_levels 4 \ 88 | --lifting_kernel_size 7 \ 89 | --n_cluster 1 \ 90 | --learning_rate 0.0005 \ 91 | --batch_size 16 \ 92 | --adjust_lr True 93 | 94 | 95 | python -u run.py \ 96 | --task_name long_term_forecast \ 97 | --is_training 1 \ 98 | --root_path ./dataset/exchange_rate/ \ 99 | --data_path exchange_rate.csv \ 100 | --model_id exchange_720_720 \ 101 | --model $model_name \ 102 | --data custom \ 103 | --features M \ 104 | --seq_len 720 \ 105 | --label_len 48 \ 106 | --pred_len 720 \ 107 | --e_layers 3 \ 108 | --d_layers 1 \ 109 | --factor 3 \ 110 | --enc_in 8 \ 111 | --dec_in 8 \ 112 | --c_out 8 \ 113 | --des 'Exp' \ 114 | --d_model 512 \ 115 | --d_ff 512 \ 116 | --itr 1 \ 117 | --lifting_levels 1 \ 118 | --lifting_kernel_size 7 \ 119 | --n_cluster 1 \ 120 | --learning_rate 0.0005 \ 121 | --batch_size 32 \ 122 | --adjust_lr True -------------------------------------------------------------------------------- /scripts/long_term_forecast/ILI_script/AdaWaveNet.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | model_name=LSWaveNet 4 | 5 | python -u run.py\ 6 | --task_name long_term_forecast \ 7 | 
--is_training 1 \ 8 | --root_path ./dataset/illness/ \ 9 | --data_path national_illness.csv \ 10 | --model_id ili_36_24 \ 11 | --model $model_name \ 12 | --data custom \ 13 | --features M \ 14 | --seq_len 36 \ 15 | --label_len 18 \ 16 | --pred_len 36 \ 17 | --e_layers 3 \ 18 | --d_layers 1 \ 19 | --factor 3 \ 20 | --enc_in 7 \ 21 | --dec_in 7 \ 22 | --c_out 7 \ 23 | --des 'Exp' \ 24 | --d_model 512 \ 25 | --d_ff 512 \ 26 | --itr 1 \ 27 | --lifting_levels 4 \ 28 | --lifting_kernel_size 7 \ 29 | --n_cluster 1 \ 30 | --learning_rate 0.0005 \ 31 | --batch_size 32 \ 32 | --adjust_lr True 33 | 34 | 35 | python -u run.py\ 36 | --task_name long_term_forecast\ 37 | --is_training 1 \ 38 | --root_path ./dataset/exchange_rate/ \ 39 | --data_path exchange_rate.csv \ 40 | --model_id exchange_192_192 \ 41 | --model $model_name \ 42 | --data custom \ 43 | --features M \ 44 | --seq_len 192 \ 45 | --label_len 48 \ 46 | --pred_len 192 \ 47 | --e_layers 3 \ 48 | --d_layers 1 \ 49 | --factor 3 \ 50 | --enc_in 8 \ 51 | --dec_in 8 \ 52 | --c_out 8 \ 53 | --des 'Exp' \ 54 | --d_model 512 \ 55 | --d_ff 512 \ 56 | --itr 1 \ 57 | --lifting_levels 5 \ 58 | --lifting_kernel_size 7 \ 59 | --n_cluster 1 \ 60 | --learning_rate 0.0005 \ 61 | --batch_size 16 \ 62 | --adjust_lr True 63 | 64 | 65 | python -u run.py \ 66 | --task_name long_term_forecast \ 67 | --is_training 1 \ 68 | --root_path ./dataset/exchange_rate/ \ 69 | --data_path exchange_rate.csv \ 70 | --model_id exchange_336_336 \ 71 | --model $model_name \ 72 | --data custom \ 73 | --features M \ 74 | --seq_len 336 \ 75 | --label_len 48 \ 76 | --pred_len 336 \ 77 | --e_layers 3 \ 78 | --d_layers 1 \ 79 | --factor 3 \ 80 | --enc_in 8 \ 81 | --dec_in 8 \ 82 | --c_out 8 \ 83 | --des 'Exp' \ 84 | --d_model 512 \ 85 | --d_ff 512 \ 86 | --itr 1 \ 87 | --lifting_levels 4 \ 88 | --lifting_kernel_size 7 \ 89 | --n_cluster 1 \ 90 | --learning_rate 0.0005 \ 91 | --batch_size 16 \ 92 | --adjust_lr True 93 | 94 | 95 | python -u run.py \ 96 | 
--task_name long_term_forecast \ 97 | --is_training 1 \ 98 | --root_path ./dataset/exchange_rate/ \ 99 | --data_path exchange_rate.csv \ 100 | --model_id exchange_720_720 \ 101 | --model $model_name \ 102 | --data custom \ 103 | --features M \ 104 | --seq_len 720 \ 105 | --label_len 48 \ 106 | --pred_len 720 \ 107 | --e_layers 3 \ 108 | --d_layers 1 \ 109 | --factor 3 \ 110 | --enc_in 8 \ 111 | --dec_in 8 \ 112 | --c_out 8 \ 113 | --des 'Exp' \ 114 | --d_model 512 \ 115 | --d_ff 512 \ 116 | --itr 1 \ 117 | --lifting_levels 1 \ 118 | --lifting_kernel_size 7 \ 119 | --n_cluster 1 \ 120 | --learning_rate 0.0005 \ 121 | --batch_size 32 \ 122 | --adjust_lr True -------------------------------------------------------------------------------- /scripts/long_term_forecast/Solar/AdaWaveNet.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | model_name=LSWaveNet 4 | 5 | python -u run.py \ 6 | --task_name long_term_forecast \ 7 | --is_training 1 \ 8 | --root_path ./dataset/Solar/ \ 9 | --data_path solar_AL.txt \ 10 | --model_id solar_96_96 \ 11 | --model $model_name \ 12 | --data Solar \ 13 | --features M \ 14 | --seq_len 96 \ 15 | --pred_len 96 \ 16 | --e_layers 2 \ 17 | --enc_in 137 \ 18 | --dec_in 137 \ 19 | --c_out 137 \ 20 | --des 'Exp' \ 21 | --d_model 512 \ 22 | --d_ff 512 \ 23 | --learning_rate 0.0005 \ 24 | --itr 1\ 25 | --lifting_levels 1 \ 26 | --lifting_kernel_size 7 \ 27 | --n_cluster 1 \ 28 | --learning_rate 0.0005 \ 29 | --batch_size 16 \ 30 | --adjust_lr True 31 | 32 | python -u run.py \ 33 | --task_name long_term_forecast \ 34 | --is_training 1 \ 35 | --root_path ./dataset/Solar/ \ 36 | --data_path solar_AL.txt \ 37 | --model_id solar_192_192 \ 38 | --model $model_name \ 39 | --data Solar \ 40 | --features M \ 41 | --seq_len 192 \ 42 | --pred_len 192 \ 43 | --e_layers 2 \ 44 | --enc_in 137 \ 45 | --dec_in 137 \ 46 | --c_out 137 \ 47 | --des 'Exp' \ 48 | --d_model 512 \ 49 | --d_ff 512 \ 
50 | --learning_rate 0.0005 \ 51 | --itr 1\ 52 | --lifting_levels 1 \ 53 | --lifting_kernel_size 7 \ 54 | --n_cluster 1 \ 55 | --learning_rate 0.0005 \ 56 | --batch_size 16 \ 57 | --adjust_lr True 58 | 59 | python -u run.py \ 60 | --task_name long_term_forecast \ 61 | --is_training 1 \ 62 | --root_path ./dataset/Solar/ \ 63 | --data_path solar_AL.txt \ 64 | --model_id solar_336_336 \ 65 | --model $model_name \ 66 | --data Solar \ 67 | --features M \ 68 | --seq_len 336 \ 69 | --pred_len 336 \ 70 | --e_layers 2 \ 71 | --enc_in 137 \ 72 | --dec_in 137 \ 73 | --c_out 137 \ 74 | --des 'Exp' \ 75 | --d_model 512 \ 76 | --d_ff 512 \ 77 | --learning_rate 0.0005 \ 78 | --itr 1\ 79 | --lifting_levels 1 \ 80 | --lifting_kernel_size 7 \ 81 | --n_cluster 1 \ 82 | --learning_rate 0.0005 \ 83 | --batch_size 16 \ 84 | --adjust_lr True 85 | 86 | python -u run.py \ 87 | --task_name long_term_forecast \ 88 | --is_training 1 \ 89 | --root_path ./dataset/Solar/ \ 90 | --data_path solar_AL.txt \ 91 | --model_id solar_720_720 \ 92 | --model $model_name \ 93 | --data Solar \ 94 | --features M \ 95 | --seq_len 720 \ 96 | --pred_len 720 \ 97 | --e_layers 2 \ 98 | --enc_in 137 \ 99 | --dec_in 137 \ 100 | --c_out 137 \ 101 | --des 'Exp' \ 102 | --d_model 512 \ 103 | --d_ff 512 \ 104 | --learning_rate 0.0005 \ 105 | --itr 1\ 106 | --lifting_levels 1 \ 107 | --lifting_kernel_size 7 \ 108 | --n_cluster 1 \ 109 | --learning_rate 0.0005 \ 110 | --batch_size 16 \ 111 | --adjust_lr True -------------------------------------------------------------------------------- /scripts/long_term_forecast/Traffic_script/AdaWaveNet.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | model_name=LSWaveNet 4 | 5 | python -u run.py \ 6 | --task_name long_term_forecast \ 7 | --is_training 1 \ 8 | --root_path ./dataset/traffic/ \ 9 | --data_path traffic.csv \ 10 | --model_id traffic_96_96 \ 11 | --model $model_name \ 12 | --data custom \ 13 | 
--features M \ 14 | --seq_len 96 \ 15 | --label_len 48 \ 16 | --pred_len 96 \ 17 | --e_layers 4 \ 18 | --d_layers 1 \ 19 | --factor 3 \ 20 | --enc_in 862 \ 21 | --dec_in 862 \ 22 | --c_out 862 \ 23 | --des 'Exp' \ 24 | --d_model 512 \ 25 | --d_ff 512 \ 26 | --batch_size 32 \ 27 | --learning_rate 0.001 \ 28 | --itr 1 \ 29 | --lifting_levels 1\ 30 | --lifting_kernel_size 7\ 31 | --n_cluster 9 \ 32 | --train_epochs 20 33 | 34 | 35 | python -u run.py \ 36 | --task_name long_term_forecast \ 37 | --is_training 1 \ 38 | --root_path ./dataset/traffic/ \ 39 | --data_path traffic.csv \ 40 | --model_id traffic_192_192 \ 41 | --model $model_name \ 42 | --data custom \ 43 | --features M \ 44 | --seq_len 192 \ 45 | --label_len 48 \ 46 | --pred_len 192 \ 47 | --e_layers 4 \ 48 | --d_layers 1 \ 49 | --factor 3 \ 50 | --enc_in 862 \ 51 | --dec_in 862 \ 52 | --c_out 862 \ 53 | --des 'Exp' \ 54 | --d_model 512 \ 55 | --d_ff 512 \ 56 | --batch_size 32 \ 57 | --learning_rate 0.001 \ 58 | --itr 1 \ 59 | --lifting_levels 1\ 60 | --lifting_kernel_size 7\ 61 | --n_cluster 9 \ 62 | --train_epochs 20 63 | 64 | 65 | python -u run.py \ 66 | --task_name long_term_forecast \ 67 | --is_training 1 \ 68 | --root_path ./dataset/traffic/ \ 69 | --data_path traffic.csv \ 70 | --model_id traffic_336_336 \ 71 | --model $model_name \ 72 | --data custom \ 73 | --features M \ 74 | --seq_len 336 \ 75 | --label_len 48 \ 76 | --pred_len 336 \ 77 | --e_layers 4 \ 78 | --d_layers 1 \ 79 | --factor 3 \ 80 | --enc_in 862 \ 81 | --dec_in 862 \ 82 | --c_out 862 \ 83 | --des 'Exp' \ 84 | --d_model 512 \ 85 | --d_ff 512 \ 86 | --batch_size 32 \ 87 | --learning_rate 0.001 \ 88 | --itr 1 \ 89 | --lifting_levels 1\ 90 | --lifting_kernel_size 7\ 91 | --n_cluster 9 \ 92 | --train_epochs 20 93 | 94 | 95 | python -u run.py \ 96 | --task_name long_term_forecast \ 97 | --is_training 1 \ 98 | --root_path ./dataset/traffic/ \ 99 | --data_path traffic.csv \ 100 | --model_id traffic_720_720 \ 101 | --model $model_name \ 102 | 
--data custom \ 103 | --features M \ 104 | --seq_len 720 \ 105 | --label_len 48 \ 106 | --pred_len 720 \ 107 | --e_layers 4 \ 108 | --d_layers 1 \ 109 | --factor 3 \ 110 | --enc_in 862 \ 111 | --dec_in 862 \ 112 | --c_out 862 \ 113 | --des 'Exp' \ 114 | --d_model 512 \ 115 | --d_ff 512 \ 116 | --batch_size 32 \ 117 | --learning_rate 0.001 \ 118 | --itr 1 \ 119 | --lifting_levels 1\ 120 | --lifting_kernel_size 7\ 121 | --n_cluster 9 \ 122 | --train_epochs 20 -------------------------------------------------------------------------------- /scripts/long_term_forecast/Weather_script/AdaWaveNet.sh: -------------------------------------------------------------------------------- 1 | export CUDA_VISIBLE_DEVICES=0 2 | 3 | model_name=LSWaveNet 4 | 5 | python -u run.py \ 6 | --task_name long_term_forecast \ 7 | --is_training 1 \ 8 | --root_path ./dataset/weather/ \ 9 | --data_path weather.csv \ 10 | --model_id weather_96_96 \ 11 | --model $model_name \ 12 | --data custom \ 13 | --features M \ 14 | --seq_len 96 \ 15 | --label_len 48 \ 16 | --pred_len 96 \ 17 | --e_layers 3 \ 18 | --d_layers 1 \ 19 | --factor 3 \ 20 | --enc_in 21 \ 21 | --dec_in 21 \ 22 | --c_out 21 \ 23 | --des 'Exp' \ 24 | --d_model 512\ 25 | --d_ff 512\ 26 | --itr 1 \ 27 | --lifting_levels 3\ 28 | --lifting_kernel_size 7\ 29 | --n_cluster 4\ 30 | --learning_rate 0.0005\ 31 | --batch_size 16 32 | 33 | 34 | python -u run.py \ 35 | --task_name long_term_forecast \ 36 | --is_training 1 \ 37 | --root_path ./dataset/weather/ \ 38 | --data_path weather.csv \ 39 | --model_id weather_192_192 \ 40 | --model $model_name \ 41 | --data custom \ 42 | --features M \ 43 | --seq_len 192 \ 44 | --label_len 48 \ 45 | --pred_len 192 \ 46 | --e_layers 3 \ 47 | --d_layers 1 \ 48 | --factor 3 \ 49 | --enc_in 21 \ 50 | --dec_in 21 \ 51 | --c_out 21 \ 52 | --des 'Exp' \ 53 | --d_model 512\ 54 | --d_ff 512\ 55 | --itr 1 \ 56 | --lifting_levels 3\ 57 | --lifting_kernel_size 7\ 58 | --n_cluster 4\ 59 | --learning_rate 0.0005\ 60 | 
--batch_size 16 61 | 62 | 63 | python -u run.py \ 64 | --task_name long_term_forecast \ 65 | --is_training 1 \ 66 | --root_path ./dataset/weather/ \ 67 | --data_path weather.csv \ 68 | --model_id weather_336_336 \ 69 | --model $model_name \ 70 | --data custom \ 71 | --features M \ 72 | --seq_len 336 \ 73 | --label_len 48 \ 74 | --pred_len 336 \ 75 | --e_layers 3 \ 76 | --d_layers 1 \ 77 | --factor 3 \ 78 | --enc_in 21 \ 79 | --dec_in 21 \ 80 | --c_out 21 \ 81 | --des 'Exp' \ 82 | --d_model 512\ 83 | --d_ff 512\ 84 | --itr 1 \ 85 | --lifting_levels 3\ 86 | --lifting_kernel_size 7\ 87 | --n_cluster 4\ 88 | --learning_rate 0.0005\ 89 | --batch_size 16 90 | 91 | 92 | python -u run.py \ 93 | --task_name long_term_forecast \ 94 | --is_training 1 \ 95 | --root_path ./dataset/weather/ \ 96 | --data_path weather.csv \ 97 | --model_id weather_720_720 \ 98 | --model $model_name \ 99 | --data custom \ 100 | --features M \ 101 | --seq_len 720 \ 102 | --label_len 48 \ 103 | --pred_len 720 \ 104 | --e_layers 3 \ 105 | --d_layers 1 \ 106 | --factor 3 \ 107 | --enc_in 21 \ 108 | --dec_in 21 \ 109 | --c_out 21 \ 110 | --des 'Exp' \ 111 | --d_model 512\ 112 | --d_ff 512\ 113 | --itr 1 \ 114 | --lifting_levels 3\ 115 | --lifting_kernel_size 7\ 116 | --n_cluster 4\ 117 | --learning_rate 0.0005\ 118 | --batch_size 16 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/comp-well-org/AdaWaveNet/a03f080c96af3420c1c5006c8d4c6f3e78449ee6/utils/__init__.py -------------------------------------------------------------------------------- /utils/losses.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. 
# ---- utils/losses.py --------------------------------------------------------


def divide_no_nan(a, b):
    """
    a/b where every resulting NaN, +Inf or -Inf is replaced by 0.

    :param a: Numerator tensor.
    :param b: Denominator tensor (broadcast against ``a`` by the division).
    :return: New tensor holding the quotient with non-finite entries zeroed.
    """
    result = a / b
    # NaN is the only value that compares unequal to itself (covers 0/0).
    result[result != result] = .0
    # isinf catches both signs; comparing against np.inf alone let -x/0 leak -inf.
    result[t.isinf(result)] = .0
    return result


class mape_loss(nn.Module):
    """Masked MAPE loss; ``insample`` and ``freq`` are accepted only for a uniform call signature."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.Tensor:
        """
        MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error

        The result is a fraction (it is not multiplied by 100). Positions whose
        target is 0 get weight 0 and therefore contribute nothing.

        :param insample: Unused.
        :param freq: Unused.
        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        # mask / target folds the masking and the 1/|target| scaling into one weight.
        weights = divide_no_nan(mask, target)
        return t.mean(t.abs((forecast - target) * weights))


class smape_loss(nn.Module):
    """Masked sMAPE loss; ``insample`` and ``freq`` are accepted only for a uniform call signature."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.Tensor:
        """
        sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)

        :param insample: Unused.
        :param freq: Unused.
        :param forecast: Forecast values. Shape: batch, time
        :param target: Target values. Shape: batch, time
        :param mask: 0/1 mask. Shape: batch, time
        :return: Loss value
        """
        # The denominator is built from ``.data`` so it is excluded from autograd.
        scale = t.abs(forecast.data) + t.abs(target.data)
        return 200 * t.mean(divide_no_nan(t.abs(forecast - target), scale) * mask)


class mase_loss(nn.Module):
    """Masked MASE loss scaled by the in-sample seasonal-naive error."""

    def __init__(self):
        super().__init__()

    def forward(self, insample: t.Tensor, freq: int,
                forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.Tensor:
        """
        MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf

        :param insample: Insample values. Shape: batch, time_i
        :param freq: Frequency value
        :param forecast: Forecast values. Shape: batch, time_o
        :param target: Target values. Shape: batch, time_o
        :param mask: 0/1 mask. Shape: batch, time_o
        :return: Loss value
        """
        # Per-series mean absolute difference between points ``freq`` steps apart.
        masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
        # A zero scale (constant in-sample series) yields weight 0 instead of inf.
        masked_masep_inv = divide_no_nan(mask, masep[:, None])
        return t.mean(t.abs(target - forecast) * masked_masep_inv)
# ---- utils/m4_summary.py ----------------------------------------------------


def _stack_series(rows):
    """
    Pack 1-D series into one array without tripping over ragged input.

    Equal-length rows give the same 2-D array ``np.array`` would build; rows of
    differing length give a 1-D object array holding the individual series
    (recent NumPy raises ValueError when asked to infer that implicitly).
    """
    rows = list(rows)
    if len({len(row) for row in rows}) <= 1:
        return np.array(rows)
    packed = np.empty(len(rows), dtype=object)
    for position, row in enumerate(rows):
        packed[position] = row
    return packed


def group_values(values, groups, group_name):
    """
    Select the series of one group and strip their NaN entries.

    :param values: Array of series, aligned with ``groups``.
    :param groups: Array of group labels, one per series.
    :param group_name: Label of the group to extract.
    :return: 2-D array when all selected series have equal length, otherwise a
             1-D object array of series.
    """
    return _stack_series(v[~np.isnan(v)] for v in values[groups == group_name])


def mase(forecast, insample, outsample, frequency):
    """
    Mean absolute scaled error of one forecast.

    :param forecast: Forecast values.
    :param insample: In-sample history used for the seasonal-naive scale.
    :param outsample: Target values.
    :param frequency: Lag of the seasonal-naive differences.
    :return: MAE of the forecast divided by the in-sample MAE at lag ``frequency``.
    """
    return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))


def smape_2(forecast, target):
    """Element-wise sMAPE in percent (0..200)."""
    denom = np.abs(target) + np.abs(forecast)
    # divide by 1.0 instead of 0.0: when denom is zero both values are zero,
    # so the numerator is 0.0 anyway.
    denom[denom == 0.0] = 1.0
    return 200 * np.abs(forecast - target) / denom


def mape(forecast, target):
    """Element-wise MAPE in percent."""
    denom = np.abs(target)
    # divide by 1.0 instead of 0.0 to avoid inf/NaN; for a zero target the entry
    # then equals 100 * |forecast| (the numerator is not necessarily zero here).
    denom[denom == 0.0] = 1.0
    return 100 * np.abs(forecast - target) / denom


class M4Summary:
    """Scores per-group forecast CSV files against the M4 test set and the Naive2 baseline."""

    def __init__(self, file_path, root_path):
        """
        :param file_path: Prefix to which ``<group>_forecast.csv`` is appended.
        :param root_path: Passed to ``M4Dataset.load`` and expected to contain
                          ``submission-Naive2.csv``.
        """
        self.file_path = file_path
        self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
        self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
        self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')

    def evaluate(self):
        """
        Evaluate the saved forecasts using the M4 test dataset.

        Forecasts are read from ``file_path + <group> + "_forecast.csv"``. A
        group whose file does not exist is skipped in the scoring loop;
        ``summarize_groups`` then raises KeyError for any of the six groups it
        expects but that was never scored.

        :return: Tuple ``(sMAPE, OWA, MAPE, MASE)`` of dicts keyed by summary
                 group, each value rounded to 3 decimals.
        """
        grouped_owa = OrderedDict()

        naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
        # Rows have different numbers of non-NaN entries, so pack them as a
        # ragged collection rather than relying on np.array to infer a shape.
        naive2_forecasts = _stack_series(v[~np.isnan(v)] for v in naive2_forecasts)

        model_mases = {}
        naive2_smapes = {}
        naive2_mases = {}
        grouped_smapes = {}
        grouped_mapes = {}
        for group_name in M4Meta.seasonal_patterns:
            file_name = self.file_path + group_name + "_forecast.csv"
            if os.path.exists(file_name):
                model_forecast = pd.read_csv(file_name).values

            # NOTE(review): when the file is missing, model_forecast from the
            # previous group is reused below (or is undefined for the first
            # group); in the collapsed source it is not visible whether the
            # rest of the loop body was meant to sit under the ``if`` - confirm.
            naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
            target = group_values(self.test_set.values, self.test_set.groups, group_name)
            # all timeseries within group have same frequency
            frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
            insample = group_values(self.training_set.values, self.test_set.groups, group_name)

            series_count = len(model_forecast)
            model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
                                                    insample=insample[i],
                                                    outsample=target[i],
                                                    frequency=frequency) for i in range(series_count)])
            naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
                                                     insample=insample[i],
                                                     outsample=target[i],
                                                     frequency=frequency) for i in range(series_count)])

            naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
            grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
            grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))

        grouped_smapes = self.summarize_groups(grouped_smapes)
        grouped_mapes = self.summarize_groups(grouped_mapes)
        grouped_model_mases = self.summarize_groups(model_mases)
        grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
        grouped_naive2_mases = self.summarize_groups(naive2_mases)
        # OWA: mean of the MASE and sMAPE ratios relative to Naive2.
        for k in grouped_model_mases.keys():
            grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
                              grouped_smapes[k] / grouped_naive2_smapes[k]) / 2

        def round_all(d):
            return {key: np.round(value, 3) for key, value in d.items()}

        return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all(
            grouped_model_mases)

    def summarize_groups(self, scores):
        """
        Re-group scores respecting M4 rules.

        Yearly, Quarterly and Monthly are reported as-is; Weekly, Daily and
        Hourly are merged into a count-weighted 'Others'; 'Average' is the
        count-weighted mean over every series in the test set.

        :param scores: Scores per group.
        :return: Grouped scores.
        """
        scores_summary = OrderedDict()

        def group_count(group_name):
            return len(np.where(self.test_set.groups == group_name)[0])

        weighted_score = {}
        for g in ['Yearly', 'Quarterly', 'Monthly']:
            weighted_score[g] = scores[g] * group_count(g)
            scores_summary[g] = scores[g]

        others_score = 0
        others_count = 0
        for g in ['Weekly', 'Daily', 'Hourly']:
            others_score += scores[g] * group_count(g)
            others_count += group_count(g)
        weighted_score['Others'] = others_score
        scores_summary['Others'] = others_score / others_count

        average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
        scores_summary['Average'] = average

        return scores_summary


# ---- utils/masking.py -------------------------------------------------------


class TriangularCausalMask():
    """Boolean mask of shape [B, 1, L, L] that is True strictly above the diagonal."""

    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        return self._mask


class ProbMask():
    """Rows of an upper-triangular mask gathered at ``index`` and shaped like ``scores``."""

    def __init__(self, B, H, L, index, scores, device="cpu"):
        _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
        _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
        # Advanced indexing picks, per batch and head, the mask rows named by ``index``.
        indicator = _mask_ex[torch.arange(B)[:, None, None],
                             torch.arange(H)[None, :, None],
                             index, :].to(device)
        self._mask = indicator.view(scores.shape).to(device)

    @property
    def mask(self):
        return self._mask


# ---- utils/metrics.py -------------------------------------------------------


def RSE(pred, true):
    """Root relative squared error: error norm divided by the norm of ``true`` around its mean."""
    return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))


def CORR(pred, true):
    """
    Pearson correlation between ``pred`` and ``true`` along axis 0, averaged over the last axis.

    A column with zero variance yields a 0/0 division (NaN) for that column.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    # Pearson denominator: sqrt(sum(a^2) * sum(b^2)), not sqrt(sum(a^2 * b^2)).
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    return (u / d).mean(-1)


def MAE(pred, true):
    """Mean absolute error."""
    return np.mean(np.abs(pred - true))


def MSE(pred, true):
    """Mean squared error."""
    return np.mean((pred - true) ** 2)


def RMSE(pred, true):
    """Root mean squared error."""
    return np.sqrt(MSE(pred, true))


def MAPE(pred, true):
    """Mean absolute percentage error as a fraction; zeros in ``true`` produce inf/NaN."""
    return np.mean(np.abs((pred - true) / true))


def MSPE(pred, true):
    """Mean squared percentage error as a fraction; zeros in ``true`` produce inf/NaN."""
    return np.mean(np.square((pred - true) / true))


def metric(pred, true):
    """Return ``(mae, mse, rmse, mape, mspe)`` for the given predictions."""
    mae = MAE(pred, true)
    mse = MSE(pred, true)
    rmse = RMSE(pred, true)
    mape = MAPE(pred, true)
    mspe = MSPE(pred, true)

    return mae, mse, rmse, mape, mspe


# ---- utils/print_args.py ----------------------------------------------------


def print_args(args):
    """
    Pretty-print the experiment configuration in two 20-character-wide column pairs.

    ``args`` must expose every attribute referenced below; the task-specific
    sections are printed only for the matching ``args.task_name``.
    """
    print("\033[1m" + "Basic Config" + "\033[0m")
    print(f' {"Task Name:":<20}{args.task_name:<20}{"Is Training:":<20}{args.is_training:<20}')
    print(f' {"Model ID:":<20}{args.model_id:<20}{"Model:":<20}{args.model:<20}')
    print()

    print("\033[1m" + "Data Loader" + "\033[0m")
    print(f' {"Data:":<20}{args.data:<20}{"Root Path:":<20}{args.root_path:<20}')
    print(f' {"Data Path:":<20}{args.data_path:<20}{"Features:":<20}{args.features:<20}')
    print(f' {"Target:":<20}{args.target:<20}{"Freq:":<20}{args.freq:<20}')
    print(f' {"Checkpoints:":<20}{args.checkpoints:<20}')
    print()

    if args.task_name in ['long_term_forecast', 'short_term_forecast']:
        print("\033[1m" + "Forecasting Task" + "\033[0m")
        print(f' {"Seq Len:":<20}{args.seq_len:<20}{"Label Len:":<20}{args.label_len:<20}')
        print(f' {"Pred Len:":<20}{args.pred_len:<20}{"Seasonal Patterns:":<20}{args.seasonal_patterns:<20}')
        print(f' {"Inverse:":<20}{args.inverse:<20}')
        print()

    if args.task_name == 'imputation':
        print("\033[1m" + "Imputation Task" + "\033[0m")
        print(f' {"Mask Rate:":<20}{args.mask_rate:<20}')
        print()

    if args.task_name == 'anomaly_detection':
        print("\033[1m" + "Anomaly Detection Task" + "\033[0m")
        print(f' {"Anomaly Ratio:":<20}{args.anomaly_ratio:<20}')
        print()

    print("\033[1m" + "Model Parameters" + "\033[0m")
    print(f' {"Top k:":<20}{args.top_k:<20}{"Num Kernels:":<20}{args.num_kernels:<20}')
    print(f' {"Enc In:":<20}{args.enc_in:<20}{"Dec In:":<20}{args.dec_in:<20}')
    print(f' {"C Out:":<20}{args.c_out:<20}{"d model:":<20}{args.d_model:<20}')
    print(f' {"n heads:":<20}{args.n_heads:<20}{"e layers:":<20}{args.e_layers:<20}')
    print(f' {"d layers:":<20}{args.d_layers:<20}{"d FF:":<20}{args.d_ff:<20}')
    print(f' {"Moving Avg:":<20}{args.moving_avg:<20}{"Factor:":<20}{args.factor:<20}')
    print(f' {"Distil:":<20}{args.distil:<20}{"Dropout:":<20}{args.dropout:<20}')
    print(f' {"Embed:":<20}{args.embed:<20}{"Activation:":<20}{args.activation:<20}')
    print(f' {"Output Attention:":<20}{args.output_attention:<20}')
    print()

    print("\033[1m" + "Run Parameters" + "\033[0m")
    print(f' {"Num Workers:":<20}{args.num_workers:<20}{"Itr:":<20}{args.itr:<20}')
    print(f' {"Train Epochs:":<20}{args.train_epochs:<20}{"Batch Size:":<20}{args.batch_size:<20}')
    print(f' {"Patience:":<20}{args.patience:<20}{"Learning Rate:":<20}{args.learning_rate:<20}')
    print(f' {"Des:":<20}{args.des:<20}{"Loss:":<20}{args.loss:<20}')
    print(f' {"Lradj:":<20}{args.lradj:<20}{"Use Amp:":<20}{args.use_amp:<20}')
    print()

    print("\033[1m" + "GPU" + "\033[0m")
    print(f' {"Use GPU:":<20}{args.use_gpu:<20}{"GPU:":<20}{args.gpu:<20}')
    print(f' {"Use Multi GPU:":<20}{args.use_multi_gpu:<20}{"Devices:":<20}{args.devices:<20}')
    print()

    print("\033[1m" + "De-stationary Projector Params" + "\033[0m")
    # A list cannot take the '<20' format spec directly, so join it into a string first.
    p_hidden_dims_str = ', '.join(map(str, args.p_hidden_dims))
    print(f' {"P Hidden Dims:":<20}{p_hidden_dims_str:<20}{"P Hidden Layers:":<20}{args.p_hidden_layers:<20}')
    print()
# ---- utils/timefeatures.py --------------------------------------------------


class TimeFeature:
    """Base class for calendar features; subclasses map a DatetimeIndex to numeric values."""

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """ISO week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.isocalendar().week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Build the calendar features that suit a sampling frequency.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Returns
    -------
    One freshly constructed feature object per class registered for the first
    offset type that the parsed frequency is an instance of.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the registered offset types.
    """
    parsed_offset = to_offset(freq_str)

    # Checked top to bottom; the first matching offset type decides the features.
    granularity_table = (
        (offsets.YearEnd, ()),
        (offsets.QuarterEnd, (MonthOfYear,)),
        (offsets.MonthEnd, (MonthOfYear,)),
        (offsets.Week, (DayOfMonth, WeekOfYear)),
        (offsets.Day, (DayOfWeek, DayOfMonth, DayOfYear)),
        (offsets.BusinessDay, (DayOfWeek, DayOfMonth, DayOfYear)),
        (offsets.Hour, (HourOfDay, DayOfWeek, DayOfMonth, DayOfYear)),
        (offsets.Minute, (MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear)),
        (offsets.Second, (SecondOfMinute, MinuteOfHour, HourOfDay, DayOfWeek, DayOfMonth, DayOfYear)),
    )
    for offset_kind, feature_kinds in granularity_table:
        if isinstance(parsed_offset, offset_kind):
            return [feature_kind() for feature_kind in feature_kinds]

    # NOTE(review): line breaks/indentation of this message were reconstructed
    # from a whitespace-collapsed source - confirm against the original layout.
    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y - yearly
            alias: A
        M - monthly
        W - weekly
        D - daily
        B - business days
        H - hourly
        T - minutely
            alias: min
        S - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, freq='h'):
    """Stack every feature selected for ``freq`` into one array, one row per feature."""
    feature_rows = [feature(dates) for feature in time_features_from_frequency_str(freq)]
    return np.vstack(feature_rows)
# --------------------------------------------------------------------------------
import os

import numpy as np
import torch
import matplotlib.pyplot as plt
import pandas as pd
import math

plt.switch_backend('agg')


def adjust_learning_rate(optimizer, epoch, args):
    """Set the learning rate of every param group according to ``args.lradj``.

    Supported schedules:
      * ``'type1'``  - ``args.learning_rate`` halved for each epoch after the first.
      * ``'type2'``  - fixed learning rates applied at specific epochs only.
      * ``'cosine'`` - half-cosine schedule over ``args.train_epochs``.

    Raises:
        ValueError: if ``args.lradj`` is not one of the schedules above
            (previously this surfaced as an ``UnboundLocalError``).
    """
    if args.lradj == 'type1':
        lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
    elif args.lradj == 'type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    elif args.lradj == "cosine":
        lr_adjust = {epoch: args.learning_rate / 2 * (1 + math.cos(epoch / args.train_epochs * math.pi))}
    else:
        raise ValueError(
            "Unknown lradj {!r}; expected 'type1', 'type2' or 'cosine'".format(args.lradj)
        )
    # 'type2' only defines a rate for some epochs; other epochs keep the current one.
    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    """Track validation loss, checkpoint on improvement and flag when to stop.

    ``early_stop`` becomes True after ``patience`` consecutive calls in which
    the negated loss fails to reach ``best_score + delta``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # ``np.inf`` rather than ``np.Inf``: the capitalised alias was removed in NumPy 2.0.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; save ``model`` under ``path`` when it improved."""
        # Higher score is better, so the loss is negated.
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``checkpoint.pth`` inside ``path``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), os.path.join(path, 'checkpoint.pth'))
        self.val_loss_min = val_loss


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    # ``dict.get`` means a missing attribute yields None instead of raising.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class StandardScaler():
    """Standardise data with a fixed mean and standard deviation."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def transform(self, data):
        """Return ``(data - mean) / std``."""
        return (data - self.mean) / self.std

    def inverse_transform(self, data):
        """Undo ``transform``: return ``data * std + mean``."""
        return (data * self.std) + self.mean


def visual(true, preds=None, name='./pic/test.pdf'):
    """
    Results visualization

    Plots ``true`` (and ``preds`` when given) and saves the figure to ``name``.
    """
    fig = plt.figure()
    try:
        plt.plot(true, label='GroundTruth', linewidth=2)
        if preds is not None:
            plt.plot(preds, label='Prediction', linewidth=2)
        plt.legend()
        plt.savefig(name, bbox_inches='tight')
    finally:
        # pyplot keeps every figure alive until it is closed; without this,
        # repeated calls accumulate open figures.
        plt.close(fig)


def adjustment(gt, pred):
    """Point-adjust ``pred`` in place: one hit inside a ground-truth anomaly
    segment marks the whole segment as detected.

    Returns:
        The ``(gt, pred)`` pair; ``pred`` is the same object, modified in place.
    """
    anomaly_state = False
    for i in range(len(gt)):
        if gt[i] == 1 and pred[i] == 1 and not anomaly_state:
            anomaly_state = True
            # Walk back to the start of the segment. The stop value is -1 so
            # that index 0 is included when the segment begins at the very
            # first sample (``range(i, 0, -1)`` skipped it).
            for j in range(i, -1, -1):
                if gt[j] == 0:
                    break
                if pred[j] == 0:
                    pred[j] = 1
            # Walk forward to the end of the segment.
            for j in range(i, len(gt)):
                if gt[j] == 0:
                    break
                if pred[j] == 0:
                    pred[j] = 1
        elif gt[i] == 0:
            anomaly_state = False
        if anomaly_state:
            pred[i] = 1
    return gt, pred


def cal_accuracy(y_pred, y_true):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``."""
    return np.mean(y_pred == y_true)
# --------------------------------------------------------------------------------