├── exp
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   ├── exp_basic.cpython-310.pyc
│   │   ├── exp_imputation.cpython-310.pyc
│   │   ├── exp_classification.cpython-310.pyc
│   │   ├── exp_anomaly_detection.cpython-310.pyc
│   │   ├── exp_long_term_forecasting.cpython-310.pyc
│   │   └── exp_short_term_forecasting.cpython-310.pyc
│   ├── exp_basic.py
│   ├── torchsummary.py
│   ├── exp_classification.py
│   ├── exp_anomaly_detection.py
│   ├── exp_imputation.py
│   ├── exp_short_term_forecasting.py
│   └── exp_long_term_forecasting.py
├── layers
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── Embed.cpython-310.pyc
│   │   ├── Embed1.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── StandardNorm.cpython-310.pyc
│   │   ├── Autoformer_EncDec.cpython-310.pyc
│   │   ├── Transformer_EncDec1.cpython-310.pyc
│   │   └── SelfAttention_Family1.cpython-310.pyc
│   ├── StandardNorm.py
│   ├── Transformer_EncDec1.py
│   ├── Transformer_EncDec.py
│   ├── Embed1.py
│   ├── AutoCorrelation.py
│   ├── Autoformer_EncDec.py
│   ├── Embed.py
│   ├── SelfAttention_Family.py
│   └── SelfAttention_Family1.py
├── models
│   ├── __init__.py
│   └── __pycache__
│       ├── LLMMixer.cpython-310.pyc
│       ├── __init__.cpython-310.pyc
│       └── TimeMixer.cpython-310.pyc
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── losses.cpython-310.pyc
│   │   ├── tools.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── masking.cpython-310.pyc
│   │   ├── metrics.cpython-310.pyc
│   │   ├── m4_summary.cpython-310.pyc
│   │   └── timefeatures.cpython-310.pyc
│   ├── masking.py
│   ├── metrics.py
│   ├── data_analysis.py
│   ├── losses.py
│   ├── timefeatures.py
│   ├── tools.py
│   └── m4_summary.py
├── scripts
│   └── read.me
├── data_provider
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── m4.cpython-310.pyc
│   │   ├── uea.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── data_loader.cpython-310.pyc
│   │   └── data_factory.cpython-310.pyc
│   ├── data_factory.py
│   ├── uea.py
│   └── data_loader.py
├── figures
│   ├── read.me
│   ├── llmmixer.pdf
│   └── llmmixer.png
├── requirements.txt
├── README.md
├── run.py
└── LICENSE

/exp/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/scripts/read.me:
--------------------------------------------------------------------------------
1 | scripts
2 | 
--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/figures/read.me:
--------------------------------------------------------------------------------
1 | all figures
2 | 
--------------------------------------------------------------------------------
/figures/llmmixer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/figures/llmmixer.pdf
--------------------------------------------------------------------------------
/figures/llmmixer.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/figures/llmmixer.png -------------------------------------------------------------------------------- /exp/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Embed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Embed.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/losses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/losses.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tools.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/tools.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_basic.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Embed1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Embed1.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/LLMMixer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/LLMMixer.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/masking.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/masking.cpython-310.pyc 
-------------------------------------------------------------------------------- /utils/__pycache__/metrics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/metrics.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/m4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/m4.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/uea.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/uea.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/TimeMixer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/TimeMixer.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/m4_summary.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/m4_summary.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_imputation.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_imputation.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/StandardNorm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/StandardNorm.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/timefeatures.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/timefeatures.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_classification.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_classification.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/data_loader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/data_loader.cpython-310.pyc -------------------------------------------------------------------------------- 
/exp/__pycache__/exp_anomaly_detection.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_anomaly_detection.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Autoformer_EncDec.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Autoformer_EncDec.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/data_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/data_factory.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Transformer_EncDec1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Transformer_EncDec1.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_long_term_forecasting.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_long_term_forecasting.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_short_term_forecasting.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_short_term_forecasting.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/SelfAttention_Family1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/SelfAttention_Family1.cpython-310.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops==0.7.0 2 | matplotlib==3.7.0 3 | numpy==1.23.5 4 | pandas==1.5.3 5 | scikit_learn==1.2.2 6 | scipy==1.12.0 7 | tqdm==4.65.0 8 | patool==1.12 9 | reformer_pytorch==1.4.4 10 | sktime==0.4.1 11 | sympy==1.11.1 12 | torch==2.3.0 13 | accelerate==0.33.0 14 | transformers==4.44.0 15 | sentencepiece==0.2.0 16 | -------------------------------------------------------------------------------- /utils/masking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TriangularCausalMask(): 5 | def __init__(self, B, L, device="cpu"): 6 | mask_shape = [B, 1, L, L] 7 | with torch.no_grad(): 8 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 9 | 10 | @property 11 | def mask(self): 12 | return self._mask 13 | 14 | 15 | class ProbMask(): 16 | def __init__(self, B, H, L, index, scores, device="cpu"): 17 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 18 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) 19 | indicator = _mask_ex[torch.arange(B)[:, None, None], 20 | 
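# annotation on the gather below: fancy indexing selects, for each batch b and
# head h, the rows of the expanded causal mask at the query positions given in
# `index` (the top-u queries kept by ProbSparse-style attention)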
torch.arange(H)[None, :, None], 21 | index, :].to(device) 22 | self._mask = indicator.view(scores.shape).to(device) 23 | 24 | @property 25 | def mask(self): 26 | return self._mask 27 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def RSE(pred, true): 5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) 6 | 7 | 8 | def CORR(pred, true): 9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) 10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) 11 | return (u / d).mean(-1) 12 | 13 | 14 | def MAE(pred, true): 15 | return np.mean(np.abs(pred - true)) 16 | 17 | 18 | def MSE(pred, true): 19 | return np.mean((pred - true) ** 2) 20 | 21 | 22 | def RMSE(pred, true): 23 | return np.sqrt(MSE(pred, true)) 24 | 25 | 26 | def MAPE(pred, true): 27 | mape = np.abs((pred - true) / true) 28 | mape = np.where(mape > 5, 0, mape) 29 | return np.mean(mape) 30 | 31 | 32 | def MSPE(pred, true): 33 | return np.mean(np.square((pred - true) / true)) 34 | 35 | 36 | def metric(pred, true): 37 | mae = MAE(pred, true) 38 | mse = MSE(pred, true) 39 | rmse = RMSE(pred, true) 40 | mape = MAPE(pred, true) 41 | mspe = MSPE(pred, true) 42 | 43 | return mae, mse, rmse, mape, mspe 44 | -------------------------------------------------------------------------------- /exp/exp_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from models import LLMMixer 4 | 5 | 6 | class Exp_Basic(object): 7 | def __init__(self, args): 8 | self.args = args 9 | self.model_dict = { 10 | 'LLMMixer': LLMMixer, 11 | } 12 | self.device = self._acquire_device() 13 | self.model = self._build_model().to(self.device) 14 | 15 | def _build_model(self): 16 | raise NotImplementedError 17 | return None 18 | 19 | def _acquire_device(self): 20 | if self.args.use_gpu: 21 | import platform 22 | if platform.system() == 'Darwin': 23 | device = torch.device('mps') 24 | print('Use MPS') 25 | return device 26 | os.environ["CUDA_VISIBLE_DEVICES"] = str( 27 | self.args.gpu) if not self.args.use_multi_gpu else self.args.devices 28 | device = torch.device('cuda:{}'.format(self.args.gpu)) 29 | if self.args.use_multi_gpu: 30 | print('Use GPU: cuda{}'.format(self.args.device_ids)) 31 | else: 32 | print('Use GPU: cuda:{}'.format(self.args.gpu)) 33 | else: 34 | device = torch.device('cpu') 35 | print('Use CPU') 36 | return device 37 | 38 | def _get_data(self): 39 | pass 40 | 41 | def vali(self): 42 | pass 43 | 44 | def train(self): 45 | pass 46 | 47 | def test(self): 48 | pass 49 | -------------------------------------------------------------------------------- /layers/StandardNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Normalize(nn.Module): 6 | def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False): 7 | """ 8 | :param num_features: the number of features or channels 9 | :param eps: a value added for numerical stability 10 | :param affine: if True, RevIN has learnable affine parameters 11 | """ 12 | super(Normalize, self).__init__() 13 | self.num_features = num_features 14 | self.eps = eps 15 | self.affine = affine 16 | self.subtract_last = subtract_last 17 | self.non_norm = non_norm 18 | if self.affine: 19 | 
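# annotation: affine=True enables RevIN-style learnable rescaling; _init_params
# (defined below) creates weight/bias parameters of shape (num_features,)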
self._init_params() 20 | 21 | def forward(self, x, mode: str): 22 | if mode == 'norm': 23 | self._get_statistics(x) 24 | x = self._normalize(x) 25 | elif mode == 'denorm': 26 | x = self._denormalize(x) 27 | else: 28 | raise NotImplementedError 29 | return x 30 | 31 | def _init_params(self): 32 | # initialize RevIN params: (C,) 33 | self.affine_weight = nn.Parameter(torch.ones(self.num_features)) 34 | self.affine_bias = nn.Parameter(torch.zeros(self.num_features)) 35 | 36 | def _get_statistics(self, x): 37 | dim2reduce = tuple(range(1, x.ndim - 1)) 38 | if self.subtract_last: 39 | self.last = x[:, -1, :].unsqueeze(1) 40 | else: 41 | self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach() 42 | self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach() 43 | 44 | def _normalize(self, x): 45 | if self.non_norm: 46 | return x 47 | if self.subtract_last: 48 | x = x - self.last 49 | else: 50 | x = x - self.mean 51 | x = x / self.stdev 52 | if self.affine: 53 | x = x * self.affine_weight 54 | x = x + self.affine_bias 55 | return x 56 | 57 | def _denormalize(self, x): 58 | if self.non_norm: 59 | return x 60 | if self.affine: 61 | x = x - self.affine_bias 62 | x = x / (self.affine_weight + self.eps * self.eps) 63 | x = x * self.stdev 64 | if self.subtract_last: 65 | x = x + self.last 66 | else: 67 | x = x + self.mean 68 | return x 69 | -------------------------------------------------------------------------------- /data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ 2 | MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_PEMS, \ 3 | Dataset_Solar 4 | from data_provider.uea import collate_fn 5 | from torch.utils.data import DataLoader 6 | 7 | data_dict = { 8 | 'ETTh1': Dataset_ETT_hour, 9 | 'ETTh2': Dataset_ETT_hour, 10 | 'ETTm1': Dataset_ETT_minute, 11 | 'ETTm2': Dataset_ETT_minute, 12 | 'custom': Dataset_Custom, 13 | } 14 | 15 | 16 | def data_provider(args, flag): 17 | Data = data_dict[args.data] 18 | timeenc = 0 if args.embed != 'timeF' else 1 19 | 20 | if flag == 'test': 21 | shuffle_flag = False 22 | drop_last = True 23 | if args.task_name == 'anomaly_detection' or args.task_name == 'classification': 24 | batch_size = args.batch_size 25 | else: 26 | batch_size = args.batch_size # bsz=1 for evaluation 27 | freq = args.freq 28 | else: 29 | shuffle_flag = True 30 | drop_last = True 31 | batch_size = args.batch_size # bsz for train and valid 32 | freq = args.freq 33 | 34 | if args.task_name == 'anomaly_detection': 35 | drop_last = False 36 | data_set = Data( 37 | root_path=args.root_path, 38 | win_size=args.seq_len, 39 | flag=flag, 40 | ) 41 | print(flag, len(data_set)) 42 | data_loader = DataLoader( 43 | data_set, 44 | batch_size=batch_size, 45 | shuffle=shuffle_flag, 46 | num_workers=args.num_workers, 47 | drop_last=drop_last) 48 | return data_set, data_loader 49 | elif args.task_name == 'classification': 50 | drop_last = False 51 | data_set = Data( 52 | root_path=args.root_path, 53 | flag=flag, 54 | ) 55 | print(flag, len(data_set)) 56 | data_loader = DataLoader( 57 | data_set, 58 | batch_size=batch_size, 59 | shuffle=shuffle_flag, 60 | num_workers=args.num_workers, 61 | drop_last=drop_last, 62 | collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) 63 | ) 64 | return data_set, data_loader 65 | else: 66 | if args.data == 'm4': 67 | 
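# annotation (our reading): for the M4 benchmark the final incomplete batch is
# kept, since short-term forecasting evaluation needs every window, so
# drop_last is forced off right below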
drop_last = False 68 | data_set = Data( 69 | root_path=args.root_path, 70 | data_path=args.data_path, 71 | flag=flag, 72 | size=[args.seq_len, args.label_len, args.pred_len], 73 | features=args.features, 74 | target=args.target, 75 | timeenc=timeenc, 76 | freq=freq, 77 | seasonal_patterns=args.seasonal_patterns 78 | ) 79 | print(flag, len(data_set)) 80 | data_loader = DataLoader( 81 | data_set, 82 | batch_size=batch_size, 83 | shuffle=shuffle_flag, 84 | num_workers=args.num_workers, 85 | drop_last=drop_last) 86 | return data_set, data_loader 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | # LLM-Mixer: Multiscale Mixing in LLMs for Time Series Forecasting
3 | 
4 | 
5 | 
6 | 
7 | 
8 | 
9 | 10 | ![](https://img.shields.io/github/last-commit/Kowsher/LLMMixer?color=green) 11 | ![](https://img.shields.io/github/stars/Kowsher/LLMMixer?color=yellow) 12 | ![](https://img.shields.io/github/forks/Kowsher/LLMMixer?color=lightblue) 13 | ![](https://img.shields.io/badge/PRs-Welcome-green) 14 | 15 |
16 | 17 |
18 | 
19 | **[Paper Page](https://arxiv.org/abs/2410.11674)**
20 | **[Code](https://github.com/Kowsher/LLMMixer)**
21 | 
22 | 
23 | 
24 | 
25 | 
26 | ---
27 | >
28 | > 🙋 Please let us know if you find a mistake or have any suggestions!
29 | >
30 | > 🌟 If you find this resource helpful, please consider starring this repository and citing our research:
31 | 
32 | ```
33 | @article{kowsher2024llm,
34 |   title={LLM-Mixer: Multiscale Mixing in LLMs for Time Series Forecasting},
35 |   author={Kowsher, Md and Sobuj, Md Shohanur Islam and Prottasha, Nusrat Jahan and Alanis, E Alejandro and Garibay, Ozlem Ozmen and Yousefi, Niloofar},
36 |   journal={arXiv preprint arXiv:2410.11674},
37 |   year={2024}
38 | }
39 | 
40 | ```
41 | 
42 | ## Introduction
43 | LLM-Mixer is an advanced framework designed to improve forecasting accuracy by integrating multiscale time series decomposition with the power of large language models (LLMs). By capturing both short-term and long-term temporal patterns, LLM-Mixer enhances the model's ability to understand complex trends, making it highly effective for time series forecasting tasks.
44 | 
45 | 
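The multiscale idea can be pictured in a few lines of PyTorch. The sketch below is purely illustrative (the helper `multiscale_views`, the pooling factor, and the shapes are our assumptions, not this repo's API): each coarser view is an average-pooled copy of the input series, and the mixer combines information across the resulting scales.

```python
import torch
import torch.nn.functional as F

def multiscale_views(x, num_scales=3, factor=2):
    """Return [x, x_pooled_2x, x_pooled_4x, ...] for a (batch, time, channels) series."""
    views, cur = [x], x
    for _ in range(num_scales - 1):
        # avg_pool1d expects (batch, channels, time), so transpose around the pool
        cur = F.avg_pool1d(cur.transpose(1, 2), kernel_size=factor).transpose(1, 2)
        views.append(cur)
    return views

x = torch.randn(8, 96, 7)  # 8 samples, 96 time steps, 7 variates
print([tuple(v.shape) for v in multiscale_views(x)])
# [(8, 96, 7), (8, 48, 7), (8, 24, 7)]
```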

46 | ![LLM-Mixer architecture](figures/llmmixer.png)
47 | 
48 | 
49 | 

50 | 
51 | ## Requirements
52 | Use Python 3.11 from Miniconda.
53 | 
54 | - torch==2.3.0
55 | - accelerate==0.33.0
56 | - einops==0.7.0
57 | - matplotlib==3.7.0
58 | - numpy==1.23.5
59 | - pandas==1.5.3
60 | - scikit_learn==1.2.2
61 | - scipy==1.12.0
62 | - tqdm==4.65.0
63 | - peft==0.12.0
64 | - transformers==4.44.0
65 | - deepspeed==0.15.1
66 | - sentencepiece==0.2.0
67 | 
68 | 
69 | 
70 | ## Get Started
71 | 
72 | 1. Install requirements: `pip install -r requirements.txt`
73 | 2. Download data. You can download all the datasets from [Google Drive](https://drive.google.com/u/0/uc?id=1NF7VEefXCmXuWNbnNe858WvQAkJ_7wuP&export=download), [Baidu Drive](https://pan.baidu.com/share/init?surl=r3KhGd0Q9PJIUZdfEYoymg&pwd=i9iy) or [Kaggle Datasets](https://www.kaggle.com/datasets/wentixiaogege/time-series-dataset). **All the datasets are well pre-processed** and can be used easily.
74 | 3. Train the model by following the examples in `./scripts`.
75 | 
76 | 
77 | ## Acknowledgement
78 | 
79 | We appreciate the following GitHub repos for their valuable code and efforts.
80 | - Time-Series-Library (https://github.com/thuml/Time-Series-Library)
81 | - TimeMixer (https://github.com/kwuking/TimeMixer)
82 | - Time-LLM (https://github.com/KimMeen/Time-LLM)
83 | - Autoformer (https://github.com/thuml/Autoformer)
84 | - iTransformer (https://github.com/thuml/iTransformer)
85 | 
--------------------------------------------------------------------------------
/utils/data_analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from scipy.stats import entropy
4 | 
5 | 
6 | def forecastabilty(ts):
7 |     """Forecastability Measure.
8 | 
9 |     Args:
10 |         ts: time series
11 | 
12 |     Returns:
13 |         1 - the entropy of the Fourier transform of the
14 |         time series / entropy of white noise
15 |     """
16 |     ts = (ts - ts.min())/(ts.max()-ts.min()+0.1)
17 |     # fourier_ts = np.fft.rfft(ts).real
18 |     fourier_ts = abs(np.fft.rfft(ts))
19 |     fourier_ts = (fourier_ts - fourier_ts.min()) / (
20 |         fourier_ts.max() - fourier_ts.min())
21 |     fourier_ts /= fourier_ts.sum()
22 |     entropy_ts = entropy(fourier_ts)
23 |     fore_ts = 1-entropy_ts/(np.log(len(ts)))
24 |     if np.isnan(fore_ts):
25 |         return 0
26 |     return fore_ts
27 | 
28 | 
29 | def forecastabilty_moving(ts, window, jump=1):
30 |     """Calculates the forecastability of a moving window.
31 | 
32 |     Args:
33 |         ts: time series
34 |         window: length of slices
35 |         jump: skipped step when taking subslices
36 | 
37 |     Returns:
38 |         a list of forecastability measures for all slices (a single value for very short series).
39 | """ 40 | 41 | # ts = Trend(ts).detrend() 42 | if len(ts) <= 25: 43 | return forecastabilty(ts) 44 | fore_lst = np.array([ 45 | forecastabilty(ts[i - window:i]) 46 | for i in np.arange(window, len(ts), jump) 47 | ]) 48 | fore_lst = fore_lst[~np.isnan(fore_lst)] # drop nan 49 | return fore_lst 50 | 51 | 52 | class Trend(): 53 | """Trend test.""" 54 | 55 | def __init__(self, ts): 56 | self.ts = ts 57 | self.train_length = len(ts) 58 | self.a, self.b = self.find_trend(ts) 59 | 60 | def find_trend(self, insample_data): 61 | # fit a linear regression y=ax+b on the time series 62 | x = np.arange(len(insample_data)) 63 | a, b = np.polyfit(x, insample_data, 1) 64 | return a, b 65 | 66 | def detrend(self): 67 | # remove trend 68 | return self.ts - (self.a * np.arange(0, len(self.ts), 1) + self.b) 69 | 70 | def inverse_input(self, insample_data): 71 | # add trend back to the input part of time series 72 | return insample_data + (self.a * np.arange(0, len(self.ts), 1) + self.b) 73 | 74 | def inverse_pred(self, outsample_data): 75 | # add trend back to the predictions 76 | return outsample_data + ( 77 | self.a * np.arange(self.train_length, 78 | self.train_length + len(outsample_data), 1) + self.b) 79 | 80 | 81 | def seasonality_test(original_ts, ppy): 82 | """Seasonality test. 83 | 84 | Args: 85 | original_ts: time series 86 | ppy: periods per year/frequency 87 | 88 | Returns: 89 | boolean value: whether the TS is seasonal 90 | """ 91 | 92 | s = acf(original_ts, 1) 93 | for i in range(2, ppy): 94 | s = s + (acf(original_ts, i)**2) 95 | 96 | limit = 1.645 * (np.sqrt((1 + 2 * s) / len(original_ts))) 97 | 98 | return (abs(acf(original_ts, ppy))) > limit 99 | 100 | 101 | def acf(ts, k): 102 | """Autocorrelation function. 103 | 104 | Args: 105 | ts: time series 106 | k: lag 107 | 108 | Returns: 109 | acf value 110 | """ 111 | m = np.mean(ts) 112 | s1 = 0 113 | for i in range(k, len(ts)): 114 | s1 = s1 + ((ts[i] - m) * (ts[i - k] - m)) 115 | 116 | s2 = 0 117 | for i in range(0, len(ts)): 118 | s2 = s2 + ((ts[i] - m)**2) 119 | 120 | return float(s1 / s2) -------------------------------------------------------------------------------- /utils/losses.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | Loss functions for PyTorch. 17 | """ 18 | 19 | import torch as t 20 | import torch.nn as nn 21 | import numpy as np 22 | import pdb 23 | 24 | 25 | def divide_no_nan(a, b): 26 | """ 27 | a/b where the resulted NaN or Inf are replaced by 0. 
28 | """ 29 | result = a / b 30 | result[result != result] = .0 31 | result[result == np.inf] = .0 32 | return result 33 | 34 | 35 | class mape_loss(nn.Module): 36 | def __init__(self): 37 | super(mape_loss, self).__init__() 38 | 39 | def forward(self, insample: t.Tensor, freq: int, 40 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 41 | """ 42 | MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error 43 | 44 | :param forecast: Forecast values. Shape: batch, time 45 | :param target: Target values. Shape: batch, time 46 | :param mask: 0/1 mask. Shape: batch, time 47 | :return: Loss value 48 | """ 49 | weights = divide_no_nan(mask, target) 50 | return t.mean(t.abs((forecast - target) * weights)) 51 | 52 | 53 | class smape_loss(nn.Module): 54 | def __init__(self): 55 | super(smape_loss, self).__init__() 56 | 57 | def forward(self, insample: t.Tensor, freq: int, 58 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 59 | """ 60 | sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993) 61 | 62 | :param forecast: Forecast values. Shape: batch, time 63 | :param target: Target values. Shape: batch, time 64 | :param mask: 0/1 mask. Shape: batch, time 65 | :return: Loss value 66 | """ 67 | return 200 * t.mean(divide_no_nan(t.abs(forecast - target), 68 | t.abs(forecast.data) + t.abs(target.data)) * mask) 69 | 70 | 71 | class mase_loss(nn.Module): 72 | def __init__(self): 73 | super(mase_loss, self).__init__() 74 | 75 | def forward(self, insample: t.Tensor, freq: int, 76 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 77 | """ 78 | MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf 79 | 80 | :param insample: Insample values. Shape: batch, time_i 81 | :param freq: Frequency value 82 | :param forecast: Forecast values. Shape: batch, time_o 83 | :param target: Target values. Shape: batch, time_o 84 | :param mask: 0/1 mask. 
Shape: batch, time_o
85 |         :return: Loss value
86 |         """
87 |         masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
88 |         masked_masep_inv = divide_no_nan(mask, masep[:, None])
89 |         return t.mean(t.abs(target - forecast) * masked_masep_inv)
90 | 
--------------------------------------------------------------------------------
/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 | 
8 | 
9 | class TimeFeature:
10 |     def __init__(self):
11 |         pass
12 | 
13 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
14 |         pass
15 | 
16 |     def __repr__(self):
17 |         return self.__class__.__name__ + "()"
18 | 
19 | 
20 | class SecondOfMinute(TimeFeature):
21 |     """Second of minute encoded as value between [-0.5, 0.5]"""
22 | 
23 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
24 |         return index.second / 59.0 - 0.5
25 | 
26 | 
27 | class MinuteOfHour(TimeFeature):
28 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
29 | 
30 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
31 |         return index.minute / 59.0 - 0.5
32 | 
33 | 
34 | class HourOfDay(TimeFeature):
35 |     """Hour of day encoded as value between [-0.5, 0.5]"""
36 | 
37 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
38 |         return index.hour / 23.0 - 0.5
39 | 
40 | 
41 | class DayOfWeek(TimeFeature):
42 |     """Day of week encoded as value between [-0.5, 0.5]"""
43 | 
44 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
45 |         return index.dayofweek / 6.0 - 0.5
46 | 
47 | 
48 | class DayOfMonth(TimeFeature):
49 |     """Day of month encoded as value between [-0.5, 0.5]"""
50 | 
51 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
52 |         return (index.day - 1) / 30.0 - 0.5
53 | 
54 | 
55 | class DayOfYear(TimeFeature):
56 |     """Day of year encoded as value between [-0.5, 0.5]"""
57 | 
58 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
59 |         return (index.dayofyear - 1) / 365.0 - 0.5
60 | 
61 | 
62 | class MonthOfYear(TimeFeature):
63 |     """Month of year encoded as value between [-0.5, 0.5]"""
64 | 
65 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
66 |         return (index.month - 1) / 11.0 - 0.5
67 | 
68 | 
69 | class WeekOfYear(TimeFeature):
70 |     """Week of year encoded as value between [-0.5, 0.5]"""
71 | 
72 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
73 |         return (index.isocalendar().week - 1) / 52.0 - 0.5
74 | 
75 | 
76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
77 |     """
78 |     Returns a list of time features that will be appropriate for the given frequency string.
79 |     Parameters
80 |     ----------
81 |     freq_str
82 |         Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) 135 | -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | import matplotlib.pyplot as plt 5 | 6 | plt.switch_backend('agg') 7 | 8 | 9 | def adjust_learning_rate(optimizer, scheduler, epoch, args, printout=True): 10 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 11 | if args.lradj == 'type1': 12 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} 13 | elif args.lradj == 'type2': 14 | lr_adjust = { 15 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 16 | 10: 5e-7, 15: 1e-7, 20: 5e-8 17 | } 18 | elif args.lradj == 'type3': 19 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 20 | elif args.lradj == 'PEMS': 21 | lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))} 22 | elif args.lradj == 'TST': 23 | lr_adjust = {epoch: scheduler.get_last_lr()[0]} 24 | if epoch in lr_adjust.keys(): 25 | lr = lr_adjust[epoch] 26 | for param_group in optimizer.param_groups: 27 | param_group['lr'] = lr 28 | if printout: print('Updating learning rate to {}'.format(lr)) 29 | 30 | 31 | class EarlyStopping: 32 | def __init__(self, patience=7, verbose=False, delta=0): 33 | self.patience = patience 34 | self.verbose = verbose 35 | self.counter = 0 36 | self.best_score = None 37 | self.early_stop = False 38 | self.val_loss_min = np.Inf 39 | self.delta = delta 40 | 41 | def __call__(self, val_loss, model, path): 42 | score = -val_loss 43 | if self.best_score is None: 44 | self.best_score = score 45 | self.save_checkpoint(val_loss, model, path) 46 | elif score < self.best_score + self.delta: 47 | self.counter += 1 48 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 49 | if self.counter >= self.patience: 50 | self.early_stop = True 51 | else: 52 | self.best_score = score 53 | self.save_checkpoint(val_loss, model, path) 54 | self.counter = 0 55 | 56 | def save_checkpoint(self, val_loss, model, path): 57 | if self.verbose: 58 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> 
{val_loss:.6f}). Saving model ...') 59 | torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') 60 | self.val_loss_min = val_loss 61 | 62 | 63 | class dotdict(dict): 64 | """dot.notation access to dictionary attributes""" 65 | __getattr__ = dict.get 66 | __setattr__ = dict.__setitem__ 67 | __delattr__ = dict.__delitem__ 68 | 69 | 70 | class StandardScaler(): 71 | def __init__(self, mean, std): 72 | self.mean = mean 73 | self.std = std 74 | 75 | def transform(self, data): 76 | return (data - self.mean) / self.std 77 | 78 | def inverse_transform(self, data): 79 | return (data * self.std) + self.mean 80 | 81 | 82 | def save_to_csv(true, preds=None, name='./pic/test.pdf'): 83 | """ 84 | Results visualization 85 | """ 86 | data = pd.DataFrame({'true': true, 'preds': preds}) 87 | data.to_csv(name, index=False, sep=',') 88 | 89 | 90 | def visual(true, preds=None, name='./pic/test.pdf'): 91 | """ 92 | Results visualization 93 | """ 94 | plt.figure() 95 | plt.plot(true, label='GroundTruth', linewidth=2) 96 | if preds is not None: 97 | plt.plot(preds, label='Prediction', linewidth=2) 98 | plt.legend() 99 | plt.savefig(name, bbox_inches='tight') 100 | 101 | 102 | def visual_weights(weights, name='./pic/test.pdf'): 103 | """ 104 | Weights visualization 105 | """ 106 | fig, ax = plt.subplots() 107 | # im = ax.imshow(weights, cmap='plasma_r') 108 | im = ax.imshow(weights, cmap='YlGnBu') 109 | fig.colorbar(im, pad=0.03, location='top') 110 | plt.savefig(name, dpi=500, pad_inches=0.02) 111 | plt.close() 112 | 113 | 114 | def adjustment(gt, pred): 115 | anomaly_state = False 116 | for i in range(len(gt)): 117 | if gt[i] == 1 and pred[i] == 1 and not anomaly_state: 118 | anomaly_state = True 119 | for j in range(i, 0, -1): 120 | if gt[j] == 0: 121 | break 122 | else: 123 | if pred[j] == 0: 124 | pred[j] = 1 125 | for j in range(i, len(gt)): 126 | if gt[j] == 0: 127 | break 128 | else: 129 | if pred[j] == 0: 130 | pred[j] = 1 131 | elif gt[i] == 0: 132 | anomaly_state = False 133 | if anomaly_state: 134 | pred[i] = 1 135 | return gt, pred 136 | 137 | 138 | def cal_accuracy(y_pred, y_true): 139 | return np.mean(y_pred == y_true) 140 | -------------------------------------------------------------------------------- /exp/torchsummary.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | from collections import OrderedDict 6 | import numpy as np 7 | 8 | 9 | def summary(model, input_size, batch_size=-1, device="cuda"): 10 | 11 | def register_hook(module): 12 | 13 | def hook(module, input, output): 14 | class_name = str(module.__class__).split(".")[-1].split("'")[0] 15 | module_idx = len(summary) 16 | 17 | m_key = "%s-%i" % (class_name, module_idx + 1) 18 | summary[m_key] = OrderedDict() 19 | # summary[m_key]["input_shape"] = list(input[0].size()) 20 | if isinstance(input[0], (list, tuple)): 21 | summary[m_key]["input_shape"] = [ 22 | [-1] + list(i.size())[1:] for i in input[0] 23 | ] 24 | summary[m_key]["input_shape"][0] = batch_size 25 | else: 26 | summary[m_key]["input_shape"] = list(input[0].size()) 27 | summary[m_key]["input_shape"][0] = batch_size 28 | 29 | if isinstance(output, (list, tuple)): 30 | summary[m_key]["output_shape"] = [ 31 | [-1] + list(o.size())[1:] for o in output 32 | ] 33 | else: 34 | summary[m_key]["output_shape"] = list(output.size()) 35 | summary[m_key]["output_shape"][0] = batch_size 36 | 37 | params = 0 38 | if hasattr(module, "weight") and 
hasattr(module.weight, "size"): 39 | params += torch.prod(torch.LongTensor(list(module.weight.size()))) 40 | summary[m_key]["trainable"] = module.weight.requires_grad 41 | if hasattr(module, "bias") and hasattr(module.bias, "size"): 42 | params += torch.prod(torch.LongTensor(list(module.bias.size()))) 43 | summary[m_key]["nb_params"] = params 44 | 45 | if ( 46 | not isinstance(module, nn.Sequential) 47 | and not isinstance(module, nn.ModuleList) 48 | and not (module == model) 49 | ): 50 | hooks.append(module.register_forward_hook(hook)) 51 | 52 | device = device.lower() 53 | assert device in [ 54 | "cuda", 55 | "cpu", 56 | ], "Input device is not valid, please specify 'cuda' or 'cpu'" 57 | 58 | if device == "cuda" and torch.cuda.is_available(): 59 | dtype = torch.cuda.FloatTensor 60 | else: 61 | dtype = torch.FloatTensor 62 | 63 | # multiple inputs to the network 64 | if isinstance(input_size, tuple): 65 | input_size = [input_size] 66 | 67 | # batch_size of 2 for batchnorm 68 | x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size] 69 | # print(type(x[0])) 70 | 71 | # create properties 72 | summary = OrderedDict() 73 | hooks = [] 74 | 75 | # register hook 76 | model.apply(register_hook) 77 | 78 | # make a forward pass 79 | # print(x.shape) 80 | model(*x) 81 | 82 | # remove these hooks 83 | for h in hooks: 84 | h.remove() 85 | 86 | print("----------------------------------------------------------------") 87 | line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #") 88 | print(line_new) 89 | print("================================================================") 90 | total_params = 0 91 | total_output = 0 92 | trainable_params = 0 93 | for layer in summary: 94 | # input_shape, output_shape, trainable, nb_params 95 | line_new = "{:>20} {:>25} {:>15}".format( 96 | layer, 97 | str(summary[layer]["output_shape"]), 98 | "{0:,}".format(summary[layer]["nb_params"]), 99 | ) 100 | total_params += summary[layer]["nb_params"] 101 | total_output += np.prod(summary[layer]["output_shape"]) 102 | if "trainable" in summary[layer]: 103 | if summary[layer]["trainable"] == True: 104 | trainable_params += summary[layer]["nb_params"] 105 | print(line_new) 106 | 107 | # assume 4 bytes/number (float on cuda). 108 | # total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.)) 109 | total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients 110 | total_params_size = abs(total_params.numpy() * 4. 
/ (1024 ** 2.)) 111 | # total_size = total_params_size + total_output_size + total_input_size 112 | 113 | print("================================================================") 114 | print("Total params: {0:,}".format(total_params)) 115 | print("Trainable params: {0:,}".format(trainable_params)) 116 | print("Non-trainable params: {0:,}".format(total_params - trainable_params)) 117 | print("----------------------------------------------------------------") 118 | # print("Input size (MB): %0.2f" % total_input_size) 119 | print("Forward/backward pass size (MB): %0.2f" % total_output_size) 120 | print("Params size (MB): %0.2f" % total_params_size) 121 | # print("Estimated Total Size (MB): %0.2f" % total_size) 122 | print("----------------------------------------------------------------") 123 | # return summary 124 | -------------------------------------------------------------------------------- /layers/Transformer_EncDec1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class ConvLayer(nn.Module): 6 | def __init__(self, c_in): 7 | super(ConvLayer, self).__init__() 8 | self.downConv = nn.Conv1d(in_channels=c_in, 9 | out_channels=c_in, 10 | kernel_size=3, 11 | padding=2, 12 | padding_mode='circular') 13 | self.norm = nn.BatchNorm1d(c_in) 14 | self.activation = nn.ELU() 15 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 16 | 17 | def forward(self, x): 18 | x = self.downConv(x.permute(0, 2, 1)) 19 | x = self.norm(x) 20 | x = self.activation(x) 21 | x = self.maxPool(x) 22 | x = x.transpose(1, 2) 23 | return x 24 | 25 | 26 | class EncoderLayer(nn.Module): 27 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 28 | super(EncoderLayer, self).__init__() 29 | d_ff = d_ff or 4 * d_model 30 | self.attention = attention 31 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 32 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 33 | self.norm1 = nn.LayerNorm(d_model) 34 | self.norm2 = nn.LayerNorm(d_model) 35 | self.dropout = nn.Dropout(dropout) 36 | self.activation = F.relu if activation == "relu" else F.gelu 37 | 38 | def forward(self, x, attn_mask=None, tau=None, delta=None): 39 | new_x, attn = self.attention( 40 | x, x, x, 41 | attn_mask=attn_mask, 42 | tau=tau, delta=delta 43 | ) 44 | x = x + self.dropout(new_x) 45 | 46 | y = x = self.norm1(x) 47 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 48 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 49 | 50 | return self.norm2(x + y), attn 51 | 52 | 53 | class Encoder(nn.Module): 54 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 55 | super(Encoder, self).__init__() 56 | self.attn_layers = nn.ModuleList(attn_layers) 57 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 58 | self.norm = norm_layer 59 | 60 | def forward(self, x, attn_mask=None, tau=None, delta=None): 61 | # x [B, L, D] 62 | attns = [] 63 | if self.conv_layers is not None: 64 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): 65 | delta = delta if i == 0 else None 66 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 67 | x = conv_layer(x) 68 | attns.append(attn) 69 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None) 70 | attns.append(attn) 71 | else: 72 | for attn_layer in self.attn_layers: 73 | x, attn = attn_layer(x, 
attn_mask=attn_mask, tau=tau, delta=delta) 74 | attns.append(attn) 75 | 76 | if self.norm is not None: 77 | x = self.norm(x) 78 | 79 | return x, attns 80 | 81 | 82 | class DecoderLayer(nn.Module): 83 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 84 | dropout=0.1, activation="relu"): 85 | super(DecoderLayer, self).__init__() 86 | d_ff = d_ff or 4 * d_model 87 | self.self_attention = self_attention 88 | self.cross_attention = cross_attention 89 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 90 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 91 | self.norm1 = nn.LayerNorm(d_model) 92 | self.norm2 = nn.LayerNorm(d_model) 93 | self.norm3 = nn.LayerNorm(d_model) 94 | self.dropout = nn.Dropout(dropout) 95 | self.activation = F.relu if activation == "relu" else F.gelu 96 | 97 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 98 | x = x + self.dropout(self.self_attention( 99 | x, x, x, 100 | attn_mask=x_mask, 101 | tau=tau, delta=None 102 | )[0]) 103 | x = self.norm1(x) 104 | 105 | x = x + self.dropout(self.cross_attention( 106 | x, cross, cross, 107 | attn_mask=cross_mask, 108 | tau=tau, delta=delta 109 | )[0]) 110 | 111 | y = x = self.norm2(x) 112 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 113 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 114 | 115 | return self.norm3(x + y) 116 | 117 | 118 | class Decoder(nn.Module): 119 | def __init__(self, layers, norm_layer=None, projection=None): 120 | super(Decoder, self).__init__() 121 | self.layers = nn.ModuleList(layers) 122 | self.norm = norm_layer 123 | self.projection = projection 124 | 125 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 126 | for layer in self.layers: 127 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) 128 | 129 | if self.norm is not None: 130 | x = self.norm(x) 131 | 132 | if self.projection is not None: 133 | x = self.projection(x) 134 | return x 135 | -------------------------------------------------------------------------------- /layers/Transformer_EncDec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ConvLayer(nn.Module): 7 | def __init__(self, c_in): 8 | super(ConvLayer, self).__init__() 9 | self.downConv = nn.Conv1d(in_channels=c_in, 10 | out_channels=c_in, 11 | kernel_size=3, 12 | padding=2, 13 | padding_mode='circular') 14 | self.norm = nn.BatchNorm1d(c_in) 15 | self.activation = nn.ELU() 16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 17 | 18 | def forward(self, x): 19 | x = self.downConv(x.permute(0, 2, 1)) 20 | x = self.norm(x) 21 | x = self.activation(x) 22 | x = self.maxPool(x) 23 | x = x.transpose(1, 2) 24 | return x 25 | 26 | 27 | class EncoderLayer(nn.Module): 28 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 29 | super(EncoderLayer, self).__init__() 30 | d_ff = d_ff or 4 * d_model 31 | self.attention = attention 32 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 33 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 34 | self.norm1 = nn.LayerNorm(d_model) 35 | self.norm2 = nn.LayerNorm(d_model) 36 | self.dropout = nn.Dropout(dropout) 37 | self.activation = F.relu if activation == "relu" else F.gelu 38 | 39 | def forward(self, x, attn_mask=None, 
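# annotation: tau/delta are optional de-stationarization factors forwarded to
# the attention module; both default to None for plain self-attention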
tau=None, delta=None): 40 | new_x, attn = self.attention( 41 | x, x, x, 42 | attn_mask=attn_mask, 43 | tau=tau, delta=delta 44 | ) 45 | x = x + self.dropout(new_x) 46 | 47 | y = x = self.norm1(x) 48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 49 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 50 | 51 | return self.norm2(x + y), attn 52 | 53 | 54 | class Encoder(nn.Module): 55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 56 | super(Encoder, self).__init__() 57 | self.attn_layers = nn.ModuleList(attn_layers) 58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 59 | self.norm = norm_layer 60 | 61 | def forward(self, x, attn_mask=None, tau=None, delta=None): 62 | # x [B, L, D] 63 | attns = [] 64 | if self.conv_layers is not None: 65 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): 66 | delta = delta if i == 0 else None 67 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 68 | x = conv_layer(x) 69 | attns.append(attn) 70 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None) 71 | attns.append(attn) 72 | else: 73 | for attn_layer in self.attn_layers: 74 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 75 | attns.append(attn) 76 | 77 | if self.norm is not None: 78 | x = self.norm(x) 79 | 80 | return x, attns 81 | 82 | 83 | class DecoderLayer(nn.Module): 84 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 85 | dropout=0.1, activation="relu"): 86 | super(DecoderLayer, self).__init__() 87 | d_ff = d_ff or 4 * d_model 88 | self.self_attention = self_attention 89 | self.cross_attention = cross_attention 90 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 91 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 92 | self.norm1 = nn.LayerNorm(d_model) 93 | self.norm2 = nn.LayerNorm(d_model) 94 | self.norm3 = nn.LayerNorm(d_model) 95 | self.dropout = nn.Dropout(dropout) 96 | self.activation = F.relu if activation == "relu" else F.gelu 97 | 98 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 99 | x = x + self.dropout(self.self_attention( 100 | x, x, x, 101 | attn_mask=x_mask, 102 | tau=tau, delta=None 103 | )[0]) 104 | x = self.norm1(x) 105 | 106 | x = x + self.dropout(self.cross_attention( 107 | x, cross, cross, 108 | attn_mask=cross_mask, 109 | tau=tau, delta=delta 110 | )[0]) 111 | 112 | y = x = self.norm2(x) 113 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 114 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 115 | 116 | return self.norm3(x + y) 117 | 118 | 119 | class Decoder(nn.Module): 120 | def __init__(self, layers, norm_layer=None, projection=None): 121 | super(Decoder, self).__init__() 122 | self.layers = nn.ModuleList(layers) 123 | self.norm = norm_layer 124 | self.projection = projection 125 | 126 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 127 | for layer in self.layers: 128 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) 129 | 130 | if self.norm is not None: 131 | x = self.norm(x) 132 | 133 | if self.projection is not None: 134 | x = self.projection(x) 135 | return x 136 | -------------------------------------------------------------------------------- /data_provider/uea.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 
| import torch
5 | 
6 | 
7 | def collate_fn(data, max_len=None):
8 |     """Build mini-batch tensors from a list of (X, y) tuples, zero-padding (or clipping) each series to a common length.
9 |     Args:
10 |         data: len(batch_size) list of tuples (X, y).
11 |             - X: torch tensor of shape (seq_length, feat_dim); variable seq_length.
12 |             - y: torch tensor of shape (num_labels,): class indices or numerical targets
13 |                 (for classification or regression, respectively). num_labels > 1 for multi-task models
14 |         max_len: global fixed sequence length. Used for architectures requiring fixed length input,
15 |             where the batch length cannot vary dynamically. Longer sequences are clipped, shorter are padded with 0s
16 |     Returns:
17 |         X: (batch_size, padded_length, feat_dim) torch tensor of padded features (input)
18 |         targets: (batch_size, num_labels) torch tensor of target values (output)
19 |         padding_masks: (batch_size, padded_length) boolean tensor,
20 |             1 means keep vector at this position, 0 means padding
21 | 
22 |     """
23 | 
24 |     batch_size = len(data)
25 |     features, labels = zip(*data)
26 | 
27 |     # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension)
28 |     lengths = [X.shape[0] for X in features]  # original sequence length for each time series
29 |     if max_len is None:
30 |         max_len = max(lengths)
31 |     X = torch.zeros(batch_size, max_len, features[0].shape[-1])  # (batch_size, padded_length, feat_dim)
32 |     for i in range(batch_size):
33 |         end = min(lengths[i], max_len)
34 |         X[i, :end, :] = features[i][:end, :]
35 | 
36 |     targets = torch.stack(labels, dim=0)  # (batch_size, num_labels)
37 | 
38 |     padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16),
39 |                                  max_len=max_len)  # (batch_size, padded_length) boolean tensor, "1" means keep
40 | 
41 |     return X, targets, padding_masks
42 | 
43 | 
44 | def padding_mask(lengths, max_len=None):
45 |     """
46 |     Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths,
47 |     where 1 means keep element at this position (time step)
48 |     """
49 |     batch_size = lengths.numel()
50 |     max_len = max_len or lengths.max()  # trick works because of overloading of 'or' operator for non-boolean types
51 |     return (torch.arange(0, max_len, device=lengths.device)
52 |             .type_as(lengths)
53 |             .repeat(batch_size, 1)
54 |             .lt(lengths.unsqueeze(1)))
55 | 
56 | 
57 | class Normalizer(object):
58 |     """
59 |     Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization.
60 |     """
61 | 
62 |     def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
63 |         """
64 |         Args:
65 |             norm_type: choose from:
66 |                 "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
67 |                 "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e.
across only its own rows) 68 | mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values 69 | """ 70 | 71 | self.norm_type = norm_type 72 | self.mean = mean 73 | self.std = std 74 | self.min_val = min_val 75 | self.max_val = max_val 76 | 77 | def normalize(self, df): 78 | """ 79 | Args: 80 | df: input dataframe 81 | Returns: 82 | df: normalized dataframe 83 | """ 84 | if self.norm_type == "standardization": 85 | if self.mean is None: 86 | self.mean = df.mean() 87 | self.std = df.std() 88 | return (df - self.mean) / (self.std + np.finfo(float).eps) 89 | 90 | elif self.norm_type == "minmax": 91 | if self.max_val is None: 92 | self.max_val = df.max() 93 | self.min_val = df.min() 94 | return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps) 95 | 96 | elif self.norm_type == "per_sample_std": 97 | grouped = df.groupby(by=df.index) 98 | return (df - grouped.transform('mean')) / grouped.transform('std') 99 | 100 | elif self.norm_type == "per_sample_minmax": 101 | grouped = df.groupby(by=df.index) 102 | min_vals = grouped.transform('min') 103 | return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps) 104 | 105 | else: 106 | raise (NameError(f'Normalize method "{self.norm_type}" not implemented')) 107 | 108 | 109 | def interpolate_missing(y): 110 | """ 111 | Replaces NaN values in pd.Series `y` using linear interpolation 112 | """ 113 | if y.isna().any(): 114 | y = y.interpolate(method='linear', limit_direction='both') 115 | return y 116 | 117 | 118 | def subsample(y, limit=256, factor=2): 119 | """ 120 | If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor 121 | """ 122 | if len(y) > limit: 123 | return y[::factor].reset_index(drop=True) 124 | return y 125 | -------------------------------------------------------------------------------- /layers/Embed1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | 6 | class PositionalEmbedding(nn.Module): 7 | def __init__(self, d_model, max_len=5000): 8 | super(PositionalEmbedding, self).__init__() 9 | # Compute the positional encodings once in log space. 
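# The code below implements the standard sinusoidal encoding
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# where div_term = exp(2i * (-log(10000) / d_model)) = 10000^(-2i / d_model);
# computing it in log space avoids raising 10000 to large powers directly.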
10 | pe = torch.zeros(max_len, d_model).float() 11 | pe.require_grad = False 12 | 13 | position = torch.arange(0, max_len).float().unsqueeze(1) 14 | div_term = (torch.arange(0, d_model, 2).float() 15 | * -(math.log(10000.0) / d_model)).exp() 16 | 17 | pe[:, 0::2] = torch.sin(position * div_term) 18 | pe[:, 1::2] = torch.cos(position * div_term) 19 | 20 | pe = pe.unsqueeze(0) 21 | self.register_buffer('pe', pe) 22 | 23 | def forward(self, x): 24 | return self.pe[:, :x.size(1)] 25 | 26 | 27 | class TokenEmbedding(nn.Module): 28 | def __init__(self, c_in, d_model): 29 | super(TokenEmbedding, self).__init__() 30 | padding = 1 if torch.__version__ >= '1.5.0' else 2 31 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 32 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv1d): 35 | nn.init.kaiming_normal_( 36 | m.weight, mode='fan_in', nonlinearity='leaky_relu') 37 | 38 | def forward(self, x): 39 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 40 | return x 41 | 42 | 43 | class FixedEmbedding(nn.Module): 44 | def __init__(self, c_in, d_model): 45 | super(FixedEmbedding, self).__init__() 46 | 47 | w = torch.zeros(c_in, d_model).float() 48 | w.require_grad = False 49 | 50 | position = torch.arange(0, c_in).float().unsqueeze(1) 51 | div_term = (torch.arange(0, d_model, 2).float() 52 | * -(math.log(10000.0) / d_model)).exp() 53 | 54 | w[:, 0::2] = torch.sin(position * div_term) 55 | w[:, 1::2] = torch.cos(position * div_term) 56 | 57 | self.emb = nn.Embedding(c_in, d_model) 58 | self.emb.weight = nn.Parameter(w, requires_grad=False) 59 | 60 | def forward(self, x): 61 | return self.emb(x).detach() 62 | 63 | 64 | class TemporalEmbedding(nn.Module): 65 | def __init__(self, d_model, embed_type='fixed', freq='h'): 66 | super(TemporalEmbedding, self).__init__() 67 | 68 | minute_size = 4 69 | hour_size = 24 70 | weekday_size = 7 71 | day_size = 32 72 | month_size = 13 73 | 74 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 75 | if freq == 't': 76 | self.minute_embed = Embed(minute_size, d_model) 77 | self.hour_embed = Embed(hour_size, d_model) 78 | self.weekday_embed = Embed(weekday_size, d_model) 79 | self.day_embed = Embed(day_size, d_model) 80 | self.month_embed = Embed(month_size, d_model) 81 | 82 | def forward(self, x): 83 | x = x.long() 84 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr( 85 | self, 'minute_embed') else 0. 
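# x is expected to carry integer time features in the column order
# [month, day, weekday, hour, minute] (indices 0..4), matching the lookups
# below; minute_x falls back to the scalar 0. when freq != 't', since
# minute_embed is only created in that case.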
86 | hour_x = self.hour_embed(x[:, :, 3]) 87 | weekday_x = self.weekday_embed(x[:, :, 2]) 88 | day_x = self.day_embed(x[:, :, 1]) 89 | month_x = self.month_embed(x[:, :, 0]) 90 | 91 | return hour_x + weekday_x + day_x + month_x + minute_x 92 | 93 | 94 | class TimeFeatureEmbedding(nn.Module): 95 | def __init__(self, d_model, embed_type='timeF', freq='h'): 96 | super(TimeFeatureEmbedding, self).__init__() 97 | 98 | freq_map = {'h': 4, 't': 5, 's': 6, 99 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 100 | d_inp = freq_map[freq] 101 | self.embed = nn.Linear(d_inp, d_model, bias=False) 102 | 103 | def forward(self, x): 104 | return self.embed(x) 105 | 106 | 107 | class DataEmbedding(nn.Module): 108 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 109 | super(DataEmbedding, self).__init__() 110 | 111 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 112 | self.position_embedding = PositionalEmbedding(d_model=d_model) 113 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 114 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 115 | d_model=d_model, embed_type=embed_type, freq=freq) 116 | self.dropout = nn.Dropout(p=dropout) 117 | 118 | def forward(self, x, x_mark): 119 | if x_mark is None: 120 | x = self.value_embedding(x) + self.position_embedding(x) 121 | else: 122 | x = self.value_embedding( 123 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 124 | return self.dropout(x) 125 | 126 | 127 | class DataEmbedding_inverted(nn.Module): 128 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 129 | super(DataEmbedding_inverted, self).__init__() 130 | self.value_embedding = nn.Linear(c_in, d_model) 131 | self.dropout = nn.Dropout(p=dropout) 132 | 133 | def forward(self, x, x_mark): 134 | x = x.permute(0, 2, 1) 135 | # x: [Batch Variate Time] 136 | if x_mark is None: 137 | x = self.value_embedding(x) 138 | else: 139 | # the potential to take covariates (e.g. timestamps) as tokens 140 | x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) 141 | # x: [Batch Variate d_model] 142 | return self.dropout(x) 143 | 144 | -------------------------------------------------------------------------------- /utils/m4_summary.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved. 
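# The OWA (overall weighted average) score computed in M4Summary.evaluate below
# averages the model's sMAPE and MASE after scaling each by the Naive2 baseline:
#   OWA = 0.5 * (sMAPE_model / sMAPE_naive2 + MASE_model / MASE_naive2)
# Toy sanity check of the metric helpers (illustrative numbers only):
#   smape_2(np.array([110.0]), np.array([100.0]))  # -> 200 * 10 / 210 ≈ 9.52
#   mape(np.array([110.0]), np.array([100.0]))     # -> 100 * 10 / 100 = 10.0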
14 | 15 | """ 16 | M4 Summary 17 | """ 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from data_provider.m4 import M4Dataset 24 | from data_provider.m4 import M4Meta 25 | import os 26 | 27 | 28 | def group_values(values, groups, group_name): 29 | return np.array([v[~np.isnan(v)] for v in values[groups == group_name]]) 30 | 31 | 32 | def mase(forecast, insample, outsample, frequency): 33 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:])) 34 | 35 | 36 | def smape_2(forecast, target): 37 | denom = np.abs(target) + np.abs(forecast) 38 | # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway. 39 | denom[denom == 0.0] = 1.0 40 | return 200 * np.abs(forecast - target) / denom 41 | 42 | 43 | def mape(forecast, target): 44 | denom = np.abs(target) 45 | # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway. 46 | denom[denom == 0.0] = 1.0 47 | return 100 * np.abs(forecast - target) / denom 48 | 49 | 50 | class M4Summary: 51 | def __init__(self, file_path, root_path): 52 | self.file_path = file_path 53 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path) 54 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path) 55 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv') 56 | 57 | def evaluate(self): 58 | """ 59 | Evaluate forecasts against the M4 test dataset. Model forecasts are read from 60 | "{file_path}{group_name}_forecast.csv" for each seasonal pattern. 61 | 62 | :return: sMAPE, OWA, MAPE and MASE scores, grouped by seasonal patterns. 63 | """ 64 | grouped_owa = OrderedDict() 65 | 66 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32) 67 | naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts]) 68 | 69 | model_mases = {} 70 | naive2_smapes = {} 71 | naive2_mases = {} 72 | grouped_smapes = {} 73 | grouped_mapes = {} 74 | for group_name in M4Meta.seasonal_patterns: 75 | file_name = self.file_path + group_name + "_forecast.csv" 76 | if os.path.exists(file_name): 77 | model_forecast = pd.read_csv(file_name).values 78 | 79 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name) 80 | target = group_values(self.test_set.values, self.test_set.groups, group_name) 81 | # all timeseries within group have same frequency 82 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0] 83 | insample = group_values(self.training_set.values, self.test_set.groups, group_name) 84 | 85 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i], 86 | insample=insample[i], 87 | outsample=target[i], 88 | frequency=frequency) for i in range(len(model_forecast))]) 89 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i], 90 | insample=insample[i], 91 | outsample=target[i], 92 | frequency=frequency) for i in range(len(model_forecast))]) 93 | 94 | naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target)) 95 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target)) 96 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target)) 97 | 98 | grouped_smapes = self.summarize_groups(grouped_smapes) 99 | grouped_mapes = self.summarize_groups(grouped_mapes) 100 | grouped_model_mases = self.summarize_groups(model_mases) 101 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes) 102 | grouped_naive2_mases = self.summarize_groups(naive2_mases) 103 | for k in
grouped_model_mases.keys(): 104 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] + 105 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2 106 | 107 | def round_all(d): 108 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items())) 109 | 110 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all( 111 | grouped_model_mases) 112 | 113 | def summarize_groups(self, scores): 114 | """ 115 | Re-group scores respecting M4 rules. 116 | :param scores: Scores per group. 117 | :return: Grouped scores. 118 | """ 119 | scores_summary = OrderedDict() 120 | 121 | def group_count(group_name): 122 | return len(np.where(self.test_set.groups == group_name)[0]) 123 | 124 | weighted_score = {} 125 | for g in ['Yearly', 'Quarterly', 'Monthly']: 126 | weighted_score[g] = scores[g] * group_count(g) 127 | scores_summary[g] = scores[g] 128 | 129 | others_score = 0 130 | others_count = 0 131 | for g in ['Weekly', 'Daily', 'Hourly']: 132 | others_score += scores[g] * group_count(g) 133 | others_count += group_count(g) 134 | weighted_score['Others'] = others_score 135 | scores_summary['Others'] = others_score / others_count 136 | 137 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) 138 | scores_summary['Average'] = average 139 | 140 | return scores_summary 141 | -------------------------------------------------------------------------------- /layers/AutoCorrelation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import math 7 | from math import sqrt 8 | import os 9 | 10 | 11 | class AutoCorrelation(nn.Module): 12 | """ 13 | AutoCorrelation Mechanism with the following two phases: 14 | (1) period-based dependencies discovery 15 | (2) time delay aggregation 16 | This block can replace the self-attention family mechanism seamlessly. 17 | """ 18 | 19 | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False): 20 | super(AutoCorrelation, self).__init__() 21 | self.factor = factor 22 | self.scale = scale 23 | self.mask_flag = mask_flag 24 | self.output_attention = output_attention 25 | self.dropout = nn.Dropout(attention_dropout) 26 | 27 | def time_delay_agg_training(self, values, corr): 28 | """ 29 | SpeedUp version of Autocorrelation (a batch-normalization style design) 30 | This is for the training phase. 31 | """ 32 | head = values.shape[1] 33 | channel = values.shape[2] 34 | length = values.shape[3] 35 | # find top k 36 | top_k = int(self.factor * math.log(length)) 37 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) 38 | index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1] 39 | weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1) 40 | # update corr 41 | tmp_corr = torch.softmax(weights, dim=-1) 42 | # aggregation 43 | tmp_values = values 44 | delays_agg = torch.zeros_like(values).float() 45 | for i in range(top_k): 46 | pattern = torch.roll(tmp_values, -int(index[i]), -1) 47 | delays_agg = delays_agg + pattern * \ 48 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) 49 | return delays_agg 50 | 51 | def time_delay_agg_inference(self, values, corr): 52 | """ 53 | SpeedUp version of Autocorrelation (a batch-normalization style design) 54 | This is for the inference phase. 
55 | """ 56 | batch = values.shape[0] 57 | head = values.shape[1] 58 | channel = values.shape[2] 59 | length = values.shape[3] 60 | # index init 61 | init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() 62 | # find top k 63 | top_k = int(self.factor * math.log(length)) 64 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) 65 | weights, delay = torch.topk(mean_value, top_k, dim=-1) 66 | # update corr 67 | tmp_corr = torch.softmax(weights, dim=-1) 68 | # aggregation 69 | tmp_values = values.repeat(1, 1, 1, 2) 70 | delays_agg = torch.zeros_like(values).float() 71 | for i in range(top_k): 72 | tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length) 73 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) 74 | delays_agg = delays_agg + pattern * \ 75 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) 76 | return delays_agg 77 | 78 | def time_delay_agg_full(self, values, corr): 79 | """ 80 | Standard version of Autocorrelation 81 | """ 82 | batch = values.shape[0] 83 | head = values.shape[1] 84 | channel = values.shape[2] 85 | length = values.shape[3] 86 | # index init 87 | init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() 88 | # find top k 89 | top_k = int(self.factor * math.log(length)) 90 | weights, delay = torch.topk(corr, top_k, dim=-1) 91 | # update corr 92 | tmp_corr = torch.softmax(weights, dim=-1) 93 | # aggregation 94 | tmp_values = values.repeat(1, 1, 1, 2) 95 | delays_agg = torch.zeros_like(values).float() 96 | for i in range(top_k): 97 | tmp_delay = init_index + delay[..., i].unsqueeze(-1) 98 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) 99 | delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1)) 100 | return delays_agg 101 | 102 | def forward(self, queries, keys, values, attn_mask): 103 | B, L, H, E = queries.shape 104 | _, S, _, D = values.shape 105 | if L > S: 106 | zeros = torch.zeros_like(queries[:, :(L - S), :]).float() 107 | values = torch.cat([values, zeros], dim=1) 108 | keys = torch.cat([keys, zeros], dim=1) 109 | else: 110 | values = values[:, :L, :, :] 111 | keys = keys[:, :L, :, :] 112 | 113 | # period-based dependencies 114 | q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) 115 | k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) 116 | res = q_fft * torch.conj(k_fft) 117 | corr = torch.fft.irfft(res, dim=-1) 118 | 119 | # time delay agg 120 | if self.training: 121 | V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) 122 | else: 123 | V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) 124 | 125 | if self.output_attention: 126 | return (V.contiguous(), corr.permute(0, 3, 1, 2)) 127 | else: 128 | return (V.contiguous(), None) 129 | 130 | 131 | class AutoCorrelationLayer(nn.Module): 132 | def __init__(self, correlation, d_model, n_heads, d_keys=None, 133 | d_values=None): 134 | super(AutoCorrelationLayer, self).__init__() 135 | 136 | d_keys = d_keys or (d_model // n_heads) 137 | d_values = d_values or (d_model // n_heads) 138 | 139 | self.inner_correlation = correlation 140 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 141 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 142 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 143 | 
self.out_projection = nn.Linear(d_values * n_heads, d_model) 144 | self.n_heads = n_heads 145 | 146 | def forward(self, queries, keys, values, attn_mask): 147 | B, L, _ = queries.shape 148 | _, S, _ = keys.shape 149 | H = self.n_heads 150 | 151 | queries = self.query_projection(queries).view(B, L, H, -1) 152 | keys = self.key_projection(keys).view(B, S, H, -1) 153 | values = self.value_projection(values).view(B, S, H, -1) 154 | 155 | out, attn = self.inner_correlation( 156 | queries, 157 | keys, 158 | values, 159 | attn_mask 160 | ) 161 | out = out.view(B, L, -1) 162 | 163 | return self.out_projection(out), attn 164 | -------------------------------------------------------------------------------- /layers/Autoformer_EncDec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class my_Layernorm(nn.Module): 7 | """ 8 | Special designed layernorm for the seasonal part 9 | """ 10 | 11 | def __init__(self, channels): 12 | super(my_Layernorm, self).__init__() 13 | self.layernorm = nn.LayerNorm(channels) 14 | 15 | def forward(self, x): 16 | x_hat = self.layernorm(x) 17 | bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) 18 | return x_hat - bias 19 | 20 | 21 | class moving_avg(nn.Module): 22 | """ 23 | Moving average block to highlight the trend of time series 24 | """ 25 | 26 | def __init__(self, kernel_size, stride): 27 | super(moving_avg, self).__init__() 28 | self.kernel_size = kernel_size 29 | self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) 30 | 31 | def forward(self, x): 32 | # padding on the both ends of time series 33 | front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) 34 | end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) 35 | x = torch.cat([front, x, end], dim=1) 36 | x = self.avg(x.permute(0, 2, 1)) 37 | x = x.permute(0, 2, 1) 38 | return x 39 | 40 | 41 | class series_decomp(nn.Module): 42 | """ 43 | Series decomposition block 44 | """ 45 | 46 | def __init__(self, kernel_size): 47 | super(series_decomp, self).__init__() 48 | self.moving_avg = moving_avg(kernel_size, stride=1) 49 | 50 | def forward(self, x): 51 | moving_mean = self.moving_avg(x) 52 | res = x - moving_mean 53 | return res, moving_mean 54 | 55 | 56 | class series_decomp_multi(nn.Module): 57 | """ 58 | Multiple Series decomposition block from FEDformer 59 | """ 60 | 61 | def __init__(self, kernel_size): 62 | super(series_decomp_multi, self).__init__() 63 | self.kernel_size = kernel_size 64 | self.series_decomp = [series_decomp(kernel) for kernel in kernel_size] 65 | 66 | def forward(self, x): 67 | moving_mean = [] 68 | res = [] 69 | for func in self.series_decomp: 70 | sea, moving_avg = func(x) 71 | moving_mean.append(moving_avg) 72 | res.append(sea) 73 | 74 | sea = sum(res) / len(res) 75 | moving_mean = sum(moving_mean) / len(moving_mean) 76 | return sea, moving_mean 77 | 78 | 79 | class EncoderLayer(nn.Module): 80 | """ 81 | Autoformer encoder layer with the progressive decomposition architecture 82 | """ 83 | 84 | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): 85 | super(EncoderLayer, self).__init__() 86 | d_ff = d_ff or 4 * d_model 87 | self.attention = attention 88 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 89 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 90 | self.decomp1 = 
series_decomp(moving_avg) 91 | self.decomp2 = series_decomp(moving_avg) 92 | self.dropout = nn.Dropout(dropout) 93 | self.activation = F.relu if activation == "relu" else F.gelu 94 | 95 | def forward(self, x, attn_mask=None): 96 | new_x, attn = self.attention( 97 | x, x, x, 98 | attn_mask=attn_mask 99 | ) 100 | x = x + self.dropout(new_x) 101 | x, _ = self.decomp1(x) 102 | y = x 103 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 104 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 105 | res, _ = self.decomp2(x + y) 106 | return res, attn 107 | 108 | 109 | class Encoder(nn.Module): 110 | """ 111 | Autoformer encoder 112 | """ 113 | 114 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 115 | super(Encoder, self).__init__() 116 | self.attn_layers = nn.ModuleList(attn_layers) 117 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 118 | self.norm = norm_layer 119 | 120 | def forward(self, x, attn_mask=None): 121 | attns = [] 122 | if self.conv_layers is not None: 123 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): 124 | x, attn = attn_layer(x, attn_mask=attn_mask) 125 | x = conv_layer(x) 126 | attns.append(attn) 127 | x, attn = self.attn_layers[-1](x) 128 | attns.append(attn) 129 | else: 130 | for attn_layer in self.attn_layers: 131 | x, attn = attn_layer(x, attn_mask=attn_mask) 132 | attns.append(attn) 133 | 134 | if self.norm is not None: 135 | x = self.norm(x) 136 | 137 | return x, attns 138 | 139 | 140 | class DecoderLayer(nn.Module): 141 | """ 142 | Autoformer decoder layer with the progressive decomposition architecture 143 | """ 144 | 145 | def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, 146 | moving_avg=25, dropout=0.1, activation="relu"): 147 | super(DecoderLayer, self).__init__() 148 | d_ff = d_ff or 4 * d_model 149 | self.self_attention = self_attention 150 | self.cross_attention = cross_attention 151 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 152 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 153 | self.decomp1 = series_decomp(moving_avg) 154 | self.decomp2 = series_decomp(moving_avg) 155 | self.decomp3 = series_decomp(moving_avg) 156 | self.dropout = nn.Dropout(dropout) 157 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, 158 | padding_mode='circular', bias=False) 159 | self.activation = F.relu if activation == "relu" else F.gelu 160 | 161 | def forward(self, x, cross, x_mask=None, cross_mask=None): 162 | x = x + self.dropout(self.self_attention( 163 | x, x, x, 164 | attn_mask=x_mask 165 | )[0]) 166 | x, trend1 = self.decomp1(x) 167 | x = x + self.dropout(self.cross_attention( 168 | x, cross, cross, 169 | attn_mask=cross_mask 170 | )[0]) 171 | x, trend2 = self.decomp2(x) 172 | y = x 173 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 174 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 175 | x, trend3 = self.decomp3(x + y) 176 | 177 | residual_trend = trend1 + trend2 + trend3 178 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) 179 | return x, residual_trend 180 | 181 | 182 | class Decoder(nn.Module): 183 | """ 184 | Autoformer decoder 185 | """ 186 | 187 | def __init__(self, layers, norm_layer=None, projection=None): 188 | super(Decoder, self).__init__() 189 | self.layers = nn.ModuleList(layers) 190 | self.norm = norm_layer 191 | self.projection
= projection 192 | 193 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): 194 | for layer in self.layers: 195 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 196 | trend = trend + residual_trend 197 | 198 | if self.norm is not None: 199 | x = self.norm(x) 200 | 201 | if self.projection is not None: 202 | x = self.projection(x) 203 | return x, trend 204 | -------------------------------------------------------------------------------- /exp/exp_classification.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from data_provider.data_factory import data_provider 4 | from exp.exp_basic import Exp_Basic 5 | from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy 6 | import torch 7 | import torch.nn as nn 8 | from torch import optim 9 | import os 10 | import time 11 | import warnings 12 | import numpy as np 13 | import pdb 14 | 15 | warnings.filterwarnings('ignore') 16 | 17 | 18 | class Exp_Classification(Exp_Basic): 19 | def __init__(self, args): 20 | super(Exp_Classification, self).__init__(args) 21 | 22 | def _build_model(self): 23 | # model input depends on data 24 | train_data, train_loader = self._get_data(flag='TRAIN') 25 | test_data, test_loader = self._get_data(flag='TEST') 26 | self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len) 27 | self.args.pred_len = 0 28 | self.args.enc_in = train_data.feature_df.shape[1] 29 | self.args.num_class = len(train_data.class_names) 30 | # model init 31 | model = self.model_dict[self.args.model].Model(self.args).float() 32 | if self.args.use_multi_gpu and self.args.use_gpu: 33 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 34 | return model 35 | 36 | def _get_data(self, flag): 37 | data_set, data_loader = data_provider(self.args, flag) 38 | return data_set, data_loader 39 | 40 | def _select_optimizer(self): 41 | # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 42 | model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate) 43 | return model_optim 44 | 45 | def _select_criterion(self): 46 | criterion = nn.CrossEntropyLoss() 47 | return criterion 48 | 49 | def vali(self, vali_data, vali_loader, criterion): 50 | total_loss = [] 51 | preds = [] 52 | trues = [] 53 | self.model.eval() 54 | with torch.no_grad(): 55 | for i, (batch_x, label, padding_mask) in enumerate(vali_loader): 56 | batch_x = batch_x.float().to(self.device) 57 | padding_mask = padding_mask.float().to(self.device) 58 | label = label.to(self.device) 59 | 60 | outputs = self.model(batch_x, padding_mask, None, None) 61 | 62 | pred = outputs.detach() 63 | loss = criterion(pred, label.long().squeeze()) 64 | total_loss.append(loss.item()) 65 | 66 | preds.append(outputs.detach()) 67 | trues.append(label) 68 | 69 | total_loss = np.average(total_loss) 70 | 71 | preds = torch.cat(preds, 0) 72 | trues = torch.cat(trues, 0) 73 | probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. 
for each class and sample 74 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample 75 | trues = trues.flatten().cpu().numpy() 76 | accuracy = cal_accuracy(predictions, trues) 77 | 78 | self.model.train() 79 | return total_loss, accuracy 80 | 81 | def train(self, setting): 82 | train_data, train_loader = self._get_data(flag='TRAIN') 83 | vali_data, vali_loader = self._get_data(flag='TEST') 84 | test_data, test_loader = self._get_data(flag='TEST') 85 | 86 | path = os.path.join(self.args.checkpoints, setting) 87 | if not os.path.exists(path): 88 | os.makedirs(path) 89 | 90 | time_now = time.time() 91 | 92 | train_steps = len(train_loader) 93 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 94 | 95 | model_optim = self._select_optimizer() 96 | criterion = self._select_criterion() 97 | 98 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 99 | steps_per_epoch=train_steps, 100 | pct_start=self.args.pct_start, 101 | epochs=self.args.train_epochs, 102 | max_lr=self.args.learning_rate) 103 | 104 | for epoch in range(self.args.train_epochs): 105 | iter_count = 0 106 | train_loss = [] 107 | 108 | self.model.train() 109 | epoch_time = time.time() 110 | 111 | for i, (batch_x, label, padding_mask) in enumerate(train_loader): 112 | iter_count += 1 113 | model_optim.zero_grad() 114 | 115 | batch_x = batch_x.float().to(self.device) 116 | padding_mask = padding_mask.float().to(self.device) 117 | label = label.to(self.device) 118 | 119 | outputs = self.model(batch_x, padding_mask, None, None) 120 | loss = criterion(outputs, label.long().squeeze(-1)) 121 | train_loss.append(loss.item()) 122 | 123 | if (i + 1) % 100 == 0: 124 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 125 | speed = (time.time() - time_now) / iter_count 126 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 127 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 128 | iter_count = 0 129 | time_now = time.time() 130 | 131 | loss.backward() 132 | nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0) 133 | model_optim.step() 134 | 135 | # if self.args.lradj == 'TST': 136 | # adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 137 | # scheduler.step() 138 | 139 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 140 | train_loss = np.average(train_loss) 141 | vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion) 142 | test_loss, test_accuracy = self.vali(test_data, test_loader, criterion) 143 | 144 | print( 145 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}" 146 | .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy)) 147 | early_stopping(-test_accuracy, self.model, path) 148 | if early_stopping.early_stop: 149 | print("Early stopping") 150 | break 151 | if (epoch + 1) % 5 == 0: 152 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 153 | 154 | best_model_path = path + '/' + 'checkpoint.pth' 155 | self.model.load_state_dict(torch.load(best_model_path)) 156 | 157 | return self.model 158 | 159 | def test(self, setting, test=0): 160 | test_data, test_loader = self._get_data(flag='TEST') 161 | if test: 162 | print('loading model') 163 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 164 
| 165 | preds = [] 166 | trues = [] 167 | folder_path = './test_results/' + setting + '/' 168 | if not os.path.exists(folder_path): 169 | os.makedirs(folder_path) 170 | 171 | self.model.eval() 172 | with torch.no_grad(): 173 | for i, (batch_x, label, padding_mask) in enumerate(test_loader): 174 | batch_x = batch_x.float().to(self.device) 175 | padding_mask = padding_mask.float().to(self.device) 176 | label = label.to(self.device) 177 | 178 | outputs = self.model(batch_x, padding_mask, None, None) 179 | 180 | preds.append(outputs.detach()) 181 | trues.append(label) 182 | 183 | preds = torch.cat(preds, 0) 184 | trues = torch.cat(trues, 0) 185 | print('test shape:', preds.shape, trues.shape) 186 | 187 | probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. for each class and sample 188 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample 189 | trues = trues.flatten().cpu().numpy() 190 | accuracy = cal_accuracy(predictions, trues) 191 | 192 | # result save 193 | folder_path = './results/' + setting + '/' 194 | if not os.path.exists(folder_path): 195 | os.makedirs(folder_path) 196 | 197 | print('accuracy:{}'.format(accuracy)) 198 | file_name='result_classification.txt' 199 | f = open(os.path.join(folder_path,file_name), 'a') 200 | f.write(setting + " \n") 201 | f.write('accuracy:{}'.format(accuracy)) 202 | f.write('\n') 203 | f.write('\n') 204 | f.close() 205 | return -------------------------------------------------------------------------------- /exp/exp_anomaly_detection.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from data_provider.data_factory import data_provider 4 | from exp.exp_basic import Exp_Basic 5 | from utils.tools import EarlyStopping, adjust_learning_rate, adjustment 6 | from sklearn.metrics import precision_recall_fscore_support 7 | from sklearn.metrics import accuracy_score 8 | import torch.multiprocessing 9 | 10 | torch.multiprocessing.set_sharing_strategy('file_system') 11 | import torch 12 | import torch.nn as nn 13 | from torch import optim 14 | import os 15 | import time 16 | import warnings 17 | import numpy as np 18 | 19 | warnings.filterwarnings('ignore') 20 | 21 | 22 | class Exp_Anomaly_Detection(Exp_Basic): 23 | def __init__(self, args): 24 | super(Exp_Anomaly_Detection, self).__init__(args) 25 | 26 | def _build_model(self): 27 | model = self.model_dict[self.args.model].Model(self.args).float() 28 | 29 | if self.args.use_multi_gpu and self.args.use_gpu: 30 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 31 | return model 32 | 33 | def _get_data(self, flag): 34 | data_set, data_loader = data_provider(self.args, flag) 35 | return data_set, data_loader 36 | 37 | def _select_optimizer(self): 38 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 39 | return model_optim 40 | 41 | def _select_criterion(self): 42 | criterion = nn.MSELoss() 43 | return criterion 44 | 45 | def vali(self, vali_data, vali_loader, criterion): 46 | total_loss = [] 47 | self.model.eval() 48 | with torch.no_grad(): 49 | for i, (batch_x, _) in enumerate(vali_loader): 50 | batch_x = batch_x.float().to(self.device) 51 | 52 | outputs = self.model(batch_x, None, None, None) 53 | 54 | f_dim = -1 if self.args.features == 'MS' else 0 55 | outputs = outputs[:, :, f_dim:] 56 | pred = outputs.detach() 57 | true = batch_x.detach() 58 | 59 | loss = criterion(pred, true) 60 | 
total_loss.append(loss.item()) 61 | total_loss = np.average(total_loss) 62 | self.model.train() 63 | return total_loss 64 | 65 | def train(self, setting): 66 | train_data, train_loader = self._get_data(flag='train') 67 | vali_data, vali_loader = self._get_data(flag='val') 68 | test_data, test_loader = self._get_data(flag='test') 69 | 70 | path = os.path.join(self.args.checkpoints, setting) 71 | if not os.path.exists(path): 72 | os.makedirs(path) 73 | 74 | time_now = time.time() 75 | 76 | train_steps = len(train_loader) 77 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 78 | 79 | model_optim = self._select_optimizer() 80 | criterion = self._select_criterion() 81 | 82 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 83 | steps_per_epoch=train_steps, 84 | pct_start=self.args.pct_start, 85 | epochs=self.args.train_epochs, 86 | max_lr=self.args.learning_rate) 87 | 88 | for epoch in range(self.args.train_epochs): 89 | iter_count = 0 90 | train_loss = [] 91 | 92 | self.model.train() 93 | epoch_time = time.time() 94 | for i, (batch_x, batch_y) in enumerate(train_loader): 95 | iter_count += 1 96 | model_optim.zero_grad() 97 | 98 | batch_x = batch_x.float().to(self.device) 99 | 100 | outputs = self.model(batch_x, None, None, None) 101 | 102 | f_dim = -1 if self.args.features == 'MS' else 0 103 | outputs = outputs[:, :, f_dim:] 104 | loss = criterion(outputs, batch_x) 105 | train_loss.append(loss.item()) 106 | 107 | if (i + 1) % 100 == 0: 108 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 109 | speed = (time.time() - time_now) / iter_count 110 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 111 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 112 | iter_count = 0 113 | time_now = time.time() 114 | 115 | loss.backward() 116 | model_optim.step() 117 | 118 | if self.args.lradj == 'TST': 119 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 120 | scheduler.step() 121 | 122 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 123 | train_loss = np.average(train_loss) 124 | vali_loss = self.vali(vali_data, vali_loader, criterion) 125 | test_loss = self.vali(test_data, test_loader, criterion) 126 | 127 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 128 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 129 | early_stopping(test_loss, self.model, path) 130 | if early_stopping.early_stop: 131 | print("Early stopping") 132 | break 133 | if self.args.lradj != 'TST': 134 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True) 135 | else: 136 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 137 | 138 | best_model_path = path + '/' + 'checkpoint.pth' 139 | self.model.load_state_dict(torch.load(best_model_path)) 140 | 141 | return self.model 142 | 143 | def test(self, setting, test=0): 144 | test_data, test_loader = self._get_data(flag='test') 145 | train_data, train_loader = self._get_data(flag='train') 146 | if test: 147 | print('loading model') 148 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 149 | 150 | attens_energy = [] 151 | folder_path = './test_results/' + setting + '/' 152 | if not os.path.exists(folder_path): 153 | os.makedirs(folder_path) 154 | 155 | self.model.eval() 156 | self.anomaly_criterion = nn.MSELoss(reduce=False) 157 | 158 | # 
(1) statistics on the train set 159 | with torch.no_grad(): 160 | for i, (batch_x, batch_y) in enumerate(train_loader): 161 | batch_x = batch_x.float().to(self.device) 162 | # reconstruction 163 | outputs = self.model(batch_x, None, None, None) 164 | # criterion 165 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) 166 | score = score.detach().cpu().numpy() 167 | attens_energy.append(score) 168 | 169 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) 170 | train_energy = np.array(attens_energy) 171 | 172 | # (2) find the threshold 173 | attens_energy = [] 174 | test_labels = [] 175 | for i, (batch_x, batch_y) in enumerate(test_loader): 176 | batch_x = batch_x.float().to(self.device) 177 | # reconstruction 178 | outputs = self.model(batch_x, None, None, None) 179 | # criterion 180 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) 181 | score = score.detach().cpu().numpy() 182 | attens_energy.append(score) 183 | test_labels.append(batch_y) 184 | 185 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) 186 | test_energy = np.array(attens_energy) 187 | combined_energy = np.concatenate([train_energy, test_energy], axis=0) 188 | threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio) 189 | print("Threshold :", threshold) 190 | 191 | # (3) evaluation on the test set 192 | pred = (test_energy > threshold).astype(int) 193 | test_labels = np.concatenate(test_labels, axis=0).reshape(-1) 194 | test_labels = np.array(test_labels) 195 | gt = test_labels.astype(int) 196 | 197 | print("pred: ", pred.shape) 198 | print("gt: ", gt.shape) 199 | 200 | # (4) detection adjustment 201 | gt, pred = adjustment(gt, pred) 202 | 203 | pred = np.array(pred) 204 | gt = np.array(gt) 205 | print("pred: ", pred.shape) 206 | print("gt: ", gt.shape) 207 | 208 | accuracy = accuracy_score(gt, pred) 209 | precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary') 210 | print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( 211 | accuracy, precision, 212 | recall, f_score)) 213 | 214 | f = open("result_anomaly_detection.txt", 'a') 215 | f.write(setting + " \n") 216 | f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( 217 | accuracy, precision, 218 | recall, f_score)) 219 | f.write('\n') 220 | f.write('\n') 221 | f.close() 222 | return 223 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | from exp.exp_anomaly_detection import Exp_Anomaly_Detection 5 | from exp.exp_classification import Exp_Classification 6 | from exp.exp_imputation import Exp_Imputation 7 | from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast 8 | from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast 9 | import random 10 | import numpy as np 11 | 12 | fix_seed = 2021 13 | random.seed(fix_seed) 14 | torch.manual_seed(fix_seed) 15 | np.random.seed(fix_seed) 16 | 17 | parser = argparse.ArgumentParser(description='LLMMixer') 18 | 19 | # basic config 20 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', 21 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') 22 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status') 23 |
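# Example invocation (illustrative; the dataset paths and model_id are
# assumptions). Only --task_name, --is_training, --model_id, --model and
# --data are required:
#
#   python run.py --task_name long_term_forecast --is_training 1 \
#       --model_id ETTh1_96_96 --model LLMMixer --data ETTh1 \
#       --root_path ./data/ETT/ --data_path ETTh1.csv --seq_len 96 --pred_len 96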
parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') 24 | parser.add_argument('--model', type=str, required=True, default='LLMMixer', 25 | help='model name, options: [LLMMixer]') 26 | 27 | # data loader 28 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') 29 | parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file') 30 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') 31 | parser.add_argument('--features', type=str, default='M', 32 | help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') 33 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') 34 | parser.add_argument('--freq', type=str, default='h', 35 | help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') 36 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') 37 | 38 | # forecasting task 39 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') 40 | parser.add_argument('--label_len', type=int, default=48, help='start token length') 41 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') 42 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') 43 | parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False) 44 | 45 | # model define 46 | parser.add_argument('--llm_path', type=str, default='FacebookAI/roberta-base', help='path of pretrained LLM') 47 | parser.add_argument('--tokenizer_path', type=str, default='FacebookAI/roberta-base', help='path of tokenizer') 48 | parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock') 49 | parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') 50 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') 51 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') 52 | parser.add_argument('--c_out', type=int, default=7, help='output size') 53 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model') 54 | parser.add_argument('--n_heads', type=int, default=4, help='num of heads') 55 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') 56 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') 57 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn') 58 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') 59 | parser.add_argument('--factor', type=int, default=1, help='attn factor') 60 | parser.add_argument('--distil', action='store_false', 61 | help='whether to use distilling in encoder, using this argument means not using distilling', 62 | default=True) 63 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout') 64 | parser.add_argument('--embed', type=str, default='timeF', 65 | help='time features encoding, options:[timeF, fixed, learned]') 66 | parser.add_argument('--activation', type=str, default='gelu', help='activation') 67 | parser.add_argument('--output_attention', action='store_true', help='whether 
to output attention in encoder') 68 | parser.add_argument('--channel_independence', type=int, default=1, 69 | help='0: channel dependence 1: channel independence for FreTS model') 70 | parser.add_argument('--decomp_method', type=str, default='moving_avg', 71 | help='method of series decomposition, only support moving_avg or dft_decomp') 72 | parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalization; True 1 False 0') 73 | parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers') 74 | parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size') 75 | parser.add_argument('--down_sampling_method', type=str, default='avg', 76 | help='down sampling method, only support avg, max, conv') 77 | parser.add_argument('--use_future_temporal_feature', type=int, default=0, 78 | help='whether to use future_temporal_feature; True 1 False 0') 79 | 80 | # imputation task 81 | parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio') 82 | 83 | # anomaly detection task 84 | parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%)') 85 | 86 | # optimization 87 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') 88 | parser.add_argument('--itr', type=int, default=1, help='number of experiment runs') 89 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') 90 | parser.add_argument('--batch_size', type=int, default=16, help='batch size of train input data') 91 | parser.add_argument('--patience', type=int, default=10, help='early stopping patience') 92 | parser.add_argument('--learning_rate', type=float, default=0.001, help='optimizer learning rate') 93 | parser.add_argument('--des', type=str, default='test', help='exp description') 94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function') 95 | parser.add_argument('--lradj', type=str, default='TST', help='adjust learning rate') 96 | parser.add_argument('--pct_start', type=float, default=0.2, help='warm-up fraction for OneCycleLR (pct_start)') 97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) 98 | parser.add_argument('--comment', type=str, default='none', help='comment') 99 | 100 | # GPU 101 | parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') 102 | parser.add_argument('--gpu', type=int, default=0, help='gpu') 103 | parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) 104 | parser.add_argument('--devices', type=str, default='0,1', help='device ids of multiple gpus') 105 | 106 | # de-stationary projector params 107 | parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128], 108 | help='hidden layer dimensions of projector (List)') 109 | parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector') 110 | 111 | args = parser.parse_args() 112 | args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False 113 | 114 | if args.use_gpu and args.use_multi_gpu: 115 | args.devices = args.devices.replace(' ', '') 116 | device_ids = args.devices.split(',') 117 | args.device_ids = [int(id_) for id_ in device_ids] 118 | args.gpu = args.device_ids[0] 119 | 120 | print('Args in experiment:') 121 | print(args) 122 | 123 | if args.task_name == 'long_term_forecast': 124 | Exp = Exp_Long_Term_Forecast 125 | elif args.task_name == 'short_term_forecast':
126 | Exp = Exp_Short_Term_Forecast 127 | else: 128 | Exp = Exp_Long_Term_Forecast 129 | 130 | if args.is_training: 131 | for ii in range(args.itr): 132 | # setting record of experiments 133 | setting = '{}_{}_{}_{}_{}_sl{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( 134 | args.task_name, 135 | args.model_id, 136 | args.comment, 137 | args.model, 138 | args.data, 139 | args.seq_len, 140 | args.pred_len, 141 | args.d_model, 142 | args.n_heads, 143 | args.e_layers, 144 | args.d_layers, 145 | args.d_ff, 146 | args.factor, 147 | args.embed, 148 | args.distil, 149 | args.des, ii) 150 | 151 | exp = Exp(args) # set experiments 152 | print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) 153 | exp.train(setting) 154 | 155 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) 156 | exp.test(setting) 157 | torch.cuda.empty_cache() 158 | else: 159 | ii = 0 160 | setting = '{}_{}_{}_{}_{}_sl{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( 161 | args.task_name, 162 | args.model_id, 163 | args.comment, 164 | args.model, 165 | args.data, 166 | args.seq_len, 167 | args.pred_len, 168 | args.d_model, 169 | args.n_heads, 170 | args.e_layers, 171 | args.d_layers, 172 | args.d_ff, 173 | args.factor, 174 | args.embed, 175 | args.distil, 176 | args.des, ii) 177 | 178 | exp = Exp(args) # set experiments 179 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) 180 | exp.test(setting, test=1) 181 | torch.cuda.empty_cache() 182 | -------------------------------------------------------------------------------- /layers/Embed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.utils import weight_norm 5 | import math 6 | 7 | 8 | class PositionalEmbedding(nn.Module): 9 | def __init__(self, d_model, max_len=5000): 10 | super(PositionalEmbedding, self).__init__() 11 | # Compute the positional encodings once in log space. 
12 | pe = torch.zeros(max_len, d_model).float() 13 | pe.require_grad = False 14 | 15 | position = torch.arange(0, max_len).float().unsqueeze(1) 16 | div_term = (torch.arange(0, d_model, 2).float() 17 | * -(math.log(10000.0) / d_model)).exp() 18 | 19 | pe[:, 0::2] = torch.sin(position * div_term) 20 | pe[:, 1::2] = torch.cos(position * div_term) 21 | 22 | pe = pe.unsqueeze(0) 23 | self.register_buffer('pe', pe) 24 | 25 | def forward(self, x): 26 | return self.pe[:, :x.size(1)] 27 | 28 | 29 | class TokenEmbedding(nn.Module): 30 | def __init__(self, c_in, d_model): 31 | super(TokenEmbedding, self).__init__() 32 | padding = 1 if torch.__version__ >= '1.5.0' else 2 33 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 34 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv1d): 37 | nn.init.kaiming_normal_( 38 | m.weight, mode='fan_in', nonlinearity='leaky_relu') 39 | 40 | def forward(self, x): 41 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 42 | return x 43 | 44 | 45 | class FixedEmbedding(nn.Module): 46 | def __init__(self, c_in, d_model): 47 | super(FixedEmbedding, self).__init__() 48 | 49 | w = torch.zeros(c_in, d_model).float() 50 | w.require_grad = False 51 | 52 | position = torch.arange(0, c_in).float().unsqueeze(1) 53 | div_term = (torch.arange(0, d_model, 2).float() 54 | * -(math.log(10000.0) / d_model)).exp() 55 | 56 | w[:, 0::2] = torch.sin(position * div_term) 57 | w[:, 1::2] = torch.cos(position * div_term) 58 | 59 | self.emb = nn.Embedding(c_in, d_model) 60 | self.emb.weight = nn.Parameter(w, requires_grad=False) 61 | 62 | def forward(self, x): 63 | return self.emb(x).detach() 64 | 65 | 66 | class TemporalEmbedding(nn.Module): 67 | def __init__(self, d_model, embed_type='fixed', freq='h'): 68 | super(TemporalEmbedding, self).__init__() 69 | 70 | minute_size = 4 71 | hour_size = 24 72 | weekday_size = 7 73 | day_size = 32 74 | month_size = 13 75 | 76 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 77 | if freq == 't': 78 | self.minute_embed = Embed(minute_size, d_model) 79 | self.hour_embed = Embed(hour_size, d_model) 80 | self.weekday_embed = Embed(weekday_size, d_model) 81 | self.day_embed = Embed(day_size, d_model) 82 | self.month_embed = Embed(month_size, d_model) 83 | 84 | def forward(self, x): 85 | x = x.long() 86 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr( 87 | self, 'minute_embed') else 0. 
88 | hour_x = self.hour_embed(x[:, :, 3]) 89 | weekday_x = self.weekday_embed(x[:, :, 2]) 90 | day_x = self.day_embed(x[:, :, 1]) 91 | month_x = self.month_embed(x[:, :, 0]) 92 | 93 | return hour_x + weekday_x + day_x + month_x + minute_x 94 | 95 | 96 | class TimeFeatureEmbedding(nn.Module): 97 | def __init__(self, d_model, embed_type='timeF', freq='h'): 98 | super(TimeFeatureEmbedding, self).__init__() 99 | 100 | freq_map = {'h': 4, 't': 5, 's': 6, 101 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 102 | d_inp = freq_map[freq] 103 | self.embed = nn.Linear(d_inp, d_model, bias=False) 104 | 105 | def forward(self, x): 106 | return self.embed(x) 107 | 108 | 109 | class DataEmbedding(nn.Module): 110 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 111 | super(DataEmbedding, self).__init__() 112 | self.c_in = c_in 113 | self.d_model = d_model 114 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 115 | self.position_embedding = PositionalEmbedding(d_model=d_model) 116 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 117 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 118 | d_model=d_model, embed_type=embed_type, freq=freq) 119 | self.dropout = nn.Dropout(p=dropout) 120 | 121 | def forward(self, x, x_mark): 122 | _, _, N = x.size() 123 | if N == self.c_in: 124 | if x_mark is None: 125 | x = self.value_embedding(x) + self.position_embedding(x) 126 | else: 127 | x = self.value_embedding( 128 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 129 | elif N == self.d_model: 130 | if x_mark is None: 131 | x = x + self.position_embedding(x) 132 | else: 133 | x = x + self.temporal_embedding(x_mark) + self.position_embedding(x) 134 | 135 | return self.dropout(x) 136 | 137 | 138 | class DataEmbedding_ms(nn.Module): 139 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 140 | super(DataEmbedding_ms, self).__init__() 141 | 142 | self.value_embedding = TokenEmbedding(c_in=1, d_model=d_model) 143 | self.position_embedding = PositionalEmbedding(d_model=d_model) 144 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 145 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 146 | d_model=d_model, embed_type=embed_type, freq=freq) 147 | self.dropout = nn.Dropout(p=dropout) 148 | 149 | def forward(self, x, x_mark): 150 | B, T, N = x.shape 151 | x1 = self.value_embedding(x.permute(0, 2, 1).reshape(B * N, T).unsqueeze(-1)).reshape(B, N, T, -1).permute(0, 2, 152 | 1, 3) # (B, T, N) -> (B, N, T) -> (B*N, T, 1): embed each variate separately 153 | if x_mark is None: 154 | x = x1 155 | else: 156 | x = x1 + self.temporal_embedding(x_mark) 157 | return self.dropout(x) 158 | 159 | 160 | class DataEmbedding_wo_pos(nn.Module): 161 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 162 | super(DataEmbedding_wo_pos, self).__init__() 163 | 164 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 165 | self.position_embedding = PositionalEmbedding(d_model=d_model) 166 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 167 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 168 | d_model=d_model, embed_type=embed_type, freq=freq) 169 | self.dropout = nn.Dropout(p=dropout) 170 | 171 | def forward(self, x, x_mark): 172 | if x is None and x_mark is not None: 173 | return self.temporal_embedding(x_mark) 174 | if x_mark is None: 175 | x = self.value_embedding(x) 176 | else: 177 | x = self.value_embedding(x) +
self.temporal_embedding(x_mark) 178 | return self.dropout(x) 179 | 180 | 181 | class PatchEmbedding_crossformer(nn.Module): 182 | def __init__(self, d_model, patch_len, stride, padding, dropout): 183 | super(PatchEmbedding_crossformer, self).__init__() 184 | # Patching 185 | self.patch_len = patch_len 186 | self.stride = stride 187 | self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) 188 | 189 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space 190 | self.value_embedding = nn.Linear(patch_len, d_model, bias=False) 191 | 192 | # Positional embedding 193 | self.position_embedding = PositionalEmbedding(d_model) 194 | 195 | # Residual dropout 196 | self.dropout = nn.Dropout(dropout) 197 | 198 | def forward(self, x): 199 | # do patching 200 | n_vars = x.shape[1] 201 | x = self.padding_patch_layer(x) 202 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) 203 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) 204 | # Input encoding 205 | x = self.value_embedding(x) + self.position_embedding(x) 206 | return self.dropout(x), n_vars 207 | 208 | 209 | class PatchEmbedding(nn.Module): 210 | def __init__(self, d_model, patch_len, stride, dropout): 211 | super(PatchEmbedding, self).__init__() 212 | # Patching 213 | self.patch_len = patch_len 214 | self.stride = stride 215 | self.padding_patch_layer = nn.ReplicationPad1d((0, stride)) 216 | 217 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space 218 | self.value_embedding = TokenEmbedding(patch_len, d_model) 219 | 220 | # Positional embedding 221 | self.position_embedding = PositionalEmbedding(d_model) 222 | 223 | # Residual dropout 224 | self.dropout = nn.Dropout(dropout) 225 | 226 | def forward(self, x): 227 | # do patching 228 | n_vars = x.shape[1] 229 | x = self.padding_patch_layer(x) 230 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) 231 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) 232 | # Input encoding 233 | x = self.value_embedding(x) + self.position_embedding(x) 234 | return self.dropout(x), n_vars 235 | -------------------------------------------------------------------------------- /exp/exp_imputation.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.optim import lr_scheduler 3 | 4 | from data_provider.data_factory import data_provider 5 | from exp.exp_basic import Exp_Basic 6 | from utils.tools import EarlyStopping, adjust_learning_rate, visual 7 | from utils.metrics import metric 8 | import torch 9 | import torch.nn as nn 10 | from torch import optim 11 | import os 12 | import time 13 | import warnings 14 | import numpy as np 15 | 16 | warnings.filterwarnings('ignore') 17 | 18 | 19 | class Exp_Imputation(Exp_Basic): 20 | def __init__(self, args): 21 | super(Exp_Imputation, self).__init__(args) 22 | 23 | def _build_model(self): 24 | model = self.model_dict[self.args.model].Model(self.args).float() 25 | 26 | if self.args.use_multi_gpu and self.args.use_gpu: 27 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 28 | return model 29 | 30 | def _get_data(self, flag): 31 | data_set, data_loader = data_provider(self.args, flag) 32 | return data_set, data_loader 33 | 34 | def _select_optimizer(self): 35 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 36 | return model_optim 37 | 38 | def _select_criterion(self): 39 | criterion = nn.MSELoss() 40 | return criterion 41 | 42 | def 
vali(self, vali_data, vali_loader, criterion): 43 | total_loss = [] 44 | self.model.eval() 45 | with torch.no_grad(): 46 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): 47 | batch_x = batch_x.float().to(self.device) 48 | batch_x_mark = batch_x_mark.float().to(self.device) 49 | 50 | # random mask 51 | B, T, N = batch_x.shape 52 | """ 53 | B = batch size 54 | T = seq len 55 | N = number of features 56 | """ 57 | mask = torch.rand((B, T, N)).to(self.device) 58 | mask[mask <= self.args.mask_rate] = 0 # masked 59 | mask[mask > self.args.mask_rate] = 1 # remained 60 | inp = batch_x.masked_fill(mask == 0, 0) 61 | 62 | outputs = self.model(inp, batch_x_mark, None, None, mask) 63 | 64 | f_dim = -1 if self.args.features == 'MS' else 0 65 | outputs = outputs[:, :, f_dim:] 66 | 67 | # add support for MS 68 | batch_x = batch_x[:, :, f_dim:] 69 | mask = mask[:, :, f_dim:] 70 | 71 | pred = outputs.detach() 72 | true = batch_x.detach() 73 | mask = mask.detach() 74 | 75 | loss = criterion(pred[mask == 0], true[mask == 0]) 76 | total_loss.append(loss.item()) 77 | total_loss = np.average(total_loss) 78 | self.model.train() 79 | return total_loss 80 | 81 | def train(self, setting): 82 | train_data, train_loader = self._get_data(flag='train') 83 | vali_data, vali_loader = self._get_data(flag='val') 84 | test_data, test_loader = self._get_data(flag='test') 85 | 86 | path = os.path.join(self.args.checkpoints, setting) 87 | if not os.path.exists(path): 88 | os.makedirs(path) 89 | 90 | time_now = time.time() 91 | 92 | train_steps = len(train_loader) 93 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 94 | 95 | model_optim = self._select_optimizer() 96 | criterion = self._select_criterion() 97 | 98 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 99 | steps_per_epoch=train_steps, 100 | pct_start=self.args.pct_start, 101 | epochs=self.args.train_epochs, 102 | max_lr=self.args.learning_rate) 103 | 104 | for epoch in range(self.args.train_epochs): 105 | iter_count = 0 106 | train_loss = [] 107 | 108 | self.model.train() 109 | epoch_time = time.time() 110 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): 111 | iter_count += 1 112 | model_optim.zero_grad() 113 | 114 | batch_x = batch_x.float().to(self.device) 115 | batch_x_mark = batch_x_mark.float().to(self.device) 116 | 117 | # random mask 118 | B, T, N = batch_x.shape 119 | mask = torch.rand((B, T, N)).to(self.device) 120 | mask[mask <= self.args.mask_rate] = 0 # masked 121 | mask[mask > self.args.mask_rate] = 1 # remained 122 | inp = batch_x.masked_fill(mask == 0, 0) 123 | 124 | outputs = self.model(inp, batch_x_mark, None, None, mask) 125 | 126 | f_dim = -1 if self.args.features == 'MS' else 0 127 | outputs = outputs[:, :, f_dim:] 128 | 129 | # add support for MS 130 | batch_x = batch_x[:, :, f_dim:] 131 | mask = mask[:, :, f_dim:] 132 | 133 | loss = criterion(outputs[mask == 0], batch_x[mask == 0]) 134 | train_loss.append(loss.item()) 135 | 136 | if (i + 1) % 100 == 0: 137 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 138 | speed = (time.time() - time_now) / iter_count 139 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 140 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 141 | iter_count = 0 142 | time_now = time.time() 143 | 144 | loss.backward() 145 | model_optim.step() 146 | 147 | if self.args.lradj == 'TST': 148 | adjust_learning_rate(model_optim, 
                        scheduler, epoch + 1, self.args, printout=False)
149 |                     scheduler.step()
150 | 
151 |             print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
152 |             train_loss = np.average(train_loss)
153 |             vali_loss = self.vali(vali_data, vali_loader, criterion)
154 |             test_loss = self.vali(test_data, test_loader, criterion)
155 | 
156 |             print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
157 |                 epoch + 1, train_steps, train_loss, vali_loss, test_loss))
158 |             early_stopping(vali_loss, self.model, path)
159 |             if early_stopping.early_stop:
160 |                 print("Early stopping")
161 |                 break
162 | 
163 |             if self.args.lradj != 'TST':
164 |                 adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
165 |             else:
166 |                 print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
167 | 
168 |         best_model_path = path + '/' + 'checkpoint.pth'
169 |         self.model.load_state_dict(torch.load(best_model_path))
170 | 
171 |         return self.model
172 | 
173 |     def test(self, setting, test=0):
174 |         test_data, test_loader = self._get_data(flag='test')
175 |         if test:
176 |             print('loading model')
177 |             self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
178 | 
179 |         preds = []
180 |         trues = []
181 |         masks = []
182 |         folder_path = './test_results/' + setting + '/'
183 |         if not os.path.exists(folder_path):
184 |             os.makedirs(folder_path)
185 | 
186 |         self.model.eval()
187 |         with torch.no_grad():
188 |             for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
189 |                 batch_x = batch_x.float().to(self.device)
190 |                 batch_x_mark = batch_x_mark.float().to(self.device)
191 | 
192 |                 # random mask
193 |                 B, T, N = batch_x.shape
194 |                 mask = torch.rand((B, T, N)).to(self.device)
195 |                 mask[mask <= self.args.mask_rate] = 0  # masked
196 |                 mask[mask > self.args.mask_rate] = 1  # remained
197 |                 inp = batch_x.masked_fill(mask == 0, 0)
198 | 
199 |                 # imputation
200 |                 outputs = self.model(inp, batch_x_mark, None, None, mask)
201 | 
202 |                 # eval
203 |                 f_dim = -1 if self.args.features == 'MS' else 0
204 |                 outputs = outputs[:, :, f_dim:]
205 | 
206 |                 # add support for MS
207 |                 batch_x = batch_x[:, :, f_dim:]
208 |                 mask = mask[:, :, f_dim:]
209 | 
210 |                 outputs = outputs.detach().cpu().numpy()
211 |                 pred = outputs
212 |                 true = batch_x.detach().cpu().numpy()
213 |                 preds.append(pred)
214 |                 trues.append(true)
215 |                 masks.append(mask.detach().cpu())
216 | 
217 |                 if i % 20 == 0:
218 |                     filled = true[0, :, -1].copy()
219 |                     filled = filled * mask[0, :, -1].detach().cpu().numpy() + \
220 |                              pred[0, :, -1] * (1 - mask[0, :, -1].detach().cpu().numpy())
221 |                     visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf'))
222 | 
223 |         preds = np.concatenate(preds, 0)
224 |         trues = np.concatenate(trues, 0)
225 |         masks = np.concatenate(masks, 0)
226 |         print('test shape:', preds.shape, trues.shape)
227 | 
228 |         # result save
229 |         folder_path = './results/' + setting + '/'
230 |         if not os.path.exists(folder_path):
231 |             os.makedirs(folder_path)
232 | 
233 |         mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0])
234 |         print('mse:{}, mae:{}'.format(mse, mae))
235 |         f = open("result_imputation.txt", 'a')
236 |         f.write(setting + " \n")
237 |         f.write('mse:{}, mae:{}'.format(mse, mae))
238 |         f.write('\n')
239 |         f.write('\n')
240 |         f.close()
241 | 
242 |         np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
243 |         np.save(folder_path + 'pred.npy', preds)
244 |         np.save(folder_path + 'true.npy', trues)
245 |         return
246 | 
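247 | 
248 | 
249 | if __name__ == "__main__":
250 |     # A minimal, self-contained sketch of the random-mask protocol used in
251 |     # vali/train/test above (added for illustration; the 0.25 mask rate and
252 |     # tensor shape are assumptions, not project defaults).
253 |     x = torch.randn(4, 96, 7)            # (B, T, N)
254 |     mask = torch.rand_like(x)
255 |     mask[mask <= 0.25] = 0               # 0 = position is masked out
256 |     mask[mask > 0.25] = 1                # 1 = position is kept
257 |     inp = x.masked_fill(mask == 0, 0)    # model input with the gaps zeroed
258 |     # the imputation loss is evaluated only on the held-out points, i.e.
259 |     # criterion(outputs[mask == 0], x[mask == 0])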
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 
30 |       "Object" form shall mean any form resulting from mechanical
31 |       transformation or translation of a Source form, including but
32 |       not limited to compiled object code, generated documentation,
33 |       and conversions to other media types.
34 | 
35 |       "Work" shall mean the work of authorship, whether in Source or
36 |       Object form, made available under the License, as indicated by a
37 |       copyright notice that is included in or attached to the work
38 |       (an example is provided in the Appendix below).
39 | 
40 |       "Derivative Works" shall mean any work, whether in Source or Object
41 |       form, that is based on (or derived from) the Work and for which the
42 |       editorial revisions, annotations, elaborations, or other modifications
43 |       represent, as a whole, an original work of authorship. For the purposes
44 |       of this License, Derivative Works shall not include works that remain
45 |       separable from, or merely link (or bind by name) to the interfaces of,
46 |       the Work and Derivative Works thereof.
47 | 
48 |       "Contribution" shall mean any work of authorship, including
49 |       the original version of the Work and any modifications or additions
50 |       to that Work or Derivative Works thereof, that is intentionally
51 |       submitted to Licensor for inclusion in the Work by the copyright owner
52 |       or by an individual or Legal Entity authorized to submit on behalf of
53 |       the copyright owner. For the purposes of this definition, "submitted"
54 |       means any form of electronic, verbal, or written communication sent
55 |       to the Licensor or its representatives, including but not limited to
56 |       communication on electronic mailing lists, source code control systems,
57 |       and issue tracking systems that are managed by, or on behalf of, the
58 |       Licensor for the purpose of discussing and improving the Work, but
59 |       excluding communication that is conspicuously marked or otherwise
60 |       designated in writing by the copyright owner as "Not a Contribution."
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /data_provider/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import glob 5 | import re 6 | import torch 7 | from sktime.datasets import load_from_tsfile_to_dataframe 8 | from torch.utils.data import Dataset 9 | from sklearn.preprocessing import StandardScaler 10 | from utils.timefeatures import time_features 11 | from data_provider.m4 import M4Dataset, M4Meta 12 | from data_provider.uea import Normalizer, interpolate_missing 13 | import warnings 14 | 15 | warnings.filterwarnings('ignore') 16 | 17 | 18 | class Dataset_ETT_hour(Dataset): 19 | def __init__(self, root_path, flag='train', size=None, 20 | features='S', data_path='ETTh1.csv', 21 | target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): 22 | # size [seq_len, label_len, pred_len] 23 | # info 24 | if size == None: 25 | self.seq_len = 24 * 4 * 4 26 | self.label_len = 24 * 4 27 | self.pred_len = 24 * 4 28 | else: 29 | self.seq_len = size[0] 30 | self.label_len = size[1] 31 | self.pred_len = size[2] 32 | # init 33 | assert flag in ['train', 'test', 'val'] 34 | type_map = {'train': 0, 'val': 1, 'test': 2} 35 | self.set_type = type_map[flag] 36 | 37 | self.features = features 38 | self.target = target 39 | self.scale = scale 40 | self.timeenc = timeenc 41 | self.freq = freq 42 | 43 | self.root_path = root_path 44 | self.data_path = data_path 45 | self.__read_data__() 46 | 47 | def __read_data__(self): 48 | self.scaler = StandardScaler() 49 | df_raw = pd.read_csv(os.path.join(self.root_path, 50 | self.data_path)) 51 | 52 | border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] 53 | border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] 54 | border1 = border1s[self.set_type] 55 | border2 = border2s[self.set_type] 56 | 57 | if self.features == 'M' or self.features == 'MS': 58 | cols_data = df_raw.columns[1:] 59 | df_data = df_raw[cols_data] 60 | elif self.features == 'S': 61 | df_data = df_raw[[self.target]] 62 | 63 | if self.scale: 64 | train_data = df_data[border1s[0]:border2s[0]] 65 | self.scaler.fit(train_data.values) 66 | data = self.scaler.transform(df_data.values) 67 | else: 68 | data = df_data.values 69 | 70 | 
df_stamp = df_raw[['date']][border1:border2] 71 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 72 | if self.timeenc == 0: 73 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 74 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 75 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 76 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 77 | data_stamp = df_stamp.drop(['date'], 1).values 78 | elif self.timeenc == 1: 79 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 80 | data_stamp = data_stamp.transpose(1, 0) 81 | 82 | self.data_x = data[border1:border2] 83 | self.data_y = data[border1:border2] 84 | self.data_stamp = data_stamp 85 | 86 | def __getitem__(self, index): 87 | s_begin = index 88 | s_end = s_begin + self.seq_len 89 | r_begin = s_end - self.label_len 90 | r_end = r_begin + self.label_len + self.pred_len 91 | 92 | seq_x = self.data_x[s_begin:s_end] 93 | seq_y = self.data_y[r_begin:r_end] 94 | seq_x_mark = self.data_stamp[s_begin:s_end] 95 | seq_y_mark = self.data_stamp[r_begin:r_end] 96 | 97 | return seq_x, seq_y, seq_x_mark, seq_y_mark 98 | 99 | def __len__(self): 100 | return len(self.data_x) - self.seq_len - self.pred_len + 1 101 | 102 | def inverse_transform(self, data): 103 | return self.scaler.inverse_transform(data) 104 | 105 | 106 | class Dataset_ETT_minute(Dataset): 107 | def __init__(self, root_path, flag='train', size=None, 108 | features='S', data_path='ETTm1.csv', 109 | target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None): 110 | # size [seq_len, label_len, pred_len] 111 | # info 112 | if size == None: 113 | self.seq_len = 24 * 4 * 4 114 | self.label_len = 24 * 4 115 | self.pred_len = 24 * 4 116 | else: 117 | self.seq_len = size[0] 118 | self.label_len = size[1] 119 | self.pred_len = size[2] 120 | # init 121 | assert flag in ['train', 'test', 'val'] 122 | type_map = {'train': 0, 'val': 1, 'test': 2} 123 | self.set_type = type_map[flag] 124 | 125 | self.features = features 126 | self.target = target 127 | self.scale = scale 128 | self.timeenc = timeenc 129 | self.freq = freq 130 | 131 | self.root_path = root_path 132 | self.data_path = data_path 133 | self.__read_data__() 134 | 135 | def __read_data__(self): 136 | self.scaler = StandardScaler() 137 | df_raw = pd.read_csv(os.path.join(self.root_path, 138 | self.data_path)) 139 | 140 | border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] 141 | border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] 142 | border1 = border1s[self.set_type] 143 | border2 = border2s[self.set_type] 144 | 145 | if self.features == 'M' or self.features == 'MS': 146 | cols_data = df_raw.columns[1:] 147 | df_data = df_raw[cols_data] 148 | elif self.features == 'S': 149 | df_data = df_raw[[self.target]] 150 | 151 | if self.scale: 152 | train_data = df_data[border1s[0]:border2s[0]] 153 | self.scaler.fit(train_data.values) 154 | data = self.scaler.transform(df_data.values) 155 | else: 156 | data = df_data.values 157 | 158 | df_stamp = df_raw[['date']][border1:border2] 159 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 160 | if self.timeenc == 0: 161 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 162 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 163 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 164 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 165 
| df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) 166 | df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) 167 | data_stamp = df_stamp.drop(['date'], 1).values 168 | elif self.timeenc == 1: 169 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 170 | data_stamp = data_stamp.transpose(1, 0) 171 | 172 | self.data_x = data[border1:border2] 173 | self.data_y = data[border1:border2] 174 | self.data_stamp = data_stamp 175 | 176 | def __getitem__(self, index): 177 | s_begin = index 178 | s_end = s_begin + self.seq_len 179 | r_begin = s_end - self.label_len 180 | r_end = r_begin + self.label_len + self.pred_len 181 | 182 | seq_x = self.data_x[s_begin:s_end] 183 | seq_y = self.data_y[r_begin:r_end] 184 | seq_x_mark = self.data_stamp[s_begin:s_end] 185 | seq_y_mark = self.data_stamp[r_begin:r_end] 186 | 187 | return seq_x, seq_y, seq_x_mark, seq_y_mark 188 | 189 | def __len__(self): 190 | return len(self.data_x) - self.seq_len - self.pred_len + 1 191 | 192 | def inverse_transform(self, data): 193 | return self.scaler.inverse_transform(data) 194 | 195 | 196 | class Dataset_Custom(Dataset): 197 | def __init__(self, root_path, flag='train', size=None, 198 | features='S', data_path='ETTh1.csv', 199 | target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): 200 | # size [seq_len, label_len, pred_len] 201 | # info 202 | if size == None: 203 | self.seq_len = 24 * 4 * 4 204 | self.label_len = 24 * 4 205 | self.pred_len = 24 * 4 206 | else: 207 | self.seq_len = size[0] 208 | self.label_len = size[1] 209 | self.pred_len = size[2] 210 | # init 211 | assert flag in ['train', 'test', 'val'] 212 | type_map = {'train': 0, 'val': 1, 'test': 2} 213 | self.set_type = type_map[flag] 214 | 215 | self.features = features 216 | self.target = target 217 | self.scale = scale 218 | self.timeenc = timeenc 219 | self.freq = freq 220 | 221 | self.root_path = root_path 222 | self.data_path = data_path 223 | self.__read_data__() 224 | 225 | def __read_data__(self): 226 | self.scaler = StandardScaler() 227 | df_raw = pd.read_csv(os.path.join(self.root_path, 228 | self.data_path)) 229 | 230 | ''' 231 | df_raw.columns: ['date', ...(other features), target feature] 232 | ''' 233 | cols = list(df_raw.columns) 234 | cols.remove(self.target) 235 | cols.remove('date') 236 | df_raw = df_raw[['date'] + cols + [self.target]] 237 | num_train = int(len(df_raw) * 0.7) 238 | num_test = int(len(df_raw) * 0.2) 239 | num_vali = len(df_raw) - num_train - num_test 240 | border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] 241 | border2s = [num_train, num_train + num_vali, len(df_raw)] 242 | border1 = border1s[self.set_type] 243 | border2 = border2s[self.set_type] 244 | 245 | if self.features == 'M' or self.features == 'MS': 246 | cols_data = df_raw.columns[1:] 247 | df_data = df_raw[cols_data] 248 | elif self.features == 'S': 249 | df_data = df_raw[[self.target]] 250 | 251 | if self.scale: 252 | train_data = df_data[border1s[0]:border2s[0]] 253 | self.scaler.fit(train_data.values) 254 | data = self.scaler.transform(df_data.values) 255 | else: 256 | data = df_data.values 257 | 258 | df_stamp = df_raw[['date']][border1:border2] 259 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 260 | if self.timeenc == 0: 261 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 262 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 263 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 264 | df_stamp['hour'] = 
df_stamp.date.apply(lambda row: row.hour, 1) 265 | data_stamp = df_stamp.drop(['date'], 1).values 266 | elif self.timeenc == 1: 267 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 268 | data_stamp = data_stamp.transpose(1, 0) 269 | 270 | self.data_x = data[border1:border2] 271 | self.data_y = data[border1:border2] 272 | self.data_stamp = data_stamp 273 | 274 | def __getitem__(self, index): 275 | s_begin = index 276 | s_end = s_begin + self.seq_len 277 | r_begin = s_end - self.label_len 278 | r_end = r_begin + self.label_len + self.pred_len 279 | 280 | seq_x = self.data_x[s_begin:s_end] 281 | seq_y = self.data_y[r_begin:r_end] 282 | seq_x_mark = self.data_stamp[s_begin:s_end] 283 | seq_y_mark = self.data_stamp[r_begin:r_end] 284 | 285 | return seq_x, seq_y, seq_x_mark, seq_y_mark 286 | 287 | def __len__(self): 288 | return len(self.data_x) - self.seq_len - self.pred_len + 1 289 | 290 | def inverse_transform(self, data): 291 | return self.scaler.inverse_transform(data) 292 | -------------------------------------------------------------------------------- /exp/exp_short_term_forecasting.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from data_provider.data_factory import data_provider 4 | from data_provider.m4 import M4Meta 5 | from exp.exp_basic import Exp_Basic 6 | from utils.tools import EarlyStopping, adjust_learning_rate, visual, save_to_csv 7 | from utils.losses import mape_loss, mase_loss, smape_loss 8 | from utils.m4_summary import M4Summary 9 | import torch 10 | import torch.nn as nn 11 | from torch import optim 12 | import os 13 | import time 14 | import warnings 15 | import numpy as np 16 | import pandas 17 | 18 | warnings.filterwarnings('ignore') 19 | 20 | 21 | class Exp_Short_Term_Forecast(Exp_Basic): 22 | def __init__(self, args): 23 | super(Exp_Short_Term_Forecast, self).__init__(args) 24 | 25 | def _build_model(self): 26 | if self.args.data == 'm4': 27 | self.args.pred_len = M4Meta.horizons_map[self.args.seasonal_patterns] # Up to M4 config 28 | self.args.seq_len = 2 * self.args.pred_len # input_len = 2*pred_len 29 | self.args.label_len = self.args.pred_len 30 | self.args.frequency_map = M4Meta.frequency_map[self.args.seasonal_patterns] 31 | model = self.model_dict[self.args.model].Model(self.args).float() 32 | 33 | if self.args.use_multi_gpu and self.args.use_gpu: 34 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 35 | return model 36 | 37 | def _get_data(self, flag): 38 | data_set, data_loader = data_provider(self.args, flag) 39 | return data_set, data_loader 40 | 41 | def _select_optimizer(self): 42 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 43 | return model_optim 44 | 45 | def _select_criterion(self, loss_name='MSE'): 46 | if loss_name == 'MSE': 47 | return nn.MSELoss() 48 | elif loss_name == 'MAPE': 49 | return mape_loss() 50 | elif loss_name == 'MASE': 51 | return mase_loss() 52 | elif loss_name == 'SMAPE': 53 | return smape_loss() 54 | 55 | def train(self, setting): 56 | train_data, train_loader = self._get_data(flag='train') 57 | vali_data, vali_loader = self._get_data(flag='val') 58 | 59 | path = os.path.join(self.args.checkpoints, setting) 60 | if not os.path.exists(path): 61 | os.makedirs(path) 62 | 63 | time_now = time.time() 64 | 65 | train_steps = len(train_loader) 66 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 67 | 68 | model_optim = 
self._select_optimizer() 69 | criterion = self._select_criterion(self.args.loss) 70 | 71 | scheduler = lr_scheduler.OneCycleLR(optimizer = model_optim, 72 | steps_per_epoch = train_steps, 73 | pct_start = self.args.pct_start, 74 | epochs = self.args.train_epochs, 75 | max_lr = self.args.learning_rate) 76 | 77 | for epoch in range(self.args.train_epochs): 78 | iter_count = 0 79 | train_loss = [] 80 | 81 | self.model.train() 82 | epoch_time = time.time() 83 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): 84 | iter_count += 1 85 | model_optim.zero_grad() 86 | 87 | batch_x = batch_x.float().to(self.device) 88 | batch_y = batch_y.float().to(self.device) 89 | 90 | batch_y_mark = batch_y_mark.float().to(self.device) 91 | 92 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() 93 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) 94 | 95 | outputs = self.model(batch_x, None, dec_inp, None) 96 | f_dim = -1 if self.args.features == 'MS' else 0 97 | outputs = outputs[:, -self.args.pred_len:, f_dim:] 98 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) 99 | 100 | batch_y_mark = batch_y_mark[:, -self.args.pred_len:, f_dim:].to(self.device) 101 | loss_value = criterion(batch_x, self.args.frequency_map, outputs, batch_y, batch_y_mark) 102 | # loss_sharpness = mse((outputs[:, 1:, :] - outputs[:, :-1, :]), (batch_y[:, 1:, :] - batch_y[:, :-1, :])) 103 | loss = loss_value # + loss_sharpness * 1e-5 104 | train_loss.append(loss.item()) 105 | 106 | if (i + 1) % 100 == 0: 107 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 108 | speed = (time.time() - time_now) / iter_count 109 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 110 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 111 | iter_count = 0 112 | time_now = time.time() 113 | 114 | loss.backward() 115 | model_optim.step() 116 | 117 | if self.args.lradj == 'TST': 118 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 119 | scheduler.step() 120 | 121 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 122 | train_loss = np.average(train_loss) 123 | vali_loss = self.vali(train_loader, vali_loader, criterion) 124 | test_loss = vali_loss 125 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 126 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 127 | early_stopping(vali_loss, self.model, path) 128 | if early_stopping.early_stop: 129 | print("Early stopping") 130 | break 131 | 132 | if self.args.lradj != 'TST': 133 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True) 134 | else: 135 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 136 | 137 | best_model_path = path + '/' + 'checkpoint.pth' 138 | self.model.load_state_dict(torch.load(best_model_path)) 139 | 140 | return self.model 141 | 142 | def vali(self, train_loader, vali_loader, criterion): 143 | x, _ = train_loader.dataset.last_insample_window() 144 | y = vali_loader.dataset.timeseries 145 | x = torch.tensor(x, dtype=torch.float32).to(self.device) 146 | x = x.unsqueeze(-1) 147 | 148 | self.model.eval() 149 | with torch.no_grad(): 150 | # decoder input 151 | B, _, C = x.shape 152 | dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) 153 | dec_inp = torch.cat([x[:, -self.args.label_len:, :], 
dec_inp], dim=1).float() 154 | 155 | # encoder - decoder 156 | outputs = torch.zeros((B, self.args.pred_len, C)).float() # .to(self.device) 157 | id_list = np.arange(0, B, 500) # validation set size 158 | id_list = np.append(id_list, B) 159 | for i in range(len(id_list) - 1): 160 | x_enc = x[id_list[i]:id_list[i + 1]] 161 | outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x_enc, None, 162 | dec_inp[id_list[i]:id_list[i + 1]], 163 | None).detach().cpu() 164 | f_dim = -1 if self.args.features == 'MS' else 0 165 | outputs = outputs[:, -self.args.pred_len:, f_dim:] 166 | pred = outputs 167 | true = torch.from_numpy(np.array(y)) 168 | batch_y_mark = torch.ones(true.shape) 169 | 170 | loss = criterion(x.detach().cpu()[:, :, 0], self.args.frequency_map, pred[:, :, 0], true, batch_y_mark) 171 | 172 | self.model.train() 173 | return loss 174 | 175 | def test(self, setting, test=0): 176 | _, train_loader = self._get_data(flag='train') 177 | _, test_loader = self._get_data(flag='test') 178 | x, _ = train_loader.dataset.last_insample_window() 179 | y = test_loader.dataset.timeseries 180 | x = torch.tensor(x, dtype=torch.float32).to(self.device) 181 | x = x.unsqueeze(-1) 182 | 183 | if test: 184 | print('loading model') 185 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 186 | 187 | folder_path = './test_results/' + setting + '/' 188 | if not os.path.exists(folder_path): 189 | os.makedirs(folder_path) 190 | 191 | self.model.eval() 192 | with torch.no_grad(): 193 | B, _, C = x.shape 194 | dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) 195 | dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float() 196 | # encoder - decoder 197 | outputs = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) 198 | id_list = np.arange(0, B, 1) 199 | id_list = np.append(id_list, B) 200 | for i in range(len(id_list) - 1): 201 | x_enc = x[id_list[i]:id_list[i + 1]] 202 | outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x_enc, None, 203 | dec_inp[id_list[i]:id_list[i + 1]], None) 204 | 205 | if id_list[i] % 1000 == 0: 206 | print(id_list[i]) 207 | 208 | f_dim = -1 if self.args.features == 'MS' else 0 209 | outputs = outputs[:, -self.args.pred_len:, f_dim:] 210 | outputs = outputs.detach().cpu().numpy() 211 | 212 | preds = outputs 213 | trues = y 214 | x = x.detach().cpu().numpy() 215 | 216 | for i in range(0, preds.shape[0], preds.shape[0] // 10): 217 | gt = np.concatenate((x[i, :, 0], trues[i]), axis=0) 218 | pd = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0) 219 | visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) 220 | save_to_csv(gt, pd, os.path.join(folder_path, str(i) + '.csv')) 221 | 222 | print('test shape:', preds.shape) 223 | 224 | # result save 225 | folder_path = './m4_results/' + self.args.model + '/' 226 | if not os.path.exists(folder_path): 227 | os.makedirs(folder_path) 228 | 229 | forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(self.args.pred_len)]) 230 | forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]] 231 | forecasts_df.index.name = 'id' 232 | forecasts_df.set_index(forecasts_df.columns[0], inplace=True) 233 | forecasts_df.to_csv(folder_path + self.args.seasonal_patterns + '_forecast.csv') 234 | 235 | print(self.args.model) 236 | file_path = './m4_results/' + self.args.model + '/' 237 | if 'Weekly_forecast.csv' in os.listdir(file_path) \ 238 | and 'Monthly_forecast.csv' in os.listdir(file_path) \ 239 | and 
'Yearly_forecast.csv' in os.listdir(file_path) \ 240 | and 'Daily_forecast.csv' in os.listdir(file_path) \ 241 | and 'Hourly_forecast.csv' in os.listdir(file_path) \ 242 | and 'Quarterly_forecast.csv' in os.listdir(file_path): 243 | m4_summary = M4Summary(file_path, self.args.root_path) 244 | # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True) 245 | smape_results, owa_results, mape, mase = m4_summary.evaluate() 246 | print('smape:', smape_results) 247 | print('mape:', mape) 248 | print('mase:', mase) 249 | print('owa:', owa_results) 250 | else: 251 | print('After all 6 tasks are finished, you can calculate the averaged index') 252 | return 253 | 254 | -------------------------------------------------------------------------------- /layers/SelfAttention_Family.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from math import sqrt 5 | 6 | from einops import rearrange, repeat 7 | 8 | from utils.masking import TriangularCausalMask, ProbMask 9 | from reformer_pytorch import LSHSelfAttention 10 | 11 | 12 | class DSAttention(nn.Module): 13 | '''De-stationary Attention''' 14 | 15 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 16 | super(DSAttention, self).__init__() 17 | self.scale = scale 18 | self.mask_flag = mask_flag 19 | self.output_attention = output_attention 20 | self.dropout = nn.Dropout(attention_dropout) 21 | 22 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 23 | B, L, H, E = queries.shape 24 | _, S, _, D = values.shape 25 | scale = self.scale or 1. / sqrt(E) 26 | 27 | tau = 1.0 if tau is None else tau.unsqueeze( 28 | 1).unsqueeze(1) # B x 1 x 1 x 1 29 | delta = 0.0 if delta is None else delta.unsqueeze( 30 | 1).unsqueeze(1) # B x 1 x 1 x S 31 | 32 | # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors 33 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta 34 | 35 | if self.mask_flag: 36 | if attn_mask is None: 37 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 38 | 39 | scores.masked_fill_(attn_mask.mask, -np.inf) 40 | 41 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 42 | V = torch.einsum("bhls,bshd->blhd", A, values) 43 | 44 | if self.output_attention: 45 | return (V.contiguous(), A) 46 | else: 47 | return (V.contiguous(), None) 48 | 49 | 50 | class FullAttention(nn.Module): 51 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 52 | super(FullAttention, self).__init__() 53 | self.scale = scale 54 | self.mask_flag = mask_flag 55 | self.output_attention = output_attention 56 | self.dropout = nn.Dropout(attention_dropout) 57 | 58 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 59 | B, L, H, E = queries.shape 60 | _, S, _, D = values.shape 61 | scale = self.scale or 1. 
/ sqrt(E) 62 | 63 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 64 | 65 | if self.mask_flag: 66 | if attn_mask is None: 67 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 68 | 69 | scores.masked_fill_(attn_mask.mask, -np.inf) 70 | 71 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 72 | V = torch.einsum("bhls,bshd->blhd", A, values) 73 | 74 | if self.output_attention: 75 | return (V.contiguous(), A) 76 | else: 77 | return (V.contiguous(), None) 78 | 79 | 80 | class ProbAttention(nn.Module): 81 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 82 | super(ProbAttention, self).__init__() 83 | self.factor = factor 84 | self.scale = scale 85 | self.mask_flag = mask_flag 86 | self.output_attention = output_attention 87 | self.dropout = nn.Dropout(attention_dropout) 88 | 89 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 90 | # Q [B, H, L, D] 91 | B, H, L_K, E = K.shape 92 | _, _, L_Q, _ = Q.shape 93 | 94 | # calculate the sampled Q_K 95 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 96 | # real U = U_part(factor*ln(L_k))*L_q 97 | index_sample = torch.randint(L_K, (L_Q, sample_k)) 98 | K_sample = K_expand[:, :, torch.arange( 99 | L_Q).unsqueeze(1), index_sample, :] 100 | Q_K_sample = torch.matmul( 101 | Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 102 | 103 | # find the Top_k query with sparisty measurement 104 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 105 | M_top = M.topk(n_top, sorted=False)[1] 106 | 107 | # use the reduced Q to calculate Q_K 108 | Q_reduce = Q[torch.arange(B)[:, None, None], 109 | torch.arange(H)[None, :, None], 110 | M_top, :] # factor*ln(L_q) 111 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 112 | 113 | return Q_K, M_top 114 | 115 | def _get_initial_context(self, V, L_Q): 116 | B, H, L_V, D = V.shape 117 | if not self.mask_flag: 118 | # V_sum = V.sum(dim=-2) 119 | V_sum = V.mean(dim=-2) 120 | contex = V_sum.unsqueeze(-2).expand(B, H, 121 | L_Q, V_sum.shape[-1]).clone() 122 | else: # use mask 123 | # requires that L_Q == L_V, i.e. 
for self-attention only 124 | assert (L_Q == L_V) 125 | contex = V.cumsum(dim=-2) 126 | return contex 127 | 128 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 129 | B, H, L_V, D = V.shape 130 | 131 | if self.mask_flag: 132 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 133 | scores.masked_fill_(attn_mask.mask, -np.inf) 134 | 135 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 136 | 137 | context_in[torch.arange(B)[:, None, None], 138 | torch.arange(H)[None, :, None], 139 | index, :] = torch.matmul(attn, V).type_as(context_in) 140 | if self.output_attention: 141 | attns = (torch.ones([B, H, L_V, L_V]) / 142 | L_V).type_as(attn).to(attn.device) 143 | attns[torch.arange(B)[:, None, None], torch.arange(H)[ 144 | None, :, None], index, :] = attn 145 | return (context_in, attns) 146 | else: 147 | return (context_in, None) 148 | 149 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 150 | B, L_Q, H, D = queries.shape 151 | _, L_K, _, _ = keys.shape 152 | 153 | queries = queries.transpose(2, 1) 154 | keys = keys.transpose(2, 1) 155 | values = values.transpose(2, 1) 156 | 157 | U_part = self.factor * \ 158 | np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 159 | u = self.factor * \ 160 | np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 161 | 162 | U_part = U_part if U_part < L_K else L_K 163 | u = u if u < L_Q else L_Q 164 | 165 | scores_top, index = self._prob_QK( 166 | queries, keys, sample_k=U_part, n_top=u) 167 | 168 | # add scale factor 169 | scale = self.scale or 1. / sqrt(D) 170 | if scale is not None: 171 | scores_top = scores_top * scale 172 | # get the context 173 | context = self._get_initial_context(values, L_Q) 174 | # update the context with selected top_k queries 175 | context, attn = self._update_context( 176 | context, values, scores_top, index, L_Q, attn_mask) 177 | 178 | return context.contiguous(), attn 179 | 180 | 181 | class AttentionLayer(nn.Module): 182 | def __init__(self, attention, d_model, n_heads, d_keys=None, 183 | d_values=None): 184 | super(AttentionLayer, self).__init__() 185 | 186 | d_keys = d_keys or (d_model // n_heads) 187 | d_values = d_values or (d_model // n_heads) 188 | 189 | self.inner_attention = attention 190 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 191 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 192 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 193 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 194 | self.n_heads = n_heads 195 | 196 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 197 | B, L, _ = queries.shape 198 | _, S, _ = keys.shape 199 | H = self.n_heads 200 | 201 | queries = self.query_projection(queries).view(B, L, H, -1) 202 | keys = self.key_projection(keys).view(B, S, H, -1) 203 | values = self.value_projection(values).view(B, S, H, -1) 204 | 205 | out, attn = self.inner_attention( 206 | queries, 207 | keys, 208 | values, 209 | attn_mask, 210 | tau=tau, 211 | delta=delta 212 | ) 213 | out = out.view(B, L, -1) 214 | 215 | return self.out_projection(out), attn 216 | 217 | 218 | class ReformerLayer(nn.Module): 219 | def __init__(self, attention, d_model, n_heads, d_keys=None, 220 | d_values=None, causal=False, bucket_size=4, n_hashes=4): 221 | super().__init__() 222 | self.bucket_size = bucket_size 223 | self.attn = LSHSelfAttention( 224 | dim=d_model, 225 | heads=n_heads, 226 | bucket_size=bucket_size, 227 | n_hashes=n_hashes, 228 | 
            causal=causal
229 |         )
230 | 
231 |     def fit_length(self, queries):
232 |         # inside reformer: assert N % (bucket_size * 2) == 0
233 |         B, N, C = queries.shape
234 |         if N % (self.bucket_size * 2) == 0:
235 |             return queries
236 |         else:
237 |             # pad the time series so its length is a multiple of bucket_size * 2
238 |             fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
239 |             return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)
240 | 
241 |     def forward(self, queries, keys, values, attn_mask, tau, delta):
242 |         # in Reformer: default queries=keys
243 |         B, N, C = queries.shape
244 |         queries = self.attn(self.fit_length(queries))[:, :N, :]
245 |         return queries, None
246 | 
247 | class TwoStageAttentionLayer(nn.Module):
248 |     '''
249 |     The Two Stage Attention (TSA) Layer
250 |     input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model]
251 |     '''
252 | 
253 |     def __init__(self, configs,
254 |                  seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1):
255 |         super(TwoStageAttentionLayer, self).__init__()
256 |         d_ff = d_ff or 4 * d_model
257 |         self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
258 |                                                            output_attention=configs.output_attention), d_model, n_heads)
259 |         self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
260 |                                                        output_attention=configs.output_attention), d_model, n_heads)
261 |         self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
262 |                                                          output_attention=configs.output_attention), d_model, n_heads)
263 |         self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))
264 | 
265 |         self.dropout = nn.Dropout(dropout)
266 | 
267 |         self.norm1 = nn.LayerNorm(d_model)
268 |         self.norm2 = nn.LayerNorm(d_model)
269 |         self.norm3 = nn.LayerNorm(d_model)
270 |         self.norm4 = nn.LayerNorm(d_model)
271 | 
272 |         self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff),
273 |                                   nn.GELU(),
274 |                                   nn.Linear(d_ff, d_model))
275 |         self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff),
276 |                                   nn.GELU(),
277 |                                   nn.Linear(d_ff, d_model))
278 | 
279 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
280 |         # Cross Time Stage: Directly apply MSA to each dimension
281 |         batch = x.shape[0]
282 |         time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model')
283 |         time_enc, attn = self.time_attention(
284 |             time_in, time_in, time_in, attn_mask=None, tau=None, delta=None
285 |         )
286 |         dim_in = time_in + self.dropout(time_enc)
287 |         dim_in = self.norm1(dim_in)
288 |         dim_in = dim_in + self.dropout(self.MLP1(dim_in))
289 |         dim_in = self.norm2(dim_in)
290 | 
291 |         # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection
292 |         dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch)
293 |         batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch)
294 |         dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None)
295 |         dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None)
296 |         dim_enc = dim_send + self.dropout(dim_receive)
297 |         dim_enc = self.norm3(dim_enc)
298 |         dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc))
299 |         dim_enc = self.norm4(dim_enc)
300 | 
301 |         final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch)
302 | 
303 |         return final_out
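304 | 
305 | 
306 | if __name__ == "__main__":
307 |     # Minimal smoke test (added for illustration; the sizes below are
308 |     # assumptions, not project defaults). AttentionLayer projects
309 |     # (B, L, d_model) inputs to multi-head Q/K/V, applies the wrapped
310 |     # attention, and projects back, so the output shape matches the input.
311 |     B, L, d_model, n_heads = 2, 96, 64, 8
312 |     layer = AttentionLayer(FullAttention(mask_flag=False), d_model, n_heads)
313 |     x = torch.randn(B, L, d_model)
314 |     out, attn = layer(x, x, x, attn_mask=None)
315 |     assert out.shape == (B, L, d_model)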
-------------------------------------------------------------------------------- /layers/SelfAttention_Family1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from math import sqrt 5 | from utils.masking import TriangularCausalMask, ProbMask 6 | from reformer_pytorch import LSHSelfAttention 7 | from einops import rearrange 8 | 9 | 10 | # Code implementation from https://github.com/thuml/Flowformer 11 | class FlowAttention(nn.Module): 12 | def __init__(self, attention_dropout=0.1): 13 | super(FlowAttention, self).__init__() 14 | self.dropout = nn.Dropout(attention_dropout) 15 | 16 | def kernel_method(self, x): 17 | return torch.sigmoid(x) 18 | 19 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 20 | queries = queries.transpose(1, 2) 21 | keys = keys.transpose(1, 2) 22 | values = values.transpose(1, 2) 23 | # kernel 24 | queries = self.kernel_method(queries) 25 | keys = self.kernel_method(keys) 26 | # incoming and outgoing 27 | normalizer_row = 1.0 / (torch.einsum("nhld,nhd->nhl", queries + 1e-6, keys.sum(dim=2) + 1e-6)) 28 | normalizer_col = 1.0 / (torch.einsum("nhsd,nhd->nhs", keys + 1e-6, queries.sum(dim=2) + 1e-6)) 29 | # reweighting 30 | normalizer_row_refine = ( 31 | torch.einsum("nhld,nhd->nhl", queries + 1e-6, (keys * normalizer_col[:, :, :, None]).sum(dim=2) + 1e-6)) 32 | normalizer_col_refine = ( 33 | torch.einsum("nhsd,nhd->nhs", keys + 1e-6, (queries * normalizer_row[:, :, :, None]).sum(dim=2) + 1e-6)) 34 | # competition and allocation 35 | normalizer_row_refine = torch.sigmoid( 36 | normalizer_row_refine * (float(queries.shape[2]) / float(keys.shape[2]))) 37 | normalizer_col_refine = torch.softmax(normalizer_col_refine, dim=-1) * keys.shape[2] # B h L vis 38 | # multiply 39 | kv = keys.transpose(-2, -1) @ (values * normalizer_col_refine[:, :, :, None]) 40 | x = (((queries @ kv) * normalizer_row[:, :, :, None]) * normalizer_row_refine[:, :, :, None]).transpose(1, 41 | 2).contiguous() 42 | return x, None 43 | 44 | 45 | # Code implementation from https://github.com/shreyansh26/FlashAttention-PyTorch 46 | class FlashAttention(nn.Module): 47 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 48 | super(FlashAttention, self).__init__() 49 | self.scale = scale 50 | self.mask_flag = mask_flag 51 | self.output_attention = output_attention 52 | self.dropout = nn.Dropout(attention_dropout) 53 | 54 | def flash_attention_forward(self, Q, K, V, mask=None): 55 | BLOCK_SIZE = 32 56 | NEG_INF = -1e10 # -infinity 57 | EPSILON = 1e-10 58 | # mask = torch.randint(0, 2, (128, 8)).to(device='cuda') 59 | O = torch.zeros_like(Q, requires_grad=True) 60 | l = torch.zeros(Q.shape[:-1])[..., None] 61 | m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF 62 | 63 | O = O.to(device='cuda') 64 | l = l.to(device='cuda') 65 | m = m.to(device='cuda') 66 | 67 | Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1]) 68 | KV_BLOCK_SIZE = BLOCK_SIZE 69 | 70 | Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2) 71 | K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2) 72 | V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2) 73 | if mask is not None: 74 | mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1)) 75 | 76 | Tr = len(Q_BLOCKS) 77 | Tc = len(K_BLOCKS) 78 | 79 | O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2)) 80 | l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2)) 81 | m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2)) 82 | 83 | for j 


# Code implementation from https://github.com/shreyansh26/FlashAttention-PyTorch
class FlashAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FlashAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def flash_attention_forward(self, Q, K, V, mask=None):
        BLOCK_SIZE = 32
        NEG_INF = -1e10  # -infinity
        EPSILON = 1e-10
        # mask = torch.randint(0, 2, (128, 8)).to(device='cuda')
        O = torch.zeros_like(Q, requires_grad=True)
        l = torch.zeros(Q.shape[:-1])[..., None]
        m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF

        # keep the running statistics on the same device as the inputs
        O = O.to(Q.device)
        l = l.to(Q.device)
        m = m.to(Q.device)

        Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1])
        KV_BLOCK_SIZE = BLOCK_SIZE

        Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2)
        K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2)
        V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2)
        if mask is not None:
            mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1))

        Tr = len(Q_BLOCKS)
        Tc = len(K_BLOCKS)

        O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2))
        l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2))
        m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2))

        for j in range(Tc):
            Kj = K_BLOCKS[j]
            Vj = V_BLOCKS[j]
            if mask is not None:
                maskj = mask_BLOCKS[j]

            for i in range(Tr):
                Qi = Q_BLOCKS[i]
                Oi = O_BLOCKS[i]
                li = l_BLOCKS[i]
                mi = m_BLOCKS[i]

                scale = 1 / np.sqrt(Q.shape[-1])
                Qi_scaled = Qi * scale

                S_ij = torch.einsum('... i d, ... j d -> ... i j', Qi_scaled, Kj)
                if mask is not None:
                    # Masking
                    maskj_temp = rearrange(maskj, 'b j -> b 1 1 j')
                    S_ij = torch.where(maskj_temp > 0, S_ij, NEG_INF)

                m_block_ij, _ = torch.max(S_ij, dim=-1, keepdims=True)
                P_ij = torch.exp(S_ij - m_block_ij)
                if mask is not None:
                    # Masking
                    P_ij = torch.where(maskj_temp > 0, P_ij, 0.)

                l_block_ij = torch.sum(P_ij, dim=-1, keepdims=True) + EPSILON

                P_ij_Vj = torch.einsum('... i j, ... j d -> ... i d', P_ij, Vj)

                mi_new = torch.maximum(m_block_ij, mi)
                li_new = torch.exp(mi - mi_new) * li + torch.exp(m_block_ij - mi_new) * l_block_ij

                O_BLOCKS[i] = (li / li_new) * torch.exp(mi - mi_new) * Oi + (
                        torch.exp(m_block_ij - mi_new) / li_new) * P_ij_Vj
                l_BLOCKS[i] = li_new
                m_BLOCKS[i] = mi_new

        O = torch.cat(O_BLOCKS, dim=2)
        l = torch.cat(l_BLOCKS, dim=2)
        m = torch.cat(m_BLOCKS, dim=2)
        return O, l, m

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        res = \
            self.flash_attention_forward(queries.permute(0, 2, 1, 3), keys.permute(0, 2, 1, 3),
                                         values.permute(0, 2, 1, 3), attn_mask)[0]
        return res.permute(0, 2, 1, 3).contiguous(), None
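

# The block loop above is the online-softmax recurrence: per query row it
# maintains the running max m_i and running normalizer l_i. When a new key
# block arrives with block max m_block and partial sum l_block,
#     m_new = max(m_i, m_block)
#     l_new = exp(m_i - m_new) * l_i + exp(m_block - m_new) * l_block
# and the previously accumulated output O_i is rescaled by
# (l_i / l_new) * exp(m_i - m_new), so the final O equals
# softmax(Q K^T / sqrt(d)) V without ever materializing the full L x L
# attention matrix.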


class FullAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)
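

# A minimal usage sketch for FullAttention (hypothetical sizes). With
# mask_flag=True and attn_mask=None, a TriangularCausalMask is built
# internally, so position t can only attend to positions <= t.
def _full_attention_example():
    q = torch.randn(2, 96, 8, 64)  # (batch, length, heads, head_dim)
    out, attn = FullAttention(mask_flag=True, output_attention=True)(q, q, q, attn_mask=None)
    return out.shape, attn.shape  # (2, 96, 8, 64), (2, 8, 96, 96)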


# Code implementation from https://github.com/zhouhaoyi/Informer2020
class ProbAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        # real U = U_part(factor*ln(L_k))*L_q
        index_sample = torch.randint(L_K, (L_Q, sample_k))
        K_sample = K_expand[:, :, torch.arange(
            L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(
            Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            context = V_sum.unsqueeze(-2).expand(B, H,
                                                 L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            # requires that L_Q == L_V, i.e. for self-attention only
            assert (L_Q == L_V)
            context = V.cumsum(dim=-2)
        return context

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V]) /
                     L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[
                None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * \
            np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * \
            np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(
            queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1. / sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(
            context, values, scores_top, index, L_Q, attn_mask)

        return context.contiguous(), attn
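

# ProbAttention scores every query against a random sample of U_part keys,
# keeps only the u queries (u ~ factor * ln L_Q) whose sampled score
# distribution is farthest from uniform (max minus mean), and computes exact
# attention just for those rows; all other rows fall back to the mean of V
# (or its cumulative sum in the causal case). This is what reduces Informer's
# attention cost from O(L^2) to O(L log L).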


class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AttentionLayer, self).__init__()

        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask,
            tau=tau,
            delta=delta
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), attn


class ReformerLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None, causal=False, bucket_size=4, n_hashes=4):
        super().__init__()
        self.bucket_size = bucket_size
        self.attn = LSHSelfAttention(
            dim=d_model,
            heads=n_heads,
            bucket_size=bucket_size,
            n_hashes=n_hashes,
            causal=causal
        )

    def fit_length(self, queries):
        # inside reformer: assert N % (bucket_size * 2) == 0
        B, N, C = queries.shape
        if N % (self.bucket_size * 2) == 0:
            return queries
        else:
            # pad the series so its length is a multiple of bucket_size * 2
            fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
            return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)

    def forward(self, queries, keys, values, attn_mask, tau, delta):
        # in Reformer: default queries=keys
        B, N, C = queries.shape
        queries = self.attn(self.fit_length(queries))[:, :N, :]
        return queries, None
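

# A minimal usage sketch for AttentionLayer, assuming d_model=512 and
# n_heads=8 (hypothetical sizes). AttentionLayer is the common wrapper:
# it projects [B, L, d_model] inputs into per-head queries/keys/values,
# runs any of the attention modules above, and projects back to d_model.
def _attention_layer_example():
    layer = AttentionLayer(FullAttention(mask_flag=False), d_model=512, n_heads=8)
    x = torch.randn(2, 96, 512)  # (batch, length, d_model)
    out, _ = layer(x, x, x, attn_mask=None)
    return out.shape  # (2, 96, 512)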

--------------------------------------------------------------------------------
/exp/exp_long_term_forecasting.py:
--------------------------------------------------------------------------------
from torch.optim import lr_scheduler

from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual, save_to_csv, visual_weights
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np

warnings.filterwarnings('ignore')


class Exp_Long_Term_Forecast(Exp_Basic):
    def __init__(self, args):
        super(Exp_Long_Term_Forecast, self).__init__(args)

    def _build_model(self):
        model = self.model_dict[self.args.model].Model(self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        if self.args.data == 'PEMS':
            criterion = nn.L1Loss()
        else:
            criterion = nn.MSELoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                if 'PEMS' == self.args.data or 'Solar' == self.args.data:
                    batch_x_mark = None
                    batch_y_mark = None

                if self.args.down_sampling_layers == 0:
                    dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                    dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                else:
                    dec_inp = None

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if self.args.output_attention:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                f_dim = -1 if self.args.features == 'MS' else 0
                # keep only the prediction window and target channels
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

                pred = outputs.detach()
                true = batch_y.detach()

                if self.args.data == 'PEMS':
                    B, T, C = pred.shape
                    pred = pred.cpu().numpy()
                    true = true.cpu().numpy()
                    pred = vali_data.inverse_transform(pred.reshape(-1, C)).reshape(B, T, C)
                    true = vali_data.inverse_transform(true.reshape(-1, C)).reshape(B, T, C)
                    mae, mse, rmse, mape, mspe = metric(pred, true)
                    total_loss.append(mae)

                else:
                    loss = criterion(pred, true)
                    total_loss.append(loss.item())

        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss
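
    # Note: for PEMS the validation score is MAE computed on
    # inverse-transformed (original-scale) values, matching the L1 criterion
    # chosen in _select_criterion; all other datasets validate directly with
    # the scaled-space training loss.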

    def train(self, setting):
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
                                            steps_per_epoch=train_steps,
                                            pct_start=self.args.pct_start,
                                            epochs=self.args.train_epochs,
                                            max_lr=self.args.learning_rate)

        if self.args.use_amp:
            scaler = torch.cuda.amp.GradScaler()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()

            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                if 'PEMS' == self.args.data or 'Solar' == self.args.data:
                    batch_x_mark = None
                    batch_y_mark = None

                if self.args.down_sampling_layers == 0:
                    dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                    dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                else:
                    dec_inp = None

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                        f_dim = -1 if self.args.features == 'MS' else 0
                        outputs = outputs[:, -self.args.pred_len:, f_dim:]
                        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                        loss = criterion(outputs, batch_y)
                        train_loss.append(loss.item())
                else:
                    if self.args.output_attention:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                    f_dim = -1 if self.args.features == 'MS' else 0
                    # keep only the prediction window and target channels,
                    # mirroring the AMP branch above
                    outputs = outputs[:, -self.args.pred_len:, f_dim:]
                    batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                    loss = criterion(outputs, batch_y)
                    train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if self.args.use_amp:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()

                if self.args.lradj == 'TST':
                    adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
                    scheduler.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            if self.args.lradj != 'TST':
                adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
            else:
                print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model
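
    # Note: with lradj == 'TST' the OneCycleLR scheduler is stepped once per
    # batch (inside the loop above) and only logged at epoch end; any other
    # lradj policy instead adjusts the learning rate once per epoch through
    # adjust_learning_rate.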

    def test(self, setting, test=0):
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model')
            self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))

        checkpoints_path = './checkpoints/' + setting + '/'
        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                if 'PEMS' == self.args.data or 'Solar' == self.args.data:
                    batch_x_mark = None
                    batch_y_mark = None

                if self.args.down_sampling_layers == 0:
                    dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                    dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                else:
                    dec_inp = None

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if self.args.output_attention:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                f_dim = -1 if self.args.features == 'MS' else 0
                # keep only the prediction window and target channels
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

                outputs = outputs.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()

                pred = outputs
                true = batch_y

                preds.append(pred)
                trues.append(true)
                if i % 20 == 0:
                    input = batch_x.detach().cpu().numpy()
                    if test_data.scale and self.args.inverse:
                        shape = input.shape
                        input = test_data.inverse_transform(input.squeeze(0)).reshape(shape)
                    gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
                    pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.array(preds)
        trues = np.array(trues)
        print('test shape:', preds.shape, trues.shape)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        print('test shape:', preds.shape, trues.shape)

        if self.args.data == 'PEMS':
            B, T, C = preds.shape
            preds = test_data.inverse_transform(preds.reshape(-1, C)).reshape(B, T, C)
            trues = test_data.inverse_transform(trues.reshape(-1, C)).reshape(B, T, C)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse:{}, mae:{}'.format(mse, mae))
        print('rmse:{}, mape:{}, mspe:{}'.format(rmse, mape, mspe))

        f = open("result_long_term_forecast.txt", 'a')
        f.write(setting + " \n")
        if self.args.data == 'PEMS':
            f.write('mae:{}, mape:{}, rmse:{}'.format(mae, mape, rmse))
        else:
            f.write('mse:{}, mae:{}'.format(mse, mae))
        f.write('\n')
        f.write('\n')
        f.close()

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)
        return
--------------------------------------------------------------------------------
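
For reference, the arrays written by Exp_Long_Term_Forecast.test() under ./results/ can be reloaded afterwards. A minimal sketch, assuming a finished run; the setting tag below is hypothetical and must match the one used for training:

    import numpy as np

    setting = 'LLMMixer_ETTh1_96_96'  # hypothetical experiment tag
    folder = './results/' + setting + '/'
    mae, mse, rmse, mape, mspe = np.load(folder + 'metrics.npy')  # order as saved by test()
    preds = np.load(folder + 'pred.npy')   # (num_windows, pred_len, channels)
    trues = np.load(folder + 'true.npy')
    print(mse, mae, preds.shape, trues.shape)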