├── exp
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   ├── exp_basic.cpython-310.pyc
│   │   ├── exp_imputation.cpython-310.pyc
│   │   ├── exp_classification.cpython-310.pyc
│   │   ├── exp_anomaly_detection.cpython-310.pyc
│   │   ├── exp_long_term_forecasting.cpython-310.pyc
│   │   └── exp_short_term_forecasting.cpython-310.pyc
│   ├── exp_basic.py
│   ├── torchsummary.py
│   ├── exp_classification.py
│   ├── exp_anomaly_detection.py
│   ├── exp_imputation.py
│   ├── exp_short_term_forecasting.py
│   └── exp_long_term_forecasting.py
├── layers
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── Embed.cpython-310.pyc
│   │   ├── Embed1.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── StandardNorm.cpython-310.pyc
│   │   ├── Autoformer_EncDec.cpython-310.pyc
│   │   ├── Transformer_EncDec1.cpython-310.pyc
│   │   └── SelfAttention_Family1.cpython-310.pyc
│   ├── StandardNorm.py
│   ├── Transformer_EncDec1.py
│   ├── Transformer_EncDec.py
│   ├── Embed1.py
│   ├── AutoCorrelation.py
│   ├── Autoformer_EncDec.py
│   ├── Embed.py
│   ├── SelfAttention_Family.py
│   └── SelfAttention_Family1.py
├── models
│   ├── __init__.py
│   └── __pycache__
│       ├── LLMMixer.cpython-310.pyc
│       ├── __init__.cpython-310.pyc
│       └── TimeMixer.cpython-310.pyc
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── losses.cpython-310.pyc
│   │   ├── tools.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── masking.cpython-310.pyc
│   │   ├── metrics.cpython-310.pyc
│   │   ├── m4_summary.cpython-310.pyc
│   │   └── timefeatures.cpython-310.pyc
│   ├── masking.py
│   ├── metrics.py
│   ├── data_analysis.py
│   ├── losses.py
│   ├── timefeatures.py
│   ├── tools.py
│   └── m4_summary.py
├── scripts
│   └── read.me
├── data_provider
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── m4.cpython-310.pyc
│   │   ├── uea.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── data_loader.cpython-310.pyc
│   │   └── data_factory.cpython-310.pyc
│   ├── data_factory.py
│   ├── uea.py
│   └── data_loader.py
├── figures
│   ├── read.me
│   ├── llmmixer.pdf
│   └── llmmixer.png
├── requirements.txt
├── README.md
├── run.py
└── LICENSE

/exp/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/scripts/read.me:
--------------------------------------------------------------------------------
1 | scripts
2 | 
--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 | 
2 | 
--------------------------------------------------------------------------------
/figures/read.me:
--------------------------------------------------------------------------------
1 | all figures
2 | 
--------------------------------------------------------------------------------
/figures/llmmixer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/figures/llmmixer.pdf
--------------------------------------------------------------------------------
/figures/llmmixer.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/figures/llmmixer.png -------------------------------------------------------------------------------- /exp/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Embed.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Embed.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/losses.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/losses.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tools.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/tools.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_basic.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_basic.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Embed1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Embed1.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/LLMMixer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/LLMMixer.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/masking.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/masking.cpython-310.pyc 
-------------------------------------------------------------------------------- /utils/__pycache__/metrics.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/metrics.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/m4.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/m4.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/uea.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/uea.cpython-310.pyc -------------------------------------------------------------------------------- /models/__pycache__/TimeMixer.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/TimeMixer.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/m4_summary.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/m4_summary.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_imputation.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_imputation.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/StandardNorm.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/StandardNorm.cpython-310.pyc -------------------------------------------------------------------------------- /utils/__pycache__/timefeatures.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/timefeatures.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/__init__.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/__init__.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_classification.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_classification.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/data_loader.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/data_loader.cpython-310.pyc -------------------------------------------------------------------------------- 
/exp/__pycache__/exp_anomaly_detection.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_anomaly_detection.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Autoformer_EncDec.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Autoformer_EncDec.cpython-310.pyc -------------------------------------------------------------------------------- /data_provider/__pycache__/data_factory.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/data_factory.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/Transformer_EncDec1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Transformer_EncDec1.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_long_term_forecasting.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_long_term_forecasting.cpython-310.pyc -------------------------------------------------------------------------------- /exp/__pycache__/exp_short_term_forecasting.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_short_term_forecasting.cpython-310.pyc -------------------------------------------------------------------------------- /layers/__pycache__/SelfAttention_Family1.cpython-310.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/SelfAttention_Family1.cpython-310.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | einops==0.7.0 2 | matplotlib==3.7.0 3 | numpy==1.23.5 4 | pandas==1.5.3 5 | scikit_learn==1.2.2 6 | scipy==1.12.0 7 | tqdm==4.65.0 8 | patool==1.12 9 | reformer_pytorch==1.4.4 10 | sktime==0.4.1 11 | sympy==1.11.1 12 | torch==2.3.0 13 | accelerate==0.33.0 14 | transformers==4.44.0 15 | sentencepiece==0.2.0 16 | -------------------------------------------------------------------------------- /utils/masking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class TriangularCausalMask(): 5 | def __init__(self, B, L, device="cpu"): 6 | mask_shape = [B, 1, L, L] 7 | with torch.no_grad(): 8 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 9 | 10 | @property 11 | def mask(self): 12 | return self._mask 13 | 14 | 15 | class ProbMask(): 16 | def __init__(self, B, H, L, index, scores, device="cpu"): 17 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 18 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) 19 | indicator = _mask_ex[torch.arange(B)[:, None, None], 20 | 
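# annotation on the gather below: fancy indexing selects, for each batch b and
# head h, the rows of the expanded causal mask at the query positions given in
# `index` (the top-u queries kept by ProbSparse-style attention)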
torch.arange(H)[None, :, None], 21 | index, :].to(device) 22 | self._mask = indicator.view(scores.shape).to(device) 23 | 24 | @property 25 | def mask(self): 26 | return self._mask 27 | -------------------------------------------------------------------------------- /utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def RSE(pred, true): 5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) 6 | 7 | 8 | def CORR(pred, true): 9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) 10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) 11 | return (u / d).mean(-1) 12 | 13 | 14 | def MAE(pred, true): 15 | return np.mean(np.abs(pred - true)) 16 | 17 | 18 | def MSE(pred, true): 19 | return np.mean((pred - true) ** 2) 20 | 21 | 22 | def RMSE(pred, true): 23 | return np.sqrt(MSE(pred, true)) 24 | 25 | 26 | def MAPE(pred, true): 27 | mape = np.abs((pred - true) / true) 28 | mape = np.where(mape > 5, 0, mape) 29 | return np.mean(mape) 30 | 31 | 32 | def MSPE(pred, true): 33 | return np.mean(np.square((pred - true) / true)) 34 | 35 | 36 | def metric(pred, true): 37 | mae = MAE(pred, true) 38 | mse = MSE(pred, true) 39 | rmse = RMSE(pred, true) 40 | mape = MAPE(pred, true) 41 | mspe = MSPE(pred, true) 42 | 43 | return mae, mse, rmse, mape, mspe 44 | -------------------------------------------------------------------------------- /exp/exp_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | from models import LLMMixer 4 | 5 | 6 | class Exp_Basic(object): 7 | def __init__(self, args): 8 | self.args = args 9 | self.model_dict = { 10 | 'LLMMixer': LLMMixer, 11 | } 12 | self.device = self._acquire_device() 13 | self.model = self._build_model().to(self.device) 14 | 15 | def _build_model(self): 16 | raise NotImplementedError 17 | return None 18 | 19 | def _acquire_device(self): 20 | if self.args.use_gpu: 21 | import platform 22 | if platform.system() == 'Darwin': 23 | device = torch.device('mps') 24 | print('Use MPS') 25 | return device 26 | os.environ["CUDA_VISIBLE_DEVICES"] = str( 27 | self.args.gpu) if not self.args.use_multi_gpu else self.args.devices 28 | device = torch.device('cuda:{}'.format(self.args.gpu)) 29 | if self.args.use_multi_gpu: 30 | print('Use GPU: cuda{}'.format(self.args.device_ids)) 31 | else: 32 | print('Use GPU: cuda:{}'.format(self.args.gpu)) 33 | else: 34 | device = torch.device('cpu') 35 | print('Use CPU') 36 | return device 37 | 38 | def _get_data(self): 39 | pass 40 | 41 | def vali(self): 42 | pass 43 | 44 | def train(self): 45 | pass 46 | 47 | def test(self): 48 | pass 49 | -------------------------------------------------------------------------------- /layers/StandardNorm.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class Normalize(nn.Module): 6 | def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False): 7 | """ 8 | :param num_features: the number of features or channels 9 | :param eps: a value added for numerical stability 10 | :param affine: if True, RevIN has learnable affine parameters 11 | """ 12 | super(Normalize, self).__init__() 13 | self.num_features = num_features 14 | self.eps = eps 15 | self.affine = affine 16 | self.subtract_last = subtract_last 17 | self.non_norm = non_norm 18 | if self.affine: 19 | 
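# annotation: affine=True enables RevIN-style learnable rescaling; _init_params
# (defined below) creates weight/bias parameters of shape (num_features,)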
self._init_params() 20 | 21 | def forward(self, x, mode: str): 22 | if mode == 'norm': 23 | self._get_statistics(x) 24 | x = self._normalize(x) 25 | elif mode == 'denorm': 26 | x = self._denormalize(x) 27 | else: 28 | raise NotImplementedError 29 | return x 30 | 31 | def _init_params(self): 32 | # initialize RevIN params: (C,) 33 | self.affine_weight = nn.Parameter(torch.ones(self.num_features)) 34 | self.affine_bias = nn.Parameter(torch.zeros(self.num_features)) 35 | 36 | def _get_statistics(self, x): 37 | dim2reduce = tuple(range(1, x.ndim - 1)) 38 | if self.subtract_last: 39 | self.last = x[:, -1, :].unsqueeze(1) 40 | else: 41 | self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach() 42 | self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach() 43 | 44 | def _normalize(self, x): 45 | if self.non_norm: 46 | return x 47 | if self.subtract_last: 48 | x = x - self.last 49 | else: 50 | x = x - self.mean 51 | x = x / self.stdev 52 | if self.affine: 53 | x = x * self.affine_weight 54 | x = x + self.affine_bias 55 | return x 56 | 57 | def _denormalize(self, x): 58 | if self.non_norm: 59 | return x 60 | if self.affine: 61 | x = x - self.affine_bias 62 | x = x / (self.affine_weight + self.eps * self.eps) 63 | x = x * self.stdev 64 | if self.subtract_last: 65 | x = x + self.last 66 | else: 67 | x = x + self.mean 68 | return x 69 | -------------------------------------------------------------------------------- /data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \ 2 | MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_PEMS, \ 3 | Dataset_Solar 4 | from data_provider.uea import collate_fn 5 | from torch.utils.data import DataLoader 6 | 7 | data_dict = { 8 | 'ETTh1': Dataset_ETT_hour, 9 | 'ETTh2': Dataset_ETT_hour, 10 | 'ETTm1': Dataset_ETT_minute, 11 | 'ETTm2': Dataset_ETT_minute, 12 | 'custom': Dataset_Custom, 13 | } 14 | 15 | 16 | def data_provider(args, flag): 17 | Data = data_dict[args.data] 18 | timeenc = 0 if args.embed != 'timeF' else 1 19 | 20 | if flag == 'test': 21 | shuffle_flag = False 22 | drop_last = True 23 | if args.task_name == 'anomaly_detection' or args.task_name == 'classification': 24 | batch_size = args.batch_size 25 | else: 26 | batch_size = args.batch_size # bsz=1 for evaluation 27 | freq = args.freq 28 | else: 29 | shuffle_flag = True 30 | drop_last = True 31 | batch_size = args.batch_size # bsz for train and valid 32 | freq = args.freq 33 | 34 | if args.task_name == 'anomaly_detection': 35 | drop_last = False 36 | data_set = Data( 37 | root_path=args.root_path, 38 | win_size=args.seq_len, 39 | flag=flag, 40 | ) 41 | print(flag, len(data_set)) 42 | data_loader = DataLoader( 43 | data_set, 44 | batch_size=batch_size, 45 | shuffle=shuffle_flag, 46 | num_workers=args.num_workers, 47 | drop_last=drop_last) 48 | return data_set, data_loader 49 | elif args.task_name == 'classification': 50 | drop_last = False 51 | data_set = Data( 52 | root_path=args.root_path, 53 | flag=flag, 54 | ) 55 | print(flag, len(data_set)) 56 | data_loader = DataLoader( 57 | data_set, 58 | batch_size=batch_size, 59 | shuffle=shuffle_flag, 60 | num_workers=args.num_workers, 61 | drop_last=drop_last, 62 | collate_fn=lambda x: collate_fn(x, max_len=args.seq_len) 63 | ) 64 | return data_set, data_loader 65 | else: 66 | if args.data == 'm4': 67 | 
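# annotation (our reading): for the M4 benchmark the final incomplete batch is
# kept, since short-term forecasting evaluation needs every window, so
# drop_last is forced off right below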
drop_last = False 68 | data_set = Data( 69 | root_path=args.root_path, 70 | data_path=args.data_path, 71 | flag=flag, 72 | size=[args.seq_len, args.label_len, args.pred_len], 73 | features=args.features, 74 | target=args.target, 75 | timeenc=timeenc, 76 | freq=freq, 77 | seasonal_patterns=args.seasonal_patterns 78 | ) 79 | print(flag, len(data_set)) 80 | data_loader = DataLoader( 81 | data_set, 82 | batch_size=batch_size, 83 | shuffle=shuffle_flag, 84 | num_workers=args.num_workers, 85 | drop_last=drop_last) 86 | return data_set, data_loader 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | # LLM-Mixer: Multiscale Mixing in LLMs for Time Series Forecasting
3 | 
4 | 
5 | 
6 | 
7 | 
8 | 
9 | 10 | ![](https://img.shields.io/github/last-commit/Kowsher/LLMMixer?color=green) 11 | ![](https://img.shields.io/github/stars/Kowsher/LLMMixer?color=yellow) 12 | ![](https://img.shields.io/github/forks/Kowsher/LLMMixer?color=lightblue) 13 | ![](https://img.shields.io/badge/PRs-Welcome-green) 14 | 15 |
16 | 17 |
18 | 
19 | **[Paper Page](https://arxiv.org/abs/2410.11674)**
20 | **[Code](https://github.com/Kowsher/LLMMixer)**
21 | 
22 | 
23 | 
24 | 
25 | 
26 | ---
27 | >
28 | > 🙋 Please let us know if you find a mistake or have any suggestions!
29 | >
30 | > 🌟 If you find this resource helpful, please consider starring this repository and citing our research:
31 | 
32 | ```
33 | @article{kowsher2024llm,
34 |   title={LLM-Mixer: Multiscale Mixing in LLMs for Time Series Forecasting},
35 |   author={Kowsher, Md and Sobuj, Md Shohanur Islam and Prottasha, Nusrat Jahan and Alanis, E Alejandro and Garibay, Ozlem Ozmen and Yousefi, Niloofar},
36 |   journal={arXiv preprint arXiv:2410.11674},
37 |   year={2024}
38 | }
39 | 
40 | ```
41 | 
42 | ## Introduction
43 | LLM-Mixer is an advanced framework designed to improve forecasting accuracy by integrating multiscale time series decomposition with the power of large language models (LLMs). By capturing both short-term and long-term temporal patterns, LLM-Mixer enhances the model's ability to understand complex trends, making it highly effective for time series forecasting tasks.
44 | 
45 | 
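The multiscale idea can be pictured in a few lines of PyTorch. The sketch below is purely illustrative (the helper `multiscale_views`, the pooling factor, and the shapes are our assumptions, not this repo's API): each coarser view is an average-pooled copy of the input series, and the mixer combines information across the resulting scales.

```python
import torch
import torch.nn.functional as F

def multiscale_views(x, num_scales=3, factor=2):
    """Return [x, x_pooled_2x, x_pooled_4x, ...] for a (batch, time, channels) series."""
    views, cur = [x], x
    for _ in range(num_scales - 1):
        # avg_pool1d expects (batch, channels, time), so transpose around the pool
        cur = F.avg_pool1d(cur.transpose(1, 2), kernel_size=factor).transpose(1, 2)
        views.append(cur)
    return views

x = torch.randn(8, 96, 7)  # 8 samples, 96 time steps, 7 variates
print([tuple(v.shape) for v in multiscale_views(x)])
# [(8, 96, 7), (8, 48, 7), (8, 24, 7)]
```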

46 | ![LLM-Mixer architecture](figures/llmmixer.png)
47 | 
48 | 
49 | 

50 | 
51 | ## Requirements
52 | Use Python 3.11 from Miniconda.
53 | 
54 | - torch==2.3.0
55 | - accelerate==0.33.0
56 | - einops==0.7.0
57 | - matplotlib==3.7.0
58 | - numpy==1.23.5
59 | - pandas==1.5.3
60 | - scikit_learn==1.2.2
61 | - scipy==1.12.0
62 | - tqdm==4.65.0
63 | - peft==0.12.0
64 | - transformers==4.44.0
65 | - deepspeed==0.15.1
66 | - sentencepiece==0.2.0
67 | 
68 | 
69 | 
70 | ## Get Started
71 | 
72 | 1. Install requirements: `pip install -r requirements.txt`
73 | 2. Download data. You can download all the datasets from [Google Drive](https://drive.google.com/u/0/uc?id=1NF7VEefXCmXuWNbnNe858WvQAkJ_7wuP&export=download), [Baidu Drive](https://pan.baidu.com/share/init?surl=r3KhGd0Q9PJIUZdfEYoymg&pwd=i9iy) or [Kaggle Datasets](https://www.kaggle.com/datasets/wentixiaogege/time-series-dataset). **All the datasets are well pre-processed** and can be used easily.
74 | 3. Train the model by following the examples in `./scripts`.
75 | 
76 | 
77 | ## Acknowledgement
78 | 
79 | We appreciate the following GitHub repos for their valuable code and efforts.
80 | - Time-Series-Library (https://github.com/thuml/Time-Series-Library)
81 | - TimeMixer (https://github.com/kwuking/TimeMixer)
82 | - Time-LLM (https://github.com/KimMeen/Time-LLM)
83 | - Autoformer (https://github.com/thuml/Autoformer)
84 | - iTransformer (https://github.com/thuml/iTransformer)
85 | 
--------------------------------------------------------------------------------
/utils/data_analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | from scipy.stats import entropy
4 | 
5 | 
6 | def forecastabilty(ts):
7 |     """Forecastability Measure.
8 | 
9 |     Args:
10 |         ts: time series
11 | 
12 |     Returns:
13 |         1 - the entropy of the Fourier transform of the
14 |         time series / entropy of white noise
15 |     """
16 |     ts = (ts - ts.min())/(ts.max()-ts.min()+0.1)
17 |     # fourier_ts = np.fft.rfft(ts).real
18 |     fourier_ts = abs(np.fft.rfft(ts))
19 |     fourier_ts = (fourier_ts - fourier_ts.min()) / (
20 |         fourier_ts.max() - fourier_ts.min())
21 |     fourier_ts /= fourier_ts.sum()
22 |     entropy_ts = entropy(fourier_ts)
23 |     fore_ts = 1-entropy_ts/(np.log(len(ts)))
24 |     if np.isnan(fore_ts):
25 |         return 0
26 |     return fore_ts
27 | 
28 | 
29 | def forecastabilty_moving(ts, window, jump=1):
30 |     """Calculates the forecastability of a moving window.
31 | 
32 |     Args:
33 |         ts: time series
34 |         window: length of slices
35 |         jump: skipped step when taking subslices
36 | 
37 |     Returns:
38 |         a list of forecastability measures for all slices (a single value for very short series).
39 | """ 40 | 41 | # ts = Trend(ts).detrend() 42 | if len(ts) <= 25: 43 | return forecastabilty(ts) 44 | fore_lst = np.array([ 45 | forecastabilty(ts[i - window:i]) 46 | for i in np.arange(window, len(ts), jump) 47 | ]) 48 | fore_lst = fore_lst[~np.isnan(fore_lst)] # drop nan 49 | return fore_lst 50 | 51 | 52 | class Trend(): 53 | """Trend test.""" 54 | 55 | def __init__(self, ts): 56 | self.ts = ts 57 | self.train_length = len(ts) 58 | self.a, self.b = self.find_trend(ts) 59 | 60 | def find_trend(self, insample_data): 61 | # fit a linear regression y=ax+b on the time series 62 | x = np.arange(len(insample_data)) 63 | a, b = np.polyfit(x, insample_data, 1) 64 | return a, b 65 | 66 | def detrend(self): 67 | # remove trend 68 | return self.ts - (self.a * np.arange(0, len(self.ts), 1) + self.b) 69 | 70 | def inverse_input(self, insample_data): 71 | # add trend back to the input part of time series 72 | return insample_data + (self.a * np.arange(0, len(self.ts), 1) + self.b) 73 | 74 | def inverse_pred(self, outsample_data): 75 | # add trend back to the predictions 76 | return outsample_data + ( 77 | self.a * np.arange(self.train_length, 78 | self.train_length + len(outsample_data), 1) + self.b) 79 | 80 | 81 | def seasonality_test(original_ts, ppy): 82 | """Seasonality test. 83 | 84 | Args: 85 | original_ts: time series 86 | ppy: periods per year/frequency 87 | 88 | Returns: 89 | boolean value: whether the TS is seasonal 90 | """ 91 | 92 | s = acf(original_ts, 1) 93 | for i in range(2, ppy): 94 | s = s + (acf(original_ts, i)**2) 95 | 96 | limit = 1.645 * (np.sqrt((1 + 2 * s) / len(original_ts))) 97 | 98 | return (abs(acf(original_ts, ppy))) > limit 99 | 100 | 101 | def acf(ts, k): 102 | """Autocorrelation function. 103 | 104 | Args: 105 | ts: time series 106 | k: lag 107 | 108 | Returns: 109 | acf value 110 | """ 111 | m = np.mean(ts) 112 | s1 = 0 113 | for i in range(k, len(ts)): 114 | s1 = s1 + ((ts[i] - m) * (ts[i - k] - m)) 115 | 116 | s2 = 0 117 | for i in range(0, len(ts)): 118 | s2 = s2 + ((ts[i] - m)**2) 119 | 120 | return float(s1 / s2) -------------------------------------------------------------------------------- /utils/losses.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved. 14 | 15 | """ 16 | Loss functions for PyTorch. 17 | """ 18 | 19 | import torch as t 20 | import torch.nn as nn 21 | import numpy as np 22 | import pdb 23 | 24 | 25 | def divide_no_nan(a, b): 26 | """ 27 | a/b where the resulted NaN or Inf are replaced by 0. 
28 | """ 29 | result = a / b 30 | result[result != result] = .0 31 | result[result == np.inf] = .0 32 | return result 33 | 34 | 35 | class mape_loss(nn.Module): 36 | def __init__(self): 37 | super(mape_loss, self).__init__() 38 | 39 | def forward(self, insample: t.Tensor, freq: int, 40 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 41 | """ 42 | MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error 43 | 44 | :param forecast: Forecast values. Shape: batch, time 45 | :param target: Target values. Shape: batch, time 46 | :param mask: 0/1 mask. Shape: batch, time 47 | :return: Loss value 48 | """ 49 | weights = divide_no_nan(mask, target) 50 | return t.mean(t.abs((forecast - target) * weights)) 51 | 52 | 53 | class smape_loss(nn.Module): 54 | def __init__(self): 55 | super(smape_loss, self).__init__() 56 | 57 | def forward(self, insample: t.Tensor, freq: int, 58 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 59 | """ 60 | sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993) 61 | 62 | :param forecast: Forecast values. Shape: batch, time 63 | :param target: Target values. Shape: batch, time 64 | :param mask: 0/1 mask. Shape: batch, time 65 | :return: Loss value 66 | """ 67 | return 200 * t.mean(divide_no_nan(t.abs(forecast - target), 68 | t.abs(forecast.data) + t.abs(target.data)) * mask) 69 | 70 | 71 | class mase_loss(nn.Module): 72 | def __init__(self): 73 | super(mase_loss, self).__init__() 74 | 75 | def forward(self, insample: t.Tensor, freq: int, 76 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float: 77 | """ 78 | MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf 79 | 80 | :param insample: Insample values. Shape: batch, time_i 81 | :param freq: Frequency value 82 | :param forecast: Forecast values. Shape: batch, time_o 83 | :param target: Target values. Shape: batch, time_o 84 | :param mask: 0/1 mask. 
Shape: batch, time_o
85 |         :return: Loss value
86 |         """
87 |         masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
88 |         masked_masep_inv = divide_no_nan(mask, masep[:, None])
89 |         return t.mean(t.abs(target - forecast) * masked_masep_inv)
90 | 
--------------------------------------------------------------------------------
/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 | 
8 | 
9 | class TimeFeature:
10 |     def __init__(self):
11 |         pass
12 | 
13 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
14 |         pass
15 | 
16 |     def __repr__(self):
17 |         return self.__class__.__name__ + "()"
18 | 
19 | 
20 | class SecondOfMinute(TimeFeature):
21 |     """Second of minute encoded as value between [-0.5, 0.5]"""
22 | 
23 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
24 |         return index.second / 59.0 - 0.5
25 | 
26 | 
27 | class MinuteOfHour(TimeFeature):
28 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
29 | 
30 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
31 |         return index.minute / 59.0 - 0.5
32 | 
33 | 
34 | class HourOfDay(TimeFeature):
35 |     """Hour of day encoded as value between [-0.5, 0.5]"""
36 | 
37 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
38 |         return index.hour / 23.0 - 0.5
39 | 
40 | 
41 | class DayOfWeek(TimeFeature):
42 |     """Day of week encoded as value between [-0.5, 0.5]"""
43 | 
44 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
45 |         return index.dayofweek / 6.0 - 0.5
46 | 
47 | 
48 | class DayOfMonth(TimeFeature):
49 |     """Day of month encoded as value between [-0.5, 0.5]"""
50 | 
51 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
52 |         return (index.day - 1) / 30.0 - 0.5
53 | 
54 | 
55 | class DayOfYear(TimeFeature):
56 |     """Day of year encoded as value between [-0.5, 0.5]"""
57 | 
58 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
59 |         return (index.dayofyear - 1) / 365.0 - 0.5
60 | 
61 | 
62 | class MonthOfYear(TimeFeature):
63 |     """Month of year encoded as value between [-0.5, 0.5]"""
64 | 
65 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
66 |         return (index.month - 1) / 11.0 - 0.5
67 | 
68 | 
69 | class WeekOfYear(TimeFeature):
70 |     """Week of year encoded as value between [-0.5, 0.5]"""
71 | 
72 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
73 |         return (index.isocalendar().week - 1) / 52.0 - 0.5
74 | 
75 | 
76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
77 |     """
78 |     Returns a list of time features that will be appropriate for the given frequency string.
79 |     Parameters
80 |     ----------
81 |     freq_str
82 |         Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) 135 | -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import torch 4 | import matplotlib.pyplot as plt 5 | 6 | plt.switch_backend('agg') 7 | 8 | 9 | def adjust_learning_rate(optimizer, scheduler, epoch, args, printout=True): 10 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 11 | if args.lradj == 'type1': 12 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} 13 | elif args.lradj == 'type2': 14 | lr_adjust = { 15 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 16 | 10: 5e-7, 15: 1e-7, 20: 5e-8 17 | } 18 | elif args.lradj == 'type3': 19 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 20 | elif args.lradj == 'PEMS': 21 | lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))} 22 | elif args.lradj == 'TST': 23 | lr_adjust = {epoch: scheduler.get_last_lr()[0]} 24 | if epoch in lr_adjust.keys(): 25 | lr = lr_adjust[epoch] 26 | for param_group in optimizer.param_groups: 27 | param_group['lr'] = lr 28 | if printout: print('Updating learning rate to {}'.format(lr)) 29 | 30 | 31 | class EarlyStopping: 32 | def __init__(self, patience=7, verbose=False, delta=0): 33 | self.patience = patience 34 | self.verbose = verbose 35 | self.counter = 0 36 | self.best_score = None 37 | self.early_stop = False 38 | self.val_loss_min = np.Inf 39 | self.delta = delta 40 | 41 | def __call__(self, val_loss, model, path): 42 | score = -val_loss 43 | if self.best_score is None: 44 | self.best_score = score 45 | self.save_checkpoint(val_loss, model, path) 46 | elif score < self.best_score + self.delta: 47 | self.counter += 1 48 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 49 | if self.counter >= self.patience: 50 | self.early_stop = True 51 | else: 52 | self.best_score = score 53 | self.save_checkpoint(val_loss, model, path) 54 | self.counter = 0 55 | 56 | def save_checkpoint(self, val_loss, model, path): 57 | if self.verbose: 58 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> 
{val_loss:.6f}). Saving model ...') 59 | torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') 60 | self.val_loss_min = val_loss 61 | 62 | 63 | class dotdict(dict): 64 | """dot.notation access to dictionary attributes""" 65 | __getattr__ = dict.get 66 | __setattr__ = dict.__setitem__ 67 | __delattr__ = dict.__delitem__ 68 | 69 | 70 | class StandardScaler(): 71 | def __init__(self, mean, std): 72 | self.mean = mean 73 | self.std = std 74 | 75 | def transform(self, data): 76 | return (data - self.mean) / self.std 77 | 78 | def inverse_transform(self, data): 79 | return (data * self.std) + self.mean 80 | 81 | 82 | def save_to_csv(true, preds=None, name='./pic/test.pdf'): 83 | """ 84 | Results visualization 85 | """ 86 | data = pd.DataFrame({'true': true, 'preds': preds}) 87 | data.to_csv(name, index=False, sep=',') 88 | 89 | 90 | def visual(true, preds=None, name='./pic/test.pdf'): 91 | """ 92 | Results visualization 93 | """ 94 | plt.figure() 95 | plt.plot(true, label='GroundTruth', linewidth=2) 96 | if preds is not None: 97 | plt.plot(preds, label='Prediction', linewidth=2) 98 | plt.legend() 99 | plt.savefig(name, bbox_inches='tight') 100 | 101 | 102 | def visual_weights(weights, name='./pic/test.pdf'): 103 | """ 104 | Weights visualization 105 | """ 106 | fig, ax = plt.subplots() 107 | # im = ax.imshow(weights, cmap='plasma_r') 108 | im = ax.imshow(weights, cmap='YlGnBu') 109 | fig.colorbar(im, pad=0.03, location='top') 110 | plt.savefig(name, dpi=500, pad_inches=0.02) 111 | plt.close() 112 | 113 | 114 | def adjustment(gt, pred): 115 | anomaly_state = False 116 | for i in range(len(gt)): 117 | if gt[i] == 1 and pred[i] == 1 and not anomaly_state: 118 | anomaly_state = True 119 | for j in range(i, 0, -1): 120 | if gt[j] == 0: 121 | break 122 | else: 123 | if pred[j] == 0: 124 | pred[j] = 1 125 | for j in range(i, len(gt)): 126 | if gt[j] == 0: 127 | break 128 | else: 129 | if pred[j] == 0: 130 | pred[j] = 1 131 | elif gt[i] == 0: 132 | anomaly_state = False 133 | if anomaly_state: 134 | pred[i] = 1 135 | return gt, pred 136 | 137 | 138 | def cal_accuracy(y_pred, y_true): 139 | return np.mean(y_pred == y_true) 140 | -------------------------------------------------------------------------------- /exp/torchsummary.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from torch.autograd import Variable 4 | 5 | from collections import OrderedDict 6 | import numpy as np 7 | 8 | 9 | def summary(model, input_size, batch_size=-1, device="cuda"): 10 | 11 | def register_hook(module): 12 | 13 | def hook(module, input, output): 14 | class_name = str(module.__class__).split(".")[-1].split("'")[0] 15 | module_idx = len(summary) 16 | 17 | m_key = "%s-%i" % (class_name, module_idx + 1) 18 | summary[m_key] = OrderedDict() 19 | # summary[m_key]["input_shape"] = list(input[0].size()) 20 | if isinstance(input[0], (list, tuple)): 21 | summary[m_key]["input_shape"] = [ 22 | [-1] + list(i.size())[1:] for i in input[0] 23 | ] 24 | summary[m_key]["input_shape"][0] = batch_size 25 | else: 26 | summary[m_key]["input_shape"] = list(input[0].size()) 27 | summary[m_key]["input_shape"][0] = batch_size 28 | 29 | if isinstance(output, (list, tuple)): 30 | summary[m_key]["output_shape"] = [ 31 | [-1] + list(o.size())[1:] for o in output 32 | ] 33 | else: 34 | summary[m_key]["output_shape"] = list(output.size()) 35 | summary[m_key]["output_shape"][0] = batch_size 36 | 37 | params = 0 38 | if hasattr(module, "weight") and 
hasattr(module.weight, "size"): 39 | params += torch.prod(torch.LongTensor(list(module.weight.size()))) 40 | summary[m_key]["trainable"] = module.weight.requires_grad 41 | if hasattr(module, "bias") and hasattr(module.bias, "size"): 42 | params += torch.prod(torch.LongTensor(list(module.bias.size()))) 43 | summary[m_key]["nb_params"] = params 44 | 45 | if ( 46 | not isinstance(module, nn.Sequential) 47 | and not isinstance(module, nn.ModuleList) 48 | and not (module == model) 49 | ): 50 | hooks.append(module.register_forward_hook(hook)) 51 | 52 | device = device.lower() 53 | assert device in [ 54 | "cuda", 55 | "cpu", 56 | ], "Input device is not valid, please specify 'cuda' or 'cpu'" 57 | 58 | if device == "cuda" and torch.cuda.is_available(): 59 | dtype = torch.cuda.FloatTensor 60 | else: 61 | dtype = torch.FloatTensor 62 | 63 | # multiple inputs to the network 64 | if isinstance(input_size, tuple): 65 | input_size = [input_size] 66 | 67 | # batch_size of 2 for batchnorm 68 | x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size] 69 | # print(type(x[0])) 70 | 71 | # create properties 72 | summary = OrderedDict() 73 | hooks = [] 74 | 75 | # register hook 76 | model.apply(register_hook) 77 | 78 | # make a forward pass 79 | # print(x.shape) 80 | model(*x) 81 | 82 | # remove these hooks 83 | for h in hooks: 84 | h.remove() 85 | 86 | print("----------------------------------------------------------------") 87 | line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #") 88 | print(line_new) 89 | print("================================================================") 90 | total_params = 0 91 | total_output = 0 92 | trainable_params = 0 93 | for layer in summary: 94 | # input_shape, output_shape, trainable, nb_params 95 | line_new = "{:>20} {:>25} {:>15}".format( 96 | layer, 97 | str(summary[layer]["output_shape"]), 98 | "{0:,}".format(summary[layer]["nb_params"]), 99 | ) 100 | total_params += summary[layer]["nb_params"] 101 | total_output += np.prod(summary[layer]["output_shape"]) 102 | if "trainable" in summary[layer]: 103 | if summary[layer]["trainable"] == True: 104 | trainable_params += summary[layer]["nb_params"] 105 | print(line_new) 106 | 107 | # assume 4 bytes/number (float on cuda). 108 | # total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.)) 109 | total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients 110 | total_params_size = abs(total_params.numpy() * 4. 
/ (1024 ** 2.)) 111 | # total_size = total_params_size + total_output_size + total_input_size 112 | 113 | print("================================================================") 114 | print("Total params: {0:,}".format(total_params)) 115 | print("Trainable params: {0:,}".format(trainable_params)) 116 | print("Non-trainable params: {0:,}".format(total_params - trainable_params)) 117 | print("----------------------------------------------------------------") 118 | # print("Input size (MB): %0.2f" % total_input_size) 119 | print("Forward/backward pass size (MB): %0.2f" % total_output_size) 120 | print("Params size (MB): %0.2f" % total_params_size) 121 | # print("Estimated Total Size (MB): %0.2f" % total_size) 122 | print("----------------------------------------------------------------") 123 | # return summary 124 | -------------------------------------------------------------------------------- /layers/Transformer_EncDec1.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch.nn.functional as F 3 | 4 | 5 | class ConvLayer(nn.Module): 6 | def __init__(self, c_in): 7 | super(ConvLayer, self).__init__() 8 | self.downConv = nn.Conv1d(in_channels=c_in, 9 | out_channels=c_in, 10 | kernel_size=3, 11 | padding=2, 12 | padding_mode='circular') 13 | self.norm = nn.BatchNorm1d(c_in) 14 | self.activation = nn.ELU() 15 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 16 | 17 | def forward(self, x): 18 | x = self.downConv(x.permute(0, 2, 1)) 19 | x = self.norm(x) 20 | x = self.activation(x) 21 | x = self.maxPool(x) 22 | x = x.transpose(1, 2) 23 | return x 24 | 25 | 26 | class EncoderLayer(nn.Module): 27 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 28 | super(EncoderLayer, self).__init__() 29 | d_ff = d_ff or 4 * d_model 30 | self.attention = attention 31 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 32 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 33 | self.norm1 = nn.LayerNorm(d_model) 34 | self.norm2 = nn.LayerNorm(d_model) 35 | self.dropout = nn.Dropout(dropout) 36 | self.activation = F.relu if activation == "relu" else F.gelu 37 | 38 | def forward(self, x, attn_mask=None, tau=None, delta=None): 39 | new_x, attn = self.attention( 40 | x, x, x, 41 | attn_mask=attn_mask, 42 | tau=tau, delta=delta 43 | ) 44 | x = x + self.dropout(new_x) 45 | 46 | y = x = self.norm1(x) 47 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 48 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 49 | 50 | return self.norm2(x + y), attn 51 | 52 | 53 | class Encoder(nn.Module): 54 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 55 | super(Encoder, self).__init__() 56 | self.attn_layers = nn.ModuleList(attn_layers) 57 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 58 | self.norm = norm_layer 59 | 60 | def forward(self, x, attn_mask=None, tau=None, delta=None): 61 | # x [B, L, D] 62 | attns = [] 63 | if self.conv_layers is not None: 64 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): 65 | delta = delta if i == 0 else None 66 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 67 | x = conv_layer(x) 68 | attns.append(attn) 69 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None) 70 | attns.append(attn) 71 | else: 72 | for attn_layer in self.attn_layers: 73 | x, attn = attn_layer(x, 
attn_mask=attn_mask, tau=tau, delta=delta) 74 | attns.append(attn) 75 | 76 | if self.norm is not None: 77 | x = self.norm(x) 78 | 79 | return x, attns 80 | 81 | 82 | class DecoderLayer(nn.Module): 83 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 84 | dropout=0.1, activation="relu"): 85 | super(DecoderLayer, self).__init__() 86 | d_ff = d_ff or 4 * d_model 87 | self.self_attention = self_attention 88 | self.cross_attention = cross_attention 89 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 90 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 91 | self.norm1 = nn.LayerNorm(d_model) 92 | self.norm2 = nn.LayerNorm(d_model) 93 | self.norm3 = nn.LayerNorm(d_model) 94 | self.dropout = nn.Dropout(dropout) 95 | self.activation = F.relu if activation == "relu" else F.gelu 96 | 97 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 98 | x = x + self.dropout(self.self_attention( 99 | x, x, x, 100 | attn_mask=x_mask, 101 | tau=tau, delta=None 102 | )[0]) 103 | x = self.norm1(x) 104 | 105 | x = x + self.dropout(self.cross_attention( 106 | x, cross, cross, 107 | attn_mask=cross_mask, 108 | tau=tau, delta=delta 109 | )[0]) 110 | 111 | y = x = self.norm2(x) 112 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 113 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 114 | 115 | return self.norm3(x + y) 116 | 117 | 118 | class Decoder(nn.Module): 119 | def __init__(self, layers, norm_layer=None, projection=None): 120 | super(Decoder, self).__init__() 121 | self.layers = nn.ModuleList(layers) 122 | self.norm = norm_layer 123 | self.projection = projection 124 | 125 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 126 | for layer in self.layers: 127 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) 128 | 129 | if self.norm is not None: 130 | x = self.norm(x) 131 | 132 | if self.projection is not None: 133 | x = self.projection(x) 134 | return x 135 | -------------------------------------------------------------------------------- /layers/Transformer_EncDec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ConvLayer(nn.Module): 7 | def __init__(self, c_in): 8 | super(ConvLayer, self).__init__() 9 | self.downConv = nn.Conv1d(in_channels=c_in, 10 | out_channels=c_in, 11 | kernel_size=3, 12 | padding=2, 13 | padding_mode='circular') 14 | self.norm = nn.BatchNorm1d(c_in) 15 | self.activation = nn.ELU() 16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 17 | 18 | def forward(self, x): 19 | x = self.downConv(x.permute(0, 2, 1)) 20 | x = self.norm(x) 21 | x = self.activation(x) 22 | x = self.maxPool(x) 23 | x = x.transpose(1, 2) 24 | return x 25 | 26 | 27 | class EncoderLayer(nn.Module): 28 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 29 | super(EncoderLayer, self).__init__() 30 | d_ff = d_ff or 4 * d_model 31 | self.attention = attention 32 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 33 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 34 | self.norm1 = nn.LayerNorm(d_model) 35 | self.norm2 = nn.LayerNorm(d_model) 36 | self.dropout = nn.Dropout(dropout) 37 | self.activation = F.relu if activation == "relu" else F.gelu 38 | 39 | def forward(self, x, attn_mask=None, 
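# annotation: tau/delta are optional de-stationarization factors forwarded to
# the attention module; both default to None for plain self-attention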
tau=None, delta=None): 40 | new_x, attn = self.attention( 41 | x, x, x, 42 | attn_mask=attn_mask, 43 | tau=tau, delta=delta 44 | ) 45 | x = x + self.dropout(new_x) 46 | 47 | y = x = self.norm1(x) 48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 49 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 50 | 51 | return self.norm2(x + y), attn 52 | 53 | 54 | class Encoder(nn.Module): 55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 56 | super(Encoder, self).__init__() 57 | self.attn_layers = nn.ModuleList(attn_layers) 58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 59 | self.norm = norm_layer 60 | 61 | def forward(self, x, attn_mask=None, tau=None, delta=None): 62 | # x [B, L, D] 63 | attns = [] 64 | if self.conv_layers is not None: 65 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)): 66 | delta = delta if i == 0 else None 67 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 68 | x = conv_layer(x) 69 | attns.append(attn) 70 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None) 71 | attns.append(attn) 72 | else: 73 | for attn_layer in self.attn_layers: 74 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta) 75 | attns.append(attn) 76 | 77 | if self.norm is not None: 78 | x = self.norm(x) 79 | 80 | return x, attns 81 | 82 | 83 | class DecoderLayer(nn.Module): 84 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 85 | dropout=0.1, activation="relu"): 86 | super(DecoderLayer, self).__init__() 87 | d_ff = d_ff or 4 * d_model 88 | self.self_attention = self_attention 89 | self.cross_attention = cross_attention 90 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 91 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 92 | self.norm1 = nn.LayerNorm(d_model) 93 | self.norm2 = nn.LayerNorm(d_model) 94 | self.norm3 = nn.LayerNorm(d_model) 95 | self.dropout = nn.Dropout(dropout) 96 | self.activation = F.relu if activation == "relu" else F.gelu 97 | 98 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 99 | x = x + self.dropout(self.self_attention( 100 | x, x, x, 101 | attn_mask=x_mask, 102 | tau=tau, delta=None 103 | )[0]) 104 | x = self.norm1(x) 105 | 106 | x = x + self.dropout(self.cross_attention( 107 | x, cross, cross, 108 | attn_mask=cross_mask, 109 | tau=tau, delta=delta 110 | )[0]) 111 | 112 | y = x = self.norm2(x) 113 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 114 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 115 | 116 | return self.norm3(x + y) 117 | 118 | 119 | class Decoder(nn.Module): 120 | def __init__(self, layers, norm_layer=None, projection=None): 121 | super(Decoder, self).__init__() 122 | self.layers = nn.ModuleList(layers) 123 | self.norm = norm_layer 124 | self.projection = projection 125 | 126 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None): 127 | for layer in self.layers: 128 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta) 129 | 130 | if self.norm is not None: 131 | x = self.norm(x) 132 | 133 | if self.projection is not None: 134 | x = self.projection(x) 135 | return x 136 | -------------------------------------------------------------------------------- /data_provider/uea.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 
| import torch
5 | 
6 | 
7 | def collate_fn(data, max_len=None):
8 |     """Build mini-batch tensors from a list of (X, y) tuples, zero-padding (or clipping) each series to a common length.
9 |     Args:
10 |         data: len(batch_size) list of tuples (X, y).
11 |             - X: torch tensor of shape (seq_length, feat_dim); variable seq_length.
12 |             - y: torch tensor of shape (num_labels,): class indices or numerical targets
13 |                 (for classification or regression, respectively). num_labels > 1 for multi-task models
14 |         max_len: global fixed sequence length. Used for architectures requiring fixed length input,
15 |             where the batch length cannot vary dynamically. Longer sequences are clipped, shorter are padded with 0s
16 |     Returns:
17 |         X: (batch_size, padded_length, feat_dim) torch tensor of padded features (input)
18 |         targets: (batch_size, num_labels) torch tensor of target values (output)
19 |         padding_masks: (batch_size, padded_length) boolean tensor,
20 |             1 means keep vector at this position, 0 means padding
21 | 
22 |     """
23 | 
24 |     batch_size = len(data)
25 |     features, labels = zip(*data)
26 | 
27 |     # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension)
28 |     lengths = [X.shape[0] for X in features]  # original sequence length for each time series
29 |     if max_len is None:
30 |         max_len = max(lengths)
31 |     X = torch.zeros(batch_size, max_len, features[0].shape[-1])  # (batch_size, padded_length, feat_dim)
32 |     for i in range(batch_size):
33 |         end = min(lengths[i], max_len)
34 |         X[i, :end, :] = features[i][:end, :]
35 | 
36 |     targets = torch.stack(labels, dim=0)  # (batch_size, num_labels)
37 | 
38 |     padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16),
39 |                                  max_len=max_len)  # (batch_size, padded_length) boolean tensor, "1" means keep
40 | 
41 |     return X, targets, padding_masks
42 | 
43 | 
44 | def padding_mask(lengths, max_len=None):
45 |     """
46 |     Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths,
47 |     where 1 means keep element at this position (time step)
48 |     """
49 |     batch_size = lengths.numel()
50 |     max_len = max_len or lengths.max()  # trick works because of overloading of 'or' operator for non-boolean types
51 |     return (torch.arange(0, max_len, device=lengths.device)
52 |             .type_as(lengths)
53 |             .repeat(batch_size, 1)
54 |             .lt(lengths.unsqueeze(1)))
55 | 
56 | 
57 | class Normalizer(object):
58 |     """
59 |     Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization.
60 |     """
61 | 
62 |     def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
63 |         """
64 |         Args:
65 |             norm_type: choose from:
66 |                 "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
67 |                 "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e.
across only its own rows) 68 | mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values 69 | """ 70 | 71 | self.norm_type = norm_type 72 | self.mean = mean 73 | self.std = std 74 | self.min_val = min_val 75 | self.max_val = max_val 76 | 77 | def normalize(self, df): 78 | """ 79 | Args: 80 | df: input dataframe 81 | Returns: 82 | df: normalized dataframe 83 | """ 84 | if self.norm_type == "standardization": 85 | if self.mean is None: 86 | self.mean = df.mean() 87 | self.std = df.std() 88 | return (df - self.mean) / (self.std + np.finfo(float).eps) 89 | 90 | elif self.norm_type == "minmax": 91 | if self.max_val is None: 92 | self.max_val = df.max() 93 | self.min_val = df.min() 94 | return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps) 95 | 96 | elif self.norm_type == "per_sample_std": 97 | grouped = df.groupby(by=df.index) 98 | return (df - grouped.transform('mean')) / grouped.transform('std') 99 | 100 | elif self.norm_type == "per_sample_minmax": 101 | grouped = df.groupby(by=df.index) 102 | min_vals = grouped.transform('min') 103 | return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps) 104 | 105 | else: 106 | raise (NameError(f'Normalize method "{self.norm_type}" not implemented')) 107 | 108 | 109 | def interpolate_missing(y): 110 | """ 111 | Replaces NaN values in pd.Series `y` using linear interpolation 112 | """ 113 | if y.isna().any(): 114 | y = y.interpolate(method='linear', limit_direction='both') 115 | return y 116 | 117 | 118 | def subsample(y, limit=256, factor=2): 119 | """ 120 | If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor 121 | """ 122 | if len(y) > limit: 123 | return y[::factor].reset_index(drop=True) 124 | return y 125 | -------------------------------------------------------------------------------- /layers/Embed1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import math 4 | 5 | 6 | class PositionalEmbedding(nn.Module): 7 | def __init__(self, d_model, max_len=5000): 8 | super(PositionalEmbedding, self).__init__() 9 | # Compute the positional encodings once in log space. 
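# The code below implements the standard sinusoidal encoding
#   PE(pos, 2i)   = sin(pos / 10000^(2i / d_model))
#   PE(pos, 2i+1) = cos(pos / 10000^(2i / d_model))
# where div_term = exp(2i * (-log(10000) / d_model)) = 10000^(-2i / d_model);
# computing it in log space avoids raising 10000 to large powers directly.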
10 | pe = torch.zeros(max_len, d_model).float() 11 | pe.require_grad = False 12 | 13 | position = torch.arange(0, max_len).float().unsqueeze(1) 14 | div_term = (torch.arange(0, d_model, 2).float() 15 | * -(math.log(10000.0) / d_model)).exp() 16 | 17 | pe[:, 0::2] = torch.sin(position * div_term) 18 | pe[:, 1::2] = torch.cos(position * div_term) 19 | 20 | pe = pe.unsqueeze(0) 21 | self.register_buffer('pe', pe) 22 | 23 | def forward(self, x): 24 | return self.pe[:, :x.size(1)] 25 | 26 | 27 | class TokenEmbedding(nn.Module): 28 | def __init__(self, c_in, d_model): 29 | super(TokenEmbedding, self).__init__() 30 | padding = 1 if torch.__version__ >= '1.5.0' else 2 31 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 32 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 33 | for m in self.modules(): 34 | if isinstance(m, nn.Conv1d): 35 | nn.init.kaiming_normal_( 36 | m.weight, mode='fan_in', nonlinearity='leaky_relu') 37 | 38 | def forward(self, x): 39 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 40 | return x 41 | 42 | 43 | class FixedEmbedding(nn.Module): 44 | def __init__(self, c_in, d_model): 45 | super(FixedEmbedding, self).__init__() 46 | 47 | w = torch.zeros(c_in, d_model).float() 48 | w.require_grad = False 49 | 50 | position = torch.arange(0, c_in).float().unsqueeze(1) 51 | div_term = (torch.arange(0, d_model, 2).float() 52 | * -(math.log(10000.0) / d_model)).exp() 53 | 54 | w[:, 0::2] = torch.sin(position * div_term) 55 | w[:, 1::2] = torch.cos(position * div_term) 56 | 57 | self.emb = nn.Embedding(c_in, d_model) 58 | self.emb.weight = nn.Parameter(w, requires_grad=False) 59 | 60 | def forward(self, x): 61 | return self.emb(x).detach() 62 | 63 | 64 | class TemporalEmbedding(nn.Module): 65 | def __init__(self, d_model, embed_type='fixed', freq='h'): 66 | super(TemporalEmbedding, self).__init__() 67 | 68 | minute_size = 4 69 | hour_size = 24 70 | weekday_size = 7 71 | day_size = 32 72 | month_size = 13 73 | 74 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 75 | if freq == 't': 76 | self.minute_embed = Embed(minute_size, d_model) 77 | self.hour_embed = Embed(hour_size, d_model) 78 | self.weekday_embed = Embed(weekday_size, d_model) 79 | self.day_embed = Embed(day_size, d_model) 80 | self.month_embed = Embed(month_size, d_model) 81 | 82 | def forward(self, x): 83 | x = x.long() 84 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr( 85 | self, 'minute_embed') else 0. 
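# x is expected to carry integer time features in the column order
# [month, day, weekday, hour, minute] (indices 0..4), matching the lookups
# below; minute_x falls back to the scalar 0. when freq != 't', since
# minute_embed is only created in that case.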
86 | hour_x = self.hour_embed(x[:, :, 3]) 87 | weekday_x = self.weekday_embed(x[:, :, 2]) 88 | day_x = self.day_embed(x[:, :, 1]) 89 | month_x = self.month_embed(x[:, :, 0]) 90 | 91 | return hour_x + weekday_x + day_x + month_x + minute_x 92 | 93 | 94 | class TimeFeatureEmbedding(nn.Module): 95 | def __init__(self, d_model, embed_type='timeF', freq='h'): 96 | super(TimeFeatureEmbedding, self).__init__() 97 | 98 | freq_map = {'h': 4, 't': 5, 's': 6, 99 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 100 | d_inp = freq_map[freq] 101 | self.embed = nn.Linear(d_inp, d_model, bias=False) 102 | 103 | def forward(self, x): 104 | return self.embed(x) 105 | 106 | 107 | class DataEmbedding(nn.Module): 108 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 109 | super(DataEmbedding, self).__init__() 110 | 111 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 112 | self.position_embedding = PositionalEmbedding(d_model=d_model) 113 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 114 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 115 | d_model=d_model, embed_type=embed_type, freq=freq) 116 | self.dropout = nn.Dropout(p=dropout) 117 | 118 | def forward(self, x, x_mark): 119 | if x_mark is None: 120 | x = self.value_embedding(x) + self.position_embedding(x) 121 | else: 122 | x = self.value_embedding( 123 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 124 | return self.dropout(x) 125 | 126 | 127 | class DataEmbedding_inverted(nn.Module): 128 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 129 | super(DataEmbedding_inverted, self).__init__() 130 | self.value_embedding = nn.Linear(c_in, d_model) 131 | self.dropout = nn.Dropout(p=dropout) 132 | 133 | def forward(self, x, x_mark): 134 | x = x.permute(0, 2, 1) 135 | # x: [Batch Variate Time] 136 | if x_mark is None: 137 | x = self.value_embedding(x) 138 | else: 139 | # the potential to take covariates (e.g. timestamps) as tokens 140 | x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1)) 141 | # x: [Batch Variate d_model] 142 | return self.dropout(x) 143 | 144 | -------------------------------------------------------------------------------- /utils/m4_summary.py: -------------------------------------------------------------------------------- 1 | # This source code is provided for the purposes of scientific reproducibility 2 | # under the following limited license from Element AI Inc. The code is an 3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis 4 | # expansion analysis for interpretable time series forecasting, 5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is 6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0 7 | # International license (CC BY-NC 4.0): 8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether 9 | # for the benefit of third parties or internally in production) requires an 10 | # explicit license. The subject-matter of the N-BEATS model and associated 11 | # materials are the property of Element AI Inc. and may be subject to patent 12 | # protection. No license to patents is granted hereunder (whether express or 13 | # implied). Copyright 2020 Element AI Inc. All rights reserved. 
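# The OWA (overall weighted average) score computed in M4Summary.evaluate below
# averages the model's sMAPE and MASE after scaling each by the Naive2 baseline:
#   OWA = 0.5 * (sMAPE_model / sMAPE_naive2 + MASE_model / MASE_naive2)
# Toy sanity check of the metric helpers (illustrative numbers only):
#   smape_2(np.array([110.0]), np.array([100.0]))  # -> 200 * 10 / 210 ≈ 9.52
#   mape(np.array([110.0]), np.array([100.0]))     # -> 100 * 10 / 100 = 10.0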
14 | 15 | """ 16 | M4 Summary 17 | """ 18 | from collections import OrderedDict 19 | 20 | import numpy as np 21 | import pandas as pd 22 | 23 | from data_provider.m4 import M4Dataset 24 | from data_provider.m4 import M4Meta 25 | import os 26 | 27 | 28 | def group_values(values, groups, group_name): 29 | return np.array([v[~np.isnan(v)] for v in values[groups == group_name]]) 30 | 31 | 32 | def mase(forecast, insample, outsample, frequency): 33 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:])) 34 | 35 | 36 | def smape_2(forecast, target): 37 | denom = np.abs(target) + np.abs(forecast) 38 | # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway. 39 | denom[denom == 0.0] = 1.0 40 | return 200 * np.abs(forecast - target) / denom 41 | 42 | 43 | def mape(forecast, target): 44 | denom = np.abs(target) 45 | # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway. 46 | denom[denom == 0.0] = 1.0 47 | return 100 * np.abs(forecast - target) / denom 48 | 49 | 50 | class M4Summary: 51 | def __init__(self, file_path, root_path): 52 | self.file_path = file_path 53 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path) 54 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path) 55 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv') 56 | 57 | def evaluate(self): 58 | """ 59 | Evaluate forecasts against the M4 test dataset. Model forecasts are read from 60 | "{file_path}{group_name}_forecast.csv" for each seasonal pattern. 61 | 62 | :return: sMAPE, OWA, MAPE and MASE scores, grouped by seasonal patterns. 63 | """ 64 | grouped_owa = OrderedDict() 65 | 66 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32) 67 | naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts]) 68 | 69 | model_mases = {} 70 | naive2_smapes = {} 71 | naive2_mases = {} 72 | grouped_smapes = {} 73 | grouped_mapes = {} 74 | for group_name in M4Meta.seasonal_patterns: 75 | file_name = self.file_path + group_name + "_forecast.csv" 76 | if os.path.exists(file_name): 77 | model_forecast = pd.read_csv(file_name).values 78 | 79 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name) 80 | target = group_values(self.test_set.values, self.test_set.groups, group_name) 81 | # all timeseries within group have same frequency 82 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0] 83 | insample = group_values(self.training_set.values, self.test_set.groups, group_name) 84 | 85 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i], 86 | insample=insample[i], 87 | outsample=target[i], 88 | frequency=frequency) for i in range(len(model_forecast))]) 89 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i], 90 | insample=insample[i], 91 | outsample=target[i], 92 | frequency=frequency) for i in range(len(model_forecast))]) 93 | 94 | naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target)) 95 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target)) 96 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target)) 97 | 98 | grouped_smapes = self.summarize_groups(grouped_smapes) 99 | grouped_mapes = self.summarize_groups(grouped_mapes) 100 | grouped_model_mases = self.summarize_groups(model_mases) 101 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes) 102 | grouped_naive2_mases = self.summarize_groups(naive2_mases) 103 | for k in
grouped_model_mases.keys(): 104 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] + 105 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2 106 | 107 | def round_all(d): 108 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items())) 109 | 110 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all( 111 | grouped_model_mases) 112 | 113 | def summarize_groups(self, scores): 114 | """ 115 | Re-group scores respecting M4 rules. 116 | :param scores: Scores per group. 117 | :return: Grouped scores. 118 | """ 119 | scores_summary = OrderedDict() 120 | 121 | def group_count(group_name): 122 | return len(np.where(self.test_set.groups == group_name)[0]) 123 | 124 | weighted_score = {} 125 | for g in ['Yearly', 'Quarterly', 'Monthly']: 126 | weighted_score[g] = scores[g] * group_count(g) 127 | scores_summary[g] = scores[g] 128 | 129 | others_score = 0 130 | others_count = 0 131 | for g in ['Weekly', 'Daily', 'Hourly']: 132 | others_score += scores[g] * group_count(g) 133 | others_count += group_count(g) 134 | weighted_score['Others'] = others_score 135 | scores_summary['Others'] = others_score / others_count 136 | 137 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups) 138 | scores_summary['Average'] = average 139 | 140 | return scores_summary 141 | -------------------------------------------------------------------------------- /layers/AutoCorrelation.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import math 7 | from math import sqrt 8 | import os 9 | 10 | 11 | class AutoCorrelation(nn.Module): 12 | """ 13 | AutoCorrelation Mechanism with the following two phases: 14 | (1) period-based dependencies discovery 15 | (2) time delay aggregation 16 | This block can replace the self-attention family mechanism seamlessly. 17 | """ 18 | 19 | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False): 20 | super(AutoCorrelation, self).__init__() 21 | self.factor = factor 22 | self.scale = scale 23 | self.mask_flag = mask_flag 24 | self.output_attention = output_attention 25 | self.dropout = nn.Dropout(attention_dropout) 26 | 27 | def time_delay_agg_training(self, values, corr): 28 | """ 29 | SpeedUp version of Autocorrelation (a batch-normalization style design) 30 | This is for the training phase. 31 | """ 32 | head = values.shape[1] 33 | channel = values.shape[2] 34 | length = values.shape[3] 35 | # find top k 36 | top_k = int(self.factor * math.log(length)) 37 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) 38 | index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1] 39 | weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1) 40 | # update corr 41 | tmp_corr = torch.softmax(weights, dim=-1) 42 | # aggregation 43 | tmp_values = values 44 | delays_agg = torch.zeros_like(values).float() 45 | for i in range(top_k): 46 | pattern = torch.roll(tmp_values, -int(index[i]), -1) 47 | delays_agg = delays_agg + pattern * \ 48 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) 49 | return delays_agg 50 | 51 | def time_delay_agg_inference(self, values, corr): 52 | """ 53 | SpeedUp version of Autocorrelation (a batch-normalization style design) 54 | This is for the inference phase. 
55 | """ 56 | batch = values.shape[0] 57 | head = values.shape[1] 58 | channel = values.shape[2] 59 | length = values.shape[3] 60 | # index init 61 | init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() 62 | # find top k 63 | top_k = int(self.factor * math.log(length)) 64 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1) 65 | weights, delay = torch.topk(mean_value, top_k, dim=-1) 66 | # update corr 67 | tmp_corr = torch.softmax(weights, dim=-1) 68 | # aggregation 69 | tmp_values = values.repeat(1, 1, 1, 2) 70 | delays_agg = torch.zeros_like(values).float() 71 | for i in range(top_k): 72 | tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length) 73 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) 74 | delays_agg = delays_agg + pattern * \ 75 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)) 76 | return delays_agg 77 | 78 | def time_delay_agg_full(self, values, corr): 79 | """ 80 | Standard version of Autocorrelation 81 | """ 82 | batch = values.shape[0] 83 | head = values.shape[1] 84 | channel = values.shape[2] 85 | length = values.shape[3] 86 | # index init 87 | init_index = torch.arange(length).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1).cuda() 88 | # find top k 89 | top_k = int(self.factor * math.log(length)) 90 | weights, delay = torch.topk(corr, top_k, dim=-1) 91 | # update corr 92 | tmp_corr = torch.softmax(weights, dim=-1) 93 | # aggregation 94 | tmp_values = values.repeat(1, 1, 1, 2) 95 | delays_agg = torch.zeros_like(values).float() 96 | for i in range(top_k): 97 | tmp_delay = init_index + delay[..., i].unsqueeze(-1) 98 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay) 99 | delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1)) 100 | return delays_agg 101 | 102 | def forward(self, queries, keys, values, attn_mask): 103 | B, L, H, E = queries.shape 104 | _, S, _, D = values.shape 105 | if L > S: 106 | zeros = torch.zeros_like(queries[:, :(L - S), :]).float() 107 | values = torch.cat([values, zeros], dim=1) 108 | keys = torch.cat([keys, zeros], dim=1) 109 | else: 110 | values = values[:, :L, :, :] 111 | keys = keys[:, :L, :, :] 112 | 113 | # period-based dependencies 114 | q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1) 115 | k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1) 116 | res = q_fft * torch.conj(k_fft) 117 | corr = torch.fft.irfft(res, dim=-1) 118 | 119 | # time delay agg 120 | if self.training: 121 | V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) 122 | else: 123 | V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2) 124 | 125 | if self.output_attention: 126 | return (V.contiguous(), corr.permute(0, 3, 1, 2)) 127 | else: 128 | return (V.contiguous(), None) 129 | 130 | 131 | class AutoCorrelationLayer(nn.Module): 132 | def __init__(self, correlation, d_model, n_heads, d_keys=None, 133 | d_values=None): 134 | super(AutoCorrelationLayer, self).__init__() 135 | 136 | d_keys = d_keys or (d_model // n_heads) 137 | d_values = d_values or (d_model // n_heads) 138 | 139 | self.inner_correlation = correlation 140 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 141 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 142 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 143 | 
self.out_projection = nn.Linear(d_values * n_heads, d_model) 144 | self.n_heads = n_heads 145 | 146 | def forward(self, queries, keys, values, attn_mask): 147 | B, L, _ = queries.shape 148 | _, S, _ = keys.shape 149 | H = self.n_heads 150 | 151 | queries = self.query_projection(queries).view(B, L, H, -1) 152 | keys = self.key_projection(keys).view(B, S, H, -1) 153 | values = self.value_projection(values).view(B, S, H, -1) 154 | 155 | out, attn = self.inner_correlation( 156 | queries, 157 | keys, 158 | values, 159 | attn_mask 160 | ) 161 | out = out.view(B, L, -1) 162 | 163 | return self.out_projection(out), attn 164 | -------------------------------------------------------------------------------- /layers/Autoformer_EncDec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class my_Layernorm(nn.Module): 7 | """ 8 | Special designed layernorm for the seasonal part 9 | """ 10 | 11 | def __init__(self, channels): 12 | super(my_Layernorm, self).__init__() 13 | self.layernorm = nn.LayerNorm(channels) 14 | 15 | def forward(self, x): 16 | x_hat = self.layernorm(x) 17 | bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1) 18 | return x_hat - bias 19 | 20 | 21 | class moving_avg(nn.Module): 22 | """ 23 | Moving average block to highlight the trend of time series 24 | """ 25 | 26 | def __init__(self, kernel_size, stride): 27 | super(moving_avg, self).__init__() 28 | self.kernel_size = kernel_size 29 | self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0) 30 | 31 | def forward(self, x): 32 | # padding on the both ends of time series 33 | front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1) 34 | end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1) 35 | x = torch.cat([front, x, end], dim=1) 36 | x = self.avg(x.permute(0, 2, 1)) 37 | x = x.permute(0, 2, 1) 38 | return x 39 | 40 | 41 | class series_decomp(nn.Module): 42 | """ 43 | Series decomposition block 44 | """ 45 | 46 | def __init__(self, kernel_size): 47 | super(series_decomp, self).__init__() 48 | self.moving_avg = moving_avg(kernel_size, stride=1) 49 | 50 | def forward(self, x): 51 | moving_mean = self.moving_avg(x) 52 | res = x - moving_mean 53 | return res, moving_mean 54 | 55 | 56 | class series_decomp_multi(nn.Module): 57 | """ 58 | Multiple Series decomposition block from FEDformer 59 | """ 60 | 61 | def __init__(self, kernel_size): 62 | super(series_decomp_multi, self).__init__() 63 | self.kernel_size = kernel_size 64 | self.series_decomp = [series_decomp(kernel) for kernel in kernel_size] 65 | 66 | def forward(self, x): 67 | moving_mean = [] 68 | res = [] 69 | for func in self.series_decomp: 70 | sea, moving_avg = func(x) 71 | moving_mean.append(moving_avg) 72 | res.append(sea) 73 | 74 | sea = sum(res) / len(res) 75 | moving_mean = sum(moving_mean) / len(moving_mean) 76 | return sea, moving_mean 77 | 78 | 79 | class EncoderLayer(nn.Module): 80 | """ 81 | Autoformer encoder layer with the progressive decomposition architecture 82 | """ 83 | 84 | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"): 85 | super(EncoderLayer, self).__init__() 86 | d_ff = d_ff or 4 * d_model 87 | self.attention = attention 88 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 89 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 90 | self.decomp1 = 
series_decomp(moving_avg) 91 | self.decomp2 = series_decomp(moving_avg) 92 | self.dropout = nn.Dropout(dropout) 93 | self.activation = F.relu if activation == "relu" else F.gelu 94 | 95 | def forward(self, x, attn_mask=None): 96 | new_x, attn = self.attention( 97 | x, x, x, 98 | attn_mask=attn_mask 99 | ) 100 | x = x + self.dropout(new_x) 101 | x, _ = self.decomp1(x) 102 | y = x 103 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 104 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 105 | res, _ = self.decomp2(x + y) 106 | return res, attn 107 | 108 | 109 | class Encoder(nn.Module): 110 | """ 111 | Autoformer encoder 112 | """ 113 | 114 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 115 | super(Encoder, self).__init__() 116 | self.attn_layers = nn.ModuleList(attn_layers) 117 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 118 | self.norm = norm_layer 119 | 120 | def forward(self, x, attn_mask=None): 121 | attns = [] 122 | if self.conv_layers is not None: 123 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): 124 | x, attn = attn_layer(x, attn_mask=attn_mask) 125 | x = conv_layer(x) 126 | attns.append(attn) 127 | x, attn = self.attn_layers[-1](x) 128 | attns.append(attn) 129 | else: 130 | for attn_layer in self.attn_layers: 131 | x, attn = attn_layer(x, attn_mask=attn_mask) 132 | attns.append(attn) 133 | 134 | if self.norm is not None: 135 | x = self.norm(x) 136 | 137 | return x, attns 138 | 139 | 140 | class DecoderLayer(nn.Module): 141 | """ 142 | Autoformer decoder layer with the progressive decomposition architecture 143 | """ 144 | 145 | def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None, 146 | moving_avg=25, dropout=0.1, activation="relu"): 147 | super(DecoderLayer, self).__init__() 148 | d_ff = d_ff or 4 * d_model 149 | self.self_attention = self_attention 150 | self.cross_attention = cross_attention 151 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False) 152 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False) 153 | self.decomp1 = series_decomp(moving_avg) 154 | self.decomp2 = series_decomp(moving_avg) 155 | self.decomp3 = series_decomp(moving_avg) 156 | self.dropout = nn.Dropout(dropout) 157 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1, 158 | padding_mode='circular', bias=False) 159 | self.activation = F.relu if activation == "relu" else F.gelu 160 | 161 | def forward(self, x, cross, x_mask=None, cross_mask=None): 162 | x = x + self.dropout(self.self_attention( 163 | x, x, x, 164 | attn_mask=x_mask 165 | )[0]) 166 | x, trend1 = self.decomp1(x) 167 | x = x + self.dropout(self.cross_attention( 168 | x, cross, cross, 169 | attn_mask=cross_mask 170 | )[0]) 171 | x, trend2 = self.decomp2(x) 172 | y = x 173 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 174 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 175 | x, trend3 = self.decomp3(x + y) 176 | 177 | residual_trend = trend1 + trend2 + trend3 178 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2) 179 | return x, residual_trend 180 | 181 | 182 | class Decoder(nn.Module): 183 | """ 184 | Autoformer decoder 185 | """ 186 | 187 | def __init__(self, layers, norm_layer=None, projection=None): 188 | super(Decoder, self).__init__() 189 | self.layers = nn.ModuleList(layers) 190 | self.norm = norm_layer 191 | self.projection
= projection 192 | 193 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None): 194 | for layer in self.layers: 195 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 196 | trend = trend + residual_trend 197 | 198 | if self.norm is not None: 199 | x = self.norm(x) 200 | 201 | if self.projection is not None: 202 | x = self.projection(x) 203 | return x, trend 204 | -------------------------------------------------------------------------------- /exp/exp_classification.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from data_provider.data_factory import data_provider 4 | from exp.exp_basic import Exp_Basic 5 | from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy 6 | import torch 7 | import torch.nn as nn 8 | from torch import optim 9 | import os 10 | import time 11 | import warnings 12 | import numpy as np 13 | import pdb 14 | 15 | warnings.filterwarnings('ignore') 16 | 17 | 18 | class Exp_Classification(Exp_Basic): 19 | def __init__(self, args): 20 | super(Exp_Classification, self).__init__(args) 21 | 22 | def _build_model(self): 23 | # model input depends on data 24 | train_data, train_loader = self._get_data(flag='TRAIN') 25 | test_data, test_loader = self._get_data(flag='TEST') 26 | self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len) 27 | self.args.pred_len = 0 28 | self.args.enc_in = train_data.feature_df.shape[1] 29 | self.args.num_class = len(train_data.class_names) 30 | # model init 31 | model = self.model_dict[self.args.model].Model(self.args).float() 32 | if self.args.use_multi_gpu and self.args.use_gpu: 33 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 34 | return model 35 | 36 | def _get_data(self, flag): 37 | data_set, data_loader = data_provider(self.args, flag) 38 | return data_set, data_loader 39 | 40 | def _select_optimizer(self): 41 | # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 42 | model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate) 43 | return model_optim 44 | 45 | def _select_criterion(self): 46 | criterion = nn.CrossEntropyLoss() 47 | return criterion 48 | 49 | def vali(self, vali_data, vali_loader, criterion): 50 | total_loss = [] 51 | preds = [] 52 | trues = [] 53 | self.model.eval() 54 | with torch.no_grad(): 55 | for i, (batch_x, label, padding_mask) in enumerate(vali_loader): 56 | batch_x = batch_x.float().to(self.device) 57 | padding_mask = padding_mask.float().to(self.device) 58 | label = label.to(self.device) 59 | 60 | outputs = self.model(batch_x, padding_mask, None, None) 61 | 62 | pred = outputs.detach() 63 | loss = criterion(pred, label.long().squeeze()) 64 | total_loss.append(loss.item()) 65 | 66 | preds.append(outputs.detach()) 67 | trues.append(label) 68 | 69 | total_loss = np.average(total_loss) 70 | 71 | preds = torch.cat(preds, 0) 72 | trues = torch.cat(trues, 0) 73 | probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. 
for each class and sample 74 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample 75 | trues = trues.flatten().cpu().numpy() 76 | accuracy = cal_accuracy(predictions, trues) 77 | 78 | self.model.train() 79 | return total_loss, accuracy 80 | 81 | def train(self, setting): 82 | train_data, train_loader = self._get_data(flag='TRAIN') 83 | vali_data, vali_loader = self._get_data(flag='TEST') 84 | test_data, test_loader = self._get_data(flag='TEST') 85 | 86 | path = os.path.join(self.args.checkpoints, setting) 87 | if not os.path.exists(path): 88 | os.makedirs(path) 89 | 90 | time_now = time.time() 91 | 92 | train_steps = len(train_loader) 93 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 94 | 95 | model_optim = self._select_optimizer() 96 | criterion = self._select_criterion() 97 | 98 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 99 | steps_per_epoch=train_steps, 100 | pct_start=self.args.pct_start, 101 | epochs=self.args.train_epochs, 102 | max_lr=self.args.learning_rate) 103 | 104 | for epoch in range(self.args.train_epochs): 105 | iter_count = 0 106 | train_loss = [] 107 | 108 | self.model.train() 109 | epoch_time = time.time() 110 | 111 | for i, (batch_x, label, padding_mask) in enumerate(train_loader): 112 | iter_count += 1 113 | model_optim.zero_grad() 114 | 115 | batch_x = batch_x.float().to(self.device) 116 | padding_mask = padding_mask.float().to(self.device) 117 | label = label.to(self.device) 118 | 119 | outputs = self.model(batch_x, padding_mask, None, None) 120 | loss = criterion(outputs, label.long().squeeze(-1)) 121 | train_loss.append(loss.item()) 122 | 123 | if (i + 1) % 100 == 0: 124 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 125 | speed = (time.time() - time_now) / iter_count 126 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 127 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 128 | iter_count = 0 129 | time_now = time.time() 130 | 131 | loss.backward() 132 | nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0) 133 | model_optim.step() 134 | 135 | # if self.args.lradj == 'TST': 136 | # adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 137 | # scheduler.step() 138 | 139 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 140 | train_loss = np.average(train_loss) 141 | vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion) 142 | test_loss, test_accuracy = self.vali(test_data, test_loader, criterion) 143 | 144 | print( 145 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}" 146 | .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy)) 147 | early_stopping(-test_accuracy, self.model, path) 148 | if early_stopping.early_stop: 149 | print("Early stopping") 150 | break 151 | if (epoch + 1) % 5 == 0: 152 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 153 | 154 | best_model_path = path + '/' + 'checkpoint.pth' 155 | self.model.load_state_dict(torch.load(best_model_path)) 156 | 157 | return self.model 158 | 159 | def test(self, setting, test=0): 160 | test_data, test_loader = self._get_data(flag='TEST') 161 | if test: 162 | print('loading model') 163 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 164 
| 165 | preds = [] 166 | trues = [] 167 | folder_path = './test_results/' + setting + '/' 168 | if not os.path.exists(folder_path): 169 | os.makedirs(folder_path) 170 | 171 | self.model.eval() 172 | with torch.no_grad(): 173 | for i, (batch_x, label, padding_mask) in enumerate(test_loader): 174 | batch_x = batch_x.float().to(self.device) 175 | padding_mask = padding_mask.float().to(self.device) 176 | label = label.to(self.device) 177 | 178 | outputs = self.model(batch_x, padding_mask, None, None) 179 | 180 | preds.append(outputs.detach()) 181 | trues.append(label) 182 | 183 | preds = torch.cat(preds, 0) 184 | trues = torch.cat(trues, 0) 185 | print('test shape:', preds.shape, trues.shape) 186 | 187 | probs = torch.nn.functional.softmax(preds) # (total_samples, num_classes) est. prob. for each class and sample 188 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample 189 | trues = trues.flatten().cpu().numpy() 190 | accuracy = cal_accuracy(predictions, trues) 191 | 192 | # result save 193 | folder_path = './results/' + setting + '/' 194 | if not os.path.exists(folder_path): 195 | os.makedirs(folder_path) 196 | 197 | print('accuracy:{}'.format(accuracy)) 198 | file_name='result_classification.txt' 199 | f = open(os.path.join(folder_path,file_name), 'a') 200 | f.write(setting + " \n") 201 | f.write('accuracy:{}'.format(accuracy)) 202 | f.write('\n') 203 | f.write('\n') 204 | f.close() 205 | return -------------------------------------------------------------------------------- /exp/exp_anomaly_detection.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from data_provider.data_factory import data_provider 4 | from exp.exp_basic import Exp_Basic 5 | from utils.tools import EarlyStopping, adjust_learning_rate, adjustment 6 | from sklearn.metrics import precision_recall_fscore_support 7 | from sklearn.metrics import accuracy_score 8 | import torch.multiprocessing 9 | 10 | torch.multiprocessing.set_sharing_strategy('file_system') 11 | import torch 12 | import torch.nn as nn 13 | from torch import optim 14 | import os 15 | import time 16 | import warnings 17 | import numpy as np 18 | 19 | warnings.filterwarnings('ignore') 20 | 21 | 22 | class Exp_Anomaly_Detection(Exp_Basic): 23 | def __init__(self, args): 24 | super(Exp_Anomaly_Detection, self).__init__(args) 25 | 26 | def _build_model(self): 27 | model = self.model_dict[self.args.model].Model(self.args).float() 28 | 29 | if self.args.use_multi_gpu and self.args.use_gpu: 30 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 31 | return model 32 | 33 | def _get_data(self, flag): 34 | data_set, data_loader = data_provider(self.args, flag) 35 | return data_set, data_loader 36 | 37 | def _select_optimizer(self): 38 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 39 | return model_optim 40 | 41 | def _select_criterion(self): 42 | criterion = nn.MSELoss() 43 | return criterion 44 | 45 | def vali(self, vali_data, vali_loader, criterion): 46 | total_loss = [] 47 | self.model.eval() 48 | with torch.no_grad(): 49 | for i, (batch_x, _) in enumerate(vali_loader): 50 | batch_x = batch_x.float().to(self.device) 51 | 52 | outputs = self.model(batch_x, None, None, None) 53 | 54 | f_dim = -1 if self.args.features == 'MS' else 0 55 | outputs = outputs[:, :, f_dim:] 56 | pred = outputs.detach() 57 | true = batch_x.detach() 58 | 59 | loss = criterion(pred, true) 60 | 
total_loss.append(loss.item()) 61 | total_loss = np.average(total_loss) 62 | self.model.train() 63 | return total_loss 64 | 65 | def train(self, setting): 66 | train_data, train_loader = self._get_data(flag='train') 67 | vali_data, vali_loader = self._get_data(flag='val') 68 | test_data, test_loader = self._get_data(flag='test') 69 | 70 | path = os.path.join(self.args.checkpoints, setting) 71 | if not os.path.exists(path): 72 | os.makedirs(path) 73 | 74 | time_now = time.time() 75 | 76 | train_steps = len(train_loader) 77 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 78 | 79 | model_optim = self._select_optimizer() 80 | criterion = self._select_criterion() 81 | 82 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 83 | steps_per_epoch=train_steps, 84 | pct_start=self.args.pct_start, 85 | epochs=self.args.train_epochs, 86 | max_lr=self.args.learning_rate) 87 | 88 | for epoch in range(self.args.train_epochs): 89 | iter_count = 0 90 | train_loss = [] 91 | 92 | self.model.train() 93 | epoch_time = time.time() 94 | for i, (batch_x, batch_y) in enumerate(train_loader): 95 | iter_count += 1 96 | model_optim.zero_grad() 97 | 98 | batch_x = batch_x.float().to(self.device) 99 | 100 | outputs = self.model(batch_x, None, None, None) 101 | 102 | f_dim = -1 if self.args.features == 'MS' else 0 103 | outputs = outputs[:, :, f_dim:] 104 | loss = criterion(outputs, batch_x) 105 | train_loss.append(loss.item()) 106 | 107 | if (i + 1) % 100 == 0: 108 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 109 | speed = (time.time() - time_now) / iter_count 110 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 111 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 112 | iter_count = 0 113 | time_now = time.time() 114 | 115 | loss.backward() 116 | model_optim.step() 117 | 118 | if self.args.lradj == 'TST': 119 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 120 | scheduler.step() 121 | 122 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 123 | train_loss = np.average(train_loss) 124 | vali_loss = self.vali(vali_data, vali_loader, criterion) 125 | test_loss = self.vali(test_data, test_loader, criterion) 126 | 127 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 128 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 129 | early_stopping(test_loss, self.model, path) 130 | if early_stopping.early_stop: 131 | print("Early stopping") 132 | break 133 | if self.args.lradj != 'TST': 134 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True) 135 | else: 136 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 137 | 138 | best_model_path = path + '/' + 'checkpoint.pth' 139 | self.model.load_state_dict(torch.load(best_model_path)) 140 | 141 | return self.model 142 | 143 | def test(self, setting, test=0): 144 | test_data, test_loader = self._get_data(flag='test') 145 | train_data, train_loader = self._get_data(flag='train') 146 | if test: 147 | print('loading model') 148 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 149 | 150 | attens_energy = [] 151 | folder_path = './test_results/' + setting + '/' 152 | if not os.path.exists(folder_path): 153 | os.makedirs(folder_path) 154 | 155 | self.model.eval() 156 | self.anomaly_criterion = nn.MSELoss(reduce=False) 157 | 158 | # 
(1) statistics on the train set 159 | with torch.no_grad(): 160 | for i, (batch_x, batch_y) in enumerate(train_loader): 161 | batch_x = batch_x.float().to(self.device) 162 | # reconstruction 163 | outputs = self.model(batch_x, None, None, None) 164 | # criterion 165 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) 166 | score = score.detach().cpu().numpy() 167 | attens_energy.append(score) 168 | 169 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) 170 | train_energy = np.array(attens_energy) 171 | 172 | # (2) find the threshold 173 | attens_energy = [] 174 | test_labels = [] 175 | for i, (batch_x, batch_y) in enumerate(test_loader): 176 | batch_x = batch_x.float().to(self.device) 177 | # reconstruction 178 | outputs = self.model(batch_x, None, None, None) 179 | # criterion 180 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1) 181 | score = score.detach().cpu().numpy() 182 | attens_energy.append(score) 183 | test_labels.append(batch_y) 184 | 185 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1) 186 | test_energy = np.array(attens_energy) 187 | combined_energy = np.concatenate([train_energy, test_energy], axis=0) 188 | threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio) 189 | print("Threshold :", threshold) 190 | 191 | # (3) evaluation on the test set 192 | pred = (test_energy > threshold).astype(int) 193 | test_labels = np.concatenate(test_labels, axis=0).reshape(-1) 194 | test_labels = np.array(test_labels) 195 | gt = test_labels.astype(int) 196 | 197 | print("pred: ", pred.shape) 198 | print("gt: ", gt.shape) 199 | 200 | # (4) detection adjustment 201 | gt, pred = adjustment(gt, pred) 202 | 203 | pred = np.array(pred) 204 | gt = np.array(gt) 205 | print("pred: ", pred.shape) 206 | print("gt: ", gt.shape) 207 | 208 | accuracy = accuracy_score(gt, pred) 209 | precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary') 210 | print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( 211 | accuracy, precision, 212 | recall, f_score)) 213 | 214 | f = open("result_anomaly_detection.txt", 'a') 215 | f.write(setting + " \n") 216 | f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format( 217 | accuracy, precision, 218 | recall, f_score)) 219 | f.write('\n') 220 | f.write('\n') 221 | f.close() 222 | return 223 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | 4 | from exp.exp_anomaly_detection import Exp_Anomaly_Detection 5 | from exp.exp_classification import Exp_Classification 6 | from exp.exp_imputation import Exp_Imputation 7 | from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast 8 | from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast 9 | import random 10 | import numpy as np 11 | 12 | fix_seed = 2021 13 | random.seed(fix_seed) 14 | torch.manual_seed(fix_seed) 15 | np.random.seed(fix_seed) 16 | 17 | parser = argparse.ArgumentParser(description='LLMMixer') 18 | 19 | # basic config 20 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast', 21 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]') 22 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status') 23 |
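# Example invocation (illustrative; the dataset paths and model_id are
# assumptions). Only --task_name, --is_training, --model_id, --model and
# --data are required:
#
#   python run.py --task_name long_term_forecast --is_training 1 \
#       --model_id ETTh1_96_96 --model LLMMixer --data ETTh1 \
#       --root_path ./data/ETT/ --data_path ETTh1.csv --seq_len 96 --pred_len 96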
parser.add_argument('--model_id', type=str, required=True, default='test', help='model id') 24 | parser.add_argument('--model', type=str, required=True, default='LLMMixer', 25 | help='model name, options: [LLMMixer]') 26 | 27 | # data loader 28 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type') 29 | parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file') 30 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file') 31 | parser.add_argument('--features', type=str, default='M', 32 | help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate') 33 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task') 34 | parser.add_argument('--freq', type=str, default='h', 35 | help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h') 36 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints') 37 | 38 | # forecasting task 39 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length') 40 | parser.add_argument('--label_len', type=int, default=48, help='start token length') 41 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length') 42 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4') 43 | parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False) 44 | 45 | # model define 46 | parser.add_argument('--llm_path', type=str, default='FacebookAI/roberta-base', help='path of pretrained LLM') 47 | parser.add_argument('--tokenizer_path', type=str, default='FacebookAI/roberta-base', help='path of tokenizer') 48 | parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock') 49 | parser.add_argument('--num_kernels', type=int, default=6, help='for Inception') 50 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size') 51 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size') 52 | parser.add_argument('--c_out', type=int, default=7, help='output size') 53 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model') 54 | parser.add_argument('--n_heads', type=int, default=4, help='num of heads') 55 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers') 56 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers') 57 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn') 58 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average') 59 | parser.add_argument('--factor', type=int, default=1, help='attn factor') 60 | parser.add_argument('--distil', action='store_false', 61 | help='whether to use distilling in encoder, using this argument means not using distilling', 62 | default=True) 63 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout') 64 | parser.add_argument('--embed', type=str, default='timeF', 65 | help='time features encoding, options:[timeF, fixed, learned]') 66 | parser.add_argument('--activation', type=str, default='gelu', help='activation') 67 | parser.add_argument('--output_attention', action='store_true', help='whether 
to output attention in encoder') 68 | parser.add_argument('--channel_independence', type=int, default=1, 69 | help='0: channel dependence 1: channel independence for FreTS model') 70 | parser.add_argument('--decomp_method', type=str, default='moving_avg', 71 | help='method of series decomposition, only support moving_avg or dft_decomp') 72 | parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalization; True 1 False 0') 73 | parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers') 74 | parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size') 75 | parser.add_argument('--down_sampling_method', type=str, default='avg', 76 | help='down sampling method, only support avg, max, conv') 77 | parser.add_argument('--use_future_temporal_feature', type=int, default=0, 78 | help='whether to use future_temporal_feature; True 1 False 0') 79 | 80 | # imputation task 81 | parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio') 82 | 83 | # anomaly detection task 84 | parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%)') 85 | 86 | # optimization 87 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers') 88 | parser.add_argument('--itr', type=int, default=1, help='number of experiment runs') 89 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs') 90 | parser.add_argument('--batch_size', type=int, default=16, help='batch size of train input data') 91 | parser.add_argument('--patience', type=int, default=10, help='early stopping patience') 92 | parser.add_argument('--learning_rate', type=float, default=0.001, help='optimizer learning rate') 93 | parser.add_argument('--des', type=str, default='test', help='exp description') 94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function') 95 | parser.add_argument('--lradj', type=str, default='TST', help='adjust learning rate') 96 | parser.add_argument('--pct_start', type=float, default=0.2, help='warm-up fraction for OneCycleLR (pct_start)') 97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False) 98 | parser.add_argument('--comment', type=str, default='none', help='comment') 99 | 100 | # GPU 101 | parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu') 102 | parser.add_argument('--gpu', type=int, default=0, help='gpu') 103 | parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False) 104 | parser.add_argument('--devices', type=str, default='0,1', help='device ids of multiple gpus') 105 | 106 | # de-stationary projector params 107 | parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128], 108 | help='hidden layer dimensions of projector (List)') 109 | parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector') 110 | 111 | args = parser.parse_args() 112 | args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False 113 | 114 | if args.use_gpu and args.use_multi_gpu: 115 | args.devices = args.devices.replace(' ', '') 116 | device_ids = args.devices.split(',') 117 | args.device_ids = [int(id_) for id_ in device_ids] 118 | args.gpu = args.device_ids[0] 119 | 120 | print('Args in experiment:') 121 | print(args) 122 | 123 | if args.task_name == 'long_term_forecast': 124 | Exp = Exp_Long_Term_Forecast 125 | elif args.task_name == 'short_term_forecast':
126 | Exp = Exp_Short_Term_Forecast 127 | else: 128 | Exp = Exp_Long_Term_Forecast 129 | 130 | if args.is_training: 131 | for ii in range(args.itr): 132 | # setting record of experiments 133 | setting = '{}_{}_{}_{}_{}_sl{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( 134 | args.task_name, 135 | args.model_id, 136 | args.comment, 137 | args.model, 138 | args.data, 139 | args.seq_len, 140 | args.pred_len, 141 | args.d_model, 142 | args.n_heads, 143 | args.e_layers, 144 | args.d_layers, 145 | args.d_ff, 146 | args.factor, 147 | args.embed, 148 | args.distil, 149 | args.des, ii) 150 | 151 | exp = Exp(args) # set experiments 152 | print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting)) 153 | exp.train(setting) 154 | 155 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) 156 | exp.test(setting) 157 | torch.cuda.empty_cache() 158 | else: 159 | ii = 0 160 | setting = '{}_{}_{}_{}_{}_sl{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format( 161 | args.task_name, 162 | args.model_id, 163 | args.comment, 164 | args.model, 165 | args.data, 166 | args.seq_len, 167 | args.pred_len, 168 | args.d_model, 169 | args.n_heads, 170 | args.e_layers, 171 | args.d_layers, 172 | args.d_ff, 173 | args.factor, 174 | args.embed, 175 | args.distil, 176 | args.des, ii) 177 | 178 | exp = Exp(args) # set experiments 179 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting)) 180 | exp.test(setting, test=1) 181 | torch.cuda.empty_cache() 182 | -------------------------------------------------------------------------------- /layers/Embed.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from torch.nn.utils import weight_norm 5 | import math 6 | 7 | 8 | class PositionalEmbedding(nn.Module): 9 | def __init__(self, d_model, max_len=5000): 10 | super(PositionalEmbedding, self).__init__() 11 | # Compute the positional encodings once in log space. 
12 | pe = torch.zeros(max_len, d_model).float() 13 | pe.require_grad = False 14 | 15 | position = torch.arange(0, max_len).float().unsqueeze(1) 16 | div_term = (torch.arange(0, d_model, 2).float() 17 | * -(math.log(10000.0) / d_model)).exp() 18 | 19 | pe[:, 0::2] = torch.sin(position * div_term) 20 | pe[:, 1::2] = torch.cos(position * div_term) 21 | 22 | pe = pe.unsqueeze(0) 23 | self.register_buffer('pe', pe) 24 | 25 | def forward(self, x): 26 | return self.pe[:, :x.size(1)] 27 | 28 | 29 | class TokenEmbedding(nn.Module): 30 | def __init__(self, c_in, d_model): 31 | super(TokenEmbedding, self).__init__() 32 | padding = 1 if torch.__version__ >= '1.5.0' else 2 33 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 34 | kernel_size=3, padding=padding, padding_mode='circular', bias=False) 35 | for m in self.modules(): 36 | if isinstance(m, nn.Conv1d): 37 | nn.init.kaiming_normal_( 38 | m.weight, mode='fan_in', nonlinearity='leaky_relu') 39 | 40 | def forward(self, x): 41 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2) 42 | return x 43 | 44 | 45 | class FixedEmbedding(nn.Module): 46 | def __init__(self, c_in, d_model): 47 | super(FixedEmbedding, self).__init__() 48 | 49 | w = torch.zeros(c_in, d_model).float() 50 | w.require_grad = False 51 | 52 | position = torch.arange(0, c_in).float().unsqueeze(1) 53 | div_term = (torch.arange(0, d_model, 2).float() 54 | * -(math.log(10000.0) / d_model)).exp() 55 | 56 | w[:, 0::2] = torch.sin(position * div_term) 57 | w[:, 1::2] = torch.cos(position * div_term) 58 | 59 | self.emb = nn.Embedding(c_in, d_model) 60 | self.emb.weight = nn.Parameter(w, requires_grad=False) 61 | 62 | def forward(self, x): 63 | return self.emb(x).detach() 64 | 65 | 66 | class TemporalEmbedding(nn.Module): 67 | def __init__(self, d_model, embed_type='fixed', freq='h'): 68 | super(TemporalEmbedding, self).__init__() 69 | 70 | minute_size = 4 71 | hour_size = 24 72 | weekday_size = 7 73 | day_size = 32 74 | month_size = 13 75 | 76 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding 77 | if freq == 't': 78 | self.minute_embed = Embed(minute_size, d_model) 79 | self.hour_embed = Embed(hour_size, d_model) 80 | self.weekday_embed = Embed(weekday_size, d_model) 81 | self.day_embed = Embed(day_size, d_model) 82 | self.month_embed = Embed(month_size, d_model) 83 | 84 | def forward(self, x): 85 | x = x.long() 86 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr( 87 | self, 'minute_embed') else 0. 
88 | hour_x = self.hour_embed(x[:, :, 3]) 89 | weekday_x = self.weekday_embed(x[:, :, 2]) 90 | day_x = self.day_embed(x[:, :, 1]) 91 | month_x = self.month_embed(x[:, :, 0]) 92 | 93 | return hour_x + weekday_x + day_x + month_x + minute_x 94 | 95 | 96 | class TimeFeatureEmbedding(nn.Module): 97 | def __init__(self, d_model, embed_type='timeF', freq='h'): 98 | super(TimeFeatureEmbedding, self).__init__() 99 | 100 | freq_map = {'h': 4, 't': 5, 's': 6, 101 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3} 102 | d_inp = freq_map[freq] 103 | self.embed = nn.Linear(d_inp, d_model, bias=False) 104 | 105 | def forward(self, x): 106 | return self.embed(x) 107 | 108 | 109 | class DataEmbedding(nn.Module): 110 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 111 | super(DataEmbedding, self).__init__() 112 | self.c_in = c_in 113 | self.d_model = d_model 114 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 115 | self.position_embedding = PositionalEmbedding(d_model=d_model) 116 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 117 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 118 | d_model=d_model, embed_type=embed_type, freq=freq) 119 | self.dropout = nn.Dropout(p=dropout) 120 | 121 | def forward(self, x, x_mark): 122 | _, _, N = x.size() 123 | if N == self.c_in: 124 | if x_mark is None: 125 | x = self.value_embedding(x) + self.position_embedding(x) 126 | else: 127 | x = self.value_embedding( 128 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x) 129 | elif N == self.d_model: 130 | if x_mark is None: 131 | x = x + self.position_embedding(x) 132 | else: 133 | x = x + self.temporal_embedding(x_mark) + self.position_embedding(x) 134 | 135 | return self.dropout(x) 136 | 137 | 138 | class DataEmbedding_ms(nn.Module): 139 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 140 | super(DataEmbedding_ms, self).__init__() 141 | 142 | self.value_embedding = TokenEmbedding(c_in=1, d_model=d_model) 143 | self.position_embedding = PositionalEmbedding(d_model=d_model) 144 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 145 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 146 | d_model=d_model, embed_type=embed_type, freq=freq) 147 | self.dropout = nn.Dropout(p=dropout) 148 | 149 | def forward(self, x, x_mark): 150 | B, T, N = x.shape 151 | x1 = self.value_embedding(x.permute(0, 2, 1).reshape(B * N, T).unsqueeze(-1)).reshape(B, N, T, -1).permute(0, 2, 152 | 1, 3) # (B, T, N) -> (B, N, T) -> (B*N, T, 1): embed each variate separately 153 | if x_mark is None: 154 | x = x1 155 | else: 156 | x = x1 + self.temporal_embedding(x_mark) 157 | return self.dropout(x) 158 | 159 | 160 | class DataEmbedding_wo_pos(nn.Module): 161 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1): 162 | super(DataEmbedding_wo_pos, self).__init__() 163 | 164 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model) 165 | self.position_embedding = PositionalEmbedding(d_model=d_model) 166 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, 167 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding( 168 | d_model=d_model, embed_type=embed_type, freq=freq) 169 | self.dropout = nn.Dropout(p=dropout) 170 | 171 | def forward(self, x, x_mark): 172 | if x is None and x_mark is not None: 173 | return self.temporal_embedding(x_mark) 174 | if x_mark is None: 175 | x = self.value_embedding(x) 176 | else: 177 | x = self.value_embedding(x) +
self.temporal_embedding(x_mark) 178 | return self.dropout(x) 179 | 180 | 181 | class PatchEmbedding_crossformer(nn.Module): 182 | def __init__(self, d_model, patch_len, stride, padding, dropout): 183 | super(PatchEmbedding_crossformer, self).__init__() 184 | # Patching 185 | self.patch_len = patch_len 186 | self.stride = stride 187 | self.padding_patch_layer = nn.ReplicationPad1d((0, padding)) 188 | 189 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space 190 | self.value_embedding = nn.Linear(patch_len, d_model, bias=False) 191 | 192 | # Positional embedding 193 | self.position_embedding = PositionalEmbedding(d_model) 194 | 195 | # Residual dropout 196 | self.dropout = nn.Dropout(dropout) 197 | 198 | def forward(self, x): 199 | # do patching 200 | n_vars = x.shape[1] 201 | x = self.padding_patch_layer(x) 202 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) 203 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) 204 | # Input encoding 205 | x = self.value_embedding(x) + self.position_embedding(x) 206 | return self.dropout(x), n_vars 207 | 208 | 209 | class PatchEmbedding(nn.Module): 210 | def __init__(self, d_model, patch_len, stride, dropout): 211 | super(PatchEmbedding, self).__init__() 212 | # Patching 213 | self.patch_len = patch_len 214 | self.stride = stride 215 | self.padding_patch_layer = nn.ReplicationPad1d((0, stride)) 216 | 217 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space 218 | self.value_embedding = TokenEmbedding(patch_len, d_model) 219 | 220 | # Positional embedding 221 | self.position_embedding = PositionalEmbedding(d_model) 222 | 223 | # Residual dropout 224 | self.dropout = nn.Dropout(dropout) 225 | 226 | def forward(self, x): 227 | # do patching 228 | n_vars = x.shape[1] 229 | x = self.padding_patch_layer(x) 230 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride) 231 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3])) 232 | # Input encoding 233 | x = self.value_embedding(x) + self.position_embedding(x) 234 | return self.dropout(x), n_vars 235 | -------------------------------------------------------------------------------- /exp/exp_imputation.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.optim import lr_scheduler 3 | 4 | from data_provider.data_factory import data_provider 5 | from exp.exp_basic import Exp_Basic 6 | from utils.tools import EarlyStopping, adjust_learning_rate, visual 7 | from utils.metrics import metric 8 | import torch 9 | import torch.nn as nn 10 | from torch import optim 11 | import os 12 | import time 13 | import warnings 14 | import numpy as np 15 | 16 | warnings.filterwarnings('ignore') 17 | 18 | 19 | class Exp_Imputation(Exp_Basic): 20 | def __init__(self, args): 21 | super(Exp_Imputation, self).__init__(args) 22 | 23 | def _build_model(self): 24 | model = self.model_dict[self.args.model].Model(self.args).float() 25 | 26 | if self.args.use_multi_gpu and self.args.use_gpu: 27 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 28 | return model 29 | 30 | def _get_data(self, flag): 31 | data_set, data_loader = data_provider(self.args, flag) 32 | return data_set, data_loader 33 | 34 | def _select_optimizer(self): 35 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 36 | return model_optim 37 | 38 | def _select_criterion(self): 39 | criterion = nn.MSELoss() 40 | return criterion 41 | 42 | def 
vali(self, vali_data, vali_loader, criterion): 43 | total_loss = [] 44 | self.model.eval() 45 | with torch.no_grad(): 46 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader): 47 | batch_x = batch_x.float().to(self.device) 48 | batch_x_mark = batch_x_mark.float().to(self.device) 49 | 50 | # random mask 51 | B, T, N = batch_x.shape 52 | """ 53 | B = batch size 54 | T = seq len 55 | N = number of features 56 | """ 57 | mask = torch.rand((B, T, N)).to(self.device) 58 | mask[mask <= self.args.mask_rate] = 0 # masked 59 | mask[mask > self.args.mask_rate] = 1 # remained 60 | inp = batch_x.masked_fill(mask == 0, 0) 61 | 62 | outputs = self.model(inp, batch_x_mark, None, None, mask) 63 | 64 | f_dim = -1 if self.args.features == 'MS' else 0 65 | outputs = outputs[:, :, f_dim:] 66 | 67 | # add support for MS 68 | batch_x = batch_x[:, :, f_dim:] 69 | mask = mask[:, :, f_dim:] 70 | 71 | pred = outputs.detach() 72 | true = batch_x.detach() 73 | mask = mask.detach() 74 | 75 | loss = criterion(pred[mask == 0], true[mask == 0]) 76 | total_loss.append(loss.item()) 77 | total_loss = np.average(total_loss) 78 | self.model.train() 79 | return total_loss 80 | 81 | def train(self, setting): 82 | train_data, train_loader = self._get_data(flag='train') 83 | vali_data, vali_loader = self._get_data(flag='val') 84 | test_data, test_loader = self._get_data(flag='test') 85 | 86 | path = os.path.join(self.args.checkpoints, setting) 87 | if not os.path.exists(path): 88 | os.makedirs(path) 89 | 90 | time_now = time.time() 91 | 92 | train_steps = len(train_loader) 93 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 94 | 95 | model_optim = self._select_optimizer() 96 | criterion = self._select_criterion() 97 | 98 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim, 99 | steps_per_epoch=train_steps, 100 | pct_start=self.args.pct_start, 101 | epochs=self.args.train_epochs, 102 | max_lr=self.args.learning_rate) 103 | 104 | for epoch in range(self.args.train_epochs): 105 | iter_count = 0 106 | train_loss = [] 107 | 108 | self.model.train() 109 | epoch_time = time.time() 110 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): 111 | iter_count += 1 112 | model_optim.zero_grad() 113 | 114 | batch_x = batch_x.float().to(self.device) 115 | batch_x_mark = batch_x_mark.float().to(self.device) 116 | 117 | # random mask 118 | B, T, N = batch_x.shape 119 | mask = torch.rand((B, T, N)).to(self.device) 120 | mask[mask <= self.args.mask_rate] = 0 # masked 121 | mask[mask > self.args.mask_rate] = 1 # remained 122 | inp = batch_x.masked_fill(mask == 0, 0) 123 | 124 | outputs = self.model(inp, batch_x_mark, None, None, mask) 125 | 126 | f_dim = -1 if self.args.features == 'MS' else 0 127 | outputs = outputs[:, :, f_dim:] 128 | 129 | # add support for MS 130 | batch_x = batch_x[:, :, f_dim:] 131 | mask = mask[:, :, f_dim:] 132 | 133 | loss = criterion(outputs[mask == 0], batch_x[mask == 0]) 134 | train_loss.append(loss.item()) 135 | 136 | if (i + 1) % 100 == 0: 137 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 138 | speed = (time.time() - time_now) / iter_count 139 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 140 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 141 | iter_count = 0 142 | time_now = time.time() 143 | 144 | loss.backward() 145 | model_optim.step() 146 | 147 | if self.args.lradj == 'TST': 148 | adjust_learning_rate(model_optim, 
                        scheduler, epoch + 1, self.args, printout=False)
149 |                     scheduler.step()
150 | 
151 |             print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
152 |             train_loss = np.average(train_loss)
153 |             vali_loss = self.vali(vali_data, vali_loader, criterion)
154 |             test_loss = self.vali(test_data, test_loader, criterion)
155 | 
156 |             print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
157 |                 epoch + 1, train_steps, train_loss, vali_loss, test_loss))
158 |             early_stopping(vali_loss, self.model, path)
159 |             if early_stopping.early_stop:
160 |                 print("Early stopping")
161 |                 break
162 | 
163 |             if self.args.lradj != 'TST':
164 |                 adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
165 |             else:
166 |                 print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
167 | 
168 |         best_model_path = path + '/' + 'checkpoint.pth'
169 |         self.model.load_state_dict(torch.load(best_model_path))
170 | 
171 |         return self.model
172 | 
173 |     def test(self, setting, test=0):
174 |         test_data, test_loader = self._get_data(flag='test')
175 |         if test:
176 |             print('loading model')
177 |             self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
178 | 
179 |         preds = []
180 |         trues = []
181 |         masks = []
182 |         folder_path = './test_results/' + setting + '/'
183 |         if not os.path.exists(folder_path):
184 |             os.makedirs(folder_path)
185 | 
186 |         self.model.eval()
187 |         with torch.no_grad():
188 |             for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
189 |                 batch_x = batch_x.float().to(self.device)
190 |                 batch_x_mark = batch_x_mark.float().to(self.device)
191 | 
192 |                 # random mask
193 |                 B, T, N = batch_x.shape
194 |                 mask = torch.rand((B, T, N)).to(self.device)
195 |                 mask[mask <= self.args.mask_rate] = 0  # masked
196 |                 mask[mask > self.args.mask_rate] = 1  # remained
197 |                 inp = batch_x.masked_fill(mask == 0, 0)
198 | 
199 |                 # imputation
200 |                 outputs = self.model(inp, batch_x_mark, None, None, mask)
201 | 
202 |                 # eval
203 |                 f_dim = -1 if self.args.features == 'MS' else 0
204 |                 outputs = outputs[:, :, f_dim:]
205 | 
206 |                 # add support for MS
207 |                 batch_x = batch_x[:, :, f_dim:]
208 |                 mask = mask[:, :, f_dim:]
209 | 
210 |                 outputs = outputs.detach().cpu().numpy()
211 |                 pred = outputs
212 |                 true = batch_x.detach().cpu().numpy()
213 |                 preds.append(pred)
214 |                 trues.append(true)
215 |                 masks.append(mask.detach().cpu())
216 | 
217 |                 if i % 20 == 0:
218 |                     filled = true[0, :, -1].copy()
219 |                     filled = filled * mask[0, :, -1].detach().cpu().numpy() + \
220 |                              pred[0, :, -1] * (1 - mask[0, :, -1].detach().cpu().numpy())
221 |                     visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf'))
222 | 
223 |         preds = np.concatenate(preds, 0)
224 |         trues = np.concatenate(trues, 0)
225 |         masks = np.concatenate(masks, 0)
226 |         print('test shape:', preds.shape, trues.shape)
227 | 
228 |         # result save
229 |         folder_path = './results/' + setting + '/'
230 |         if not os.path.exists(folder_path):
231 |             os.makedirs(folder_path)
232 | 
233 |         mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0])
234 |         print('mse:{}, mae:{}'.format(mse, mae))
235 |         f = open("result_imputation.txt", 'a')
236 |         f.write(setting + " \n")
237 |         f.write('mse:{}, mae:{}'.format(mse, mae))
238 |         f.write('\n')
239 |         f.write('\n')
240 |         f.close()
241 | 
242 |         np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
243 |         np.save(folder_path + 'pred.npy', preds)
244 |         np.save(folder_path + 'true.npy', trues)
245 |         return
246 | 
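247 | 
248 | 
249 | if __name__ == "__main__":
250 |     # A minimal, self-contained sketch of the random-mask protocol used in
251 |     # vali/train/test above (added for illustration; the 0.25 mask rate and
252 |     # tensor shape are assumptions, not project defaults).
253 |     x = torch.randn(4, 96, 7)            # (B, T, N)
254 |     mask = torch.rand_like(x)
255 |     mask[mask <= 0.25] = 0               # 0 = position is masked out
256 |     mask[mask > 0.25] = 1                # 1 = position is kept
257 |     inp = x.masked_fill(mask == 0, 0)    # model input with the gaps zeroed
258 |     # the imputation loss is evaluated only on the held-out points, i.e.
259 |     # criterion(outputs[mask == 0], x[mask == 0])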
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |                                  Apache License
2 |                            Version 2.0, January 2004
3 |                         http://www.apache.org/licenses/
4 | 
5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 |    1. Definitions.
8 | 
9 |       "License" shall mean the terms and conditions for use, reproduction,
10 |       and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 |       "Licensor" shall mean the copyright owner or entity authorized by
13 |       the copyright owner that is granting the License.
14 | 
15 |       "Legal Entity" shall mean the union of the acting entity and all
16 |       other entities that control, are controlled by, or are under common
17 |       control with that entity. For the purposes of this definition,
18 |       "control" means (i) the power, direct or indirect, to cause the
19 |       direction or management of such entity, whether by contract or
20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 |       outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 |       "You" (or "Your") shall mean an individual or Legal Entity
24 |       exercising permissions granted by this License.
25 | 
26 |       "Source" form shall mean the preferred form for making modifications,
27 |       including but not limited to software source code, documentation
28 |       source, and configuration files.
29 | 
30 |       "Object" form shall mean any form resulting from mechanical
31 |       transformation or translation of a Source form, including but
32 |       not limited to compiled object code, generated documentation,
33 |       and conversions to other media types.
34 | 
35 |       "Work" shall mean the work of authorship, whether in Source or
36 |       Object form, made available under the License, as indicated by a
37 |       copyright notice that is included in or attached to the work
38 |       (an example is provided in the Appendix below).
39 | 
40 |       "Derivative Works" shall mean any work, whether in Source or Object
41 |       form, that is based on (or derived from) the Work and for which the
42 |       editorial revisions, annotations, elaborations, or other modifications
43 |       represent, as a whole, an original work of authorship. For the purposes
44 |       of this License, Derivative Works shall not include works that remain
45 |       separable from, or merely link (or bind by name) to the interfaces of,
46 |       the Work and Derivative Works thereof.
47 | 
48 |       "Contribution" shall mean any work of authorship, including
49 |       the original version of the Work and any modifications or additions
50 |       to that Work or Derivative Works thereof, that is intentionally
51 |       submitted to Licensor for inclusion in the Work by the copyright owner
52 |       or by an individual or Legal Entity authorized to submit on behalf of
53 |       the copyright owner. For the purposes of this definition, "submitted"
54 |       means any form of electronic, verbal, or written communication sent
55 |       to the Licensor or its representatives, including but not limited to
56 |       communication on electronic mailing lists, source code control systems,
57 |       and issue tracking systems that are managed by, or on behalf of, the
58 |       Licensor for the purpose of discussing and improving the Work, but
59 |       excluding communication that is conspicuously marked or otherwise
60 |       designated in writing by the copyright owner as "Not a Contribution."
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /data_provider/data_loader.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | import glob 5 | import re 6 | import torch 7 | from sktime.datasets import load_from_tsfile_to_dataframe 8 | from torch.utils.data import Dataset 9 | from sklearn.preprocessing import StandardScaler 10 | from utils.timefeatures import time_features 11 | from data_provider.m4 import M4Dataset, M4Meta 12 | from data_provider.uea import Normalizer, interpolate_missing 13 | import warnings 14 | 15 | warnings.filterwarnings('ignore') 16 | 17 | 18 | class Dataset_ETT_hour(Dataset): 19 | def __init__(self, root_path, flag='train', size=None, 20 | features='S', data_path='ETTh1.csv', 21 | target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): 22 | # size [seq_len, label_len, pred_len] 23 | # info 24 | if size == None: 25 | self.seq_len = 24 * 4 * 4 26 | self.label_len = 24 * 4 27 | self.pred_len = 24 * 4 28 | else: 29 | self.seq_len = size[0] 30 | self.label_len = size[1] 31 | self.pred_len = size[2] 32 | # init 33 | assert flag in ['train', 'test', 'val'] 34 | type_map = {'train': 0, 'val': 1, 'test': 2} 35 | self.set_type = type_map[flag] 36 | 37 | self.features = features 38 | self.target = target 39 | self.scale = scale 40 | self.timeenc = timeenc 41 | self.freq = freq 42 | 43 | self.root_path = root_path 44 | self.data_path = data_path 45 | self.__read_data__() 46 | 47 | def __read_data__(self): 48 | self.scaler = StandardScaler() 49 | df_raw = pd.read_csv(os.path.join(self.root_path, 50 | self.data_path)) 51 | 52 | border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len] 53 | border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24] 54 | border1 = border1s[self.set_type] 55 | border2 = border2s[self.set_type] 56 | 57 | if self.features == 'M' or self.features == 'MS': 58 | cols_data = df_raw.columns[1:] 59 | df_data = df_raw[cols_data] 60 | elif self.features == 'S': 61 | df_data = df_raw[[self.target]] 62 | 63 | if self.scale: 64 | train_data = df_data[border1s[0]:border2s[0]] 65 | self.scaler.fit(train_data.values) 66 | data = self.scaler.transform(df_data.values) 67 | else: 68 | data = df_data.values 69 | 70 | 
df_stamp = df_raw[['date']][border1:border2] 71 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 72 | if self.timeenc == 0: 73 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 74 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 75 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 76 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 77 | data_stamp = df_stamp.drop(['date'], 1).values 78 | elif self.timeenc == 1: 79 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 80 | data_stamp = data_stamp.transpose(1, 0) 81 | 82 | self.data_x = data[border1:border2] 83 | self.data_y = data[border1:border2] 84 | self.data_stamp = data_stamp 85 | 86 | def __getitem__(self, index): 87 | s_begin = index 88 | s_end = s_begin + self.seq_len 89 | r_begin = s_end - self.label_len 90 | r_end = r_begin + self.label_len + self.pred_len 91 | 92 | seq_x = self.data_x[s_begin:s_end] 93 | seq_y = self.data_y[r_begin:r_end] 94 | seq_x_mark = self.data_stamp[s_begin:s_end] 95 | seq_y_mark = self.data_stamp[r_begin:r_end] 96 | 97 | return seq_x, seq_y, seq_x_mark, seq_y_mark 98 | 99 | def __len__(self): 100 | return len(self.data_x) - self.seq_len - self.pred_len + 1 101 | 102 | def inverse_transform(self, data): 103 | return self.scaler.inverse_transform(data) 104 | 105 | 106 | class Dataset_ETT_minute(Dataset): 107 | def __init__(self, root_path, flag='train', size=None, 108 | features='S', data_path='ETTm1.csv', 109 | target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None): 110 | # size [seq_len, label_len, pred_len] 111 | # info 112 | if size == None: 113 | self.seq_len = 24 * 4 * 4 114 | self.label_len = 24 * 4 115 | self.pred_len = 24 * 4 116 | else: 117 | self.seq_len = size[0] 118 | self.label_len = size[1] 119 | self.pred_len = size[2] 120 | # init 121 | assert flag in ['train', 'test', 'val'] 122 | type_map = {'train': 0, 'val': 1, 'test': 2} 123 | self.set_type = type_map[flag] 124 | 125 | self.features = features 126 | self.target = target 127 | self.scale = scale 128 | self.timeenc = timeenc 129 | self.freq = freq 130 | 131 | self.root_path = root_path 132 | self.data_path = data_path 133 | self.__read_data__() 134 | 135 | def __read_data__(self): 136 | self.scaler = StandardScaler() 137 | df_raw = pd.read_csv(os.path.join(self.root_path, 138 | self.data_path)) 139 | 140 | border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len] 141 | border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4] 142 | border1 = border1s[self.set_type] 143 | border2 = border2s[self.set_type] 144 | 145 | if self.features == 'M' or self.features == 'MS': 146 | cols_data = df_raw.columns[1:] 147 | df_data = df_raw[cols_data] 148 | elif self.features == 'S': 149 | df_data = df_raw[[self.target]] 150 | 151 | if self.scale: 152 | train_data = df_data[border1s[0]:border2s[0]] 153 | self.scaler.fit(train_data.values) 154 | data = self.scaler.transform(df_data.values) 155 | else: 156 | data = df_data.values 157 | 158 | df_stamp = df_raw[['date']][border1:border2] 159 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 160 | if self.timeenc == 0: 161 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 162 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 163 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 164 | df_stamp['hour'] = df_stamp.date.apply(lambda row: row.hour, 1) 165 
| df_stamp['minute'] = df_stamp.date.apply(lambda row: row.minute, 1) 166 | df_stamp['minute'] = df_stamp.minute.map(lambda x: x // 15) 167 | data_stamp = df_stamp.drop(['date'], 1).values 168 | elif self.timeenc == 1: 169 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 170 | data_stamp = data_stamp.transpose(1, 0) 171 | 172 | self.data_x = data[border1:border2] 173 | self.data_y = data[border1:border2] 174 | self.data_stamp = data_stamp 175 | 176 | def __getitem__(self, index): 177 | s_begin = index 178 | s_end = s_begin + self.seq_len 179 | r_begin = s_end - self.label_len 180 | r_end = r_begin + self.label_len + self.pred_len 181 | 182 | seq_x = self.data_x[s_begin:s_end] 183 | seq_y = self.data_y[r_begin:r_end] 184 | seq_x_mark = self.data_stamp[s_begin:s_end] 185 | seq_y_mark = self.data_stamp[r_begin:r_end] 186 | 187 | return seq_x, seq_y, seq_x_mark, seq_y_mark 188 | 189 | def __len__(self): 190 | return len(self.data_x) - self.seq_len - self.pred_len + 1 191 | 192 | def inverse_transform(self, data): 193 | return self.scaler.inverse_transform(data) 194 | 195 | 196 | class Dataset_Custom(Dataset): 197 | def __init__(self, root_path, flag='train', size=None, 198 | features='S', data_path='ETTh1.csv', 199 | target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None): 200 | # size [seq_len, label_len, pred_len] 201 | # info 202 | if size == None: 203 | self.seq_len = 24 * 4 * 4 204 | self.label_len = 24 * 4 205 | self.pred_len = 24 * 4 206 | else: 207 | self.seq_len = size[0] 208 | self.label_len = size[1] 209 | self.pred_len = size[2] 210 | # init 211 | assert flag in ['train', 'test', 'val'] 212 | type_map = {'train': 0, 'val': 1, 'test': 2} 213 | self.set_type = type_map[flag] 214 | 215 | self.features = features 216 | self.target = target 217 | self.scale = scale 218 | self.timeenc = timeenc 219 | self.freq = freq 220 | 221 | self.root_path = root_path 222 | self.data_path = data_path 223 | self.__read_data__() 224 | 225 | def __read_data__(self): 226 | self.scaler = StandardScaler() 227 | df_raw = pd.read_csv(os.path.join(self.root_path, 228 | self.data_path)) 229 | 230 | ''' 231 | df_raw.columns: ['date', ...(other features), target feature] 232 | ''' 233 | cols = list(df_raw.columns) 234 | cols.remove(self.target) 235 | cols.remove('date') 236 | df_raw = df_raw[['date'] + cols + [self.target]] 237 | num_train = int(len(df_raw) * 0.7) 238 | num_test = int(len(df_raw) * 0.2) 239 | num_vali = len(df_raw) - num_train - num_test 240 | border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len] 241 | border2s = [num_train, num_train + num_vali, len(df_raw)] 242 | border1 = border1s[self.set_type] 243 | border2 = border2s[self.set_type] 244 | 245 | if self.features == 'M' or self.features == 'MS': 246 | cols_data = df_raw.columns[1:] 247 | df_data = df_raw[cols_data] 248 | elif self.features == 'S': 249 | df_data = df_raw[[self.target]] 250 | 251 | if self.scale: 252 | train_data = df_data[border1s[0]:border2s[0]] 253 | self.scaler.fit(train_data.values) 254 | data = self.scaler.transform(df_data.values) 255 | else: 256 | data = df_data.values 257 | 258 | df_stamp = df_raw[['date']][border1:border2] 259 | df_stamp['date'] = pd.to_datetime(df_stamp.date) 260 | if self.timeenc == 0: 261 | df_stamp['month'] = df_stamp.date.apply(lambda row: row.month, 1) 262 | df_stamp['day'] = df_stamp.date.apply(lambda row: row.day, 1) 263 | df_stamp['weekday'] = df_stamp.date.apply(lambda row: row.weekday(), 1) 264 | df_stamp['hour'] = 
df_stamp.date.apply(lambda row: row.hour, 1) 265 | data_stamp = df_stamp.drop(['date'], 1).values 266 | elif self.timeenc == 1: 267 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq) 268 | data_stamp = data_stamp.transpose(1, 0) 269 | 270 | self.data_x = data[border1:border2] 271 | self.data_y = data[border1:border2] 272 | self.data_stamp = data_stamp 273 | 274 | def __getitem__(self, index): 275 | s_begin = index 276 | s_end = s_begin + self.seq_len 277 | r_begin = s_end - self.label_len 278 | r_end = r_begin + self.label_len + self.pred_len 279 | 280 | seq_x = self.data_x[s_begin:s_end] 281 | seq_y = self.data_y[r_begin:r_end] 282 | seq_x_mark = self.data_stamp[s_begin:s_end] 283 | seq_y_mark = self.data_stamp[r_begin:r_end] 284 | 285 | return seq_x, seq_y, seq_x_mark, seq_y_mark 286 | 287 | def __len__(self): 288 | return len(self.data_x) - self.seq_len - self.pred_len + 1 289 | 290 | def inverse_transform(self, data): 291 | return self.scaler.inverse_transform(data) 292 | -------------------------------------------------------------------------------- /exp/exp_short_term_forecasting.py: -------------------------------------------------------------------------------- 1 | from torch.optim import lr_scheduler 2 | 3 | from data_provider.data_factory import data_provider 4 | from data_provider.m4 import M4Meta 5 | from exp.exp_basic import Exp_Basic 6 | from utils.tools import EarlyStopping, adjust_learning_rate, visual, save_to_csv 7 | from utils.losses import mape_loss, mase_loss, smape_loss 8 | from utils.m4_summary import M4Summary 9 | import torch 10 | import torch.nn as nn 11 | from torch import optim 12 | import os 13 | import time 14 | import warnings 15 | import numpy as np 16 | import pandas 17 | 18 | warnings.filterwarnings('ignore') 19 | 20 | 21 | class Exp_Short_Term_Forecast(Exp_Basic): 22 | def __init__(self, args): 23 | super(Exp_Short_Term_Forecast, self).__init__(args) 24 | 25 | def _build_model(self): 26 | if self.args.data == 'm4': 27 | self.args.pred_len = M4Meta.horizons_map[self.args.seasonal_patterns] # Up to M4 config 28 | self.args.seq_len = 2 * self.args.pred_len # input_len = 2*pred_len 29 | self.args.label_len = self.args.pred_len 30 | self.args.frequency_map = M4Meta.frequency_map[self.args.seasonal_patterns] 31 | model = self.model_dict[self.args.model].Model(self.args).float() 32 | 33 | if self.args.use_multi_gpu and self.args.use_gpu: 34 | model = nn.DataParallel(model, device_ids=self.args.device_ids) 35 | return model 36 | 37 | def _get_data(self, flag): 38 | data_set, data_loader = data_provider(self.args, flag) 39 | return data_set, data_loader 40 | 41 | def _select_optimizer(self): 42 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate) 43 | return model_optim 44 | 45 | def _select_criterion(self, loss_name='MSE'): 46 | if loss_name == 'MSE': 47 | return nn.MSELoss() 48 | elif loss_name == 'MAPE': 49 | return mape_loss() 50 | elif loss_name == 'MASE': 51 | return mase_loss() 52 | elif loss_name == 'SMAPE': 53 | return smape_loss() 54 | 55 | def train(self, setting): 56 | train_data, train_loader = self._get_data(flag='train') 57 | vali_data, vali_loader = self._get_data(flag='val') 58 | 59 | path = os.path.join(self.args.checkpoints, setting) 60 | if not os.path.exists(path): 61 | os.makedirs(path) 62 | 63 | time_now = time.time() 64 | 65 | train_steps = len(train_loader) 66 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True) 67 | 68 | model_optim = 
self._select_optimizer() 69 | criterion = self._select_criterion(self.args.loss) 70 | 71 | scheduler = lr_scheduler.OneCycleLR(optimizer = model_optim, 72 | steps_per_epoch = train_steps, 73 | pct_start = self.args.pct_start, 74 | epochs = self.args.train_epochs, 75 | max_lr = self.args.learning_rate) 76 | 77 | for epoch in range(self.args.train_epochs): 78 | iter_count = 0 79 | train_loss = [] 80 | 81 | self.model.train() 82 | epoch_time = time.time() 83 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader): 84 | iter_count += 1 85 | model_optim.zero_grad() 86 | 87 | batch_x = batch_x.float().to(self.device) 88 | batch_y = batch_y.float().to(self.device) 89 | 90 | batch_y_mark = batch_y_mark.float().to(self.device) 91 | 92 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float() 93 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device) 94 | 95 | outputs = self.model(batch_x, None, dec_inp, None) 96 | f_dim = -1 if self.args.features == 'MS' else 0 97 | outputs = outputs[:, -self.args.pred_len:, f_dim:] 98 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device) 99 | 100 | batch_y_mark = batch_y_mark[:, -self.args.pred_len:, f_dim:].to(self.device) 101 | loss_value = criterion(batch_x, self.args.frequency_map, outputs, batch_y, batch_y_mark) 102 | # loss_sharpness = mse((outputs[:, 1:, :] - outputs[:, :-1, :]), (batch_y[:, 1:, :] - batch_y[:, :-1, :])) 103 | loss = loss_value # + loss_sharpness * 1e-5 104 | train_loss.append(loss.item()) 105 | 106 | if (i + 1) % 100 == 0: 107 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item())) 108 | speed = (time.time() - time_now) / iter_count 109 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i) 110 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time)) 111 | iter_count = 0 112 | time_now = time.time() 113 | 114 | loss.backward() 115 | model_optim.step() 116 | 117 | if self.args.lradj == 'TST': 118 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False) 119 | scheduler.step() 120 | 121 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time)) 122 | train_loss = np.average(train_loss) 123 | vali_loss = self.vali(train_loader, vali_loader, criterion) 124 | test_loss = vali_loss 125 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format( 126 | epoch + 1, train_steps, train_loss, vali_loss, test_loss)) 127 | early_stopping(vali_loss, self.model, path) 128 | if early_stopping.early_stop: 129 | print("Early stopping") 130 | break 131 | 132 | if self.args.lradj != 'TST': 133 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True) 134 | else: 135 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0])) 136 | 137 | best_model_path = path + '/' + 'checkpoint.pth' 138 | self.model.load_state_dict(torch.load(best_model_path)) 139 | 140 | return self.model 141 | 142 | def vali(self, train_loader, vali_loader, criterion): 143 | x, _ = train_loader.dataset.last_insample_window() 144 | y = vali_loader.dataset.timeseries 145 | x = torch.tensor(x, dtype=torch.float32).to(self.device) 146 | x = x.unsqueeze(-1) 147 | 148 | self.model.eval() 149 | with torch.no_grad(): 150 | # decoder input 151 | B, _, C = x.shape 152 | dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) 153 | dec_inp = torch.cat([x[:, -self.args.label_len:, :], 
dec_inp], dim=1).float() 154 | 155 | # encoder - decoder 156 | outputs = torch.zeros((B, self.args.pred_len, C)).float() # .to(self.device) 157 | id_list = np.arange(0, B, 500) # validation set size 158 | id_list = np.append(id_list, B) 159 | for i in range(len(id_list) - 1): 160 | x_enc = x[id_list[i]:id_list[i + 1]] 161 | outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x_enc, None, 162 | dec_inp[id_list[i]:id_list[i + 1]], 163 | None).detach().cpu() 164 | f_dim = -1 if self.args.features == 'MS' else 0 165 | outputs = outputs[:, -self.args.pred_len:, f_dim:] 166 | pred = outputs 167 | true = torch.from_numpy(np.array(y)) 168 | batch_y_mark = torch.ones(true.shape) 169 | 170 | loss = criterion(x.detach().cpu()[:, :, 0], self.args.frequency_map, pred[:, :, 0], true, batch_y_mark) 171 | 172 | self.model.train() 173 | return loss 174 | 175 | def test(self, setting, test=0): 176 | _, train_loader = self._get_data(flag='train') 177 | _, test_loader = self._get_data(flag='test') 178 | x, _ = train_loader.dataset.last_insample_window() 179 | y = test_loader.dataset.timeseries 180 | x = torch.tensor(x, dtype=torch.float32).to(self.device) 181 | x = x.unsqueeze(-1) 182 | 183 | if test: 184 | print('loading model') 185 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth'))) 186 | 187 | folder_path = './test_results/' + setting + '/' 188 | if not os.path.exists(folder_path): 189 | os.makedirs(folder_path) 190 | 191 | self.model.eval() 192 | with torch.no_grad(): 193 | B, _, C = x.shape 194 | dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) 195 | dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float() 196 | # encoder - decoder 197 | outputs = torch.zeros((B, self.args.pred_len, C)).float().to(self.device) 198 | id_list = np.arange(0, B, 1) 199 | id_list = np.append(id_list, B) 200 | for i in range(len(id_list) - 1): 201 | x_enc = x[id_list[i]:id_list[i + 1]] 202 | outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x_enc, None, 203 | dec_inp[id_list[i]:id_list[i + 1]], None) 204 | 205 | if id_list[i] % 1000 == 0: 206 | print(id_list[i]) 207 | 208 | f_dim = -1 if self.args.features == 'MS' else 0 209 | outputs = outputs[:, -self.args.pred_len:, f_dim:] 210 | outputs = outputs.detach().cpu().numpy() 211 | 212 | preds = outputs 213 | trues = y 214 | x = x.detach().cpu().numpy() 215 | 216 | for i in range(0, preds.shape[0], preds.shape[0] // 10): 217 | gt = np.concatenate((x[i, :, 0], trues[i]), axis=0) 218 | pd = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0) 219 | visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf')) 220 | save_to_csv(gt, pd, os.path.join(folder_path, str(i) + '.csv')) 221 | 222 | print('test shape:', preds.shape) 223 | 224 | # result save 225 | folder_path = './m4_results/' + self.args.model + '/' 226 | if not os.path.exists(folder_path): 227 | os.makedirs(folder_path) 228 | 229 | forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(self.args.pred_len)]) 230 | forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]] 231 | forecasts_df.index.name = 'id' 232 | forecasts_df.set_index(forecasts_df.columns[0], inplace=True) 233 | forecasts_df.to_csv(folder_path + self.args.seasonal_patterns + '_forecast.csv') 234 | 235 | print(self.args.model) 236 | file_path = './m4_results/' + self.args.model + '/' 237 | if 'Weekly_forecast.csv' in os.listdir(file_path) \ 238 | and 'Monthly_forecast.csv' in os.listdir(file_path) \ 239 | and 
'Yearly_forecast.csv' in os.listdir(file_path) \ 240 | and 'Daily_forecast.csv' in os.listdir(file_path) \ 241 | and 'Hourly_forecast.csv' in os.listdir(file_path) \ 242 | and 'Quarterly_forecast.csv' in os.listdir(file_path): 243 | m4_summary = M4Summary(file_path, self.args.root_path) 244 | # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True) 245 | smape_results, owa_results, mape, mase = m4_summary.evaluate() 246 | print('smape:', smape_results) 247 | print('mape:', mape) 248 | print('mase:', mase) 249 | print('owa:', owa_results) 250 | else: 251 | print('After all 6 tasks are finished, you can calculate the averaged index') 252 | return 253 | 254 | -------------------------------------------------------------------------------- /layers/SelfAttention_Family.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from math import sqrt 5 | 6 | from einops import rearrange, repeat 7 | 8 | from utils.masking import TriangularCausalMask, ProbMask 9 | from reformer_pytorch import LSHSelfAttention 10 | 11 | 12 | class DSAttention(nn.Module): 13 | '''De-stationary Attention''' 14 | 15 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 16 | super(DSAttention, self).__init__() 17 | self.scale = scale 18 | self.mask_flag = mask_flag 19 | self.output_attention = output_attention 20 | self.dropout = nn.Dropout(attention_dropout) 21 | 22 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 23 | B, L, H, E = queries.shape 24 | _, S, _, D = values.shape 25 | scale = self.scale or 1. / sqrt(E) 26 | 27 | tau = 1.0 if tau is None else tau.unsqueeze( 28 | 1).unsqueeze(1) # B x 1 x 1 x 1 29 | delta = 0.0 if delta is None else delta.unsqueeze( 30 | 1).unsqueeze(1) # B x 1 x 1 x S 31 | 32 | # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors 33 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta 34 | 35 | if self.mask_flag: 36 | if attn_mask is None: 37 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 38 | 39 | scores.masked_fill_(attn_mask.mask, -np.inf) 40 | 41 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 42 | V = torch.einsum("bhls,bshd->blhd", A, values) 43 | 44 | if self.output_attention: 45 | return (V.contiguous(), A) 46 | else: 47 | return (V.contiguous(), None) 48 | 49 | 50 | class FullAttention(nn.Module): 51 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 52 | super(FullAttention, self).__init__() 53 | self.scale = scale 54 | self.mask_flag = mask_flag 55 | self.output_attention = output_attention 56 | self.dropout = nn.Dropout(attention_dropout) 57 | 58 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 59 | B, L, H, E = queries.shape 60 | _, S, _, D = values.shape 61 | scale = self.scale or 1. 
/ sqrt(E) 62 | 63 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 64 | 65 | if self.mask_flag: 66 | if attn_mask is None: 67 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 68 | 69 | scores.masked_fill_(attn_mask.mask, -np.inf) 70 | 71 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 72 | V = torch.einsum("bhls,bshd->blhd", A, values) 73 | 74 | if self.output_attention: 75 | return (V.contiguous(), A) 76 | else: 77 | return (V.contiguous(), None) 78 | 79 | 80 | class ProbAttention(nn.Module): 81 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 82 | super(ProbAttention, self).__init__() 83 | self.factor = factor 84 | self.scale = scale 85 | self.mask_flag = mask_flag 86 | self.output_attention = output_attention 87 | self.dropout = nn.Dropout(attention_dropout) 88 | 89 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 90 | # Q [B, H, L, D] 91 | B, H, L_K, E = K.shape 92 | _, _, L_Q, _ = Q.shape 93 | 94 | # calculate the sampled Q_K 95 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 96 | # real U = U_part(factor*ln(L_k))*L_q 97 | index_sample = torch.randint(L_K, (L_Q, sample_k)) 98 | K_sample = K_expand[:, :, torch.arange( 99 | L_Q).unsqueeze(1), index_sample, :] 100 | Q_K_sample = torch.matmul( 101 | Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 102 | 103 | # find the Top_k query with sparisty measurement 104 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 105 | M_top = M.topk(n_top, sorted=False)[1] 106 | 107 | # use the reduced Q to calculate Q_K 108 | Q_reduce = Q[torch.arange(B)[:, None, None], 109 | torch.arange(H)[None, :, None], 110 | M_top, :] # factor*ln(L_q) 111 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 112 | 113 | return Q_K, M_top 114 | 115 | def _get_initial_context(self, V, L_Q): 116 | B, H, L_V, D = V.shape 117 | if not self.mask_flag: 118 | # V_sum = V.sum(dim=-2) 119 | V_sum = V.mean(dim=-2) 120 | contex = V_sum.unsqueeze(-2).expand(B, H, 121 | L_Q, V_sum.shape[-1]).clone() 122 | else: # use mask 123 | # requires that L_Q == L_V, i.e. 
for self-attention only 124 | assert (L_Q == L_V) 125 | contex = V.cumsum(dim=-2) 126 | return contex 127 | 128 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 129 | B, H, L_V, D = V.shape 130 | 131 | if self.mask_flag: 132 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 133 | scores.masked_fill_(attn_mask.mask, -np.inf) 134 | 135 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 136 | 137 | context_in[torch.arange(B)[:, None, None], 138 | torch.arange(H)[None, :, None], 139 | index, :] = torch.matmul(attn, V).type_as(context_in) 140 | if self.output_attention: 141 | attns = (torch.ones([B, H, L_V, L_V]) / 142 | L_V).type_as(attn).to(attn.device) 143 | attns[torch.arange(B)[:, None, None], torch.arange(H)[ 144 | None, :, None], index, :] = attn 145 | return (context_in, attns) 146 | else: 147 | return (context_in, None) 148 | 149 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 150 | B, L_Q, H, D = queries.shape 151 | _, L_K, _, _ = keys.shape 152 | 153 | queries = queries.transpose(2, 1) 154 | keys = keys.transpose(2, 1) 155 | values = values.transpose(2, 1) 156 | 157 | U_part = self.factor * \ 158 | np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 159 | u = self.factor * \ 160 | np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 161 | 162 | U_part = U_part if U_part < L_K else L_K 163 | u = u if u < L_Q else L_Q 164 | 165 | scores_top, index = self._prob_QK( 166 | queries, keys, sample_k=U_part, n_top=u) 167 | 168 | # add scale factor 169 | scale = self.scale or 1. / sqrt(D) 170 | if scale is not None: 171 | scores_top = scores_top * scale 172 | # get the context 173 | context = self._get_initial_context(values, L_Q) 174 | # update the context with selected top_k queries 175 | context, attn = self._update_context( 176 | context, values, scores_top, index, L_Q, attn_mask) 177 | 178 | return context.contiguous(), attn 179 | 180 | 181 | class AttentionLayer(nn.Module): 182 | def __init__(self, attention, d_model, n_heads, d_keys=None, 183 | d_values=None): 184 | super(AttentionLayer, self).__init__() 185 | 186 | d_keys = d_keys or (d_model // n_heads) 187 | d_values = d_values or (d_model // n_heads) 188 | 189 | self.inner_attention = attention 190 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 191 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 192 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 193 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 194 | self.n_heads = n_heads 195 | 196 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 197 | B, L, _ = queries.shape 198 | _, S, _ = keys.shape 199 | H = self.n_heads 200 | 201 | queries = self.query_projection(queries).view(B, L, H, -1) 202 | keys = self.key_projection(keys).view(B, S, H, -1) 203 | values = self.value_projection(values).view(B, S, H, -1) 204 | 205 | out, attn = self.inner_attention( 206 | queries, 207 | keys, 208 | values, 209 | attn_mask, 210 | tau=tau, 211 | delta=delta 212 | ) 213 | out = out.view(B, L, -1) 214 | 215 | return self.out_projection(out), attn 216 | 217 | 218 | class ReformerLayer(nn.Module): 219 | def __init__(self, attention, d_model, n_heads, d_keys=None, 220 | d_values=None, causal=False, bucket_size=4, n_hashes=4): 221 | super().__init__() 222 | self.bucket_size = bucket_size 223 | self.attn = LSHSelfAttention( 224 | dim=d_model, 225 | heads=n_heads, 226 | bucket_size=bucket_size, 227 | n_hashes=n_hashes, 228 | 
            causal=causal
229 |         )
230 | 
231 |     def fit_length(self, queries):
232 |         # inside reformer: assert N % (bucket_size * 2) == 0
233 |         B, N, C = queries.shape
234 |         if N % (self.bucket_size * 2) == 0:
235 |             return queries
236 |         else:
237 |             # pad the time series so its length is a multiple of bucket_size * 2
238 |             fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
239 |             return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)
240 | 
241 |     def forward(self, queries, keys, values, attn_mask, tau, delta):
242 |         # in Reformer: default queries=keys
243 |         B, N, C = queries.shape
244 |         queries = self.attn(self.fit_length(queries))[:, :N, :]
245 |         return queries, None
246 | 
247 | class TwoStageAttentionLayer(nn.Module):
248 |     '''
249 |     The Two Stage Attention (TSA) Layer
250 |     input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model]
251 |     '''
252 | 
253 |     def __init__(self, configs,
254 |                  seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1):
255 |         super(TwoStageAttentionLayer, self).__init__()
256 |         d_ff = d_ff or 4 * d_model
257 |         self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
258 |                                                            output_attention=configs.output_attention), d_model, n_heads)
259 |         self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
260 |                                                        output_attention=configs.output_attention), d_model, n_heads)
261 |         self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
262 |                                                          output_attention=configs.output_attention), d_model, n_heads)
263 |         self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))
264 | 
265 |         self.dropout = nn.Dropout(dropout)
266 | 
267 |         self.norm1 = nn.LayerNorm(d_model)
268 |         self.norm2 = nn.LayerNorm(d_model)
269 |         self.norm3 = nn.LayerNorm(d_model)
270 |         self.norm4 = nn.LayerNorm(d_model)
271 | 
272 |         self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff),
273 |                                   nn.GELU(),
274 |                                   nn.Linear(d_ff, d_model))
275 |         self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff),
276 |                                   nn.GELU(),
277 |                                   nn.Linear(d_ff, d_model))
278 | 
279 |     def forward(self, x, attn_mask=None, tau=None, delta=None):
280 |         # Cross Time Stage: Directly apply MSA to each dimension
281 |         batch = x.shape[0]
282 |         time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model')
283 |         time_enc, attn = self.time_attention(
284 |             time_in, time_in, time_in, attn_mask=None, tau=None, delta=None
285 |         )
286 |         dim_in = time_in + self.dropout(time_enc)
287 |         dim_in = self.norm1(dim_in)
288 |         dim_in = dim_in + self.dropout(self.MLP1(dim_in))
289 |         dim_in = self.norm2(dim_in)
290 | 
291 |         # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection
292 |         dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch)
293 |         batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch)
294 |         dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None)
295 |         dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None)
296 |         dim_enc = dim_send + self.dropout(dim_receive)
297 |         dim_enc = self.norm3(dim_enc)
298 |         dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc))
299 |         dim_enc = self.norm4(dim_enc)
300 | 
301 |         final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch)
302 | 
303 |         return final_out
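304 | 
305 | 
306 | if __name__ == "__main__":
307 |     # Minimal smoke test (added for illustration; the sizes below are
308 |     # assumptions, not project defaults). AttentionLayer projects
309 |     # (B, L, d_model) inputs to multi-head Q/K/V, applies the wrapped
310 |     # attention, and projects back, so the output shape matches the input.
311 |     B, L, d_model, n_heads = 2, 96, 64, 8
312 |     layer = AttentionLayer(FullAttention(mask_flag=False), d_model, n_heads)
313 |     x = torch.randn(B, L, d_model)
314 |     out, attn = layer(x, x, x, attn_mask=None)
315 |     assert out.shape == (B, L, d_model)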
-------------------------------------------------------------------------------- /layers/SelfAttention_Family1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import numpy as np 4 | from math import sqrt 5 | from utils.masking import TriangularCausalMask, ProbMask 6 | from reformer_pytorch import LSHSelfAttention 7 | from einops import rearrange 8 | 9 | 10 | # Code implementation from https://github.com/thuml/Flowformer 11 | class FlowAttention(nn.Module): 12 | def __init__(self, attention_dropout=0.1): 13 | super(FlowAttention, self).__init__() 14 | self.dropout = nn.Dropout(attention_dropout) 15 | 16 | def kernel_method(self, x): 17 | return torch.sigmoid(x) 18 | 19 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None): 20 | queries = queries.transpose(1, 2) 21 | keys = keys.transpose(1, 2) 22 | values = values.transpose(1, 2) 23 | # kernel 24 | queries = self.kernel_method(queries) 25 | keys = self.kernel_method(keys) 26 | # incoming and outgoing 27 | normalizer_row = 1.0 / (torch.einsum("nhld,nhd->nhl", queries + 1e-6, keys.sum(dim=2) + 1e-6)) 28 | normalizer_col = 1.0 / (torch.einsum("nhsd,nhd->nhs", keys + 1e-6, queries.sum(dim=2) + 1e-6)) 29 | # reweighting 30 | normalizer_row_refine = ( 31 | torch.einsum("nhld,nhd->nhl", queries + 1e-6, (keys * normalizer_col[:, :, :, None]).sum(dim=2) + 1e-6)) 32 | normalizer_col_refine = ( 33 | torch.einsum("nhsd,nhd->nhs", keys + 1e-6, (queries * normalizer_row[:, :, :, None]).sum(dim=2) + 1e-6)) 34 | # competition and allocation 35 | normalizer_row_refine = torch.sigmoid( 36 | normalizer_row_refine * (float(queries.shape[2]) / float(keys.shape[2]))) 37 | normalizer_col_refine = torch.softmax(normalizer_col_refine, dim=-1) * keys.shape[2] # B h L vis 38 | # multiply 39 | kv = keys.transpose(-2, -1) @ (values * normalizer_col_refine[:, :, :, None]) 40 | x = (((queries @ kv) * normalizer_row[:, :, :, None]) * normalizer_row_refine[:, :, :, None]).transpose(1, 41 | 2).contiguous() 42 | return x, None 43 | 44 | 45 | # Code implementation from https://github.com/shreyansh26/FlashAttention-PyTorch 46 | class FlashAttention(nn.Module): 47 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 48 | super(FlashAttention, self).__init__() 49 | self.scale = scale 50 | self.mask_flag = mask_flag 51 | self.output_attention = output_attention 52 | self.dropout = nn.Dropout(attention_dropout) 53 | 54 | def flash_attention_forward(self, Q, K, V, mask=None): 55 | BLOCK_SIZE = 32 56 | NEG_INF = -1e10 # -infinity 57 | EPSILON = 1e-10 58 | # mask = torch.randint(0, 2, (128, 8)).to(device='cuda') 59 | O = torch.zeros_like(Q, requires_grad=True) 60 | l = torch.zeros(Q.shape[:-1])[..., None] 61 | m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF 62 | 63 | O = O.to(device='cuda') 64 | l = l.to(device='cuda') 65 | m = m.to(device='cuda') 66 | 67 | Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1]) 68 | KV_BLOCK_SIZE = BLOCK_SIZE 69 | 70 | Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2) 71 | K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2) 72 | V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2) 73 | if mask is not None: 74 | mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1)) 75 | 76 | Tr = len(Q_BLOCKS) 77 | Tc = len(K_BLOCKS) 78 | 79 | O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2)) 80 | l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2)) 81 | m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2)) 82 | 83 | for j 


# Code implementation from https://github.com/shreyansh26/FlashAttention-PyTorch
class FlashAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FlashAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def flash_attention_forward(self, Q, K, V, mask=None):
        BLOCK_SIZE = 32
        NEG_INF = -1e10  # -infinity
        EPSILON = 1e-10
        # mask = torch.randint(0, 2, (128, 8)).to(device='cuda')
        O = torch.zeros_like(Q, requires_grad=True)
        l = torch.zeros(Q.shape[:-1])[..., None]
        m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF

        # keep the running statistics on the same device as the inputs
        O = O.to(Q.device)
        l = l.to(Q.device)
        m = m.to(Q.device)

        Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1])
        KV_BLOCK_SIZE = BLOCK_SIZE

        Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2)
        K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2)
        V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2)
        if mask is not None:
            mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1))

        Tr = len(Q_BLOCKS)
        Tc = len(K_BLOCKS)

        O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2))
        l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2))
        m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2))

        for j in range(Tc):
            Kj = K_BLOCKS[j]
            Vj = V_BLOCKS[j]
            if mask is not None:
                maskj = mask_BLOCKS[j]

            for i in range(Tr):
                Qi = Q_BLOCKS[i]
                Oi = O_BLOCKS[i]
                li = l_BLOCKS[i]
                mi = m_BLOCKS[i]

                scale = 1 / np.sqrt(Q.shape[-1])
                Qi_scaled = Qi * scale

                S_ij = torch.einsum('... i d, ... j d -> ... i j', Qi_scaled, Kj)
                if mask is not None:
                    # Masking
                    maskj_temp = rearrange(maskj, 'b j -> b 1 1 j')
                    S_ij = torch.where(maskj_temp > 0, S_ij, NEG_INF)

                m_block_ij, _ = torch.max(S_ij, dim=-1, keepdims=True)
                P_ij = torch.exp(S_ij - m_block_ij)
                if mask is not None:
                    # Masking
                    P_ij = torch.where(maskj_temp > 0, P_ij, 0.)

                l_block_ij = torch.sum(P_ij, dim=-1, keepdims=True) + EPSILON

                P_ij_Vj = torch.einsum('... i j, ... j d -> ... i d', P_ij, Vj)

                mi_new = torch.maximum(m_block_ij, mi)
                li_new = torch.exp(mi - mi_new) * li + torch.exp(m_block_ij - mi_new) * l_block_ij

                O_BLOCKS[i] = (li / li_new) * torch.exp(mi - mi_new) * Oi + (
                        torch.exp(m_block_ij - mi_new) / li_new) * P_ij_Vj
                l_BLOCKS[i] = li_new
                m_BLOCKS[i] = mi_new

        O = torch.cat(O_BLOCKS, dim=2)
        l = torch.cat(l_BLOCKS, dim=2)
        m = torch.cat(m_BLOCKS, dim=2)
        return O, l, m

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        res = \
            self.flash_attention_forward(queries.permute(0, 2, 1, 3), keys.permute(0, 2, 1, 3),
                                         values.permute(0, 2, 1, 3), attn_mask)[0]
        return res.permute(0, 2, 1, 3).contiguous(), None
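

# The block loop above is the online-softmax recurrence: per query row it
# maintains the running max m_i and running normalizer l_i. When a new key
# block arrives with block max m_block and partial sum l_block,
#     m_new = max(m_i, m_block)
#     l_new = exp(m_i - m_new) * l_i + exp(m_block - m_new) * l_block
# and the previously accumulated output O_i is rescaled by
# (l_i / l_new) * exp(m_i - m_new), so the final O equals
# softmax(Q K^T / sqrt(d)) V without ever materializing the full L x L
# attention matrix.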


class FullAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(FullAttention, self).__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L, H, E = queries.shape
        _, S, _, D = values.shape
        scale = self.scale or 1. / sqrt(E)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)

        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(B, L, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        A = self.dropout(torch.softmax(scale * scores, dim=-1))
        V = torch.einsum("bhls,bshd->blhd", A, values)

        if self.output_attention:
            return (V.contiguous(), A)
        else:
            return (V.contiguous(), None)
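

# A minimal usage sketch for FullAttention (hypothetical sizes). With
# mask_flag=True and attn_mask=None, a TriangularCausalMask is built
# internally, so position t can only attend to positions <= t.
def _full_attention_example():
    q = torch.randn(2, 96, 8, 64)  # (batch, length, heads, head_dim)
    out, attn = FullAttention(mask_flag=True, output_attention=True)(q, q, q, attn_mask=None)
    return out.shape, attn.shape  # (2, 96, 8, 64), (2, 8, 96, 96)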


# Code implementation from https://github.com/zhouhaoyi/Informer2020
class ProbAttention(nn.Module):
    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super(ProbAttention, self).__init__()
        self.factor = factor
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def _prob_QK(self, Q, K, sample_k, n_top):  # n_top: c*ln(L_q)
        # Q [B, H, L, D]
        B, H, L_K, E = K.shape
        _, _, L_Q, _ = Q.shape

        # calculate the sampled Q_K
        K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
        # real U = U_part(factor*ln(L_k))*L_q
        index_sample = torch.randint(L_K, (L_Q, sample_k))
        K_sample = K_expand[:, :, torch.arange(
            L_Q).unsqueeze(1), index_sample, :]
        Q_K_sample = torch.matmul(
            Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()

        # find the Top_k query with sparsity measurement
        M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
        M_top = M.topk(n_top, sorted=False)[1]

        # use the reduced Q to calculate Q_K
        Q_reduce = Q[torch.arange(B)[:, None, None],
                     torch.arange(H)[None, :, None],
                     M_top, :]  # factor*ln(L_q)
        Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1))  # factor*ln(L_q)*L_k

        return Q_K, M_top

    def _get_initial_context(self, V, L_Q):
        B, H, L_V, D = V.shape
        if not self.mask_flag:
            # V_sum = V.sum(dim=-2)
            V_sum = V.mean(dim=-2)
            context = V_sum.unsqueeze(-2).expand(B, H,
                                                 L_Q, V_sum.shape[-1]).clone()
        else:  # use mask
            # requires that L_Q == L_V, i.e. for self-attention only
            assert (L_Q == L_V)
            context = V.cumsum(dim=-2)
        return context

    def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
        B, H, L_V, D = V.shape

        if self.mask_flag:
            attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
            scores.masked_fill_(attn_mask.mask, -np.inf)

        attn = torch.softmax(scores, dim=-1)  # nn.Softmax(dim=-1)(scores)

        context_in[torch.arange(B)[:, None, None],
                   torch.arange(H)[None, :, None],
                   index, :] = torch.matmul(attn, V).type_as(context_in)
        if self.output_attention:
            attns = (torch.ones([B, H, L_V, L_V]) /
                     L_V).type_as(attn).to(attn.device)
            attns[torch.arange(B)[:, None, None], torch.arange(H)[
                None, :, None], index, :] = attn
            return (context_in, attns)
        else:
            return (context_in, None)

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L_Q, H, D = queries.shape
        _, L_K, _, _ = keys.shape

        queries = queries.transpose(2, 1)
        keys = keys.transpose(2, 1)
        values = values.transpose(2, 1)

        U_part = self.factor * \
            np.ceil(np.log(L_K)).astype('int').item()  # c*ln(L_k)
        u = self.factor * \
            np.ceil(np.log(L_Q)).astype('int').item()  # c*ln(L_q)

        U_part = U_part if U_part < L_K else L_K
        u = u if u < L_Q else L_Q

        scores_top, index = self._prob_QK(
            queries, keys, sample_k=U_part, n_top=u)

        # add scale factor
        scale = self.scale or 1. / sqrt(D)
        if scale is not None:
            scores_top = scores_top * scale
        # get the context
        context = self._get_initial_context(values, L_Q)
        # update the context with selected top_k queries
        context, attn = self._update_context(
            context, values, scores_top, index, L_Q, attn_mask)

        return context.contiguous(), attn
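

# ProbAttention scores every query against a random sample of U_part keys,
# keeps only the u queries (u ~ factor * ln L_Q) whose sampled score
# distribution is farthest from uniform (max minus mean), and computes exact
# attention just for those rows; all other rows fall back to the mean of V
# (or its cumulative sum in the causal case). This is what reduces Informer's
# attention cost from O(L^2) to O(L log L).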


class AttentionLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None):
        super(AttentionLayer, self).__init__()

        d_keys = d_keys or (d_model // n_heads)
        d_values = d_values or (d_model // n_heads)

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, d_keys * n_heads)
        self.key_projection = nn.Linear(d_model, d_keys * n_heads)
        self.value_projection = nn.Linear(d_model, d_values * n_heads)
        self.out_projection = nn.Linear(d_values * n_heads, d_model)
        self.n_heads = n_heads

    def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
        B, L, _ = queries.shape
        _, S, _ = keys.shape
        H = self.n_heads

        queries = self.query_projection(queries).view(B, L, H, -1)
        keys = self.key_projection(keys).view(B, S, H, -1)
        values = self.value_projection(values).view(B, S, H, -1)

        out, attn = self.inner_attention(
            queries,
            keys,
            values,
            attn_mask,
            tau=tau,
            delta=delta
        )
        out = out.view(B, L, -1)

        return self.out_projection(out), attn


class ReformerLayer(nn.Module):
    def __init__(self, attention, d_model, n_heads, d_keys=None,
                 d_values=None, causal=False, bucket_size=4, n_hashes=4):
        super().__init__()
        self.bucket_size = bucket_size
        self.attn = LSHSelfAttention(
            dim=d_model,
            heads=n_heads,
            bucket_size=bucket_size,
            n_hashes=n_hashes,
            causal=causal
        )

    def fit_length(self, queries):
        # inside reformer: assert N % (bucket_size * 2) == 0
        B, N, C = queries.shape
        if N % (self.bucket_size * 2) == 0:
            return queries
        else:
            # pad the series so its length is a multiple of bucket_size * 2
            fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
            return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)

    def forward(self, queries, keys, values, attn_mask, tau, delta):
        # in Reformer: default queries=keys
        B, N, C = queries.shape
        queries = self.attn(self.fit_length(queries))[:, :N, :]
        return queries, None
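

# A minimal usage sketch for AttentionLayer, assuming d_model=512 and
# n_heads=8 (hypothetical sizes). AttentionLayer is the common wrapper:
# it projects [B, L, d_model] inputs into per-head queries/keys/values,
# runs any of the attention modules above, and projects back to d_model.
def _attention_layer_example():
    layer = AttentionLayer(FullAttention(mask_flag=False), d_model=512, n_heads=8)
    x = torch.randn(2, 96, 512)  # (batch, length, d_model)
    out, _ = layer(x, x, x, attn_mask=None)
    return out.shape  # (2, 96, 512)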

--------------------------------------------------------------------------------
/exp/exp_long_term_forecasting.py:
--------------------------------------------------------------------------------
from torch.optim import lr_scheduler

from data_provider.data_factory import data_provider
from exp.exp_basic import Exp_Basic
from utils.tools import EarlyStopping, adjust_learning_rate, visual, save_to_csv, visual_weights
from utils.metrics import metric
import torch
import torch.nn as nn
from torch import optim
import os
import time
import warnings
import numpy as np

warnings.filterwarnings('ignore')


class Exp_Long_Term_Forecast(Exp_Basic):
    def __init__(self, args):
        super(Exp_Long_Term_Forecast, self).__init__(args)

    def _build_model(self):
        model = self.model_dict[self.args.model].Model(self.args).float()

        if self.args.use_multi_gpu and self.args.use_gpu:
            model = nn.DataParallel(model, device_ids=self.args.device_ids)
        return model

    def _get_data(self, flag):
        data_set, data_loader = data_provider(self.args, flag)
        return data_set, data_loader

    def _select_optimizer(self):
        model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
        return model_optim

    def _select_criterion(self):
        if self.args.data == 'PEMS':
            criterion = nn.L1Loss()
        else:
            criterion = nn.MSELoss()
        return criterion

    def vali(self, vali_data, vali_loader, criterion):
        total_loss = []
        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                if 'PEMS' == self.args.data or 'Solar' == self.args.data:
                    batch_x_mark = None
                    batch_y_mark = None

                if self.args.down_sampling_layers == 0:
                    dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                    dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                else:
                    dec_inp = None

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if self.args.output_attention:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                f_dim = -1 if self.args.features == 'MS' else 0
                # keep only the prediction window and target channels
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

                pred = outputs.detach()
                true = batch_y.detach()

                if self.args.data == 'PEMS':
                    B, T, C = pred.shape
                    pred = pred.cpu().numpy()
                    true = true.cpu().numpy()
                    pred = vali_data.inverse_transform(pred.reshape(-1, C)).reshape(B, T, C)
                    true = vali_data.inverse_transform(true.reshape(-1, C)).reshape(B, T, C)
                    mae, mse, rmse, mape, mspe = metric(pred, true)
                    total_loss.append(mae)

                else:
                    loss = criterion(pred, true)
                    total_loss.append(loss.item())

        total_loss = np.average(total_loss)
        self.model.train()
        return total_loss
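
    # Note: for PEMS the validation score is MAE computed on
    # inverse-transformed (original-scale) values, matching the L1 criterion
    # chosen in _select_criterion; all other datasets validate directly with
    # the scaled-space training loss.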

    def train(self, setting):
        train_data, train_loader = self._get_data(flag='train')
        vali_data, vali_loader = self._get_data(flag='val')
        test_data, test_loader = self._get_data(flag='test')

        path = os.path.join(self.args.checkpoints, setting)
        if not os.path.exists(path):
            os.makedirs(path)

        time_now = time.time()

        train_steps = len(train_loader)
        early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)

        model_optim = self._select_optimizer()
        criterion = self._select_criterion()

        scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
                                            steps_per_epoch=train_steps,
                                            pct_start=self.args.pct_start,
                                            epochs=self.args.train_epochs,
                                            max_lr=self.args.learning_rate)

        if self.args.use_amp:
            scaler = torch.cuda.amp.GradScaler()

        for epoch in range(self.args.train_epochs):
            iter_count = 0
            train_loss = []

            self.model.train()
            epoch_time = time.time()

            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
                iter_count += 1
                model_optim.zero_grad()

                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                if 'PEMS' == self.args.data or 'Solar' == self.args.data:
                    batch_x_mark = None
                    batch_y_mark = None

                if self.args.down_sampling_layers == 0:
                    dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                    dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                else:
                    dec_inp = None

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                        f_dim = -1 if self.args.features == 'MS' else 0
                        outputs = outputs[:, -self.args.pred_len:, f_dim:]
                        batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                        loss = criterion(outputs, batch_y)
                        train_loss.append(loss.item())
                else:
                    if self.args.output_attention:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                    f_dim = -1 if self.args.features == 'MS' else 0
                    # keep only the prediction window and target channels,
                    # mirroring the AMP branch above
                    outputs = outputs[:, -self.args.pred_len:, f_dim:]
                    batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
                    loss = criterion(outputs, batch_y)
                    train_loss.append(loss.item())

                if (i + 1) % 100 == 0:
                    print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
                    speed = (time.time() - time_now) / iter_count
                    left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
                    print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
                    iter_count = 0
                    time_now = time.time()

                if self.args.use_amp:
                    scaler.scale(loss).backward()
                    scaler.step(model_optim)
                    scaler.update()
                else:
                    loss.backward()
                    model_optim.step()

                if self.args.lradj == 'TST':
                    adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
                    scheduler.step()

            print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
            train_loss = np.average(train_loss)
            vali_loss = self.vali(vali_data, vali_loader, criterion)
            test_loss = self.vali(test_data, test_loader, criterion)

            print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
                epoch + 1, train_steps, train_loss, vali_loss, test_loss))
            early_stopping(vali_loss, self.model, path)
            if early_stopping.early_stop:
                print("Early stopping")
                break

            if self.args.lradj != 'TST':
                adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
            else:
                print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))

        best_model_path = path + '/' + 'checkpoint.pth'
        self.model.load_state_dict(torch.load(best_model_path))

        return self.model
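
    # Note: with lradj == 'TST' the OneCycleLR scheduler is stepped once per
    # batch (inside the loop above) and only logged at epoch end; any other
    # lradj policy instead adjusts the learning rate once per epoch through
    # adjust_learning_rate.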

    def test(self, setting, test=0):
        test_data, test_loader = self._get_data(flag='test')
        if test:
            print('loading model')
            self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))

        checkpoints_path = './checkpoints/' + setting + '/'
        preds = []
        trues = []
        folder_path = './test_results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        self.model.eval()
        with torch.no_grad():
            for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
                batch_x = batch_x.float().to(self.device)
                batch_y = batch_y.float().to(self.device)

                batch_x_mark = batch_x_mark.float().to(self.device)
                batch_y_mark = batch_y_mark.float().to(self.device)

                if 'PEMS' == self.args.data or 'Solar' == self.args.data:
                    batch_x_mark = None
                    batch_y_mark = None

                if self.args.down_sampling_layers == 0:
                    dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
                    dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
                else:
                    dec_inp = None

                # encoder - decoder
                if self.args.use_amp:
                    with torch.cuda.amp.autocast():
                        if self.args.output_attention:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                        else:
                            outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
                else:
                    if self.args.output_attention:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
                    else:
                        outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)

                f_dim = -1 if self.args.features == 'MS' else 0
                # keep only the prediction window and target channels
                outputs = outputs[:, -self.args.pred_len:, f_dim:]
                batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)

                outputs = outputs.detach().cpu().numpy()
                batch_y = batch_y.detach().cpu().numpy()

                pred = outputs
                true = batch_y

                preds.append(pred)
                trues.append(true)
                if i % 20 == 0:
                    input = batch_x.detach().cpu().numpy()
                    if test_data.scale and self.args.inverse:
                        shape = input.shape
                        input = test_data.inverse_transform(input.squeeze(0)).reshape(shape)
                    gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
                    pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
                    visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))

        preds = np.array(preds)
        trues = np.array(trues)
        print('test shape:', preds.shape, trues.shape)
        preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
        trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
        print('test shape:', preds.shape, trues.shape)

        if self.args.data == 'PEMS':
            B, T, C = preds.shape
            preds = test_data.inverse_transform(preds.reshape(-1, C)).reshape(B, T, C)
            trues = test_data.inverse_transform(trues.reshape(-1, C)).reshape(B, T, C)

        # result save
        folder_path = './results/' + setting + '/'
        if not os.path.exists(folder_path):
            os.makedirs(folder_path)

        mae, mse, rmse, mape, mspe = metric(preds, trues)
        print('mse:{}, mae:{}'.format(mse, mae))
        print('rmse:{}, mape:{}, mspe:{}'.format(rmse, mape, mspe))

        f = open("result_long_term_forecast.txt", 'a')
        f.write(setting + " \n")
        if self.args.data == 'PEMS':
            f.write('mae:{}, mape:{}, rmse:{}'.format(mae, mape, rmse))
        else:
            f.write('mse:{}, mae:{}'.format(mse, mae))
        f.write('\n')
        f.write('\n')
        f.close()

        np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
        np.save(folder_path + 'pred.npy', preds)
        np.save(folder_path + 'true.npy', trues)
        return
--------------------------------------------------------------------------------
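
For reference, the arrays written by Exp_Long_Term_Forecast.test() under ./results/ can be reloaded afterwards. A minimal sketch, assuming a finished run; the setting tag below is hypothetical and must match the one used for training:

    import numpy as np

    setting = 'LLMMixer_ETTh1_96_96'  # hypothetical experiment tag
    folder = './results/' + setting + '/'
    mae, mse, rmse, mape, mspe = np.load(folder + 'metrics.npy')  # order as saved by test()
    preds = np.load(folder + 'pred.npy')   # (num_windows, pred_len, channels)
    trues = np.load(folder + 'true.npy')
    print(mse, mae, preds.shape, trues.shape)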