├── exp
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── __init__.cpython-310.pyc
│   │   ├── exp_basic.cpython-310.pyc
│   │   ├── exp_imputation.cpython-310.pyc
│   │   ├── exp_classification.cpython-310.pyc
│   │   ├── exp_anomaly_detection.cpython-310.pyc
│   │   ├── exp_long_term_forecasting.cpython-310.pyc
│   │   └── exp_short_term_forecasting.cpython-310.pyc
│   ├── exp_basic.py
│   ├── torchsummary.py
│   ├── exp_classification.py
│   ├── exp_anomaly_detection.py
│   ├── exp_imputation.py
│   ├── exp_short_term_forecasting.py
│   └── exp_long_term_forecasting.py
├── layers
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── Embed.cpython-310.pyc
│   │   ├── Embed1.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── StandardNorm.cpython-310.pyc
│   │   ├── Autoformer_EncDec.cpython-310.pyc
│   │   ├── Transformer_EncDec1.cpython-310.pyc
│   │   └── SelfAttention_Family1.cpython-310.pyc
│   ├── StandardNorm.py
│   ├── Transformer_EncDec1.py
│   ├── Transformer_EncDec.py
│   ├── Embed1.py
│   ├── AutoCorrelation.py
│   ├── Autoformer_EncDec.py
│   ├── Embed.py
│   ├── SelfAttention_Family.py
│   └── SelfAttention_Family1.py
├── models
│   ├── __init__.py
│   └── __pycache__
│       ├── LLMMixer.cpython-310.pyc
│       ├── __init__.cpython-310.pyc
│       └── TimeMixer.cpython-310.pyc
├── utils
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── losses.cpython-310.pyc
│   │   ├── tools.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── masking.cpython-310.pyc
│   │   ├── metrics.cpython-310.pyc
│   │   ├── m4_summary.cpython-310.pyc
│   │   └── timefeatures.cpython-310.pyc
│   ├── masking.py
│   ├── metrics.py
│   ├── data_analysis.py
│   ├── losses.py
│   ├── timefeatures.py
│   ├── tools.py
│   └── m4_summary.py
├── scripts
│   └── read.me
├── data_provider
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── m4.cpython-310.pyc
│   │   ├── uea.cpython-310.pyc
│   │   ├── __init__.cpython-310.pyc
│   │   ├── data_loader.cpython-310.pyc
│   │   └── data_factory.cpython-310.pyc
│   ├── data_factory.py
│   ├── uea.py
│   └── data_loader.py
├── figures
│   ├── read.me
│   ├── llmmixer.pdf
│   └── llmmixer.png
├── requirements.txt
├── README.md
├── run.py
└── LICENSE
/exp/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/layers/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/scripts/read.me:
--------------------------------------------------------------------------------
1 | scripts
2 |
--------------------------------------------------------------------------------
/data_provider/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/figures/read.me:
--------------------------------------------------------------------------------
1 | all figures
2 |
--------------------------------------------------------------------------------
/figures/llmmixer.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/figures/llmmixer.pdf
--------------------------------------------------------------------------------
/figures/llmmixer.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/figures/llmmixer.png
--------------------------------------------------------------------------------
/exp/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/Embed.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Embed.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/losses.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/losses.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/tools.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/tools.cpython-310.pyc
--------------------------------------------------------------------------------
/exp/__pycache__/exp_basic.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_basic.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/Embed1.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Embed1.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/models/__pycache__/LLMMixer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/LLMMixer.cpython-310.pyc
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/masking.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/masking.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/metrics.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/metrics.cpython-310.pyc
--------------------------------------------------------------------------------
/data_provider/__pycache__/m4.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/m4.cpython-310.pyc
--------------------------------------------------------------------------------
/data_provider/__pycache__/uea.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/uea.cpython-310.pyc
--------------------------------------------------------------------------------
/models/__pycache__/TimeMixer.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/models/__pycache__/TimeMixer.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/m4_summary.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/m4_summary.cpython-310.pyc
--------------------------------------------------------------------------------
/exp/__pycache__/exp_imputation.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_imputation.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/StandardNorm.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/StandardNorm.cpython-310.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/timefeatures.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/utils/__pycache__/timefeatures.cpython-310.pyc
--------------------------------------------------------------------------------
/data_provider/__pycache__/__init__.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/__init__.cpython-310.pyc
--------------------------------------------------------------------------------
/exp/__pycache__/exp_classification.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_classification.cpython-310.pyc
--------------------------------------------------------------------------------
/data_provider/__pycache__/data_loader.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/data_loader.cpython-310.pyc
--------------------------------------------------------------------------------
/exp/__pycache__/exp_anomaly_detection.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_anomaly_detection.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/Autoformer_EncDec.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Autoformer_EncDec.cpython-310.pyc
--------------------------------------------------------------------------------
/data_provider/__pycache__/data_factory.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/data_provider/__pycache__/data_factory.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/Transformer_EncDec1.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/Transformer_EncDec1.cpython-310.pyc
--------------------------------------------------------------------------------
/exp/__pycache__/exp_long_term_forecasting.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_long_term_forecasting.cpython-310.pyc
--------------------------------------------------------------------------------
/exp/__pycache__/exp_short_term_forecasting.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/exp/__pycache__/exp_short_term_forecasting.cpython-310.pyc
--------------------------------------------------------------------------------
/layers/__pycache__/SelfAttention_Family1.cpython-310.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kowsher/LLMMixer/HEAD/layers/__pycache__/SelfAttention_Family1.cpython-310.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | einops==0.7.0
2 | matplotlib==3.7.0
3 | numpy==1.23.5
4 | pandas==1.5.3
5 | scikit_learn==1.2.2
6 | scipy==1.12.0
7 | tqdm==4.65.0
8 | patool==1.12
9 | reformer_pytorch==1.4.4
10 | sktime==0.4.1
11 | sympy==1.11.1
12 | torch==2.3.0
13 | accelerate==0.33.0
14 | transformers==4.44.0
15 | sentencepiece==0.2.0
16 |
--------------------------------------------------------------------------------
/utils/masking.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
3 |
4 | class TriangularCausalMask():
5 | def __init__(self, B, L, device="cpu"):
6 | mask_shape = [B, 1, L, L]
7 | with torch.no_grad():
8 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
9 |
10 | @property
11 | def mask(self):
12 | return self._mask
13 |
14 |
15 | class ProbMask():
16 | def __init__(self, B, H, L, index, scores, device="cpu"):
17 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
18 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
19 | indicator = _mask_ex[torch.arange(B)[:, None, None],
20 | torch.arange(H)[None, :, None],
21 | index, :].to(device)
22 | self._mask = indicator.view(scores.shape).to(device)
23 |
24 | @property
25 | def mask(self):
26 | return self._mask
27 |
--------------------------------------------------------------------------------
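
A minimal usage sketch (not part of the repository; assumes it is run from the repo root so `utils` is importable): `TriangularCausalMask` yields the upper-triangular boolean mask that causal self-attention fills with `-inf` before the softmax.

```python
import torch

from utils.masking import TriangularCausalMask

B, L = 2, 4
causal = TriangularCausalMask(B, L)        # mask shape: (B, 1, L, L)
print(causal.mask[0, 0].int())
# tensor([[0, 1, 1, 1],
#         [0, 0, 1, 1],
#         [0, 0, 0, 1],
#         [0, 0, 0, 0]], dtype=torch.int32)

# True marks future positions; attention scores there are masked out:
scores = torch.randn(B, 1, L, L)
scores = scores.masked_fill(causal.mask, float('-inf'))
```
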
/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 |
4 | def RSE(pred, true):
5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2))
6 |
7 |
8 | def CORR(pred, true):
9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0)
10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0))
11 | return (u / d).mean(-1)
12 |
13 |
14 | def MAE(pred, true):
15 | return np.mean(np.abs(pred - true))
16 |
17 |
18 | def MSE(pred, true):
19 | return np.mean((pred - true) ** 2)
20 |
21 |
22 | def RMSE(pred, true):
23 | return np.sqrt(MSE(pred, true))
24 |
25 |
26 | def MAPE(pred, true):
27 | mape = np.abs((pred - true) / true)
28 | mape = np.where(mape > 5, 0, mape)
29 | return np.mean(mape)
30 |
31 |
32 | def MSPE(pred, true):
33 | return np.mean(np.square((pred - true) / true))
34 |
35 |
36 | def metric(pred, true):
37 | mae = MAE(pred, true)
38 | mse = MSE(pred, true)
39 | rmse = RMSE(pred, true)
40 | mape = MAPE(pred, true)
41 | mspe = MSPE(pred, true)
42 |
43 | return mae, mse, rmse, mape, mspe
44 |
--------------------------------------------------------------------------------
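
A quick toy check (not part of the repository): `metric()` bundles the five pointwise errors. Note that MAPE and MSPE divide by `true`, so zero targets produce inf/nan.

```python
import numpy as np

from utils.metrics import metric

true = np.array([1.0, 2.0, 3.0, 4.0])
pred = np.array([1.1, 1.9, 3.2, 3.8])

mae, mse, rmse, mape, mspe = metric(pred, true)
print(f"MAE={mae:.3f} MSE={mse:.3f} RMSE={rmse:.3f} MAPE={mape:.3f}")
```
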
/exp/exp_basic.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | from models import LLMMixer
4 |
5 |
6 | class Exp_Basic(object):
7 | def __init__(self, args):
8 | self.args = args
9 | self.model_dict = {
10 | 'LLMMixer': LLMMixer,
11 | }
12 | self.device = self._acquire_device()
13 | self.model = self._build_model().to(self.device)
14 |
15 | def _build_model(self):
16 | raise NotImplementedError
17 | return None
18 |
19 | def _acquire_device(self):
20 | if self.args.use_gpu:
21 | import platform
22 | if platform.system() == 'Darwin':
23 | device = torch.device('mps')
24 | print('Use MPS')
25 | return device
26 | os.environ["CUDA_VISIBLE_DEVICES"] = str(
27 | self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
28 | device = torch.device('cuda:{}'.format(self.args.gpu))
29 | if self.args.use_multi_gpu:
30 | print('Use GPU: cuda{}'.format(self.args.device_ids))
31 | else:
32 | print('Use GPU: cuda:{}'.format(self.args.gpu))
33 | else:
34 | device = torch.device('cpu')
35 | print('Use CPU')
36 | return device
37 |
38 | def _get_data(self):
39 | pass
40 |
41 | def vali(self):
42 | pass
43 |
44 | def train(self):
45 | pass
46 |
47 | def test(self):
48 | pass
49 |
--------------------------------------------------------------------------------
/layers/StandardNorm.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 |
4 |
5 | class Normalize(nn.Module):
6 | def __init__(self, num_features: int, eps=1e-5, affine=False, subtract_last=False, non_norm=False):
7 | """
8 | :param num_features: the number of features or channels
9 | :param eps: a value added for numerical stability
10 | :param affine: if True, RevIN has learnable affine parameters
11 | """
12 | super(Normalize, self).__init__()
13 | self.num_features = num_features
14 | self.eps = eps
15 | self.affine = affine
16 | self.subtract_last = subtract_last
17 | self.non_norm = non_norm
18 | if self.affine:
19 | self._init_params()
20 |
21 | def forward(self, x, mode: str):
22 | if mode == 'norm':
23 | self._get_statistics(x)
24 | x = self._normalize(x)
25 | elif mode == 'denorm':
26 | x = self._denormalize(x)
27 | else:
28 | raise NotImplementedError
29 | return x
30 |
31 | def _init_params(self):
32 | # initialize RevIN params: (C,)
33 | self.affine_weight = nn.Parameter(torch.ones(self.num_features))
34 | self.affine_bias = nn.Parameter(torch.zeros(self.num_features))
35 |
36 | def _get_statistics(self, x):
37 | dim2reduce = tuple(range(1, x.ndim - 1))
38 | if self.subtract_last:
39 | self.last = x[:, -1, :].unsqueeze(1)
40 | else:
41 | self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
42 | self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()
43 |
44 | def _normalize(self, x):
45 | if self.non_norm:
46 | return x
47 | if self.subtract_last:
48 | x = x - self.last
49 | else:
50 | x = x - self.mean
51 | x = x / self.stdev
52 | if self.affine:
53 | x = x * self.affine_weight
54 | x = x + self.affine_bias
55 | return x
56 |
57 | def _denormalize(self, x):
58 | if self.non_norm:
59 | return x
60 | if self.affine:
61 | x = x - self.affine_bias
62 | x = x / (self.affine_weight + self.eps * self.eps)
63 | x = x * self.stdev
64 | if self.subtract_last:
65 | x = x + self.last
66 | else:
67 | x = x + self.mean
68 | return x
69 |
--------------------------------------------------------------------------------
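
A round-trip sketch (not part of the repository): `Normalize` implements RevIN-style instance normalization, computing per-sample statistics over the time dimension in `'norm'` mode and inverting them in `'denorm'` mode.

```python
import torch

from layers.StandardNorm import Normalize

x = torch.randn(8, 96, 7)                    # (batch, seq_len, num_features)
revin = Normalize(num_features=7, affine=False)

x_norm = revin(x, 'norm')                    # stores mean/std, then normalizes
x_back = revin(x_norm, 'denorm')             # re-applies the stored statistics
print(torch.allclose(x, x_back, atol=1e-5))  # True
```
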
/data_provider/data_factory.py:
--------------------------------------------------------------------------------
1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_M4, PSMSegLoader, \
2 | MSLSegLoader, SMAPSegLoader, SMDSegLoader, SWATSegLoader, UEAloader, Dataset_PEMS, \
3 | Dataset_Solar
4 | from data_provider.uea import collate_fn
5 | from torch.utils.data import DataLoader
6 |
7 | data_dict = {
8 | 'ETTh1': Dataset_ETT_hour,
9 | 'ETTh2': Dataset_ETT_hour,
10 | 'ETTm1': Dataset_ETT_minute,
11 | 'ETTm2': Dataset_ETT_minute,
12 | 'custom': Dataset_Custom,
13 | }
14 |
15 |
16 | def data_provider(args, flag):
17 | Data = data_dict[args.data]
18 | timeenc = 0 if args.embed != 'timeF' else 1
19 |
20 | if flag == 'test':
21 | shuffle_flag = False
22 | drop_last = True
23 | if args.task_name == 'anomaly_detection' or args.task_name == 'classification':
24 | batch_size = args.batch_size
25 | else:
26 |             batch_size = args.batch_size  # evaluation also uses the full batch size
27 | freq = args.freq
28 | else:
29 | shuffle_flag = True
30 | drop_last = True
31 | batch_size = args.batch_size # bsz for train and valid
32 | freq = args.freq
33 |
34 | if args.task_name == 'anomaly_detection':
35 | drop_last = False
36 | data_set = Data(
37 | root_path=args.root_path,
38 | win_size=args.seq_len,
39 | flag=flag,
40 | )
41 | print(flag, len(data_set))
42 | data_loader = DataLoader(
43 | data_set,
44 | batch_size=batch_size,
45 | shuffle=shuffle_flag,
46 | num_workers=args.num_workers,
47 | drop_last=drop_last)
48 | return data_set, data_loader
49 | elif args.task_name == 'classification':
50 | drop_last = False
51 | data_set = Data(
52 | root_path=args.root_path,
53 | flag=flag,
54 | )
55 | print(flag, len(data_set))
56 | data_loader = DataLoader(
57 | data_set,
58 | batch_size=batch_size,
59 | shuffle=shuffle_flag,
60 | num_workers=args.num_workers,
61 | drop_last=drop_last,
62 | collate_fn=lambda x: collate_fn(x, max_len=args.seq_len)
63 | )
64 | return data_set, data_loader
65 | else:
66 | if args.data == 'm4':
67 | drop_last = False
68 | data_set = Data(
69 | root_path=args.root_path,
70 | data_path=args.data_path,
71 | flag=flag,
72 | size=[args.seq_len, args.label_len, args.pred_len],
73 | features=args.features,
74 | target=args.target,
75 | timeenc=timeenc,
76 | freq=freq,
77 | seasonal_patterns=args.seasonal_patterns
78 | )
79 | print(flag, len(data_set))
80 | data_loader = DataLoader(
81 | data_set,
82 | batch_size=batch_size,
83 | shuffle=shuffle_flag,
84 | num_workers=args.num_workers,
85 | drop_last=drop_last)
86 | return data_set, data_loader
87 |
--------------------------------------------------------------------------------
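
A usage sketch (not part of the repository): it assumes the ETT CSVs from the README's download links sit under `./dataset/ETT-small/`, and uses `'OT'` as the conventional ETT target column; both paths and the target name are assumptions here.

```python
from types import SimpleNamespace

from data_provider.data_factory import data_provider

args = SimpleNamespace(
    task_name='long_term_forecast', data='ETTh1',
    root_path='./dataset/ETT-small/', data_path='ETTh1.csv',  # assumed layout
    seq_len=96, label_len=48, pred_len=96,
    features='M', target='OT', embed='timeF', freq='h',
    seasonal_patterns=None, batch_size=32, num_workers=0,
)

# flag='train' shuffles and drops the last incomplete batch
train_set, train_loader = data_provider(args, flag='train')
```
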
/README.md:
--------------------------------------------------------------------------------
1 | # LLM-Mixer: Multiscale Mixing in LLMs for Time Series Forecasting
2 |
25 |
26 | ---
27 | >
28 | > 🙋 Please let us know if you find a mistake or have any suggestions!
29 | >
30 | > 🌟 If you find this resource helpful, please consider starring this repository and citing our research:
31 |
32 | ```
33 | @article{kowsher2024llm,
34 | title={LLM-Mixer: Multiscale Mixing in LLMs for Time Series Forecasting},
35 | author={Kowsher, Md and Sobuj, Md Shohanur Islam and Prottasha, Nusrat Jahan and Alanis, E Alejandro and Garibay, Ozlem Ozmen and Yousefi, Niloofar},
36 | journal={arXiv preprint arXiv:2410.11674},
37 | year={2024}
38 | }
39 |
40 | ```
41 |
42 | ## Introduction
43 | LLMMixer is an advanced framework designed to improve forecasting accuracy by integrating multiscale time series decomposition with the power of large language models (LLMs). By capturing both short-term and long-term temporal patterns, LLMMixer enhances the model's ability to understand complex trends, making it highly effective for time series forecasting tasks.
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 | ## Requirements
52 | Use Python 3.11 from MiniConda
53 |
54 | - torch==2.3.0
55 | - accelerate==0.33.0
56 | - einops==0.7.0
57 | - matplotlib==3.7.0
58 | - numpy==1.23.5
59 | - pandas==1.5.3
60 | - scikit_learn==1.2.2
61 | - scipy==1.12.0
62 | - tqdm==4.65.0
63 | - peft==0.12.0
64 | - transformers==4.44.0
65 | - deepspeed==0.15.1
66 | - sentencepiece==0.2.0
67 |
68 |
69 |
70 | ## Get Started
71 |
72 | 1. Install requirements. ```pip install -r requirements.txt```
73 | 2. Download data. You can download all the datasets from [Google Drive](https://drive.google.com/u/0/uc?id=1NF7VEefXCmXuWNbnNe858WvQAkJ_7wuP&export=download), [Baidu Drive](https://pan.baidu.com/share/init?surl=r3KhGd0Q9PJIUZdfEYoymg&pwd=i9iy) or [Kaggle Datasets](https://www.kaggle.com/datasets/wentixiaogege/time-series-dataset). **All the datasets are well pre-processed** and can be used easily.
74 | 3. Train the model by following the examples in `./scripts`.
75 |
76 |
77 | ## Acknowledgement
78 |
79 | We appreciate the following GitHub repos for their valuable code and efforts.
80 | - Time-Series-Library (https://github.com/thuml/Time-Series-Library)
81 | - TimeMixer ([https://github.com/kwuking/TimeMixer](https://github.com/kwuking/TimeMixer))
82 | - Time-LLM ([https://github.com/KimMeen/Time-LLM](https://github.com/KimMeen/Time-LLM))
83 | - Autoformer (https://github.com/thuml/Autoformer)
84 | - iTransformer ([https://github.com/thuml/iTransformer](https://github.com/thuml/iTransformer))
85 |
--------------------------------------------------------------------------------
/utils/data_analysis.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from scipy.stats import entropy
4 |
5 |
6 | def forecastabilty(ts):
7 | """Forecastability Measure.
8 |
9 | Args:
10 | ts: time series
11 |
12 | Returns:
13 | 1 - the entropy of the fourier transformation of
14 | time series / entropy of white noise
15 | """
16 | ts = (ts - ts.min())/(ts.max()-ts.min()+0.1)
17 | # fourier_ts = np.fft.rfft(ts).real
18 | fourier_ts = abs(np.fft.rfft(ts))
19 | fourier_ts = (fourier_ts - fourier_ts.min()) / (
20 | fourier_ts.max() - fourier_ts.min())
21 | fourier_ts /= fourier_ts.sum()
22 | entropy_ts = entropy(fourier_ts)
23 | fore_ts = 1-entropy_ts/(np.log(len(ts)))
24 | if np.isnan(fore_ts):
25 | return 0
26 | return fore_ts
27 |
28 |
29 | def forecastabilty_moving(ts, window, jump=1):
30 | """Calculates the forecastability of a moving window.
31 |
32 | Args:
33 | ts: time series
34 | window: length of slices
35 | jump: skipped step when taking subslices
36 |
37 | Returns:
38 | a list of forecastability measures for all slices.
39 | """
40 |
41 | # ts = Trend(ts).detrend()
42 | if len(ts) <= 25:
43 | return forecastabilty(ts)
44 | fore_lst = np.array([
45 | forecastabilty(ts[i - window:i])
46 | for i in np.arange(window, len(ts), jump)
47 | ])
48 | fore_lst = fore_lst[~np.isnan(fore_lst)] # drop nan
49 | return fore_lst
50 |
51 |
52 | class Trend():
53 | """Trend test."""
54 |
55 | def __init__(self, ts):
56 | self.ts = ts
57 | self.train_length = len(ts)
58 | self.a, self.b = self.find_trend(ts)
59 |
60 | def find_trend(self, insample_data):
61 | # fit a linear regression y=ax+b on the time series
62 | x = np.arange(len(insample_data))
63 | a, b = np.polyfit(x, insample_data, 1)
64 | return a, b
65 |
66 | def detrend(self):
67 | # remove trend
68 | return self.ts - (self.a * np.arange(0, len(self.ts), 1) + self.b)
69 |
70 | def inverse_input(self, insample_data):
71 | # add trend back to the input part of time series
72 | return insample_data + (self.a * np.arange(0, len(self.ts), 1) + self.b)
73 |
74 | def inverse_pred(self, outsample_data):
75 | # add trend back to the predictions
76 | return outsample_data + (
77 | self.a * np.arange(self.train_length,
78 | self.train_length + len(outsample_data), 1) + self.b)
79 |
80 |
81 | def seasonality_test(original_ts, ppy):
82 | """Seasonality test.
83 |
84 | Args:
85 | original_ts: time series
86 | ppy: periods per year/frequency
87 |
88 | Returns:
89 | boolean value: whether the TS is seasonal
90 | """
91 |
92 | s = acf(original_ts, 1)
93 | for i in range(2, ppy):
94 | s = s + (acf(original_ts, i)**2)
95 |
96 | limit = 1.645 * (np.sqrt((1 + 2 * s) / len(original_ts)))
97 |
98 | return (abs(acf(original_ts, ppy))) > limit
99 |
100 |
101 | def acf(ts, k):
102 | """Autocorrelation function.
103 |
104 | Args:
105 | ts: time series
106 | k: lag
107 |
108 | Returns:
109 | acf value
110 | """
111 | m = np.mean(ts)
112 | s1 = 0
113 | for i in range(k, len(ts)):
114 | s1 = s1 + ((ts[i] - m) * (ts[i - k] - m))
115 |
116 | s2 = 0
117 | for i in range(0, len(ts)):
118 | s2 = s2 + ((ts[i] - m)**2)
119 |
120 | return float(s1 / s2)
--------------------------------------------------------------------------------
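
A quick sanity check (not part of the repository): a pure sine concentrates its Fourier spectrum in one bin (low spectral entropy, forecastability near 1), while white noise spreads it out (score much closer to 0).

```python
import numpy as np

from utils.data_analysis import forecastabilty

t = np.arange(512)
print(forecastabilty(np.sin(2 * np.pi * t / 32)))  # close to 1
print(forecastabilty(np.random.randn(512)))        # much closer to 0
```
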
/utils/losses.py:
--------------------------------------------------------------------------------
1 | # This source code is provided for the purposes of scientific reproducibility
2 | # under the following limited license from Element AI Inc. The code is an
3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
4 | # expansion analysis for interpretable time series forecasting,
5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
7 | # International license (CC BY-NC 4.0):
8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright © 2020 Element AI Inc. All rights reserved.
14 |
15 | """
16 | Loss functions for PyTorch.
17 | """
18 |
19 | import torch as t
20 | import torch.nn as nn
21 | import numpy as np
22 | import pdb
23 |
24 |
25 | def divide_no_nan(a, b):
26 | """
27 | a/b where the resulted NaN or Inf are replaced by 0.
28 | """
29 | result = a / b
30 | result[result != result] = .0
31 | result[result == np.inf] = .0
32 | return result
33 |
34 |
35 | class mape_loss(nn.Module):
36 | def __init__(self):
37 | super(mape_loss, self).__init__()
38 |
39 | def forward(self, insample: t.Tensor, freq: int,
40 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
41 | """
42 | MAPE loss as defined in: https://en.wikipedia.org/wiki/Mean_absolute_percentage_error
43 |
44 | :param forecast: Forecast values. Shape: batch, time
45 | :param target: Target values. Shape: batch, time
46 | :param mask: 0/1 mask. Shape: batch, time
47 | :return: Loss value
48 | """
49 | weights = divide_no_nan(mask, target)
50 | return t.mean(t.abs((forecast - target) * weights))
51 |
52 |
53 | class smape_loss(nn.Module):
54 | def __init__(self):
55 | super(smape_loss, self).__init__()
56 |
57 | def forward(self, insample: t.Tensor, freq: int,
58 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
59 | """
60 | sMAPE loss as defined in https://robjhyndman.com/hyndsight/smape/ (Makridakis 1993)
61 |
62 | :param forecast: Forecast values. Shape: batch, time
63 | :param target: Target values. Shape: batch, time
64 | :param mask: 0/1 mask. Shape: batch, time
65 | :return: Loss value
66 | """
67 | return 200 * t.mean(divide_no_nan(t.abs(forecast - target),
68 | t.abs(forecast.data) + t.abs(target.data)) * mask)
69 |
70 |
71 | class mase_loss(nn.Module):
72 | def __init__(self):
73 | super(mase_loss, self).__init__()
74 |
75 | def forward(self, insample: t.Tensor, freq: int,
76 | forecast: t.Tensor, target: t.Tensor, mask: t.Tensor) -> t.float:
77 | """
78 | MASE loss as defined in "Scaled Errors" https://robjhyndman.com/papers/mase.pdf
79 |
80 | :param insample: Insample values. Shape: batch, time_i
81 | :param freq: Frequency value
82 | :param forecast: Forecast values. Shape: batch, time_o
83 | :param target: Target values. Shape: batch, time_o
84 | :param mask: 0/1 mask. Shape: batch, time_o
85 | :return: Loss value
86 | """
87 | masep = t.mean(t.abs(insample[:, freq:] - insample[:, :-freq]), dim=1)
88 | masked_masep_inv = divide_no_nan(mask, masep[:, None])
89 | return t.mean(t.abs(target - forecast) * masked_masep_inv)
90 |
--------------------------------------------------------------------------------
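
A toy sketch (not part of the repository): all three M4 losses share one signature so the short-term-forecasting experiment can select them by name; `insample` and `freq` are only used by MASE to compute the seasonal-naive scale.

```python
import torch as t

from utils.losses import smape_loss, mase_loss

insample = t.rand(4, 36)            # history; only MASE uses it
forecast = t.rand(4, 18)
target = t.rand(4, 18)
mask = t.ones_like(target)          # 1 = position contributes to the loss

print(smape_loss()(insample, 12, forecast, target, mask))
print(mase_loss()(insample, 12, forecast, target, mask))
```
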
/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 |
8 |
9 | class TimeFeature:
10 | def __init__(self):
11 | pass
12 |
13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
14 | pass
15 |
16 | def __repr__(self):
17 | return self.__class__.__name__ + "()"
18 |
19 |
20 | class SecondOfMinute(TimeFeature):
21 |     """Second of minute encoded as value between [-0.5, 0.5]"""
22 |
23 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
24 | return index.second / 59.0 - 0.5
25 |
26 |
27 | class MinuteOfHour(TimeFeature):
28 | """Minute of hour encoded as value between [-0.5, 0.5]"""
29 |
30 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
31 | return index.minute / 59.0 - 0.5
32 |
33 |
34 | class HourOfDay(TimeFeature):
35 | """Hour of day encoded as value between [-0.5, 0.5]"""
36 |
37 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
38 | return index.hour / 23.0 - 0.5
39 |
40 |
41 | class DayOfWeek(TimeFeature):
42 |     """Day of week encoded as value between [-0.5, 0.5]"""
43 |
44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
45 | return index.dayofweek / 6.0 - 0.5
46 |
47 |
48 | class DayOfMonth(TimeFeature):
49 | """Day of month encoded as value between [-0.5, 0.5]"""
50 |
51 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
52 | return (index.day - 1) / 30.0 - 0.5
53 |
54 |
55 | class DayOfYear(TimeFeature):
56 | """Day of year encoded as value between [-0.5, 0.5]"""
57 |
58 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
59 | return (index.dayofyear - 1) / 365.0 - 0.5
60 |
61 |
62 | class MonthOfYear(TimeFeature):
63 | """Month of year encoded as value between [-0.5, 0.5]"""
64 |
65 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
66 | return (index.month - 1) / 11.0 - 0.5
67 |
68 |
69 | class WeekOfYear(TimeFeature):
70 | """Week of year encoded as value between [-0.5, 0.5]"""
71 |
72 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
73 | return (index.isocalendar().week - 1) / 52.0 - 0.5
74 |
75 |
76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
77 | """
78 | Returns a list of time features that will be appropriate for the given frequency string.
79 | Parameters
80 | ----------
81 | freq_str
82 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
83 | """
84 |
85 | features_by_offsets = {
86 | offsets.YearEnd: [],
87 | offsets.QuarterEnd: [MonthOfYear],
88 | offsets.MonthEnd: [MonthOfYear],
89 | offsets.Week: [DayOfMonth, WeekOfYear],
90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
93 | offsets.Minute: [
94 | MinuteOfHour,
95 | HourOfDay,
96 | DayOfWeek,
97 | DayOfMonth,
98 | DayOfYear,
99 | ],
100 | offsets.Second: [
101 | SecondOfMinute,
102 | MinuteOfHour,
103 | HourOfDay,
104 | DayOfWeek,
105 | DayOfMonth,
106 | DayOfYear,
107 | ],
108 | }
109 |
110 | offset = to_offset(freq_str)
111 |
112 | for offset_type, feature_classes in features_by_offsets.items():
113 | if isinstance(offset, offset_type):
114 | return [cls() for cls in feature_classes]
115 |
116 | supported_freq_msg = f"""
117 | Unsupported frequency {freq_str}
118 | The following frequencies are supported:
119 | Y - yearly
120 | alias: A
121 | M - monthly
122 | W - weekly
123 | D - daily
124 | B - business days
125 | H - hourly
126 | T - minutely
127 | alias: min
128 | S - secondly
129 | """
130 | raise RuntimeError(supported_freq_msg)
131 |
132 |
133 | def time_features(dates, freq='h'):
134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)])
135 |
--------------------------------------------------------------------------------
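
An example (not part of the repository): an hourly index maps to `offsets.Hour`, which registers four features, each scaled to roughly [-0.5, 0.5].

```python
import pandas as pd

from utils.timefeatures import time_features

idx = pd.date_range('2021-01-01', periods=48, freq='h')
feats = time_features(idx, freq='h')
print(feats.shape)  # (4, 48): HourOfDay, DayOfWeek, DayOfMonth, DayOfYear
```
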
/utils/tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import torch
4 | import matplotlib.pyplot as plt
5 |
6 | plt.switch_backend('agg')
7 |
8 |
9 | def adjust_learning_rate(optimizer, scheduler, epoch, args, printout=True):
10 | # lr = args.learning_rate * (0.2 ** (epoch // 2))
11 | if args.lradj == 'type1':
12 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))}
13 | elif args.lradj == 'type2':
14 | lr_adjust = {
15 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
16 | 10: 5e-7, 15: 1e-7, 20: 5e-8
17 | }
18 | elif args.lradj == 'type3':
19 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))}
20 | elif args.lradj == 'PEMS':
21 | lr_adjust = {epoch: args.learning_rate * (0.95 ** (epoch // 1))}
22 | elif args.lradj == 'TST':
23 | lr_adjust = {epoch: scheduler.get_last_lr()[0]}
24 | if epoch in lr_adjust.keys():
25 | lr = lr_adjust[epoch]
26 | for param_group in optimizer.param_groups:
27 | param_group['lr'] = lr
28 | if printout: print('Updating learning rate to {}'.format(lr))
29 |
30 |
31 | class EarlyStopping:
32 | def __init__(self, patience=7, verbose=False, delta=0):
33 | self.patience = patience
34 | self.verbose = verbose
35 | self.counter = 0
36 | self.best_score = None
37 | self.early_stop = False
38 | self.val_loss_min = np.Inf
39 | self.delta = delta
40 |
41 | def __call__(self, val_loss, model, path):
42 | score = -val_loss
43 | if self.best_score is None:
44 | self.best_score = score
45 | self.save_checkpoint(val_loss, model, path)
46 | elif score < self.best_score + self.delta:
47 | self.counter += 1
48 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
49 | if self.counter >= self.patience:
50 | self.early_stop = True
51 | else:
52 | self.best_score = score
53 | self.save_checkpoint(val_loss, model, path)
54 | self.counter = 0
55 |
56 | def save_checkpoint(self, val_loss, model, path):
57 | if self.verbose:
58 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model ...')
59 | torch.save(model.state_dict(), path + '/' + 'checkpoint.pth')
60 | self.val_loss_min = val_loss
61 |
62 |
63 | class dotdict(dict):
64 | """dot.notation access to dictionary attributes"""
65 | __getattr__ = dict.get
66 | __setattr__ = dict.__setitem__
67 | __delattr__ = dict.__delitem__
68 |
69 |
70 | class StandardScaler():
71 | def __init__(self, mean, std):
72 | self.mean = mean
73 | self.std = std
74 |
75 | def transform(self, data):
76 | return (data - self.mean) / self.std
77 |
78 | def inverse_transform(self, data):
79 | return (data * self.std) + self.mean
80 |
81 |
82 | def save_to_csv(true, preds=None, name='./pic/test.csv'):
83 |     """
84 |     Save ground truth and predictions to a CSV file
85 |     """
86 | data = pd.DataFrame({'true': true, 'preds': preds})
87 | data.to_csv(name, index=False, sep=',')
88 |
89 |
90 | def visual(true, preds=None, name='./pic/test.pdf'):
91 | """
92 | Results visualization
93 | """
94 | plt.figure()
95 | plt.plot(true, label='GroundTruth', linewidth=2)
96 | if preds is not None:
97 | plt.plot(preds, label='Prediction', linewidth=2)
98 | plt.legend()
99 | plt.savefig(name, bbox_inches='tight')
100 |
101 |
102 | def visual_weights(weights, name='./pic/test.pdf'):
103 | """
104 | Weights visualization
105 | """
106 | fig, ax = plt.subplots()
107 | # im = ax.imshow(weights, cmap='plasma_r')
108 | im = ax.imshow(weights, cmap='YlGnBu')
109 | fig.colorbar(im, pad=0.03, location='top')
110 | plt.savefig(name, dpi=500, pad_inches=0.02)
111 | plt.close()
112 |
113 |
114 | def adjustment(gt, pred):
115 | anomaly_state = False
116 | for i in range(len(gt)):
117 | if gt[i] == 1 and pred[i] == 1 and not anomaly_state:
118 | anomaly_state = True
119 | for j in range(i, 0, -1):
120 | if gt[j] == 0:
121 | break
122 | else:
123 | if pred[j] == 0:
124 | pred[j] = 1
125 | for j in range(i, len(gt)):
126 | if gt[j] == 0:
127 | break
128 | else:
129 | if pred[j] == 0:
130 | pred[j] = 1
131 | elif gt[i] == 0:
132 | anomaly_state = False
133 | if anomaly_state:
134 | pred[i] = 1
135 | return gt, pred
136 |
137 |
138 | def cal_accuracy(y_pred, y_true):
139 | return np.mean(y_pred == y_true)
140 |
--------------------------------------------------------------------------------
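
A sketch (not part of the repository): `EarlyStopping` checkpoints the model whenever validation loss improves by more than `delta` and trips after `patience` consecutive non-improving epochs.

```python
import tempfile

import torch.nn as nn

from utils.tools import EarlyStopping

model = nn.Linear(4, 1)
ckpt_dir = tempfile.mkdtemp()
stopper = EarlyStopping(patience=3, verbose=True)

for val_loss in [1.0, 0.8, 0.85, 0.9, 0.9, 0.95]:
    stopper(val_loss, model, ckpt_dir)   # writes checkpoint.pth on improvement
    if stopper.early_stop:
        print('early stop triggered')
        break
```
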
/exp/torchsummary.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.autograd import Variable
4 |
5 | from collections import OrderedDict
6 | import numpy as np
7 |
8 |
9 | def summary(model, input_size, batch_size=-1, device="cuda"):
10 |
11 | def register_hook(module):
12 |
13 | def hook(module, input, output):
14 | class_name = str(module.__class__).split(".")[-1].split("'")[0]
15 | module_idx = len(summary)
16 |
17 | m_key = "%s-%i" % (class_name, module_idx + 1)
18 | summary[m_key] = OrderedDict()
19 | # summary[m_key]["input_shape"] = list(input[0].size())
20 | if isinstance(input[0], (list, tuple)):
21 | summary[m_key]["input_shape"] = [
22 | [-1] + list(i.size())[1:] for i in input[0]
23 | ]
24 | summary[m_key]["input_shape"][0] = batch_size
25 | else:
26 | summary[m_key]["input_shape"] = list(input[0].size())
27 | summary[m_key]["input_shape"][0] = batch_size
28 |
29 | if isinstance(output, (list, tuple)):
30 | summary[m_key]["output_shape"] = [
31 | [-1] + list(o.size())[1:] for o in output
32 | ]
33 | else:
34 | summary[m_key]["output_shape"] = list(output.size())
35 | summary[m_key]["output_shape"][0] = batch_size
36 |
37 | params = 0
38 | if hasattr(module, "weight") and hasattr(module.weight, "size"):
39 | params += torch.prod(torch.LongTensor(list(module.weight.size())))
40 | summary[m_key]["trainable"] = module.weight.requires_grad
41 | if hasattr(module, "bias") and hasattr(module.bias, "size"):
42 | params += torch.prod(torch.LongTensor(list(module.bias.size())))
43 | summary[m_key]["nb_params"] = params
44 |
45 | if (
46 | not isinstance(module, nn.Sequential)
47 | and not isinstance(module, nn.ModuleList)
48 | and not (module == model)
49 | ):
50 | hooks.append(module.register_forward_hook(hook))
51 |
52 | device = device.lower()
53 | assert device in [
54 | "cuda",
55 | "cpu",
56 | ], "Input device is not valid, please specify 'cuda' or 'cpu'"
57 |
58 | if device == "cuda" and torch.cuda.is_available():
59 | dtype = torch.cuda.FloatTensor
60 | else:
61 | dtype = torch.FloatTensor
62 |
63 | # multiple inputs to the network
64 | if isinstance(input_size, tuple):
65 | input_size = [input_size]
66 |
67 | # batch_size of 2 for batchnorm
68 | x = [torch.rand(2, *in_size).type(dtype) for in_size in input_size]
69 | # print(type(x[0]))
70 |
71 | # create properties
72 | summary = OrderedDict()
73 | hooks = []
74 |
75 | # register hook
76 | model.apply(register_hook)
77 |
78 | # make a forward pass
79 | # print(x.shape)
80 | model(*x)
81 |
82 | # remove these hooks
83 | for h in hooks:
84 | h.remove()
85 |
86 | print("----------------------------------------------------------------")
87 | line_new = "{:>20} {:>25} {:>15}".format("Layer (type)", "Output Shape", "Param #")
88 | print(line_new)
89 | print("================================================================")
90 | total_params = 0
91 | total_output = 0
92 | trainable_params = 0
93 | for layer in summary:
94 | # input_shape, output_shape, trainable, nb_params
95 | line_new = "{:>20} {:>25} {:>15}".format(
96 | layer,
97 | str(summary[layer]["output_shape"]),
98 | "{0:,}".format(summary[layer]["nb_params"]),
99 | )
100 | total_params += summary[layer]["nb_params"]
101 | total_output += np.prod(summary[layer]["output_shape"])
102 | if "trainable" in summary[layer]:
103 | if summary[layer]["trainable"] == True:
104 | trainable_params += summary[layer]["nb_params"]
105 | print(line_new)
106 |
107 | # assume 4 bytes/number (float on cuda).
108 | # total_input_size = abs(np.prod(input_size) * batch_size * 4. / (1024 ** 2.))
109 | total_output_size = abs(2. * total_output * 4. / (1024 ** 2.)) # x2 for gradients
110 | total_params_size = abs(total_params.numpy() * 4. / (1024 ** 2.))
111 | # total_size = total_params_size + total_output_size + total_input_size
112 |
113 | print("================================================================")
114 | print("Total params: {0:,}".format(total_params))
115 | print("Trainable params: {0:,}".format(trainable_params))
116 | print("Non-trainable params: {0:,}".format(total_params - trainable_params))
117 | print("----------------------------------------------------------------")
118 | # print("Input size (MB): %0.2f" % total_input_size)
119 | print("Forward/backward pass size (MB): %0.2f" % total_output_size)
120 | print("Params size (MB): %0.2f" % total_params_size)
121 | # print("Estimated Total Size (MB): %0.2f" % total_size)
122 | print("----------------------------------------------------------------")
123 | # return summary
124 |
--------------------------------------------------------------------------------
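
A usage sketch (not part of the repository): `summary` runs a forward pass on a random batch of two and prints per-layer output shapes and parameter counts; it works on CPU as well, and the table is printed rather than returned.

```python
import torch.nn as nn

from exp.torchsummary import summary

net = nn.Sequential(
    nn.Conv1d(7, 16, kernel_size=3, padding=1),
    nn.ReLU(),
    nn.Conv1d(16, 7, kernel_size=3, padding=1),
)
summary(net, input_size=(7, 96), device='cpu')  # one input of shape (7, 96)
```
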
/layers/Transformer_EncDec1.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch.nn.functional as F
3 |
4 |
5 | class ConvLayer(nn.Module):
6 | def __init__(self, c_in):
7 | super(ConvLayer, self).__init__()
8 | self.downConv = nn.Conv1d(in_channels=c_in,
9 | out_channels=c_in,
10 | kernel_size=3,
11 | padding=2,
12 | padding_mode='circular')
13 | self.norm = nn.BatchNorm1d(c_in)
14 | self.activation = nn.ELU()
15 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
16 |
17 | def forward(self, x):
18 | x = self.downConv(x.permute(0, 2, 1))
19 | x = self.norm(x)
20 | x = self.activation(x)
21 | x = self.maxPool(x)
22 | x = x.transpose(1, 2)
23 | return x
24 |
25 |
26 | class EncoderLayer(nn.Module):
27 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
28 | super(EncoderLayer, self).__init__()
29 | d_ff = d_ff or 4 * d_model
30 | self.attention = attention
31 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
32 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
33 | self.norm1 = nn.LayerNorm(d_model)
34 | self.norm2 = nn.LayerNorm(d_model)
35 | self.dropout = nn.Dropout(dropout)
36 | self.activation = F.relu if activation == "relu" else F.gelu
37 |
38 | def forward(self, x, attn_mask=None, tau=None, delta=None):
39 | new_x, attn = self.attention(
40 | x, x, x,
41 | attn_mask=attn_mask,
42 | tau=tau, delta=delta
43 | )
44 | x = x + self.dropout(new_x)
45 |
46 | y = x = self.norm1(x)
47 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
48 | y = self.dropout(self.conv2(y).transpose(-1, 1))
49 |
50 | return self.norm2(x + y), attn
51 |
52 |
53 | class Encoder(nn.Module):
54 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
55 | super(Encoder, self).__init__()
56 | self.attn_layers = nn.ModuleList(attn_layers)
57 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
58 | self.norm = norm_layer
59 |
60 | def forward(self, x, attn_mask=None, tau=None, delta=None):
61 | # x [B, L, D]
62 | attns = []
63 | if self.conv_layers is not None:
64 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
65 | delta = delta if i == 0 else None
66 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
67 | x = conv_layer(x)
68 | attns.append(attn)
69 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
70 | attns.append(attn)
71 | else:
72 | for attn_layer in self.attn_layers:
73 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
74 | attns.append(attn)
75 |
76 | if self.norm is not None:
77 | x = self.norm(x)
78 |
79 | return x, attns
80 |
81 |
82 | class DecoderLayer(nn.Module):
83 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
84 | dropout=0.1, activation="relu"):
85 | super(DecoderLayer, self).__init__()
86 | d_ff = d_ff or 4 * d_model
87 | self.self_attention = self_attention
88 | self.cross_attention = cross_attention
89 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
90 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
91 | self.norm1 = nn.LayerNorm(d_model)
92 | self.norm2 = nn.LayerNorm(d_model)
93 | self.norm3 = nn.LayerNorm(d_model)
94 | self.dropout = nn.Dropout(dropout)
95 | self.activation = F.relu if activation == "relu" else F.gelu
96 |
97 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
98 | x = x + self.dropout(self.self_attention(
99 | x, x, x,
100 | attn_mask=x_mask,
101 | tau=tau, delta=None
102 | )[0])
103 | x = self.norm1(x)
104 |
105 | x = x + self.dropout(self.cross_attention(
106 | x, cross, cross,
107 | attn_mask=cross_mask,
108 | tau=tau, delta=delta
109 | )[0])
110 |
111 | y = x = self.norm2(x)
112 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
113 | y = self.dropout(self.conv2(y).transpose(-1, 1))
114 |
115 | return self.norm3(x + y)
116 |
117 |
118 | class Decoder(nn.Module):
119 | def __init__(self, layers, norm_layer=None, projection=None):
120 | super(Decoder, self).__init__()
121 | self.layers = nn.ModuleList(layers)
122 | self.norm = norm_layer
123 | self.projection = projection
124 |
125 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
126 | for layer in self.layers:
127 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)
128 |
129 | if self.norm is not None:
130 | x = self.norm(x)
131 |
132 | if self.projection is not None:
133 | x = self.projection(x)
134 | return x
135 |
--------------------------------------------------------------------------------
/layers/Transformer_EncDec.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class ConvLayer(nn.Module):
7 | def __init__(self, c_in):
8 | super(ConvLayer, self).__init__()
9 | self.downConv = nn.Conv1d(in_channels=c_in,
10 | out_channels=c_in,
11 | kernel_size=3,
12 | padding=2,
13 | padding_mode='circular')
14 | self.norm = nn.BatchNorm1d(c_in)
15 | self.activation = nn.ELU()
16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
17 |
18 | def forward(self, x):
19 | x = self.downConv(x.permute(0, 2, 1))
20 | x = self.norm(x)
21 | x = self.activation(x)
22 | x = self.maxPool(x)
23 | x = x.transpose(1, 2)
24 | return x
25 |
26 |
27 | class EncoderLayer(nn.Module):
28 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
29 | super(EncoderLayer, self).__init__()
30 | d_ff = d_ff or 4 * d_model
31 | self.attention = attention
32 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
33 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
34 | self.norm1 = nn.LayerNorm(d_model)
35 | self.norm2 = nn.LayerNorm(d_model)
36 | self.dropout = nn.Dropout(dropout)
37 | self.activation = F.relu if activation == "relu" else F.gelu
38 |
39 | def forward(self, x, attn_mask=None, tau=None, delta=None):
40 | new_x, attn = self.attention(
41 | x, x, x,
42 | attn_mask=attn_mask,
43 | tau=tau, delta=delta
44 | )
45 | x = x + self.dropout(new_x)
46 |
47 | y = x = self.norm1(x)
48 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
49 | y = self.dropout(self.conv2(y).transpose(-1, 1))
50 |
51 | return self.norm2(x + y), attn
52 |
53 |
54 | class Encoder(nn.Module):
55 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
56 | super(Encoder, self).__init__()
57 | self.attn_layers = nn.ModuleList(attn_layers)
58 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
59 | self.norm = norm_layer
60 |
61 | def forward(self, x, attn_mask=None, tau=None, delta=None):
62 | # x [B, L, D]
63 | attns = []
64 | if self.conv_layers is not None:
65 | for i, (attn_layer, conv_layer) in enumerate(zip(self.attn_layers, self.conv_layers)):
66 | delta = delta if i == 0 else None
67 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
68 | x = conv_layer(x)
69 | attns.append(attn)
70 | x, attn = self.attn_layers[-1](x, tau=tau, delta=None)
71 | attns.append(attn)
72 | else:
73 | for attn_layer in self.attn_layers:
74 | x, attn = attn_layer(x, attn_mask=attn_mask, tau=tau, delta=delta)
75 | attns.append(attn)
76 |
77 | if self.norm is not None:
78 | x = self.norm(x)
79 |
80 | return x, attns
81 |
82 |
83 | class DecoderLayer(nn.Module):
84 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
85 | dropout=0.1, activation="relu"):
86 | super(DecoderLayer, self).__init__()
87 | d_ff = d_ff or 4 * d_model
88 | self.self_attention = self_attention
89 | self.cross_attention = cross_attention
90 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
91 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
92 | self.norm1 = nn.LayerNorm(d_model)
93 | self.norm2 = nn.LayerNorm(d_model)
94 | self.norm3 = nn.LayerNorm(d_model)
95 | self.dropout = nn.Dropout(dropout)
96 | self.activation = F.relu if activation == "relu" else F.gelu
97 |
98 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
99 | x = x + self.dropout(self.self_attention(
100 | x, x, x,
101 | attn_mask=x_mask,
102 | tau=tau, delta=None
103 | )[0])
104 | x = self.norm1(x)
105 |
106 | x = x + self.dropout(self.cross_attention(
107 | x, cross, cross,
108 | attn_mask=cross_mask,
109 | tau=tau, delta=delta
110 | )[0])
111 |
112 | y = x = self.norm2(x)
113 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
114 | y = self.dropout(self.conv2(y).transpose(-1, 1))
115 |
116 | return self.norm3(x + y)
117 |
118 |
119 | class Decoder(nn.Module):
120 | def __init__(self, layers, norm_layer=None, projection=None):
121 | super(Decoder, self).__init__()
122 | self.layers = nn.ModuleList(layers)
123 | self.norm = norm_layer
124 | self.projection = projection
125 |
126 | def forward(self, x, cross, x_mask=None, cross_mask=None, tau=None, delta=None):
127 | for layer in self.layers:
128 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask, tau=tau, delta=delta)
129 |
130 | if self.norm is not None:
131 | x = self.norm(x)
132 |
133 | if self.projection is not None:
134 | x = self.projection(x)
135 | return x
136 |
--------------------------------------------------------------------------------
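
A sketch (not part of the repository): `EncoderLayer` expects an attention module called as `attn(q, k, v, attn_mask=..., tau=..., delta=...)` and returning `(output, attention)`. The repo's `SelfAttention_Family` provides such modules (not shown in this excerpt), so a minimal stand-in is used here.

```python
import torch
import torch.nn as nn

from layers.Transformer_EncDec import Encoder, EncoderLayer

class PlainAttention(nn.Module):
    """Single-head scaled dot-product attention matching the expected API."""
    def forward(self, q, k, v, attn_mask=None, tau=None, delta=None):
        scores = torch.matmul(q, k.transpose(-1, -2)) / q.shape[-1] ** 0.5
        if attn_mask is not None:
            scores = scores.masked_fill(attn_mask, float('-inf'))
        attn = torch.softmax(scores, dim=-1)
        return torch.matmul(attn, v), attn

d_model = 32
enc = Encoder(
    [EncoderLayer(PlainAttention(), d_model, d_ff=64) for _ in range(2)],
    norm_layer=nn.LayerNorm(d_model),
)
out, attns = enc(torch.randn(4, 96, d_model))  # (B, L, D) -> (B, L, D)
```
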
/data_provider/uea.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import torch
5 |
6 |
7 | def collate_fn(data, max_len=None):
8 |     """Build mini-batch tensors from a list of (X, y) tuples.
9 | Args:
10 | data: len(batch_size) list of tuples (X, y).
11 | - X: torch tensor of shape (seq_length, feat_dim); variable seq_length.
12 | - y: torch tensor of shape (num_labels,) : class indices or numerical targets
13 | (for classification or regression, respectively). num_labels > 1 for multi-task models
14 | max_len: global fixed sequence length. Used for architectures requiring fixed length input,
15 | where the batch length cannot vary dynamically. Longer sequences are clipped, shorter are padded with 0s
16 | Returns:
17 |         X: (batch_size, padded_length, feat_dim) torch tensor of padded features (model input)
18 |         targets: (batch_size, num_labels) torch tensor of class indices or numerical
19 |             targets (for classification or regression, respectively)
20 |         padding_masks: (batch_size, padded_length) boolean tensor,
21 |             1 means keep vector at this position (real time step), 0 means padding
22 | """
23 |
24 | batch_size = len(data)
25 | features, labels = zip(*data)
26 |
27 | # Stack and pad features and masks (convert 2D to 3D tensors, i.e. add batch dimension)
28 | lengths = [X.shape[0] for X in features] # original sequence length for each time series
29 | if max_len is None:
30 | max_len = max(lengths)
31 | X = torch.zeros(batch_size, max_len, features[0].shape[-1]) # (batch_size, padded_length, feat_dim)
32 | for i in range(batch_size):
33 | end = min(lengths[i], max_len)
34 | X[i, :end, :] = features[i][:end, :]
35 |
36 | targets = torch.stack(labels, dim=0) # (batch_size, num_labels)
37 |
38 | padding_masks = padding_mask(torch.tensor(lengths, dtype=torch.int16),
39 | max_len=max_len) # (batch_size, padded_length) boolean tensor, "1" means keep
40 |
41 | return X, targets, padding_masks
42 |
43 |
44 | def padding_mask(lengths, max_len=None):
45 | """
46 | Used to mask padded positions: creates a (batch_size, max_len) boolean mask from a tensor of sequence lengths,
47 | where 1 means keep element at this position (time step)
48 | """
49 | batch_size = lengths.numel()
50 |     max_len = max_len or lengths.max()  # trick works because of overloading of 'or' operator for non-boolean types
51 | return (torch.arange(0, max_len, device=lengths.device)
52 | .type_as(lengths)
53 | .repeat(batch_size, 1)
54 | .lt(lengths.unsqueeze(1)))
55 |
56 |
57 | class Normalizer(object):
58 | """
59 | Normalizes dataframe across ALL contained rows (time steps). Different from per-sample normalization.
60 | """
61 |
62 | def __init__(self, norm_type='standardization', mean=None, std=None, min_val=None, max_val=None):
63 | """
64 | Args:
65 | norm_type: choose from:
66 | "standardization", "minmax": normalizes dataframe across ALL contained rows (time steps)
67 | "per_sample_std", "per_sample_minmax": normalizes each sample separately (i.e. across only its own rows)
68 | mean, std, min_val, max_val: optional (num_feat,) Series of pre-computed values
69 | """
70 |
71 | self.norm_type = norm_type
72 | self.mean = mean
73 | self.std = std
74 | self.min_val = min_val
75 | self.max_val = max_val
76 |
77 | def normalize(self, df):
78 | """
79 | Args:
80 | df: input dataframe
81 | Returns:
82 | df: normalized dataframe
83 | """
84 | if self.norm_type == "standardization":
85 | if self.mean is None:
86 | self.mean = df.mean()
87 | self.std = df.std()
88 | return (df - self.mean) / (self.std + np.finfo(float).eps)
89 |
90 | elif self.norm_type == "minmax":
91 | if self.max_val is None:
92 | self.max_val = df.max()
93 | self.min_val = df.min()
94 | return (df - self.min_val) / (self.max_val - self.min_val + np.finfo(float).eps)
95 |
96 | elif self.norm_type == "per_sample_std":
97 | grouped = df.groupby(by=df.index)
98 | return (df - grouped.transform('mean')) / grouped.transform('std')
99 |
100 | elif self.norm_type == "per_sample_minmax":
101 | grouped = df.groupby(by=df.index)
102 | min_vals = grouped.transform('min')
103 | return (df - min_vals) / (grouped.transform('max') - min_vals + np.finfo(float).eps)
104 |
105 | else:
106 |             raise NameError(f'Normalize method "{self.norm_type}" not implemented')
107 |
108 |
109 | def interpolate_missing(y):
110 | """
111 | Replaces NaN values in pd.Series `y` using linear interpolation
112 | """
113 | if y.isna().any():
114 | y = y.interpolate(method='linear', limit_direction='both')
115 | return y
116 |
117 |
118 | def subsample(y, limit=256, factor=2):
119 | """
120 | If a given Series is longer than `limit`, returns subsampled sequence by the specified integer factor
121 | """
122 | if len(y) > limit:
123 | return y[::factor].reset_index(drop=True)
124 | return y
125 |
--------------------------------------------------------------------------------
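A small usage sketch for the collate_fn/padding_mask pair above, with toy variable-length series (shapes are illustrative):

import torch

batch = [(torch.randn(5, 3), torch.tensor([0])),   # seq_len=5, feat_dim=3
         (torch.randn(8, 3), torch.tensor([1]))]   # seq_len=8
X, targets, masks = collate_fn(batch, max_len=10)
print(X.shape)       # torch.Size([2, 10, 3]); shorter series are zero-padded
print(masks.sum(1))  # tensor([5, 8]); True marks real (non-padding) steps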
/layers/Embed1.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import math
4 |
5 |
6 | class PositionalEmbedding(nn.Module):
7 | def __init__(self, d_model, max_len=5000):
8 | super(PositionalEmbedding, self).__init__()
9 | # Compute the positional encodings once in log space.
10 | pe = torch.zeros(max_len, d_model).float()
11 |         pe.requires_grad = False
12 |
13 | position = torch.arange(0, max_len).float().unsqueeze(1)
14 | div_term = (torch.arange(0, d_model, 2).float()
15 | * -(math.log(10000.0) / d_model)).exp()
16 |
17 | pe[:, 0::2] = torch.sin(position * div_term)
18 | pe[:, 1::2] = torch.cos(position * div_term)
19 |
20 | pe = pe.unsqueeze(0)
21 | self.register_buffer('pe', pe)
22 |
23 | def forward(self, x):
24 | return self.pe[:, :x.size(1)]
25 |
26 |
27 | class TokenEmbedding(nn.Module):
28 | def __init__(self, c_in, d_model):
29 | super(TokenEmbedding, self).__init__()
30 | padding = 1 if torch.__version__ >= '1.5.0' else 2
31 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
32 | kernel_size=3, padding=padding, padding_mode='circular', bias=False)
33 | for m in self.modules():
34 | if isinstance(m, nn.Conv1d):
35 | nn.init.kaiming_normal_(
36 | m.weight, mode='fan_in', nonlinearity='leaky_relu')
37 |
38 | def forward(self, x):
39 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
40 | return x
41 |
42 |
43 | class FixedEmbedding(nn.Module):
44 | def __init__(self, c_in, d_model):
45 | super(FixedEmbedding, self).__init__()
46 |
47 | w = torch.zeros(c_in, d_model).float()
48 |         w.requires_grad = False
49 |
50 | position = torch.arange(0, c_in).float().unsqueeze(1)
51 | div_term = (torch.arange(0, d_model, 2).float()
52 | * -(math.log(10000.0) / d_model)).exp()
53 |
54 | w[:, 0::2] = torch.sin(position * div_term)
55 | w[:, 1::2] = torch.cos(position * div_term)
56 |
57 | self.emb = nn.Embedding(c_in, d_model)
58 | self.emb.weight = nn.Parameter(w, requires_grad=False)
59 |
60 | def forward(self, x):
61 | return self.emb(x).detach()
62 |
63 |
64 | class TemporalEmbedding(nn.Module):
65 | def __init__(self, d_model, embed_type='fixed', freq='h'):
66 | super(TemporalEmbedding, self).__init__()
67 |
68 | minute_size = 4
69 | hour_size = 24
70 | weekday_size = 7
71 | day_size = 32
72 | month_size = 13
73 |
74 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
75 | if freq == 't':
76 | self.minute_embed = Embed(minute_size, d_model)
77 | self.hour_embed = Embed(hour_size, d_model)
78 | self.weekday_embed = Embed(weekday_size, d_model)
79 | self.day_embed = Embed(day_size, d_model)
80 | self.month_embed = Embed(month_size, d_model)
81 |
82 | def forward(self, x):
83 | x = x.long()
84 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
85 | self, 'minute_embed') else 0.
86 | hour_x = self.hour_embed(x[:, :, 3])
87 | weekday_x = self.weekday_embed(x[:, :, 2])
88 | day_x = self.day_embed(x[:, :, 1])
89 | month_x = self.month_embed(x[:, :, 0])
90 |
91 | return hour_x + weekday_x + day_x + month_x + minute_x
92 |
93 |
94 | class TimeFeatureEmbedding(nn.Module):
95 | def __init__(self, d_model, embed_type='timeF', freq='h'):
96 | super(TimeFeatureEmbedding, self).__init__()
97 |
98 | freq_map = {'h': 4, 't': 5, 's': 6,
99 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
100 | d_inp = freq_map[freq]
101 | self.embed = nn.Linear(d_inp, d_model, bias=False)
102 |
103 | def forward(self, x):
104 | return self.embed(x)
105 |
106 |
107 | class DataEmbedding(nn.Module):
108 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
109 | super(DataEmbedding, self).__init__()
110 |
111 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
112 | self.position_embedding = PositionalEmbedding(d_model=d_model)
113 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
114 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
115 | d_model=d_model, embed_type=embed_type, freq=freq)
116 | self.dropout = nn.Dropout(p=dropout)
117 |
118 | def forward(self, x, x_mark):
119 | if x_mark is None:
120 | x = self.value_embedding(x) + self.position_embedding(x)
121 | else:
122 | x = self.value_embedding(
123 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
124 | return self.dropout(x)
125 |
126 |
127 | class DataEmbedding_inverted(nn.Module):
128 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
129 | super(DataEmbedding_inverted, self).__init__()
130 | self.value_embedding = nn.Linear(c_in, d_model)
131 | self.dropout = nn.Dropout(p=dropout)
132 |
133 | def forward(self, x, x_mark):
134 | x = x.permute(0, 2, 1)
135 | # x: [Batch Variate Time]
136 | if x_mark is None:
137 | x = self.value_embedding(x)
138 | else:
139 | # the potential to take covariates (e.g. timestamps) as tokens
140 | x = self.value_embedding(torch.cat([x, x_mark.permute(0, 2, 1)], 1))
141 | # x: [Batch Variate d_model]
142 | return self.dropout(x)
143 |
144 |
--------------------------------------------------------------------------------
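A shape sketch for DataEmbedding above (sizes are illustrative; freq='h' maps to 4 time features via freq_map):

import torch

emb = DataEmbedding(c_in=7, d_model=16, embed_type='timeF', freq='h')
x = torch.randn(32, 96, 7)       # (batch, seq_len, n_vars)
x_mark = torch.randn(32, 96, 4)  # normalized time features for 'h'
out = emb(x, x_mark)             # (32, 96, 16): value + temporal + positional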
/utils/m4_summary.py:
--------------------------------------------------------------------------------
1 | # This source code is provided for the purposes of scientific reproducibility
2 | # under the following limited license from Element AI Inc. The code is an
3 | # implementation of the N-BEATS model (Oreshkin et al., N-BEATS: Neural basis
4 | # expansion analysis for interpretable time series forecasting,
5 | # https://arxiv.org/abs/1905.10437). The copyright to the source code is
6 | # licensed under the Creative Commons - Attribution-NonCommercial 4.0
7 | # International license (CC BY-NC 4.0):
8 | # https://creativecommons.org/licenses/by-nc/4.0/. Any commercial use (whether
9 | # for the benefit of third parties or internally in production) requires an
10 | # explicit license. The subject-matter of the N-BEATS model and associated
11 | # materials are the property of Element AI Inc. and may be subject to patent
12 | # protection. No license to patents is granted hereunder (whether express or
13 | # implied). Copyright 2020 Element AI Inc. All rights reserved.
14 |
15 | """
16 | M4 Summary
17 | """
18 | from collections import OrderedDict
19 |
20 | import numpy as np
21 | import pandas as pd
22 |
23 | from data_provider.m4 import M4Dataset
24 | from data_provider.m4 import M4Meta
25 | import os
26 |
27 |
28 | def group_values(values, groups, group_name):
29 |     return np.array([v[~np.isnan(v)] for v in values[groups == group_name]], dtype=object)  # ragged lengths -> object array
30 |
31 |
32 | def mase(forecast, insample, outsample, frequency):
33 | return np.mean(np.abs(forecast - outsample)) / np.mean(np.abs(insample[:-frequency] - insample[frequency:]))
34 |
35 |
36 | def smape_2(forecast, target):
37 | denom = np.abs(target) + np.abs(forecast)
38 |     # divide by 1.0 instead of 0.0; when denom is zero the numerator is 0.0 anyway.
39 | denom[denom == 0.0] = 1.0
40 | return 200 * np.abs(forecast - target) / denom
41 |
42 |
43 | def mape(forecast, target):
44 | denom = np.abs(target)
45 | # divide by 1.0 instead of 0.0, in case when denom is zero the enumerator will be 0.0 anyway.
46 | denom[denom == 0.0] = 1.0
47 | return 100 * np.abs(forecast - target) / denom
48 |
49 |
50 | class M4Summary:
51 | def __init__(self, file_path, root_path):
52 | self.file_path = file_path
53 | self.training_set = M4Dataset.load(training=True, dataset_file=root_path)
54 | self.test_set = M4Dataset.load(training=False, dataset_file=root_path)
55 | self.naive_path = os.path.join(root_path, 'submission-Naive2.csv')
56 |
57 | def evaluate(self):
58 | """
59 | Evaluate forecasts using M4 test dataset.
60 |
61 |         Model forecasts are read from '<file_path><group>_forecast.csv' per seasonal pattern.
62 |         :return: sMAPE, OWA, MAPE and MASE scores grouped by seasonal patterns.
63 | """
64 | grouped_owa = OrderedDict()
65 |
66 | naive2_forecasts = pd.read_csv(self.naive_path).values[:, 1:].astype(np.float32)
67 |         naive2_forecasts = np.array([v[~np.isnan(v)] for v in naive2_forecasts], dtype=object)
68 |
69 | model_mases = {}
70 | naive2_smapes = {}
71 | naive2_mases = {}
72 | grouped_smapes = {}
73 | grouped_mapes = {}
74 | for group_name in M4Meta.seasonal_patterns:
75 | file_name = self.file_path + group_name + "_forecast.csv"
76 | if os.path.exists(file_name):
77 | model_forecast = pd.read_csv(file_name).values
78 |
79 | naive2_forecast = group_values(naive2_forecasts, self.test_set.groups, group_name)
80 | target = group_values(self.test_set.values, self.test_set.groups, group_name)
81 | # all timeseries within group have same frequency
82 | frequency = self.training_set.frequencies[self.test_set.groups == group_name][0]
83 | insample = group_values(self.training_set.values, self.test_set.groups, group_name)
84 |
85 | model_mases[group_name] = np.mean([mase(forecast=model_forecast[i],
86 | insample=insample[i],
87 | outsample=target[i],
88 | frequency=frequency) for i in range(len(model_forecast))])
89 | naive2_mases[group_name] = np.mean([mase(forecast=naive2_forecast[i],
90 | insample=insample[i],
91 | outsample=target[i],
92 | frequency=frequency) for i in range(len(model_forecast))])
93 |
94 | naive2_smapes[group_name] = np.mean(smape_2(naive2_forecast, target))
95 | grouped_smapes[group_name] = np.mean(smape_2(forecast=model_forecast, target=target))
96 | grouped_mapes[group_name] = np.mean(mape(forecast=model_forecast, target=target))
97 |
98 | grouped_smapes = self.summarize_groups(grouped_smapes)
99 | grouped_mapes = self.summarize_groups(grouped_mapes)
100 | grouped_model_mases = self.summarize_groups(model_mases)
101 | grouped_naive2_smapes = self.summarize_groups(naive2_smapes)
102 | grouped_naive2_mases = self.summarize_groups(naive2_mases)
103 | for k in grouped_model_mases.keys():
104 | grouped_owa[k] = (grouped_model_mases[k] / grouped_naive2_mases[k] +
105 | grouped_smapes[k] / grouped_naive2_smapes[k]) / 2
106 |
107 | def round_all(d):
108 | return dict(map(lambda kv: (kv[0], np.round(kv[1], 3)), d.items()))
109 |
110 | return round_all(grouped_smapes), round_all(grouped_owa), round_all(grouped_mapes), round_all(
111 | grouped_model_mases)
112 |
113 | def summarize_groups(self, scores):
114 | """
115 | Re-group scores respecting M4 rules.
116 | :param scores: Scores per group.
117 | :return: Grouped scores.
118 | """
119 | scores_summary = OrderedDict()
120 |
121 | def group_count(group_name):
122 | return len(np.where(self.test_set.groups == group_name)[0])
123 |
124 | weighted_score = {}
125 | for g in ['Yearly', 'Quarterly', 'Monthly']:
126 | weighted_score[g] = scores[g] * group_count(g)
127 | scores_summary[g] = scores[g]
128 |
129 | others_score = 0
130 | others_count = 0
131 | for g in ['Weekly', 'Daily', 'Hourly']:
132 | others_score += scores[g] * group_count(g)
133 | others_count += group_count(g)
134 | weighted_score['Others'] = others_score
135 | scores_summary['Others'] = others_score / others_count
136 |
137 | average = np.sum(list(weighted_score.values())) / len(self.test_set.groups)
138 | scores_summary['Average'] = average
139 |
140 | return scores_summary
141 |
--------------------------------------------------------------------------------
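A hedged numeric sketch of the metrics above: mase scales the forecast error by the in-sample error of a seasonal-naive forecast at the given frequency, and smape_2 is the symmetric MAPE bounded by 200 (all values are toy data):

import numpy as np

insample = np.array([10., 12., 11., 13., 12., 14.])
forecast = np.array([12.5, 14.0])
outsample = np.array([13., 15.])
print(mase(forecast, insample, outsample, frequency=1))  # 0.75 / 1.6 = 0.46875
print(np.mean(smape_2(forecast, outsample)))             # mean symmetric APE in %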
/layers/AutoCorrelation.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | import matplotlib.pyplot as plt
5 | import numpy as np
6 | import math
7 | from math import sqrt
8 | import os
9 |
10 |
11 | class AutoCorrelation(nn.Module):
12 | """
13 | AutoCorrelation Mechanism with the following two phases:
14 | (1) period-based dependencies discovery
15 | (2) time delay aggregation
16 | This block can replace the self-attention family mechanism seamlessly.
17 | """
18 |
19 | def __init__(self, mask_flag=True, factor=1, scale=None, attention_dropout=0.1, output_attention=False):
20 | super(AutoCorrelation, self).__init__()
21 | self.factor = factor
22 | self.scale = scale
23 | self.mask_flag = mask_flag
24 | self.output_attention = output_attention
25 | self.dropout = nn.Dropout(attention_dropout)
26 |
27 | def time_delay_agg_training(self, values, corr):
28 | """
29 | SpeedUp version of Autocorrelation (a batch-normalization style design)
30 | This is for the training phase.
31 | """
32 | head = values.shape[1]
33 | channel = values.shape[2]
34 | length = values.shape[3]
35 | # find top k
36 | top_k = int(self.factor * math.log(length))
37 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
38 | index = torch.topk(torch.mean(mean_value, dim=0), top_k, dim=-1)[1]
39 | weights = torch.stack([mean_value[:, index[i]] for i in range(top_k)], dim=-1)
40 | # update corr
41 | tmp_corr = torch.softmax(weights, dim=-1)
42 | # aggregation
43 | tmp_values = values
44 | delays_agg = torch.zeros_like(values).float()
45 | for i in range(top_k):
46 | pattern = torch.roll(tmp_values, -int(index[i]), -1)
47 | delays_agg = delays_agg + pattern * \
48 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
49 | return delays_agg
50 |
51 | def time_delay_agg_inference(self, values, corr):
52 | """
53 | SpeedUp version of Autocorrelation (a batch-normalization style design)
54 | This is for the inference phase.
55 | """
56 | batch = values.shape[0]
57 | head = values.shape[1]
58 | channel = values.shape[2]
59 | length = values.shape[3]
60 | # index init
61 |         init_index = torch.arange(length, device=values.device).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
62 | # find top k
63 | top_k = int(self.factor * math.log(length))
64 | mean_value = torch.mean(torch.mean(corr, dim=1), dim=1)
65 | weights, delay = torch.topk(mean_value, top_k, dim=-1)
66 | # update corr
67 | tmp_corr = torch.softmax(weights, dim=-1)
68 | # aggregation
69 | tmp_values = values.repeat(1, 1, 1, 2)
70 | delays_agg = torch.zeros_like(values).float()
71 | for i in range(top_k):
72 | tmp_delay = init_index + delay[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length)
73 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
74 | delays_agg = delays_agg + pattern * \
75 | (tmp_corr[:, i].unsqueeze(1).unsqueeze(1).unsqueeze(1).repeat(1, head, channel, length))
76 | return delays_agg
77 |
78 | def time_delay_agg_full(self, values, corr):
79 | """
80 | Standard version of Autocorrelation
81 | """
82 | batch = values.shape[0]
83 | head = values.shape[1]
84 | channel = values.shape[2]
85 | length = values.shape[3]
86 | # index init
87 |         init_index = torch.arange(length, device=values.device).unsqueeze(0).unsqueeze(0).unsqueeze(0).repeat(batch, head, channel, 1)
88 | # find top k
89 | top_k = int(self.factor * math.log(length))
90 | weights, delay = torch.topk(corr, top_k, dim=-1)
91 | # update corr
92 | tmp_corr = torch.softmax(weights, dim=-1)
93 | # aggregation
94 | tmp_values = values.repeat(1, 1, 1, 2)
95 | delays_agg = torch.zeros_like(values).float()
96 | for i in range(top_k):
97 | tmp_delay = init_index + delay[..., i].unsqueeze(-1)
98 | pattern = torch.gather(tmp_values, dim=-1, index=tmp_delay)
99 | delays_agg = delays_agg + pattern * (tmp_corr[..., i].unsqueeze(-1))
100 | return delays_agg
101 |
102 | def forward(self, queries, keys, values, attn_mask):
103 | B, L, H, E = queries.shape
104 | _, S, _, D = values.shape
105 | if L > S:
106 | zeros = torch.zeros_like(queries[:, :(L - S), :]).float()
107 | values = torch.cat([values, zeros], dim=1)
108 | keys = torch.cat([keys, zeros], dim=1)
109 | else:
110 | values = values[:, :L, :, :]
111 | keys = keys[:, :L, :, :]
112 |
113 | # period-based dependencies
114 | q_fft = torch.fft.rfft(queries.permute(0, 2, 3, 1).contiguous(), dim=-1)
115 | k_fft = torch.fft.rfft(keys.permute(0, 2, 3, 1).contiguous(), dim=-1)
116 | res = q_fft * torch.conj(k_fft)
117 | corr = torch.fft.irfft(res, dim=-1)
118 |
119 | # time delay agg
120 | if self.training:
121 | V = self.time_delay_agg_training(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
122 | else:
123 | V = self.time_delay_agg_inference(values.permute(0, 2, 3, 1).contiguous(), corr).permute(0, 3, 1, 2)
124 |
125 | if self.output_attention:
126 | return (V.contiguous(), corr.permute(0, 3, 1, 2))
127 | else:
128 | return (V.contiguous(), None)
129 |
130 |
131 | class AutoCorrelationLayer(nn.Module):
132 | def __init__(self, correlation, d_model, n_heads, d_keys=None,
133 | d_values=None):
134 | super(AutoCorrelationLayer, self).__init__()
135 |
136 | d_keys = d_keys or (d_model // n_heads)
137 | d_values = d_values or (d_model // n_heads)
138 |
139 | self.inner_correlation = correlation
140 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
141 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
142 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
143 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
144 | self.n_heads = n_heads
145 |
146 | def forward(self, queries, keys, values, attn_mask):
147 | B, L, _ = queries.shape
148 | _, S, _ = keys.shape
149 | H = self.n_heads
150 |
151 | queries = self.query_projection(queries).view(B, L, H, -1)
152 | keys = self.key_projection(keys).view(B, S, H, -1)
153 | values = self.value_projection(values).view(B, S, H, -1)
154 |
155 | out, attn = self.inner_correlation(
156 | queries,
157 | keys,
158 | values,
159 | attn_mask
160 | )
161 | out = out.view(B, L, -1)
162 |
163 | return self.out_projection(out), attn
164 |
--------------------------------------------------------------------------------
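A small check of the FFT trick in AutoCorrelation.forward above: by the Wiener-Khinchin relation, irfft(rfft(q) * conj(rfft(k))) yields the circular cross-correlation at every lag in a single pass (toy 1-D tensors):

import torch

n = 96
q, k = torch.randn(n), torch.randn(n)
corr = torch.fft.irfft(torch.fft.rfft(q) * torch.conj(torch.fft.rfft(k)), n=n)
# brute-force check at lag 3: sum_t q[t] * k[(t - 3) mod n]
lag = 3
direct = torch.sum(q * torch.roll(k, lag))
print(torch.allclose(corr[lag], direct, atol=1e-3))  # True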
/layers/Autoformer_EncDec.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 |
6 | class my_Layernorm(nn.Module):
7 | """
8 | Special designed layernorm for the seasonal part
9 | """
10 |
11 | def __init__(self, channels):
12 | super(my_Layernorm, self).__init__()
13 | self.layernorm = nn.LayerNorm(channels)
14 |
15 | def forward(self, x):
16 | x_hat = self.layernorm(x)
17 | bias = torch.mean(x_hat, dim=1).unsqueeze(1).repeat(1, x.shape[1], 1)
18 | return x_hat - bias
19 |
20 |
21 | class moving_avg(nn.Module):
22 | """
23 | Moving average block to highlight the trend of time series
24 | """
25 |
26 | def __init__(self, kernel_size, stride):
27 | super(moving_avg, self).__init__()
28 | self.kernel_size = kernel_size
29 | self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)
30 |
31 | def forward(self, x):
32 | # padding on the both ends of time series
33 | front = x[:, 0:1, :].repeat(1, (self.kernel_size - 1) // 2, 1)
34 | end = x[:, -1:, :].repeat(1, (self.kernel_size - 1) // 2, 1)
35 | x = torch.cat([front, x, end], dim=1)
36 | x = self.avg(x.permute(0, 2, 1))
37 | x = x.permute(0, 2, 1)
38 | return x
39 |
40 |
41 | class series_decomp(nn.Module):
42 | """
43 | Series decomposition block
44 | """
45 |
46 | def __init__(self, kernel_size):
47 | super(series_decomp, self).__init__()
48 | self.moving_avg = moving_avg(kernel_size, stride=1)
49 |
50 | def forward(self, x):
51 | moving_mean = self.moving_avg(x)
52 | res = x - moving_mean
53 | return res, moving_mean
54 |
55 |
56 | class series_decomp_multi(nn.Module):
57 | """
58 | Multiple Series decomposition block from FEDformer
59 | """
60 |
61 | def __init__(self, kernel_size):
62 | super(series_decomp_multi, self).__init__()
63 | self.kernel_size = kernel_size
64 |         self.series_decomp = nn.ModuleList([series_decomp(kernel) for kernel in kernel_size])  # register as submodules
65 |
66 | def forward(self, x):
67 | moving_mean = []
68 | res = []
69 | for func in self.series_decomp:
70 | sea, moving_avg = func(x)
71 | moving_mean.append(moving_avg)
72 | res.append(sea)
73 |
74 | sea = sum(res) / len(res)
75 | moving_mean = sum(moving_mean) / len(moving_mean)
76 | return sea, moving_mean
77 |
78 |
79 | class EncoderLayer(nn.Module):
80 | """
81 | Autoformer encoder layer with the progressive decomposition architecture
82 | """
83 |
84 | def __init__(self, attention, d_model, d_ff=None, moving_avg=25, dropout=0.1, activation="relu"):
85 | super(EncoderLayer, self).__init__()
86 | d_ff = d_ff or 4 * d_model
87 | self.attention = attention
88 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
89 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
90 | self.decomp1 = series_decomp(moving_avg)
91 | self.decomp2 = series_decomp(moving_avg)
92 | self.dropout = nn.Dropout(dropout)
93 | self.activation = F.relu if activation == "relu" else F.gelu
94 |
95 | def forward(self, x, attn_mask=None):
96 | new_x, attn = self.attention(
97 | x, x, x,
98 | attn_mask=attn_mask
99 | )
100 | x = x + self.dropout(new_x)
101 | x, _ = self.decomp1(x)
102 | y = x
103 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
104 | y = self.dropout(self.conv2(y).transpose(-1, 1))
105 | res, _ = self.decomp2(x + y)
106 | return res, attn
107 |
108 |
109 | class Encoder(nn.Module):
110 | """
111 | Autoformer encoder
112 | """
113 |
114 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
115 | super(Encoder, self).__init__()
116 | self.attn_layers = nn.ModuleList(attn_layers)
117 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
118 | self.norm = norm_layer
119 |
120 | def forward(self, x, attn_mask=None):
121 | attns = []
122 | if self.conv_layers is not None:
123 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
124 | x, attn = attn_layer(x, attn_mask=attn_mask)
125 | x = conv_layer(x)
126 | attns.append(attn)
127 | x, attn = self.attn_layers[-1](x)
128 | attns.append(attn)
129 | else:
130 | for attn_layer in self.attn_layers:
131 | x, attn = attn_layer(x, attn_mask=attn_mask)
132 | attns.append(attn)
133 |
134 | if self.norm is not None:
135 | x = self.norm(x)
136 |
137 | return x, attns
138 |
139 |
140 | class DecoderLayer(nn.Module):
141 | """
142 | Autoformer decoder layer with the progressive decomposition architecture
143 | """
144 |
145 | def __init__(self, self_attention, cross_attention, d_model, c_out, d_ff=None,
146 | moving_avg=25, dropout=0.1, activation="relu"):
147 | super(DecoderLayer, self).__init__()
148 | d_ff = d_ff or 4 * d_model
149 | self.self_attention = self_attention
150 | self.cross_attention = cross_attention
151 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1, bias=False)
152 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1, bias=False)
153 | self.decomp1 = series_decomp(moving_avg)
154 | self.decomp2 = series_decomp(moving_avg)
155 | self.decomp3 = series_decomp(moving_avg)
156 | self.dropout = nn.Dropout(dropout)
157 | self.projection = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=3, stride=1, padding=1,
158 | padding_mode='circular', bias=False)
159 | self.activation = F.relu if activation == "relu" else F.gelu
160 |
161 | def forward(self, x, cross, x_mask=None, cross_mask=None):
162 | x = x + self.dropout(self.self_attention(
163 | x, x, x,
164 | attn_mask=x_mask
165 | )[0])
166 | x, trend1 = self.decomp1(x)
167 | x = x + self.dropout(self.cross_attention(
168 | x, cross, cross,
169 | attn_mask=cross_mask
170 | )[0])
171 | x, trend2 = self.decomp2(x)
172 | y = x
173 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1))))
174 | y = self.dropout(self.conv2(y).transpose(-1, 1))
175 | x, trend3 = self.decomp3(x + y)
176 |
177 | residual_trend = trend1 + trend2 + trend3
178 | residual_trend = self.projection(residual_trend.permute(0, 2, 1)).transpose(1, 2)
179 | return x, residual_trend
180 |
181 |
182 | class Decoder(nn.Module):
183 | """
184 |     Autoformer decoder
185 | """
186 |
187 | def __init__(self, layers, norm_layer=None, projection=None):
188 | super(Decoder, self).__init__()
189 | self.layers = nn.ModuleList(layers)
190 | self.norm = norm_layer
191 | self.projection = projection
192 |
193 | def forward(self, x, cross, x_mask=None, cross_mask=None, trend=None):
194 | for layer in self.layers:
195 | x, residual_trend = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask)
196 | trend = trend + residual_trend
197 |
198 | if self.norm is not None:
199 | x = self.norm(x)
200 |
201 | if self.projection is not None:
202 | x = self.projection(x)
203 | return x, trend
204 |
--------------------------------------------------------------------------------
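A quick sketch of series_decomp above: the moving average is the trend, the residual is the seasonal part, and by construction the two components sum back to the input:

import torch

decomp = series_decomp(kernel_size=25)
x = torch.randn(4, 96, 7)                   # (batch, seq_len, channels)
seasonal, trend = decomp(x)
print(torch.allclose(seasonal + trend, x))  # True: res = x - moving_mean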
/exp/exp_classification.py:
--------------------------------------------------------------------------------
1 | from torch.optim import lr_scheduler
2 |
3 | from data_provider.data_factory import data_provider
4 | from exp.exp_basic import Exp_Basic
5 | from utils.tools import EarlyStopping, adjust_learning_rate, cal_accuracy
6 | import torch
7 | import torch.nn as nn
8 | from torch import optim
9 | import os
10 | import time
11 | import warnings
12 | import numpy as np
13 | import pdb
14 |
15 | warnings.filterwarnings('ignore')
16 |
17 |
18 | class Exp_Classification(Exp_Basic):
19 | def __init__(self, args):
20 | super(Exp_Classification, self).__init__(args)
21 |
22 | def _build_model(self):
23 | # model input depends on data
24 | train_data, train_loader = self._get_data(flag='TRAIN')
25 | test_data, test_loader = self._get_data(flag='TEST')
26 | self.args.seq_len = max(train_data.max_seq_len, test_data.max_seq_len)
27 | self.args.pred_len = 0
28 | self.args.enc_in = train_data.feature_df.shape[1]
29 | self.args.num_class = len(train_data.class_names)
30 | # model init
31 | model = self.model_dict[self.args.model].Model(self.args).float()
32 | if self.args.use_multi_gpu and self.args.use_gpu:
33 | model = nn.DataParallel(model, device_ids=self.args.device_ids)
34 | return model
35 |
36 | def _get_data(self, flag):
37 | data_set, data_loader = data_provider(self.args, flag)
38 | return data_set, data_loader
39 |
40 | def _select_optimizer(self):
41 | # model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
42 | model_optim = optim.RAdam(self.model.parameters(), lr=self.args.learning_rate)
43 | return model_optim
44 |
45 | def _select_criterion(self):
46 | criterion = nn.CrossEntropyLoss()
47 | return criterion
48 |
49 | def vali(self, vali_data, vali_loader, criterion):
50 | total_loss = []
51 | preds = []
52 | trues = []
53 | self.model.eval()
54 | with torch.no_grad():
55 | for i, (batch_x, label, padding_mask) in enumerate(vali_loader):
56 | batch_x = batch_x.float().to(self.device)
57 | padding_mask = padding_mask.float().to(self.device)
58 | label = label.to(self.device)
59 |
60 | outputs = self.model(batch_x, padding_mask, None, None)
61 |
62 | pred = outputs.detach()
63 | loss = criterion(pred, label.long().squeeze())
64 | total_loss.append(loss.item())
65 |
66 | preds.append(outputs.detach())
67 | trues.append(label)
68 |
69 | total_loss = np.average(total_loss)
70 |
71 | preds = torch.cat(preds, 0)
72 | trues = torch.cat(trues, 0)
73 |         probs = torch.nn.functional.softmax(preds, dim=1)  # (total_samples, num_classes) est. prob. for each class and sample
74 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample
75 | trues = trues.flatten().cpu().numpy()
76 | accuracy = cal_accuracy(predictions, trues)
77 |
78 | self.model.train()
79 | return total_loss, accuracy
80 |
81 | def train(self, setting):
82 | train_data, train_loader = self._get_data(flag='TRAIN')
83 | vali_data, vali_loader = self._get_data(flag='TEST')
84 | test_data, test_loader = self._get_data(flag='TEST')
85 |
86 | path = os.path.join(self.args.checkpoints, setting)
87 | if not os.path.exists(path):
88 | os.makedirs(path)
89 |
90 | time_now = time.time()
91 |
92 | train_steps = len(train_loader)
93 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
94 |
95 | model_optim = self._select_optimizer()
96 | criterion = self._select_criterion()
97 |
98 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
99 | steps_per_epoch=train_steps,
100 | pct_start=self.args.pct_start,
101 | epochs=self.args.train_epochs,
102 | max_lr=self.args.learning_rate)
103 |
104 | for epoch in range(self.args.train_epochs):
105 | iter_count = 0
106 | train_loss = []
107 |
108 | self.model.train()
109 | epoch_time = time.time()
110 |
111 | for i, (batch_x, label, padding_mask) in enumerate(train_loader):
112 | iter_count += 1
113 | model_optim.zero_grad()
114 |
115 | batch_x = batch_x.float().to(self.device)
116 | padding_mask = padding_mask.float().to(self.device)
117 | label = label.to(self.device)
118 |
119 | outputs = self.model(batch_x, padding_mask, None, None)
120 | loss = criterion(outputs, label.long().squeeze(-1))
121 | train_loss.append(loss.item())
122 |
123 | if (i + 1) % 100 == 0:
124 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
125 | speed = (time.time() - time_now) / iter_count
126 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
127 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
128 | iter_count = 0
129 | time_now = time.time()
130 |
131 | loss.backward()
132 | nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=4.0)
133 | model_optim.step()
134 |
135 | # if self.args.lradj == 'TST':
136 | # adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
137 | # scheduler.step()
138 |
139 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
140 | train_loss = np.average(train_loss)
141 | vali_loss, val_accuracy = self.vali(vali_data, vali_loader, criterion)
142 | test_loss, test_accuracy = self.vali(test_data, test_loader, criterion)
143 |
144 | print(
145 | "Epoch: {0}, Steps: {1} | Train Loss: {2:.3f} Vali Loss: {3:.3f} Vali Acc: {4:.3f} Test Loss: {5:.3f} Test Acc: {6:.3f}"
146 | .format(epoch + 1, train_steps, train_loss, vali_loss, val_accuracy, test_loss, test_accuracy))
147 | early_stopping(-test_accuracy, self.model, path)
148 | if early_stopping.early_stop:
149 | print("Early stopping")
150 | break
151 | if (epoch + 1) % 5 == 0:
152 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
153 |
154 | best_model_path = path + '/' + 'checkpoint.pth'
155 | self.model.load_state_dict(torch.load(best_model_path))
156 |
157 | return self.model
158 |
159 | def test(self, setting, test=0):
160 | test_data, test_loader = self._get_data(flag='TEST')
161 | if test:
162 | print('loading model')
163 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
164 |
165 | preds = []
166 | trues = []
167 | folder_path = './test_results/' + setting + '/'
168 | if not os.path.exists(folder_path):
169 | os.makedirs(folder_path)
170 |
171 | self.model.eval()
172 | with torch.no_grad():
173 | for i, (batch_x, label, padding_mask) in enumerate(test_loader):
174 | batch_x = batch_x.float().to(self.device)
175 | padding_mask = padding_mask.float().to(self.device)
176 | label = label.to(self.device)
177 |
178 | outputs = self.model(batch_x, padding_mask, None, None)
179 |
180 | preds.append(outputs.detach())
181 | trues.append(label)
182 |
183 | preds = torch.cat(preds, 0)
184 | trues = torch.cat(trues, 0)
185 | print('test shape:', preds.shape, trues.shape)
186 |
187 |         probs = torch.nn.functional.softmax(preds, dim=1)  # (total_samples, num_classes) est. prob. for each class and sample
188 | predictions = torch.argmax(probs, dim=1).cpu().numpy() # (total_samples,) int class index for each sample
189 | trues = trues.flatten().cpu().numpy()
190 | accuracy = cal_accuracy(predictions, trues)
191 |
192 | # result save
193 | folder_path = './results/' + setting + '/'
194 | if not os.path.exists(folder_path):
195 | os.makedirs(folder_path)
196 |
197 | print('accuracy:{}'.format(accuracy))
198 |         file_name = 'result_classification.txt'
199 |         f = open(os.path.join(folder_path, file_name), 'a')
200 | f.write(setting + " \n")
201 | f.write('accuracy:{}'.format(accuracy))
202 | f.write('\n')
203 | f.write('\n')
204 | f.close()
205 | return
--------------------------------------------------------------------------------
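The evaluation flow in vali/test above, condensed to a toy example (the logits are made up):

import torch

logits = torch.tensor([[2.0, 0.1], [0.3, 1.5], [0.2, 2.2]])
probs = torch.nn.functional.softmax(logits, dim=1)  # per-sample class probabilities
predictions = torch.argmax(probs, dim=1)            # tensor([0, 1, 1])
trues = torch.tensor([0, 1, 0])
accuracy = (predictions == trues).float().mean().item()  # 0.666...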
/exp/exp_anomaly_detection.py:
--------------------------------------------------------------------------------
1 | from torch.optim import lr_scheduler
2 |
3 | from data_provider.data_factory import data_provider
4 | from exp.exp_basic import Exp_Basic
5 | from utils.tools import EarlyStopping, adjust_learning_rate, adjustment
6 | from sklearn.metrics import precision_recall_fscore_support
7 | from sklearn.metrics import accuracy_score
8 | import torch.multiprocessing
9 |
10 | torch.multiprocessing.set_sharing_strategy('file_system')
11 | import torch
12 | import torch.nn as nn
13 | from torch import optim
14 | import os
15 | import time
16 | import warnings
17 | import numpy as np
18 |
19 | warnings.filterwarnings('ignore')
20 |
21 |
22 | class Exp_Anomaly_Detection(Exp_Basic):
23 | def __init__(self, args):
24 | super(Exp_Anomaly_Detection, self).__init__(args)
25 |
26 | def _build_model(self):
27 | model = self.model_dict[self.args.model].Model(self.args).float()
28 |
29 | if self.args.use_multi_gpu and self.args.use_gpu:
30 | model = nn.DataParallel(model, device_ids=self.args.device_ids)
31 | return model
32 |
33 | def _get_data(self, flag):
34 | data_set, data_loader = data_provider(self.args, flag)
35 | return data_set, data_loader
36 |
37 | def _select_optimizer(self):
38 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
39 | return model_optim
40 |
41 | def _select_criterion(self):
42 | criterion = nn.MSELoss()
43 | return criterion
44 |
45 | def vali(self, vali_data, vali_loader, criterion):
46 | total_loss = []
47 | self.model.eval()
48 | with torch.no_grad():
49 | for i, (batch_x, _) in enumerate(vali_loader):
50 | batch_x = batch_x.float().to(self.device)
51 |
52 | outputs = self.model(batch_x, None, None, None)
53 |
54 | f_dim = -1 if self.args.features == 'MS' else 0
55 | outputs = outputs[:, :, f_dim:]
56 | pred = outputs.detach()
57 | true = batch_x.detach()
58 |
59 | loss = criterion(pred, true)
60 | total_loss.append(loss.item())
61 | total_loss = np.average(total_loss)
62 | self.model.train()
63 | return total_loss
64 |
65 | def train(self, setting):
66 | train_data, train_loader = self._get_data(flag='train')
67 | vali_data, vali_loader = self._get_data(flag='val')
68 | test_data, test_loader = self._get_data(flag='test')
69 |
70 | path = os.path.join(self.args.checkpoints, setting)
71 | if not os.path.exists(path):
72 | os.makedirs(path)
73 |
74 | time_now = time.time()
75 |
76 | train_steps = len(train_loader)
77 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
78 |
79 | model_optim = self._select_optimizer()
80 | criterion = self._select_criterion()
81 |
82 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
83 | steps_per_epoch=train_steps,
84 | pct_start=self.args.pct_start,
85 | epochs=self.args.train_epochs,
86 | max_lr=self.args.learning_rate)
87 |
88 | for epoch in range(self.args.train_epochs):
89 | iter_count = 0
90 | train_loss = []
91 |
92 | self.model.train()
93 | epoch_time = time.time()
94 | for i, (batch_x, batch_y) in enumerate(train_loader):
95 | iter_count += 1
96 | model_optim.zero_grad()
97 |
98 | batch_x = batch_x.float().to(self.device)
99 |
100 | outputs = self.model(batch_x, None, None, None)
101 |
102 | f_dim = -1 if self.args.features == 'MS' else 0
103 | outputs = outputs[:, :, f_dim:]
104 | loss = criterion(outputs, batch_x)
105 | train_loss.append(loss.item())
106 |
107 | if (i + 1) % 100 == 0:
108 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
109 | speed = (time.time() - time_now) / iter_count
110 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
111 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
112 | iter_count = 0
113 | time_now = time.time()
114 |
115 | loss.backward()
116 | model_optim.step()
117 |
118 | if self.args.lradj == 'TST':
119 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
120 | scheduler.step()
121 |
122 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
123 | train_loss = np.average(train_loss)
124 | vali_loss = self.vali(vali_data, vali_loader, criterion)
125 | test_loss = self.vali(test_data, test_loader, criterion)
126 |
127 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
128 | epoch + 1, train_steps, train_loss, vali_loss, test_loss))
129 | early_stopping(test_loss, self.model, path)
130 | if early_stopping.early_stop:
131 | print("Early stopping")
132 | break
133 | if self.args.lradj != 'TST':
134 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
135 | else:
136 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
137 |
138 | best_model_path = path + '/' + 'checkpoint.pth'
139 | self.model.load_state_dict(torch.load(best_model_path))
140 |
141 | return self.model
142 |
143 | def test(self, setting, test=0):
144 | test_data, test_loader = self._get_data(flag='test')
145 | train_data, train_loader = self._get_data(flag='train')
146 | if test:
147 | print('loading model')
148 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
149 |
150 | attens_energy = []
151 | folder_path = './test_results/' + setting + '/'
152 | if not os.path.exists(folder_path):
153 | os.makedirs(folder_path)
154 |
155 | self.model.eval()
156 |         self.anomaly_criterion = nn.MSELoss(reduction='none')  # per-element errors for scoring ('reduce' is deprecated)
157 |
158 |         # (1) statistics on the train set
159 | with torch.no_grad():
160 | for i, (batch_x, batch_y) in enumerate(train_loader):
161 | batch_x = batch_x.float().to(self.device)
162 | # reconstruction
163 | outputs = self.model(batch_x, None, None, None)
164 | # criterion
165 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
166 | score = score.detach().cpu().numpy()
167 | attens_energy.append(score)
168 |
169 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1)
170 | train_energy = np.array(attens_energy)
171 |
172 | # (2) find the threshold
173 | attens_energy = []
174 | test_labels = []
175 | for i, (batch_x, batch_y) in enumerate(test_loader):
176 | batch_x = batch_x.float().to(self.device)
177 | # reconstruction
178 | outputs = self.model(batch_x, None, None, None)
179 | # criterion
180 | score = torch.mean(self.anomaly_criterion(batch_x, outputs), dim=-1)
181 | score = score.detach().cpu().numpy()
182 | attens_energy.append(score)
183 | test_labels.append(batch_y)
184 |
185 | attens_energy = np.concatenate(attens_energy, axis=0).reshape(-1)
186 | test_energy = np.array(attens_energy)
187 | combined_energy = np.concatenate([train_energy, test_energy], axis=0)
188 | threshold = np.percentile(combined_energy, 100 - self.args.anomaly_ratio)
189 | print("Threshold :", threshold)
190 |
191 | # (3) evaluation on the test set
192 | pred = (test_energy > threshold).astype(int)
193 | test_labels = np.concatenate(test_labels, axis=0).reshape(-1)
194 | test_labels = np.array(test_labels)
195 | gt = test_labels.astype(int)
196 |
197 | print("pred: ", pred.shape)
198 | print("gt: ", gt.shape)
199 |
200 | # (4) detection adjustment
201 | gt, pred = adjustment(gt, pred)
202 |
203 | pred = np.array(pred)
204 | gt = np.array(gt)
205 | print("pred: ", pred.shape)
206 | print("gt: ", gt.shape)
207 |
208 | accuracy = accuracy_score(gt, pred)
209 | precision, recall, f_score, support = precision_recall_fscore_support(gt, pred, average='binary')
210 | print("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
211 | accuracy, precision,
212 | recall, f_score))
213 |
214 | f = open("result_anomaly_detection.txt", 'a')
215 | f.write(setting + " \n")
216 | f.write("Accuracy : {:0.4f}, Precision : {:0.4f}, Recall : {:0.4f}, F-score : {:0.4f} ".format(
217 | accuracy, precision,
218 | recall, f_score))
219 | f.write('\n')
220 | f.write('\n')
221 | f.close()
222 | return
223 |
--------------------------------------------------------------------------------
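The percentile thresholding in test above, as a toy numeric sketch (anomaly_ratio=1.0 means the top 1% of pooled reconstruction errors are flagged):

import numpy as np

rng = np.random.default_rng(0)
energy = np.concatenate([rng.random(990), 5 + rng.random(10)])  # 10 clear outliers
threshold = np.percentile(energy, 100 - 1.0)
pred = (energy > threshold).astype(int)
print(threshold, pred.sum())  # threshold sits between the two groups; ~10 points flagged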
/run.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import torch
3 |
4 | from exp.exp_anomaly_detection import Exp_Anomaly_Detection
5 | from exp.exp_classification import Exp_Classification
6 | from exp.exp_imputation import Exp_Imputation
7 | from exp.exp_long_term_forecasting import Exp_Long_Term_Forecast
8 | from exp.exp_short_term_forecasting import Exp_Short_Term_Forecast
9 | import random
10 | import numpy as np
11 |
12 | fix_seed = 2021
13 | random.seed(fix_seed)
14 | torch.manual_seed(fix_seed)
15 | np.random.seed(fix_seed)
16 |
17 | parser = argparse.ArgumentParser(description='LLMMixer')
18 |
19 | # basic config
20 | parser.add_argument('--task_name', type=str, required=True, default='long_term_forecast',
21 | help='task name, options:[long_term_forecast, short_term_forecast, imputation, classification, anomaly_detection]')
22 | parser.add_argument('--is_training', type=int, required=True, default=1, help='status')
23 | parser.add_argument('--model_id', type=str, required=True, default='test', help='model id')
24 | parser.add_argument('--model', type=str, required=True, default='LLMMixer',
25 | help='model name, options: [LLMMixer]')
26 |
27 | # data loader
28 | parser.add_argument('--data', type=str, required=True, default='ETTm1', help='dataset type')
29 | parser.add_argument('--root_path', type=str, default='./data/ETT/', help='root path of the data file')
30 | parser.add_argument('--data_path', type=str, default='ETTh1.csv', help='data file')
31 | parser.add_argument('--features', type=str, default='M',
32 | help='forecasting task, options:[M, S, MS]; M:multivariate predict multivariate, S:univariate predict univariate, MS:multivariate predict univariate')
33 | parser.add_argument('--target', type=str, default='OT', help='target feature in S or MS task')
34 | parser.add_argument('--freq', type=str, default='h',
35 | help='freq for time features encoding, options:[s:secondly, t:minutely, h:hourly, d:daily, b:business days, w:weekly, m:monthly], you can also use more detailed freq like 15min or 3h')
36 | parser.add_argument('--checkpoints', type=str, default='./checkpoints/', help='location of model checkpoints')
37 |
38 | # forecasting task
39 | parser.add_argument('--seq_len', type=int, default=96, help='input sequence length')
40 | parser.add_argument('--label_len', type=int, default=48, help='start token length')
41 | parser.add_argument('--pred_len', type=int, default=96, help='prediction sequence length')
42 | parser.add_argument('--seasonal_patterns', type=str, default='Monthly', help='subset for M4')
43 | parser.add_argument('--inverse', action='store_true', help='inverse output data', default=False)
44 |
45 | # model define
46 | parser.add_argument('--llm_path', type=str, default='FacebookAI/roberta-base', help='path of pretrained LLM')
47 | parser.add_argument('--tokenizer_path', type=str, default='FacebookAI/roberta-base', help='path of tokenizer')
48 | parser.add_argument('--top_k', type=int, default=5, help='for TimesBlock')
49 | parser.add_argument('--num_kernels', type=int, default=6, help='for Inception')
50 | parser.add_argument('--enc_in', type=int, default=7, help='encoder input size')
51 | parser.add_argument('--dec_in', type=int, default=7, help='decoder input size')
52 | parser.add_argument('--c_out', type=int, default=7, help='output size')
53 | parser.add_argument('--d_model', type=int, default=16, help='dimension of model')
54 | parser.add_argument('--n_heads', type=int, default=4, help='num of heads')
55 | parser.add_argument('--e_layers', type=int, default=2, help='num of encoder layers')
56 | parser.add_argument('--d_layers', type=int, default=1, help='num of decoder layers')
57 | parser.add_argument('--d_ff', type=int, default=32, help='dimension of fcn')
58 | parser.add_argument('--moving_avg', type=int, default=25, help='window size of moving average')
59 | parser.add_argument('--factor', type=int, default=1, help='attn factor')
60 | parser.add_argument('--distil', action='store_false',
61 | help='whether to use distilling in encoder, using this argument means not using distilling',
62 | default=True)
63 | parser.add_argument('--dropout', type=float, default=0.1, help='dropout')
64 | parser.add_argument('--embed', type=str, default='timeF',
65 | help='time features encoding, options:[timeF, fixed, learned]')
66 | parser.add_argument('--activation', type=str, default='gelu', help='activation')
67 | parser.add_argument('--output_attention', action='store_true', help='whether to output attention in encoder')
68 | parser.add_argument('--channel_independence', type=int, default=1,
69 | help='0: channel dependence 1: channel independence for FreTS model')
70 | parser.add_argument('--decomp_method', type=str, default='moving_avg',
71 |                     help='method of series decomposition, only supports moving_avg or dft_decomp')
72 | parser.add_argument('--use_norm', type=int, default=1, help='whether to use normalize; True 1 False 0')
73 | parser.add_argument('--down_sampling_layers', type=int, default=0, help='num of down sampling layers')
74 | parser.add_argument('--down_sampling_window', type=int, default=1, help='down sampling window size')
75 | parser.add_argument('--down_sampling_method', type=str, default='avg',
76 |                     help='down sampling method, only supports avg, max or conv')
77 | parser.add_argument('--use_future_temporal_feature', type=int, default=0,
78 | help='whether to use future_temporal_feature; True 1 False 0')
79 |
80 | # imputation task
81 | parser.add_argument('--mask_rate', type=float, default=0.25, help='mask ratio')
82 |
83 | # anomaly detection task
84 | parser.add_argument('--anomaly_ratio', type=float, default=0.25, help='prior anomaly ratio (%)')
85 |
86 | # optimization
87 | parser.add_argument('--num_workers', type=int, default=10, help='data loader num workers')
88 | parser.add_argument('--itr', type=int, default=1, help='experiments times')
89 | parser.add_argument('--train_epochs', type=int, default=10, help='train epochs')
90 | parser.add_argument('--batch_size', type=int, default=16, help='batch size of train input data')
91 | parser.add_argument('--patience', type=int, default=10, help='early stopping patience')
92 | parser.add_argument('--learning_rate', type=float, default=0.001, help='optimizer learning rate')
93 | parser.add_argument('--des', type=str, default='test', help='exp description')
94 | parser.add_argument('--loss', type=str, default='MSE', help='loss function')
95 | parser.add_argument('--lradj', type=str, default='TST', help='adjust learning rate')
96 | parser.add_argument('--pct_start', type=float, default=0.2, help='pct_start')
97 | parser.add_argument('--use_amp', action='store_true', help='use automatic mixed precision training', default=False)
98 | parser.add_argument('--comment', type=str, default='none', help='com')
99 |
100 | # GPU
101 | parser.add_argument('--use_gpu', type=bool, default=True, help='use gpu')
102 | parser.add_argument('--gpu', type=int, default=0, help='gpu')
103 | parser.add_argument('--use_multi_gpu', action='store_true', help='use multiple gpus', default=False)
104 | parser.add_argument('--devices', type=str, default='0,1', help='device ids of multiple gpus')
105 |
106 | # de-stationary projector params
107 | parser.add_argument('--p_hidden_dims', type=int, nargs='+', default=[128, 128],
108 | help='hidden layer dimensions of projector (List)')
109 | parser.add_argument('--p_hidden_layers', type=int, default=2, help='number of hidden layers in projector')
110 |
111 | args = parser.parse_args()
112 | args.use_gpu = True if torch.cuda.is_available() and args.use_gpu else False
113 |
114 | if args.use_gpu and args.use_multi_gpu:
115 | args.devices = args.devices.replace(' ', '')
116 | device_ids = args.devices.split(',')
117 | args.device_ids = [int(id_) for id_ in device_ids]
118 | args.gpu = args.device_ids[0]
119 |
120 | print('Args in experiment:')
121 | print(args)
122 |
123 | task_exp_map = {'long_term_forecast': Exp_Long_Term_Forecast,
124 |                 'short_term_forecast': Exp_Short_Term_Forecast,
125 |                 'imputation': Exp_Imputation,
126 |                 'classification': Exp_Classification,
127 |                 'anomaly_detection': Exp_Anomaly_Detection}
128 | Exp = task_exp_map.get(args.task_name, Exp_Long_Term_Forecast)
129 |
130 | if args.is_training:
131 | for ii in range(args.itr):
132 | # setting record of experiments
133 | setting = '{}_{}_{}_{}_{}_sl{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(
134 | args.task_name,
135 | args.model_id,
136 | args.comment,
137 | args.model,
138 | args.data,
139 | args.seq_len,
140 | args.pred_len,
141 | args.d_model,
142 | args.n_heads,
143 | args.e_layers,
144 | args.d_layers,
145 | args.d_ff,
146 | args.factor,
147 | args.embed,
148 | args.distil,
149 | args.des, ii)
150 |
151 | exp = Exp(args) # set experiments
152 | print('>>>>>>>start training : {}>>>>>>>>>>>>>>>>>>>>>>>>>>'.format(setting))
153 | exp.train(setting)
154 |
155 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
156 | exp.test(setting)
157 | torch.cuda.empty_cache()
158 | else:
159 | ii = 0
160 | setting = '{}_{}_{}_{}_{}_sl{}_pl{}_dm{}_nh{}_el{}_dl{}_df{}_fc{}_eb{}_dt{}_{}_{}'.format(
161 | args.task_name,
162 | args.model_id,
163 | args.comment,
164 | args.model,
165 | args.data,
166 | args.seq_len,
167 | args.pred_len,
168 | args.d_model,
169 | args.n_heads,
170 | args.e_layers,
171 | args.d_layers,
172 | args.d_ff,
173 | args.factor,
174 | args.embed,
175 | args.distil,
176 | args.des, ii)
177 |
178 | exp = Exp(args) # set experiments
179 | print('>>>>>>>testing : {}<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<'.format(setting))
180 | exp.test(setting, test=1)
181 | torch.cuda.empty_cache()
182 |
--------------------------------------------------------------------------------
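An illustrative invocation of run.py above (flag values are placeholders, not tuned settings; every flag comes from the argparse block):

python run.py \
  --task_name long_term_forecast --is_training 1 \
  --model_id ETTh1_96_96 --model LLMMixer --data ETTh1 \
  --root_path ./data/ETT/ --data_path ETTh1.csv \
  --seq_len 96 --pred_len 96 --enc_in 7 --c_out 7 \
  --d_model 16 --d_ff 32 --e_layers 2 --learning_rate 0.001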
/layers/Embed.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 | from torch.nn.utils import weight_norm
5 | import math
6 |
7 |
8 | class PositionalEmbedding(nn.Module):
9 | def __init__(self, d_model, max_len=5000):
10 | super(PositionalEmbedding, self).__init__()
11 | # Compute the positional encodings once in log space.
12 | pe = torch.zeros(max_len, d_model).float()
13 |         pe.requires_grad = False
14 |
15 | position = torch.arange(0, max_len).float().unsqueeze(1)
16 | div_term = (torch.arange(0, d_model, 2).float()
17 | * -(math.log(10000.0) / d_model)).exp()
18 |
19 | pe[:, 0::2] = torch.sin(position * div_term)
20 | pe[:, 1::2] = torch.cos(position * div_term)
21 |
22 | pe = pe.unsqueeze(0)
23 | self.register_buffer('pe', pe)
24 |
25 | def forward(self, x):
26 | return self.pe[:, :x.size(1)]
27 |
28 |
29 | class TokenEmbedding(nn.Module):
30 | def __init__(self, c_in, d_model):
31 | super(TokenEmbedding, self).__init__()
32 | padding = 1 if torch.__version__ >= '1.5.0' else 2
33 | self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
34 | kernel_size=3, padding=padding, padding_mode='circular', bias=False)
35 | for m in self.modules():
36 | if isinstance(m, nn.Conv1d):
37 | nn.init.kaiming_normal_(
38 | m.weight, mode='fan_in', nonlinearity='leaky_relu')
39 |
40 | def forward(self, x):
41 | x = self.tokenConv(x.permute(0, 2, 1)).transpose(1, 2)
42 | return x
43 |
44 |
45 | class FixedEmbedding(nn.Module):
46 | def __init__(self, c_in, d_model):
47 | super(FixedEmbedding, self).__init__()
48 |
49 | w = torch.zeros(c_in, d_model).float()
50 |         w.requires_grad = False
51 |
52 | position = torch.arange(0, c_in).float().unsqueeze(1)
53 | div_term = (torch.arange(0, d_model, 2).float()
54 | * -(math.log(10000.0) / d_model)).exp()
55 |
56 | w[:, 0::2] = torch.sin(position * div_term)
57 | w[:, 1::2] = torch.cos(position * div_term)
58 |
59 | self.emb = nn.Embedding(c_in, d_model)
60 | self.emb.weight = nn.Parameter(w, requires_grad=False)
61 |
62 | def forward(self, x):
63 | return self.emb(x).detach()
64 |
65 |
66 | class TemporalEmbedding(nn.Module):
67 | def __init__(self, d_model, embed_type='fixed', freq='h'):
68 | super(TemporalEmbedding, self).__init__()
69 |
70 | minute_size = 4
71 | hour_size = 24
72 | weekday_size = 7
73 | day_size = 32
74 | month_size = 13
75 |
76 | Embed = FixedEmbedding if embed_type == 'fixed' else nn.Embedding
77 | if freq == 't':
78 | self.minute_embed = Embed(minute_size, d_model)
79 | self.hour_embed = Embed(hour_size, d_model)
80 | self.weekday_embed = Embed(weekday_size, d_model)
81 | self.day_embed = Embed(day_size, d_model)
82 | self.month_embed = Embed(month_size, d_model)
83 |
84 | def forward(self, x):
85 | x = x.long()
86 | minute_x = self.minute_embed(x[:, :, 4]) if hasattr(
87 | self, 'minute_embed') else 0.
88 | hour_x = self.hour_embed(x[:, :, 3])
89 | weekday_x = self.weekday_embed(x[:, :, 2])
90 | day_x = self.day_embed(x[:, :, 1])
91 | month_x = self.month_embed(x[:, :, 0])
92 |
93 | return hour_x + weekday_x + day_x + month_x + minute_x
94 |
95 |
96 | class TimeFeatureEmbedding(nn.Module):
97 | def __init__(self, d_model, embed_type='timeF', freq='h'):
98 | super(TimeFeatureEmbedding, self).__init__()
99 |
100 | freq_map = {'h': 4, 't': 5, 's': 6,
101 | 'm': 1, 'a': 1, 'w': 2, 'd': 3, 'b': 3}
102 | d_inp = freq_map[freq]
103 | self.embed = nn.Linear(d_inp, d_model, bias=False)
104 |
105 | def forward(self, x):
106 | return self.embed(x)
107 |
108 |
109 | class DataEmbedding(nn.Module):
110 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
111 | super(DataEmbedding, self).__init__()
112 | self.c_in = c_in
113 | self.d_model = d_model
114 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
115 | self.position_embedding = PositionalEmbedding(d_model=d_model)
116 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
117 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
118 | d_model=d_model, embed_type=embed_type, freq=freq)
119 | self.dropout = nn.Dropout(p=dropout)
120 |
121 | def forward(self, x, x_mark):
122 | _, _, N = x.size()
123 | if N == self.c_in:
124 | if x_mark is None:
125 | x = self.value_embedding(x) + self.position_embedding(x)
126 | else:
127 | x = self.value_embedding(
128 | x) + self.temporal_embedding(x_mark) + self.position_embedding(x)
129 | elif N == self.d_model:
130 | if x_mark is None:
131 | x = x + self.position_embedding(x)
132 | else:
133 | x = x + self.temporal_embedding(x_mark) + self.position_embedding(x)
134 |
135 | return self.dropout(x)
136 |
137 |
138 | class DataEmbedding_ms(nn.Module):
139 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
140 | super(DataEmbedding_ms, self).__init__()
141 |
142 | self.value_embedding = TokenEmbedding(c_in=1, d_model=d_model)
143 | self.position_embedding = PositionalEmbedding(d_model=d_model)
144 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
145 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
146 | d_model=d_model, embed_type=embed_type, freq=freq)
147 | self.dropout = nn.Dropout(p=dropout)
148 |
149 | def forward(self, x, x_mark):
150 | B, T, N = x.shape
151 |         x1 = self.value_embedding(x.permute(0, 2, 1).reshape(B * N, T).unsqueeze(-1))
152 |         x1 = x1.reshape(B, N, T, -1).permute(0, 2, 1, 3)
153 | if x_mark is None:
154 | x = x1
155 | else:
156 | x = x1 + self.temporal_embedding(x_mark)
157 | return self.dropout(x)
158 |
159 |
160 | class DataEmbedding_wo_pos(nn.Module):
161 | def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
162 | super(DataEmbedding_wo_pos, self).__init__()
163 |
164 | self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
165 | self.position_embedding = PositionalEmbedding(d_model=d_model)
166 | self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type,
167 | freq=freq) if embed_type != 'timeF' else TimeFeatureEmbedding(
168 | d_model=d_model, embed_type=embed_type, freq=freq)
169 | self.dropout = nn.Dropout(p=dropout)
170 |
171 | def forward(self, x, x_mark):
172 | if x is None and x_mark is not None:
173 | return self.temporal_embedding(x_mark)
174 | if x_mark is None:
175 | x = self.value_embedding(x)
176 | else:
177 | x = self.value_embedding(x) + self.temporal_embedding(x_mark)
178 | return self.dropout(x)
179 |
180 |
181 | class PatchEmbedding_crossformer(nn.Module):
182 | def __init__(self, d_model, patch_len, stride, padding, dropout):
183 | super(PatchEmbedding_crossformer, self).__init__()
184 | # Patching
185 | self.patch_len = patch_len
186 | self.stride = stride
187 | self.padding_patch_layer = nn.ReplicationPad1d((0, padding))
188 |
189 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space
190 | self.value_embedding = nn.Linear(patch_len, d_model, bias=False)
191 |
192 | # Positional embedding
193 | self.position_embedding = PositionalEmbedding(d_model)
194 |
195 | # Residual dropout
196 | self.dropout = nn.Dropout(dropout)
197 |
198 | def forward(self, x):
199 | # do patching
200 | n_vars = x.shape[1]
201 | x = self.padding_patch_layer(x)
202 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
203 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
204 | # Input encoding
205 | x = self.value_embedding(x) + self.position_embedding(x)
206 | return self.dropout(x), n_vars
207 |
208 |
209 | class PatchEmbedding(nn.Module):
210 | def __init__(self, d_model, patch_len, stride, dropout):
211 | super(PatchEmbedding, self).__init__()
212 | # Patching
213 | self.patch_len = patch_len
214 | self.stride = stride
215 | self.padding_patch_layer = nn.ReplicationPad1d((0, stride))
216 |
217 | # Backbone, Input encoding: projection of feature vectors onto a d-dim vector space
218 | self.value_embedding = TokenEmbedding(patch_len, d_model)
219 |
220 | # Positional embedding
221 | self.position_embedding = PositionalEmbedding(d_model)
222 |
223 | # Residual dropout
224 | self.dropout = nn.Dropout(dropout)
225 |
226 | def forward(self, x):
227 | # do patching
228 | n_vars = x.shape[1]
229 | x = self.padding_patch_layer(x)
230 | x = x.unfold(dimension=-1, size=self.patch_len, step=self.stride)
231 | x = torch.reshape(x, (x.shape[0] * x.shape[1], x.shape[2], x.shape[3]))
232 | # Input encoding
233 | x = self.value_embedding(x) + self.position_embedding(x)
234 | return self.dropout(x), n_vars
235 |
--------------------------------------------------------------------------------
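A minimal shape check for the DataEmbedding defined above, assuming the repo root is on PYTHONPATH so layers/Embed.py is importable. With embed_type='fixed' and freq='h', x_mark carries integer calendar features in the column order TemporalEmbedding indexes (month, day, weekday, hour); the tensor sizes below are illustrative only.

import torch
from layers.Embed import DataEmbedding

B, T, c_in, d_model = 2, 96, 7, 512
emb = DataEmbedding(c_in=c_in, d_model=d_model, embed_type='fixed', freq='h')

x = torch.randn(B, T, c_in)
# month in [0, 12], day in [0, 31], weekday in [0, 6], hour in [0, 23]
x_mark = torch.stack([
    torch.randint(0, 13, (B, T)),
    torch.randint(0, 32, (B, T)),
    torch.randint(0, 7, (B, T)),
    torch.randint(0, 24, (B, T)),
], dim=-1).float()

print(emb(x, x_mark).shape)  # torch.Size([2, 96, 512])

The sum of token, temporal, and positional embeddings keeps the sequence length, so downstream encoders see (B, T, d_model). For the PatchEmbedding variants, unfold with size=patch_len and step=stride over the padded length T + padding yields floor((T + padding - patch_len) / stride) + 1 patches per variable.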
/exp/exp_imputation.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.optim import lr_scheduler
3 |
4 | from data_provider.data_factory import data_provider
5 | from exp.exp_basic import Exp_Basic
6 | from utils.tools import EarlyStopping, adjust_learning_rate, visual
7 | from utils.metrics import metric
8 | import torch
9 | import torch.nn as nn
10 | from torch import optim
11 | import os
12 | import time
13 | import warnings
14 | import numpy as np
15 |
16 | warnings.filterwarnings('ignore')
17 |
18 |
19 | class Exp_Imputation(Exp_Basic):
20 | def __init__(self, args):
21 | super(Exp_Imputation, self).__init__(args)
22 |
23 | def _build_model(self):
24 | model = self.model_dict[self.args.model].Model(self.args).float()
25 |
26 | if self.args.use_multi_gpu and self.args.use_gpu:
27 | model = nn.DataParallel(model, device_ids=self.args.device_ids)
28 | return model
29 |
30 | def _get_data(self, flag):
31 | data_set, data_loader = data_provider(self.args, flag)
32 | return data_set, data_loader
33 |
34 | def _select_optimizer(self):
35 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
36 | return model_optim
37 |
38 | def _select_criterion(self):
39 | criterion = nn.MSELoss()
40 | return criterion
41 |
42 | def vali(self, vali_data, vali_loader, criterion):
43 | total_loss = []
44 | self.model.eval()
45 | with torch.no_grad():
46 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
47 | batch_x = batch_x.float().to(self.device)
48 | batch_x_mark = batch_x_mark.float().to(self.device)
49 |
50 | # random mask
51 | B, T, N = batch_x.shape
52 | """
53 | B = batch size
54 | T = seq len
55 | N = number of features
56 | """
57 | mask = torch.rand((B, T, N)).to(self.device)
58 | mask[mask <= self.args.mask_rate] = 0 # masked
59 | mask[mask > self.args.mask_rate] = 1 # remained
60 | inp = batch_x.masked_fill(mask == 0, 0)
61 |
62 | outputs = self.model(inp, batch_x_mark, None, None, mask)
63 |
64 | f_dim = -1 if self.args.features == 'MS' else 0
65 | outputs = outputs[:, :, f_dim:]
66 |
67 | # add support for MS
68 | batch_x = batch_x[:, :, f_dim:]
69 | mask = mask[:, :, f_dim:]
70 |
71 | pred = outputs.detach()
72 | true = batch_x.detach()
73 | mask = mask.detach()
74 |
75 | loss = criterion(pred[mask == 0], true[mask == 0])
76 | total_loss.append(loss.item())
77 | total_loss = np.average(total_loss)
78 | self.model.train()
79 | return total_loss
80 |
81 | def train(self, setting):
82 | train_data, train_loader = self._get_data(flag='train')
83 | vali_data, vali_loader = self._get_data(flag='val')
84 | test_data, test_loader = self._get_data(flag='test')
85 |
86 | path = os.path.join(self.args.checkpoints, setting)
87 | if not os.path.exists(path):
88 | os.makedirs(path)
89 |
90 | time_now = time.time()
91 |
92 | train_steps = len(train_loader)
93 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
94 |
95 | model_optim = self._select_optimizer()
96 | criterion = self._select_criterion()
97 |
98 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
99 | steps_per_epoch=train_steps,
100 | pct_start=self.args.pct_start,
101 | epochs=self.args.train_epochs,
102 | max_lr=self.args.learning_rate)
103 |
104 | for epoch in range(self.args.train_epochs):
105 | iter_count = 0
106 | train_loss = []
107 |
108 | self.model.train()
109 | epoch_time = time.time()
110 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
111 | iter_count += 1
112 | model_optim.zero_grad()
113 |
114 | batch_x = batch_x.float().to(self.device)
115 | batch_x_mark = batch_x_mark.float().to(self.device)
116 |
117 | # random mask
118 | B, T, N = batch_x.shape
119 | mask = torch.rand((B, T, N)).to(self.device)
120 | mask[mask <= self.args.mask_rate] = 0 # masked
121 | mask[mask > self.args.mask_rate] = 1 # remained
122 | inp = batch_x.masked_fill(mask == 0, 0)
123 |
124 | outputs = self.model(inp, batch_x_mark, None, None, mask)
125 |
126 | f_dim = -1 if self.args.features == 'MS' else 0
127 | outputs = outputs[:, :, f_dim:]
128 |
129 | # add support for MS
130 | batch_x = batch_x[:, :, f_dim:]
131 | mask = mask[:, :, f_dim:]
132 |
133 | loss = criterion(outputs[mask == 0], batch_x[mask == 0])
134 | train_loss.append(loss.item())
135 |
136 | if (i + 1) % 100 == 0:
137 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
138 | speed = (time.time() - time_now) / iter_count
139 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
140 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
141 | iter_count = 0
142 | time_now = time.time()
143 |
144 | loss.backward()
145 | model_optim.step()
146 |
147 | if self.args.lradj == 'TST':
148 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
149 | scheduler.step()
150 |
151 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
152 | train_loss = np.average(train_loss)
153 | vali_loss = self.vali(vali_data, vali_loader, criterion)
154 | test_loss = self.vali(test_data, test_loader, criterion)
155 |
156 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
157 | epoch + 1, train_steps, train_loss, vali_loss, test_loss))
158 |             early_stopping(vali_loss, self.model, path)  # early stop on validation loss, not test loss
159 | if early_stopping.early_stop:
160 | print("Early stopping")
161 | break
162 |
163 | if self.args.lradj != 'TST':
164 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
165 | else:
166 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
167 |
168 | best_model_path = path + '/' + 'checkpoint.pth'
169 | self.model.load_state_dict(torch.load(best_model_path))
170 |
171 | return self.model
172 |
173 | def test(self, setting, test=0):
174 | test_data, test_loader = self._get_data(flag='test')
175 | if test:
176 | print('loading model')
177 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
178 |
179 | preds = []
180 | trues = []
181 | masks = []
182 | folder_path = './test_results/' + setting + '/'
183 | if not os.path.exists(folder_path):
184 | os.makedirs(folder_path)
185 |
186 | self.model.eval()
187 | with torch.no_grad():
188 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
189 | batch_x = batch_x.float().to(self.device)
190 | batch_x_mark = batch_x_mark.float().to(self.device)
191 |
192 | # random mask
193 | B, T, N = batch_x.shape
194 | mask = torch.rand((B, T, N)).to(self.device)
195 | mask[mask <= self.args.mask_rate] = 0 # masked
196 | mask[mask > self.args.mask_rate] = 1 # remained
197 | inp = batch_x.masked_fill(mask == 0, 0)
198 |
199 | # imputation
200 | outputs = self.model(inp, batch_x_mark, None, None, mask)
201 |
202 | # eval
203 | f_dim = -1 if self.args.features == 'MS' else 0
204 | outputs = outputs[:, :, f_dim:]
205 |
206 | # add support for MS
207 | batch_x = batch_x[:, :, f_dim:]
208 | mask = mask[:, :, f_dim:]
209 |
210 | outputs = outputs.detach().cpu().numpy()
211 | pred = outputs
212 | true = batch_x.detach().cpu().numpy()
213 | preds.append(pred)
214 | trues.append(true)
215 | masks.append(mask.detach().cpu())
216 |
217 | if i % 20 == 0:
218 | filled = true[0, :, -1].copy()
219 | filled = filled * mask[0, :, -1].detach().cpu().numpy() + \
220 | pred[0, :, -1] * (1 - mask[0, :, -1].detach().cpu().numpy())
221 | visual(true[0, :, -1], filled, os.path.join(folder_path, str(i) + '.pdf'))
222 |
223 | preds = np.concatenate(preds, 0)
224 | trues = np.concatenate(trues, 0)
225 | masks = np.concatenate(masks, 0)
226 | print('test shape:', preds.shape, trues.shape)
227 |
228 | # result save
229 | folder_path = './results/' + setting + '/'
230 | if not os.path.exists(folder_path):
231 | os.makedirs(folder_path)
232 |
233 | mae, mse, rmse, mape, mspe = metric(preds[masks == 0], trues[masks == 0])
234 | print('mse:{}, mae:{}'.format(mse, mae))
235 |         with open("result_imputation.txt", 'a') as f:
236 |             f.write(setting + " \n")
237 |             f.write('mse:{}, mae:{}'.format(mse, mae))
238 |             f.write('\n')
239 |             f.write('\n')
240 | 
241 |
242 | np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
243 | np.save(folder_path + 'pred.npy', preds)
244 | np.save(folder_path + 'true.npy', trues)
245 | return
246 |
--------------------------------------------------------------------------------
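A self-contained sketch of the random-mask scheme Exp_Imputation repeats in train, vali, and test: uniform noise at or below mask_rate marks a position as missing, the input is zero-filled there, and the loss is taken only over those masked entries. mask_rate and the toy tensors are illustrative, not values from this repo's configs.

import torch
import torch.nn.functional as F

mask_rate = 0.25
batch_x = torch.randn(4, 96, 7)              # (B, T, N)

mask = torch.rand_like(batch_x)
mask[mask <= mask_rate] = 0                  # masked
mask[mask > mask_rate] = 1                   # remained
inp = batch_x.masked_fill(mask == 0, 0)      # model input with holes

outputs = torch.randn_like(batch_x)          # stand-in for self.model(inp, ...)
loss = F.mse_loss(outputs[mask == 0], batch_x[mask == 0])
print(inp.shape, loss.item())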
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/data_provider/data_loader.py:
--------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | import pandas as pd
4 | import glob
5 | import re
6 | import torch
7 | from sktime.datasets import load_from_tsfile_to_dataframe
8 | from torch.utils.data import Dataset
9 | from sklearn.preprocessing import StandardScaler
10 | from utils.timefeatures import time_features
11 | from data_provider.m4 import M4Dataset, M4Meta
12 | from data_provider.uea import Normalizer, interpolate_missing
13 | import warnings
14 |
15 | warnings.filterwarnings('ignore')
16 |
17 |
18 | class Dataset_ETT_hour(Dataset):
19 | def __init__(self, root_path, flag='train', size=None,
20 | features='S', data_path='ETTh1.csv',
21 | target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
22 | # size [seq_len, label_len, pred_len]
23 | # info
24 |         if size is None:
25 | self.seq_len = 24 * 4 * 4
26 | self.label_len = 24 * 4
27 | self.pred_len = 24 * 4
28 | else:
29 | self.seq_len = size[0]
30 | self.label_len = size[1]
31 | self.pred_len = size[2]
32 | # init
33 | assert flag in ['train', 'test', 'val']
34 | type_map = {'train': 0, 'val': 1, 'test': 2}
35 | self.set_type = type_map[flag]
36 |
37 | self.features = features
38 | self.target = target
39 | self.scale = scale
40 | self.timeenc = timeenc
41 | self.freq = freq
42 |
43 | self.root_path = root_path
44 | self.data_path = data_path
45 | self.__read_data__()
46 |
47 | def __read_data__(self):
48 | self.scaler = StandardScaler()
49 | df_raw = pd.read_csv(os.path.join(self.root_path,
50 | self.data_path))
51 |
52 | border1s = [0, 12 * 30 * 24 - self.seq_len, 12 * 30 * 24 + 4 * 30 * 24 - self.seq_len]
53 | border2s = [12 * 30 * 24, 12 * 30 * 24 + 4 * 30 * 24, 12 * 30 * 24 + 8 * 30 * 24]
54 | border1 = border1s[self.set_type]
55 | border2 = border2s[self.set_type]
56 |
57 | if self.features == 'M' or self.features == 'MS':
58 | cols_data = df_raw.columns[1:]
59 | df_data = df_raw[cols_data]
60 | elif self.features == 'S':
61 | df_data = df_raw[[self.target]]
62 |
63 | if self.scale:
64 | train_data = df_data[border1s[0]:border2s[0]]
65 | self.scaler.fit(train_data.values)
66 | data = self.scaler.transform(df_data.values)
67 | else:
68 | data = df_data.values
69 |
70 | df_stamp = df_raw[['date']][border1:border2]
71 | df_stamp['date'] = pd.to_datetime(df_stamp.date)
72 | if self.timeenc == 0:
73 |             df_stamp['month'] = df_stamp.date.dt.month
74 |             df_stamp['day'] = df_stamp.date.dt.day
75 |             df_stamp['weekday'] = df_stamp.date.dt.weekday
76 |             df_stamp['hour'] = df_stamp.date.dt.hour
77 |             data_stamp = df_stamp.drop(columns=['date']).values
78 | elif self.timeenc == 1:
79 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
80 | data_stamp = data_stamp.transpose(1, 0)
81 |
82 | self.data_x = data[border1:border2]
83 | self.data_y = data[border1:border2]
84 | self.data_stamp = data_stamp
85 |
86 | def __getitem__(self, index):
87 | s_begin = index
88 | s_end = s_begin + self.seq_len
89 | r_begin = s_end - self.label_len
90 | r_end = r_begin + self.label_len + self.pred_len
91 |
92 | seq_x = self.data_x[s_begin:s_end]
93 | seq_y = self.data_y[r_begin:r_end]
94 | seq_x_mark = self.data_stamp[s_begin:s_end]
95 | seq_y_mark = self.data_stamp[r_begin:r_end]
96 |
97 | return seq_x, seq_y, seq_x_mark, seq_y_mark
98 |
99 | def __len__(self):
100 | return len(self.data_x) - self.seq_len - self.pred_len + 1
101 |
102 | def inverse_transform(self, data):
103 | return self.scaler.inverse_transform(data)
104 |
105 |
106 | class Dataset_ETT_minute(Dataset):
107 | def __init__(self, root_path, flag='train', size=None,
108 | features='S', data_path='ETTm1.csv',
109 | target='OT', scale=True, timeenc=0, freq='t', seasonal_patterns=None):
110 | # size [seq_len, label_len, pred_len]
111 | # info
112 |         if size is None:
113 | self.seq_len = 24 * 4 * 4
114 | self.label_len = 24 * 4
115 | self.pred_len = 24 * 4
116 | else:
117 | self.seq_len = size[0]
118 | self.label_len = size[1]
119 | self.pred_len = size[2]
120 | # init
121 | assert flag in ['train', 'test', 'val']
122 | type_map = {'train': 0, 'val': 1, 'test': 2}
123 | self.set_type = type_map[flag]
124 |
125 | self.features = features
126 | self.target = target
127 | self.scale = scale
128 | self.timeenc = timeenc
129 | self.freq = freq
130 |
131 | self.root_path = root_path
132 | self.data_path = data_path
133 | self.__read_data__()
134 |
135 | def __read_data__(self):
136 | self.scaler = StandardScaler()
137 | df_raw = pd.read_csv(os.path.join(self.root_path,
138 | self.data_path))
139 |
140 | border1s = [0, 12 * 30 * 24 * 4 - self.seq_len, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4 - self.seq_len]
141 | border2s = [12 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 4 * 30 * 24 * 4, 12 * 30 * 24 * 4 + 8 * 30 * 24 * 4]
142 | border1 = border1s[self.set_type]
143 | border2 = border2s[self.set_type]
144 |
145 | if self.features == 'M' or self.features == 'MS':
146 | cols_data = df_raw.columns[1:]
147 | df_data = df_raw[cols_data]
148 | elif self.features == 'S':
149 | df_data = df_raw[[self.target]]
150 |
151 | if self.scale:
152 | train_data = df_data[border1s[0]:border2s[0]]
153 | self.scaler.fit(train_data.values)
154 | data = self.scaler.transform(df_data.values)
155 | else:
156 | data = df_data.values
157 |
158 | df_stamp = df_raw[['date']][border1:border2]
159 | df_stamp['date'] = pd.to_datetime(df_stamp.date)
160 | if self.timeenc == 0:
161 |             df_stamp['month'] = df_stamp.date.dt.month
162 |             df_stamp['day'] = df_stamp.date.dt.day
163 |             df_stamp['weekday'] = df_stamp.date.dt.weekday
164 |             df_stamp['hour'] = df_stamp.date.dt.hour
165 |             df_stamp['minute'] = df_stamp.date.dt.minute
166 |             df_stamp['minute'] = df_stamp.minute // 15
167 |             data_stamp = df_stamp.drop(columns=['date']).values
168 | elif self.timeenc == 1:
169 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
170 | data_stamp = data_stamp.transpose(1, 0)
171 |
172 | self.data_x = data[border1:border2]
173 | self.data_y = data[border1:border2]
174 | self.data_stamp = data_stamp
175 |
176 | def __getitem__(self, index):
177 | s_begin = index
178 | s_end = s_begin + self.seq_len
179 | r_begin = s_end - self.label_len
180 | r_end = r_begin + self.label_len + self.pred_len
181 |
182 | seq_x = self.data_x[s_begin:s_end]
183 | seq_y = self.data_y[r_begin:r_end]
184 | seq_x_mark = self.data_stamp[s_begin:s_end]
185 | seq_y_mark = self.data_stamp[r_begin:r_end]
186 |
187 | return seq_x, seq_y, seq_x_mark, seq_y_mark
188 |
189 | def __len__(self):
190 | return len(self.data_x) - self.seq_len - self.pred_len + 1
191 |
192 | def inverse_transform(self, data):
193 | return self.scaler.inverse_transform(data)
194 |
195 |
196 | class Dataset_Custom(Dataset):
197 | def __init__(self, root_path, flag='train', size=None,
198 | features='S', data_path='ETTh1.csv',
199 | target='OT', scale=True, timeenc=0, freq='h', seasonal_patterns=None):
200 | # size [seq_len, label_len, pred_len]
201 | # info
202 |         if size is None:
203 | self.seq_len = 24 * 4 * 4
204 | self.label_len = 24 * 4
205 | self.pred_len = 24 * 4
206 | else:
207 | self.seq_len = size[0]
208 | self.label_len = size[1]
209 | self.pred_len = size[2]
210 | # init
211 | assert flag in ['train', 'test', 'val']
212 | type_map = {'train': 0, 'val': 1, 'test': 2}
213 | self.set_type = type_map[flag]
214 |
215 | self.features = features
216 | self.target = target
217 | self.scale = scale
218 | self.timeenc = timeenc
219 | self.freq = freq
220 |
221 | self.root_path = root_path
222 | self.data_path = data_path
223 | self.__read_data__()
224 |
225 | def __read_data__(self):
226 | self.scaler = StandardScaler()
227 | df_raw = pd.read_csv(os.path.join(self.root_path,
228 | self.data_path))
229 |
230 | '''
231 | df_raw.columns: ['date', ...(other features), target feature]
232 | '''
233 | cols = list(df_raw.columns)
234 | cols.remove(self.target)
235 | cols.remove('date')
236 | df_raw = df_raw[['date'] + cols + [self.target]]
237 | num_train = int(len(df_raw) * 0.7)
238 | num_test = int(len(df_raw) * 0.2)
239 | num_vali = len(df_raw) - num_train - num_test
240 | border1s = [0, num_train - self.seq_len, len(df_raw) - num_test - self.seq_len]
241 | border2s = [num_train, num_train + num_vali, len(df_raw)]
242 | border1 = border1s[self.set_type]
243 | border2 = border2s[self.set_type]
244 |
245 | if self.features == 'M' or self.features == 'MS':
246 | cols_data = df_raw.columns[1:]
247 | df_data = df_raw[cols_data]
248 | elif self.features == 'S':
249 | df_data = df_raw[[self.target]]
250 |
251 | if self.scale:
252 | train_data = df_data[border1s[0]:border2s[0]]
253 | self.scaler.fit(train_data.values)
254 | data = self.scaler.transform(df_data.values)
255 | else:
256 | data = df_data.values
257 |
258 | df_stamp = df_raw[['date']][border1:border2]
259 | df_stamp['date'] = pd.to_datetime(df_stamp.date)
260 | if self.timeenc == 0:
261 |             df_stamp['month'] = df_stamp.date.dt.month
262 |             df_stamp['day'] = df_stamp.date.dt.day
263 |             df_stamp['weekday'] = df_stamp.date.dt.weekday
264 |             df_stamp['hour'] = df_stamp.date.dt.hour
265 |             data_stamp = df_stamp.drop(columns=['date']).values
266 | elif self.timeenc == 1:
267 | data_stamp = time_features(pd.to_datetime(df_stamp['date'].values), freq=self.freq)
268 | data_stamp = data_stamp.transpose(1, 0)
269 |
270 | self.data_x = data[border1:border2]
271 | self.data_y = data[border1:border2]
272 | self.data_stamp = data_stamp
273 |
274 | def __getitem__(self, index):
275 | s_begin = index
276 | s_end = s_begin + self.seq_len
277 | r_begin = s_end - self.label_len
278 | r_end = r_begin + self.label_len + self.pred_len
279 |
280 | seq_x = self.data_x[s_begin:s_end]
281 | seq_y = self.data_y[r_begin:r_end]
282 | seq_x_mark = self.data_stamp[s_begin:s_end]
283 | seq_y_mark = self.data_stamp[r_begin:r_end]
284 |
285 | return seq_x, seq_y, seq_x_mark, seq_y_mark
286 |
287 | def __len__(self):
288 | return len(self.data_x) - self.seq_len - self.pred_len + 1
289 |
290 | def inverse_transform(self, data):
291 | return self.scaler.inverse_transform(data)
292 |
--------------------------------------------------------------------------------
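A small worked example of the sliding-window indexing all three datasets above share: seq_x spans [index, index + seq_len), and seq_y re-covers the last label_len steps of that window plus the next pred_len steps, so the decoder input overlaps the encoder input. The array is synthetic.

import numpy as np

seq_len, label_len, pred_len = 96, 48, 24
data = np.arange(1000)[:, None]                  # (length, n_features=1)

index = 0
s_begin, s_end = index, index + seq_len
r_begin = s_end - label_len
r_end = r_begin + label_len + pred_len

seq_x = data[s_begin:s_end]                      # (96, 1)
seq_y = data[r_begin:r_end]                      # (72, 1): 48 overlap + 24 future
n_samples = len(data) - seq_len - pred_len + 1   # mirrors __len__
print(seq_x.shape, seq_y.shape, n_samples)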
/exp/exp_short_term_forecasting.py:
--------------------------------------------------------------------------------
1 | from torch.optim import lr_scheduler
2 |
3 | from data_provider.data_factory import data_provider
4 | from data_provider.m4 import M4Meta
5 | from exp.exp_basic import Exp_Basic
6 | from utils.tools import EarlyStopping, adjust_learning_rate, visual, save_to_csv
7 | from utils.losses import mape_loss, mase_loss, smape_loss
8 | from utils.m4_summary import M4Summary
9 | import torch
10 | import torch.nn as nn
11 | from torch import optim
12 | import os
13 | import time
14 | import warnings
15 | import numpy as np
16 | import pandas
17 |
18 | warnings.filterwarnings('ignore')
19 |
20 |
21 | class Exp_Short_Term_Forecast(Exp_Basic):
22 | def __init__(self, args):
23 | super(Exp_Short_Term_Forecast, self).__init__(args)
24 |
25 | def _build_model(self):
26 | if self.args.data == 'm4':
27 |             self.args.pred_len = M4Meta.horizons_map[self.args.seasonal_patterns]  # horizon fixed by the M4 config
28 | self.args.seq_len = 2 * self.args.pred_len # input_len = 2*pred_len
29 | self.args.label_len = self.args.pred_len
30 | self.args.frequency_map = M4Meta.frequency_map[self.args.seasonal_patterns]
31 | model = self.model_dict[self.args.model].Model(self.args).float()
32 |
33 | if self.args.use_multi_gpu and self.args.use_gpu:
34 | model = nn.DataParallel(model, device_ids=self.args.device_ids)
35 | return model
36 |
37 | def _get_data(self, flag):
38 | data_set, data_loader = data_provider(self.args, flag)
39 | return data_set, data_loader
40 |
41 | def _select_optimizer(self):
42 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
43 | return model_optim
44 |
45 | def _select_criterion(self, loss_name='MSE'):
46 | if loss_name == 'MSE':
47 | return nn.MSELoss()
48 | elif loss_name == 'MAPE':
49 | return mape_loss()
50 | elif loss_name == 'MASE':
51 | return mase_loss()
52 | elif loss_name == 'SMAPE':
53 | return smape_loss()
54 |
55 | def train(self, setting):
56 | train_data, train_loader = self._get_data(flag='train')
57 | vali_data, vali_loader = self._get_data(flag='val')
58 |
59 | path = os.path.join(self.args.checkpoints, setting)
60 | if not os.path.exists(path):
61 | os.makedirs(path)
62 |
63 | time_now = time.time()
64 |
65 | train_steps = len(train_loader)
66 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
67 |
68 | model_optim = self._select_optimizer()
69 | criterion = self._select_criterion(self.args.loss)
70 |
71 |         scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
72 |                                             steps_per_epoch=train_steps,
73 |                                             pct_start=self.args.pct_start,
74 |                                             epochs=self.args.train_epochs,
75 |                                             max_lr=self.args.learning_rate)
76 |
77 | for epoch in range(self.args.train_epochs):
78 | iter_count = 0
79 | train_loss = []
80 |
81 | self.model.train()
82 | epoch_time = time.time()
83 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
84 | iter_count += 1
85 | model_optim.zero_grad()
86 |
87 | batch_x = batch_x.float().to(self.device)
88 | batch_y = batch_y.float().to(self.device)
89 |
90 | batch_y_mark = batch_y_mark.float().to(self.device)
91 |
92 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
93 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
94 |
95 | outputs = self.model(batch_x, None, dec_inp, None)
96 | f_dim = -1 if self.args.features == 'MS' else 0
97 | outputs = outputs[:, -self.args.pred_len:, f_dim:]
98 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
99 |
100 | batch_y_mark = batch_y_mark[:, -self.args.pred_len:, f_dim:].to(self.device)
101 | loss_value = criterion(batch_x, self.args.frequency_map, outputs, batch_y, batch_y_mark)
102 | # loss_sharpness = mse((outputs[:, 1:, :] - outputs[:, :-1, :]), (batch_y[:, 1:, :] - batch_y[:, :-1, :]))
103 | loss = loss_value # + loss_sharpness * 1e-5
104 | train_loss.append(loss.item())
105 |
106 | if (i + 1) % 100 == 0:
107 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
108 | speed = (time.time() - time_now) / iter_count
109 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
110 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
111 | iter_count = 0
112 | time_now = time.time()
113 |
114 | loss.backward()
115 | model_optim.step()
116 |
117 | if self.args.lradj == 'TST':
118 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
119 | scheduler.step()
120 |
121 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
122 | train_loss = np.average(train_loss)
123 | vali_loss = self.vali(train_loader, vali_loader, criterion)
124 | test_loss = vali_loss
125 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
126 | epoch + 1, train_steps, train_loss, vali_loss, test_loss))
127 | early_stopping(vali_loss, self.model, path)
128 | if early_stopping.early_stop:
129 | print("Early stopping")
130 | break
131 |
132 | if self.args.lradj != 'TST':
133 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
134 | else:
135 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
136 |
137 | best_model_path = path + '/' + 'checkpoint.pth'
138 | self.model.load_state_dict(torch.load(best_model_path))
139 |
140 | return self.model
141 |
142 | def vali(self, train_loader, vali_loader, criterion):
143 | x, _ = train_loader.dataset.last_insample_window()
144 | y = vali_loader.dataset.timeseries
145 | x = torch.tensor(x, dtype=torch.float32).to(self.device)
146 | x = x.unsqueeze(-1)
147 |
148 | self.model.eval()
149 | with torch.no_grad():
150 | # decoder input
151 | B, _, C = x.shape
152 | dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device)
153 | dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float()
154 |
155 | # encoder - decoder
156 | outputs = torch.zeros((B, self.args.pred_len, C)).float() # .to(self.device)
157 | id_list = np.arange(0, B, 500) # validation set size
158 | id_list = np.append(id_list, B)
159 | for i in range(len(id_list) - 1):
160 | x_enc = x[id_list[i]:id_list[i + 1]]
161 | outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x_enc, None,
162 | dec_inp[id_list[i]:id_list[i + 1]],
163 | None).detach().cpu()
164 | f_dim = -1 if self.args.features == 'MS' else 0
165 | outputs = outputs[:, -self.args.pred_len:, f_dim:]
166 | pred = outputs
167 | true = torch.from_numpy(np.array(y))
168 | batch_y_mark = torch.ones(true.shape)
169 |
170 | loss = criterion(x.detach().cpu()[:, :, 0], self.args.frequency_map, pred[:, :, 0], true, batch_y_mark)
171 |
172 | self.model.train()
173 | return loss
174 |
175 | def test(self, setting, test=0):
176 | _, train_loader = self._get_data(flag='train')
177 | _, test_loader = self._get_data(flag='test')
178 | x, _ = train_loader.dataset.last_insample_window()
179 | y = test_loader.dataset.timeseries
180 | x = torch.tensor(x, dtype=torch.float32).to(self.device)
181 | x = x.unsqueeze(-1)
182 |
183 | if test:
184 | print('loading model')
185 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
186 |
187 | folder_path = './test_results/' + setting + '/'
188 | if not os.path.exists(folder_path):
189 | os.makedirs(folder_path)
190 |
191 | self.model.eval()
192 | with torch.no_grad():
193 | B, _, C = x.shape
194 | dec_inp = torch.zeros((B, self.args.pred_len, C)).float().to(self.device)
195 | dec_inp = torch.cat([x[:, -self.args.label_len:, :], dec_inp], dim=1).float()
196 | # encoder - decoder
197 | outputs = torch.zeros((B, self.args.pred_len, C)).float().to(self.device)
198 | id_list = np.arange(0, B, 1)
199 | id_list = np.append(id_list, B)
200 | for i in range(len(id_list) - 1):
201 | x_enc = x[id_list[i]:id_list[i + 1]]
202 | outputs[id_list[i]:id_list[i + 1], :, :] = self.model(x_enc, None,
203 | dec_inp[id_list[i]:id_list[i + 1]], None)
204 |
205 | if id_list[i] % 1000 == 0:
206 | print(id_list[i])
207 |
208 | f_dim = -1 if self.args.features == 'MS' else 0
209 | outputs = outputs[:, -self.args.pred_len:, f_dim:]
210 | outputs = outputs.detach().cpu().numpy()
211 |
212 | preds = outputs
213 | trues = y
214 | x = x.detach().cpu().numpy()
215 |
216 | for i in range(0, preds.shape[0], preds.shape[0] // 10):
217 | gt = np.concatenate((x[i, :, 0], trues[i]), axis=0)
218 | pd = np.concatenate((x[i, :, 0], preds[i, :, 0]), axis=0)
219 | visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))
220 | save_to_csv(gt, pd, os.path.join(folder_path, str(i) + '.csv'))
221 |
222 | print('test shape:', preds.shape)
223 |
224 | # result save
225 | folder_path = './m4_results/' + self.args.model + '/'
226 | if not os.path.exists(folder_path):
227 | os.makedirs(folder_path)
228 |
229 | forecasts_df = pandas.DataFrame(preds[:, :, 0], columns=[f'V{i + 1}' for i in range(self.args.pred_len)])
230 | forecasts_df.index = test_loader.dataset.ids[:preds.shape[0]]
231 | forecasts_df.index.name = 'id'
232 | forecasts_df.set_index(forecasts_df.columns[0], inplace=True)
233 | forecasts_df.to_csv(folder_path + self.args.seasonal_patterns + '_forecast.csv')
234 |
235 | print(self.args.model)
236 | file_path = './m4_results/' + self.args.model + '/'
237 | if 'Weekly_forecast.csv' in os.listdir(file_path) \
238 | and 'Monthly_forecast.csv' in os.listdir(file_path) \
239 | and 'Yearly_forecast.csv' in os.listdir(file_path) \
240 | and 'Daily_forecast.csv' in os.listdir(file_path) \
241 | and 'Hourly_forecast.csv' in os.listdir(file_path) \
242 | and 'Quarterly_forecast.csv' in os.listdir(file_path):
243 | m4_summary = M4Summary(file_path, self.args.root_path)
244 | # m4_forecast.set_index(m4_winner_forecast.columns[0], inplace=True)
245 | smape_results, owa_results, mape, mase = m4_summary.evaluate()
246 | print('smape:', smape_results)
247 | print('mape:', mape)
248 | print('mase:', mase)
249 | print('owa:', owa_results)
250 | else:
251 |             print('After all 6 M4 subsets are finished, you can calculate the averaged metrics')
252 | return
253 |
254 |
--------------------------------------------------------------------------------
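A sketch of how Exp_Short_Term_Forecast assembles the decoder input: the last label_len observed steps followed by pred_len zeros as placeholders for the horizon. For M4 the code above fixes seq_len = 2 * pred_len and label_len = pred_len; the batch here is synthetic.

import torch

pred_len = 48
label_len = pred_len
batch_y = torch.randn(8, label_len + pred_len, 1)   # known history + targets

dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])
dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)
print(dec_inp.shape)                                # torch.Size([8, 96, 1])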
/layers/SelfAttention_Family.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from math import sqrt
5 |
6 | from einops import rearrange, repeat
7 |
8 | from utils.masking import TriangularCausalMask, ProbMask
9 | from reformer_pytorch import LSHSelfAttention
10 |
11 |
12 | class DSAttention(nn.Module):
13 | '''De-stationary Attention'''
14 |
15 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
16 | super(DSAttention, self).__init__()
17 | self.scale = scale
18 | self.mask_flag = mask_flag
19 | self.output_attention = output_attention
20 | self.dropout = nn.Dropout(attention_dropout)
21 |
22 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
23 | B, L, H, E = queries.shape
24 | _, S, _, D = values.shape
25 | scale = self.scale or 1. / sqrt(E)
26 |
27 | tau = 1.0 if tau is None else tau.unsqueeze(
28 | 1).unsqueeze(1) # B x 1 x 1 x 1
29 | delta = 0.0 if delta is None else delta.unsqueeze(
30 | 1).unsqueeze(1) # B x 1 x 1 x S
31 |
32 | # De-stationary Attention, rescaling pre-softmax score with learned de-stationary factors
33 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) * tau + delta
34 |
35 | if self.mask_flag:
36 | if attn_mask is None:
37 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
38 |
39 | scores.masked_fill_(attn_mask.mask, -np.inf)
40 |
41 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
42 | V = torch.einsum("bhls,bshd->blhd", A, values)
43 |
44 | if self.output_attention:
45 | return (V.contiguous(), A)
46 | else:
47 | return (V.contiguous(), None)
48 |
49 |
50 | class FullAttention(nn.Module):
51 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
52 | super(FullAttention, self).__init__()
53 | self.scale = scale
54 | self.mask_flag = mask_flag
55 | self.output_attention = output_attention
56 | self.dropout = nn.Dropout(attention_dropout)
57 |
58 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
59 | B, L, H, E = queries.shape
60 | _, S, _, D = values.shape
61 | scale = self.scale or 1. / sqrt(E)
62 |
63 | scores = torch.einsum("blhe,bshe->bhls", queries, keys)
64 |
65 | if self.mask_flag:
66 | if attn_mask is None:
67 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
68 |
69 | scores.masked_fill_(attn_mask.mask, -np.inf)
70 |
71 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
72 | V = torch.einsum("bhls,bshd->blhd", A, values)
73 |
74 | if self.output_attention:
75 | return (V.contiguous(), A)
76 | else:
77 | return (V.contiguous(), None)
78 |
79 |
80 | class ProbAttention(nn.Module):
81 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
82 | super(ProbAttention, self).__init__()
83 | self.factor = factor
84 | self.scale = scale
85 | self.mask_flag = mask_flag
86 | self.output_attention = output_attention
87 | self.dropout = nn.Dropout(attention_dropout)
88 |
89 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
90 | # Q [B, H, L, D]
91 | B, H, L_K, E = K.shape
92 | _, _, L_Q, _ = Q.shape
93 |
94 | # calculate the sampled Q_K
95 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
96 | # real U = U_part(factor*ln(L_k))*L_q
97 | index_sample = torch.randint(L_K, (L_Q, sample_k))
98 | K_sample = K_expand[:, :, torch.arange(
99 | L_Q).unsqueeze(1), index_sample, :]
100 | Q_K_sample = torch.matmul(
101 | Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
102 |
103 |         # find the top-k queries with the sparsity measurement
104 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
105 | M_top = M.topk(n_top, sorted=False)[1]
106 |
107 | # use the reduced Q to calculate Q_K
108 | Q_reduce = Q[torch.arange(B)[:, None, None],
109 | torch.arange(H)[None, :, None],
110 | M_top, :] # factor*ln(L_q)
111 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
112 |
113 | return Q_K, M_top
114 |
115 | def _get_initial_context(self, V, L_Q):
116 | B, H, L_V, D = V.shape
117 | if not self.mask_flag:
118 | # V_sum = V.sum(dim=-2)
119 | V_sum = V.mean(dim=-2)
120 | contex = V_sum.unsqueeze(-2).expand(B, H,
121 | L_Q, V_sum.shape[-1]).clone()
122 | else: # use mask
123 | # requires that L_Q == L_V, i.e. for self-attention only
124 | assert (L_Q == L_V)
125 | contex = V.cumsum(dim=-2)
126 | return contex
127 |
128 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
129 | B, H, L_V, D = V.shape
130 |
131 | if self.mask_flag:
132 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
133 | scores.masked_fill_(attn_mask.mask, -np.inf)
134 |
135 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
136 |
137 | context_in[torch.arange(B)[:, None, None],
138 | torch.arange(H)[None, :, None],
139 | index, :] = torch.matmul(attn, V).type_as(context_in)
140 | if self.output_attention:
141 | attns = (torch.ones([B, H, L_V, L_V]) /
142 | L_V).type_as(attn).to(attn.device)
143 | attns[torch.arange(B)[:, None, None], torch.arange(H)[
144 | None, :, None], index, :] = attn
145 | return (context_in, attns)
146 | else:
147 | return (context_in, None)
148 |
149 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
150 | B, L_Q, H, D = queries.shape
151 | _, L_K, _, _ = keys.shape
152 |
153 | queries = queries.transpose(2, 1)
154 | keys = keys.transpose(2, 1)
155 | values = values.transpose(2, 1)
156 |
157 | U_part = self.factor * \
158 | np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
159 | u = self.factor * \
160 | np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
161 |
162 | U_part = U_part if U_part < L_K else L_K
163 | u = u if u < L_Q else L_Q
164 |
165 | scores_top, index = self._prob_QK(
166 | queries, keys, sample_k=U_part, n_top=u)
167 |
168 | # add scale factor
169 | scale = self.scale or 1. / sqrt(D)
170 | if scale is not None:
171 | scores_top = scores_top * scale
172 | # get the context
173 | context = self._get_initial_context(values, L_Q)
174 | # update the context with selected top_k queries
175 | context, attn = self._update_context(
176 | context, values, scores_top, index, L_Q, attn_mask)
177 |
178 | return context.contiguous(), attn
179 |
180 |
181 | class AttentionLayer(nn.Module):
182 | def __init__(self, attention, d_model, n_heads, d_keys=None,
183 | d_values=None):
184 | super(AttentionLayer, self).__init__()
185 |
186 | d_keys = d_keys or (d_model // n_heads)
187 | d_values = d_values or (d_model // n_heads)
188 |
189 | self.inner_attention = attention
190 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
191 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
192 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
193 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
194 | self.n_heads = n_heads
195 |
196 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
197 | B, L, _ = queries.shape
198 | _, S, _ = keys.shape
199 | H = self.n_heads
200 |
201 | queries = self.query_projection(queries).view(B, L, H, -1)
202 | keys = self.key_projection(keys).view(B, S, H, -1)
203 | values = self.value_projection(values).view(B, S, H, -1)
204 |
205 | out, attn = self.inner_attention(
206 | queries,
207 | keys,
208 | values,
209 | attn_mask,
210 | tau=tau,
211 | delta=delta
212 | )
213 | out = out.view(B, L, -1)
214 |
215 | return self.out_projection(out), attn
216 |
217 |
218 | class ReformerLayer(nn.Module):
219 | def __init__(self, attention, d_model, n_heads, d_keys=None,
220 | d_values=None, causal=False, bucket_size=4, n_hashes=4):
221 | super().__init__()
222 | self.bucket_size = bucket_size
223 | self.attn = LSHSelfAttention(
224 | dim=d_model,
225 | heads=n_heads,
226 | bucket_size=bucket_size,
227 | n_hashes=n_hashes,
228 | causal=causal
229 | )
230 |
231 | def fit_length(self, queries):
232 | # inside reformer: assert N % (bucket_size * 2) == 0
233 | B, N, C = queries.shape
234 | if N % (self.bucket_size * 2) == 0:
235 | return queries
236 | else:
237 | # fill the time series
238 | fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
239 | return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)
240 |
241 | def forward(self, queries, keys, values, attn_mask, tau, delta):
242 |         # in Reformer: default queries=keys
243 | B, N, C = queries.shape
244 | queries = self.attn(self.fit_length(queries))[:, :N, :]
245 | return queries, None
246 |
247 | class TwoStageAttentionLayer(nn.Module):
248 | '''
249 | The Two Stage Attention (TSA) Layer
250 | input/output shape: [batch_size, Data_dim(D), Seg_num(L), d_model]
251 | '''
252 |
253 | def __init__(self, configs,
254 | seg_num, factor, d_model, n_heads, d_ff=None, dropout=0.1):
255 | super(TwoStageAttentionLayer, self).__init__()
256 | d_ff = d_ff or 4 * d_model
257 | self.time_attention = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
258 | output_attention=configs.output_attention), d_model, n_heads)
259 | self.dim_sender = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
260 | output_attention=configs.output_attention), d_model, n_heads)
261 | self.dim_receiver = AttentionLayer(FullAttention(False, configs.factor, attention_dropout=configs.dropout,
262 | output_attention=configs.output_attention), d_model, n_heads)
263 | self.router = nn.Parameter(torch.randn(seg_num, factor, d_model))
264 |
265 | self.dropout = nn.Dropout(dropout)
266 |
267 | self.norm1 = nn.LayerNorm(d_model)
268 | self.norm2 = nn.LayerNorm(d_model)
269 | self.norm3 = nn.LayerNorm(d_model)
270 | self.norm4 = nn.LayerNorm(d_model)
271 |
272 | self.MLP1 = nn.Sequential(nn.Linear(d_model, d_ff),
273 | nn.GELU(),
274 | nn.Linear(d_ff, d_model))
275 | self.MLP2 = nn.Sequential(nn.Linear(d_model, d_ff),
276 | nn.GELU(),
277 | nn.Linear(d_ff, d_model))
278 |
279 | def forward(self, x, attn_mask=None, tau=None, delta=None):
280 | # Cross Time Stage: Directly apply MSA to each dimension
281 | batch = x.shape[0]
282 | time_in = rearrange(x, 'b ts_d seg_num d_model -> (b ts_d) seg_num d_model')
283 | time_enc, attn = self.time_attention(
284 | time_in, time_in, time_in, attn_mask=None, tau=None, delta=None
285 | )
286 | dim_in = time_in + self.dropout(time_enc)
287 | dim_in = self.norm1(dim_in)
288 | dim_in = dim_in + self.dropout(self.MLP1(dim_in))
289 | dim_in = self.norm2(dim_in)
290 |
291 | # Cross Dimension Stage: use a small set of learnable vectors to aggregate and distribute messages to build the D-to-D connection
292 | dim_send = rearrange(dim_in, '(b ts_d) seg_num d_model -> (b seg_num) ts_d d_model', b=batch)
293 | batch_router = repeat(self.router, 'seg_num factor d_model -> (repeat seg_num) factor d_model', repeat=batch)
294 | dim_buffer, attn = self.dim_sender(batch_router, dim_send, dim_send, attn_mask=None, tau=None, delta=None)
295 | dim_receive, attn = self.dim_receiver(dim_send, dim_buffer, dim_buffer, attn_mask=None, tau=None, delta=None)
296 | dim_enc = dim_send + self.dropout(dim_receive)
297 | dim_enc = self.norm3(dim_enc)
298 | dim_enc = dim_enc + self.dropout(self.MLP2(dim_enc))
299 | dim_enc = self.norm4(dim_enc)
300 |
301 | final_out = rearrange(dim_enc, '(b seg_num) ts_d d_model -> b ts_d seg_num d_model', b=batch)
302 |
303 | return final_out
--------------------------------------------------------------------------------
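A shape-level sketch of the einsum contractions FullAttention and DSAttention use above: queries, keys, and values arrive as (B, L, H, E) with heads in dim 2, scores form a (B, H, L, S) grid, and the weighted values return to (B, L, H, E). The dimensions are toy values; scaling by 1/sqrt(E) matches the layer's default scale.

import torch
from math import sqrt

B, L, S, H, E = 2, 96, 96, 8, 64
queries = torch.randn(B, L, H, E)
keys = torch.randn(B, S, H, E)
values = torch.randn(B, S, H, E)

scores = torch.einsum("blhe,bshe->bhls", queries, keys)   # (B, H, L, S)
A = torch.softmax(scores / sqrt(E), dim=-1)               # attention weights
V = torch.einsum("bhls,bshd->blhd", A, values)            # (B, L, H, E)
print(V.shape)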
/layers/SelfAttention_Family1.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import numpy as np
4 | from math import sqrt
5 | from utils.masking import TriangularCausalMask, ProbMask
6 | from reformer_pytorch import LSHSelfAttention
7 | from einops import rearrange
8 |
9 |
10 | # Code implementation from https://github.com/thuml/Flowformer
11 | class FlowAttention(nn.Module):
12 | def __init__(self, attention_dropout=0.1):
13 | super(FlowAttention, self).__init__()
14 | self.dropout = nn.Dropout(attention_dropout)
15 |
16 | def kernel_method(self, x):
17 | return torch.sigmoid(x)
18 |
19 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
20 | queries = queries.transpose(1, 2)
21 | keys = keys.transpose(1, 2)
22 | values = values.transpose(1, 2)
23 | # kernel
24 | queries = self.kernel_method(queries)
25 | keys = self.kernel_method(keys)
26 | # incoming and outgoing
27 | normalizer_row = 1.0 / (torch.einsum("nhld,nhd->nhl", queries + 1e-6, keys.sum(dim=2) + 1e-6))
28 | normalizer_col = 1.0 / (torch.einsum("nhsd,nhd->nhs", keys + 1e-6, queries.sum(dim=2) + 1e-6))
29 | # reweighting
30 | normalizer_row_refine = (
31 | torch.einsum("nhld,nhd->nhl", queries + 1e-6, (keys * normalizer_col[:, :, :, None]).sum(dim=2) + 1e-6))
32 | normalizer_col_refine = (
33 | torch.einsum("nhsd,nhd->nhs", keys + 1e-6, (queries * normalizer_row[:, :, :, None]).sum(dim=2) + 1e-6))
34 | # competition and allocation
35 | normalizer_row_refine = torch.sigmoid(
36 | normalizer_row_refine * (float(queries.shape[2]) / float(keys.shape[2])))
37 | normalizer_col_refine = torch.softmax(normalizer_col_refine, dim=-1) * keys.shape[2] # B h L vis
38 | # multiply
39 | kv = keys.transpose(-2, -1) @ (values * normalizer_col_refine[:, :, :, None])
40 | x = (((queries @ kv) * normalizer_row[:, :, :, None]) * normalizer_row_refine[:, :, :, None]).transpose(1,
41 | 2).contiguous()
42 | return x, None
43 |
44 |
45 | # Code implementation from https://github.com/shreyansh26/FlashAttention-PyTorch
46 | class FlashAttention(nn.Module):
47 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
48 | super(FlashAttention, self).__init__()
49 | self.scale = scale
50 | self.mask_flag = mask_flag
51 | self.output_attention = output_attention
52 | self.dropout = nn.Dropout(attention_dropout)
53 |
54 | def flash_attention_forward(self, Q, K, V, mask=None):
55 | BLOCK_SIZE = 32
56 | NEG_INF = -1e10 # -infinity
57 | EPSILON = 1e-10
58 | # mask = torch.randint(0, 2, (128, 8)).to(device='cuda')
59 | O = torch.zeros_like(Q, requires_grad=True)
60 | l = torch.zeros(Q.shape[:-1])[..., None]
61 | m = torch.ones(Q.shape[:-1])[..., None] * NEG_INF
62 |
63 |         O = O.to(Q.device)  # follow the input tensor's device instead of hard-coding CUDA
64 |         l = l.to(Q.device)
65 |         m = m.to(Q.device)
66 |
67 | Q_BLOCK_SIZE = min(BLOCK_SIZE, Q.shape[-1])
68 | KV_BLOCK_SIZE = BLOCK_SIZE
69 |
70 | Q_BLOCKS = torch.split(Q, Q_BLOCK_SIZE, dim=2)
71 | K_BLOCKS = torch.split(K, KV_BLOCK_SIZE, dim=2)
72 | V_BLOCKS = torch.split(V, KV_BLOCK_SIZE, dim=2)
73 | if mask is not None:
74 | mask_BLOCKS = list(torch.split(mask, KV_BLOCK_SIZE, dim=1))
75 |
76 | Tr = len(Q_BLOCKS)
77 | Tc = len(K_BLOCKS)
78 |
79 | O_BLOCKS = list(torch.split(O, Q_BLOCK_SIZE, dim=2))
80 | l_BLOCKS = list(torch.split(l, Q_BLOCK_SIZE, dim=2))
81 | m_BLOCKS = list(torch.split(m, Q_BLOCK_SIZE, dim=2))
82 |
83 | for j in range(Tc):
84 | Kj = K_BLOCKS[j]
85 | Vj = V_BLOCKS[j]
86 | if mask is not None:
87 | maskj = mask_BLOCKS[j]
88 |
89 | for i in range(Tr):
90 | Qi = Q_BLOCKS[i]
91 | Oi = O_BLOCKS[i]
92 | li = l_BLOCKS[i]
93 | mi = m_BLOCKS[i]
94 |
95 | scale = 1 / np.sqrt(Q.shape[-1])
96 | Qi_scaled = Qi * scale
97 |
98 | S_ij = torch.einsum('... i d, ... j d -> ... i j', Qi_scaled, Kj)
99 | if mask is not None:
100 | # Masking
101 | maskj_temp = rearrange(maskj, 'b j -> b 1 1 j')
102 | S_ij = torch.where(maskj_temp > 0, S_ij, NEG_INF)
103 |
104 | m_block_ij, _ = torch.max(S_ij, dim=-1, keepdims=True)
105 | P_ij = torch.exp(S_ij - m_block_ij)
106 | if mask is not None:
107 | # Masking
108 | P_ij = torch.where(maskj_temp > 0, P_ij, 0.)
109 |
110 | l_block_ij = torch.sum(P_ij, dim=-1, keepdims=True) + EPSILON
111 |
112 | P_ij_Vj = torch.einsum('... i j, ... j d -> ... i d', P_ij, Vj)
113 |
114 | mi_new = torch.maximum(m_block_ij, mi)
115 | li_new = torch.exp(mi - mi_new) * li + torch.exp(m_block_ij - mi_new) * l_block_ij
116 |
117 | O_BLOCKS[i] = (li / li_new) * torch.exp(mi - mi_new) * Oi + (
118 | torch.exp(m_block_ij - mi_new) / li_new) * P_ij_Vj
119 | l_BLOCKS[i] = li_new
120 | m_BLOCKS[i] = mi_new
121 |
122 | O = torch.cat(O_BLOCKS, dim=2)
123 | l = torch.cat(l_BLOCKS, dim=2)
124 | m = torch.cat(m_BLOCKS, dim=2)
125 | return O, l, m
126 |
127 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
128 |         res, _, _ = self.flash_attention_forward(
129 |             queries.permute(0, 2, 1, 3), keys.permute(0, 2, 1, 3),
130 |             values.permute(0, 2, 1, 3), attn_mask)
131 | return res.permute(0, 2, 1, 3).contiguous(), None
132 |
133 |
134 | class FullAttention(nn.Module):
135 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
136 | super(FullAttention, self).__init__()
137 | self.scale = scale
138 | self.mask_flag = mask_flag
139 | self.output_attention = output_attention
140 | self.dropout = nn.Dropout(attention_dropout)
141 |
142 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
143 | B, L, H, E = queries.shape
144 | _, S, _, D = values.shape
145 | scale = self.scale or 1. / sqrt(E)
146 |
147 | scores = torch.einsum("blhe,bshe->bhls", queries, keys)
148 |
149 | if self.mask_flag:
150 | if attn_mask is None:
151 | attn_mask = TriangularCausalMask(B, L, device=queries.device)
152 |
153 | scores.masked_fill_(attn_mask.mask, -np.inf)
154 |
155 | A = self.dropout(torch.softmax(scale * scores, dim=-1))
156 | V = torch.einsum("bhls,bshd->blhd", A, values)
157 |
158 | if self.output_attention:
159 | return (V.contiguous(), A)
160 | else:
161 | return (V.contiguous(), None)
162 |
163 |
164 | # Code implementation from https://github.com/zhouhaoyi/Informer2020
165 | class ProbAttention(nn.Module):
166 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
167 | super(ProbAttention, self).__init__()
168 | self.factor = factor
169 | self.scale = scale
170 | self.mask_flag = mask_flag
171 | self.output_attention = output_attention
172 | self.dropout = nn.Dropout(attention_dropout)
173 |
174 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
175 | # Q [B, H, L, D]
176 | B, H, L_K, E = K.shape
177 | _, _, L_Q, _ = Q.shape
178 |
179 | # calculate the sampled Q_K
180 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
181 | # real U = U_part(factor*ln(L_k))*L_q
182 | index_sample = torch.randint(L_K, (L_Q, sample_k))
183 | K_sample = K_expand[:, :, torch.arange(
184 | L_Q).unsqueeze(1), index_sample, :]
185 |         Q_K_sample = torch.matmul(
186 |             Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze(-2)  # squeeze(-2), not squeeze(): a bare squeeze would also drop a batch dim of size 1
187 |
188 |         # find the top-u queries by the sparsity measurement M
189 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
190 | M_top = M.topk(n_top, sorted=False)[1]
191 |
192 | # use the reduced Q to calculate Q_K
193 | Q_reduce = Q[torch.arange(B)[:, None, None],
194 | torch.arange(H)[None, :, None],
195 | M_top, :] # factor*ln(L_q)
196 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
197 |
198 | return Q_K, M_top
199 |
200 | def _get_initial_context(self, V, L_Q):
201 | B, H, L_V, D = V.shape
202 | if not self.mask_flag:
203 | # V_sum = V.sum(dim=-2)
204 | V_sum = V.mean(dim=-2)
205 | contex = V_sum.unsqueeze(-2).expand(B, H,
206 | L_Q, V_sum.shape[-1]).clone()
207 | else: # use mask
208 | # requires that L_Q == L_V, i.e. for self-attention only
209 | assert (L_Q == L_V)
210 | contex = V.cumsum(dim=-2)
211 | return contex
212 |
213 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
214 | B, H, L_V, D = V.shape
215 |
216 | if self.mask_flag:
217 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
218 | scores.masked_fill_(attn_mask.mask, -np.inf)
219 |
220 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
221 |
222 | context_in[torch.arange(B)[:, None, None],
223 | torch.arange(H)[None, :, None],
224 | index, :] = torch.matmul(attn, V).type_as(context_in)
225 | if self.output_attention:
226 | attns = (torch.ones([B, H, L_V, L_V]) /
227 | L_V).type_as(attn).to(attn.device)
228 | attns[torch.arange(B)[:, None, None], torch.arange(H)[
229 | None, :, None], index, :] = attn
230 | return (context_in, attns)
231 | else:
232 | return (context_in, None)
233 |
234 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
235 | B, L_Q, H, D = queries.shape
236 | _, L_K, _, _ = keys.shape
237 |
238 | queries = queries.transpose(2, 1)
239 | keys = keys.transpose(2, 1)
240 | values = values.transpose(2, 1)
241 |
242 | U_part = self.factor * \
243 | np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
244 | u = self.factor * \
245 | np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
246 |
247 | U_part = U_part if U_part < L_K else L_K
248 | u = u if u < L_Q else L_Q
249 |
250 | scores_top, index = self._prob_QK(
251 | queries, keys, sample_k=U_part, n_top=u)
252 |
253 |         # apply the scale factor; `scale` is always truthy here,
254 |         # either the user-provided self.scale or the default 1/sqrt(D)
255 |         scale = self.scale or 1. / sqrt(D)
256 |         scores_top = scores_top * scale
257 | # get the context
258 | context = self._get_initial_context(values, L_Q)
259 | # update the context with selected top_k queries
260 | context, attn = self._update_context(
261 | context, values, scores_top, index, L_Q, attn_mask)
262 |
263 |         return context.transpose(2, 1).contiguous(), attn  # back to [B, L, H, D], matching FullAttention's output layout
264 |
265 |
266 | class AttentionLayer(nn.Module):
267 | def __init__(self, attention, d_model, n_heads, d_keys=None,
268 | d_values=None):
269 | super(AttentionLayer, self).__init__()
270 |
271 | d_keys = d_keys or (d_model // n_heads)
272 | d_values = d_values or (d_model // n_heads)
273 |
274 | self.inner_attention = attention
275 | self.query_projection = nn.Linear(d_model, d_keys * n_heads)
276 | self.key_projection = nn.Linear(d_model, d_keys * n_heads)
277 | self.value_projection = nn.Linear(d_model, d_values * n_heads)
278 | self.out_projection = nn.Linear(d_values * n_heads, d_model)
279 | self.n_heads = n_heads
280 |
281 | def forward(self, queries, keys, values, attn_mask, tau=None, delta=None):
282 | B, L, _ = queries.shape
283 | _, S, _ = keys.shape
284 | H = self.n_heads
285 |
286 | queries = self.query_projection(queries).view(B, L, H, -1)
287 | keys = self.key_projection(keys).view(B, S, H, -1)
288 | values = self.value_projection(values).view(B, S, H, -1)
289 |
290 | out, attn = self.inner_attention(
291 | queries,
292 | keys,
293 | values,
294 | attn_mask,
295 | tau=tau,
296 | delta=delta
297 | )
298 | out = out.view(B, L, -1)
299 |
300 | return self.out_projection(out), attn
301 |
302 |
303 | class ReformerLayer(nn.Module):
304 | def __init__(self, attention, d_model, n_heads, d_keys=None,
305 | d_values=None, causal=False, bucket_size=4, n_hashes=4):
306 | super().__init__()
307 | self.bucket_size = bucket_size
308 | self.attn = LSHSelfAttention(
309 | dim=d_model,
310 | heads=n_heads,
311 | bucket_size=bucket_size,
312 | n_hashes=n_hashes,
313 | causal=causal
314 | )
315 |
316 | def fit_length(self, queries):
317 | # inside reformer: assert N % (bucket_size * 2) == 0
318 | B, N, C = queries.shape
319 | if N % (self.bucket_size * 2) == 0:
320 | return queries
321 | else:
322 | # fill the time series
323 | fill_len = (self.bucket_size * 2) - (N % (self.bucket_size * 2))
324 | return torch.cat([queries, torch.zeros([B, fill_len, C]).to(queries.device)], dim=1)
325 |
326 | def forward(self, queries, keys, values, attn_mask, tau, delta):
327 |         # in Reformer: by default queries == keys
328 | B, N, C = queries.shape
329 | queries = self.attn(self.fit_length(queries))[:, :N, :]
330 | return queries, None
331 |
332 |
--------------------------------------------------------------------------------
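
Note: ProbAttention above (from Informer) avoids the full L_Q x L_K score matrix.
Each query scores only U_part = c * ceil(ln L_K) randomly sampled keys, the sparsity
measurement M (roughly max score minus mean score) ranks the queries, and only the
top u = c * ceil(ln L_Q) queries receive exact attention; the rest fall back to the
mean-value (or cumulative, when masked) context. A back-of-envelope sketch of the
sampling budget, with hypothetical sizes:

    import numpy as np

    L_Q = L_K = 96          # sequence lengths
    factor = 5              # the `factor` hyperparameter (c)
    U_part = factor * int(np.ceil(np.log(L_K)))   # 5 * 5 = 25 keys sampled per query
    u = factor * int(np.ceil(np.log(L_Q)))        # 25 queries kept out of 96
    print(U_part, u)        # 25 25
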
/exp/exp_long_term_forecasting.py:
--------------------------------------------------------------------------------
1 | from torch.optim import lr_scheduler
2 |
3 | from data_provider.data_factory import data_provider
4 | from exp.exp_basic import Exp_Basic
5 | from utils.tools import EarlyStopping, adjust_learning_rate, visual, save_to_csv, visual_weights
6 | from utils.metrics import metric
7 | import torch
8 | import torch.nn as nn
9 | from torch import optim
10 | import os
11 | import time
12 | import warnings
13 | import numpy as np
14 |
15 | warnings.filterwarnings('ignore')
16 |
17 |
18 | class Exp_Long_Term_Forecast(Exp_Basic):
19 | def __init__(self, args):
20 | super(Exp_Long_Term_Forecast, self).__init__(args)
21 |
22 | def _build_model(self):
23 | model = self.model_dict[self.args.model].Model(self.args).float()
24 |
25 | if self.args.use_multi_gpu and self.args.use_gpu:
26 | model = nn.DataParallel(model, device_ids=self.args.device_ids)
27 | return model
28 |
29 | def _get_data(self, flag):
30 | data_set, data_loader = data_provider(self.args, flag)
31 | return data_set, data_loader
32 |
33 | def _select_optimizer(self):
34 | model_optim = optim.Adam(self.model.parameters(), lr=self.args.learning_rate)
35 | return model_optim
36 |
37 | def _select_criterion(self):
38 | if self.args.data == 'PEMS':
39 | criterion = nn.L1Loss()
40 | else:
41 | criterion = nn.MSELoss()
42 | return criterion
43 |
44 | def vali(self, vali_data, vali_loader, criterion):
45 | total_loss = []
46 | self.model.eval()
47 | with torch.no_grad():
48 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(vali_loader):
49 | batch_x = batch_x.float().to(self.device)
50 | batch_y = batch_y.float().to(self.device)
51 |
52 | batch_x_mark = batch_x_mark.float().to(self.device)
53 | batch_y_mark = batch_y_mark.float().to(self.device)
54 |
55 | if 'PEMS' == self.args.data or 'Solar' == self.args.data:
56 | batch_x_mark = None
57 | batch_y_mark = None
58 |
59 | if self.args.down_sampling_layers == 0:
60 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
61 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
62 | else:
63 | dec_inp = None
64 |
65 | # encoder - decoder
66 | if self.args.use_amp:
67 | with torch.cuda.amp.autocast():
68 | if self.args.output_attention:
69 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
70 | else:
71 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
72 | else:
73 | if self.args.output_attention:
74 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
75 | else:
76 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
77 |                 f_dim = -1 if self.args.features == 'MS' else 0
78 |                 outputs = outputs[:, -self.args.pred_len:, f_dim:]  # align with the horizon slice used in train()
79 |                 pred = outputs.detach()
80 |                 true = batch_y[:, -self.args.pred_len:, f_dim:].detach()
81 |
82 | if self.args.data == 'PEMS':
83 | B, T, C = pred.shape
84 | pred = pred.cpu().numpy()
85 | true = true.cpu().numpy()
86 | pred = vali_data.inverse_transform(pred.reshape(-1, C)).reshape(B, T, C)
87 | true = vali_data.inverse_transform(true.reshape(-1, C)).reshape(B, T, C)
88 | mae, mse, rmse, mape, mspe = metric(pred, true)
89 | total_loss.append(mae)
90 |
91 | else:
92 | loss = criterion(pred, true)
93 | total_loss.append(loss.item())
94 |
95 | total_loss = np.average(total_loss)
96 | self.model.train()
97 | return total_loss
98 |
99 | def train(self, setting):
100 | train_data, train_loader = self._get_data(flag='train')
101 | vali_data, vali_loader = self._get_data(flag='val')
102 | test_data, test_loader = self._get_data(flag='test')
103 |
104 | path = os.path.join(self.args.checkpoints, setting)
105 | if not os.path.exists(path):
106 | os.makedirs(path)
107 |
108 | time_now = time.time()
109 |
110 | train_steps = len(train_loader)
111 | early_stopping = EarlyStopping(patience=self.args.patience, verbose=True)
112 |
113 | model_optim = self._select_optimizer()
114 | criterion = self._select_criterion()
115 |
116 | scheduler = lr_scheduler.OneCycleLR(optimizer=model_optim,
117 | steps_per_epoch=train_steps,
118 | pct_start=self.args.pct_start,
119 | epochs=self.args.train_epochs,
120 | max_lr=self.args.learning_rate)
121 |
122 | if self.args.use_amp:
123 | scaler = torch.cuda.amp.GradScaler()
124 |
125 | for epoch in range(self.args.train_epochs):
126 | iter_count = 0
127 | train_loss = []
128 |
129 | self.model.train()
130 | epoch_time = time.time()
131 |
132 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(train_loader):
133 | iter_count += 1
134 | model_optim.zero_grad()
135 |
136 | batch_x = batch_x.float().to(self.device)
137 | batch_y = batch_y.float().to(self.device)
138 |
139 | batch_x_mark = batch_x_mark.float().to(self.device)
140 | batch_y_mark = batch_y_mark.float().to(self.device)
141 |
142 | if 'PEMS' == self.args.data or 'Solar' == self.args.data:
143 | batch_x_mark = None
144 | batch_y_mark = None
145 |
146 | if self.args.down_sampling_layers == 0:
147 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
148 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
149 | else:
150 | dec_inp = None
151 |
152 | # encoder - decoder
153 | if self.args.use_amp:
154 | with torch.cuda.amp.autocast():
155 | if self.args.output_attention:
156 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
157 | else:
158 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
159 |
160 | f_dim = -1 if self.args.features == 'MS' else 0
161 | outputs = outputs[:, -self.args.pred_len:, f_dim:]
162 | batch_y = batch_y[:, -self.args.pred_len:, f_dim:].to(self.device)
163 | loss = criterion(outputs, batch_y)
164 | train_loss.append(loss.item())
165 | else:
166 | if self.args.output_attention:
167 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
168 | else:
169 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
170 |
171 |                     f_dim = -1 if self.args.features == 'MS' else 0
172 |                     outputs, batch_y = outputs[:, -self.args.pred_len:, f_dim:], batch_y[:, -self.args.pred_len:, f_dim:]
173 |                     loss = criterion(outputs, batch_y)
174 | train_loss.append(loss.item())
175 |
176 | if (i + 1) % 100 == 0:
177 | print("\titers: {0}, epoch: {1} | loss: {2:.7f}".format(i + 1, epoch + 1, loss.item()))
178 | speed = (time.time() - time_now) / iter_count
179 | left_time = speed * ((self.args.train_epochs - epoch) * train_steps - i)
180 | print('\tspeed: {:.4f}s/iter; left time: {:.4f}s'.format(speed, left_time))
181 | iter_count = 0
182 | time_now = time.time()
183 |
184 | if self.args.use_amp:
185 | scaler.scale(loss).backward()
186 | scaler.step(model_optim)
187 | scaler.update()
188 | else:
189 | loss.backward()
190 | model_optim.step()
191 |
192 | if self.args.lradj == 'TST':
193 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=False)
194 | scheduler.step()
195 |
196 | print("Epoch: {} cost time: {}".format(epoch + 1, time.time() - epoch_time))
197 | train_loss = np.average(train_loss)
198 | vali_loss = self.vali(vali_data, vali_loader, criterion)
199 | test_loss = self.vali(test_data, test_loader, criterion)
200 |
201 | print("Epoch: {0}, Steps: {1} | Train Loss: {2:.7f} Vali Loss: {3:.7f} Test Loss: {4:.7f}".format(
202 | epoch + 1, train_steps, train_loss, vali_loss, test_loss))
203 | early_stopping(vali_loss, self.model, path)
204 | if early_stopping.early_stop:
205 | print("Early stopping")
206 | break
207 |
208 | if self.args.lradj != 'TST':
209 | adjust_learning_rate(model_optim, scheduler, epoch + 1, self.args, printout=True)
210 | else:
211 | print('Updating learning rate to {}'.format(scheduler.get_last_lr()[0]))
212 |
213 | best_model_path = path + '/' + 'checkpoint.pth'
214 | self.model.load_state_dict(torch.load(best_model_path))
215 |
216 | return self.model
217 |
218 | def test(self, setting, test=0):
219 | test_data, test_loader = self._get_data(flag='test')
220 | if test:
221 | print('loading model')
222 | self.model.load_state_dict(torch.load(os.path.join('./checkpoints/' + setting, 'checkpoint.pth')))
223 |
224 | checkpoints_path = './checkpoints/' + setting + '/'
225 | preds = []
226 | trues = []
227 | folder_path = './test_results/' + setting + '/'
228 | if not os.path.exists(folder_path):
229 | os.makedirs(folder_path)
230 |
231 | self.model.eval()
232 | with torch.no_grad():
233 | for i, (batch_x, batch_y, batch_x_mark, batch_y_mark) in enumerate(test_loader):
234 | batch_x = batch_x.float().to(self.device)
235 | batch_y = batch_y.float().to(self.device)
236 |
237 | batch_x_mark = batch_x_mark.float().to(self.device)
238 | batch_y_mark = batch_y_mark.float().to(self.device)
239 |
240 | if 'PEMS' == self.args.data or 'Solar' == self.args.data:
241 | batch_x_mark = None
242 | batch_y_mark = None
243 |
244 | if self.args.down_sampling_layers == 0:
245 | dec_inp = torch.zeros_like(batch_y[:, -self.args.pred_len:, :]).float()
246 | dec_inp = torch.cat([batch_y[:, :self.args.label_len, :], dec_inp], dim=1).float().to(self.device)
247 | else:
248 | dec_inp = None
249 |
250 | # encoder - decoder
251 | if self.args.use_amp:
252 | with torch.cuda.amp.autocast():
253 | if self.args.output_attention:
254 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
255 | else:
256 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
257 | else:
258 | if self.args.output_attention:
259 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)[0]
260 |
261 | else:
262 | outputs = self.model(batch_x, batch_x_mark, dec_inp, batch_y_mark)
263 |
264 |                 f_dim = -1 if self.args.features == 'MS' else 0
265 |                 outputs = outputs[:, -self.args.pred_len:, f_dim:]
266 |                 batch_y = batch_y[:, -self.args.pred_len:, f_dim:]
267 |                 outputs = outputs.detach().cpu().numpy()
268 |                 batch_y = batch_y.detach().cpu().numpy()
269 |                 pred = outputs
270 |                 true = batch_y
271 |
272 | preds.append(pred)
273 | trues.append(true)
274 | if i % 20 == 0:
275 | input = batch_x.detach().cpu().numpy()
276 | if test_data.scale and self.args.inverse:
277 | shape = input.shape
278 | input = test_data.inverse_transform(input.squeeze(0)).reshape(shape)
279 | gt = np.concatenate((input[0, :, -1], true[0, :, -1]), axis=0)
280 | pd = np.concatenate((input[0, :, -1], pred[0, :, -1]), axis=0)
281 | visual(gt, pd, os.path.join(folder_path, str(i) + '.pdf'))
282 |
283 | preds = np.array(preds)
284 | trues = np.array(trues)
285 | print('test shape:', preds.shape, trues.shape)
286 | preds = preds.reshape(-1, preds.shape[-2], preds.shape[-1])
287 | trues = trues.reshape(-1, trues.shape[-2], trues.shape[-1])
288 | print('test shape:', preds.shape, trues.shape)
289 |
290 | if self.args.data == 'PEMS':
291 | B, T, C = preds.shape
292 | preds = test_data.inverse_transform(preds.reshape(-1, C)).reshape(B, T, C)
293 | trues = test_data.inverse_transform(trues.reshape(-1, C)).reshape(B, T, C)
294 |
295 | # result save
296 | folder_path = './results/' + setting + '/'
297 | if not os.path.exists(folder_path):
298 | os.makedirs(folder_path)
299 |
300 | mae, mse, rmse, mape, mspe = metric(preds, trues)
301 | print('mse:{}, mae:{}'.format(mse, mae))
302 | print('rmse:{}, mape:{}, mspe:{}'.format(rmse, mape, mspe))
303 |
304 | f = open("result_long_term_forecast.txt", 'a')
305 | f.write(setting + " \n")
306 | if self.args.data == 'PEMS':
307 | f.write('mae:{}, mape:{}, rmse:{}'.format(mae, mape, rmse))
308 | else:
309 | f.write('mse:{}, mae:{}'.format(mse, mae))
310 | f.write('\n')
311 | f.write('\n')
312 | f.close()
313 |
314 | np.save(folder_path + 'metrics.npy', np.array([mae, mse, rmse, mape, mspe]))
315 | np.save(folder_path + 'pred.npy', preds)
316 | np.save(folder_path + 'true.npy', trues)
317 | return
318 |
--------------------------------------------------------------------------------
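
Note: vali, train, and test above all assemble the decoder input the same way when
down_sampling_layers == 0: the last label_len ground-truth steps are kept and the
pred_len horizon is zero-filled before the two are concatenated. A standalone sketch
of that windowing (tensor sizes are hypothetical):

    import torch

    label_len, pred_len = 48, 96
    batch_y = torch.randn(32, label_len + pred_len, 7)  # loader yields label + horizon

    dec_inp = torch.zeros_like(batch_y[:, -pred_len:, :])             # zeroed horizon
    dec_inp = torch.cat([batch_y[:, :label_len, :], dec_inp], dim=1)  # prepend known context
    print(dec_inp.shape)  # torch.Size([32, 144, 7]) == [B, label_len + pred_len, C]
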