├── .gitignore ├── .ipynb_checkpoints └── data_proc_pems-checkpoint.ipynb ├── Formers ├── Autoformer.sh ├── FEDformer │ ├── LICENSE │ ├── README.md │ ├── data_provider │ │ ├── data_factory.py │ │ └── data_loader.py │ ├── exp │ │ ├── exp_basic.py │ │ └── exp_main.py │ ├── layers │ │ ├── AutoCorrelation.py │ │ ├── Autoformer_EncDec.py │ │ ├── Embed.py │ │ ├── FourierCorrelation.py │ │ ├── MultiWaveletCorrelation.py │ │ ├── SelfAttention_Family.py │ │ ├── Transformer_EncDec.py │ │ └── utils.py │ ├── models │ │ ├── Autoformer.py │ │ ├── FEDformer.py │ │ ├── Informer.py │ │ └── Transformer.py │ ├── run.py │ ├── scripts │ │ ├── LongForecasting.sh │ │ └── LookBackWindow.sh │ └── utils │ │ ├── masking.py │ │ ├── metrics.py │ │ ├── timefeatures.py │ │ └── tools.py ├── Informer.sh ├── Pyraformer │ ├── LEGAL.md │ ├── LICENSE │ ├── README.md │ ├── data_loader.py │ ├── long_range_main.py │ ├── preprocess_elect.py │ ├── preprocess_flow.py │ ├── preprocess_wind.py │ ├── pyraformer │ │ ├── Layers.py │ │ ├── Modules.py │ │ ├── PAM_TVM.py │ │ ├── Pyraformer_LR.py │ │ ├── Pyraformer_SS.py │ │ ├── SubLayers.py │ │ ├── embed.py │ │ ├── graph_attention.py │ │ ├── hierarchical_mm_tvm.py │ │ └── lib │ │ │ └── lib_hierarchical_mm_float32_cuda.so │ ├── requirements.txt │ ├── scripts │ │ ├── LongForecasting.sh │ │ └── LookBackWindow.sh │ ├── simulate_sin.py │ ├── single_step_main.py │ └── utils │ │ ├── timefeatures.py │ │ └── tools.py └── Transformer.sh ├── LICENSE ├── README.md ├── data_proc_pems.ipynb ├── data_provider ├── data_factory.py └── data_loader.py ├── exp ├── exp_basic.py ├── exp_main.py └── exp_main_JTFT.py ├── layers ├── AutoCorrelation.py ├── Autoformer_EncDec.py ├── Embed.py ├── FreqTST_backbone.py ├── PatchTST_backbone.py ├── PatchTST_layers.py ├── RevIN.py ├── SelfAttention_Family.py └── Transformer_EncDec.py ├── models ├── Autoformer.py ├── DLinear.py ├── Informer.py ├── JTFT.py ├── Linear.py ├── NLinear.py ├── PatchTST.py ├── Stat_models.py └── Transformer.py ├── 
requirements.txt ├── run_fd_analysis.py ├── run_longExp.py ├── scripts ├── electricity_m_gpu.sh ├── ettm2.sh ├── ettm2_m_gpu.sh ├── exchange.sh ├── illness.sh ├── pems04_m_gpu.sh ├── pems08_m_gpu.sh ├── traffic_m_gpu.sh └── weather.sh └── utils ├── masking.py ├── metrics.py ├── timefeatures.py └── tools.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.log 2 | *.pyc 3 | .vs/ 4 | dataset/ 5 | results/ 6 | pic/ 7 | checkpoints/ 8 | test_results/ 9 | result.txt 10 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/data_proc_pems-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 29, 6 | "id": "c2acb6ed", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import pandas as pd\n", 13 | "import datetime" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 38, 19 | "id": "aff427f7", 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "(16992, 307, 3)\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "data04=np.load(r'PEMS04.npz')\n", 32 | "print(data04['data'].shape)\n", 33 | "n_t,n_port,n_chan=data04['data'].shape\n", 34 | "df=pd.DataFrame(data04['data'][:,:,0])\n", 35 | "cols=[]\n", 36 | "for idx in range(n_port-1):\n", 37 | " cols.append(str(idx))\n", 38 | "cols.append('OT')\n", 39 | "cols=[]\n", 40 | "for idx in range(n_port-1):\n", 41 | " cols.append(str(idx))\n", 42 | "cols.append('OT')\n", 43 | "df.columns=cols\n", 44 | "df.insert(0,'date',r\"2020/1/1 0:10:00\")\n", 45 | "df.to_csv(\"PEMS04Flow.csv\",index=None)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 39, 51 | "id": "49811f66", 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 
# ALL scripts in this file come from Autoformer
#
# Runs run_longExp.py with the Autoformer model on every benchmark dataset and
# writes one log file per (dataset, prediction length) pair under
# ./logs/LongForecasting.

# -p creates both directory levels and is a no-op when they already exist.
mkdir -p ./logs/LongForecasting

random_seed=2021
model_name=Autoformer

# run_exp DATA_PATH ID_PREFIX DATA N_VARS SEQ_LEN LABEL_LEN PRED_LEN LOG_TAG [EXTRA_ARGS...]
#
# Launches a single training run. Every flag shared by all datasets lives here;
# EXTRA_ARGS (e.g. --train_epochs N) are appended after --itr, exactly where the
# hand-written invocations placed them. stdout goes to
# logs/LongForecasting/<model_name>_<LOG_TAG>_<PRED_LEN>.log.
run_exp() {
  exp_data_path=$1
  exp_id_prefix=$2
  exp_data=$3
  exp_n_vars=$4
  exp_seq_len=$5
  exp_label_len=$6
  exp_pred_len=$7
  exp_log_tag=$8
  shift 8

  python -u run_longExp.py \
    --random_seed "$random_seed" \
    --is_training 1 \
    --root_path ./dataset/ \
    --data_path "$exp_data_path" \
    --model_id "${exp_id_prefix}_${exp_seq_len}_${exp_pred_len}" \
    --model "$model_name" \
    --data "$exp_data" \
    --features M \
    --seq_len "$exp_seq_len" \
    --label_len "$exp_label_len" \
    --pred_len "$exp_pred_len" \
    --e_layers 2 \
    --d_layers 1 \
    --factor 3 \
    --enc_in "$exp_n_vars" \
    --dec_in "$exp_n_vars" \
    --c_out "$exp_n_vars" \
    --des 'Exp' \
    --itr 1 \
    "$@" >"logs/LongForecasting/${model_name}_${exp_log_tag}_${exp_pred_len}.log"
}

# Long-horizon benchmarks: look-back window 96, label length 48.
for pred_len in 96 192 336 720
do
  run_exp exchange_rate.csv exchange    custom 8   96 48 "$pred_len" exchange_rate --train_epochs 1
  run_exp electricity.csv   electricity custom 321 96 48 "$pred_len" electricity
  run_exp traffic.csv       traffic     custom 862 96 48 "$pred_len" traffic --train_epochs 3
  run_exp weather.csv       weather     custom 21  96 48 "$pred_len" weather --train_epochs 2
  # The log tags below keep the original mixed-case spelling so existing
  # log-parsing tooling still finds the files.
  run_exp ETTh1.csv         ETTh1       ETTh1  7   96 48 "$pred_len" Etth1
  run_exp ETTh2.csv         ETTh2       ETTh2  7   96 48 "$pred_len" Etth2
  run_exp ETTm1.csv         ETTm1       ETTm1  7   96 48 "$pred_len" Ettm1
  run_exp ETTm2.csv         ETTm2       ETTm2  7   96 48 "$pred_len" Ettm2
done

# Illness dataset: look-back window 36, label length 18, shorter horizons.
for pred_len in 24 36 48 60
do
  run_exp national_illness.csv ili custom 7 36 18 "$pred_len" ili
done
notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Formers/FEDformer/README.md: -------------------------------------------------------------------------------- 1 | # FEDformer 2 | From https://github.com/MAZiqing/FEDformer 3 | 4 | 5 | Frequency Enhanced Decomposed 6 | Transformer (FEDformer) is more efficient than 7 | standard Transformer with a linear complexity 8 | to the sequence length. 9 | 10 | Our empirical studies 11 | with six benchmark datasets show that compared 12 | with state-of-the-art methods, FEDformer can 13 | reduce prediction error by 14.8% and 22.6% 14 | for multivariate and univariate time series, 15 | respectively. 16 | 17 | 18 | ## Get Started 19 | 20 | 1. Install Python 3.6, PyTorch 1.9.0. 21 | 2. Download data. You can obtain all the six benchmarks from xxxx. 22 | 3. Train the model. We provide the experiment scripts of all benchmarks under the folder `./scripts`. You can reproduce the experiment results by: 23 | 24 | ```bash 25 | bash ./scripts/run_M.sh 26 | bash ./scripts/run_S.sh 27 | ``` 28 | 29 | 30 | ## Citation 31 | 32 | If you find this repo useful, please cite our paper. 33 | 34 | ``` 35 | xxxxx 36 | ``` 37 | 38 | ## Contact 39 | 40 | If you have any question or want to use the code, please contact xxx@xxxx . 
def data_provider(args, flag):
    """Build the dataset and its ``DataLoader`` for one split.

    Args:
        args: Experiment configuration. Reads ``data``, ``embed``,
            ``batch_size``, ``freq``, ``detail_freq`` (``'pred'`` only),
            ``root_path``, ``data_path``, ``seq_len``, ``label_len``,
            ``pred_len``, ``features``, ``target`` and ``num_workers``.
        flag: Split selector. ``'test'`` and ``'pred'`` disable shuffling;
            ``'pred'`` additionally uses batch size 1, keeps the last partial
            batch and switches to ``Dataset_Pred``. Any other value is
            forwarded to the dataset class as-is and gets a shuffled loader
            that drops the last partial batch.

    Returns:
        A ``(data_set, data_loader)`` tuple.

    Raises:
        KeyError: If ``args.data`` is not a key of ``data_dict``.
    """
    Data = data_dict[args.data]
    # Time features are encoded only for the 'timeF' embedding.
    timeenc = 0 if args.embed != 'timeF' else 1

    if flag == 'test':
        shuffle_flag = False
        drop_last = True
        batch_size = args.batch_size
        freq = args.freq
    elif flag == 'pred':
        # Dataset_Pred is not part of this module's top-level imports, so this
        # branch used to fail with NameError; import it where it is needed.
        # NOTE(review): assumes data_provider/data_loader.py defines
        # Dataset_Pred -- confirm.
        from data_provider.data_loader import Dataset_Pred

        shuffle_flag = False
        drop_last = False
        batch_size = 1
        freq = args.detail_freq
        Data = Dataset_Pred
    else:
        shuffle_flag = True
        drop_last = True
        batch_size = args.batch_size
        freq = args.freq

    data_set = Data(
        root_path=args.root_path,
        data_path=args.data_path,
        flag=flag,
        size=[args.seq_len, args.label_len, args.pred_len],
        features=args.features,
        target=args.target,
        timeenc=timeenc,
        freq=freq
    )
    print(flag, len(data_set))
    data_loader = DataLoader(
        data_set,
        batch_size=batch_size,
        shuffle=shuffle_flag,
        num_workers=args.num_workers,
        drop_last=drop_last)
    return data_set, data_loader
def get_frequency_modes(seq_len, modes=64, mode_select_method='random'):
    """
    Pick which frequency modes to keep.

    At most ``min(modes, seq_len // 2)`` indices are returned, in ascending
    order:
    'random' draws them (via ``np.random.shuffle``) from ``0 .. seq_len // 2 - 1``;
    any other value takes the lowest indices.
    """
    half_len = seq_len // 2
    n_keep = min(modes, half_len)

    if mode_select_method != 'random':
        # Lowest modes: already ascending, nothing to sort.
        return list(range(0, n_keep))

    candidates = list(range(0, half_len))
    np.random.shuffle(candidates)
    return sorted(candidates[:n_keep])
class FourierCrossAttention(nn.Module):
    """
    1D Fourier Cross Attention layer. It does FFT, linear transform, attention mechanism and Inverse FFT.

    Only the frequency modes chosen at construction time (``index_q`` for the
    queries, ``index_kv`` for the keys) take part in the attention; every
    other bin of the output spectrum is left at zero.
    """

    def __init__(self, in_channels, out_channels, seq_len_q, seq_len_kv, modes=64, mode_select_method='random',
                 activation='tanh', policy=0):
        """
        Args:
            in_channels: Input channel count; the weight tensor uses ``in_channels // 8``.
            out_channels: Output channel count; the weight tensor uses ``out_channels // 8``.
            seq_len_q: Query sequence length used to choose ``index_q``.
            seq_len_kv: Key/value sequence length used to choose ``index_kv``.
            modes: Upper bound on the number of frequency modes kept.
            mode_select_method: Forwarded to ``get_frequency_modes``.
            activation: ``'tanh'`` or ``'softmax'``; checked in ``forward``.
            policy: Accepted for interface compatibility; not used by this class.
        """
        super(FourierCrossAttention, self).__init__()
        print(' fourier enhanced cross attention used!')
        self.activation = activation
        self.in_channels = in_channels
        self.out_channels = out_channels
        # get modes for queries and keys (& values) on frequency domain
        self.index_q = get_frequency_modes(seq_len_q, modes=modes, mode_select_method=mode_select_method)
        self.index_kv = get_frequency_modes(seq_len_kv, modes=modes, mode_select_method=mode_select_method)

        print('modes_q={}, index_q={}'.format(len(self.index_q), self.index_q))
        print('modes_kv={}, index_kv={}'.format(len(self.index_kv), self.index_kv))

        self.scale = (1 / (in_channels * out_channels))
        # Leading dimension is hard-coded to 8 (presumably the number of
        # attention heads -- confirm against the caller's n_heads).
        self.weights1 = nn.Parameter(
            self.scale * torch.rand(8, in_channels // 8, out_channels // 8, len(self.index_q), dtype=torch.cfloat))

    # Complex multiplication
    def compl_mul1d(self, input, weights):
        """Contract ``input`` (b, h, i) with ``weights`` (h, i, o) over ``i``, giving (b, h, o)."""
        return torch.einsum("bhi,hio->bho", input, weights)

    def forward(self, q, k, v, mask):
        """
        Apply frequency-domain cross attention.

        Args:
            q: Queries, size = [B, L, H, E].
            k: Keys, permuted the same way as ``q``.
            v: Not used: the key spectrum is contracted with the attention
                weights instead of a value spectrum.
                NOTE(review): looks like upstream behaviour -- confirm before
                changing, since trained checkpoints depend on it.
            mask: Not used.

        Returns:
            ``(out, None)`` where ``out`` is the inverse real FFT of the
            attended spectrum, with last dimension ``L``.

        Raises:
            NotImplementedError: If ``self.activation`` is neither ``'tanh'``
                nor ``'softmax'``.
        """
        # size = [B, L, H, E]
        B, L, H, E = q.shape
        xq = q.permute(0, 2, 3, 1)  # size = [B, H, E, L]
        xk = k.permute(0, 2, 3, 1)

        # Compute Fourier coefficients, keeping only the selected query modes;
        # modes beyond the available spectrum are left as zeros.
        xq_ft_ = torch.zeros(B, H, E, len(self.index_q), device=xq.device, dtype=torch.cfloat)
        xq_ft = torch.fft.rfft(xq, dim=-1)
        for i, j in enumerate(self.index_q):
            if j >= xq_ft.shape[3]:
                continue
            xq_ft_[:, :, :, i] = xq_ft[:, :, :, j]
        # Same selection for the keys.
        xk_ft_ = torch.zeros(B, H, E, len(self.index_kv), device=xq.device, dtype=torch.cfloat)
        xk_ft = torch.fft.rfft(xk, dim=-1)
        for i, j in enumerate(self.index_kv):
            if j >= xk_ft.shape[3]:
                continue
            xk_ft_[:, :, :, i] = xk_ft[:, :, :, j]

        # perform attention mechanism on frequency domain
        xqk_ft = (torch.einsum("bhex,bhey->bhxy", xq_ft_, xk_ft_))
        if self.activation == 'tanh':
            xqk_ft = xqk_ft.tanh()
        elif self.activation == 'softmax':
            # Softmax over magnitudes, then lift back to complex with zero imaginary part.
            xqk_ft = torch.softmax(abs(xqk_ft), dim=-1)
            xqk_ft = torch.complex(xqk_ft, torch.zeros_like(xqk_ft))
        else:
            # NotImplementedError is still caught by ``except Exception``.
            raise NotImplementedError(
                '{} activation function is not implemented'.format(self.activation))
        xqkv_ft = torch.einsum("bhxy,bhey->bhex", xqk_ft, xk_ft_)
        xqkvw = torch.einsum("bhex,heox->bhox", xqkv_ft, self.weights1)
        # Scatter the attended modes back to their original frequency bins.
        out_ft = torch.zeros(B, H, E, L // 2 + 1, device=xq.device, dtype=torch.cfloat)
        for i, j in enumerate(self.index_q):
            if i >= xqkvw.shape[3] or j >= out_ft.shape[3]:
                continue
            out_ft[:, :, :, j] = xqkvw[:, :, :, i]
        # Return to time domain
        out = torch.fft.irfft(out_ft / self.in_channels / self.out_channels, n=xq.size(-1))
        return (out, None)
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers, followed by an optional norm."""

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        if conv_layers is None:
            self.conv_layers = None
        else:
            self.conv_layers = nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Run ``x`` ([B, L, D]) through the stack; return ``(output, per-layer attention list)``."""
        collected = []

        if self.conv_layers is None:
            # Plain stack: every attention layer receives the mask.
            for layer in self.attn_layers:
                x, weights = layer(x, attn_mask=attn_mask)
                collected.append(weights)
        else:
            # zip() stops at the shorter list, so each conv layer follows its
            # paired attention layer.
            for layer, conv in zip(self.attn_layers, self.conv_layers):
                x, weights = layer(x, attn_mask=attn_mask)
                x = conv(x)
                collected.append(weights)
            # The last attention layer is then applied without a mask.
            x, weights = self.attn_layers[-1](x)
            collected.append(weights)

        if self.norm is not None:
            x = self.norm(x)

        return x, collected
class Decoder(nn.Module):
    """Sequential stack of decoder layers with an optional final norm and output projection."""

    def __init__(self, layers, norm_layer=None, projection=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer
        self.projection = projection

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Feed ``x`` through every layer together with ``cross``, then apply norm and projection if set."""
        hidden = x
        for block in self.layers:
            hidden = block(hidden, cross, x_mask=x_mask, cross_mask=cross_mask)

        # Post-processing runs in a fixed order: norm first, projection second.
        for post in (self.norm, self.projection):
            if post is not None:
                hidden = post(hidden)
        return hidden
Model(nn.Module): 15 | """ 16 | Autoformer is the first method to achieve the series-wise connection, 17 | with inherent O(LlogL) complexity 18 | """ 19 | def __init__(self, configs): 20 | super(Model, self).__init__() 21 | self.seq_len = configs.seq_len 22 | self.label_len = configs.label_len 23 | self.pred_len = configs.pred_len 24 | self.output_attention = configs.output_attention 25 | 26 | # Decomp 27 | kernel_size = configs.moving_avg 28 | self.decomp = series_decomp(kernel_size) 29 | 30 | # Embedding 31 | # The series-wise connection inherently contains the sequential information. 32 | # Thus, we can discard the position embedding of transformers. 33 | self.enc_embedding = DataEmbedding_wo_pos(configs.enc_in, configs.d_model, configs.embed, configs.freq, 34 | configs.dropout) 35 | self.dec_embedding = DataEmbedding_wo_pos(configs.dec_in, configs.d_model, configs.embed, configs.freq, 36 | configs.dropout) 37 | 38 | # Encoder 39 | self.encoder = Encoder( 40 | [ 41 | EncoderLayer( 42 | AutoCorrelationLayer( 43 | AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, 44 | output_attention=configs.output_attention), 45 | configs.d_model, configs.n_heads), 46 | configs.d_model, 47 | configs.d_ff, 48 | moving_avg=configs.moving_avg, 49 | dropout=configs.dropout, 50 | activation=configs.activation 51 | ) for l in range(configs.e_layers) 52 | ], 53 | norm_layer=my_Layernorm(configs.d_model) 54 | ) 55 | # Decoder 56 | self.decoder = Decoder( 57 | [ 58 | DecoderLayer( 59 | AutoCorrelationLayer( 60 | AutoCorrelation(True, configs.factor, attention_dropout=configs.dropout, 61 | output_attention=False), 62 | configs.d_model, configs.n_heads), 63 | AutoCorrelationLayer( 64 | AutoCorrelation(False, configs.factor, attention_dropout=configs.dropout, 65 | output_attention=False), 66 | configs.d_model, configs.n_heads), 67 | configs.d_model, 68 | configs.c_out, 69 | configs.d_ff, 70 | moving_avg=configs.moving_avg, 71 | dropout=configs.dropout, 72 | 
class Model(nn.Module):
    """
    Informer: encoder-decoder forecaster using ProbSparse attention, O(LlogL) complexity
    """
    def __init__(self, configs):
        super().__init__()
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Input embeddings for the encoder and decoder streams
        self.enc_embedding = DataEmbedding(
            configs.enc_in, configs.d_model, configs.embed, configs.freq, configs.dropout)
        self.dec_embedding = DataEmbedding(
            configs.dec_in, configs.d_model, configs.embed, configs.freq, configs.dropout)

        # Encoder: e_layers attention blocks, built one by one so the
        # construction (and parameter-initialisation) order stays the same.
        encoder_blocks = []
        for _ in range(configs.e_layers):
            encoder_blocks.append(
                EncoderLayer(
                    AttentionLayer(
                        ProbAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=configs.output_attention),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        # With distilling enabled, e_layers - 1 conv layers are interleaved.
        if configs.distil:
            distil_blocks = [ConvLayer(configs.d_model) for _ in range(configs.e_layers - 1)]
        else:
            distil_blocks = None
        self.encoder = Encoder(
            encoder_blocks,
            distil_blocks,
            norm_layer=torch.nn.LayerNorm(configs.d_model),
        )

        # Decoder: each block gets a self-attention (first flag True) and a
        # cross-attention (first flag False) module.
        # NOTE(review): the flag is presumably the causal-mask switch -- confirm
        # in layers/SelfAttention_Family.py.
        decoder_blocks = []
        for _ in range(configs.d_layers):
            self_attn = AttentionLayer(
                ProbAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                configs.d_model, configs.n_heads)
            cross_attn = AttentionLayer(
                ProbAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                configs.d_model, configs.n_heads)
            decoder_blocks.append(
                DecoderLayer(
                    self_attn,
                    cross_attn,
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
            )
        self.decoder = Decoder(
            decoder_blocks,
            norm_layer=torch.nn.LayerNorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True),
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """
        Encode ``x_enc``, decode ``x_dec`` against the encoder output and
        return the last ``pred_len`` steps of the decoder output, plus the
        encoder attentions when ``output_attention`` is set.
        """
        memory, attns = self.encoder(self.enc_embedding(x_enc, x_mark_enc), attn_mask=enc_self_mask)

        decoded = self.decoder(self.dec_embedding(x_dec, x_mark_dec), memory,
                               x_mask=dec_self_mask, cross_mask=dec_enc_mask)

        forecast = decoded[:, -self.pred_len:, :]  # [B, L, D]
        if self.output_attention:
            return forecast, attns
        return forecast
output_attention=False), 44 | configs.d_model, configs.n_heads), 45 | AttentionLayer( 46 | FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False), 47 | configs.d_model, configs.n_heads), 48 | configs.d_model, 49 | configs.d_ff, 50 | dropout=configs.dropout, 51 | activation=configs.activation, 52 | ) 53 | for l in range(configs.d_layers) 54 | ], 55 | norm_layer=torch.nn.LayerNorm(configs.d_model), 56 | projection=nn.Linear(configs.d_model, configs.c_out, bias=True) 57 | ) 58 | 59 | def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, 60 | enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None): 61 | 62 | enc_out = self.enc_embedding(x_enc, x_mark_enc) 63 | enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask) 64 | 65 | dec_out = self.dec_embedding(x_dec, x_mark_dec) 66 | dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask) 67 | 68 | if self.output_attention: 69 | return dec_out[:, -self.pred_len:, :], attns 70 | else: 71 | return dec_out[:, -self.pred_len:, :] # [B, L, D] 72 | -------------------------------------------------------------------------------- /Formers/FEDformer/scripts/LongForecasting.sh: -------------------------------------------------------------------------------- 1 | # cd FEDformer 2 | if [ ! -d "../logs" ]; then 3 | mkdir ../logs 4 | fi 5 | 6 | if [ ! 
-d "../logs/LongForecasting" ]; then 7 | mkdir ../logs/LongForecasting 8 | fi 9 | 10 | for preLen in 96 192 336 720 11 | do 12 | # ETTm1 13 | python -u run.py \ 14 | --is_training 1 \ 15 | --data_path ETTm1.csv \ 16 | --task_id ETTm1 \ 17 | --model FEDformer \ 18 | --data ETTm1 \ 19 | --features M \ 20 | --seq_len 96 \ 21 | --label_len 48 \ 22 | --pred_len $preLen \ 23 | --e_layers 2 \ 24 | --d_layers 1 \ 25 | --factor 3 \ 26 | --enc_in 7 \ 27 | --dec_in 7 \ 28 | --c_out 7 \ 29 | --des 'Exp' \ 30 | --d_model 512 \ 31 | --itr 1 >../logs/LongForecasting/FEDformer_ETTm1_$preLen.log 32 | 33 | # ETTh1 34 | python -u run.py \ 35 | --is_training 1 \ 36 | --data_path ETTh1.csv \ 37 | --task_id ETTh1 \ 38 | --model FEDformer \ 39 | --data ETTh1 \ 40 | --features S \ 41 | --seq_len 96 \ 42 | --label_len 48 \ 43 | --pred_len $preLen \ 44 | --e_layers 2 \ 45 | --d_layers 1 \ 46 | --factor 3 \ 47 | --enc_in 7 \ 48 | --dec_in 7 \ 49 | --c_out 7 \ 50 | --des 'Exp' \ 51 | --d_model 512 \ 52 | --itr 1 >../logs/LongForecasting/FEDformer_ETTh1_$preLen.log 53 | 54 | # ETTm2 55 | python -u run.py \ 56 | --is_training 1 \ 57 | --data_path ETTm2.csv \ 58 | --task_id ETTm2 \ 59 | --model FEDformer \ 60 | --data ETTm2 \ 61 | --features M \ 62 | --seq_len 96 \ 63 | --label_len 48 \ 64 | --pred_len $preLen \ 65 | --e_layers 2 \ 66 | --d_layers 1 \ 67 | --factor 3 \ 68 | --enc_in 7 \ 69 | --dec_in 7 \ 70 | --c_out 7 \ 71 | --des 'Exp' \ 72 | --d_model 512 \ 73 | --itr 1 >../logs/LongForecasting/FEDformer_ETTm2_$preLen.log 74 | 75 | # ETTh2 76 | python -u run.py \ 77 | --is_training 1 \ 78 | --data_path ETTh2.csv \ 79 | --task_id ETTh2 \ 80 | --model FEDformer \ 81 | --data ETTh2 \ 82 | --features M \ 83 | --seq_len 96 \ 84 | --label_len 48 \ 85 | --pred_len $preLen \ 86 | --e_layers 2 \ 87 | --d_layers 1 \ 88 | --factor 3 \ 89 | --enc_in 7 \ 90 | --dec_in 7 \ 91 | --c_out 7 \ 92 | --des 'Exp' \ 93 | --d_model 512 \ 94 | --itr 1 >../logs/LongForecasting/FEDformer_ETTh2_$preLen.log 95 | 
96 | # electricity 97 | python -u run.py \ 98 | --is_training 1 \ 99 | --data_path electricity.csv \ 100 | --task_id ECL \ 101 | --model FEDformer \ 102 | --data custom \ 103 | --features M \ 104 | --seq_len 96 \ 105 | --label_len 48 \ 106 | --pred_len $preLen \ 107 | --e_layers 2 \ 108 | --d_layers 1 \ 109 | --factor 3 \ 110 | --enc_in 321 \ 111 | --dec_in 321 \ 112 | --c_out 321 \ 113 | --des 'Exp' \ 114 | --itr 1 >../logs/LongForecasting/FEDformer_electricity_$preLen.log 115 | 116 | # exchange 117 | python -u run.py \ 118 | --is_training 1 \ 119 | --data_path exchange_rate.csv \ 120 | --task_id Exchange \ 121 | --model FEDformer \ 122 | --data custom \ 123 | --features S \ 124 | --seq_len 96 \ 125 | --label_len 48 \ 126 | --pred_len $preLen \ 127 | --e_layers 2 \ 128 | --d_layers 1 \ 129 | --factor 3 \ 130 | --enc_in 8 \ 131 | --dec_in 8 \ 132 | --c_out 8 \ 133 | --des 'Exp' \ 134 | --itr 1 >../logs/LongForecasting/FEDformer_exchange_rate_$preLen.log 135 | 136 | # traffic 137 | python -u run.py \ 138 | --is_training 1 \ 139 | --data_path traffic.csv \ 140 | --task_id traffic \ 141 | --model FEDformer \ 142 | --data custom \ 143 | --features S \ 144 | --seq_len 96 \ 145 | --label_len 48 \ 146 | --pred_len $preLen \ 147 | --e_layers 2 \ 148 | --d_layers 1 \ 149 | --factor 3 \ 150 | --enc_in 862 \ 151 | --dec_in 862 \ 152 | --c_out 862 \ 153 | --des 'Exp' \ 154 | --itr 1 \ 155 | --train_epochs 3 >../logs/LongForecasting/FEDformer_traffic_$preLen.log 156 | 157 | # weather 158 | python -u run.py \ 159 | --is_training 1 \ 160 | --data_path weather.csv \ 161 | --task_id weather \ 162 | --model FEDformer \ 163 | --data custom \ 164 | --features S \ 165 | --seq_len 96 \ 166 | --label_len 48 \ 167 | --pred_len $preLen \ 168 | --e_layers 2 \ 169 | --d_layers 1 \ 170 | --factor 3 \ 171 | --enc_in 21 \ 172 | --dec_in 21 \ 173 | --c_out 21 \ 174 | --des 'Exp' \ 175 | --itr 1 >../logs/LongForecasting/FEDformer_weather_$preLen.log 176 | done 177 | 178 | 179 | for preLen 
in 24 36 48 60 180 | do 181 | # illness 182 | python -u run.py \ 183 | --is_training 1 \ 184 | --data_path national_illness.csv \ 185 | --task_id ili \ 186 | --model FEDformer \ 187 | --data custom \ 188 | --features S \ 189 | --seq_len 36 \ 190 | --label_len 18 \ 191 | --pred_len $preLen \ 192 | --e_layers 2 \ 193 | --d_layers 1 \ 194 | --factor 3 \ 195 | --enc_in 7 \ 196 | --dec_in 7 \ 197 | --c_out 7 \ 198 | --des 'Exp' \ 199 | --itr 1 >../logs/LongForecasting/FEDformer_ili_$preLen.log 200 | done 201 | 202 | # cd .. -------------------------------------------------------------------------------- /Formers/FEDformer/scripts/LookBackWindow.sh: -------------------------------------------------------------------------------- 1 | # cd FEDformer 2 | if [ ! -d "../logs" ]; then 3 | mkdir ../logs 4 | fi 5 | 6 | if [ ! -d "../logs/LookBackWindow" ]; then 7 | mkdir ../logs/LookBackWindow 8 | fi 9 | 10 | for seqLen in 36 48 60 72 144 288 11 | do 12 | for pred_len in 24 576 13 | do 14 | python -u run.py \ 15 | --is_training 1 \ 16 | --root_path ../dataset/ \ 17 | --data_path ETTm1.csv \ 18 | --task_id ETTm1 \ 19 | --model FEDformer \ 20 | --data ETTm1 \ 21 | --features M \ 22 | --seq_len $seqLen \ 23 | --label_len 48 \ 24 | --pred_len $pred_len \ 25 | --e_layers 2 \ 26 | --d_layers 1 \ 27 | --factor 3 \ 28 | --enc_in 7 \ 29 | --dec_in 7 \ 30 | --c_out 7 \ 31 | --des 'Exp' \ 32 | --d_model 512 \ 33 | --itr 1 >../logs/LookBackWindow/FEDformer_ETTm1_$seqLen'_'$pred_len.log 34 | 35 | python -u run.py \ 36 | --is_training 1 \ 37 | --root_path ../dataset/ \ 38 | --data_path ETTm2.csv \ 39 | --task_id ETTm2 \ 40 | --model FEDformer \ 41 | --data ETTm2 \ 42 | --features M \ 43 | --seq_len $seqLen \ 44 | --label_len 48 \ 45 | --pred_len $pred_len \ 46 | --e_layers 2 \ 47 | --d_layers 1 \ 48 | --factor 3 \ 49 | --enc_in 7 \ 50 | --dec_in 7 \ 51 | --c_out 7 \ 52 | --des 'Exp' \ 53 | --d_model 512 \ 54 | --itr 1 >../logs/LookBackWindow/FEDformer_ETTm2_$seqLen'_'$pred_len.log 55 | done 
56 | done 57 | 58 | for seqLen in 48 72 120 144 168 192 336 720 59 | do 60 | for pred_len in 24 720 61 | do 62 | # ETTh1 63 | python -u run.py \ 64 | --is_training 1 \ 65 | --root_path ../dataset/ \ 66 | --data_path ETTh1.csv \ 67 | --task_id ETTh1 \ 68 | --model FEDformer \ 69 | --data ETTh1 \ 70 | --features M \ 71 | --seq_len $seqLen \ 72 | --label_len 48 \ 73 | --pred_len $pred_len \ 74 | --e_layers 2 \ 75 | --d_layers 1 \ 76 | --factor 3 \ 77 | --enc_in 7 \ 78 | --dec_in 7 \ 79 | --c_out 7 \ 80 | --des 'Exp' \ 81 | --d_model 512 \ 82 | --itr 1 >../logs/LookBackWindow/FEDformer_ETTh1_$seqLen'_'$pred_len.log 83 | 84 | # ETTh2 85 | python -u run.py \ 86 | --is_training 1 \ 87 | --root_path ../dataset/ \ 88 | --data_path ETTh2.csv \ 89 | --task_id ETTh2 \ 90 | --model FEDformer \ 91 | --data ETTh2 \ 92 | --features M \ 93 | --seq_len $seqLen \ 94 | --label_len 48 \ 95 | --pred_len $pred_len \ 96 | --e_layers 2 \ 97 | --d_layers 1 \ 98 | --factor 3 \ 99 | --enc_in 7 \ 100 | --dec_in 7 \ 101 | --c_out 7 \ 102 | --des 'Exp' \ 103 | --d_model 512 \ 104 | --itr 1 >../logs/LookBackWindow/FEDformer_ETTh2_$seqLen'_'$pred_len.log 105 | 106 | ## electricity 107 | python -u run.py \ 108 | --is_training 1 \ 109 | --root_path ../dataset/ \ 110 | --data_path electricity.csv \ 111 | --task_id ECL \ 112 | --model FEDformer \ 113 | --data custom \ 114 | --features M \ 115 | --seq_len $seqLen \ 116 | --label_len 48 \ 117 | --pred_len $pred_len \ 118 | --e_layers 2 \ 119 | --d_layers 1 \ 120 | --factor 3 \ 121 | --enc_in 321 \ 122 | --dec_in 321 \ 123 | --c_out 321 \ 124 | --des 'Exp' \ 125 | --itr 1 >../logs/LookBackWindow/FEDformer_electricity_$seqLen'_'$pred_len.log 126 | 127 | # exchange 128 | python -u run.py \ 129 | --is_training 1 \ 130 | --root_path ../dataset/ \ 131 | --data_path exchange_rate.csv \ 132 | --task_id Exchange \ 133 | --model FEDformer \ 134 | --data custom \ 135 | --features M \ 136 | --seq_len $seqLen \ 137 | --label_len 48 \ 138 | --pred_len $pred_len \ 139 
| --e_layers 2 \ 140 | --d_layers 1 \ 141 | --factor 3 \ 142 | --enc_in 8 \ 143 | --dec_in 8 \ 144 | --c_out 8 \ 145 | --des 'Exp' \ 146 | --itr 1 >../logs/LookBackWindow/FEDformer_exchange_rate_$seqLen'_'$pred_len.log 147 | 148 | # traffic 149 | python -u run.py \ 150 | --is_training 1 \ 151 | --root_path ../dataset/ \ 152 | --data_path traffic.csv \ 153 | --task_id traffic \ 154 | --model FEDformer \ 155 | --data custom \ 156 | --features M \ 157 | --seq_len $seqLen \ 158 | --label_len 48 \ 159 | --pred_len $pred_len \ 160 | --e_layers 2 \ 161 | --d_layers 1 \ 162 | --factor 3 \ 163 | --enc_in 862 \ 164 | --dec_in 862 \ 165 | --c_out 862 \ 166 | --des 'Exp' \ 167 | --itr 1 \ 168 | --train_epochs 3 >../logs/LookBackWindow/FEDformer_traffic_$seqLen'_'$pred_len.log 169 | 170 | # weather 171 | python -u run.py \ 172 | --is_training 1 \ 173 | --root_path ../dataset/ \ 174 | --data_path weather.csv \ 175 | --task_id weather \ 176 | --model FEDformer \ 177 | --data custom \ 178 | --features M \ 179 | --seq_len $seqLen \ 180 | --label_len 48 \ 181 | --pred_len $pred_len \ 182 | --e_layers 2 \ 183 | --d_layers 1 \ 184 | --factor 3 \ 185 | --enc_in 21 \ 186 | --dec_in 21 \ 187 | --c_out 21 \ 188 | --des 'Exp' \ 189 | --itr 1 >../logs/LookBackWindow/FEDformer_weather_$seqLen'_'$pred_len.log 190 | done 191 | done 192 | 193 | 194 | for seqLen in 26 52 78 104 130 156 208 195 | do 196 | # illness 197 | python -u run.py \ 198 | --is_training 1 \ 199 | --root_path ../dataset/ \ 200 | --data_path national_illness.csv \ 201 | --task_id ili \ 202 | --model FEDformer \ 203 | --data custom \ 204 | --features M \ 205 | --seq_len $seqLen \ 206 | --label_len 18 \ 207 | --pred_len 24 \ 208 | --e_layers 2 \ 209 | --d_layers 1 \ 210 | --factor 3 \ 211 | --enc_in 7 \ 212 | --dec_in 7 \ 213 | --c_out 7 \ 214 | --des 'Exp' \ 215 | --itr 1 >../logs/LookBackWindow/FEDformer_ili_$seqLen'_'24.log 216 | 217 | python -u run.py \ 218 | --is_training 1 \ 219 | --root_path ../dataset/ \ 220 | 
--data_path national_illness.csv \ 221 | --task_id ili \ 222 | --model FEDformer \ 223 | --data custom \ 224 | --features M \ 225 | --seq_len $seqLen \ 226 | --label_len 18 \ 227 | --pred_len 60 \ 228 | --e_layers 2 \ 229 | --d_layers 1 \ 230 | --factor 3 \ 231 | --enc_in 7 \ 232 | --dec_in 7 \ 233 | --c_out 7 \ 234 | --des 'Exp' \ 235 | --itr 1 >../logs/LookBackWindow/FEDformer_ili_$seqLen'_'60.log 236 | done 237 | # cd .. -------------------------------------------------------------------------------- /Formers/FEDformer/utils/masking.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import math 4 | 5 | class TriangularCausalMask(): 6 | def __init__(self, B, L, device="cpu"): 7 | mask_shape = [B, 1, L, L] 8 | with torch.no_grad(): 9 | self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 10 | 11 | @property 12 | def mask(self): 13 | return self._mask 14 | 15 | 16 | class ProbMask(): 17 | def __init__(self, B, H, L, index, scores, device="cpu"): 18 | _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1) 19 | _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1]) 20 | indicator = _mask_ex[torch.arange(B)[:, None, None], 21 | torch.arange(H)[None, :, None], 22 | index, :].to(device) 23 | self._mask = indicator.view(scores.shape).to(device) 24 | 25 | @property 26 | def mask(self): 27 | return self._mask 28 | 29 | class LocalMask(): 30 | def __init__(self, B, L,S,device="cpu"): 31 | mask_shape = [B, 1, L, S] 32 | with torch.no_grad(): 33 | self.len = math.ceil(np.log2(L)) 34 | self._mask1 = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device) 35 | self._mask2 = ~torch.triu(torch.ones(mask_shape,dtype=torch.bool),diagonal=-self.len).to(device) 36 | self._mask = self._mask1+self._mask2 37 | @property 38 | def mask(self): 39 | return self._mask 
-------------------------------------------------------------------------------- /Formers/FEDformer/utils/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def RSE(pred, true): 5 | return np.sqrt(np.sum((true - pred) ** 2)) / np.sqrt(np.sum((true - true.mean()) ** 2)) 6 | 7 | 8 | def CORR(pred, true): 9 | u = ((true - true.mean(0)) * (pred - pred.mean(0))).sum(0) 10 | d = np.sqrt(((true - true.mean(0)) ** 2 * (pred - pred.mean(0)) ** 2).sum(0)) 11 | return (u / d).mean(-1) 12 | 13 | 14 | def MAE(pred, true): 15 | return np.mean(np.abs(pred - true)) 16 | 17 | 18 | def MSE(pred, true): 19 | return np.mean((pred - true) ** 2) 20 | 21 | 22 | def RMSE(pred, true): 23 | return np.sqrt(MSE(pred, true)) 24 | 25 | 26 | def MAPE(pred, true): 27 | return np.mean(np.abs((pred - true) / true)) 28 | 29 | 30 | def MSPE(pred, true): 31 | return np.mean(np.square((pred - true) / true)) 32 | 33 | 34 | def metric(pred, true): 35 | mae = MAE(pred, true) 36 | mse = MSE(pred, true) 37 | rmse = RMSE(pred, true) 38 | mape = MAPE(pred, true) 39 | mspe = MSPE(pred, true) 40 | 41 | return mae, mse, rmse, mape, mspe 42 | 43 | def metric2(pred, true): 44 | mae = MAE(pred, true) 45 | mse = MSE(pred, true) 46 | rmse = RMSE(pred, true) 47 | mape = MAPE(pred, true) 48 | mspe = MSPE(pred, true) 49 | rse = RSE(pred, true) 50 | corr = CORR(pred, true) 51 | return mae, mse, rmse, mape, mspe, rse, corr -------------------------------------------------------------------------------- /Formers/FEDformer/utils/timefeatures.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.tseries import offsets 6 | from pandas.tseries.frequencies import to_offset 7 | 8 | 9 | class TimeFeature: 10 | def __init__(self): 11 | pass 12 | 13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 14 | pass 15 | 16 | def 
__repr__(self): 17 | return self.__class__.__name__ + "()" 18 | 19 | 20 | class SecondOfMinute(TimeFeature): 21 | """Second of minute encoded as value between [-0.5, 0.5]""" 22 | 23 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 24 | return index.second / 59.0 - 0.5 25 | 26 | 27 | class MinuteOfHour(TimeFeature): 28 | """Minute of hour encoded as value between [-0.5, 0.5]""" 29 | 30 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 31 | return index.minute / 59.0 - 0.5 32 | 33 | 34 | class HourOfDay(TimeFeature): 35 | """Hour of day encoded as value between [-0.5, 0.5]""" 36 | 37 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 38 | return index.hour / 23.0 - 0.5 39 | 40 | 41 | class DayOfWeek(TimeFeature): 42 | """Day of week encoded as value between [-0.5, 0.5]""" 43 | 44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 45 | return index.dayofweek / 6.0 - 0.5 46 | 47 | 48 | class DayOfMonth(TimeFeature): 49 | """Day of month encoded as value between [-0.5, 0.5]""" 50 | 51 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 52 | return (index.day - 1) / 30.0 - 0.5 53 | 54 | 55 | class DayOfYear(TimeFeature): 56 | """Day of year encoded as value between [-0.5, 0.5]""" 57 | 58 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 59 | return (index.dayofyear - 1) / 365.0 - 0.5 60 | 61 | 62 | class MonthOfYear(TimeFeature): 63 | """Month of year encoded as value between [-0.5, 0.5]""" 64 | 65 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 66 | return (index.month - 1) / 11.0 - 0.5 67 | 68 | 69 | class WeekOfYear(TimeFeature): 70 | """Week of year encoded as value between [-0.5, 0.5]""" 71 | 72 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 73 | return (index.isocalendar().week - 1) / 52.0 - 0.5 74 | 75 | 76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: 77 | """ 78 | Returns a list of time features that will be appropriate for the given frequency 
string. 79 | Parameters 80 | ---------- 81 | freq_str 82 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) 135 | -------------------------------------------------------------------------------- /Formers/FEDformer/utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | 5 | plt.switch_backend('agg') 6 | 7 | 8 | def adjust_learning_rate(optimizer, epoch, args): 9 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 10 | if args.lradj == 'type1': 11 | lr_adjust = {epoch: args.learning_rate 
* (0.5 ** ((epoch - 1) // 1))} 12 | elif args.lradj == 'type2': 13 | lr_adjust = { 14 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 15 | 10: 5e-7, 15: 1e-7, 20: 5e-8 16 | } 17 | elif args.lradj =='type3': 18 | lr_adjust = {epoch: args.learning_rate} 19 | elif args.lradj == 'type4': 20 | lr_adjust = {epoch: args.learning_rate * (0.9 ** ((epoch - 1) // 1))} 21 | if epoch in lr_adjust.keys(): 22 | lr = lr_adjust[epoch] 23 | for param_group in optimizer.param_groups: 24 | param_group['lr'] = lr 25 | print('Updating learning rate to {}'.format(lr)) 26 | 27 | 28 | class EarlyStopping: 29 | def __init__(self, patience=7, verbose=False, delta=0): 30 | self.patience = patience 31 | self.verbose = verbose 32 | self.counter = 0 33 | self.best_score = None 34 | self.early_stop = False 35 | self.val_loss_min = np.Inf 36 | self.delta = delta 37 | 38 | def __call__(self, val_loss, model, path): 39 | score = -val_loss 40 | if self.best_score is None: 41 | self.best_score = score 42 | self.save_checkpoint(val_loss, model, path) 43 | elif score < self.best_score + self.delta: 44 | self.counter += 1 45 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 46 | if self.counter >= self.patience: 47 | self.early_stop = True 48 | else: 49 | self.best_score = score 50 | self.save_checkpoint(val_loss, model, path) 51 | self.counter = 0 52 | 53 | def save_checkpoint(self, val_loss, model, path): 54 | if self.verbose: 55 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') 56 | torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') 57 | self.val_loss_min = val_loss 58 | 59 | 60 | class dotdict(dict): 61 | """dot.notation access to dictionary attributes""" 62 | __getattr__ = dict.get 63 | __setattr__ = dict.__setitem__ 64 | __delattr__ = dict.__delitem__ 65 | 66 | 67 | class StandardScaler(): 68 | def __init__(self, mean, std): 69 | self.mean = mean 70 | self.std = std 71 | 72 | def transform(self, data): 73 | return (data - self.mean) / self.std 74 | 75 | def inverse_transform(self, data): 76 | return (data * self.std) + self.mean 77 | 78 | 79 | def visual(true, preds=None, name='./pic/test.pdf'): 80 | """ 81 | Results visualization 82 | """ 83 | plt.figure() 84 | plt.plot(true, label='GroundTruth', linewidth=2) 85 | if preds is not None: 86 | plt.plot(preds, label='Prediction', linewidth=2) 87 | plt.legend() 88 | plt.savefig(name, bbox_inches='tight') 89 | -------------------------------------------------------------------------------- /Formers/Informer.sh: -------------------------------------------------------------------------------- 1 | # ALL scripts in this file come from Autoformer 2 | if [ ! -d "./logs" ]; then 3 | mkdir ./logs 4 | fi 5 | 6 | if [ ! 
-d "./logs/LongForecasting" ]; then 7 | mkdir ./logs/LongForecasting 8 | fi 9 | 10 | random_seed=2021 11 | model_name=Informer 12 | 13 | for pred_len in 96 192 336 720 14 | do 15 | python -u run_longExp.py \ 16 | --random_seed $random_seed \ 17 | --is_training 1 \ 18 | --root_path ./dataset/ \ 19 | --data_path exchange_rate.csv \ 20 | --model_id exchange_96_$pred_len \ 21 | --model $model_name \ 22 | --data custom \ 23 | --features M \ 24 | --seq_len 96 \ 25 | --label_len 48 \ 26 | --pred_len $pred_len \ 27 | --e_layers 2 \ 28 | --d_layers 1 \ 29 | --factor 3 \ 30 | --enc_in 8 \ 31 | --dec_in 8 \ 32 | --c_out 8 \ 33 | --des 'Exp' \ 34 | --itr 1 \ 35 | --train_epochs 1 >logs/LongForecasting/$model_name'_exchange_rate_'$pred_len.log 36 | 37 | python -u run_longExp.py \ 38 | --random_seed $random_seed \ 39 | --is_training 1 \ 40 | --root_path ./dataset/ \ 41 | --data_path electricity.csv \ 42 | --model_id electricity_96_$pred_len \ 43 | --model $model_name \ 44 | --data custom \ 45 | --features M \ 46 | --seq_len 96 \ 47 | --label_len 48 \ 48 | --pred_len $pred_len \ 49 | --e_layers 2 \ 50 | --d_layers 1 \ 51 | --factor 3 \ 52 | --enc_in 321 \ 53 | --dec_in 321 \ 54 | --c_out 321 \ 55 | --des 'Exp' \ 56 | --itr 1 >logs/LongForecasting/$model_name'_electricity_'$pred_len.log 57 | 58 | python -u run_longExp.py \ 59 | --random_seed $random_seed \ 60 | --is_training 1 \ 61 | --root_path ./dataset/ \ 62 | --data_path traffic.csv \ 63 | --model_id traffic_96_$pred_len \ 64 | --model $model_name \ 65 | --data custom \ 66 | --features M \ 67 | --seq_len 96 \ 68 | --label_len 48 \ 69 | --pred_len $pred_len \ 70 | --e_layers 2 \ 71 | --d_layers 1 \ 72 | --factor 3 \ 73 | --enc_in 862 \ 74 | --dec_in 862 \ 75 | --c_out 862 \ 76 | --des 'Exp' \ 77 | --itr 1 \ 78 | --train_epochs 3 >logs/LongForecasting/$model_name'_traffic_'$pred_len.log 79 | 80 | python -u run_longExp.py \ 81 | --random_seed $random_seed \ 82 | --is_training 1 \ 83 | --root_path ./dataset/ \ 84 | --data_path 
weather.csv \ 85 | --model_id weather_96_$pred_len \ 86 | --model $model_name \ 87 | --data custom \ 88 | --features M \ 89 | --seq_len 96 \ 90 | --label_len 48 \ 91 | --pred_len $pred_len \ 92 | --e_layers 2 \ 93 | --d_layers 1 \ 94 | --factor 3 \ 95 | --enc_in 21 \ 96 | --dec_in 21 \ 97 | --c_out 21 \ 98 | --des 'Exp' \ 99 | --itr 1 \ 100 | --train_epochs 2 >logs/LongForecasting/$model_name'_weather_'$pred_len.log 101 | 102 | python -u run_longExp.py \ 103 | --random_seed $random_seed \ 104 | --is_training 1 \ 105 | --root_path ./dataset/ \ 106 | --data_path ETTh1.csv \ 107 | --model_id ETTh1_96_$pred_len \ 108 | --model $model_name \ 109 | --data ETTh1 \ 110 | --features M \ 111 | --seq_len 96 \ 112 | --label_len 48 \ 113 | --pred_len $pred_len \ 114 | --e_layers 2 \ 115 | --d_layers 1 \ 116 | --factor 3 \ 117 | --enc_in 7 \ 118 | --dec_in 7 \ 119 | --c_out 7 \ 120 | --des 'Exp' \ 121 | --itr 1 >logs/LongForecasting/$model_name'_Etth1_'$pred_len.log 122 | 123 | python -u run_longExp.py \ 124 | --random_seed $random_seed \ 125 | --is_training 1 \ 126 | --root_path ./dataset/ \ 127 | --data_path ETTh2.csv \ 128 | --model_id ETTh2_96_$pred_len \ 129 | --model $model_name \ 130 | --data ETTh2 \ 131 | --features M \ 132 | --seq_len 96 \ 133 | --label_len 48 \ 134 | --pred_len $pred_len \ 135 | --e_layers 2 \ 136 | --d_layers 1 \ 137 | --factor 3 \ 138 | --enc_in 7 \ 139 | --dec_in 7 \ 140 | --c_out 7 \ 141 | --des 'Exp' \ 142 | --itr 1 >logs/LongForecasting/$model_name'_Etth2_'$pred_len.log 143 | 144 | python -u run_longExp.py \ 145 | --random_seed $random_seed \ 146 | --is_training 1 \ 147 | --root_path ./dataset/ \ 148 | --data_path ETTm1.csv \ 149 | --model_id ETTm1_96_$pred_len \ 150 | --model $model_name \ 151 | --data ETTm1 \ 152 | --features M \ 153 | --seq_len 96 \ 154 | --label_len 48 \ 155 | --pred_len $pred_len \ 156 | --e_layers 2 \ 157 | --d_layers 1 \ 158 | --factor 3 \ 159 | --enc_in 7 \ 160 | --dec_in 7 \ 161 | --c_out 7 \ 162 | --des 'Exp' \ 163 | 
--itr 1 >logs/LongForecasting/$model_name'_Ettm1_'$pred_len.log 164 | 165 | python -u run_longExp.py \ 166 | --random_seed $random_seed \ 167 | --is_training 1 \ 168 | --root_path ./dataset/ \ 169 | --data_path ETTm2.csv \ 170 | --model_id ETTm2_96_$pred_len \ 171 | --model $model_name \ 172 | --data ETTm2 \ 173 | --features M \ 174 | --seq_len 96 \ 175 | --label_len 48 \ 176 | --pred_len $pred_len \ 177 | --e_layers 2 \ 178 | --d_layers 1 \ 179 | --factor 3 \ 180 | --enc_in 7 \ 181 | --dec_in 7 \ 182 | --c_out 7 \ 183 | --des 'Exp' \ 184 | --itr 1 >logs/LongForecasting/$model_name'_Ettm2_'$pred_len.log 185 | done 186 | 187 | for pred_len in 24 36 48 60 188 | do 189 | python -u run_longExp.py \ 190 | --random_seed $random_seed \ 191 | --is_training 1 \ 192 | --root_path ./dataset/ \ 193 | --data_path national_illness.csv \ 194 | --model_id ili_36_$pred_len \ 195 | --model $model_name \ 196 | --data custom \ 197 | --features M \ 198 | --seq_len 36 \ 199 | --label_len 18 \ 200 | --pred_len $pred_len \ 201 | --e_layers 2 \ 202 | --d_layers 1 \ 203 | --factor 3 \ 204 | --enc_in 7 \ 205 | --dec_in 7 \ 206 | --c_out 7 \ 207 | --des 'Exp' \ 208 | --itr 1 >logs/LongForecasting/$model_name'_ili_'$pred_len.log 209 | done 210 | -------------------------------------------------------------------------------- /Formers/Pyraformer/LEGAL.md: -------------------------------------------------------------------------------- 1 | Legal Disclaimer 2 | 3 | Within this source code, the comments in Chinese shall be the original, governing version. Any comment in other languages are for reference only. In the event of any conflict between the Chinese language version comments and other language version comments, the Chinese language version shall prevail. 
4 | 5 | 法律免责声明 6 | 7 | 关于代码注释部分,中文注释为官方版本,其它语言注释仅做参考。中文注释可能与其它语言注释存在不一致,当中文注释与其它语言注释存在不一致时,请以中文注释为准。 -------------------------------------------------------------------------------- /Formers/Pyraformer/README.md: -------------------------------------------------------------------------------- 1 | # Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting 2 | This is the Pytorch implementation of Pyraformer (Pyramidal Attention based Transformer) in the ICLR paper: [Pyraformer: Low-complexity Pyramidal Attention for Long-range Time Series Modeling and Forecasting](https://openreview.net/pdf?id=0EXmFzUn5I). 3 | From https://github.com/alipay/Pyraformer 4 | ![The network architecture of Pyraformer.](./img/Figure_1.png) 5 |
Figure 1. The network architecture of Pyraformer.
6 | 7 | ## Pyramidal Attention 8 | As demonstrated in Figure 2, we leverage a pyramidal graph to describe the temporal dependencies of the observed time series in a multiresolution fashion. We can decompose the pyramidal graph into two parts: the inter-scale and the intra-scale connections. The inter-scale connections form a C-ary tree, in which each parent has C children. For example, if we associate the finest scale of the pyramidal graph with hourly observations of the original time series, the nodes at coarser scales can be regarded as the daily, weekly, and even monthly features of the time series. As a consequence, the pyramidal graph offers a multiresolution representation of the original time series. Furthermore, it is easier to capture long-range dependencies (e.g., monthly dependence) in the coarser scales by simply connecting the neighboring nodes via the intra-scale connections. In other words, the coarser scales are instrumental in describing long-range correlations in a manner that is graphically far more parsimonious than could be solely captured with a single, finest scale model. 9 | 10 | 11 | ![The Pyramidal Attention Mechanism.](./img/Figure_2.png#center) 12 |
Figure 2. The Pyramidal Attention Mechanism.
13 | 14 | ## Requirements 15 | * Python 3.7 16 | * pytorch 1.8.0 17 | * CUDA 11.1 18 | * TVM 0.8.0 (optional) 19 | 20 | Dependencies can be installed by: 21 | 22 | pip install -r requirements.txt 23 | 24 | If you are using CUDA 11.1, you can use the compiled TVM runtime version in our code to run PAM-TVM. Due to the short history length in the experiments, PAM-TVM does not provide a speed increase. If you want to compile our PAM-TVM kernel yourself, see [here](https://tvm.apache.org/docs/install/index.html) to compile TVM 0.8.0 first. 25 | 26 | ## Data preparation 27 | The four datasets (Electricity, Wind, ETT and App Flow) used in this paper can be downloaded from the following links: 28 | * [Electricity](https://archive.ics.uci.edu/ml/datasets/ElectricityLoadDiagrams20112014) 29 | * [Wind](https://www.kaggle.com/sohier/30-years-of-european-wind-generation) 30 | * [ETT](https://github.com/zhouhaoyi/ETDataset) 31 | * [App Flow](https://github.com/alipay/Pyraformer/blob/master/data/app_zone_rpc_hour_encrypted.zip) 32 | 33 | The downloaded datasets can be put in the 'data' directory. For single step forecasting, we preprocess Electricity, Wind and App Flow using scripts preprocess_elect.py, preprocess_wind.py and preprocess_flow.py respectively. You can also download preprocessed data [here](https://drive.google.com/drive/folders/1-b9tR6Tgmx48smPMetzAhVSV7-95im3X?usp=sharing) and put them in the 'data' directory. The directory structure looks like: 34 | 35 | ${CODE_ROOT} 36 | ...... 37 | |-- data 38 | |-- elect 39 | |-- test_data_elect.npy 40 | |-- train_data_elect.npy 41 | ...... 42 | |-- flow 43 | ...... 44 | |-- wind 45 | ...... 
46 | |-- ETT 47 | |-- ETTh1.csv 48 | |-- ETTh2.csv 49 | |-- ETTm1.csv 50 | |-- ETTm2.csv 51 | |-- LD2011_2014.txt 52 | |-- synthetic.npy 53 | 54 | Where synthetic.npy is generated by running: 55 | 56 | python simulate_sin.py 57 | 58 | ## Training 59 | To perform long-range forecasting, run: 60 | 61 | sh scripts/Pyraformer_LR_FC.sh 62 | 63 | To perform single step forecasting, run: 64 | 65 | sh scripts/Pyraformer_SS.sh 66 | 67 | The meaning of each command line argument is explained in long_range_main.py and single_step_main.py, respectively. 68 | 69 | ## Evaluate 70 | Evaluation can be done by adding the -eval option to the command line. We provide pretrained models [here](https://drive.google.com/drive/folders/15av5ZhHG8tbX8HuxZNNDGBybdnuxzA83?usp=sharing). The downloaded models should be put in the 'models' directory. The directory structure is as follows: 71 | 72 | ${CODE_ROOT} 73 | ...... 74 | |-- models 75 | |-- LongRange 76 | |-- elect 77 | |-- 168 78 | |-- best_iter0.pth 79 | |-- best_iter1.pth 80 | |-- best_iter2.pth 81 | |-- best_iter3.pth 82 | |-- best_iter4.pth 83 | |-- 336 84 | ...... 85 | |-- 720 86 | ...... 87 | |-- ETTh1 88 | ...... 89 | |-- ETTm1 90 | ...... 
def prep_data(data, covariates, data_start, train = True):
    """Divide the training sequence into windows and save them as .npy files.

    The function relies on names assigned by the ``__main__`` section of this
    script: ``window_size``, ``stride_size``, ``num_series``,
    ``num_covariates``, ``total_time``, ``save_path`` and ``save_name``.

    Args:
        data: 2-D array indexed as ``data[time, series]``.
        covariates: 2-D array with one row per time step; column 0 is
            overwritten in place with the z-scored "age" of each series.
        data_start: per-series index of the first non-zero observation.
        train: when True, windows start at ``data_start[series]`` and the
            labels are scaled as well; when False, windows start at 0 and
            only the last ``data.shape[0]`` covariate rows are used.

    Writes ``<prefix>data_<save_name>``, ``<prefix>v_<save_name>`` and
    ``<prefix>label_<save_name>`` under ``save_path``, where the prefix is
    ``train_`` or ``test_``.
    """
    time_len = data.shape[0]
    input_size = window_size-stride_size
    windows_per_series = np.full((num_series), (time_len-input_size) // stride_size)
    if train:
        # Series that start late contribute fewer windows.
        windows_per_series -= (data_start+stride_size-1) // stride_size
    total_windows = np.sum(windows_per_series)

    # Last axis layout: [shifted target, covariates..., series id].
    x_input = np.zeros((total_windows, window_size, 1 + num_covariates + 1), dtype='float32')
    label = np.zeros((total_windows, window_size), dtype='float32')
    v_input = np.zeros((total_windows, 2), dtype='float32')
    count = 0
    if not train:
        covariates = covariates[-time_len:]
    for series in trange(num_series):
        cov_age = stats.zscore(np.arange(total_time-data_start[series])) # shape:(series_len,)
        if train:
            covariates[data_start[series]:time_len, 0] = cov_age[:time_len-data_start[series]]
        else:
            covariates[:, 0] = cov_age[-time_len:]
        for i in range(windows_per_series[series]):
            if train:
                window_start = stride_size*i+data_start[series]
            else:
                window_start = stride_size*i
            window_end = window_start+window_size

            # The model input is the target shifted by one step, so position 0
            # stays zero and position t holds the observation at t-1.
            x_input[count, 1:, 0] = data[window_start:window_end-1, series]
            x_input[count, :, 1:1+num_covariates] = covariates[window_start:window_end, :]
            x_input[count, :, -1] = series
            label[count, :] = data[window_start:window_end, series]

            # Scale factor: mean of the non-zero inputs in the conditioning
            # range, plus one. A window without non-zero inputs keeps v = 0
            # and is left unscaled.
            nonzero_sum = (x_input[count, 1:input_size, 0]!=0).sum()
            if nonzero_sum == 0:
                v_input[count, 0] = 0
            else:
                v_input[count, 0] = np.true_divide(x_input[count, 1:input_size, 0].sum(),nonzero_sum)+1
                x_input[count, :, 0] = x_input[count, :, 0]/v_input[count, 0]
                if train:
                    label[count, :] = label[count, :]/v_input[count, 0]
            count += 1

    # np.save does not create missing directories; make sure the target exists
    # (the flow and wind preprocessing scripts do the same).
    os.makedirs(save_path, exist_ok=True)
    prefix = os.path.join(save_path, 'train_' if train else 'test_')
    np.save(prefix+'data_'+save_name, x_input)
    np.save(prefix+'v_'+save_name, v_input)
    np.save(prefix+'label_'+save_name, label)
def gen_covariates(times, num_covariates):
    """Build the calendar covariate matrix for a sequence of timestamps.

    Column 0 is left at zero (``prep_data`` fills it later); columns 1-3 hold
    the weekday, hour and month of every timestamp. Every column from index 1
    on is then z-scored over the whole sequence.

    Args:
        times: sequence exposing ``shape[0]`` whose items provide
            ``weekday()``, ``hour`` and ``month``.
        num_covariates: number of columns of the result.

    Returns:
        Array of shape ``(len(times), num_covariates)``.
    """
    features = np.zeros((times.shape[0], num_covariates))
    for row, stamp in enumerate(times):
        features[row, 1] = stamp.weekday()
        features[row, 2] = stamp.hour
        features[row, 3] = stamp.month
    for col in range(1, num_covariates):
        column = features[:, col]
        features[:, col] = stats.zscore(column)
    return features[:, :num_covariates]


def visualize(data, week_start):
    """Plot one window of ``data`` starting at ``week_start`` into visual.png."""
    figure = plt.figure()
    window = data[week_start:week_start+window_size]
    plt.plot(np.arange(window_size), window, color='b')
    figure.savefig("visual.png")
    plt.close()


if __name__ == '__main__':

    global save_path
    # --- configuration (read as module globals by prep_data / visualize) ---
    csv_path = 'data/LD2011_2014.txt'
    save_name = 'elect'
    window_size = 192
    stride_size = 24
    num_covariates = 4
    train_start = '2011-01-01 00:00:00'
    train_end = '2014-08-31 23:00:00'
    test_start = '2014-08-25 00:00:00' #need additional 7 days as given info
    test_end = '2014-09-07 23:00:00'
    pred_days = 7
    given_days = 7

    save_path = os.path.join('data', save_name)

    # --- load, resample to hourly sums and cut to the experiment range ---
    data_frame = pd.read_csv(csv_path, sep=";", index_col=0, parse_dates=True, decimal=',')
    data_frame = data_frame.resample('1H',label = 'left',closed = 'right').sum()[train_start:test_end]
    data_frame.fillna(0, inplace=True)
    covariates = gen_covariates(data_frame[train_start:test_end].index, num_covariates)

    # --- split and window ---
    train_data = data_frame[train_start:train_end].values # shape: [seq_length, user_num]
    test_data = data_frame[test_start:test_end].values
    data_start = (train_data!=0).argmax(axis=0) #find first nonzero value in each time series
    total_time = data_frame.shape[0] #32304
    num_series = data_frame.shape[1] #370
    prep_data(train_data, covariates, data_start)
    prep_data(test_data, covariates, data_start, train=False)
def load_data(filedir):
    """Read the App Flow CSV and return one feature array per (app, zone).

    The first CSV column is discarded. Rows are grouped by ``app_name`` and
    ``zone``, sorted by ``time`` and extended with weekday/hour/month
    features scaled by 6, 23 and 12. Groups in which more than 20% of the
    first remaining column is zero are dropped.

    Returns:
        List of 2-D arrays; each holds every column after ``time`` followed
        by the weekday, hour and month features.
    """
    frame = pd.read_csv(filedir, header=0, parse_dates=True) #names=['app_name', 'zone', 'time', 'value']
    frame = frame.drop(frame.columns[0], axis=1)

    kept = []
    for _, group in list(frame.groupby(["app_name", "zone"])):
        series = group.drop(labels=['app_name', 'zone'], axis=1)
        series = series.sort_values(by="time", ascending=True)
        stamps = pd.to_datetime(series.time)
        series['weekday'] = stamps.dt.dayofweek / 6
        series['hour'] = stamps.dt.hour / 23
        series['month'] = stamps.dt.month / 12

        # Drop the leading time column; the flow value becomes column 0.
        values = series.values[:, 1:]
        zero_share = (values[:, 0] == 0).sum() / len(values)
        if zero_share > 0.2:
            continue
        kept.append(values)

    return kept


def visualize(data, index, save_dir):
    """Save a line plot of column 0 for each of the first ``index`` series."""
    os.makedirs(save_dir, exist_ok=True)
    for i in range(index):
        series = data[i]
        figure = plt.figure()
        plt.plot(np.arange(len(series)), series[:, 0])
        target = os.path.join(save_dir, "visual_{}.png".format(i))
        figure.savefig(target)
        plt.close()
def split_seq(sequences, seq_length, slide_step, predict_length, save_dir):
    """Divide every sequence into sliding windows and save train/test sets.

    Args:
        sequences: list of 2-D arrays; column 0 is the target and columns
            1-3 are covariates.
        seq_length: nominal window length used for normalization.
        slide_step: stride between consecutive windows.
        predict_length: forecast horizon; every window holds
            ``seq_length + predict_length - 1`` steps.
        save_dir: output prefix; it is concatenated directly with
            ``'train'`` / ``'test'``, so it should end with a path separator.
    """
    window_len = seq_length + predict_length - 1
    train_data = []
    test_data = []
    for seq_id in trange(len(sequences)):
        split_start = 0
        single_seq = sequences[seq_id][:, 0]
        single_covariate = sequences[seq_id][:, 1:]
        windows = (len(single_seq)-seq_length+slide_step) // slide_step
        count = 0
        # Roughly the first 97% of the windows of a sequence go to training.
        train_count = int(0.97 * windows)
        while len(single_seq[split_start:]) > (seq_length + predict_length):
            # Last axis layout: [target, 3 covariates, sequence id].
            single_data = np.zeros((window_len, 5))
            single_data[:, 0] = single_seq[split_start:split_start+window_len].copy()
            single_data[:, 1:4] = single_covariate[split_start:split_start+window_len]
            single_data[:, -1] = seq_id

            count += 1
            if count < train_count:
                train_data.append(single_data)
            else:
                test_data.append(single_data)
            split_start += slide_step

    os.makedirs(save_dir, exist_ok=True)

    train_data = np.array(train_data, dtype=np.float32)
    train_data, v = normalize(train_data, seq_length)
    save(train_data, v, save_dir + 'train')
    test_data = np.array(test_data, dtype=np.float32)
    test_data, v = normalize(test_data, seq_length)
    save(test_data, v, save_dir + 'test')


def normalize(inputs, seq_length):
    """Scale column 0 of every window by the mean of its positive history.

    The scale ``v`` of a window is the sum of the first ``seq_length - 1``
    target values divided by the number of strictly positive ones. Windows
    without any positive value (and windows whose scale evaluates to 0) use
    ``v = 1`` so that no NaN/inf is written into the data.

    Args:
        inputs: float array of shape ``(windows, steps, features)``; modified
            in place. An empty array is returned unchanged.
        seq_length: nominal window length.

    Returns:
        ``(inputs, v)`` with ``v`` holding one scale factor per window.
    """
    if inputs.size == 0:
        # np.array([]) of an empty window list is 1-D and cannot be indexed.
        return inputs, np.ones(0, dtype=np.float64)

    base_seq = inputs[:, :(seq_length-1), 0]
    nonzeros = (base_seq > 0).sum(1)
    v = np.ones(nonzeros.shape, dtype=np.float64)
    # Divide only where the count is positive; other rows keep the neutral 1.
    np.divide(base_seq.sum(1), nonzeros, out=v, where=nonzeros > 0)
    v[v == 0] = 1
    inputs[:, :, 0] = inputs[:, :, 0] / v[:, None]

    return inputs, v


def save(data, v, save_dir):
    """Write the windows and their scale factors next to prefix ``save_dir``."""
    np.save(save_dir+'_data_flow.npy', data)
    np.save(save_dir+'_v_flow.npy', v)


def dezip(filedir):
    """Extract every member of the zip archive into the archive's directory."""
    # dirname handles nested and absolute paths; '' means the current directory.
    parent_dir = os.path.dirname(filedir) or '.'
    with zipfile.ZipFile(filedir) as zip_file:
        zip_file.extractall(parent_dir)


if __name__ == '__main__':
    zip_dir = 'data/app_zone_rpc_hour_encrypted.zip'
    dezip(zip_dir)
    data_dir = 'data/app_zone_rpc_hour_encrypted.csv'
    data = load_data(data_dir)
    split_seq(data, 192, 24, 24, 'data/flow/')
def load_data(datadir):
    """Read the CSV at ``datadir`` and return its values transposed.

    The result is indexed as ``data[column, row]``.
    """
    return pd.read_csv(datadir).values.transpose(1, 0)


def get_covariates(data_len, start_day):
    """Get z-scored weekday, hour and month covariates for an hourly series.

    Timestamps are generated with naive ``timedelta`` arithmetic from
    ``start_day``. Going through POSIX timestamps and
    ``datetime.fromtimestamp`` would convert to the machine's local time and
    shift the hours at every daylight-saving transition.

    Args:
        data_len: number of hourly steps.
        start_day: first timestamp, formatted as ``'%Y-%m-%d %H:%M:%S'``.

    Returns:
        Array of shape ``(data_len, 3)`` with columns weekday, hour, month.
        A column that is constant over the whole range z-scores to NaN.
    """
    from datetime import timedelta

    start = datetime.strptime(start_day, '%Y-%m-%d %H:%M:%S')
    timestamps = [start + timedelta(hours=step) for step in range(data_len)]

    weekdays = stats.zscore(np.array([t.weekday() for t in timestamps]))
    hours = stats.zscore(np.array([t.hour for t in timestamps]))
    months = stats.zscore(np.array([t.month for t in timestamps]))

    return np.stack([weekdays, hours, months], axis=1)
def split_seq(sequences, covariates, seq_length, slide_step, predict_length, save_dir):
    """Cut all series into sliding windows and save the train/test arrays.

    Every window holds ``seq_length + predict_length - 1`` steps. The first
    ``int(0.97 * windows)`` window positions go to the training set and the
    remaining ones to the test set. Window slots that are never filled stay
    all-zero and are removed by ``normalize``.

    Args:
        sequences: 2-D array indexed as ``sequences[series, time]``.
        covariates: 2-D array with one row of three covariates per time step.
        seq_length: nominal window length used for normalization.
        slide_step: stride between consecutive windows.
        predict_length: forecast horizon.
        save_dir: output prefix, concatenated directly with ``'train'`` /
            ``'test'``.
    """
    n_series = len(sequences)
    data_length = len(sequences[0])
    window_len = seq_length + predict_length - 1

    windows = (data_length-seq_length+slide_step) // slide_step
    train_windows = int(0.97 * windows)
    test_windows = windows - train_windows
    # Last axis layout: [target, 3 covariates, series id].
    train_data = np.zeros((train_windows*n_series, window_len, 5), dtype=np.float32)
    test_data = np.zeros((test_windows*n_series, window_len, 5), dtype=np.float32)
    seq_ids = np.arange(n_series)[:, None]

    count = 0
    for split_start in range(0, data_length - window_len + 1, slide_step):
        end = split_start + window_len
        if count < train_windows:
            target, slot = train_data, count
        else:
            target, slot = test_data, count - train_windows
        # One window position fills a contiguous group of n_series rows.
        rows = slice(slot*n_series, (slot+1)*n_series)
        target[rows, :, 0] = sequences[:, split_start:end]
        target[rows, :, 1:4] = covariates[split_start:end, :]
        target[rows, :, -1] = seq_ids
        count += 1

    os.makedirs(save_dir, exist_ok=True)

    train_data, v = normalize(train_data, seq_length)
    save(train_data, v, save_dir + 'train')
    test_data, v = normalize(test_data, seq_length)
    save(test_data, v, save_dir + 'test')


def normalize(inputs, seq_length):
    """Drop windows without positive history and scale the rest.

    A window is kept when at least one of its first ``seq_length`` target
    values is strictly positive. Its scale ``v`` is the sum of those values
    divided by the number of positive ones (a scale of 0 is replaced by 1),
    and column 0 of the window is divided by ``v``.

    Returns:
        ``(kept_windows, v)``.
    """
    positive_counts = (inputs[:, :seq_length, 0] > 0).sum(1)
    keep = positive_counts > 0
    inputs = inputs[keep]

    v = inputs[:, :seq_length, 0].sum(1) / positive_counts[keep]
    v[v == 0] = 1
    inputs[:, :, 0] = inputs[:, :, 0] / v[:, None]

    return inputs, v


def save(data, v, save_dir):
    """Write the windows and their scale factors next to prefix ``save_dir``."""
    for tag, array in (('_data_wind.npy', data), ('_v_wind.npy', v)):
        np.save(save_dir+tag, array)


if __name__ == '__main__':
    datadir = 'data/EMHIRESPV_TSh_CF_Country_19862015.csv'
    all_data = load_data(datadir)
    covariates = get_covariates(len(all_data[0]), '1986-01-01 00:00:00')
    split_seq(all_data, covariates, 192, 24, 24, 'data/wind/')
class ScaledDotProductAttention(nn.Module):
    """ Scaled Dot-Product Attention """

    def __init__(self, temperature, attn_dropout=0.2):
        super().__init__()

        # Divisor applied to the queries before the dot product.
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)

    def forward(self, q, k, v, mask=None):
        """Return ``(output, attention weights)``.

        ``k`` is transposed on dims 2 and 3, so the inputs are expected to be
        4-D. Positions where ``mask`` is True are filled with -1e9 before the
        softmax.
        """
        scores = (q / self.temperature) @ k.transpose(2, 3)
        if mask is not None:
            scores = scores.masked_fill(mask, -1e9)

        weights = self.dropout(F.softmax(scores, dim=-1))
        return weights @ v, weights
class PyramidalAttention(nn.Module):
    """Multi-head attention evaluated with the TVM ``graph_mm`` kernel.

    ``q_k_mask`` / ``k_q_mask`` are stored and handed to the kernel on every
    call. ``d_v`` is accepted but not used: the value projection and the
    output layer are both sized with ``d_k``.
    """

    def __init__(self, n_head, d_model, d_k, d_v, dropout, normalize_before, q_k_mask, k_q_mask):
        super(PyramidalAttention, self).__init__()
        self.normalize_before = normalize_before
        self.n_head = n_head
        self.d_k = d_k

        proj_dim = n_head * d_k
        self.w_qs = nn.Linear(d_model, proj_dim, bias=False)
        self.w_ks = nn.Linear(d_model, proj_dim, bias=False)
        self.w_vs = nn.Linear(d_model, proj_dim, bias=False)
        for projection in (self.w_qs, self.w_ks, self.w_vs):
            nn.init.xavier_uniform_(projection.weight)

        self.fc = nn.Linear(d_k * n_head, d_model)
        nn.init.xavier_uniform_(self.fc.weight)

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.dropout_attn = nn.Dropout(dropout)
        self.dropout_fc = nn.Dropout(dropout)
        self.q_k_mask = q_k_mask
        self.k_q_mask = k_q_mask

    def forward(self, hidden_states):
        """Apply attention plus the residual connection to a 3-D input."""
        residual = hidden_states
        bsz, seq_len, _ = hidden_states.size()
        head_shape = (bsz, seq_len, self.n_head, self.d_k)

        # Only the query path sees the pre-norm; keys/values use the raw input.
        q = self.layer_norm(hidden_states) if self.normalize_before else hidden_states
        q = self.w_qs(q)
        k = self.w_ks(hidden_states)
        v = self.w_vs(hidden_states)
        q /= math.sqrt(self.d_k)

        q = q.view(*head_shape).float().contiguous()
        k = k.view(*head_shape).float().contiguous()
        # attn_weights.size(): (batch_size, L, num_heads, 11)
        attn_weights = graph_mm_tvm(q, k, self.q_k_mask, self.k_q_mask, False, 0)
        attn_weights = self.dropout_attn(F.softmax(attn_weights, dim=-1))

        v = v.view(*head_shape).float().contiguous()
        # is_t1_diagonaled=True
        attn = graph_mm_tvm(attn_weights, v, self.q_k_mask, self.k_q_mask, True, 0)
        attn = attn.reshape(bsz, seq_len, self.n_head * self.d_k).contiguous()

        context = self.dropout_fc(self.fc(attn))
        context += residual
        if not self.normalize_before:
            context = self.layer_norm(context)

        return context
""" 11 | 12 | def __init__(self, opt): 13 | super().__init__() 14 | 15 | self.d_model = opt.d_model 16 | self.model_type = opt.model 17 | self.window_size = opt.window_size 18 | self.truncate = opt.truncate 19 | if opt.decoder == 'attention': 20 | self.mask, self.all_size = get_mask(opt.input_size, opt.window_size, opt.inner_size, opt.device) 21 | else: 22 | self.mask, self.all_size = get_mask(opt.input_size+1, opt.window_size, opt.inner_size, opt.device) 23 | self.decoder_type = opt.decoder 24 | if opt.decoder == 'FC': 25 | self.indexes = refer_points(self.all_size, opt.window_size, opt.device) 26 | 27 | if opt.use_tvm: 28 | assert len(set(self.window_size)) == 1, "Only constant window size is supported." 29 | padding = 1 if opt.decoder == 'FC' else 0 30 | q_k_mask = get_q_k(opt.input_size + padding, opt.inner_size, opt.window_size[0], opt.device) 31 | k_q_mask = get_k_q(q_k_mask) 32 | self.layers = nn.ModuleList([ 33 | EncoderLayer(opt.d_model, opt.d_inner_hid, opt.n_head, opt.d_k, opt.d_v, dropout=opt.dropout, \ 34 | normalize_before=False, use_tvm=True, q_k_mask=q_k_mask, k_q_mask=k_q_mask) for i in range(opt.n_layer) 35 | ]) 36 | else: 37 | self.layers = nn.ModuleList([ 38 | EncoderLayer(opt.d_model, opt.d_inner_hid, opt.n_head, opt.d_k, opt.d_v, dropout=opt.dropout, \ 39 | normalize_before=False) for i in range(opt.n_layer) 40 | ]) 41 | 42 | if opt.embed_type == 'CustomEmbedding': 43 | self.enc_embedding = DataEmbedding(opt.enc_in, opt.d_model, opt.dropout) 44 | # self.enc_embedding = CustomEmbedding(opt.enc_in, opt.d_model, opt.covariate_size, opt.seq_num, opt.dropout) 45 | else: 46 | self.enc_embedding = DataEmbedding(opt.enc_in, opt.d_model, opt.dropout) 47 | 48 | self.conv_layers = eval(opt.CSCM)(opt.d_model, opt.window_size, opt.d_bottleneck) 49 | 50 | def forward(self, x_enc, x_mark_enc): 51 | 52 | seq_enc = self.enc_embedding(x_enc, x_mark_enc) 53 | 54 | mask = self.mask.repeat(len(seq_enc), 1, 1).to(x_enc.device) 55 | seq_enc = 
class Model(nn.Module):
    """ A sequence to sequence model with attention mechanism. """

    def __init__(self, opt):
        super().__init__()

        self.predict_step = opt.predict_step
        self.d_model = opt.d_model
        self.input_size = opt.input_size
        self.decoder_type = opt.decoder
        self.channels = opt.enc_in

        self.encoder = Encoder(opt)
        if opt.decoder == 'attention':
            # Attention decoder followed by a per-position projection.
            dec_mask = get_subsequent_mask(opt.input_size, opt.window_size, opt.predict_step, opt.truncate)
            self.decoder = Decoder(opt, dec_mask)
            self.predictor = Predictor(opt.d_model, opt.enc_in)
        elif opt.decoder == 'FC':
            # One projection emits all predict_step * enc_in values at once.
            self.predictor = Predictor(4 * opt.d_model, opt.predict_step * opt.enc_in)

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, pretrain):
        """
        Return the predictions for one batch.

        'attention' decoder: the encoder output and (x_dec, x_mark_dec) go
        through the decoder; when ``pretrain`` is true the first
        ``input_size`` encoder positions are prepended to the decoder output
        before the predictor is applied.

        'FC' decoder: only the last encoder position is used and the
        predictor output is reshaped to (batch, predict_step, -1);
        x_dec, x_mark_dec and pretrain are ignored.
        """

        if self.decoder_type == 'attention':
            memory = self.encoder(x_enc, x_mark_enc)
            decoded = self.decoder(x_dec, x_mark_dec, memory)
            if pretrain:
                decoded = torch.cat([memory[:, :self.input_size], decoded], dim=1)
            pred = self.predictor(decoded)
        elif self.decoder_type == 'FC':
            last_state = self.encoder(x_enc, x_mark_enc)[:, -1, :]
            batch = last_state.size(0)
            pred = self.predictor(last_state).view(batch, self.predict_step, -1)

        return pred
100 | """ 101 | 102 | if self.decoder_type == 'attention': 103 | enc_output = self.encoder(x_enc, x_mark_enc) 104 | dec_enc = self.decoder(x_dec, x_mark_dec, enc_output) 105 | 106 | if pretrain: 107 | dec_enc = torch.cat([enc_output[:, :self.input_size], dec_enc], dim=1) 108 | pred = self.predictor(dec_enc) 109 | else: 110 | pred = self.predictor(dec_enc) 111 | elif self.decoder_type == 'FC': 112 | enc_output = self.encoder(x_enc, x_mark_enc)[:, -1, :] 113 | pred = self.predictor(enc_output).view(enc_output.size(0), self.predict_step, -1) 114 | 115 | return pred 116 | 117 | -------------------------------------------------------------------------------- /Formers/Pyraformer/pyraformer/Pyraformer_SS.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from .Layers import EncoderLayer, Predictor 4 | from .Layers import Bottleneck_Construct 5 | from .Layers import get_mask, refer_points, get_k_q, get_q_k 6 | from .embed import SingleStepEmbedding 7 | 8 | 9 | class Encoder(nn.Module): 10 | """ A encoder model with self attention mechanism. """ 11 | 12 | def __init__(self, opt): 13 | super().__init__() 14 | 15 | self.d_model = opt.d_model 16 | self.window_size = opt.window_size 17 | self.num_heads = opt.n_head 18 | self.mask, self.all_size = get_mask(opt.input_size, opt.window_size, opt.inner_size, opt.device) 19 | self.indexes = refer_points(self.all_size, opt.window_size, opt.device) 20 | 21 | if opt.use_tvm: 22 | assert len(set(self.window_size)) == 1, "Only constant window size is supported." 
class Model(nn.Module):
    """Single-step forecaster producing two positive outputs per position."""

    def __init__(self, opt):
        super().__init__()

        self.encoder = Encoder(opt)

        # convert hidden vectors into two scalar
        self.mean_hidden = Predictor(4 * opt.d_model, 1)
        self.var_hidden = Predictor(4 * opt.d_model, 1)

        self.softplus = nn.Softplus()

    def forward(self, data):
        """Return ``(mean, var)``; both pass through softplus and drop dim 2."""
        hidden = self.encoder(data)

        raw_mean = self.mean_hidden(hidden)
        raw_var = self.var_hidden(hidden)

        mean_pre = self.softplus(raw_mean).squeeze(2)
        var_pre = self.softplus(raw_var).squeeze(2)
        return mean_pre, var_pre

    def test(self, data, v):
        """Return the last-position outputs multiplied by the scale ``v``."""
        mu, sigma = self(data)
        return mu[:, -1] * v, sigma[:, -1] * v
class MultiHeadAttention(nn.Module):
    """ Multi-Head Attention module """

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1, normalize_before=True):
        super().__init__()

        self.normalize_before = normalize_before
        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        # Bias-free input projections, Xavier-initialised.
        self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
        for projection in (self.w_qs, self.w_ks, self.w_vs):
            nn.init.xavier_uniform_(projection.weight)

        self.fc = nn.Linear(d_v * n_head, d_model)
        nn.init.xavier_uniform_(self.fc.weight)

        self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5, attn_dropout=dropout)

        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, q, k, v, mask=None):
        """Return ``(output, attention weights)`` for 3-D ``q``, ``k``, ``v``.

        The residual is the un-normalised ``q``; layer norm is applied to the
        query before the projection (pre-norm) or to the output (post-norm).
        """
        n_head = self.n_head
        batch = q.size(0)
        len_q, len_k, len_v = q.size(1), k.size(1), v.size(1)

        residual = q
        if self.normalize_before:
            q = self.layer_norm(q)

        # Project, split heads (b x len x n x d) and move the head axis
        # forward (b x n x len x d) for the attention product.
        q = self.w_qs(q).view(batch, len_q, n_head, self.d_k).transpose(1, 2)
        k = self.w_ks(k).view(batch, len_k, n_head, self.d_k).transpose(1, 2)
        v = self.w_vs(v).view(batch, len_v, n_head, self.d_v).transpose(1, 2)

        if mask is not None and mask.dim() == 3:
            mask = mask.unsqueeze(1) # For head axis broadcasting.

        output, attn = self.attention(q, k, v, mask=mask)

        # Move the head axis back and concatenate the heads: b x lq x (n*dv)
        output = output.transpose(1, 2).contiguous().view(batch, len_q, -1)
        output = self.dropout(self.fc(output))
        output += residual

        if not self.normalize_before:
            output = self.layer_norm(output)
        return output, attn
class PositionwiseFeedForward(nn.Module):
    """ Two-layer position-wise feed-forward neural network. """

    def __init__(self, d_in, d_hid, dropout=0.1, normalize_before=True):
        super().__init__()

        self.normalize_before = normalize_before

        self.w_1 = nn.Linear(d_in, d_hid)
        self.w_2 = nn.Linear(d_hid, d_in)

        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        #self.layer_norm = GraphNorm(d_in)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        """Apply Linear -> GELU -> dropout -> Linear -> dropout plus residual.

        Layer norm runs on the input when ``normalize_before`` is set and on
        the output otherwise.
        """
        hidden = self.layer_norm(x) if self.normalize_before else x

        hidden = self.dropout(F.gelu(self.w_1(hidden)))
        out = self.dropout(self.w_2(hidden)) + x

        return out if self.normalize_before else self.layer_norm(out)
-------------------------------------------------------------------------------- /Formers/Pyraformer/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.14.1 2 | AE==0.0.0 3 | axial-positional-embedding==0.2.1 4 | bcrypt==3.2.0 5 | cachetools==4.2.4 6 | certifi==2021.5.30 7 | cffi==1.14.6 8 | charset-normalizer==2.0.6 9 | # Editable install with no version control (compressai==1.1.9.dev0) 10 | -e /home/szliu/compressai 11 | crowdposetools==2.0 12 | cryptography==3.4.8 13 | cycler==0.10.0 14 | Cython==3.0.0a9 15 | einops==0.3.2 16 | fbm==0.3.0 17 | google-auth==1.35.0 18 | google-auth-oauthlib==0.4.6 19 | grpcio==1.41.0 20 | idna==3.2 21 | imageio==2.9.0 22 | importlib-metadata==4.8.1 23 | joblib==1.1.0 24 | json-tricks==3.15.5 25 | kiwisolver==1.3.2 26 | local-attention==1.4.3 27 | Markdown==3.3.4 28 | matplotlib==3.4.3 29 | mkl-fft==1.3.0 30 | mkl-random==1.2.2 31 | mkl-service==2.4.0 32 | munkres==1.1.4 33 | numpy==1.21.2 34 | oauthlib==3.1.1 35 | olefile @ file:///home/conda/feedstock_root/build_artifacts/olefile_1602866521163/work 36 | opencv-python==4.5.3.56 37 | packaging==21.0 38 | pandas==1.3.3 39 | paramiko==2.7.2 40 | Pillow @ file:///tmp/build/80754af9/pillow_1625670624344/work 41 | product-key-memory==0.1.10 42 | protobuf==3.18.0 43 | pyasn1==0.4.8 44 | pyasn1-modules==0.2.8 45 | pycocotools==2.0 46 | pycparser==2.20 47 | PyNaCl==1.4.0 48 | pynvml==11.4.1 49 | pyparsing==3.0.0rc1 50 | python-dateutil==2.8.2 51 | pytorch-msssim==0.2.1 52 | pytz==2021.1 53 | PyYAML==5.4.1 54 | reformer-pytorch==1.4.3 55 | requests==2.26.0 56 | requests-oauthlib==1.3.0 57 | rsa==4.7.2 58 | scikit-learn==1.0.2 59 | scipy==1.5.4 60 | setuptools-scm==6.3.2 61 | six @ file:///home/conda/feedstock_root/build_artifacts/six_1620240208055/work 62 | sklearn==0.0 63 | tensorboard==2.6.0 64 | tensorboard-data-server==0.6.1 65 | tensorboard-plugin-wit==1.8.0 66 | tensorboardX==2.4 67 | threadpoolctl==3.0.0 68 | timm==0.4.12 
# cd Pyraformer
# Look-back-window study: run long_range_main.py for several input lengths
# per dataset and write one log file per (dataset, input length, horizon).
#
# The window size is quoted: an unquoted [2,2,2] is a glob bracket expression
# and would be replaced by the shell if a file named "2" or "," exists in the
# working directory.

# -p creates missing parents and is a no-op when the directory exists.
mkdir -p ../logs/LookBackWindow

for seqlen in 24 48 72 96 120 144 168 336 504 672 720
do
for pred_len in 720
do
    python long_range_main.py -window_size '[2,2,2]' -data_path electricity.csv -data electricity -input_size $seqlen -predict_step $pred_len -n_head 6 -lr 0.00001 -d_model 256 >../logs/LookBackWindow/Pyraformer_electricity_$seqlen'_'$pred_len.log
    python long_range_main.py -window_size '[2,2,2]' -data_path exchange_rate.csv -data exchange -input_size $seqlen -predict_step $pred_len -n_head 6 -lr 0.00001 -d_model 256 >../logs/LookBackWindow/Pyraformer_exchange_rate_$seqlen'_'$pred_len.log
    python long_range_main.py -window_size '[2,2,2]' -data_path traffic.csv -data traffic -input_size $seqlen -predict_step $pred_len -n_head 6 -lr 0.00001 -d_model 256 >../logs/LookBackWindow/Pyraformer_traffic_$seqlen'_'$pred_len.log
    python long_range_main.py -window_size '[2,2,2]' -data_path weather.csv -data weather -input_size $seqlen -predict_step $pred_len -n_head 6 -lr 0.00001 -d_model 256 >../logs/LookBackWindow/Pyraformer_weather_$seqlen'_'$pred_len.log
    python long_range_main.py -window_size '[2,2,2]' -data ETTh1 -input_size $seqlen -predict_step $pred_len -n_head 6 >../logs/LookBackWindow/Pyraformer_ETTh1_$seqlen'_'$pred_len.log
    python long_range_main.py -window_size '[2,2,2]' -data ETTh2 -data_path ETTh2.csv -input_size $seqlen -predict_step $pred_len -n_head 6 >../logs/LookBackWindow/Pyraformer_ETTh2_$seqlen'_'$pred_len.log
done
done

for seqlen in 26 52 78 104 130 156 208
do
for pred_len in 24 60
do
    python long_range_main.py -window_size '[2,2,2]' -data_path national_illness.csv -data ili -input_size $seqlen -predict_step $pred_len -n_head 6 -lr 0.00001 -d_model 256 >../logs/LookBackWindow/Pyraformer_ili_$seqlen'_'$pred_len.log
done
done

for seqlen in 24 36 48 60 72 144 288
do
for pred_len in 24 576
do
    python long_range_main.py -window_size '[2,2,2]' -data ETTm1 -data_path ETTm1.csv -input_size $seqlen -predict_step $pred_len -batch_size 16 -dropout 0.2 -n_head 6 -d_model 256 -d_bottleneck 64 -d_k 64 -d_v 64 >../logs/LookBackWindow/Pyraformer_ETTm1_$seqlen'_'$pred_len.log
    python long_range_main.py -window_size '[2,2,2]' -data ETTm2 -data_path ETTm2.csv -input_size $seqlen -predict_step $pred_len -batch_size 16 -dropout 0.2 -n_head 6 -d_model 256 -d_bottleneck 64 -d_k 64 -d_v 64 >../logs/LookBackWindow/Pyraformer_ETTm2_$seqlen'_'$pred_len.log
done
done
# cd ..
def covariance(data):
    """Compute the biased sample covariance matrix of the data.

    Args:
        data: array-like of shape (num_samples, length); each row is one
            sample and each column one variable.

    Returns:
        A (length, length) float array whose entry [i, j] is the mean over
        samples of the product of the centred columns i and j (the sum is
        divided by num_samples, not num_samples - 1).
    """
    data = np.asarray(data, dtype=float)
    centered = data - data.mean(axis=0)
    # One matrix product replaces the element-by-element double loop:
    # (centered.T @ centered)[i, j] == sum over samples of centered[:, i] * centered[:, j].
    return centered.T @ centered / centered.shape[0]
class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        """Return the ISO week number of each timestamp mapped to [-0.5, 0.5].

        Week 1 maps to -0.5 and week 53 maps to 0.5.
        """
        # isocalendar().week is a pandas Series with a nullable integer dtype;
        # convert it to a plain float64 ndarray so the result matches the
        # declared return type and stacks cleanly with the other features.
        weeks = index.isocalendar().week.to_numpy(dtype=np.float64)
        return (weeks - 1) / 52.0 - 0.5
def time_features(dates, timeenc=1, freq='h'):
    """Build a matrix of time covariates from a frame with a ``date`` column.

    Args:
        dates: DataFrame with a ``date`` column holding datetime-like values.
            The frame is not modified.
        timeenc: 0 for raw integer calendar fields, 1 for the normalised
            features chosen by ``time_features_from_frequency_str``.
        freq: frequency string. With ``timeenc=0`` it is lower-cased and must
            be one of 'y', 'm', 'w', 'd', 'b', 'h', 't'.

    Returns:
        A 2-D array of shape (len(dates), num_features). With ``timeenc=0``
        the columns are int64 and taken, in order, from month, day, weekday
        (Monday=0), hour and minute // 15, depending on ``freq``.

    Raises:
        KeyError: ``timeenc=0`` and ``freq`` is not in the table above.
        ValueError: ``timeenc`` is neither 0 nor 1.
    """
    if timeenc == 0:
        # Work on a DatetimeIndex instead of adding helper columns to the
        # caller's DataFrame.
        stamps = pd.DatetimeIndex(pd.to_datetime(dates['date'].values))
        fields = {
            'month': stamps.month,
            'day': stamps.day,
            'weekday': stamps.weekday,
            'hour': stamps.hour,
            # minutes are bucketed into quarter hours (0..3)
            'minute': stamps.minute // 15,
        }
        freq_map = {
            'y': [], 'm': ['month'], 'w': ['month'], 'd': ['month', 'day', 'weekday'],
            'b': ['month', 'day', 'weekday'], 'h': ['month', 'day', 'weekday', 'hour'],
            't': ['month', 'day', 'weekday', 'hour', 'minute'],
        }
        columns = freq_map[freq.lower()]
        if not columns:
            return np.empty((len(stamps), 0), dtype=np.int64)
        return np.stack([np.asarray(fields[name], dtype=np.int64) for name in columns], axis=1)
    if timeenc == 1:
        dates = pd.to_datetime(dates.date.values)
        return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]).transpose(1, 0)
    # Previously fell through and returned None, which only failed later at the caller.
    raise ValueError(f"Unsupported timeenc {timeenc!r}; expected 0 or 1")
class TopkMSELoss(torch.nn.Module):
    """MSE loss that keeps only the largest per-sample losses of a batch."""

    def __init__(self, topk) -> None:
        """
        Args:
            topk: maximum number of per-sample losses to keep.
        """
        super().__init__()
        self.topk = topk
        self.criterion = torch.nn.MSELoss(reduction='none')

    def forward(self, output, label):
        """Return the largest per-sample mean squared errors, in descending order.

        Args:
            output: 3-D prediction tensor; dimension 0 indexes the samples.
            label: target tensor with the same shape as ``output``.

        Returns:
            1-D tensor with ``min(topk, batch_size)`` entries.
        """
        # Average the element-wise squared error over dims 2 and 1, leaving one value per sample.
        losses = self.criterion(output, label).mean(2).mean(1)
        # A batch smaller than topk (e.g. the last one) would make torch.topk raise.
        k = min(self.topk, losses.shape[0])
        losses = torch.topk(losses, k)[0]

        return losses
# ALL scripts in this file come from Autoformer
# -p creates both directory levels and is a no-op when they already exist.
mkdir -p ./logs/LongForecasting

random_seed=2021
model_name=Transformer

# Run one long-term-forecasting experiment and redirect its output to a log file.
# Reads the globals random_seed, model_name and pred_len.
#   $1 data file     $2 model_id prefix   $3 value for --data
#   $4 seq_len       $5 label_len         $6 channel count (enc_in/dec_in/c_out)
#   $7 tag used in the log file name      remaining args: extra flags for run_longExp.py
run_experiment() {
    data_path=$1
    id_prefix=$2
    data_name=$3
    seq_len=$4
    label_len=$5
    channels=$6
    log_tag=$7
    shift 7
    python -u run_longExp.py \
      --random_seed "$random_seed" \
      --is_training 1 \
      --root_path ./dataset/ \
      --data_path "$data_path" \
      --model_id "${id_prefix}_${seq_len}_${pred_len}" \
      --model "$model_name" \
      --data "$data_name" \
      --features M \
      --seq_len "$seq_len" \
      --label_len "$label_len" \
      --pred_len "$pred_len" \
      --e_layers 2 \
      --d_layers 1 \
      --factor 3 \
      --enc_in "$channels" \
      --dec_in "$channels" \
      --c_out "$channels" \
      --des 'Exp' \
      --itr 1 \
      "$@" >"logs/LongForecasting/${model_name}_${log_tag}_${pred_len}.log"
}

for pred_len in 96 192 336 720
do
  run_experiment exchange_rate.csv exchange    custom 96 48 8   exchange_rate --train_epochs 1
  run_experiment electricity.csv   electricity custom 96 48 321 electricity
  run_experiment traffic.csv       traffic     custom 96 48 862 traffic       --train_epochs 3
  run_experiment weather.csv       weather     custom 96 48 21  weather       --train_epochs 2
  run_experiment ETTh1.csv         ETTh1       ETTh1  96 48 7   Etth1
  run_experiment ETTh2.csv         ETTh2       ETTh2  96 48 7   Etth2
  run_experiment ETTm1.csv         ETTm1       ETTm1  96 48 7   Ettm1
  run_experiment ETTm2.csv         ETTm2       ETTm2  96 48 7   Ettm2
done
# NOTE: the original had a second, unmatched `done` here; it is a syntax error
# that aborted the script before the ILI experiments below could run.

for model_name in Autoformer Informer Transformer
do
for pred_len in 24 36 48 60
do
  run_experiment national_illness.csv ili custom 36 18 7 ili
done
done
The PEMS data can be downloaded from https://github.com/zezhishao/BasicTS. The npz files in the dataset can be converted to csv files using data_proc_pems.ipynb. 10 | 11 | 3. Training. All the scripts are in the directory ```./scripts/```. The scripts can be run using commands such as 12 | 13 | ``` 14 | sh ./scripts/weather.sh 15 | ``` 16 | 17 | The results will be written to the log files once the training is completed. The path of the log files will be printed at the beginning of the training. 18 | 19 | 20 | ## Acknowledgement 21 | 22 | We are grateful to the following GitHub repositories for their valuable code bases and datasets: 23 | 24 | https://github.com/yuqinie98/PatchTST 25 | 26 | https://github.com/cure-lab/LTSF-Linear 27 | 28 | https://github.com/zhouhaoyi/Informer2020 29 | 30 | https://github.com/thuml/Autoformer 31 | 32 | https://github.com/MAZiqing/FEDformer 33 | 34 | https://github.com/alipay/Pyraformer 35 | 36 | https://github.com/ts-kim/RevIN 37 | 38 | https://github.com/timeseriesAI/tsai 39 | 40 | https://github.com/zezhishao/BasicTS 41 | 42 | 43 | ## License 44 | 45 | Some of the code is taken from https://github.com/yuqinie98/PatchTST. These files are licensed under the Apache License Version 2.0. 46 | 47 | The new files of JTFT are licensed under the GNU General Public License (GPL) version 2.0. A comment stating the license appears at the beginning of each of these files.
48 | -------------------------------------------------------------------------------- /data_proc_pems.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 29, 6 | "id": "c2acb6ed", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "import pandas as pd\n", 13 | "import datetime" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 38, 19 | "id": "aff427f7", 20 | "metadata": {}, 21 | "outputs": [ 22 | { 23 | "name": "stdout", 24 | "output_type": "stream", 25 | "text": [ 26 | "(16992, 307, 3)\n" 27 | ] 28 | } 29 | ], 30 | "source": [ 31 | "data04=np.load(r'PEMS04.npz')\n", 32 | "print(data04['data'].shape)\n", 33 | "n_t,n_port,n_chan=data04['data'].shape\n", 34 | "df=pd.DataFrame(data04['data'][:,:,0])\n", 35 | "cols=[]\n", 36 | "for idx in range(n_port-1):\n", 37 | " cols.append(str(idx))\n", 38 | "cols.append('OT')\n", 39 | "cols=[]\n", 40 | "for idx in range(n_port-1):\n", 41 | " cols.append(str(idx))\n", 42 | "cols.append('OT')\n", 43 | "df.columns=cols\n", 44 | "df.insert(0,'date',r\"2020/1/1 0:10:00\")\n", 45 | "df.to_csv(\"PEMS04Flow.csv\",index=None)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 39, 51 | "id": "49811f66", 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "name": "stdout", 56 | "output_type": "stream", 57 | "text": [ 58 | "(17856, 170, 3)\n" 59 | ] 60 | } 61 | ], 62 | "source": [ 63 | "data08=np.load(r'PEMS08.npz')\n", 64 | "print(data08['data'].shape)\n", 65 | "n_t,n_port,n_chan=data08['data'].shape\n", 66 | "df=pd.DataFrame(data08['data'][:,:,0])\n", 67 | "cols=[]\n", 68 | "for idx in range(n_port-1):\n", 69 | " cols.append(str(idx))\n", 70 | "cols.append('OT')\n", 71 | "cols=[]\n", 72 | "for idx in range(n_port-1):\n", 73 | " cols.append(str(idx))\n", 74 | "cols.append('OT')\n", 75 | "df.columns=cols\n", 76 
| "df.insert(0,'date',r\"2020/1/1 0:10:00\")\n", 77 | "df.to_csv(\"PEMS08Flow.csv\",index=None)" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Python 3 (ipykernel)", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.9.12" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /data_provider/data_factory.py: -------------------------------------------------------------------------------- 1 | from data_provider.data_loader import Dataset_ETT_hour, Dataset_ETT_minute, Dataset_Custom, Dataset_Pred 2 | from torch.utils.data import DataLoader 3 | 4 | data_dict = { 5 | 'ETTh1': Dataset_ETT_hour, 6 | 'ETTh2': Dataset_ETT_hour, 7 | 'ETTm1': Dataset_ETT_minute, 8 | 'ETTm2': Dataset_ETT_minute, 9 | 'custom': Dataset_Custom, 10 | } 11 | 12 | 13 | def data_provider(args, flag): 14 | Data = data_dict[args.data] 15 | timeenc = 0 if args.embed != 'timeF' else 1 16 | 17 | if flag == 'test': 18 | shuffle_flag = False 19 | drop_last = True 20 | batch_size = args.batch_size 21 | freq = args.freq 22 | elif flag == 'pred': 23 | shuffle_flag = False 24 | drop_last = False 25 | batch_size = 1 26 | freq = args.freq 27 | Data = Dataset_Pred 28 | else: 29 | shuffle_flag = True 30 | drop_last = True 31 | batch_size = args.batch_size 32 | freq = args.freq 33 | 34 | data_set = Data( 35 | root_path=args.root_path, 36 | data_path=args.data_path, 37 | flag=flag, 38 | size=[args.seq_len, args.label_len, args.pred_len], 39 | features=args.features, 40 | target=args.target, 41 | timeenc=timeenc, 42 | freq=freq 43 | ) 44 | len_data_set = len(data_set) 45 | print(flag, len_data_set) 
class Exp_Basic(object):
    """Base class for experiments: holds the args, the device and the model.

    Subclasses must implement ``_build_model`` and normally override
    ``_get_data``, ``vali``, ``train`` and ``test``.
    """

    def __init__(self, args):
        """
        Args:
            args: namespace read for ``use_gpu``, ``gpu``, ``use_multi_gpu``,
                ``devices`` and, optionally, ``b_not_compile``.
        """
        self.args = args
        self.device = self._acquire_device()
        self.model = self._build_model().to(self.device)
        # torch.compile only exists from PyTorch 2.0 on; skip it on older
        # versions instead of failing with AttributeError.
        compile_requested = not getattr(self.args, 'b_not_compile', False)
        if compile_requested and hasattr(torch, 'compile'):
            self.model = torch.compile(self.model)  # , mode="max-autotune"

    def _build_model(self):
        """Create and return the model; must be provided by subclasses."""
        raise NotImplementedError

    def _acquire_device(self):
        """Select the torch device and, for GPU runs, set CUDA_VISIBLE_DEVICES."""
        if self.args.use_gpu:
            # NOTE(review): the device index below is still args.gpu after the
            # visible devices were restricted - confirm this is intended for gpu != 0.
            os.environ["CUDA_VISIBLE_DEVICES"] = str(
                self.args.gpu) if not self.args.use_multi_gpu else self.args.devices
            device = torch.device('cuda:{}'.format(self.args.gpu))
            print('Use GPU: cuda:{}'.format(self.args.gpu))
        else:
            device = torch.device('cpu')
            print('Use CPU')
        return device

    def _get_data(self):
        pass

    def vali(self):
        pass

    def train(self):
        pass

    def test(self):
        pass
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """
    def __init__(self, kernel_size, stride):
        """
        Args:
            kernel_size: width of the averaging window.
            stride: stride of the underlying ``nn.AvgPool1d``.
        """
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Average ``x`` along dim 1 after replicating its first and last step.

        ``x`` is indexed as [batch, time, channel]. With stride 1 the output
        has the same shape as the input for every kernel size.
        """
        # padding on the both ends of time series
        # kernel_size - 1 steps are needed in total. For an even kernel that
        # number is odd, so the front takes the extra step; padding
        # (kernel_size - 1) // 2 on both sides would leave the output one step short.
        pad_total = self.kernel_size - 1
        pad_end = pad_total // 2
        pad_front = pad_total - pad_end
        front = x[:, 0:1, :].repeat(1, pad_front, 1)
        end = x[:, -1:, :].repeat(1, pad_end, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last dim, so move time there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x
def Coord2dPosEncoding(q_len, d_model, exponential=False, normalize=True, eps=1e-3, verbose=False):
    """Build a (q_len, d_model) coordinate-product positional encoding.

    The encoding is ``2 * r**x * c**x - 1`` where ``r`` and ``c`` are evenly
    spaced in [0, 1] along the rows and columns. The exponent ``x`` starts at
    0.5 (``exponential=True``) or 1 and is nudged by 0.001 per iteration, for
    at most 100 iterations, until the mean is within ``eps`` of zero.

    Args:
        q_len: number of positions (rows).
        d_model: embedding width (columns).
        exponential: start the search from exponent 0.5 instead of 1.
        normalize: subtract the mean and scale to a standard deviation of 0.1.
        eps: tolerance on the absolute mean that stops the search.
        verbose: print iteration number, exponent and mean at each step.

    Returns:
        Tensor of shape (q_len, d_model).
    """
    x = .5 if exponential else 1
    # The coordinate grids do not depend on x, so build them once.
    rows = torch.linspace(0, 1, q_len).reshape(-1, 1)
    cols = torch.linspace(0, 1, d_model).reshape(1, -1)
    for i in range(100):
        cpe = 2 * (rows ** x) * (cols ** x) - 1
        mean = cpe.mean()
        # Plain print replaces the `pv` helper, which this file never defined or imported.
        if verbose:
            print(f'{i:4.0f} {x:5.3f} {mean:+6.3f}')
        if abs(mean) <= eps: break
        elif mean > eps: x += .001
        else: x -= .001
    if normalize:
        cpe = cpe - cpe.mean()
        cpe = cpe / (cpe.std() * 10)
    return cpe
class RevIN(nn.Module):
    """Reversible instance normalisation.

    ``forward(x, 'norm')`` stores per-instance statistics of ``x`` and returns
    the normalised tensor; ``forward(y, 'denorm')`` uses the stored statistics
    to map a tensor back to the original scale.
    """

    def __init__(self, num_features: int, eps=1e-5, affine=True, subtract_last=False, use_max_std=False):
        """
        :param num_features: the number of features or channels
        :param eps: a value added for numerical stability
        :param affine: if True, RevIN has learnable affine parameters
        :param subtract_last: if True, centre on the last step along dim 1 instead of the mean
        :param use_max_std: if True, use the max std instead of the std of each channel in order to keep the ratio of different channels
        """
        super(RevIN, self).__init__()
        self.num_features = num_features
        self.eps = eps
        self.affine = affine
        self.subtract_last = subtract_last
        self.use_max_std = use_max_std
        if self.affine:
            self._init_params()

    def forward(self, x, mode: str):
        """Normalise (``mode='norm'``) or de-normalise (``mode='denorm'``) ``x``.

        'denorm' relies on the statistics stored by the latest 'norm' call.
        Raises NotImplementedError for any other mode.
        """
        if mode == 'norm':
            self._get_statistics(x)
            x = self._normalize(x)
        elif mode == 'denorm':
            x = self._denormalize(x)
        else:
            raise NotImplementedError(f"unknown RevIN mode {mode!r}; expected 'norm' or 'denorm'")
        return x

    def _init_params(self):
        # initialize RevIN params: (C,)
        self.affine_weight = nn.Parameter(torch.ones(self.num_features))
        self.affine_bias = nn.Parameter(torch.zeros(self.num_features))

    def _get_statistics(self, x):
        """Store the centring term and standard deviation of ``x``."""
        # Reduce over every dim except the first and the last.
        dim2reduce = tuple(range(1, x.ndim - 1))
        if self.subtract_last:
            self.last = x[:, -1, :].unsqueeze(1)
        else:
            self.mean = torch.mean(x, dim=dim2reduce, keepdim=True).detach()
        self.stdev = torch.sqrt(torch.var(x, dim=dim2reduce, keepdim=True, unbiased=False) + self.eps).detach()
        if self.use_max_std:
            # Share the largest std across the last dim.
            self.stdev = torch.max(self.stdev, dim=-1, keepdim=True).values.detach()

    def _normalize(self, x):
        if self.subtract_last:
            x = x - self.last
        else:
            x = x - self.mean
        x = x / self.stdev
        if self.affine:
            x = x * self.affine_weight
            x = x + self.affine_bias
        return x

    def _denormalize(self, x):
        if self.affine:
            x = x - self.affine_bias
            # eps**2 guards against division by an affine weight of exactly zero.
            x = x / (self.affine_weight + self.eps * self.eps)
        x = x * self.stdev
        if self.subtract_last:
            x = x + self.last
        else:
            x = x + self.mean
        return x
/ sqrt(E) 26 | 27 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 28 | 29 | if self.mask_flag: 30 | if attn_mask is None: 31 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 32 | 33 | scores.masked_fill_(attn_mask.mask, -np.inf) 34 | 35 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 36 | V = torch.einsum("bhls,bshd->blhd", A, values) 37 | 38 | if self.output_attention: 39 | return (V.contiguous(), A) 40 | else: 41 | return (V.contiguous(), None) 42 | 43 | 44 | class ProbAttention(nn.Module): 45 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 46 | super(ProbAttention, self).__init__() 47 | self.factor = factor 48 | self.scale = scale 49 | self.mask_flag = mask_flag 50 | self.output_attention = output_attention 51 | self.dropout = nn.Dropout(attention_dropout) 52 | 53 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 54 | # Q [B, H, L, D] 55 | B, H, L_K, E = K.shape 56 | _, _, L_Q, _ = Q.shape 57 | 58 | # calculate the sampled Q_K 59 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 60 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q 61 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] 62 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 63 | 64 | # find the Top_k query with sparisty measurement 65 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 66 | M_top = M.topk(n_top, sorted=False)[1] 67 | 68 | # use the reduced Q to calculate Q_K 69 | Q_reduce = Q[torch.arange(B)[:, None, None], 70 | torch.arange(H)[None, :, None], 71 | M_top, :] # factor*ln(L_q) 72 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 73 | 74 | return Q_K, M_top 75 | 76 | def _get_initial_context(self, V, L_Q): 77 | B, H, L_V, D = V.shape 78 | if not self.mask_flag: 79 | # V_sum = V.sum(dim=-2) 80 | V_sum = V.mean(dim=-2) 81 | contex = 
V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() 82 | else: # use mask 83 | assert (L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only 84 | contex = V.cumsum(dim=-2) 85 | return contex 86 | 87 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 88 | B, H, L_V, D = V.shape 89 | 90 | if self.mask_flag: 91 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 92 | scores.masked_fill_(attn_mask.mask, -np.inf) 93 | 94 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 95 | 96 | context_in[torch.arange(B)[:, None, None], 97 | torch.arange(H)[None, :, None], 98 | index, :] = torch.matmul(attn, V).type_as(context_in) 99 | if self.output_attention: 100 | attns = (torch.ones([B, H, L_V, L_V]) / L_V).type_as(attn).to(attn.device) 101 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn 102 | return (context_in, attns) 103 | else: 104 | return (context_in, None) 105 | 106 | def forward(self, queries, keys, values, attn_mask): 107 | B, L_Q, H, D = queries.shape 108 | _, L_K, _, _ = keys.shape 109 | 110 | queries = queries.transpose(2, 1) 111 | keys = keys.transpose(2, 1) 112 | values = values.transpose(2, 1) 113 | 114 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 115 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 116 | 117 | U_part = U_part if U_part < L_K else L_K 118 | u = u if u < L_Q else L_Q 119 | 120 | scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u) 121 | 122 | # add scale factor 123 | scale = self.scale or 1. 
/ sqrt(D) 124 | if scale is not None: 125 | scores_top = scores_top * scale 126 | # get the context 127 | context = self._get_initial_context(values, L_Q) 128 | # update the context with selected top_k queries 129 | context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask) 130 | 131 | return context.contiguous(), attn 132 | 133 | 134 | class AttentionLayer(nn.Module): 135 | def __init__(self, attention, d_model, n_heads, d_keys=None, 136 | d_values=None): 137 | super(AttentionLayer, self).__init__() 138 | 139 | d_keys = d_keys or (d_model // n_heads) 140 | d_values = d_values or (d_model // n_heads) 141 | 142 | self.inner_attention = attention 143 | self.query_projection = nn.Linear(d_model, d_keys * n_heads) 144 | self.key_projection = nn.Linear(d_model, d_keys * n_heads) 145 | self.value_projection = nn.Linear(d_model, d_values * n_heads) 146 | self.out_projection = nn.Linear(d_values * n_heads, d_model) 147 | self.n_heads = n_heads 148 | 149 | def forward(self, queries, keys, values, attn_mask): 150 | B, L, _ = queries.shape 151 | _, S, _ = keys.shape 152 | H = self.n_heads 153 | 154 | queries = self.query_projection(queries).view(B, L, H, -1) 155 | keys = self.key_projection(keys).view(B, S, H, -1) 156 | values = self.value_projection(values).view(B, S, H, -1) 157 | 158 | out, attn = self.inner_attention( 159 | queries, 160 | keys, 161 | values, 162 | attn_mask 163 | ) 164 | out = out.view(B, L, -1) 165 | 166 | return self.out_projection(out), attn 167 | -------------------------------------------------------------------------------- /layers/Transformer_EncDec.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | 6 | class ConvLayer(nn.Module): 7 | def __init__(self, c_in): 8 | super(ConvLayer, self).__init__() 9 | self.downConv = nn.Conv1d(in_channels=c_in, 10 | out_channels=c_in, 11 | kernel_size=3, 12 | 
padding=2, 13 | padding_mode='circular') 14 | self.norm = nn.BatchNorm1d(c_in) 15 | self.activation = nn.ELU() 16 | self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1) 17 | 18 | def forward(self, x): 19 | x = self.downConv(x.permute(0, 2, 1)) 20 | x = self.norm(x) 21 | x = self.activation(x) 22 | x = self.maxPool(x) 23 | x = x.transpose(1, 2) 24 | return x 25 | 26 | 27 | class EncoderLayer(nn.Module): 28 | def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"): 29 | super(EncoderLayer, self).__init__() 30 | d_ff = d_ff or 4 * d_model 31 | self.attention = attention 32 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 33 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 34 | self.norm1 = nn.LayerNorm(d_model) 35 | self.norm2 = nn.LayerNorm(d_model) 36 | self.dropout = nn.Dropout(dropout) 37 | self.activation = F.relu if activation == "relu" else F.gelu 38 | 39 | def forward(self, x, attn_mask=None): 40 | new_x, attn = self.attention( 41 | x, x, x, 42 | attn_mask=attn_mask 43 | ) 44 | x = x + self.dropout(new_x) 45 | 46 | y = x = self.norm1(x) 47 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 48 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 49 | 50 | return self.norm2(x + y), attn 51 | 52 | 53 | class Encoder(nn.Module): 54 | def __init__(self, attn_layers, conv_layers=None, norm_layer=None): 55 | super(Encoder, self).__init__() 56 | self.attn_layers = nn.ModuleList(attn_layers) 57 | self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None 58 | self.norm = norm_layer 59 | 60 | def forward(self, x, attn_mask=None): 61 | # x [B, L, D] 62 | attns = [] 63 | if self.conv_layers is not None: 64 | for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers): 65 | x, attn = attn_layer(x, attn_mask=attn_mask) 66 | x = conv_layer(x) 67 | attns.append(attn) 68 | x, attn = self.attn_layers[-1](x) 69 | attns.append(attn) 
70 | else: 71 | for attn_layer in self.attn_layers: 72 | x, attn = attn_layer(x, attn_mask=attn_mask) 73 | attns.append(attn) 74 | 75 | if self.norm is not None: 76 | x = self.norm(x) 77 | 78 | return x, attns 79 | 80 | 81 | class DecoderLayer(nn.Module): 82 | def __init__(self, self_attention, cross_attention, d_model, d_ff=None, 83 | dropout=0.1, activation="relu"): 84 | super(DecoderLayer, self).__init__() 85 | d_ff = d_ff or 4 * d_model 86 | self.self_attention = self_attention 87 | self.cross_attention = cross_attention 88 | self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1) 89 | self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1) 90 | self.norm1 = nn.LayerNorm(d_model) 91 | self.norm2 = nn.LayerNorm(d_model) 92 | self.norm3 = nn.LayerNorm(d_model) 93 | self.dropout = nn.Dropout(dropout) 94 | self.activation = F.relu if activation == "relu" else F.gelu 95 | 96 | def forward(self, x, cross, x_mask=None, cross_mask=None): 97 | x = x + self.dropout(self.self_attention( 98 | x, x, x, 99 | attn_mask=x_mask 100 | )[0]) 101 | x = self.norm1(x) 102 | 103 | x = x + self.dropout(self.cross_attention( 104 | x, cross, cross, 105 | attn_mask=cross_mask 106 | )[0]) 107 | 108 | y = x = self.norm2(x) 109 | y = self.dropout(self.activation(self.conv1(y.transpose(-1, 1)))) 110 | y = self.dropout(self.conv2(y).transpose(-1, 1)) 111 | 112 | return self.norm3(x + y) 113 | 114 | 115 | class Decoder(nn.Module): 116 | def __init__(self, layers, norm_layer=None, projection=None): 117 | super(Decoder, self).__init__() 118 | self.layers = nn.ModuleList(layers) 119 | self.norm = norm_layer 120 | self.projection = projection 121 | 122 | def forward(self, x, cross, x_mask=None, cross_mask=None): 123 | for layer in self.layers: 124 | x = layer(x, cross, x_mask=x_mask, cross_mask=cross_mask) 125 | 126 | if self.norm is not None: 127 | x = self.norm(x) 128 | 129 | if self.projection is not None: 130 | x = self.projection(x) 131 | return x 
# ---- models/Autoformer.py ----
class Model(nn.Module):
    """
    Autoformer is the first method to achieve the series-wise connection,
    with inherent O(LlogL) complexity
    """

    def __init__(self, configs):
        """Build decomposition, embeddings, encoder and decoder from ``configs``."""
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.label_len = configs.label_len
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Decomp
        self.decomp = series_decomp(configs.moving_avg)

        # Embedding
        # The series-wise connection inherently contains the sequential information.
        # Thus, we can discard the position embedding of transformers.
        embedding_by_type = {
            0: DataEmbedding_wo_pos,
            1: DataEmbedding,
            2: DataEmbedding_wo_pos,
            3: DataEmbedding_wo_temp,
            4: DataEmbedding_wo_pos_temp,
        }
        embedding_cls = embedding_by_type.get(configs.embed_type)
        # An unknown embed_type leaves both embeddings undefined.
        if embedding_cls is not None:
            self.enc_embedding = embedding_cls(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
            self.dec_embedding = embedding_cls(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)

        def correlation_layer(mask_flag, output_attention):
            # One auto-correlation block wrapped with its multi-head projections.
            return AutoCorrelationLayer(
                AutoCorrelation(mask_flag, configs.factor, attention_dropout=configs.dropout,
                                output_attention=output_attention),
                configs.d_model, configs.n_heads)

        # Encoder
        encoder_layers = [
            EncoderLayer(
                correlation_layer(False, configs.output_attention),
                configs.d_model,
                configs.d_ff,
                moving_avg=configs.moving_avg,
                dropout=configs.dropout,
                activation=configs.activation
            )
            for _ in range(configs.e_layers)
        ]
        self.encoder = Encoder(encoder_layers, norm_layer=my_Layernorm(configs.d_model))

        # Decoder
        decoder_layers = [
            DecoderLayer(
                correlation_layer(True, False),
                correlation_layer(False, False),
                configs.d_model,
                configs.c_out,
                configs.d_ff,
                moving_avg=configs.moving_avg,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.d_layers)
        ]
        self.decoder = Decoder(
            decoder_layers,
            norm_layer=my_Layernorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """Return the last ``pred_len`` steps (plus encoder attentions if ``output_attention``)."""
        # decomp init
        horizon_mean = torch.mean(x_enc, dim=1).unsqueeze(1).repeat(1, self.pred_len, 1)
        horizon_zeros = torch.zeros([x_dec.shape[0], self.pred_len, x_dec.shape[2]], device=x_enc.device)
        seasonal_init, trend_init = self.decomp(x_enc)
        # decoder input: last label_len steps followed by a placeholder horizon
        trend_init = torch.cat([trend_init[:, -self.label_len:, :], horizon_mean], dim=1)
        seasonal_init = torch.cat([seasonal_init[:, -self.label_len:, :], horizon_zeros], dim=1)
        # enc
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
        # dec
        dec_out = self.dec_embedding(seasonal_init, x_mark_dec)
        seasonal_part, trend_part = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask,
                                                 trend=trend_init)
        # final
        forecast = (trend_part + seasonal_part)[:, -self.pred_len:, :]  # [B, L, D]

        if self.output_attention:
            return forecast, attns
        return forecast
# ---- models/DLinear.py ----
class moving_avg(nn.Module):
    """
    Moving average block to highlight the trend of time series
    """

    def __init__(self, kernel_size, stride):
        super(moving_avg, self).__init__()
        self.kernel_size = kernel_size
        self.avg = nn.AvgPool1d(kernel_size=kernel_size, stride=stride, padding=0)

    def forward(self, x):
        """Smooth ``x`` ([Batch, Length, Channel]) along the length axis.

        The series is edge-padded by repeating its first and last step so
        that, with ``stride == 1``, the output keeps the input length for odd
        *and* even kernel sizes.
        """
        # padding on the both ends of time series; kernel_size - 1 steps in
        # total. For odd kernels both sides get (k - 1) // 2; for even kernels
        # the extra step goes to the front so the length is preserved.
        end_pad = (self.kernel_size - 1) // 2
        front_pad = self.kernel_size - 1 - end_pad
        front = x[:, 0:1, :].repeat(1, front_pad, 1)
        end = x[:, -1:, :].repeat(1, end_pad, 1)
        x = torch.cat([front, x, end], dim=1)
        # AvgPool1d pools over the last axis, so move time there and back.
        x = self.avg(x.permute(0, 2, 1))
        x = x.permute(0, 2, 1)
        return x


class series_decomp(nn.Module):
    """
    Series decomposition block
    """

    def __init__(self, kernel_size):
        super(series_decomp, self).__init__()
        self.moving_avg = moving_avg(kernel_size, stride=1)

    def forward(self, x):
        """Return ``(residual, moving_mean)`` with ``residual + moving_mean == x``."""
        moving_mean = self.moving_avg(x)
        res = x - moving_mean
        return res, moving_mean


class Model(nn.Module):
    """
    Decomposition-Linear
    """

    def __init__(self, configs):
        """Read ``seq_len``, ``pred_len``, ``individual`` and ``enc_in`` from ``configs``."""
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len

        # Decomposition kernel size
        kernel_size = 25
        self.decompsition = series_decomp(kernel_size)
        self.individual = configs.individual
        self.channels = configs.enc_in

        if self.individual:
            # One pair of linear maps per channel.
            self.Linear_Seasonal = nn.ModuleList()
            self.Linear_Trend = nn.ModuleList()

            for _ in range(self.channels):
                self.Linear_Seasonal.append(nn.Linear(self.seq_len, self.pred_len))
                self.Linear_Trend.append(nn.Linear(self.seq_len, self.pred_len))
        else:
            # One pair of linear maps shared by all channels.
            self.Linear_Seasonal = nn.Linear(self.seq_len, self.pred_len)
            self.Linear_Trend = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        seasonal_init, trend_init = self.decompsition(x)
        seasonal_init, trend_init = seasonal_init.permute(0, 2, 1), trend_init.permute(0, 2, 1)
        if self.individual:
            seasonal_output = torch.zeros([seasonal_init.size(0), seasonal_init.size(1), self.pred_len],
                                          dtype=seasonal_init.dtype).to(seasonal_init.device)
            trend_output = torch.zeros([trend_init.size(0), trend_init.size(1), self.pred_len],
                                       dtype=trend_init.dtype).to(trend_init.device)
            for i in range(self.channels):
                seasonal_output[:, i, :] = self.Linear_Seasonal[i](seasonal_init[:, i, :])
                trend_output[:, i, :] = self.Linear_Trend[i](trend_init[:, i, :])
        else:
            seasonal_output = self.Linear_Seasonal(seasonal_init)
            trend_output = self.Linear_Trend(trend_init)

        x = seasonal_output + trend_output
        return x.permute(0, 2, 1)  # to [Batch, Output length, Channel]
# ---- models/Informer.py ----
class Model(nn.Module):
    """
    Informer with Propspare attention in O(LlogL) complexity
    """

    def __init__(self, configs):
        """Build embeddings, the (optionally distilling) encoder and the decoder from ``configs``."""
        super(Model, self).__init__()
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Embedding: types 0 and 1 both use the full DataEmbedding.
        embedding_by_type = {
            0: DataEmbedding,
            1: DataEmbedding,
            2: DataEmbedding_wo_pos,
            3: DataEmbedding_wo_temp,
            4: DataEmbedding_wo_pos_temp,
        }
        embedding_cls = embedding_by_type.get(configs.embed_type)
        # An unknown embed_type leaves both embeddings undefined.
        if embedding_cls is not None:
            self.enc_embedding = embedding_cls(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)
            self.dec_embedding = embedding_cls(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                               configs.dropout)

        def prob_layer(mask_flag, output_attention):
            # One ProbSparse attention block wrapped with its multi-head projections.
            return AttentionLayer(
                ProbAttention(mask_flag, configs.factor, attention_dropout=configs.dropout,
                              output_attention=output_attention),
                configs.d_model, configs.n_heads)

        # Encoder
        encoder_layers = [
            EncoderLayer(
                prob_layer(False, configs.output_attention),
                configs.d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation
            )
            for _ in range(configs.e_layers)
        ]
        # Distilling puts one ConvLayer between consecutive encoder layers.
        if configs.distil:
            distil_layers = [ConvLayer(configs.d_model) for _ in range(configs.e_layers - 1)]
        else:
            distil_layers = None
        self.encoder = Encoder(
            encoder_layers,
            distil_layers,
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )

        # Decoder
        decoder_layers = [
            DecoderLayer(
                prob_layer(True, False),
                prob_layer(False, False),
                configs.d_model,
                configs.d_ff,
                dropout=configs.dropout,
                activation=configs.activation,
            )
            for _ in range(configs.d_layers)
        ]
        self.decoder = Decoder(
            decoder_layers,
            norm_layer=torch.nn.LayerNorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """Return the last ``pred_len`` steps (plus encoder attentions if ``output_attention``)."""
        memory = self.enc_embedding(x_enc, x_mark_enc)
        memory, attns = self.encoder(memory, attn_mask=enc_self_mask)

        decoded = self.dec_embedding(x_dec, x_mark_dec)
        decoded = self.decoder(decoded, memory, x_mask=dec_self_mask, cross_mask=dec_enc_mask)

        forecast = decoded[:, -self.pred_len:, :]  # [B, L, D]
        if self.output_attention:
            return forecast, attns
        return forecast
# ---- models/JTFT.py ----
class Model(nn.Module):
    """JTFT wrapper: reads hyper-parameters from ``configs`` and builds the FreqTST backbone."""

    def __init__(self, configs,
                 b_aux_head:bool=True,
                 max_seq_len:Optional[int]=1024, d_k:Optional[int]=None, d_v:Optional[int]=None, norm:str='BatchNorm', attn_dropout:float=0.,
                 act:str="gelu", key_padding_mask:bool='auto',padding_var:Optional[int]=None, attn_mask:Optional[Tensor]=None, res_attention:bool=True,
                 pre_norm:bool=False, store_attn:bool=False, pe:str='zeros', learn_pe:bool=True, pretrain_head:bool=False, head_type = 'flatten', verbose:bool=False, **kwargs):

        super().__init__()

        # load parameters
        self.c_in = configs.enc_in
        context_window = configs.seq_len
        self.target_window = configs.pred_len
        self.seq_len = configs.seq_len
        n_concat_td = configs.n_concat_td
        self.n_decomp = configs.decomposition
        padding_patch = configs.padding_patch
        d_compress_max = configs.d_compress_max
        mod_scal_tfi = configs.mod_scal_tfi
        use_mark = configs.use_mark

        n_layers = configs.e_layers
        # The TFI stack defaults to the same depth as the main encoder.
        n_layers_tfi = n_layers if configs.e_layers_tfi is None else configs.e_layers_tfi
        n_heads = configs.n_heads
        d_model = configs.d_model
        d_ff = configs.d_ff
        dropout = configs.dropout
        fc_dropout = configs.fc_dropout
        head_dropout = configs.head_dropout
        self.patch_len = configs.patch_len
        n_freq = configs.n_freq
        self.stride = configs.stride
        self.b_learn_freq = True
        assert context_window % self.stride == 0, "Error: context_window % stride != 0"

        if hasattr(configs, 'fd_analysis'):
            # Analysis data and do not predict
            self.model = FreqTST_reconstuct(c_in=self.c_in, n_freq=n_freq,
                                            context_window=self.seq_len, b_learn_freq=False)
            return

        # Initialize model
        backbone_args = dict(
            c_in=self.c_in,
            n_freq=n_freq, n_concat_td=n_concat_td,
            context_window=self.seq_len, target_window=self.target_window,
            mod_scal_tfi=mod_scal_tfi,
            patch_len=self.patch_len, stride=self.stride,
            d_compress_max=d_compress_max,
            use_mark=use_mark,
            sep_time_freq=configs.sep_time_freq,
            b_learn_freq=self.b_learn_freq,
            n_decomp=self.n_decomp,
            n_layers=n_layers, n_layers_tfi=n_layers_tfi, d_model=d_model, n_heads=n_heads, d_ff=d_ff,
            dropout=dropout, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch=padding_patch,
        )
        self.model = FreqTST_ci_tfi(**backbone_args, **kwargs)

    def forward(self, x, z_mark, target_mark, **kwargs):  # x: [bs x seq_len x channel]
        # Keep only the most recent seq_len steps as input for the FD model.
        recent = x[:, -self.seq_len:, :]
        # Call CI_TFI model. out_mod [bs x pred_len x channel]
        return self.model(recent, z_mark=z_mark, target_mark=target_mark, **kwargs)


# ---- models/Linear.py ----
class Model(nn.Module):
    """
    Just one Linear layer
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        channels_first = x.permute(0, 2, 1)  # the linear map acts on the time axis
        projected = self.Linear(channels_first)
        return projected.permute(0, 2, 1)  # [Batch, Output length, Channel]


# ---- models/NLinear.py ----
class Model(nn.Module):
    """
    Normalization-Linear
    """

    def __init__(self, configs):
        super(Model, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.Linear = nn.Linear(self.seq_len, self.pred_len)

    def forward(self, x):
        # x: [Batch, Input length, Channel]
        # Subtract the last observed value, forecast the offset, then add it back.
        anchor = x[:, -1:, :].detach()
        centred = (x - anchor).permute(0, 2, 1)
        forecast = self.Linear(centred).permute(0, 2, 1)
        return forecast + anchor  # [Batch, Output length, Channel]
[Batch, Output length, Channel] -------------------------------------------------------------------------------- /models/PatchTST.py: -------------------------------------------------------------------------------- 1 | __all__ = ['PatchTST'] 2 | 3 | # Cell 4 | from typing import Callable, Optional 5 | import torch 6 | from torch import nn 7 | from torch import Tensor 8 | import torch.nn.functional as F 9 | import numpy as np 10 | 11 | from layers.PatchTST_backbone import PatchTST_backbone 12 | from layers.PatchTST_layers import series_decomp 13 | 14 | 15 | class Model(nn.Module): 16 | def __init__(self, configs, max_seq_len:Optional[int]=1024, d_k:Optional[int]=None, d_v:Optional[int]=None, norm:str='BatchNorm', attn_dropout:float=0., 17 | act:str="gelu", key_padding_mask:bool='auto',padding_var:Optional[int]=None, attn_mask:Optional[Tensor]=None, res_attention:bool=True, 18 | pre_norm:bool=False, store_attn:bool=False, pe:str='zeros', learn_pe:bool=True, pretrain_head:bool=False, head_type = 'flatten', verbose:bool=False, **kwargs): 19 | 20 | super().__init__() 21 | 22 | # load parameters 23 | c_in = configs.enc_in 24 | context_window = configs.seq_len 25 | target_window = configs.pred_len 26 | 27 | n_layers = configs.e_layers 28 | n_heads = configs.n_heads 29 | d_model = configs.d_model 30 | d_ff = configs.d_ff 31 | dropout = configs.dropout 32 | fc_dropout = configs.fc_dropout 33 | head_dropout = configs.head_dropout 34 | 35 | individual = configs.individual 36 | 37 | patch_len = configs.patch_len 38 | stride = configs.stride 39 | padding_patch = configs.padding_patch 40 | 41 | revin = configs.revin 42 | affine = configs.affine 43 | subtract_last = configs.subtract_last 44 | 45 | decomposition = configs.decomposition 46 | kernel_size = configs.kernel_size 47 | 48 | 49 | # model 50 | self.decomposition = decomposition 51 | if self.decomposition: 52 | self.decomp_module = series_decomp(kernel_size) 53 | self.model_trend = PatchTST_backbone(c_in=c_in, 
context_window = context_window, target_window=target_window, patch_len=patch_len, stride=stride, 54 | max_seq_len=max_seq_len, n_layers=n_layers, d_model=d_model, 55 | n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout, 56 | dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var, 57 | attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn, 58 | pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch, 59 | pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine, 60 | subtract_last=subtract_last, verbose=verbose, **kwargs) 61 | self.model_res = PatchTST_backbone(c_in=c_in, context_window = context_window, target_window=target_window, patch_len=patch_len, stride=stride, 62 | max_seq_len=max_seq_len, n_layers=n_layers, d_model=d_model, 63 | n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout, 64 | dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var, 65 | attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn, 66 | pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch, 67 | pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine, 68 | subtract_last=subtract_last, verbose=verbose, **kwargs) 69 | else: 70 | self.model = PatchTST_backbone(c_in=c_in, context_window = context_window, target_window=target_window, patch_len=patch_len, stride=stride, 71 | max_seq_len=max_seq_len, n_layers=n_layers, d_model=d_model, 72 | n_heads=n_heads, d_k=d_k, d_v=d_v, d_ff=d_ff, norm=norm, attn_dropout=attn_dropout, 73 | dropout=dropout, act=act, key_padding_mask=key_padding_mask, padding_var=padding_var, 74 | attn_mask=attn_mask, res_attention=res_attention, pre_norm=pre_norm, store_attn=store_attn, 75 | 
pe=pe, learn_pe=learn_pe, fc_dropout=fc_dropout, head_dropout=head_dropout, padding_patch = padding_patch, 76 | pretrain_head=pretrain_head, head_type=head_type, individual=individual, revin=revin, affine=affine, 77 | subtract_last=subtract_last, verbose=verbose, **kwargs) 78 | 79 | 80 | def forward(self, x): # x: [Batch, Input length, Channel] 81 | if self.decomposition: 82 | res_init, trend_init = self.decomp_module(x) 83 | res_init, trend_init = res_init.permute(0,2,1), trend_init.permute(0,2,1) # x: [Batch, Channel, Input length] 84 | res = self.model_res(res_init) 85 | trend = self.model_trend(trend_init) 86 | x = res + trend 87 | x = x.permute(0,2,1) # x: [Batch, Input length, Channel] 88 | else: 89 | x = x.permute(0,2,1) # x: [Batch, Channel, Input length] 90 | x = self.model(x) 91 | x = x.permute(0,2,1) # x: [Batch, Input length, Channel] 92 | return x -------------------------------------------------------------------------------- /models/Stat_models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from tqdm import tqdm 6 | import pmdarima as pm 7 | import threading 8 | from sklearn.ensemble import GradientBoostingRegressor 9 | 10 | class Naive_repeat(nn.Module): 11 | def __init__(self, configs): 12 | super(Naive_repeat, self).__init__() 13 | self.pred_len = configs.pred_len 14 | 15 | def forward(self, x): 16 | B,L,D = x.shape 17 | x = x[:,-1,:].reshape(B,1,D).repeat(self.pred_len,axis=1) 18 | return x # [B, L, D] 19 | 20 | class Naive_thread(threading.Thread): 21 | def __init__(self,func,args=()): 22 | super(Naive_thread,self).__init__() 23 | self.func = func 24 | self.args = args 25 | 26 | def run(self): 27 | self.results = self.func(*self.args) 28 | 29 | def return_result(self): 30 | threading.Thread.join(self) 31 | return self.results 32 | 33 | def _arima(seq,pred_len,bt,i): 34 | model = pm.auto_arima(seq) 35 | 
class SArima(nn.Module):
    """
    Seasonal auto-ARIMA baseline: one model is fitted per (batch, channel)
    series, each in its own ``Naive_thread``.

    Extremely extremely slow, please sample < 0.01

    The seasonal period defaults to 24 and is overridden from
    ``configs.data_path``: 12 for ETTm data sets, 1 for ``ILI``.  It falls
    back to 1 whenever the period is not shorter than ``configs.seq_len``.
    """
    def __init__(self, configs):
        super(SArima, self).__init__()
        self.pred_len = configs.pred_len
        self.seq_len = configs.seq_len
        self.season = 24
        # Case-insensitive match: the data files are named "ETTm1.csv" /
        # "ETTm2.csv", which the former case-sensitive 'Ettm' test never matched.
        if 'ettm' in configs.data_path.lower():
            self.season = 12
        # NOTE(review): this match is still case-sensitive, so a path such as
        # "national_illness.csv" does not trigger it -- confirm the intended names.
        elif 'ILI' in configs.data_path:
            self.season = 1
        # A seasonal period that does not fit inside the input window is disabled.
        if self.season >= self.seq_len:
            self.season = 1

    def forward(self, x):
        """Forecast ``pred_len`` steps for every series in ``x``.

        ``x`` is indexed as ``x[batch][:, channel]`` and must expose ``shape``
        with at least three entries.  Returns a NumPy array of shape
        ``[x.shape[0], pred_len, x.shape[2]]``.
        """
        result = np.zeros([x.shape[0], self.pred_len, x.shape[2]])
        threads = []
        # Launch one worker per (batch, channel) series ...
        for bt, seqs in tqdm(enumerate(x)):
            for i in range(seqs.shape[-1]):
                seq = seqs[:, i]
                one_seq = Naive_thread(func=_sarima, args=(self.season, seq, self.pred_len, bt, i))
                threads.append(one_seq)
                threads[-1].start()
        # ... then collect the forecasts; each worker reports its own indices.
        for every_thread in tqdm(threads):
            forcast, bt, i = every_thread.return_result()
            result[bt, :, i] = forcast
        return result  # [B, L, D]
class Model(nn.Module):
    """
    Vanilla Transformer with O(L^2) complexity

    Built from an encoder/decoder embedding pair selected by
    ``configs.embed_type``, ``configs.e_layers`` encoder layers and
    ``configs.d_layers`` decoder layers; the decoder ends in a linear
    projection from ``configs.d_model`` to ``configs.c_out``.

    Raises
    ------
    ValueError
        If ``configs.embed_type`` is not one of 0, 1, 2, 3, 4.  Previously an
        unknown value left the embeddings undefined and only failed later,
        inside ``forward``, with an ``AttributeError``.
    """
    def __init__(self, configs):
        super(Model, self).__init__()
        self.pred_len = configs.pred_len
        self.output_attention = configs.output_attention

        # Embedding: types 0 and 1 both map to the full DataEmbedding.
        embedding_by_type = {
            0: DataEmbedding,
            1: DataEmbedding,
            2: DataEmbedding_wo_pos,
            3: DataEmbedding_wo_temp,
            4: DataEmbedding_wo_pos_temp,
        }
        try:
            embedding_cls = embedding_by_type[configs.embed_type]
        except KeyError:
            raise ValueError(
                "Unsupported embed_type {!r}; expected one of {}".format(
                    configs.embed_type, sorted(embedding_by_type))
            ) from None
        # Encoder embedding is created first, then the decoder one, so the
        # module registration order matches the original implementation.
        self.enc_embedding = embedding_cls(configs.enc_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        self.dec_embedding = embedding_cls(configs.dec_in, configs.d_model, configs.embed, configs.freq,
                                           configs.dropout)
        # Encoder
        self.encoder = Encoder(
            [
                EncoderLayer(
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout,
                                      output_attention=configs.output_attention), configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation
                ) for _ in range(configs.e_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model)
        )
        # Decoder
        self.decoder = Decoder(
            [
                DecoderLayer(
                    AttentionLayer(
                        FullAttention(True, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                        configs.d_model, configs.n_heads),
                    AttentionLayer(
                        FullAttention(False, configs.factor, attention_dropout=configs.dropout, output_attention=False),
                        configs.d_model, configs.n_heads),
                    configs.d_model,
                    configs.d_ff,
                    dropout=configs.dropout,
                    activation=configs.activation,
                )
                for _ in range(configs.d_layers)
            ],
            norm_layer=torch.nn.LayerNorm(configs.d_model),
            projection=nn.Linear(configs.d_model, configs.c_out, bias=True)
        )

    def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec,
                enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
        """Embed and encode ``x_enc``, decode ``x_dec`` against the encoder output.

        Returns the last ``pred_len`` positions (axis 1) of the decoder output,
        together with the encoder attentions when ``output_attention`` is set.
        """
        enc_out = self.enc_embedding(x_enc, x_mark_enc)
        enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)

        dec_out = self.dec_embedding(x_dec, x_mark_dec)
        dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)

        if self.output_attention:
            return dec_out[:, -self.pred_len:, :], attns
        else:
            return dec_out[:, -self.pred_len:, :]  # [B, L, D]
-d "./logs" ]; then 5 | mkdir ./logs 6 | fi 7 | 8 | 9 | seq_len=512 10 | seq_len=$seq_len 11 | model_name=JTFT 12 | e_layers=3 13 | e_layers_tfi=1 14 | n_heads=16 15 | d_model=128 16 | d_ff=256 17 | dropout=0.2 18 | fc_dropout=0.2 19 | head_dropout=0 20 | patch_len=16 21 | stride=8 22 | n_freq=16 23 | n_concat_td=32 #number of TD patches to concat 24 | mod_scal_tfi=0.5 25 | d_compress_max=32 26 | huber_delta=0.5 27 | train_epochs=100 28 | 29 | root_path_name=./dataset/electricity/ 30 | data_path_name=electricity.csv 31 | model_id_name=Electricity 32 | data_name=custom 33 | 34 | random_seed=1 35 | log_name=./logs/$model_name'_'$model_id_name'_ic'$seq_len'_el'$e_layers'_p'$patch_len'_s'$stride'_d'$d_model'_freq'$n_freq'_cat_td'$n_concat_td.log 36 | echo $log_name 37 | 38 | 39 | 40 | for pred_len in 96 192 336 720 41 | do 42 | echo pred_len $pred_len 43 | echo "" >>$log_name 44 | echo pred_len $pred_len >>$log_name 45 | echo "" >>$log_name 46 | python -u run_longExp.py \ 47 | --use_huber_loss \ 48 | --huber_delta $huber_delta \ 49 | --is_training 1 \ 50 | --use_multi_gpu \ 51 | --devices '0, 1' \ 52 | --decomposition 0 \ 53 | --root_path $root_path_name \ 54 | --data_path $data_path_name \ 55 | --model_id $model_id_name \ 56 | --model $model_name \ 57 | --data $data_name \ 58 | --features M \ 59 | --seq_len $seq_len \ 60 | --pred_len $pred_len \ 61 | --enc_in 321 \ 62 | --n_freq $n_freq \ 63 | --n_concat_td $n_concat_td \ 64 | --d_compress_max $d_compress_max\ 65 | --mod_scal_tfi $mod_scal_tfi \ 66 | --stride $stride \ 67 | --d_model $d_model \ 68 | --e_layers $e_layers \ 69 | --e_layers_tfi $e_layers_tfi \ 70 | --d_ff $d_ff \ 71 | --n_heads $n_heads \ 72 | --dropout $dropout\ 73 | --fc_dropout $fc_dropout\ 74 | --head_dropout $head_dropout \ 75 | --random_seed $random_seed \ 76 | --patch_len $patch_len\ 77 | --des 'Exp' \ 78 | --train_epochs $train_epochs\ 79 | --min_epochs 20\ 80 | --label_len 1 \ 81 | --num_workers 8 \ 82 | --patience 5\ 83 | --lradj 'TST'\ 84 | 
--pct_start 0.2 \ 85 | --itr 1 --batch_size 32 --learning_rate 0.0002 >>$log_name 86 | echo " " >>$log_name 87 | done 88 | 89 | -------------------------------------------------------------------------------- /scripts/ettm2.sh: -------------------------------------------------------------------------------- 1 | # This file is licensed under the GNU General Public License (GPL) version 2.0. 2 | # See the LICENSE file or https://www.gnu.org/licenses/gpl-2.0.html for more details. 3 | 4 | if [ ! -d "./logs" ]; then 5 | mkdir ./logs 6 | fi 7 | 8 | 9 | model_name=JTFT 10 | e_layers=3 11 | e_layers_tfi=1 12 | n_heads=4 13 | d_model=16 14 | d_ff=24 15 | dropout=0.2 16 | fc_dropout=0.2 17 | head_dropout=0 18 | patch_len=16 19 | stride=8 20 | n_freq=16 21 | n_concat_td=16 #number of TD patches to concat 22 | mod_scal_tfi=0.5 23 | d_compress_max=1 24 | lr=0.0001 25 | huber_delta=1.0 26 | 27 | gpu_id=6 28 | min_epochs=1 29 | train_epochs=100 30 | 31 | root_path_name=./dataset/ETT-small 32 | data_path_name=ETTm2.csv 33 | model_id_name=ETTm2 34 | data_name=ETTm2 35 | 36 | seq_len=512 37 | random_seed=1 38 | 39 | log_name=./logs/$model_name'_'$model_id_name'_ic'$seq_len'_el'$e_layers'_p'$patch_len'_s'$stride'_d'$d_model'_freq'$n_freq'_cat_td'$n_concat_td.log 40 | echo $log_name 41 | 42 | for pred_len in 96 #192 336 720 43 | do 44 | echo pred_len $pred_len 45 | echo "" >>$log_name 46 | echo pred_len $pred_len >>$log_name 47 | echo "" >>$log_name 48 | 49 | python -u run_longExp.py \ 50 | --use_huber_loss \ 51 | --huber_delta $huber_delta \ 52 | --is_training 1 \ 53 | --decomposition 0 \ 54 | --root_path $root_path_name \ 55 | --data_path $data_path_name \ 56 | --model_id $model_id_name \ 57 | --model $model_name \ 58 | --data $data_name \ 59 | --features M \ 60 | --seq_len $seq_len \ 61 | --pred_len $pred_len \ 62 | --gpu $gpu_id \ 63 | --enc_in 7 \ 64 | --n_freq $n_freq \ 65 | --n_concat_td $n_concat_td \ 66 | --d_compress_max $d_compress_max \ 67 | --mod_scal_tfi $mod_scal_tfi \ 
#!/bin/bash
#SBATCH -J ETTm2
#SBATCH -p cnGPU
#SBATCH -N 1
#SBATCH -n 1
#SBATCH --gres=gpu:2
#SBATCH -o ettm2.log
#SBATCH -e ettm2.log

# Train JTFT on ETTm2 with two GPUs (SLURM batch script).
# Output of every run is appended to a log file under ./logs whose name
# encodes the main hyper-parameters.

# Create the log directory once (the check used to be repeated three times).
mkdir -p ./logs

# ---- model hyper-parameters ----
seq_len=336
model_name=JTFT #PatchTST #
e_layers=3
n_heads=16
d_model=128 #channel independent or no patching
d_ff=256
dropout=0.2
fc_dropout=0.2
head_dropout=0.0 #0.2 for channel mixed, 0.0 for channel independent
patch_len=16 #$stride
stride=8
n_freq=16
n_concat_td=8 #number of TD patches to concat
mod_scal_tfi=0.5
d_compress_max=2

# ---- training schedule ----
min_epochs=1
train_epochs=100

# ---- data set ----
root_path_name=./dataset/ETT-small
data_path_name=ETTm2.csv
model_id_name=ETTm2
data_name=ETTm2

random_seed=1
log_name=./logs/$model_name'_'$model_id_name'_ic'$seq_len'_el'$e_layers'_p'$patch_len'_s'$stride'_d'$d_model'_freq'$n_freq'_cat_td'$n_concat_td.log
echo "$log_name"

# Log header: five separator lines followed by a banner for this run.
# This section was wrapped in a disabled "#<<COMMENT ... #COMMENT" pair, so it
# is live code; the loop header is restored to match the identical separator
# loop at the end of the training loop below.
for i_rows in {1..5}
do
    echo " " >>"$log_name"
done
echo Train CI_TFI with routed TST, scale $mod_scal_tfi >>"$log_name"
echo " " >>"$log_name"

for pred_len in 96 #192 336 720
do
    echo pred_len $pred_len
    echo "" >>"$log_name"
    echo pred_len $pred_len >>"$log_name"
    echo "" >>"$log_name"

    python -u run_longExp.py \
      --use_multi_gpu \
      --devices '0, 1' \
      --is_training 1 \
      --decomposition 0 \
      --root_path $root_path_name \
      --data_path $data_path_name \
      --model_id $model_id_name \
      --model $model_name \
      --data $data_name \
      --features M \
      --seq_len $seq_len \
      --pred_len $pred_len \
      --enc_in 7 \
      --n_freq $n_freq \
      --n_concat_td $n_concat_td \
      --d_compress_max $d_compress_max \
      --mod_scal_tfi $mod_scal_tfi \
      --stride $stride \
      --d_model $d_model \
      --e_layers $e_layers \
      --d_ff $d_ff \
      --n_heads $n_heads \
      --dropout $dropout \
      --fc_dropout $fc_dropout \
      --head_dropout $head_dropout \
      --random_seed $random_seed \
      --patch_len $patch_len \
      --des 'Exp' \
      --train_epochs $train_epochs \
      --min_epochs $min_epochs \
      --label_len 1 \
      --num_workers 8 \
      --patience 20 \
      --lradj 'TST' \
      --pct_start 0.4 \
      --itr 1 --batch_size 128 --learning_rate 0.0002 >>"$log_name"
    # Separate consecutive runs in the log with five blank-ish lines.
    for i_rows in {1..5}
    do
        echo " " >>"$log_name"
    done
done

# Print the metric lines of all runs to the job output.
grep mae "$log_name"
# This file is licensed under the GNU General Public License (GPL) version 2.0.
# See the LICENSE file or https://www.gnu.org/licenses/gpl-2.0.html for more details.

# Train JTFT on the national illness data set for four forecast horizons.
# Every run appends its output to one log file under ./logs.

[ -d "./logs" ] || mkdir ./logs

# ---- data set ----
root_path_name=./dataset/illness
data_path_name=national_illness.csv
model_id_name=illness
data_name=custom

# ---- model hyper-parameters ----
model_name=JTFT
seq_len=128
patch_len=4
stride=2
e_layers=3
e_layers_tfi=1
n_heads=2
d_model=8
d_ff=12
n_freq=16
n_concat_td=32 #number of TD patches to concat
d_compress_max=1
mod_scal_tfi=0.5

# ---- regularisation ----
dropout=0.3
fc_dropout=0.3
head_dropout=0

# ---- optimisation / runtime ----
lr=0.0025
train_epochs=100
random_seed=1
gpu_id=0

log_name="./logs/${model_name}_${model_id_name}_ic${seq_len}_el${e_layers}_p${patch_len}_s${stride}_d${d_model}_freq${n_freq}_cat_td${n_concat_td}.log"
echo $log_name

for pred_len in 24 36 48 60; do
    echo pred_len $pred_len
    # Horizon banner in the log, framed by empty lines.
    {
        echo ""
        echo pred_len $pred_len
        echo ""
    } >>$log_name
    python -u run_longExp.py \
      --use_huber_loss \
      --is_training 1 \
      --decomposition 0 \
      --root_path $root_path_name \
      --data_path $data_path_name \
      --model_id $model_id_name \
      --model $model_name \
      --data $data_name \
      --features M \
      --seq_len $seq_len \
      --pred_len $pred_len \
      --gpu $gpu_id \
      --enc_in 7 \
      --d_compress_max $d_compress_max \
      --n_freq $n_freq \
      --n_concat_td $n_concat_td \
      --mod_scal_tfi $mod_scal_tfi \
      --random_seed $random_seed \
      --e_layers $e_layers \
      --e_layers_tfi $e_layers_tfi \
      --n_heads $n_heads \
      --d_model $d_model \
      --d_ff $d_ff \
      --dropout $dropout \
      --fc_dropout $fc_dropout \
      --head_dropout $head_dropout \
      --stride $stride \
      --patch_len $patch_len \
      --des 'Exp' \
      --b_not_compile \
      --train_epochs $train_epochs \
      --min_epochs 80 \
      --lradj 'constant' \
      --label_len 1 \
      --num_workers 2 \
      --itr 1 --batch_size 64 --learning_rate $lr >>$log_name
    echo " " >>$log_name
done
70 | --d_ff $d_ff \ 71 | --n_heads $n_heads \ 72 | --dropout $dropout\ 73 | --fc_dropout $fc_dropout\ 74 | --head_dropout $head_dropout \ 75 | --random_seed $random_seed \ 76 | --patch_len $patch_len\ 77 | --des 'Exp' \ 78 | --train_epochs $train_epochs\ 79 | --min_epochs 30\ 80 | --label_len 1 \ 81 | --num_workers 8 \ 82 | --patience 5\ 83 | --lradj 'TST'\ 84 | --pct_start 0.2 \ 85 | --itr 1 --batch_size 16 --learning_rate 0.0002 >>$log_name 86 | echo " " >>$log_name 87 | done 88 | 89 | -------------------------------------------------------------------------------- /scripts/pems08_m_gpu.sh: -------------------------------------------------------------------------------- 1 | # This file is licensed under the GNU General Public License (GPL) version 2.0. 2 | # See the LICENSE file or https://www.gnu.org/licenses/gpl-2.0.html for more details. 3 | 4 | if [ ! -d "./logs" ]; then 5 | mkdir ./logs 6 | fi 7 | 8 | 9 | seq_len=512 10 | seq_len=$seq_len 11 | model_name=JTFT 12 | e_layers=3 13 | e_layers_tfi=1 14 | n_heads=16 15 | d_model=128 16 | d_ff=256 17 | dropout=0.2 18 | fc_dropout=0.2 19 | head_dropout=0 20 | patch_len=16 21 | stride=8 22 | n_freq=16 23 | n_concat_td=32 #number of TD patches to concat 24 | mod_scal_tfi=0.5 25 | d_compress_max=32 26 | huber_delta=0.5 27 | train_epochs=100 28 | 29 | root_path_name=./dataset/pems/ 30 | data_path_name=PEMS08Flow.csv 31 | model_id_name=PEMS08 32 | data_name=custom 33 | 34 | random_seed=1 35 | log_name=./logs/$model_name'_'$model_id_name'_ic'$seq_len'_el'$e_layers'_p'$patch_len'_s'$stride'_d'$d_model'_freq'$n_freq'_cat_td'$n_concat_td.log 36 | echo $log_name 37 | 38 | 39 | 40 | for pred_len in 96 192 336 720 41 | do 42 | echo pred_len $pred_len 43 | echo "" >>$log_name 44 | echo pred_len $pred_len >>$log_name 45 | echo "" >>$log_name # 46 | python -u run_longExp.py \ 47 | --use_huber_loss \ 48 | --huber_delta $huber_delta \ 49 | --use_multi_gpu \ 50 | --devices '2' \ 51 | --is_training 1 \ 52 | --decomposition 0 \ 53 | 
# This file is licensed under the GNU General Public License (GPL) version 2.0.
# See the LICENSE file or https://www.gnu.org/licenses/gpl-2.0.html for more details.

# Train JTFT on the traffic data set (862 input channels) on GPUs 0 and 1 for
# four forecast horizons. Every run appends its output to one log file.

[ -d "./logs" ] || mkdir ./logs

# ---- data set ----
root_path_name=./dataset/traffic
data_path_name=traffic.csv
model_id_name=traffic
data_name=custom

# ---- model hyper-parameters ----
model_name=JTFT
seq_len=512
patch_len=16
stride=8
e_layers=3
e_layers_tfi=0
mod_scal_tfi=0.5
n_heads=16
d_model=128
d_ff=256
n_freq=16
n_concat_td=32 #number of TD patches to concat
d_compress_max=1

# ---- regularisation (head dropout follows the general dropout rate) ----
dropout=0.2
fc_dropout=0.2
head_dropout=$dropout

# ---- optimisation ----
lr=0.0005
huber_delta=1.0
random_seed=1

log_name="./logs/${model_name}_${model_id_name}_ic${seq_len}_el${e_layers}_p${patch_len}_s${stride}_d${d_model}_freq${n_freq}_cat_td${n_concat_td}.log"
echo $log_name

for pred_len in 96 192 336 720; do
    echo pred_len $pred_len
    # Horizon banner in the log, framed by empty lines.
    {
        echo ""
        echo pred_len $pred_len
        echo ""
    } >>$log_name

    python -u run_longExp.py \
      --use_huber_loss \
      --huber_delta $huber_delta \
      --use_multi_gpu \
      --devices '0, 1' \
      --e_layers_tfi $e_layers_tfi \
      --is_training 1 \
      --decomposition 0 \
      --root_path $root_path_name \
      --data_path $data_path_name \
      --model_id $model_id_name \
      --model $model_name \
      --data $data_name \
      --features M \
      --seq_len $seq_len \
      --pred_len $pred_len \
      --enc_in 862 \
      --n_freq $n_freq \
      --n_concat_td $n_concat_td \
      --d_compress_max $d_compress_max \
      --mod_scal_tfi $mod_scal_tfi \
      --stride $stride \
      --d_model $d_model \
      --e_layers $e_layers \
      --d_ff $d_ff \
      --n_heads $n_heads \
      --dropout $dropout \
      --fc_dropout $fc_dropout \
      --head_dropout $head_dropout \
      --random_seed $random_seed \
      --patch_len $patch_len \
      --des 'Exp' \
      --train_epochs 100 \
      --min_epochs 10 \
      --num_workers 8 \
      --patience 30 \
      --lradj 'TST' \
      --pct_start 0.2 \
      --itr 1 --batch_size 16 --learning_rate $lr >>$log_name
    echo " " >>$log_name
done
def CORR(pred, true):
    """Return the scaled mean Pearson correlation between ``pred`` and ``true``.

    Both arrays are centred along axis 0; the correlation is computed per
    remaining position as ``sum(a * b) / sqrt(sum(a**2) * sum(b**2))`` and then
    averaged over the last axis.  The former denominator,
    ``sqrt(sum(a**2 * b**2))``, was not a correlation (identical inputs could
    score above 1 before scaling).

    A small epsilon keeps constant series from dividing by zero (they score 0).
    The result is multiplied by 0.01, as in the previous implementation.
    NOTE(review): the purpose of the 0.01 factor is not evident here; it is
    kept only so the scale of reported values stays comparable -- confirm.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    d += 1e-12
    return 0.01 * (u / d).mean(-1)
def MAE(pred, true): 16 | return np.mean(np.abs(pred - true)) 17 | 18 | 19 | def MSE(pred, true): 20 | return np.mean((pred - true) ** 2) 21 | 22 | 23 | def RMSE(pred, true): 24 | return np.sqrt(MSE(pred, true)) 25 | 26 | 27 | def MAPE(pred, true): 28 | return np.mean(np.abs((pred - true) / true)) 29 | 30 | 31 | def MSPE(pred, true): 32 | return np.mean(np.square((pred - true) / true)) 33 | 34 | 35 | def metric(pred, true): 36 | mae = MAE(pred, true) 37 | mse = MSE(pred, true) 38 | rmse = RMSE(pred, true) 39 | mape = MAPE(pred, true) 40 | mspe = MSPE(pred, true) 41 | rse = RSE(pred, true) 42 | corr = CORR(pred, true) 43 | 44 | return mae, mse, rmse, mape, mspe, rse, corr 45 | -------------------------------------------------------------------------------- /utils/timefeatures.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | import numpy as np 4 | import pandas as pd 5 | from pandas.tseries import offsets 6 | from pandas.tseries.frequencies import to_offset 7 | 8 | 9 | class TimeFeature: 10 | def __init__(self): 11 | pass 12 | 13 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 14 | pass 15 | 16 | def __repr__(self): 17 | return self.__class__.__name__ + "()" 18 | 19 | 20 | class SecondOfMinute(TimeFeature): 21 | """Second of minute encoded as value between [-0.5, 0.5]""" 22 | 23 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 24 | return index.second / 59.0 - 0.5 25 | 26 | 27 | class MinuteOfHour(TimeFeature): 28 | """Minute of hour encoded as value between [-0.5, 0.5]""" 29 | 30 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 31 | return index.minute / 59.0 - 0.5 32 | 33 | 34 | class HourOfDay(TimeFeature): 35 | """Hour of day encoded as value between [-0.5, 0.5]""" 36 | 37 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 38 | return index.hour / 23.0 - 0.5 39 | 40 | 41 | class DayOfWeek(TimeFeature): 42 | """Day of week encoded as value between 
[-0.5, 0.5]""" 43 | 44 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 45 | return index.dayofweek / 6.0 - 0.5 46 | 47 | 48 | class DayOfMonth(TimeFeature): 49 | """Day of month encoded as value between [-0.5, 0.5]""" 50 | 51 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 52 | return (index.day - 1) / 30.0 - 0.5 53 | 54 | 55 | class DayOfYear(TimeFeature): 56 | """Day of year encoded as value between [-0.5, 0.5]""" 57 | 58 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 59 | return (index.dayofyear - 1) / 365.0 - 0.5 60 | 61 | 62 | class MonthOfYear(TimeFeature): 63 | """Month of year encoded as value between [-0.5, 0.5]""" 64 | 65 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 66 | return (index.month - 1) / 11.0 - 0.5 67 | 68 | 69 | class WeekOfYear(TimeFeature): 70 | """Week of year encoded as value between [-0.5, 0.5]""" 71 | 72 | def __call__(self, index: pd.DatetimeIndex) -> np.ndarray: 73 | return (index.isocalendar().week - 1) / 52.0 - 0.5 74 | 75 | 76 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]: 77 | """ 78 | Returns a list of time features that will be appropriate for the given frequency string. 79 | Parameters 80 | ---------- 81 | freq_str 82 | Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc. 
83 | """ 84 | 85 | features_by_offsets = { 86 | offsets.YearEnd: [], 87 | offsets.QuarterEnd: [MonthOfYear], 88 | offsets.MonthEnd: [MonthOfYear], 89 | offsets.Week: [DayOfMonth, WeekOfYear], 90 | offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear], 91 | offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear], 92 | offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear], 93 | offsets.Minute: [ 94 | MinuteOfHour, 95 | HourOfDay, 96 | DayOfWeek, 97 | DayOfMonth, 98 | DayOfYear, 99 | ], 100 | offsets.Second: [ 101 | SecondOfMinute, 102 | MinuteOfHour, 103 | HourOfDay, 104 | DayOfWeek, 105 | DayOfMonth, 106 | DayOfYear, 107 | ], 108 | } 109 | 110 | offset = to_offset(freq_str) 111 | 112 | for offset_type, feature_classes in features_by_offsets.items(): 113 | if isinstance(offset, offset_type): 114 | return [cls() for cls in feature_classes] 115 | 116 | supported_freq_msg = f""" 117 | Unsupported frequency {freq_str} 118 | The following frequencies are supported: 119 | Y - yearly 120 | alias: A 121 | M - monthly 122 | W - weekly 123 | D - daily 124 | B - business days 125 | H - hourly 126 | T - minutely 127 | alias: min 128 | S - secondly 129 | """ 130 | raise RuntimeError(supported_freq_msg) 131 | 132 | 133 | def time_features(dates, freq='h'): 134 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]) 135 | -------------------------------------------------------------------------------- /utils/tools.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import time 5 | 6 | plt.switch_backend('agg') 7 | 8 | 9 | def adjust_learning_rate(optimizer, scheduler, epoch, args, printout=True): 10 | # lr = args.learning_rate * (0.2 ** (epoch // 2)) 11 | if args.lradj == 'type1': 12 | lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch - 1) // 1))} 13 | elif args.lradj == 'type2': 14 | lr_adjust = { 15 | 2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 
1e-6, 16 | 10: 5e-7, 15: 1e-7, 20: 5e-8 17 | } 18 | elif args.lradj == 'type3': 19 | lr_adjust = {epoch: args.learning_rate if epoch < 3 else args.learning_rate * (0.9 ** ((epoch - 3) // 1))} 20 | elif args.lradj == 'constant': 21 | lr_adjust = {epoch: args.learning_rate} 22 | elif args.lradj == '3': 23 | lr_adjust = {epoch: args.learning_rate if epoch < 10 else args.learning_rate*0.1} 24 | elif args.lradj == '4': 25 | lr_adjust = {epoch: args.learning_rate if epoch < 15 else args.learning_rate*0.1} 26 | elif args.lradj == '5': 27 | lr_adjust = {epoch: args.learning_rate if epoch < 25 else args.learning_rate*0.1} 28 | elif args.lradj == '6': 29 | lr_adjust = {epoch: args.learning_rate if epoch < 5 else args.learning_rate*0.1} 30 | elif args.lradj == 'TST': 31 | lr_adjust = {epoch: scheduler.get_last_lr()[0]} 32 | 33 | if epoch in lr_adjust.keys(): 34 | lr = lr_adjust[epoch] 35 | for param_group in optimizer.param_groups: 36 | param_group['lr'] = lr 37 | if printout: print('Updating learning rate to {}'.format(lr)) 38 | 39 | 40 | class EarlyStopping: 41 | def __init__(self, patience=7, verbose=False, delta=0): 42 | self.patience = patience 43 | self.verbose = verbose 44 | self.counter = 0 45 | self.best_score = None 46 | self.early_stop = False 47 | self.val_loss_min = np.Inf 48 | self.delta = delta 49 | 50 | def __call__(self, val_loss, model, path): 51 | score = -val_loss 52 | if self.best_score is None: 53 | self.best_score = score 54 | self.save_checkpoint(val_loss, model, path) 55 | elif score < self.best_score + self.delta: 56 | self.counter += 1 57 | print(f'EarlyStopping counter: {self.counter} out of {self.patience}') 58 | if self.counter >= self.patience: 59 | self.early_stop = True 60 | else: 61 | self.best_score = score 62 | self.save_checkpoint(val_loss, model, path) 63 | self.counter = 0 64 | 65 | def save_checkpoint(self, val_loss, model, path): 66 | if self.verbose: 67 | print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). 
Saving model ...') 68 | torch.save(model.state_dict(), path + '/' + 'checkpoint.pth') 69 | self.val_loss_min = val_loss 70 | 71 | 72 | class dotdict(dict): 73 | """dot.notation access to dictionary attributes""" 74 | __getattr__ = dict.get 75 | __setattr__ = dict.__setitem__ 76 | __delattr__ = dict.__delitem__ 77 | 78 | 79 | class StandardScaler(): 80 | def __init__(self, mean, std): 81 | self.mean = mean 82 | self.std = std 83 | 84 | def transform(self, data): 85 | return (data - self.mean) / self.std 86 | 87 | def inverse_transform(self, data): 88 | return (data * self.std) + self.mean 89 | 90 | 91 | def visual(true, preds=None, name='./pic/test.pdf'): 92 | """ 93 | Results visualization 94 | """ 95 | plt.figure() 96 | plt.plot(true, label='GroundTruth', linewidth=2) 97 | if preds is not None: 98 | plt.plot(preds, label='Prediction', linewidth=2) 99 | plt.legend() 100 | plt.savefig(name, bbox_inches='tight') 101 | 102 | def test_params_flop(model,x_shape): 103 | """ 104 | If you want to test a Former model's FLOPs, you need to give default values to the inputs of model.forward(), because the following code can only pass one argument to forward() 105 | """ 106 | model_params = 0 107 | for parameter in model.parameters(): 108 | model_params += parameter.numel() 109 | print('INFO: Trainable parameter count: {:.2f}M'.format(model_params / 1000000.0)) 110 | from ptflops import get_model_complexity_info 111 | with torch.cuda.device(0): 112 | macs, params = get_model_complexity_info(model.cuda(), x_shape, as_strings=True, print_per_layer_stat=True) 113 | # print('Flops:' + flops) 114 | # print('Params:' + params) 115 | print('{:<30} {:<8}'.format('Computational complexity: ', macs)) 116 | print('{:<30} {:<8}'.format('Number of parameters: ', params)) --------------------------------------------------------------------------------