├── models
    ├── __init__.py
    ├── __pycache__
    │   ├── attn.cpython-38.pyc
    │   ├── embed.cpython-38.pyc
    │   ├── model.cpython-38.pyc
    │   ├── decoder.cpython-38.pyc
    │   ├── encoder.cpython-38.pyc
    │   └── __init__.cpython-38.pyc
    ├── decoder.py
    ├── encoder.py
    ├── embed.py
    ├── attn.py
    └── model.py
├── utils
    ├── __init__.py
    ├── __pycache__
    │   ├── tools.cpython-37.pyc
    │   ├── __init__.cpython-37.pyc
    │   ├── __init__.cpython-38.pyc
    │   ├── masking.cpython-37.pyc
    │   ├── masking.cpython-38.pyc
    │   ├── metrics.cpython-37.pyc
    │   └── timefeatures.cpython-37.pyc
    ├── masking.py
    ├── metrics.py
    ├── tools.py
    └── timefeatures.py
├── 서인천IC-부평IC 평균속도.csv
├── README.assets
    └── image-20210804131827418.png
└── README.md


/models/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/서인천IC-부평IC 평균속도.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/서인천IC-부평IC 평균속도.csv


--------------------------------------------------------------------------------
/models/__pycache__/attn.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/attn.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/embed.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/embed.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/model.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/tools.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/tools.cpython-37.pyc


--------------------------------------------------------------------------------
/README.assets/image-20210804131827418.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/README.assets/image-20210804131827418.png


--------------------------------------------------------------------------------
/models/__pycache__/decoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/decoder.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/encoder.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/__init__.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/masking.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/masking.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/masking.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/masking.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/metrics.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/metrics.cpython-37.pyc


--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/__init__.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/timefeatures.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/timefeatures.cpython-37.pyc


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TimeSeriesForecast-Informer
 2 | 
 3 | Code Implementation of  
 4 | **Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting (AAAI'21 Best Paper)**
 5 | &nbsp;
 6 | 
 7 | Explanation (blog post):
 8 | 
 9 | <https://doheon.github.io/%EC%BD%94%EB%93%9C%EA%B5%AC%ED%98%84/time-series/ci-5.informer-post/>
10 | 
11 | 
12 | 
13 | **reference**
14 | 
15 | code: <https://github.com/zhouhaoyi/Informer2020>
16 | 
17 | paper: <https://arxiv.org/pdf/2012.07436v3.pdf>
18 | 
19 | &nbsp;
20 | 
21 | 
22 | 
23 | 
24 | 
25 | # Result
26 | 
27 | ![image-20210804131827418](README.assets/image-20210804131827418.png)
28 | 
29 | 


--------------------------------------------------------------------------------
/utils/masking.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | 
 3 | class TriangularCausalMask():
 4 |     def __init__(self, B, L, device="cpu"):
 5 |         mask_shape = [B, 1, L, L]
 6 |         with torch.no_grad():
 7 |             self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)
 8 | 
 9 |     @property
10 |     def mask(self):
11 |         return self._mask
12 | 
class ProbMask():
    """Rows of an upper-triangular (diagonal=1) boolean mask gathered at ``index``.

    An [L, scores.shape[-1]] mask is expanded to [B, H, L, scores.shape[-1]],
    the rows named by ``index`` are gathered per batch/head, and the result is
    viewed with the shape of ``scores``.
    NOTE(review): ``index`` presumably has shape [B, H, n_selected] - confirm
    against the caller.
    """

    def __init__(self, B, H, L, index, scores, device="cpu"):
        n_keys = scores.shape[-1]
        causal = torch.ones(L, n_keys, dtype=torch.bool).to(device).triu(1)
        causal_bh = causal[None, None, :].expand(B, H, L, n_keys)
        batch_idx = torch.arange(B)[:, None, None]
        head_idx = torch.arange(H)[None, :, None]
        gathered = causal_bh[batch_idx, head_idx, index, :].to(device)
        self._mask = gathered.view(scores.shape).to(device)

    @property
    def mask(self):
        """The gathered boolean mask, shaped like ``scores``."""
        return self._mask


--------------------------------------------------------------------------------
/utils/metrics.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | def RSE(pred, true):
 4 |     return np.sqrt(np.sum((true-pred)**2)) / np.sqrt(np.sum((true-true.mean())**2))
 5 | 
 6 | def CORR(pred, true):
 7 |     u = ((true-true.mean(0))*(pred-pred.mean(0))).sum(0) 
 8 |     d = np.sqrt(((true-true.mean(0))**2*(pred-pred.mean(0))**2).sum(0))
 9 |     return (u/d).mean(-1)
10 | 
def MAE(pred, true):
    """Mean absolute error over all elements."""
    abs_error = np.abs(pred - true)
    return np.mean(abs_error)
13 | 
def MSE(pred, true):
    """Mean squared error over all elements."""
    error = pred - true
    return np.mean(error ** 2)
16 | 
def RMSE(pred, true):
    """Root mean squared error: the square root of ``MSE(pred, true)``."""
    mean_sq = MSE(pred, true)
    return np.sqrt(mean_sq)
19 | 
def MAPE(pred, true):
    """Mean absolute percentage error (as a fraction); divides by ``true`` element-wise."""
    relative_error = (pred - true) / true
    return np.mean(np.abs(relative_error))
22 | 
def MSPE(pred, true):
    """Mean squared percentage error (as a fraction); divides by ``true`` element-wise."""
    relative_error = (pred - true) / true
    return np.mean(np.square(relative_error))
25 | 
def metric(pred, true):
    """Return the tuple ``(mae, mse, rmse, mape, mspe)`` for ``pred`` against ``true``."""
    scorers = (MAE, MSE, RMSE, MAPE, MSPE)
    mae, mse, rmse, mape, mspe = (score(pred, true) for score in scorers)
    return mae, mse, rmse, mape, mspe


--------------------------------------------------------------------------------
/models/decoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | class DecoderLayer(nn.Module):
 6 |     def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
 7 |                  dropout=0.1, activation="relu"):
 8 |         super(DecoderLayer, self).__init__()
 9 |         d_ff = d_ff or 4*d_model
10 |         self.self_attention = self_attention
11 |         self.cross_attention = cross_attention
12 |         self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
13 |         self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
14 |         self.norm1 = nn.LayerNorm(d_model)
15 |         self.norm2 = nn.LayerNorm(d_model)
16 |         self.norm3 = nn.LayerNorm(d_model)
17 |         self.dropout = nn.Dropout(dropout)
18 |         self.activation = F.relu if activation == "relu" else F.gelu
19 | 
20 |     def forward(self, x, cross, x_mask=None, cross_mask=None):
21 |         x = x + self.dropout(self.self_attention(
22 |             x, x, x,
23 |             attn_mask=x_mask
24 |         )[0])
25 |         x = self.norm1(x)
26 | 
27 |         x = x + self.dropout(self.cross_attention(
28 |             x, cross, cross,
29 |             attn_mask=cross_mask
30 |         )[0])
31 | 
32 |         y = x = self.norm2(x)
33 |         y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
34 |         y = self.dropout(self.conv2(y).transpose(-1,1))
35 | 
36 |         return self.norm3(x+y)
37 | 
class Decoder(nn.Module):
    """Applies ``layers`` in order, then the optional ``norm_layer``."""

    def __init__(self, layers, norm_layer=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        out = x
        for block in self.layers:
            out = block(out, cross, x_mask=x_mask, cross_mask=cross_mask)

        if self.norm is None:
            return out
        return self.norm(out)


--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | 
 4 | def adjust_learning_rate(optimizer, epoch, learning_rate, args = "type1"):
 5 |     # lr = args.learning_rate * (0.2 ** (epoch // 2))
 6 | 
 7 |     # if args.lradj=='type1':
 8 |     #     lr_adjust = {epoch: args.learning_rate * (0.5 ** ((epoch-1) // 1))}
 9 |     # elif args.lradj=='type2':
10 |     #     lr_adjust = {
11 |     #         2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 
12 |     #         10: 5e-7, 15: 1e-7, 20: 5e-8
13 |     #     }
14 | 
15 |     if args=='type1':
16 |         lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch-1) // 1))}
17 |     elif args=='type2':
18 |         lr_adjust = {
19 |             2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6, 
20 |             10: 5e-7, 15: 1e-7, 20: 5e-8
21 |         }
22 |     if epoch in lr_adjust.keys():
23 |         lr = lr_adjust[epoch]
24 |         for param_group in optimizer.param_groups:
25 |             param_group['lr'] = lr
26 |         print('Updating learning rate to {}'.format(lr))
27 | 
class EarlyStopping:
    """Track validation loss, checkpoint on improvement, and flag when to stop.

    Call the instance once per validation round with ``(val_loss, model, path)``.
    A round counts as an improvement when ``-val_loss >= best_score + delta``;
    the model's ``state_dict`` is then saved to ``path + '/checkpoint.pth'`` and
    the patience counter resets. Otherwise the counter increases, and
    ``early_stop`` becomes True once it reaches ``patience``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than np.Inf: the capitalized alias was removed in NumPy 2.0
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        # Scores are negated losses so that "higher is better"
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Save ``model.state_dict()`` under ``path`` and record ``val_loss`` as the new minimum."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path+'/'+'checkpoint.pth')
        self.val_loss_min = val_loss
58 | 
class dotdict(dict):
    """dot.notation access to dictionary attributes"""

    def __getattr__(self, key):
        # Missing keys read as None rather than raising AttributeError
        return self.get(key)

    def __setattr__(self, key, value):
        self[key] = value

    def __delattr__(self, key):
        # Deleting a missing key raises KeyError, as dict deletion does
        del self[key]
64 | 
class StandardScaler():
    """Standardize data with a mean and std computed along axis 0.

    ``transform`` and ``inverse_transform`` accept either array-like data
    (used with the stored statistics directly) or torch tensors (the
    statistics are converted to a tensor of the same type and device).
    Before ``fit`` is called the scaler is the identity (mean 0, std 1).
    """

    def __init__(self):
        self.mean = 0.
        self.std = 1.

    def fit(self, data):
        """Store ``data.mean(0)`` and ``data.std(0)``."""
        self.mean = data.mean(0)
        self.std = data.std(0)

    def _stats_like(self, data):
        """Return (mean, std) in a form that can be combined with ``data``."""
        if not torch.is_tensor(data):
            return self.mean, self.std
        # as_tensor also accepts Python floats and NumPy scalars, which
        # from_numpy rejects (unfitted defaults, or statistics of 1-D data)
        mean = torch.as_tensor(self.mean).type_as(data).to(data.device)
        std = torch.as_tensor(self.std).type_as(data).to(data.device)
        return mean, std

    def transform(self, data):
        """Return ``(data - mean) / std``."""
        mean, std = self._stats_like(data)
        return (data - mean) / std

    def inverse_transform(self, data):
        """Return ``data * std + mean``."""
        mean, std = self._stats_like(data)
        return (data * std) + mean


--------------------------------------------------------------------------------
/models/encoder.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | import torch.nn.functional as F
 4 | 
 5 | class ConvLayer(nn.Module):
 6 |     def __init__(self, c_in):
 7 |         super(ConvLayer, self).__init__()
 8 |         padding = 1 if torch.__version__>='1.5.0' else 2
 9 |         self.downConv = nn.Conv1d(in_channels=c_in,
10 |                                   out_channels=c_in,
11 |                                   kernel_size=3,
12 |                                   padding=padding,
13 |                                   padding_mode='circular')
14 |         self.norm = nn.BatchNorm1d(c_in)
15 |         self.activation = nn.ELU()
16 |         self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)
17 | 
18 |     def forward(self, x):
19 |         x = self.downConv(x.permute(0, 2, 1))
20 |         x = self.norm(x)
21 |         x = self.activation(x)
22 |         x = self.maxPool(x)
23 |         x = x.transpose(1,2)
24 |         return x
25 | 
class EncoderLayer(nn.Module):
    """One encoder block: attention, then a two-conv feed-forward, each residual + LayerNorm.

    ``attention`` is called as ``attention(x, x, x, attn_mask=...)`` and must
    return a ``(output, attn)`` pair. ``d_ff`` defaults to ``4 * d_model``;
    any ``activation`` other than "relu" selects GELU.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        hidden = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attended, attn = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attended))

        # Conv1d expects channels in dim 1, so swap the last two dims around the convs
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        ff_out = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm2(x + ff_out), attn
55 | 
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    With ``conv_layers``, attention and conv layers are applied in zipped
    pairs, and ``attn_layers[-1]`` is then applied once more without a conv.
    Without them, the attention layers run one after another.
    Returns the (optionally normalized) output and the list of per-layer
    attention values.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        if conv_layers is None:
            self.conv_layers = None
        else:
            self.conv_layers = nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attns = []
        if self.conv_layers is None:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            # the final attention layer has no conv after it
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)

        if self.norm is None:
            return x, attns
        return self.norm(x), attns
82 | 
class EncoderStack(nn.Module):
    """Run several encoders on progressively shorter tails of the input and concatenate.

    For each pair ``(i_len, encoder)``, the encoder receives the last
    ``x.shape[1] // 2**i_len`` steps of ``x``. Outputs are concatenated along
    dim -2. ``attn_mask`` is accepted but not forwarded to the encoders.
    """

    def __init__(self, encoders, inp_lens):
        super().__init__()
        self.encoders = nn.ModuleList(encoders)
        self.inp_lens = inp_lens

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        outputs = []
        attns = []
        total_len = x.shape[1]
        for i_len, encoder in zip(self.inp_lens, self.encoders):
            tail_len = total_len // (2 ** i_len)
            encoded, attn = encoder(x[:, -tail_len:, :])
            outputs.append(encoded)
            attns.append(attn)

        return torch.cat(outputs, -2), attns
99 | 


--------------------------------------------------------------------------------
/models/embed.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | import math
  6 | 
  7 | class PositionalEmbedding(nn.Module):
  8 |     def __init__(self, d_model, max_len=5000):
  9 |         super(PositionalEmbedding, self).__init__()
 10 |         # Compute the positional encodings once in log space.
 11 |         pe = torch.zeros(max_len, d_model).float()
 12 |         pe.require_grad = False
 13 | 
 14 |         position = torch.arange(0, max_len).float().unsqueeze(1)
 15 |         div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
 16 | 
 17 |         pe[:, 0::2] = torch.sin(position * div_term)
 18 |         pe[:, 1::2] = torch.cos(position * div_term)
 19 | 
 20 |         pe = pe.unsqueeze(0)
 21 |         self.register_buffer('pe', pe)
 22 | 
 23 |     def forward(self, x):
 24 |         return self.pe[:, :x.size(1)]
 25 | 
 26 | class TokenEmbedding(nn.Module):
 27 |     def __init__(self, c_in, d_model):
 28 |         super(TokenEmbedding, self).__init__()
 29 |         padding = 1 if torch.__version__>='1.5.0' else 2
 30 |         self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model, 
 31 |                                     kernel_size=3, padding=padding, padding_mode='circular')
 32 |         for m in self.modules():
 33 |             if isinstance(m, nn.Conv1d):
 34 |                 nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='leaky_relu')
 35 | 
 36 |     def forward(self, x):
 37 |         x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
 38 |         return x
 39 | 
 40 | class FixedEmbedding(nn.Module):
 41 |     def __init__(self, c_in, d_model):
 42 |         super(FixedEmbedding, self).__init__()
 43 | 
 44 |         w = torch.zeros(c_in, d_model).float()
 45 |         w.require_grad = False
 46 | 
 47 |         position = torch.arange(0, c_in).float().unsqueeze(1)
 48 |         div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
 49 | 
 50 |         w[:, 0::2] = torch.sin(position * div_term)
 51 |         w[:, 1::2] = torch.cos(position * div_term)
 52 | 
 53 |         self.emb = nn.Embedding(c_in, d_model)
 54 |         self.emb.weight = nn.Parameter(w, requires_grad=False)
 55 | 
 56 |     def forward(self, x):
 57 |         return self.emb(x).detach()
 58 | 
 59 | class TemporalEmbedding(nn.Module):
 60 |     def __init__(self, d_model, embed_type='fixed', freq='h'):
 61 |         super(TemporalEmbedding, self).__init__()
 62 | 
 63 |         minute_size = 4; hour_size = 24
 64 |         weekday_size = 7; day_size = 32; month_size = 13
 65 | 
 66 |         Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
 67 |         if freq=='t':
 68 |             self.minute_embed = Embed(minute_size, d_model)
 69 |         self.hour_embed = Embed(hour_size, d_model)
 70 |         self.weekday_embed = Embed(weekday_size, d_model)
 71 |         self.day_embed = Embed(day_size, d_model)
 72 |         self.month_embed = Embed(month_size, d_model)
 73 |     
 74 |     def forward(self, x):
 75 |         x = x.long()
 76 |         
 77 |         minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0.
 78 |         hour_x = self.hour_embed(x[:,:,3])
 79 |         weekday_x = self.weekday_embed(x[:,:,2])
 80 |         day_x = self.day_embed(x[:,:,1])
 81 |         month_x = self.month_embed(x[:,:,0])
 82 |         
 83 |         return hour_x + weekday_x + day_x + month_x + minute_x
 84 | 
 85 | class TimeFeatureEmbedding(nn.Module):
 86 |     def __init__(self, d_model, embed_type='timeF', freq='h'):
 87 |         super(TimeFeatureEmbedding, self).__init__()
 88 | 
 89 |         freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
 90 |         d_inp = freq_map[freq]
 91 |         self.embed = nn.Linear(d_inp, d_model)
 92 |     
 93 |     def forward(self, x):
 94 |         return self.embed(x)
 95 | 
 96 | class DataEmbedding(nn.Module):
 97 |     def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
 98 |         super(DataEmbedding, self).__init__()
 99 | 
100 |         self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
101 |         self.position_embedding = PositionalEmbedding(d_model=d_model)
102 |         self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)
103 | 
104 |         self.dropout = nn.Dropout(p=dropout)
105 | 
106 |     def forward(self, x, x_mark):
107 |         x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark)
108 |         
109 |         return self.dropout(x)


--------------------------------------------------------------------------------
/utils/timefeatures.py:
--------------------------------------------------------------------------------
  1 | from typing import List
  2 | 
  3 | import numpy as np
  4 | import pandas as pd
  5 | from pandas.tseries import offsets
  6 | from pandas.tseries.frequencies import to_offset
  7 | 
  8 | class TimeFeature:
  9 |     def __init__(self):
 10 |         pass
 11 | 
 12 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 13 |         pass
 14 | 
 15 |     def __repr__(self):
 16 |         return self.__class__.__name__ + "()"
 17 | 
 18 | class SecondOfMinute(TimeFeature):
 19 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
 20 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 21 |         return index.second / 59.0 - 0.5
 22 | 
 23 | class MinuteOfHour(TimeFeature):
 24 |     """Minute of hour encoded as value between [-0.5, 0.5]"""
 25 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 26 |         return index.minute / 59.0 - 0.5
 27 | 
 28 | class HourOfDay(TimeFeature):
 29 |     """Hour of day encoded as value between [-0.5, 0.5]"""
 30 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 31 |         return index.hour / 23.0 - 0.5
 32 | 
 33 | class DayOfWeek(TimeFeature):
 34 |     """Hour of day encoded as value between [-0.5, 0.5]"""
 35 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 36 |         return index.dayofweek / 6.0 - 0.5
 37 | 
 38 | class DayOfMonth(TimeFeature):
 39 |     """Day of month encoded as value between [-0.5, 0.5]"""
 40 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 41 |         return (index.day - 1) / 30.0 - 0.5
 42 | 
 43 | class DayOfYear(TimeFeature):
 44 |     """Day of year encoded as value between [-0.5, 0.5]"""
 45 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 46 |         return (index.dayofyear - 1) / 365.0 - 0.5
 47 | 
 48 | class MonthOfYear(TimeFeature):
 49 |     """Month of year encoded as value between [-0.5, 0.5]"""
 50 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 51 |         return (index.month - 1) / 11.0 - 0.5
 52 | 
 53 | class WeekOfYear(TimeFeature):
 54 |     """Week of year encoded as value between [-0.5, 0.5]"""
 55 |     def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
 56 |         return (index.isocalendar().week - 1) / 52.0 - 0.5
 57 | 
 58 | def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
 59 |     """
 60 |     Returns a list of time features that will be appropriate for the given frequency string.
 61 |     Parameters
 62 |     ----------
 63 |     freq_str
 64 |         Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.
 65 |     """
 66 | 
 67 |     features_by_offsets = {
 68 |         offsets.YearEnd: [],
 69 |         offsets.QuarterEnd: [MonthOfYear],
 70 |         offsets.MonthEnd: [MonthOfYear],
 71 |         offsets.Week: [DayOfMonth, WeekOfYear],
 72 |         offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
 73 |         offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
 74 |         offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
 75 |         offsets.Minute: [
 76 |             MinuteOfHour,
 77 |             HourOfDay,
 78 |             DayOfWeek,
 79 |             DayOfMonth,
 80 |             DayOfYear,
 81 |         ],
 82 |         offsets.Second: [
 83 |             SecondOfMinute,
 84 |             MinuteOfHour,
 85 |             HourOfDay,
 86 |             DayOfWeek,
 87 |             DayOfMonth,
 88 |             DayOfYear,
 89 |         ],
 90 |     }
 91 | 
 92 |     offset = to_offset(freq_str)
 93 | 
 94 |     for offset_type, feature_classes in features_by_offsets.items():
 95 |         if isinstance(offset, offset_type):
 96 |             return [cls() for cls in feature_classes]
 97 | 
 98 |     supported_freq_msg = f"""
 99 |     Unsupported frequency {freq_str}
100 |     The following frequencies are supported:
101 |         Y   - yearly
102 |             alias: A
103 |         M   - monthly
104 |         W   - weekly
105 |         D   - daily
106 |         B   - business days
107 |         H   - hourly
108 |         T   - minutely
109 |             alias: min
110 |         S   - secondly
111 |     """
112 |     raise RuntimeError(supported_freq_msg)
113 | 
def time_features(dates, timeenc=1, freq='h'):
    """Extract time features from the 'date' column of the ``dates`` dataframe.

    If ``timeenc`` is 0, integer calendar fields are returned, selected by
    ``freq`` (case-insensitive):
      * y - []
      * m - [month]
      * w - [month]
      * d - [month, day, weekday]
      * b - [month, day, weekday]
      * h - [month, day, weekday, hour]
      * t - [month, day, weekday, hour, minute*]
    In this mode the columns 'month', 'day', 'weekday', 'hour' and 'minute'
    are also written onto ``dates`` itself (kept for backward compatibility).

    If ``timeenc`` is 1, ``freq`` is a pandas frequency string and every
    feature is encoded between -0.5 and 0.5:
      * Q - [month]
      * M - [month]
      * W - [day of month, week of year]
      * D - [day of week, day of month, day of year]
      * B - [day of week, day of month, day of year]
      * H - [hour of day, day of week, day of month, day of year]
      * T - [minute of hour, hour of day, day of week, day of month, day of year]
      * S - [second of minute, minute of hour, hour of day, day of week, day of month, day of year]

    *minute is a number from 0-3: the 15 minute period the timestamp falls into.

    Returns an array with one row per date and one column per feature.

    Raises
    ------
    KeyError
        If ``timeenc`` is 0 and ``freq`` is not one of the keys listed above.
    ValueError
        If ``timeenc`` is neither 0 nor 1 (previously None was returned silently).
    """
    if timeenc == 0:
        stamps = dates.date
        # No positional second argument to apply(): it used to bind to the
        # deprecated ``convert_dtype`` parameter.
        dates['month'] = stamps.apply(lambda ts: ts.month)
        dates['day'] = stamps.apply(lambda ts: ts.day)
        dates['weekday'] = stamps.apply(lambda ts: ts.weekday())
        dates['hour'] = stamps.apply(lambda ts: ts.hour)
        dates['minute'] = stamps.apply(lambda ts: ts.minute // 15)
        freq_map = {
            'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
            'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
            't':['month','day','weekday','hour','minute'],
        }
        return dates[freq_map[freq.lower()]].values
    if timeenc == 1:
        index = pd.to_datetime(dates.date.values)
        features = [feat(index) for feat in time_features_from_frequency_str(freq)]
        return np.vstack(features).transpose(1, 0)
    raise ValueError(f"Unsupported timeenc {timeenc!r}; expected 0 or 1")
152 | 


--------------------------------------------------------------------------------
/models/attn.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | import numpy as np
  6 | 
  7 | from math import sqrt
  8 | from utils.masking import TriangularCausalMask, ProbMask
  9 | 
 10 | class FullAttention(nn.Module):
 11 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
 12 |         super(FullAttention, self).__init__()
 13 |         self.scale = scale
 14 |         self.mask_flag = mask_flag
 15 |         self.output_attention = output_attention
 16 |         self.dropout = nn.Dropout(attention_dropout)
 17 |         
 18 |     def forward(self, queries, keys, values, attn_mask):
 19 |         B, L, H, E = queries.shape
 20 |         _, S, _, D = values.shape
 21 |         scale = self.scale or 1./sqrt(E)
 22 | 
 23 |         scores = torch.einsum("blhe,bshe->bhls", queries, keys)
 24 |         if self.mask_flag:
 25 |             if attn_mask is None:
 26 |                 attn_mask = TriangularCausalMask(B, L, device=queries.device)
 27 | 
 28 |             scores.masked_fill_(attn_mask.mask, -np.inf)
 29 | 
 30 |         A = self.dropout(torch.softmax(scale * scores, dim=-1))
 31 |         V = torch.einsum("bhls,bshd->blhd", A, values)
 32 |         
 33 |         if self.output_attention:
 34 |             return (V.contiguous(), A)
 35 |         else:
 36 |             return (V.contiguous(), None)
 37 | 
 38 | class ProbAttention(nn.Module):
 39 |     def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
 40 |         super(ProbAttention, self).__init__()
 41 |         self.factor = factor
 42 |         self.scale = scale
 43 |         self.mask_flag = mask_flag
 44 |         self.output_attention = output_attention
 45 |         self.dropout = nn.Dropout(attention_dropout)
 46 | 
 47 |     def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
 48 |         # Q [B, H, L, D]
 49 |         B, H, L_K, E = K.shape
 50 |         _, _, L_Q, _ = Q.shape
 51 | 
 52 |         # calculate the sampled Q_K
 53 |         K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
 54 |         index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
 55 |         K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
 56 |         Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
 57 | 
 58 |         # find the Top_k query with sparisty measurement
 59 |         M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
 60 |         M_top = M.topk(n_top, sorted=False)[1]
 61 | 
 62 |         # use the reduced Q to calculate Q_K
 63 |         Q_reduce = Q[torch.arange(B)[:, None, None],
 64 |                      torch.arange(H)[None, :, None],
 65 |                      M_top, :] # factor*ln(L_q)
 66 |         Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
 67 | 
 68 |         return Q_K, M_top
 69 | 
 70 |     def _get_initial_context(self, V, L_Q):
 71 |         B, H, L_V, D = V.shape
 72 |         if not self.mask_flag:
 73 |             # V_sum = V.sum(dim=-2)
 74 |             V_sum = V.mean(dim=-2)
 75 |             contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
 76 |         else: # use mask
 77 |             assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
 78 |             contex = V.cumsum(dim=-2)
 79 |         return contex
 80 | 
 81 |     def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
 82 |         B, H, L_V, D = V.shape
 83 | 
 84 |         if self.mask_flag:
 85 |             attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
 86 |             scores.masked_fill_(attn_mask.mask, -np.inf)
 87 | 
 88 |         attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
 89 | 
 90 |         context_in[torch.arange(B)[:, None, None],
 91 |                    torch.arange(H)[None, :, None],
 92 |                    index, :] = torch.matmul(attn, V).type_as(context_in)
 93 |         if self.output_attention:
 94 |             attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
 95 |             attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
 96 |             return (context_in, attns)
 97 |         else:
 98 |             return (context_in, None)
 99 | 
100 |     def forward(self, queries, keys, values, attn_mask):
101 |         B, L_Q, H, D = queries.shape
102 |         _, L_K, _, _ = keys.shape
103 | 
104 |         queries = queries.transpose(2,1)
105 |         keys = keys.transpose(2,1)
106 |         values = values.transpose(2,1)
107 | 
108 |         U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
109 |         u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 
110 | 
111 |         U_part = U_part if U_part<L_K else L_K
112 |         u = u if u<L_Q else L_Q
113 |         
114 |         scores_top, index = self._prob_QK(queries, keys, sample_k=U_part, n_top=u) 
115 | 
116 |         # add scale factor
117 |         scale = self.scale or 1./sqrt(D)
118 |         if scale is not None:
119 |             scores_top = scores_top * scale
120 |         # get the context
121 |         context = self._get_initial_context(values, L_Q)
122 |         # update the context with selected top_k queries
123 |         context, attn = self._update_context(context, values, scores_top, index, L_Q, attn_mask)
124 |         
125 |         return context.transpose(2,1).contiguous(), attn
126 | 
127 | 
class AttentionLayer(nn.Module):
    """Multi-head wrapper: project inputs, run an inner attention, project back.

    Args:
        attention: callable invoked as ``attention(q, k, v, attn_mask)`` and
            returning ``(output, attn)``.
        d_model: width of the layer's inputs and outputs.
        n_heads: number of heads the projections are split into.
        d_keys: per-head width of queries/keys; ``d_model // n_heads`` if falsy.
        d_values: per-head width of values; ``d_model // n_heads`` if falsy.
        mix: when true, axes 1 and 2 of the inner attention output are swapped
            before the heads are flattened.
    """

    def __init__(self, attention, d_model, n_heads,
                 d_keys=None, d_values=None, mix=False):
        super().__init__()

        if not d_keys:
            d_keys = d_model // n_heads
        if not d_values:
            d_values = d_model // n_heads
        qk_width = d_keys * n_heads
        v_width = d_values * n_heads

        self.inner_attention = attention
        self.query_projection = nn.Linear(d_model, qk_width)
        self.key_projection = nn.Linear(d_model, qk_width)
        self.value_projection = nn.Linear(d_model, v_width)
        self.out_projection = nn.Linear(v_width, d_model)
        self.n_heads = n_heads
        self.mix = mix

    def forward(self, queries, keys, values, attn_mask):
        """Apply the wrapped attention to 3-D ``[B, length, d_model]`` inputs.

        Returns:
            ``(output, attn)``: ``output`` is ``[B, L, d_model]`` (``L`` being
            the query length) and ``attn`` is whatever the inner attention
            returned as its second element.
        """
        batch, q_len, _ = queries.shape
        _, kv_len, _ = keys.shape
        heads = self.n_heads

        # Split the projected feature axis into (heads, per-head width).
        q_heads = self.query_projection(queries).view(batch, q_len, heads, -1)
        k_heads = self.key_projection(keys).view(batch, kv_len, heads, -1)
        v_heads = self.value_projection(values).view(batch, kv_len, heads, -1)

        attended, attn = self.inner_attention(q_heads, k_heads, v_heads, attn_mask)

        if self.mix:
            attended = attended.transpose(2, 1).contiguous()
        merged = attended.view(batch, q_len, -1)

        return self.out_projection(merged), attn
164 | 


--------------------------------------------------------------------------------
/models/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import torch.nn.functional as F
  4 | 
  5 | from utils.masking import TriangularCausalMask, ProbMask
  6 | from models.encoder import Encoder, EncoderLayer, ConvLayer, EncoderStack
  7 | from models.decoder import Decoder, DecoderLayer
  8 | from models.attn import FullAttention, ProbAttention, AttentionLayer
  9 | from models.embed import DataEmbedding
 10 | 
 11 | class Informer(nn.Module):
 12 |     def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len, 
 13 |                 factor=5, d_model=512, n_heads=8, e_layers=3, d_layers=2, d_ff=512, 
 14 |                 dropout=0.0, attn='prob', embed='fixed', freq='h', activation='gelu', 
 15 |                 output_attention = False, distil=True, mix=True,
 16 |                 device=torch.device('cuda:0')):
 17 |         super(Informer, self).__init__()
 18 |         self.device = device
 19 |         self.pred_len = out_len
 20 |         self.attn = attn
 21 |         self.output_attention = output_attention
 22 | 
 23 |         # Encoding
 24 |         self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq, dropout)
 25 |         self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq, dropout)
 26 |         # Attention
 27 |         Attn = ProbAttention if attn=='prob' else FullAttention
 28 |         # Encoder
 29 |         self.encoder = Encoder(
 30 |             [
 31 |                 EncoderLayer(
 32 |                     AttentionLayer(Attn(False, factor, attention_dropout=dropout, output_attention=output_attention), 
 33 |                                 d_model, n_heads, mix=False),
 34 |                     d_model,
 35 |                     d_ff,
 36 |                     dropout=dropout,
 37 |                     activation=activation
 38 |                 ) for l in range(e_layers)
 39 |             ],
 40 |             [
 41 |                 ConvLayer(
 42 |                     d_model
 43 |                 ) for l in range(e_layers-1)
 44 |             ] if distil else None,
 45 |             norm_layer=torch.nn.LayerNorm(d_model)
 46 |         )
 47 |         # Decoder
 48 |         self.decoder = Decoder(
 49 |             [
 50 |                 DecoderLayer(
 51 |                     AttentionLayer(Attn(True, factor, attention_dropout=dropout, output_attention=False), 
 52 |                                 d_model, n_heads, mix=mix),
 53 |                     AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False), 
 54 |                                 d_model, n_heads, mix=False),
 55 |                     d_model,
 56 |                     d_ff,
 57 |                     dropout=dropout,
 58 |                     activation=activation,
 59 |                 )
 60 |                 for l in range(d_layers)
 61 |             ],
 62 |             norm_layer=torch.nn.LayerNorm(d_model)
 63 |         )
 64 |         # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
 65 |         # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
 66 |         self.projection = nn.Linear(d_model, c_out, bias=True)
 67 |         
 68 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, 
 69 |                 enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
 70 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
 71 |         enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
 72 | 
 73 |         dec_out = self.dec_embedding(x_dec, x_mark_dec)
 74 |         dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
 75 |         dec_out = self.projection(dec_out)
 76 |         
 77 |         # dec_out = self.end_conv1(dec_out)
 78 |         # dec_out = self.end_conv2(dec_out.transpose(2,1)).transpose(1,2)
 79 |         if self.output_attention:
 80 |             return dec_out[:,-self.pred_len:,:], attns
 81 |         else:
 82 |             return dec_out[:,-self.pred_len:,:] # [B, L, D]
 83 | 
 84 | 
 85 | class InformerStack(nn.Module):
 86 |     def __init__(self, enc_in, dec_in, c_out, seq_len, label_len, out_len, 
 87 |                 factor=5, d_model=512, n_heads=8, e_layers=[3,2,1], d_layers=2, d_ff=512, 
 88 |                 dropout=0.0, attn='prob', embed='fixed', freq='h', activation='gelu',
 89 |                 output_attention = False, distil=True, mix=True,
 90 |                 device=torch.device('cuda:0')):
 91 |         super(InformerStack, self).__init__()
 92 |         self.pred_len = out_len
 93 |         self.attn = attn
 94 |         self.output_attention = output_attention
 95 | 
 96 |         # Encoding
 97 |         self.enc_embedding = DataEmbedding(enc_in, d_model, embed, freq, dropout)
 98 |         self.dec_embedding = DataEmbedding(dec_in, d_model, embed, freq, dropout)
 99 |         # Attention
100 |         Attn = ProbAttention if attn=='prob' else FullAttention
101 |         # Encoder
102 | 
103 |         inp_lens = list(range(len(e_layers))) # [0,1,2,...] you can customize here
104 |         encoders = [
105 |             Encoder(
106 |                 [
107 |                     EncoderLayer(
108 |                         AttentionLayer(Attn(False, factor, attention_dropout=dropout, output_attention=output_attention), 
109 |                                     d_model, n_heads, mix=False),
110 |                         d_model,
111 |                         d_ff,
112 |                         dropout=dropout,
113 |                         activation=activation
114 |                     ) for l in range(el)
115 |                 ],
116 |                 [
117 |                     ConvLayer(
118 |                         d_model
119 |                     ) for l in range(el-1)
120 |                 ] if distil else None,
121 |                 norm_layer=torch.nn.LayerNorm(d_model)
122 |             ) for el in e_layers]
123 |         self.encoder = EncoderStack(encoders, inp_lens)
124 |         # Decoder
125 |         self.decoder = Decoder(
126 |             [
127 |                 DecoderLayer(
128 |                     AttentionLayer(Attn(True, factor, attention_dropout=dropout, output_attention=False), 
129 |                                 d_model, n_heads, mix=mix),
130 |                     AttentionLayer(FullAttention(False, factor, attention_dropout=dropout, output_attention=False), 
131 |                                 d_model, n_heads, mix=False),
132 |                     d_model,
133 |                     d_ff,
134 |                     dropout=dropout,
135 |                     activation=activation,
136 |                 )
137 |                 for l in range(d_layers)
138 |             ],
139 |             norm_layer=torch.nn.LayerNorm(d_model)
140 |         )
141 |         # self.end_conv1 = nn.Conv1d(in_channels=label_len+out_len, out_channels=out_len, kernel_size=1, bias=True)
142 |         # self.end_conv2 = nn.Conv1d(in_channels=d_model, out_channels=c_out, kernel_size=1, bias=True)
143 |         self.projection = nn.Linear(d_model, c_out, bias=True)
144 |         
145 |     def forward(self, x_enc, x_mark_enc, x_dec, x_mark_dec, 
146 |                 enc_self_mask=None, dec_self_mask=None, dec_enc_mask=None):
147 |         enc_out = self.enc_embedding(x_enc, x_mark_enc)
148 |         enc_out, attns = self.encoder(enc_out, attn_mask=enc_self_mask)
149 | 
150 |         dec_out = self.dec_embedding(x_dec, x_mark_dec)
151 |         dec_out = self.decoder(dec_out, enc_out, x_mask=dec_self_mask, cross_mask=dec_enc_mask)
152 |         dec_out = self.projection(dec_out)
153 |         
154 |         # dec_out = self.end_conv1(dec_out)
155 |         # dec_out = self.end_conv2(dec_out.transpose(2,1)).transpose(1,2)
156 |         if self.output_attention:
157 |             return dec_out[:,-self.pred_len:,:], attns
158 |         else:
159 |             return dec_out[:,-self.pred_len:,:] # [B, L, D]
160 | 


--------------------------------------------------------------------------------