├── models
├── __init__.py
├── __pycache__
│ ├── attn.cpython-38.pyc
│ ├── embed.cpython-38.pyc
│ ├── model.cpython-38.pyc
│ ├── decoder.cpython-38.pyc
│ ├── encoder.cpython-38.pyc
│ └── __init__.cpython-38.pyc
├── decoder.py
├── encoder.py
├── embed.py
├── attn.py
└── model.py
├── utils
├── __init__.py
├── __pycache__
│ ├── tools.cpython-37.pyc
│ ├── __init__.cpython-37.pyc
│ ├── __init__.cpython-38.pyc
│ ├── masking.cpython-37.pyc
│ ├── masking.cpython-38.pyc
│ ├── metrics.cpython-37.pyc
│ └── timefeatures.cpython-37.pyc
├── masking.py
├── metrics.py
├── tools.py
└── timefeatures.py
├── 서인천IC-부평IC 평균속도.csv
├── README.assets
└── image-20210804131827418.png
└── README.md
/models/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/utils/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/서인천IC-부평IC 평균속도.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/서인천IC-부평IC 평균속도.csv
--------------------------------------------------------------------------------
/models/__pycache__/attn.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/attn.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/embed.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/embed.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/model.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/tools.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/tools.cpython-37.pyc
--------------------------------------------------------------------------------
/README.assets/image-20210804131827418.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/README.assets/image-20210804131827418.png
--------------------------------------------------------------------------------
/models/__pycache__/decoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/decoder.cpython-38.pyc
--------------------------------------------------------------------------------
/models/__pycache__/encoder.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/encoder.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/masking.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/masking.cpython-37.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/masking.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/masking.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/metrics.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/metrics.cpython-37.pyc
--------------------------------------------------------------------------------
/models/__pycache__/__init__.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/__init__.cpython-38.pyc
--------------------------------------------------------------------------------
/utils/__pycache__/timefeatures.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/timefeatures.cpython-37.pyc
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TimeSeriesForecast-Informer
2 |
3 | Code Implementation of
4 | **Informer: Beyond Efficient Transformer for Long Sequence Time-Series Forecasting (AAAI'21 Best Paper)**
5 |
6 |
7 | explain
8 |
9 |
10 |
11 |
12 |
13 | **reference**
14 |
15 | code: https://github.com/zhouhaoyi/Informer2020
16 |
17 | paper: https://arxiv.org/abs/2012.07436
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 | # Result
26 |
27 | ![Forecast result](README.assets/image-20210804131827418.png)
28 |
29 |
--------------------------------------------------------------------------------
/utils/masking.py:
--------------------------------------------------------------------------------
1 | import torch
2 |
class TriangularCausalMask():
    """Boolean causal mask of shape ``[B, 1, L, L]``.

    Entries strictly above the main diagonal are ``True``; all other
    entries are ``False``.
    """

    def __init__(self, B, L, device="cpu"):
        with torch.no_grad():
            all_true = torch.ones([B, 1, L, L], dtype=torch.bool)
            upper = torch.triu(all_true, diagonal=1)
            self._mask = upper.to(device)

    @property
    def mask(self):
        """The boolean mask tensor built in the constructor."""
        return self._mask
12 |
class ProbMask():
    """Causal mask restricted to a selected subset of query rows.

    An ``L x scores.shape[-1]`` strictly-upper-triangular boolean matrix is
    broadcast over ``B`` and ``H``; for each (batch, head) pair only the
    rows listed in ``index`` are kept, and the result is viewed with the
    same shape as ``scores``.
    """

    def __init__(self, B, H, L, index, scores, device="cpu"):
        n_keys = scores.shape[-1]
        upper = torch.ones(L, n_keys, dtype=torch.bool).to(device).triu(1)
        broadcast = upper[None, None, :].expand(B, H, L, n_keys)

        # Advanced indexing: pick rows `index[b, h, :]` for every (b, h).
        batch_idx = torch.arange(B)[:, None, None]
        head_idx = torch.arange(H)[None, :, None]
        picked = broadcast[batch_idx, head_idx, index, :].to(device)

        self._mask = picked.view(scores.shape).to(device)

    @property
    def mask(self):
        """The boolean mask tensor built in the constructor."""
        return self._mask
--------------------------------------------------------------------------------
/utils/metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
def RSE(pred, true):
    """Root of the squared error sum divided by the root of the squared
    deviation of ``true`` from its overall mean."""
    error_norm = np.sqrt(np.sum((true - pred) ** 2))
    spread_norm = np.sqrt(np.sum((true - true.mean()) ** 2))
    return error_norm / spread_norm
5 |
def CORR(pred, true):
    """Pearson correlation between ``pred`` and ``true``.

    The correlation is computed along axis 0 and the resulting values are
    averaged over the last axis.

    The denominator is ``sqrt(sum(a**2) * sum(b**2))`` for the centred
    series ``a`` and ``b``; the previous form summed the element-wise
    product of the squares, which does not normalise to ``[-1, 1]``.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    return (u / d).mean(-1)
10 |
def MAE(pred, true):
    """Mean of the absolute differences between ``pred`` and ``true``."""
    abs_error = np.abs(pred - true)
    return np.mean(abs_error)
13 |
def MSE(pred, true):
    """Mean of the squared differences between ``pred`` and ``true``."""
    squared_error = (pred - true) ** 2
    return np.mean(squared_error)
16 |
def RMSE(pred, true):
    """Square root of ``MSE(pred, true)``."""
    mean_squared = MSE(pred, true)
    return np.sqrt(mean_squared)
19 |
def MAPE(pred, true):
    """Mean of ``|pred - true| / |true|`` (no guard against zeros in ``true``)."""
    relative_error = (pred - true) / true
    return np.mean(np.abs(relative_error))
22 |
def MSPE(pred, true):
    """Mean of ``((pred - true) / true) ** 2`` (no guard against zeros in ``true``)."""
    relative_error = (pred - true) / true
    return np.mean(np.square(relative_error))
25 |
def metric(pred, true):
    """Return ``(mae, mse, rmse, mape, mspe)`` for ``pred`` against ``true``."""
    return (
        MAE(pred, true),
        MSE(pred, true),
        RMSE(pred, true),
        MAPE(pred, true),
        MSPE(pred, true),
    )
--------------------------------------------------------------------------------
/models/decoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
class DecoderLayer(nn.Module):
    """Decoder block: self-attention, cross-attention, then a 1x1-conv
    feed-forward, each followed by a residual connection and LayerNorm.

    ``self_attention`` and ``cross_attention`` are called as
    ``module(q, k, v, attn_mask=...)`` and only element ``[0]`` of their
    result is used. ``d_ff`` defaults to ``4 * d_model``.
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        hidden = d_ff or 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        # Any value other than "relu" selects GELU.
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        # Self-attention sub-layer.
        self_out = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x = self.norm1(x + self.dropout(self_out))

        # Cross-attention sub-layer: queries from x, keys/values from `cross`.
        cross_out = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x = self.norm2(x + self.dropout(cross_out))

        # Feed-forward sub-layer; Conv1d works on the channel axis, hence the transposes.
        ff = self.conv1(x.transpose(-1, 1))
        ff = self.dropout(self.activation(ff))
        ff = self.dropout(self.conv2(ff).transpose(-1, 1))

        return self.norm3(x + ff)
37 |
class Decoder(nn.Module):
    """Applies a sequence of decoder layers, then an optional final norm."""

    def __init__(self, layers, norm_layer=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        out = x
        for block in self.layers:
            out = block(out, cross, x_mask=x_mask, cross_mask=cross_mask)

        if self.norm is None:
            return out
        return self.norm(out)
--------------------------------------------------------------------------------
/utils/tools.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 |
def adjust_learning_rate(optimizer, epoch, learning_rate, args = "type1"):
    """Set the learning rate of every param group for the given epoch.

    Parameters
    ----------
    optimizer
        Object exposing ``param_groups``, a list of dicts with an ``'lr'`` key.
    epoch
        Current epoch number.
    learning_rate
        Base learning rate used by the ``'type1'`` schedule.
    args
        Schedule name. ``'type1'`` sets ``learning_rate * 0.5 ** (epoch - 1)``
        on every call; ``'type2'`` applies a fixed table of rates at specific
        epochs. Any other value leaves the optimizer untouched (previously
        this raised ``NameError`` because no schedule was bound).
    """
    if args=='type1':
        lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch-1) // 1))}
    elif args=='type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    else:
        # Unknown schedule: nothing to adjust.
        lr_adjust = {}

    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))
27 |
class EarlyStopping:
    """Track validation loss, checkpoint on improvement, flag when to stop.

    Parameters
    ----------
    patience
        Number of consecutive non-improving calls after which
        ``early_stop`` becomes ``True``.
    verbose
        When true, print a message each time a checkpoint is saved.
    delta
        Margin added to the best score; a new score below
        ``best_score + delta`` counts as no improvement.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # `np.Inf` was removed in NumPy 2.0; `np.inf` exists in every version.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; save a checkpoint if it is an improvement."""
        score = -val_loss  # higher score == lower loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/checkpoint.pth``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path+'/'+'checkpoint.pth')
        self.val_loss_min = val_loss
58 |
class dotdict(dict):
    """dict whose items can also be read, written and deleted as attributes.

    Reading a missing attribute returns ``None``; deleting a missing one
    raises ``KeyError``.
    """

    def __getattr__(self, name):
        return self.get(name)

    def __setattr__(self, name, value):
        self[name] = value

    def __delattr__(self, name):
        del self[name]
64 |
class StandardScaler():
    """Standardise data with a mean and standard deviation taken over axis 0.

    ``transform`` and ``inverse_transform`` accept either NumPy arrays or
    torch tensors. For tensors the stored statistics are converted to the
    tensor's dtype and device first.
    """

    def __init__(self):
        self.mean = 0.
        self.std = 1.

    @staticmethod
    def _stat_like(stat, data):
        """Return ``stat`` in a form that broadcasts against ``data``.

        ``torch.from_numpy`` only accepts ``ndarray`` and so failed on the
        default float statistics and on NumPy scalars (e.g. after fitting
        1-D data). ``np.asarray`` + ``torch.as_tensor`` handles all of these.
        """
        if torch.is_tensor(data):
            return torch.as_tensor(np.asarray(stat), dtype=data.dtype, device=data.device)
        return stat

    def fit(self, data):
        """Store ``data.mean(0)`` and ``data.std(0)``."""
        self.mean = data.mean(0)
        self.std = data.std(0)

    def transform(self, data):
        """Return ``(data - mean) / std``."""
        mean = self._stat_like(self.mean, data)
        std = self._stat_like(self.std, data)
        return (data - mean) / std

    def inverse_transform(self, data):
        """Return ``data * std + mean``."""
        mean = self._stat_like(self.mean, data)
        std = self._stat_like(self.std, data)
        return (data * std) + mean
--------------------------------------------------------------------------------
/models/encoder.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
class ConvLayer(nn.Module):
    """Conv1d -> BatchNorm1d -> ELU -> MaxPool1d over the sequence axis.

    The max-pool uses stride 2, so the output sequence is shorter than the
    input. Input and output are laid out with the channel axis last.
    """

    def __init__(self, c_in):
        super().__init__()
        # Circular-padding width depends on the installed torch version.
        padding = 1 if torch.__version__>='1.5.0' else 2
        self.downConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=c_in,
            kernel_size=3,
            padding=padding,
            padding_mode='circular',
        )
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        # Conv1d/BatchNorm1d expect channels on axis 1.
        channels_first = x.permute(0, 2, 1)
        features = self.activation(self.norm(self.downConv(channels_first)))
        pooled = self.maxPool(features)
        return pooled.transpose(1, 2)
25 |
class EncoderLayer(nn.Module):
    """Encoder block: self-attention then a 1x1-conv feed-forward, each with
    a residual connection and LayerNorm.

    ``attention`` is called as ``attention(x, x, x, attn_mask=...)`` and must
    return a ``(output, attn)`` pair. ``d_ff`` defaults to ``4 * d_model``.
    ``forward`` returns ``(output, attn)``.
    """

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super().__init__()
        hidden = d_ff or 4 * d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        # Any value other than "relu" selects GELU.
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attn_out, attn = self.attention(x, x, x, attn_mask=attn_mask)
        x = self.norm1(x + self.dropout(attn_out))

        # Feed-forward; Conv1d works on the channel axis, hence the transposes.
        ff = self.conv1(x.transpose(-1, 1))
        ff = self.dropout(self.activation(ff))
        ff = self.dropout(self.conv2(ff).transpose(-1, 1))

        return self.norm2(x + ff), attn
55 |
class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with conv layers.

    With ``conv_layers`` given, attention and conv layers are paired via
    ``zip`` and the last attention layer is applied once more after the
    pairs. ``forward`` returns ``(output, attns)`` where ``attns`` collects
    the second return value of every attention-layer call.
    """

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super().__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        if conv_layers is None:
            self.conv_layers = None
        else:
            self.conv_layers = nn.ModuleList(conv_layers)
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        attns = []
        if self.conv_layers is None:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)
        else:
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            # Final attention layer has no conv layer after it.
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns
82 |
class EncoderStack(nn.Module):
    """Runs several encoders on progressively shorter tails of the input.

    Encoder ``i`` receives the last ``x.shape[1] // 2**inp_lens[i]`` steps
    of ``x``; the outputs are concatenated along axis ``-2``. The
    ``attn_mask`` argument is accepted but not forwarded to the encoders.
    """

    def __init__(self, encoders, inp_lens):
        super().__init__()
        self.encoders = nn.ModuleList(encoders)
        self.inp_lens = inp_lens

    def forward(self, x, attn_mask=None):
        # x [B, L, D]
        outputs = []
        attns = []
        for exponent, encoder in zip(self.inp_lens, self.encoders):
            tail_len = x.shape[1] // (2 ** exponent)
            encoded, attn = encoder(x[:, -tail_len:, :])
            outputs.append(encoded)
            attns.append(attn)

        return torch.cat(outputs, -2), attns
99 |
--------------------------------------------------------------------------------
/models/embed.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | import math
6 |
class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal positional encoding stored as a buffer.

    Even feature columns hold sines, odd columns hold cosines. ``forward``
    returns the first ``x.size(1)`` positions with shape
    ``[1, x.size(1), d_model]``; the values of ``x`` are not used.

    Odd ``d_model`` is supported: the cosine block is truncated to the
    number of odd columns (previously this raised a shape error).
    """

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        # A freshly created tensor does not track gradients, and it is
        # registered as a buffer below, so no grad flag needs to be set
        # (the old `pe.require_grad = False` was a misspelled no-op).
        pe = torch.zeros(max_len, d_model).float()

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        pe[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns.
        pe[:, 1::2] = torch.cos(angles)[:, :d_model // 2]

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(1)]
25 |
class TokenEmbedding(nn.Module):
    """Projects ``c_in`` input channels to ``d_model`` with a circular
    kernel-3 Conv1d whose weight is Kaiming-normal initialised.

    Input and output are laid out with the channel axis last.
    """

    def __init__(self, c_in, d_model):
        super().__init__()
        # Circular-padding width depends on the installed torch version.
        padding = 1 if torch.__version__>='1.5.0' else 2
        self.tokenConv = nn.Conv1d(
            in_channels=c_in,
            out_channels=d_model,
            kernel_size=3,
            padding=padding,
            padding_mode='circular',
        )
        # tokenConv is the only Conv1d in this module, so initialise it directly.
        nn.init.kaiming_normal_(self.tokenConv.weight, mode='fan_in', nonlinearity='leaky_relu')

    def forward(self, x):
        channels_first = x.permute(0, 2, 1)
        embedded = self.tokenConv(channels_first)
        return embedded.transpose(1, 2)
39 |
class FixedEmbedding(nn.Module):
    """Embedding lookup with a frozen sinusoidal weight table.

    Row ``i`` of the table holds sines of ``i * div_term`` in the even
    columns and cosines in the odd columns. The table is a parameter with
    ``requires_grad=False`` and ``forward`` detaches its result.

    Odd ``d_model`` is supported: the cosine block is truncated to the
    number of odd columns (previously this raised a shape error).
    """

    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        # Gradient tracking is disabled via the Parameter below; the old
        # `w.require_grad = False` was a misspelled no-op.
        w = torch.zeros(c_in, d_model).float()

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()
        angles = position * div_term

        w[:, 0::2] = torch.sin(angles)
        # For odd d_model there is one fewer odd column than even columns.
        w[:, 1::2] = torch.cos(angles)[:, :d_model // 2]

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()
58 |
class TemporalEmbedding(nn.Module):
    """Sum of per-field embeddings for integer calendar features.

    ``forward`` reads the last axis of ``x`` as
    ``[month, day, weekday, hour, minute]``; the minute column is only used
    when the module was built with ``freq='t'``. ``embed_type='fixed'``
    selects ``FixedEmbedding``; any other value selects ``nn.Embedding``.
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super().__init__()

        embed_cls = FixedEmbedding if embed_type=='fixed' else nn.Embedding
        # Table sizes: minute 4, hour 24, weekday 7, day 32, month 13.
        if freq=='t':
            self.minute_embed = embed_cls(4, d_model)
        self.hour_embed = embed_cls(24, d_model)
        self.weekday_embed = embed_cls(7, d_model)
        self.day_embed = embed_cls(32, d_model)
        self.month_embed = embed_cls(13, d_model)

    def forward(self, x):
        idx = x.long()

        if hasattr(self, 'minute_embed'):
            minute_x = self.minute_embed(idx[:, :, 4])
        else:
            minute_x = 0.
        hour_x = self.hour_embed(idx[:, :, 3])
        weekday_x = self.weekday_embed(idx[:, :, 2])
        day_x = self.day_embed(idx[:, :, 1])
        month_x = self.month_embed(idx[:, :, 0])

        return hour_x + weekday_x + day_x + month_x + minute_x
84 |
class TimeFeatureEmbedding(nn.Module):
    """Linear projection of time features to ``d_model``.

    The input width is chosen from ``freq`` via a fixed lookup table; an
    unknown ``freq`` raises ``KeyError``. ``embed_type`` is accepted but
    not used.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super().__init__()

        n_features_by_freq = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
        self.embed = nn.Linear(n_features_by_freq[freq], d_model)

    def forward(self, x):
        projected = self.embed(x)
        return projected
95 |
class DataEmbedding(nn.Module):
    """Sum of value, positional and temporal embeddings, followed by dropout.

    ``embed_type='timeF'`` selects ``TimeFeatureEmbedding`` for the temporal
    part; any other value selects ``TemporalEmbedding``.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super().__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        if embed_type != 'timeF':
            self.temporal_embedding = TemporalEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)
        else:
            self.temporal_embedding = TimeFeatureEmbedding(
                d_model=d_model, embed_type=embed_type, freq=freq)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        value_part = self.value_embedding(x)
        position_part = self.position_embedding(x)
        temporal_part = self.temporal_embedding(x_mark)
        return self.dropout(value_part + position_part + temporal_part)
--------------------------------------------------------------------------------
/utils/timefeatures.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 |
3 | import numpy as np
4 | import pandas as pd
5 | from pandas.tseries import offsets
6 | from pandas.tseries.frequencies import to_offset
7 |
class TimeFeature:
    """Base class for callables that turn a ``DatetimeIndex`` into a feature.

    The base ``__call__`` does nothing and returns ``None``; subclasses
    override it.
    """

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return None

    def __repr__(self):
        return f"{self.__class__.__name__}()"
17 |
class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.second / 59.0
        return fraction - 0.5
22 |
class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.minute / 59.0
        return fraction - 0.5
27 |
class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.hour / 23.0
        return fraction - 0.5
32 |
class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        fraction = index.dayofweek / 6.0
        return fraction - 0.5
37 |
class DayOfMonth(TimeFeature):
    """Day of month (shifted to start at 0) encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.day - 1
        return zero_based / 30.0 - 0.5
42 |
class DayOfYear(TimeFeature):
    """Day of year (shifted to start at 0) encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.dayofyear - 1
        return zero_based / 365.0 - 0.5
47 |
class MonthOfYear(TimeFeature):
    """Month of year (shifted to start at 0) encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.month - 1
        return zero_based / 11.0 - 0.5
52 |
class WeekOfYear(TimeFeature):
    """ISO week of year (shifted to start at 0) encoded as value between [-0.5, 0.5]"""

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        zero_based = index.isocalendar().week - 1
        return zero_based / 52.0 - 0.5
57 |
def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.

    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Returns
    -------
    A list of freshly constructed feature objects, in the order given by
    the table below for the first offset type that matches ``freq_str``.

    Raises
    ------
    RuntimeError
        If the parsed offset matches none of the supported offset types.
    """

    # Offset type -> feature classes, from coarsest to finest granularity.
    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    # Let pandas parse the frequency string into an offset object.
    offset = to_offset(freq_str)

    # The first matching offset type wins (dict insertion order).
    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
    Y - yearly
    alias: A
    M - monthly
    W - weekly
    D - daily
    B - business days
    H - hourly
    T - minutely
    alias: min
    S - secondly
    """
    raise RuntimeError(supported_freq_msg)
113 |
def time_features(dates, timeenc=1, freq='h'):
    """
    Extract time features from a ``dates`` dataframe that has a ``date`` column.

    If `timeenc` is 0, integer calendar fields are returned; `freq`
    (case-insensitive) selects the columns:
    > * y - []
    > * m - [month]
    > * w - [month]
    > * d - [month, day, weekday]
    > * b - [month, day, weekday]
    > * h - [month, day, weekday, hour]
    > * t - [month, day, weekday, hour, *minute]
    >
    > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
    > * Q - [month]
    > * M - [month]
    > * W - [Day of month, week of year]
    > * D - [Day of week, day of month, day of year]
    > * B - [Day of week, day of month, day of year]
    > * H - [Hour of day, day of week, day of month, day of year]
    > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
    > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]

    *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.

    Notes
    -----
    With `timeenc` 0 the helper columns (month, day, weekday, hour, minute)
    are added to the ``dates`` dataframe that was passed in. Any other
    `timeenc` value than 0 or 1 returns ``None``.
    """
    if timeenc==0:
        # `Series.apply` has no axis argument; the former positional `1`
        # bound to the deprecated `convert_dtype` parameter (its default
        # value), so dropping it keeps the result and avoids the warning.
        dates['month'] = dates.date.apply(lambda row:row.month)
        dates['day'] = dates.date.apply(lambda row:row.day)
        dates['weekday'] = dates.date.apply(lambda row:row.weekday())
        dates['hour'] = dates.date.apply(lambda row:row.hour)
        dates['minute'] = dates.date.apply(lambda row:row.minute)
        # Bucket minutes into quarter-hours (0-3).
        dates['minute'] = dates.minute.map(lambda x:x//15)
        freq_map = {
            'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
            'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
            't':['month','day','weekday','hour','minute'],
        }
        return dates[freq_map[freq.lower()]].values
    if timeenc==1:
        dates = pd.to_datetime(dates.date.values)
        return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]).transpose(1,0)
152 |
--------------------------------------------------------------------------------
/models/attn.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | import torch.nn.functional as F
4 |
5 | import numpy as np
6 |
7 | from math import sqrt
8 | from utils.masking import TriangularCausalMask, ProbMask
9 |
class FullAttention(nn.Module):
    """Scaled dot-product attention over ``[B, L, H, E]`` queries/keys and
    ``[B, S, H, D]`` values.

    With ``mask_flag`` set, positions where ``attn_mask.mask`` is true are
    filled with ``-inf`` before the softmax; if no mask is passed, a
    ``TriangularCausalMask`` is built. ``scale`` defaults to ``1/sqrt(E)``.
    ``forward`` returns ``(output, attention)``, where ``attention`` is
    ``None`` unless ``output_attention`` is set. ``factor`` is accepted but
    not used.
    """

    def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
        super().__init__()
        self.scale = scale
        self.mask_flag = mask_flag
        self.output_attention = output_attention
        self.dropout = nn.Dropout(attention_dropout)

    def forward(self, queries, keys, values, attn_mask):
        batch, q_len, _n_heads, head_dim = queries.shape
        _b, _s, _h, _d = values.shape  # values must be 4-D as well
        scale = self.scale or 1./sqrt(head_dim)

        scores = torch.einsum("blhe,bshe->bhls", queries, keys)
        if self.mask_flag:
            if attn_mask is None:
                attn_mask = TriangularCausalMask(batch, q_len, device=queries.device)

            scores.masked_fill_(attn_mask.mask, -np.inf)

        weights = self.dropout(torch.softmax(scale * scores, dim=-1))
        context = torch.einsum("bhls,bshd->blhd", weights, values).contiguous()

        if self.output_attention:
            return (context, weights)
        return (context, None)
37 |
38 | class ProbAttention(nn.Module):
39 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False):
40 | super(ProbAttention, self).__init__()
41 | self.factor = factor
42 | self.scale = scale
43 | self.mask_flag = mask_flag
44 | self.output_attention = output_attention
45 | self.dropout = nn.Dropout(attention_dropout)
46 |
47 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q)
48 | # Q [B, H, L, D]
49 | B, H, L_K, E = K.shape
50 | _, _, L_Q, _ = Q.shape
51 |
52 | # calculate the sampled Q_K
53 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E)
54 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q
55 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :]
56 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze()
57 |
58 | # find the Top_k query with sparisty measurement
59 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K)
60 | M_top = M.topk(n_top, sorted=False)[1]
61 |
62 | # use the reduced Q to calculate Q_K
63 | Q_reduce = Q[torch.arange(B)[:, None, None],
64 | torch.arange(H)[None, :, None],
65 | M_top, :] # factor*ln(L_q)
66 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k
67 |
68 | return Q_K, M_top
69 |
70 | def _get_initial_context(self, V, L_Q):
71 | B, H, L_V, D = V.shape
72 | if not self.mask_flag:
73 | # V_sum = V.sum(dim=-2)
74 | V_sum = V.mean(dim=-2)
75 | contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone()
76 | else: # use mask
77 | assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only
78 | contex = V.cumsum(dim=-2)
79 | return contex
80 |
81 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask):
82 | B, H, L_V, D = V.shape
83 |
84 | if self.mask_flag:
85 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device)
86 | scores.masked_fill_(attn_mask.mask, -np.inf)
87 |
88 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores)
89 |
90 | context_in[torch.arange(B)[:, None, None],
91 | torch.arange(H)[None, :, None],
92 | index, :] = torch.matmul(attn, V).type_as(context_in)
93 | if self.output_attention:
94 | attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device)
95 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn
96 | return (context_in, attns)
97 | else:
98 | return (context_in, None)
99 |
100 | def forward(self, queries, keys, values, attn_mask):
101 | B, L_Q, H, D = queries.shape
102 | _, L_K, _, _ = keys.shape
103 |
104 | queries = queries.transpose(2,1)
105 | keys = keys.transpose(2,1)
106 | values = values.transpose(2,1)
107 |
108 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k)
109 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q)
110 |
111 | U_part = U_part if U_part