├── models ├── __init__.py ├── __pycache__ │ ├── attn.cpython-38.pyc │ ├── embed.cpython-38.pyc │ ├── model.cpython-38.pyc │ ├── decoder.cpython-38.pyc │ ├── encoder.cpython-38.pyc │ └── __init__.cpython-38.pyc ├── decoder.py ├── encoder.py ├── embed.py ├── attn.py └── model.py ├── utils ├── __init__.py ├── __pycache__ │ ├── tools.cpython-37.pyc │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-38.pyc │ ├── masking.cpython-37.pyc │ ├── masking.cpython-38.pyc │ ├── metrics.cpython-37.pyc │ └── timefeatures.cpython-37.pyc ├── masking.py ├── metrics.py ├── tools.py └── timefeatures.py ├── 서인천IC-부평IC 평균속도.csv ├── README.assets └── image-20210804131827418.png └── README.md /models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /서인천IC-부평IC 평균속도.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/서인천IC-부평IC 평균속도.csv -------------------------------------------------------------------------------- /models/__pycache__/attn.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/attn.cpython-38.pyc -------------------------------------------------------------------------------- /models/__pycache__/embed.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/embed.cpython-38.pyc -------------------------------------------------------------------------------- 
/models/__pycache__/model.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/model.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/tools.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/tools.cpython-37.pyc -------------------------------------------------------------------------------- /README.assets/image-20210804131827418.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/README.assets/image-20210804131827418.png -------------------------------------------------------------------------------- /models/__pycache__/decoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/decoder.cpython-38.pyc -------------------------------------------------------------------------------- /models/__pycache__/encoder.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/encoder.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/__init__.cpython-38.pyc: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/masking.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/masking.cpython-37.pyc -------------------------------------------------------------------------------- /utils/__pycache__/masking.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/masking.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/metrics.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/metrics.cpython-37.pyc -------------------------------------------------------------------------------- /models/__pycache__/__init__.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/models/__pycache__/__init__.cpython-38.pyc -------------------------------------------------------------------------------- /utils/__pycache__/timefeatures.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Doheon/TimeSeriesForecast-Informer/HEAD/utils/__pycache__/timefeatures.cpython-37.pyc -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
# README.md (remainder of the repository README, preserved as comments)
#
#   TimeSeriesForecast-Informer
#
#   Code Implementation of
#   **Informer: Beyond Efficient Transformer for Long Sequence Time-Series
#   Forecasting (AAAI'21 Best Paper)**
#
#   explain
#
#   **reference**
#
#   code:
#
#   paper:
#
#   # Result
#
#   ![image-20210804131827418](README.assets/image-20210804131827418.png)

# --------------------------------------------------------------------------------
# /utils/masking.py
# --------------------------------------------------------------------------------
import torch


class TriangularCausalMask():
    """Boolean causal mask of shape ``[B, 1, L, L]``.

    Entries strictly above the main diagonal are ``True``; callers in this
    repository pass ``mask`` to ``masked_fill_`` so those positions are
    overwritten before the softmax.
    """

    def __init__(self, B, L, device="cpu"):
        mask_shape = [B, 1, L, L]
        with torch.no_grad():
            self._mask = torch.triu(torch.ones(mask_shape, dtype=torch.bool), diagonal=1).to(device)

    @property
    def mask(self):
        """The boolean mask tensor built in ``__init__``."""
        return self._mask


class ProbMask():
    """Causal mask restricted to the query rows selected by ``index``.

    A ``[L, scores.shape[-1]]`` upper-triangular mask is expanded to
    ``[B, H, L, scores.shape[-1]]``; the rows named by ``index`` are then
    gathered per batch/head and reshaped to ``scores.shape``.
    """

    def __init__(self, B, H, L, index, scores, device="cpu"):
        _mask = torch.ones(L, scores.shape[-1], dtype=torch.bool).to(device).triu(1)
        _mask_ex = _mask[None, None, :].expand(B, H, L, scores.shape[-1])
        # Advanced indexing: pick, for every (batch, head), the rows in `index`.
        indicator = _mask_ex[torch.arange(B)[:, None, None],
                             torch.arange(H)[None, :, None],
                             index, :].to(device)
        self._mask = indicator.view(scores.shape).to(device)

    @property
    def mask(self):
        """The boolean mask tensor built in ``__init__``."""
        return self._mask


# --------------------------------------------------------------------------------
# /utils/metrics.py
# --------------------------------------------------------------------------------
import numpy as np


def RSE(pred, true):
    """Root relative squared error: ||true - pred|| / ||true - mean(true)||."""
    return np.sqrt(np.sum((true-pred)**2)) / np.sqrt(np.sum((true-true.mean())**2))


def CORR(pred, true):
    """Pearson correlation along axis 0, averaged over the last axis.

    The denominator is ``sqrt(sum(a**2) * sum(b**2))`` for the centred series
    ``a`` and ``b``.  The previous implementation summed the *product* of the
    squared deviations (``sqrt(sum(a**2 * b**2))``), which is not a
    correlation coefficient and does not yield 1 for perfectly correlated
    inputs.
    """
    true_centered = true - true.mean(0)
    pred_centered = pred - pred.mean(0)
    u = (true_centered * pred_centered).sum(0)
    d = np.sqrt((true_centered ** 2).sum(0) * (pred_centered ** 2).sum(0))
    return (u/d).mean(-1)


def MAE(pred, true):
    """Mean absolute error."""
    return np.mean(np.abs(pred-true))


def MSE(pred, true):
    """Mean squared error."""
    return np.mean((pred-true)**2)


def RMSE(pred, true):
    """Root mean squared error."""
    return np.sqrt(MSE(pred, true))


def MAPE(pred, true):
    """Mean absolute percentage error (as a fraction, not multiplied by 100).

    Divides by ``true`` element-wise, so zeros in ``true`` produce inf/nan.
    """
    return np.mean(np.abs((pred - true) / true))


def MSPE(pred, true):
    """Mean squared percentage error (as a fraction).

    Divides by ``true`` element-wise, so zeros in ``true`` produce inf/nan.
    """
    return np.mean(np.square((pred - true) / true))


def metric(pred, true):
    """Return ``(mae, mse, rmse, mape, mspe)`` for ``pred`` against ``true``."""
    mae = MAE(pred, true)
    mse = MSE(pred, true)
    rmse = RMSE(pred, true)
    mape = MAPE(pred, true)
    mspe = MSPE(pred, true)

    return mae,mse,rmse,mape,mspe
# --------------------------------------------------------------------------------
# /models/decoder.py
# --------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


class DecoderLayer(nn.Module):
    """One decoder block: self-attention, cross-attention, then a 1x1-conv feed-forward.

    Each of the three sub-steps is followed by a residual connection and its
    own ``LayerNorm``.  The attention modules are supplied by the caller and
    are expected to return a sequence whose first element is the attended
    output.
    """

    def __init__(self, self_attention, cross_attention, d_model, d_ff=None,
                 dropout=0.1, activation="relu"):
        super().__init__()
        # A falsy d_ff (None or 0) falls back to four times the model width.
        hidden = d_ff if d_ff else 4 * d_model
        self.self_attention = self_attention
        self.cross_attention = cross_attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=hidden, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=hidden, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.norm3 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        # Any value other than "relu" selects GELU.
        if activation == "relu":
            self.activation = F.relu
        else:
            self.activation = F.gelu

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Run the block on ``x`` attending to ``cross``; returns the normalised output."""
        attended = self.self_attention(x, x, x, attn_mask=x_mask)[0]
        x = self.norm1(x + self.dropout(attended))

        crossed = self.cross_attention(x, cross, cross, attn_mask=cross_mask)[0]
        x = self.norm2(x + self.dropout(crossed))

        # Conv1d works on the second axis, hence the transposes around it.
        hidden = self.conv1(x.transpose(-1, 1))
        hidden = self.dropout(self.activation(hidden))
        projected = self.dropout(self.conv2(hidden).transpose(-1, 1))

        return self.norm3(x + projected)


class Decoder(nn.Module):
    """Sequential stack of decoder layers with an optional final norm."""

    def __init__(self, layers, norm_layer=None):
        super().__init__()
        self.layers = nn.ModuleList(layers)
        self.norm = norm_layer

    def forward(self, x, cross, x_mask=None, cross_mask=None):
        """Feed ``x`` through every layer in order, then through ``norm`` if set."""
        out = x
        for block in self.layers:
            out = block(out, cross, x_mask=x_mask, cross_mask=cross_mask)

        if self.norm is None:
            return out
        return self.norm(out)
# --------------------------------------------------------------------------------
# /utils/tools.py
# --------------------------------------------------------------------------------
import numpy as np
import torch


def adjust_learning_rate(optimizer, epoch, learning_rate, args = "type1"):
    """Set the optimizer's learning rate for ``epoch`` according to a schedule.

    Parameters
    ----------
    optimizer : object exposing ``param_groups`` (a list of dicts with ``'lr'``)
    epoch : int
        Epoch number used to look up the schedule.
    learning_rate : float
        Base learning rate (used by ``'type1'`` only).
    args : str
        Schedule name.  ``'type1'`` halves the base rate every epoch
        (``learning_rate * 0.5 ** (epoch - 1)``); ``'type2'`` uses a fixed
        epoch -> rate table and leaves the rate untouched on other epochs.

    Raises
    ------
    ValueError
        If ``args`` names an unknown schedule.  Previously this path failed
        with an ``UnboundLocalError`` on the undefined schedule table.
    """
    if args=='type1':
        lr_adjust = {epoch: learning_rate * (0.5 ** ((epoch-1) // 1))}
    elif args=='type2':
        lr_adjust = {
            2: 5e-5, 4: 1e-5, 6: 5e-6, 8: 1e-6,
            10: 5e-7, 15: 1e-7, 20: 5e-8
        }
    else:
        raise ValueError(
            "Unknown learning-rate schedule {!r}; expected 'type1' or 'type2'".format(args)
        )
    if epoch in lr_adjust:
        lr = lr_adjust[epoch]
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
        print('Updating learning rate to {}'.format(lr))


class EarlyStopping:
    """Track validation loss, checkpoint on improvement, flag when to stop.

    Call the instance once per epoch with the validation loss.  When the loss
    fails to improve by more than ``delta`` for ``patience`` consecutive
    calls, ``early_stop`` becomes ``True``.
    """

    def __init__(self, patience=7, verbose=False, delta=0):
        self.patience = patience
        self.verbose = verbose
        self.counter = 0
        self.best_score = None
        self.early_stop = False
        # np.inf rather than the np.Inf alias, which NumPy 2.0 removed.
        self.val_loss_min = np.inf
        self.delta = delta

    def __call__(self, val_loss, model, path):
        """Record ``val_loss``; save ``model`` under ``path`` if it improved."""
        # Higher score is better, so negate the loss.
        score = -val_loss
        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
        elif score < self.best_score + self.delta:
            self.counter += 1
            print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model, path)
            self.counter = 0

    def save_checkpoint(self, val_loss, model, path):
        """Write ``model.state_dict()`` to ``<path>/checkpoint.pth``."""
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}).  Saving model ...')
        torch.save(model.state_dict(), path+'/'+'checkpoint.pth')
        self.val_loss_min = val_loss


class dotdict(dict):
    """dot.notation access to dictionary attributes"""
    # Missing keys read as None because attribute reads go through dict.get.
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


class StandardScaler():
    """Standardise data with a per-column mean and standard deviation.

    Works on NumPy arrays and on torch tensors; for tensors the stored
    statistics are converted to the tensor's dtype and device on the fly.
    Before ``fit`` is called the scaler is the identity (mean 0, std 1).
    """

    def __init__(self):
        self.mean = 0.
        self.std = 1.

    def fit(self, data):
        """Store the mean and standard deviation of ``data`` along axis 0."""
        self.mean = data.mean(0)
        self.std = data.std(0)

    def _stats_like(self, data):
        """Return ``(mean, std)`` in a form that broadcasts against ``data``."""
        if not torch.is_tensor(data):
            return self.mean, self.std
        # as_tensor also accepts the float defaults of an unfitted scaler,
        # which torch.from_numpy rejected with a TypeError.
        mean = torch.as_tensor(self.mean).type_as(data).to(data.device)
        std = torch.as_tensor(self.std).type_as(data).to(data.device)
        return mean, std

    def transform(self, data):
        """Return ``(data - mean) / std``."""
        mean, std = self._stats_like(data)
        return (data - mean) / std

    def inverse_transform(self, data):
        """Return ``data * std + mean`` (the inverse of ``transform``)."""
        mean, std = self._stats_like(data)
        return (data * std) + mean
# --------------------------------------------------------------------------------
# /models/encoder.py
# --------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F


def _circular_conv_padding():
    """Return the Conv1d padding used with ``kernel_size=3`` circular convs.

    Mirrors the original version switch (1 for torch >= 1.5, otherwise 2) but
    compares the major/minor numbers numerically.  A plain string comparison
    such as ``'1.10.0' >= '1.5.0'`` is lexicographic and evaluates to False.
    """
    release = torch.__version__.split('+')[0].split('.')
    try:
        major_minor = (int(release[0]), int(release[1]))
    except (IndexError, ValueError):
        # Unparseable version string: assume a current release.
        return 1
    return 1 if major_minor >= (1, 5) else 2


class ConvLayer(nn.Module):
    """Down-sampling block: circular Conv1d -> BatchNorm -> ELU -> stride-2 max-pool."""

    def __init__(self, c_in):
        super(ConvLayer, self).__init__()
        padding = _circular_conv_padding()
        self.downConv = nn.Conv1d(in_channels=c_in,
                                  out_channels=c_in,
                                  kernel_size=3,
                                  padding=padding,
                                  padding_mode='circular')
        self.norm = nn.BatchNorm1d(c_in)
        self.activation = nn.ELU()
        self.maxPool = nn.MaxPool1d(kernel_size=3, stride=2, padding=1)

    def forward(self, x):
        """Apply the block to ``x`` laid out as [B, L, D]; the pool shortens L."""
        # Conv1d expects channels on axis 1, so swap L and D around the conv stack.
        x = self.downConv(x.permute(0, 2, 1))
        x = self.norm(x)
        x = self.activation(x)
        x = self.maxPool(x)
        x = x.transpose(1,2)
        return x


class EncoderLayer(nn.Module):
    """Encoder block: attention plus a 1x1-conv feed-forward, each with residual + LayerNorm."""

    def __init__(self, attention, d_model, d_ff=None, dropout=0.1, activation="relu"):
        super(EncoderLayer, self).__init__()
        d_ff = d_ff or 4*d_model
        self.attention = attention
        self.conv1 = nn.Conv1d(in_channels=d_model, out_channels=d_ff, kernel_size=1)
        self.conv2 = nn.Conv1d(in_channels=d_ff, out_channels=d_model, kernel_size=1)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout = nn.Dropout(dropout)
        # Any value other than "relu" selects GELU.
        self.activation = F.relu if activation == "relu" else F.gelu

    def forward(self, x, attn_mask=None):
        """Return ``(output, attn)`` where ``attn`` is whatever the attention module returned."""
        # x [B, L, D]
        new_x, attn = self.attention(
            x, x, x,
            attn_mask = attn_mask
        )
        x = x + self.dropout(new_x)

        y = x = self.norm1(x)
        y = self.dropout(self.activation(self.conv1(y.transpose(-1,1))))
        y = self.dropout(self.conv2(y).transpose(-1,1))

        return self.norm2(x+y), attn


class Encoder(nn.Module):
    """Stack of attention layers, optionally interleaved with down-sampling conv layers."""

    def __init__(self, attn_layers, conv_layers=None, norm_layer=None):
        super(Encoder, self).__init__()
        self.attn_layers = nn.ModuleList(attn_layers)
        self.conv_layers = nn.ModuleList(conv_layers) if conv_layers is not None else None
        self.norm = norm_layer

    def forward(self, x, attn_mask=None):
        """Return ``(output, attns)`` with one ``attns`` entry per attention layer applied."""
        # x [B, L, D]
        attns = []
        if self.conv_layers is not None:
            # Pairs are formed with zip, then the LAST attention layer is
            # applied on its own -- i.e. the layout expects one more attention
            # layer than conv layers.
            for attn_layer, conv_layer in zip(self.attn_layers, self.conv_layers):
                x, attn = attn_layer(x, attn_mask=attn_mask)
                x = conv_layer(x)
                attns.append(attn)
            x, attn = self.attn_layers[-1](x, attn_mask=attn_mask)
            attns.append(attn)
        else:
            for attn_layer in self.attn_layers:
                x, attn = attn_layer(x, attn_mask=attn_mask)
                attns.append(attn)

        if self.norm is not None:
            x = self.norm(x)

        return x, attns


class EncoderStack(nn.Module):
    """Run several encoders on progressively shorter tails of the input and concatenate."""

    def __init__(self, encoders, inp_lens):
        super(EncoderStack, self).__init__()
        self.encoders = nn.ModuleList(encoders)
        self.inp_lens = inp_lens

    def forward(self, x, attn_mask=None):
        """Return ``(x_stack, attns)``; ``attn_mask`` is accepted but not forwarded."""
        # x [B, L, D]
        x_stack = []; attns = []
        for i_len, encoder in zip(self.inp_lens, self.encoders):
            # Each encoder sees the last L // 2**i_len time steps.
            inp_len = x.shape[1]//(2**i_len)
            x_s, attn = encoder(x[:, -inp_len:, :])
            x_stack.append(x_s); attns.append(attn)
        x_stack = torch.cat(x_stack, -2)

        return x_stack, attns
# --------------------------------------------------------------------------------
# /models/embed.py
# --------------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F

import math


def _circular_conv_padding():
    """Return the Conv1d padding used with ``kernel_size=3`` circular convs.

    Mirrors the original version switch (1 for torch >= 1.5, otherwise 2) but
    compares the major/minor numbers numerically.  A plain string comparison
    such as ``'1.10.0' >= '1.5.0'`` is lexicographic and evaluates to False.
    """
    release = torch.__version__.split('+')[0].split('.')
    try:
        major_minor = (int(release[0]), int(release[1]))
    except (IndexError, ValueError):
        # Unparseable version string: assume a current release.
        return 1
    return 1 if major_minor >= (1, 5) else 2


def _sinusoid_table(n_rows, d_model):
    """Build an ``[n_rows, d_model]`` table of interleaved sin/cos encodings.

    Even columns hold ``sin(pos * w_k)`` and odd columns ``cos(pos * w_k)``
    with ``w_k = exp(-2k * ln(10000) / d_model)``.  For an odd ``d_model``
    there is one fewer cosine column than sine columns, so the frequency
    vector is trimmed for the cosine half instead of raising a shape error.
    """
    table = torch.zeros(n_rows, d_model).float()
    table.requires_grad = False

    position = torch.arange(0, n_rows).float().unsqueeze(1)
    div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

    table[:, 0::2] = torch.sin(position * div_term)
    table[:, 1::2] = torch.cos(position * div_term[: d_model // 2])
    return table


class PositionalEmbedding(nn.Module):
    """Fixed sinusoidal position encoding, stored as a non-trainable buffer."""

    def __init__(self, d_model, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = _sinusoid_table(max_len, d_model)

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return the first ``x.size(1)`` encodings, shape ``[1, x.size(1), d_model]``."""
        return self.pe[:, :x.size(1)]


class TokenEmbedding(nn.Module):
    """Project input channels to ``d_model`` with a circular kernel-3 Conv1d."""

    def __init__(self, c_in, d_model):
        super(TokenEmbedding, self).__init__()
        padding = _circular_conv_padding()
        self.tokenConv = nn.Conv1d(in_channels=c_in, out_channels=d_model,
                                   kernel_size=3, padding=padding, padding_mode='circular')
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight,mode='fan_in',nonlinearity='leaky_relu')

    def forward(self, x):
        """Embed ``x`` laid out as [B, L, c_in]; Conv1d runs on the channel-first view."""
        x = self.tokenConv(x.permute(0, 2, 1)).transpose(1,2)
        return x


class FixedEmbedding(nn.Module):
    """Embedding lookup whose weights are a frozen sinusoid table."""

    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = _sinusoid_table(c_in, d_model)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        """Look up ``x`` (integer indices) and detach the result from the graph."""
        return self.emb(x).detach()


class TemporalEmbedding(nn.Module):
    """Sum of per-field calendar embeddings (month, day, weekday, hour[, minute]).

    ``forward`` reads the fields from the last axis of its input in the order
    month (0), day (1), weekday (2), hour (3) and, only when ``freq == 't'``,
    minute (4).
    """

    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        minute_size = 4; hour_size = 24
        weekday_size = 7; day_size = 32; month_size = 13

        Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
        if freq=='t':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)

    def forward(self, x):
        """Return the summed embeddings for the integer-cast calendar fields of ``x``."""
        x = x.long()

        # The minute table only exists for minute-level frequency.
        minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0.
        hour_x = self.hour_embed(x[:,:,3])
        weekday_x = self.weekday_embed(x[:,:,2])
        day_x = self.day_embed(x[:,:,1])
        month_x = self.month_embed(x[:,:,0])

        return hour_x + weekday_x + day_x + month_x + minute_x


class TimeFeatureEmbedding(nn.Module):
    """Linear projection of real-valued time features to ``d_model``.

    The input width is looked up from ``freq``; an unknown ``freq`` raises
    ``KeyError``.
    """

    def __init__(self, d_model, embed_type='timeF', freq='h'):
        super(TimeFeatureEmbedding, self).__init__()

        freq_map = {'h':4, 't':5, 's':6, 'm':1, 'a':1, 'w':2, 'd':3, 'b':3}
        d_inp = freq_map[freq]
        self.embed = nn.Linear(d_inp, d_model)

    def forward(self, x):
        """Project ``x`` along its last axis."""
        return self.embed(x)


class DataEmbedding(nn.Module):
    """Value + positional + temporal embedding followed by dropout.

    ``embed_type == 'timeF'`` selects ``TimeFeatureEmbedding`` for the time
    marks; any other value selects ``TemporalEmbedding``.
    """

    def __init__(self, c_in, d_model, embed_type='fixed', freq='h', dropout=0.1):
        super(DataEmbedding, self).__init__()

        self.value_embedding = TokenEmbedding(c_in=c_in, d_model=d_model)
        self.position_embedding = PositionalEmbedding(d_model=d_model)
        self.temporal_embedding = TemporalEmbedding(d_model=d_model, embed_type=embed_type, freq=freq) if embed_type!='timeF' else TimeFeatureEmbedding(d_model=d_model, embed_type=embed_type, freq=freq)

        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, x_mark):
        """Embed values ``x`` and time marks ``x_mark`` and sum the three parts."""
        x = self.value_embedding(x) + self.position_embedding(x) + self.temporal_embedding(x_mark)

        return self.dropout(x)
# --------------------------------------------------------------------------------
# /utils/timefeatures.py
# --------------------------------------------------------------------------------
from typing import List

import numpy as np
import pandas as pd
from pandas.tseries import offsets
from pandas.tseries.frequencies import to_offset


class TimeFeature:
    """Base class for callables that map a ``DatetimeIndex`` to a numeric feature."""

    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"


class SecondOfMinute(TimeFeature):
    """Second of minute encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5


class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5


class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5


class DayOfWeek(TimeFeature):
    """Day of week encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5


class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5


class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5


class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5


class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        # isocalendar().week is a pandas Series with a nullable integer
        # dtype; convert to a plain float array so the result matches the
        # annotated return type and stacks cleanly with the other features.
        week = index.isocalendar().week.to_numpy(dtype=float)
        return (week - 1) / 52.0 - 0.5


def time_features_from_frequency_str(freq_str: str) -> List[TimeFeature]:
    """
    Returns a list of time features that will be appropriate for the given frequency string.
    Parameters
    ----------
    freq_str
        Frequency string of the form [multiple][granularity] such as "12H", "5min", "1D" etc.

    Raises
    ------
    RuntimeError
        If the parsed offset is not one of the supported offset types.
    """

    features_by_offsets = {
        offsets.YearEnd: [],
        offsets.QuarterEnd: [MonthOfYear],
        offsets.MonthEnd: [MonthOfYear],
        offsets.Week: [DayOfMonth, WeekOfYear],
        offsets.Day: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.BusinessDay: [DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Hour: [HourOfDay, DayOfWeek, DayOfMonth, DayOfYear],
        offsets.Minute: [
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
        offsets.Second: [
            SecondOfMinute,
            MinuteOfHour,
            HourOfDay,
            DayOfWeek,
            DayOfMonth,
            DayOfYear,
        ],
    }

    offset = to_offset(freq_str)

    # First matching offset type wins (dict order is insertion order).
    for offset_type, feature_classes in features_by_offsets.items():
        if isinstance(offset, offset_type):
            return [cls() for cls in feature_classes]

    supported_freq_msg = f"""
    Unsupported frequency {freq_str}
    The following frequencies are supported:
        Y   - yearly
            alias: A
        M   - monthly
        W   - weekly
        D   - daily
        B   - business days
        H   - hourly
        T   - minutely
            alias: min
        S   - secondly
    """
    raise RuntimeError(supported_freq_msg)


def time_features(dates, timeenc=1, freq='h'):
    """
    > `time_features` takes in a `dates` dataframe with a 'date' column and extracts the date down to `freq` where freq can be any of the following if `timeenc` is 0:
    > * m - [month]
    > * w - [month]
    > * d - [month, day, weekday]
    > * b - [month, day, weekday]
    > * h - [month, day, weekday, hour]
    > * t - [month, day, weekday, hour, *minute]
    >
    > If `timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]):
    > * Q - [month]
    > * M - [month]
    > * W - [Day of month, week of year]
    > * D - [Day of week, day of month, day of year]
    > * B - [Day of week, day of month, day of year]
    > * H - [Hour of day, day of week, day of month, day of year]
    > * T - [Minute of hour*, hour of day, day of week, day of month, day of year]
    > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year]

    *minute returns a number from 0-3 corresponding to the 15 minute period it falls into.

    With `timeenc` 0 the helper columns ('month', 'day', 'weekday', 'hour',
    'minute') are written into `dates` itself, as before.

    Raises `ValueError` for any `timeenc` other than 0 or 1 (this used to
    fall through and return `None`).
    """
    if timeenc==0:
        stamps = dates.date
        # No extra positional argument to Series.apply: the old trailing `1`
        # was bound to the deprecated `convert_dtype` parameter.
        dates['month'] = stamps.apply(lambda row: row.month)
        dates['day'] = stamps.apply(lambda row: row.day)
        dates['weekday'] = stamps.apply(lambda row: row.weekday())
        dates['hour'] = stamps.apply(lambda row: row.hour)
        dates['minute'] = stamps.apply(lambda row: row.minute)
        # Bucket minutes into quarter-hours (0-3).
        dates['minute'] = dates.minute.map(lambda x: x//15)
        freq_map = {
            'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'],
            'b':['month','day','weekday'],'h':['month','day','weekday','hour'],
            't':['month','day','weekday','hour','minute'],
        }
        return dates[freq_map[freq.lower()]].values
    if timeenc==1:
        dates = pd.to_datetime(dates.date.values)
        return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]).transpose(1,0)
    raise ValueError("timeenc must be 0 or 1, got {!r}".format(timeenc))
`timeenc` is 1, a similar, but different list of `freq` values are supported (all encoded between [-0.5 and 0.5]): 125 | > * Q - [month] 126 | > * M - [month] 127 | > * W - [Day of month, week of year] 128 | > * D - [Day of week, day of month, day of year] 129 | > * B - [Day of week, day of month, day of year] 130 | > * H - [Hour of day, day of week, day of month, day of year] 131 | > * T - [Minute of hour*, hour of day, day of week, day of month, day of year] 132 | > * S - [Second of minute, minute of hour, hour of day, day of week, day of month, day of year] 133 | 134 | *minute returns a number from 0-3 corresponding to the 15 minute period it falls into. 135 | """ 136 | if timeenc==0: 137 | dates['month'] = dates.date.apply(lambda row:row.month,1) 138 | dates['day'] = dates.date.apply(lambda row:row.day,1) 139 | dates['weekday'] = dates.date.apply(lambda row:row.weekday(),1) 140 | dates['hour'] = dates.date.apply(lambda row:row.hour,1) 141 | dates['minute'] = dates.date.apply(lambda row:row.minute,1) 142 | dates['minute'] = dates.minute.map(lambda x:x//15) 143 | freq_map = { 144 | 'y':[],'m':['month'],'w':['month'],'d':['month','day','weekday'], 145 | 'b':['month','day','weekday'],'h':['month','day','weekday','hour'], 146 | 't':['month','day','weekday','hour','minute'], 147 | } 148 | return dates[freq_map[freq.lower()]].values 149 | if timeenc==1: 150 | dates = pd.to_datetime(dates.date.values) 151 | return np.vstack([feat(dates) for feat in time_features_from_frequency_str(freq)]).transpose(1,0) 152 | -------------------------------------------------------------------------------- /models/attn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | import numpy as np 6 | 7 | from math import sqrt 8 | from utils.masking import TriangularCausalMask, ProbMask 9 | 10 | class FullAttention(nn.Module): 11 | def __init__(self, mask_flag=True, factor=5, 
scale=None, attention_dropout=0.1, output_attention=False): 12 | super(FullAttention, self).__init__() 13 | self.scale = scale 14 | self.mask_flag = mask_flag 15 | self.output_attention = output_attention 16 | self.dropout = nn.Dropout(attention_dropout) 17 | 18 | def forward(self, queries, keys, values, attn_mask): 19 | B, L, H, E = queries.shape 20 | _, S, _, D = values.shape 21 | scale = self.scale or 1./sqrt(E) 22 | 23 | scores = torch.einsum("blhe,bshe->bhls", queries, keys) 24 | if self.mask_flag: 25 | if attn_mask is None: 26 | attn_mask = TriangularCausalMask(B, L, device=queries.device) 27 | 28 | scores.masked_fill_(attn_mask.mask, -np.inf) 29 | 30 | A = self.dropout(torch.softmax(scale * scores, dim=-1)) 31 | V = torch.einsum("bhls,bshd->blhd", A, values) 32 | 33 | if self.output_attention: 34 | return (V.contiguous(), A) 35 | else: 36 | return (V.contiguous(), None) 37 | 38 | class ProbAttention(nn.Module): 39 | def __init__(self, mask_flag=True, factor=5, scale=None, attention_dropout=0.1, output_attention=False): 40 | super(ProbAttention, self).__init__() 41 | self.factor = factor 42 | self.scale = scale 43 | self.mask_flag = mask_flag 44 | self.output_attention = output_attention 45 | self.dropout = nn.Dropout(attention_dropout) 46 | 47 | def _prob_QK(self, Q, K, sample_k, n_top): # n_top: c*ln(L_q) 48 | # Q [B, H, L, D] 49 | B, H, L_K, E = K.shape 50 | _, _, L_Q, _ = Q.shape 51 | 52 | # calculate the sampled Q_K 53 | K_expand = K.unsqueeze(-3).expand(B, H, L_Q, L_K, E) 54 | index_sample = torch.randint(L_K, (L_Q, sample_k)) # real U = U_part(factor*ln(L_k))*L_q 55 | K_sample = K_expand[:, :, torch.arange(L_Q).unsqueeze(1), index_sample, :] 56 | Q_K_sample = torch.matmul(Q.unsqueeze(-2), K_sample.transpose(-2, -1)).squeeze() 57 | 58 | # find the Top_k query with sparisty measurement 59 | M = Q_K_sample.max(-1)[0] - torch.div(Q_K_sample.sum(-1), L_K) 60 | M_top = M.topk(n_top, sorted=False)[1] 61 | 62 | # use the reduced Q to calculate Q_K 63 | 
Q_reduce = Q[torch.arange(B)[:, None, None], 64 | torch.arange(H)[None, :, None], 65 | M_top, :] # factor*ln(L_q) 66 | Q_K = torch.matmul(Q_reduce, K.transpose(-2, -1)) # factor*ln(L_q)*L_k 67 | 68 | return Q_K, M_top 69 | 70 | def _get_initial_context(self, V, L_Q): 71 | B, H, L_V, D = V.shape 72 | if not self.mask_flag: 73 | # V_sum = V.sum(dim=-2) 74 | V_sum = V.mean(dim=-2) 75 | contex = V_sum.unsqueeze(-2).expand(B, H, L_Q, V_sum.shape[-1]).clone() 76 | else: # use mask 77 | assert(L_Q == L_V) # requires that L_Q == L_V, i.e. for self-attention only 78 | contex = V.cumsum(dim=-2) 79 | return contex 80 | 81 | def _update_context(self, context_in, V, scores, index, L_Q, attn_mask): 82 | B, H, L_V, D = V.shape 83 | 84 | if self.mask_flag: 85 | attn_mask = ProbMask(B, H, L_Q, index, scores, device=V.device) 86 | scores.masked_fill_(attn_mask.mask, -np.inf) 87 | 88 | attn = torch.softmax(scores, dim=-1) # nn.Softmax(dim=-1)(scores) 89 | 90 | context_in[torch.arange(B)[:, None, None], 91 | torch.arange(H)[None, :, None], 92 | index, :] = torch.matmul(attn, V).type_as(context_in) 93 | if self.output_attention: 94 | attns = (torch.ones([B, H, L_V, L_V])/L_V).type_as(attn).to(attn.device) 95 | attns[torch.arange(B)[:, None, None], torch.arange(H)[None, :, None], index, :] = attn 96 | return (context_in, attns) 97 | else: 98 | return (context_in, None) 99 | 100 | def forward(self, queries, keys, values, attn_mask): 101 | B, L_Q, H, D = queries.shape 102 | _, L_K, _, _ = keys.shape 103 | 104 | queries = queries.transpose(2,1) 105 | keys = keys.transpose(2,1) 106 | values = values.transpose(2,1) 107 | 108 | U_part = self.factor * np.ceil(np.log(L_K)).astype('int').item() # c*ln(L_k) 109 | u = self.factor * np.ceil(np.log(L_Q)).astype('int').item() # c*ln(L_q) 110 | 111 | U_part = U_part if U_part