├── .gitattributes
├── .gitignore
├── .idea
│   └── vcs.xml
├── LICENSE
├── Optim.py
├── README.md
├── ele.sh
├── main.py
├── models
│   ├── LSTNet.py
│   ├── __init__.py
│   └── exchange_rate.pt
├── solar.sh
├── stock.sh
├── traffic.sh
└── utils.py

/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
**/.idea
**/__pycache__
data/
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="$PROJECT_DIR$" vcs="Git" />
  </component>
</project>
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Aaqib Ali

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/Optim.py:
--------------------------------------------------------------------------------
import math

import torch


class Optim(object):
    """Thin wrapper around torch.optim that adds gradient-norm clipping and
    learning-rate decay when the validation metric stops improving."""

    def make_optimizer(self):
        if self.method == 'sgd':
            self.optimizer = torch.optim.SGD(self.params, lr=self.lr)
        elif self.method == 'adagrad':
            self.optimizer = torch.optim.Adagrad(self.params, lr=self.lr)
        elif self.method == 'adadelta':
            self.optimizer = torch.optim.Adadelta(self.params, lr=self.lr)
        elif self.method == 'adam':
            self.optimizer = torch.optim.Adam(self.params, lr=self.lr)
        else:
            raise RuntimeError("Invalid optim method: " + self.method)

    def __init__(self, params, method, lr, max_grad_norm, lr_decay=1, start_decay_at=None):
        self.params = list(params)  # careful: params may be a generator
        self.last_ppl = None
        self.lr = lr
        self.max_grad_norm = max_grad_norm
        self.method = method
        self.lr_decay = lr_decay
        self.start_decay_at = start_decay_at
        self.start_decay = False
        self.optimizer = None
        self.make_optimizer()

    def step(self):
        # Compute the global gradient norm.
        grad_norm = 0
        for param in self.params:
            grad_norm += math.pow(param.grad.data.norm(), 2)

        grad_norm = math.sqrt(grad_norm)
        if grad_norm > 0:
            shrinkage = self.max_grad_norm / grad_norm
        else:
            shrinkage = 1.

        # Rescale gradients only when the norm exceeds max_grad_norm.
        for param in self.params:
            if shrinkage < 1:
                param.grad.data.mul_(shrinkage)

        self.optimizer.step()
        return grad_norm

    # Decay the learning rate if validation performance does not improve
    # or we reach the start_decay_at epoch.
    def update_learning_rate(self, ppl, epoch):
        if self.start_decay_at is not None and epoch >= self.start_decay_at:
            self.start_decay = True
        if self.last_ppl is not None and ppl > self.last_ppl:
            self.start_decay = True

        if self.start_decay:
            self.lr = self.lr * self.lr_decay
            print("Decaying learning rate to %g" % self.lr)
            # only decay for one epoch
            self.start_decay = False

        self.last_ppl = ppl

        self.make_optimizer()
--------------------------------------------------------------------------------
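A minimal sketch of how `Optim` is meant to be driven, mirroring the training step in main.py. This is editor-added illustration, not a file in the repository; the `nn.Linear` model and random tensors are stand-ins so the snippet is self-contained.

```python
# Sketch: wrap a model's parameters in Optim and run one clipped update.
import torch
import torch.nn as nn

from Optim import Optim

model = nn.Linear(8, 1)                      # stand-in for the LSTNet model
criterion = nn.MSELoss(reduction='sum')
optim = Optim(model.parameters(), 'adam', lr=0.001, max_grad_norm=10.)

x, y = torch.randn(32, 8), torch.randn(32, 1)
model.zero_grad()
loss = criterion(model(x), y)
loss.backward()
grad_norm = optim.step()                     # rescales gradients above max_grad_norm, then updates
print('gradient norm before clipping: %.4f' % grad_norm)

# Optional: decay the learning rate when the validation metric stops improving.
# Note that this rebuilds the optimizer, which resets Adam's moment estimates.
optim.update_learning_rate(loss.item(), epoch=1)
```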
/README.md:
--------------------------------------------------------------------------------
# Time series forecasting

### Research paper:

Modeling Long- and Short-Term Temporal Patterns with Deep Neural Networks (https://arxiv.org/abs/1703.07015)

### Dataset:

You can download the datasets used in the paper from https://github.com/laiguokun/multivariate-time-series-data.

### Bash scripts:
Example runs for the different datasets are given in ele.sh, traffic.sh, solar.sh and stock.sh.

### Environment and Dependencies:
1. Create a Conda environment with Python 3.7
2. Install a compatible PyTorch build from https://pytorch.org/
3. Create a `data` folder and clone the dataset repository into it
4. Run any of the shell scripts, e.g. `bash ele.sh` (on Windows, use Git Bash or WSL, or copy the `python main.py ...` command from the script into cmd/PowerShell)
--------------------------------------------------------------------------------
/ele.sh:
--------------------------------------------------------------------------------
mkdir -p save
python main.py --horizon 24 --data data/electricity.txt --save save/elec.pt --output_fun Linear
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import argparse
import math
import time

from models import LSTNet
import torch.nn as nn

from Optim import Optim
from utils import *


def evaluate(data, X, Y, model, evaluateL2, evaluateL1, batch_size):
    model.eval()
    total_loss = 0
    total_loss_l1 = 0
    n_samples = 0
    predict = None
    test = None

    for X, Y in data.get_batches(X, Y, batch_size, False):
        output = model(X)
        if predict is None:
            predict = output
            test = Y
        else:
            predict = torch.cat((predict, output))
            test = torch.cat((test, Y))

        scale = data.scale.expand(output.size(0), data.m)
        total_loss += evaluateL2(output * scale, Y * scale).item()
        total_loss_l1 += evaluateL1(output * scale, Y * scale).item()
        n_samples += (output.size(0) * data.m)

    rse = math.sqrt(total_loss / n_samples) / data.rse
    rae = (total_loss_l1 / n_samples) / data.rae

    predict = predict.data.cpu().numpy()
    Ytest = test.data.cpu().numpy()
    sigma_p = predict.std(axis=0)
    sigma_g = Ytest.std(axis=0)
    mean_p = predict.mean(axis=0)
    mean_g = Ytest.mean(axis=0)
    index = (sigma_g != 0)
    correlation = ((predict - mean_p) * (Ytest - mean_g)).mean(axis=0) / (sigma_p * sigma_g)
    correlation = (correlation[index]).mean()
    return rse, rae, correlation


def train(data, X, Y, model, criterion, optim, batch_size):
    model.train()
    total_loss = 0
    n_samples = 0
    for X, Y in data.get_batches(X, Y, batch_size, True):
        model.zero_grad()
        output = model(X)
        scale = data.scale.expand(output.size(0), data.m)
        loss = criterion(output * scale, Y * scale)
        loss.backward()
        optim.step()
        total_loss += loss.item()
        n_samples += (output.size(0) * data.m)
    return total_loss / n_samples


parser = argparse.ArgumentParser(description='PyTorch Time series forecasting')
parser.add_argument('--data', type=str, default='data/exchange_rate.txt',
                    help='location of the data file')  # required=True,
parser.add_argument('--model', type=str, default='LSTNet',
                    help='name of the model module under models/')
parser.add_argument('--hidCNN', type=int, default=50,
                    help='number of CNN hidden units')
parser.add_argument('--hidRNN', type=int, default=50,
                    help='number of RNN hidden units')
parser.add_argument('--window', type=int, default=24 * 7,
                    help='window size')
parser.add_argument('--CNN_kernel', type=int, default=6,
                    help='the kernel size of the CNN layers')
parser.add_argument('--highway_window', type=int, default=24,
                    help='The window size of the highway component')
parser.add_argument('--clip', type=float, default=10.,
                    help='gradient clipping')
parser.add_argument('--epochs', type=int, default=100,
                    help='upper epoch limit')
parser.add_argument('--batch_size', type=int, default=128, metavar='N',
                    help='batch size')
parser.add_argument('--dropout', type=float, default=0.2,
                    help='dropout applied to layers (0 = no dropout)')
parser.add_argument('--seed', type=int, default=54321,
                    help='random seed')
parser.add_argument('--gpu', type=int, default=None)
parser.add_argument('--log_interval', type=int, default=2000, metavar='N',
                    help='report interval')
parser.add_argument('--save', type=str, default='models/exchange_rate.pt',
                    help='path to save the final model')
parser.add_argument('--cuda', type=str, default=False)  # overridden below: CUDA is used iff --gpu is set
parser.add_argument('--optim', type=str, default='adam')
parser.add_argument('--lr', type=float, default=0.001)
parser.add_argument('--horizon', type=int, default=12)
parser.add_argument('--skip', type=int, default=24)
parser.add_argument('--hidSkip', type=int, default=5)
parser.add_argument('--L1Loss', type=lambda s: str(s).lower() in ('true', '1', 'yes'), default=False,
                    help='use L1 loss instead of squared loss')
parser.add_argument('--normalize', type=int, default=2)
parser.add_argument('--output_fun', type=str, default='None')
args = parser.parse_args()

args.cuda = args.gpu is not None
if args.cuda:
    torch.cuda.set_device(args.gpu)

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

Data = Data_Utility(args.data, 0.6, 0.2, args.cuda, args.horizon, args.window, args.normalize)
print(Data.rse)

model = eval(args.model).Model(args, Data)

if args.cuda:
    model.cuda()

nParams = sum([p.nelement() for p in model.parameters()])
print('* number of parameters: %d' % nParams)

if args.L1Loss:
    criterion = nn.L1Loss(reduction='sum')
else:
    criterion = nn.MSELoss(reduction='sum')
evaluateL2 = nn.MSELoss(reduction='sum')
evaluateL1 = nn.L1Loss(reduction='sum')
if args.cuda:
    criterion = criterion.cuda()
    evaluateL1 = evaluateL1.cuda()
    evaluateL2 = evaluateL2.cuda()

best_val = 10000000
optim = Optim(model.parameters(), args.optim, args.lr, args.clip)

# At any point you can hit Ctrl + C to break out of training early.
try:
    print('Start training....')
    for epoch in range(1, args.epochs + 1):
        epoch_start_time = time.time()
        train_loss = train(Data, Data.train[0], Data.train[1], model, criterion, optim, args.batch_size)
        val_loss, val_rae, val_corr = evaluate(Data, Data.valid[0], Data.valid[1], model, evaluateL2, evaluateL1,
                                               args.batch_size)
        print(
            '| end of epoch {:3d} | time: {:5.2f}s | train_loss {:5.4f} | valid rse {:5.4f} | valid rae {:5.4f} | valid corr {:5.4f}'.format(
                epoch, (time.time() - epoch_start_time), train_loss, val_loss, val_rae, val_corr))

        # Save the model if the validation loss is the best we've seen so far.
        if val_loss < best_val:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val = val_loss
        if epoch % 5 == 0:
            test_acc, test_rae, test_corr = evaluate(Data, Data.test[0], Data.test[1], model, evaluateL2, evaluateL1,
                                                     args.batch_size)
            print("test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(test_acc, test_rae, test_corr))

except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model and report its test performance.
with open(args.save, 'rb') as f:
    model = torch.load(f)
test_acc, test_rae, test_corr = evaluate(Data, Data.test[0], Data.test[1], model, evaluateL2, evaluateL1,
                                         args.batch_size)
print("test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(test_acc, test_rae, test_corr))
--------------------------------------------------------------------------------
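A minimal inference sketch: reloading a checkpoint written by main.py and producing one forecast. This is editor-added illustration, not part of the repository; it assumes data/exchange_rate.txt has been downloaded, that a checkpoint such as models/exchange_rate.pt exists, and that the PyTorch version the repo targets is in use (main.py pickles the whole module, which loads with a plain `torch.load`).

```python
# Sketch: load a saved model and forecast the first test window.
import torch

from utils import Data_Utility

data = Data_Utility('data/exchange_rate.txt', 0.6, 0.2, False, horizon=12, window=24 * 7)

with open('models/exchange_rate.pt', 'rb') as f:
    model = torch.load(f, map_location='cpu')  # main.py saves the full nn.Module
model.eval()

X, Y = data.test                    # X: (n, window, m) input windows, Y: (n, m) targets
with torch.no_grad():
    pred = model(X[:1])             # normalized forecast for one window, shape (1, m)
print(pred * data.scale)            # multiply by the per-series scale to recover raw units
print(Y[:1] * data.scale)
```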
/models/LSTNet.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn


class Model(nn.Module):
    def __init__(self, args, data):
        super(Model, self).__init__()
        self.use_cuda = args.cuda
        self.P = args.window
        self.m = data.m
        self.hidR = args.hidRNN
        self.hidC = args.hidCNN
        self.hidS = args.hidSkip
        self.Ck = args.CNN_kernel
        self.skip = args.skip
        self.pt = (self.P - self.Ck) // self.skip
        self.hw = args.highway_window
        self.conv1 = nn.Conv2d(1, self.hidC, kernel_size=(self.Ck, self.m))
        self.GRU1 = nn.GRU(self.hidC, self.hidR)
        self.dropout = nn.Dropout(p=args.dropout)
        if self.skip > 0:
            self.GRUskip = nn.GRU(self.hidC, self.hidS)
            self.linear1 = nn.Linear(self.hidR + self.skip * self.hidS, self.m)
        else:
            self.linear1 = nn.Linear(self.hidR, self.m)
        if self.hw > 0:
            self.highway = nn.Linear(self.hw, 1)
        self.output = None
        if args.output_fun == 'sigmoid':
            self.output = torch.sigmoid
        if args.output_fun == 'tanh':
            self.output = torch.tanh

    def forward(self, x):
        batch_size = x.size(0)

        # CNN
        c = x.view(-1, 1, self.P, self.m)
        c = torch.relu(self.conv1(c))
        c = self.dropout(c)
        c = torch.squeeze(c, 3)

        # RNN
        r = c.permute(2, 0, 1).contiguous()
        _, r = self.GRU1(r)
        r = self.dropout(torch.squeeze(r, 0))

        # skip-RNN
        if self.skip > 0:
            s = c[:, :, int(-self.pt * self.skip):].contiguous()
            s = s.view(batch_size, self.hidC, self.pt, self.skip)
            s = s.permute(2, 0, 3, 1).contiguous()
            s = s.view(self.pt, batch_size * self.skip, self.hidC)
            _, s = self.GRUskip(s)
            s = s.view(batch_size, self.skip * self.hidS)
            s = self.dropout(s)
            r = torch.cat((r, s), 1)

        res = self.linear1(r)

        # highway (autoregressive) component
        if self.hw > 0:
            z = x[:, -self.hw:, :]
            z = z.permute(0, 2, 1).contiguous().view(-1, self.hw)
            z = self.highway(z)
            z = z.view(-1, self.m)
            res = res + z

        if self.output:
            res = self.output(res)
        return res
--------------------------------------------------------------------------------
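A quick shape check of the LSTNet forward pass. This sketch is editor-added and not part of the repository; the `Namespace` stands in for main.py's parsed arguments (values mirror its defaults) and `DummyData` stands in for `Data_Utility`, of which the model only reads `.m`.

```python
# Sketch: verify the tensor shapes produced by Model.forward with dummy arguments.
from argparse import Namespace

import torch

from models import LSTNet

args = Namespace(cuda=False, window=24 * 7, hidRNN=50, hidCNN=50, hidSkip=5,
                 CNN_kernel=6, skip=24, highway_window=24, dropout=0.2, output_fun='None')


class DummyData:   # only .m (number of series) is read by the model
    m = 8


model = LSTNet.Model(args, DummyData())
x = torch.randn(16, args.window, DummyData.m)   # (batch, window, n_series)
y = model(x)
print(y.shape)                                  # torch.Size([16, 8]): one forecast value per series
```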
/models/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aaqib-ali/LSTNet_Pytorch/31ccacd6ed8a9053f8445d7c79d20a8d8a11de0b/models/__init__.py
--------------------------------------------------------------------------------
/models/exchange_rate.pt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/aaqib-ali/LSTNet_Pytorch/31ccacd6ed8a9053f8445d7c79d20a8d8a11de0b/models/exchange_rate.pt
--------------------------------------------------------------------------------
/solar.sh:
--------------------------------------------------------------------------------
mkdir -p save
python main.py --gpu 3 --data data/solar_AL.txt --save save/solar_AL.pt --hidSkip 10 --output_fun Linear
--------------------------------------------------------------------------------
/stock.sh:
--------------------------------------------------------------------------------
mkdir -p save
python main.py --gpu 3 --data data/exchange_rate.txt --save save/exchange_rate.pt --hidCNN 50 --hidRNN 50 --L1Loss False --output_fun None
--------------------------------------------------------------------------------
/traffic.sh:
--------------------------------------------------------------------------------
mkdir -p save
python main.py --gpu 3 --data data/traffic.txt --save save/traffic.pt --hidSkip 10
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
import numpy as np
import torch


def normal_std(x):
    return x.std() * np.sqrt((len(x) - 1.) / (len(x)))


class Data_Utility(object):
    # train and valid are the ratios of the training and validation sets; test = 1 - train - valid
    def __init__(self, file_name, train, valid, cuda, horizon, window, normalize=2):
        self.cuda = cuda
        self.P = window
        self.h = horizon
        self.raw_data = np.loadtxt(file_name, delimiter=',')
        self.dat = np.zeros(self.raw_data.shape)
        self.n, self.m = self.dat.shape
        self.normalize = normalize
        self.scale = np.ones(self.m)
        self._normalized(normalize)
        self._split(int(train * self.n), int((train + valid) * self.n))

        self.scale = torch.from_numpy(self.scale).float()
        tmp = self.test[1] * self.scale.expand(self.test[1].size(0), self.m)

        if self.cuda:
            self.scale = self.scale.cuda()

        self.rse = normal_std(tmp)
        self.rae = torch.mean(torch.abs(tmp - torch.mean(tmp)))

    def _normalized(self, normalize):
        # 0: no normalization, 1: divide by the maximum value of the entire matrix,
        # 2: divide each column (one series per sensor) by its own absolute maximum.
        if normalize == 0:
            self.dat = self.raw_data

        if normalize == 1:
            self.dat = self.raw_data / np.max(self.raw_data)

        if normalize == 2:
            for i in range(self.m):
                self.scale[i] = np.max(np.abs(self.raw_data[:, i]))
                self.dat[:, i] = self.raw_data[:, i] / np.max(np.abs(self.raw_data[:, i]))

    def _split(self, train, valid):
        train_set = range(self.P + self.h - 1, train)
        valid_set = range(train, valid)
        test_set = range(valid, self.n)
        self.train = self._batchify(train_set)
        self.valid = self._batchify(valid_set)
        self.test = self._batchify(test_set)

    def _batchify(self, idx_set):
        n = len(idx_set)
        X = torch.zeros((n, self.P, self.m))
        Y = torch.zeros((n, self.m))

        for i in range(n):
            end = idx_set[i] - self.h + 1
            start = end - self.P
            X[i, :, :] = torch.from_numpy(self.dat[start:end, :])
            Y[i, :] = torch.from_numpy(self.dat[idx_set[i], :])

        return [X, Y]

    def get_batches(self, inputs, targets, batch_size, shuffle=True):
        length = len(inputs)
        if shuffle:
            index = torch.randperm(length)
        else:
            index = torch.LongTensor(range(length))
        start_idx = 0
        while start_idx < length:
            end_idx = min(length, start_idx + batch_size)
            excerpt = index[start_idx:end_idx]
            X = inputs[excerpt]
            Y = targets[excerpt]
            if self.cuda:
                X = X.cuda()
                Y = Y.cuda()
            yield X, Y
            start_idx += batch_size
--------------------------------------------------------------------------------
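A small sketch of how Data_Utility turns a raw CSV matrix into (window, target) pairs and mini-batches. This is editor-added illustration, not part of the repository; it writes a tiny synthetic series to a temporary file so it is self-contained, and the file name and sizes are arbitrary.

```python
# Sketch: inspect the window/target alignment produced by Data_Utility.
import numpy as np

from utils import Data_Utility

rows, m = 200, 3
series = np.arange(rows * m, dtype=float).reshape(rows, m)   # strictly increasing, easy to inspect
np.savetxt('tiny_series.txt', series, delimiter=',')

data = Data_Utility('tiny_series.txt', 0.6, 0.2, cuda=False, horizon=3, window=10)

X, Y = data.train
# Each X[i] is a normalized window of `window` consecutive rows, and Y[i] is the row
# `horizon` steps after the end of that window (raw row index window + horizon - 1 + i
# for the training split).
print(X.shape, Y.shape)           # (n_train, 10, 3), (n_train, 3)

for bx, by in data.get_batches(X, Y, batch_size=32, shuffle=False):
    print(bx.shape, by.shape)     # batches of at most 32 windows
    break
```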