├── .gitignore ├── LSTNet.py ├── README.md ├── data.zip ├── ele.sh ├── er.sh ├── main.py ├── solar.sh ├── traffic.sh └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | *.tmp 3 | raw/ 4 | data/ 5 | save/ 6 | logs/ 7 | __pycache__/ 8 | -------------------------------------------------------------------------------- /LSTNet.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import keras 3 | from keras.layers import Input, Dense, Conv1D, GRU, Dropout, Flatten, Activation 4 | from keras.layers import concatenate, add, Lambda 5 | from keras.models import Model, Sequential 6 | from keras.optimizers import Adam 7 | import keras.backend as K 8 | 9 | class LSTNet(object): 10 | def __init__(self, args, dims): 11 | super(LSTNet, self).__init__() 12 | self.P = args.window 13 | self.m = dims 14 | self.hidR = args.hidRNN 15 | self.hidC = args.hidCNN 16 | self.hidS = args.hidSkip 17 | self.Ck = args.CNN_kernel 18 | self.skip = args.skip 19 | self.pt = int((self.P-self.Ck)/self.skip) 20 | self.hw = args.highway_window 21 | self.dropout = args.dropout 22 | self.output = args.output_fun 23 | self.lr = args.lr 24 | self.loss = args.loss 25 | self.clip = args.clip 26 | 27 | def make_model(self): 28 | 29 | x = Input(shape=(self.P, self.m)) 30 | 31 | # CNN 32 | c = Conv1D(self.hidC, self.Ck, activation='relu')(x) 33 | c = Dropout(self.dropout)(c) 34 | 35 | # RNN 36 | 37 | r = GRU(self.hidR)(c) 38 | r = Lambda(lambda k: K.reshape(k, (-1, self.hidR)))(r) 39 | r = Dropout(self.dropout)(r) 40 | 41 | # skip-RNN 42 | if self.skip > 0: 43 | # c: batch_size*steps*filters, steps=P-Ck 44 | s = Lambda(lambda k: k[:, int(-self.pt*self.skip):, :])(c) 45 | s = Lambda(lambda k: K.reshape(k, (-1, self.pt, self.skip, self.hidC)))(s) 46 | s = Lambda(lambda k: K.permute_dimensions(k, (0,2,1,3)))(s) 47 | s = Lambda(lambda k: K.reshape(k, (-1, self.pt, self.hidC)))(s) 48 | 49 | s = GRU(self.hidS)(s) 50 | s = Lambda(lambda k: K.reshape(k, (-1, self.skip*self.hidS)))(s) 51 | s = Dropout(self.dropout)(s) 52 | r = concatenate([r,s]) 53 | 54 | res = Dense(self.m)(r) 55 | 56 | # highway 57 | if self.hw > 0: 58 | z = Lambda(lambda k: k[:, -self.hw:, :])(x) 59 | z = Lambda(lambda k: K.permute_dimensions(k, (0,2,1)))(z) 60 | z = Lambda(lambda k: K.reshape(k, (-1, self.hw)))(z) 61 | z = Dense(1)(z) 62 | z = Lambda(lambda k: K.reshape(k, (-1, self.m)))(z) 63 | res = add([res, z]) 64 | 65 | if self.output != 'no': 66 | res = Activation(self.output)(res) 67 | 68 | model = Model(inputs=x, outputs=res) 69 | model.compile(optimizer=Adam(lr=self.lr, clipnorm=self.clip), loss=self.loss) 70 | return model 71 | 72 | class LSTNet_multi_inputs(object): 73 | def __init__(self, args, dims): 74 | super(LSTNet_multi_inputs, self).__init__() 75 | self.P = args.window 76 | self.m = dims 77 | self.hidR = args.hidRNN 78 | self.hidC = args.hidCNN 79 | self.hidS = args.hidSkip 80 | self.Ck = args.CNN_kernel 81 | self.skip = args.skip 82 | #self.pt = int((self.P-self.Ck)/self.skip) 83 | self.pt = args.ps 84 | self.hw = args.highway_window 85 | self.dropout = args.dropout 86 | self.output = args.output_fun 87 | self.lr = args.lr 88 | self.loss = args.loss 89 | self.clip = args.clip 90 | 91 | def make_model(self): 92 | 93 | # Input1: short-term time series 94 | input1 = Input(shape=(self.P, self.m)) 95 | # CNN 96 | conv1 = Conv1D(self.hidC, self.Ck, strides=1, activation='relu') # for input1 97 | # It's a probelm that I can't find any way to use the same Conv1D layer to train the two inputs, 98 | # since input2's strides should be Ck, not 1 as input1 99 | conv2 = Conv1D(self.hidC, self.Ck, strides=self.Ck, activation='relu') # for input2 100 | conv2.set_weights(conv1.get_weights()) # at least use same weight 101 | 102 | c1 = conv1(input1) 103 | c1 = Dropout(self.dropout)(c1) 104 | # RNN 105 | r1 = GRU(self.hidR)(c1) 106 | #r1 = Lambda(lambda k: K.reshape(k, (-1, self.hidR)))(r1) 107 | r1 = Dropout(self.dropout)(r1) 108 | 109 | # Input2: long-term time series with period 110 | input2 = Input(shape=(self.pt*self.Ck, self.m)) 111 | # CNN 112 | c2 = conv2(input2) 113 | c2 = Dropout(self.dropout)(c2) 114 | # RNN 115 | r2 = GRU(self.hidS)(c2) 116 | #r2 = Lambda(lambda k: K.reshape(k, (-1, self.hidR)))(r2) 117 | r2 = Dropout(self.dropout)(r2) 118 | 119 | r = concatenate([r1,r2]) 120 | res = Dense(self.m)(r) 121 | 122 | # highway 123 | if self.hw > 0: 124 | z = Lambda(lambda k: k[:, -self.hw:, :])(input1) 125 | z = Lambda(lambda k: K.permute_dimensions(k, (0,2,1)))(z) 126 | z = Lambda(lambda k: K.reshape(k, (-1, self.hw)))(z) 127 | z = Dense(1)(z) 128 | z = Lambda(lambda k: K.reshape(k, (-1, self.m)))(z) 129 | res = add([res, z]) 130 | 131 | if self.output != 'no': 132 | res = Activation(self.output)(res) 133 | 134 | model = Model(inputs=[input1, input2], outputs=res) 135 | model.compile(optimizer=Adam(lr=self.lr, clipnorm=self.clip), loss=self.loss) 136 | return model 137 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Keras version of LSTNet 2 | 3 | ### Environment 4 | * python 3.6.0 5 | * tensorflow 1.12.0 6 | * Keras 2.2.0 7 | 8 | ### Usage 9 | ``` 10 | unzip data.zip 11 | mkdir save/ logs/ 12 | ./er.sh 13 | ``` 14 | 15 | ### Multi-input 16 | The original version is a little redundant since it should put the huge tensor into the model as the input.
17 | However, if the time interval is small, like 5 or 10 mins, the input may be too huge for memory and lacking of efficiency during training.
18 | Therefore, I wrote a version called **LSTNet_multi_inputs** which deconstructs the input as (1) short-term time series, like (t-3, t-2, t-1, t) and (2) long-term skip time series, like (t-2xskip, t-skip, t).
19 | The result is as good as the original one, but much faster.
20 | 21 | 22 | -------------------------------------------------------------------------------- /data.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Lorne0/LSTNet_keras/e28fb6113e47f2fd5984ab2ec759ebfc607ffd5b/data.zip -------------------------------------------------------------------------------- /ele.sh: -------------------------------------------------------------------------------- 1 | 2 | time python main.py --data ./data/ele.npz --save ./save/ele.pk --log ./logs/ele.log --exps 3 --patience 10 \ 3 | --normalize 1 --loss mae --hidCNN 200 --hidRNN 200 --hidSkip 100 --output_fun linear \ 4 | --multi 1 --horizon 3 --highway_window 12 --window 48 --skip 24 --ps 4 5 | 6 | -------------------------------------------------------------------------------- /er.sh: -------------------------------------------------------------------------------- 1 | 2 | time python main.py --data ./data/er.npz --save ./save/er.pk --log ./logs/er.log --exps 5 --patience 15 \ 3 | --normalize 1 --loss mae --hidCNN 100 --hidRNN 100 --hidSkip 50 --output_fun no \ 4 | --multi 1 --horizon 3 --highway_window 7 --window 14 --skip 7 --ps 3 5 | 6 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import time 3 | import datetime 4 | from utils import * 5 | from LSTNet import LSTNet, LSTNet_multi_inputs 6 | import numpy as np 7 | from keras.models import model_from_yaml 8 | import pickle as pk 9 | import keras.backend as K 10 | import tensorflow as tf 11 | 12 | # limit gpu memory 13 | def get_session(gpu_fraction=0.1): 14 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction) 15 | return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 16 | K.set_session(get_session()) 17 | 18 | def print_shape(data): 19 | for i in range(len(data.train)): 20 | print(data.train[i].shape, end=' ') 21 | print("") 22 | for i in range(len(data.valid)): 23 | print(data.valid[i].shape, end=' ') 24 | print("") 25 | for i in range(len(data.test)): 26 | print(data.test[i].shape, end=' ') 27 | print("") 28 | 29 | def evaluate(y, yp): 30 | # rrse 31 | rrse = np.sqrt(np.sum(np.square(y-yp)) / np.sum(np.square(np.mean(y)-y))) 32 | # corr 33 | #m, mp = np.mean(y, axis=0), np.mean(yp, axis=0) 34 | #corr = np.mean(np.sum((y-m)*(yp-mp), axis=0) / np.sqrt(np.sum(np.square(y-m), axis=0)*np.sum(np.square(yp-mp), axis=0))) 35 | m, mp, sig, sigp = y.mean(axis=0), yp.mean(axis=0), y.std(axis=0), yp.std(axis=0) 36 | corr = ((((y-m)*(yp-mp)).mean(axis=0) / (sig*sigp))[sig!=0]).mean() 37 | #corr = ((((y-m)*(yp-mp)).mean(axis=0) / (sig*sigp))).mean() 38 | 39 | return rrse, corr 40 | 41 | def main(args, exp): 42 | K.clear_session() 43 | flog = open(args.log, "a") 44 | s = "\nExp {}".format(exp) 45 | print(s) 46 | flog.write(s+"\n") 47 | now=str(datetime.datetime.now()) 48 | print(now) 49 | flog.write(now+"\n") 50 | flog.flush() 51 | 52 | data = Data(args) 53 | print_shape(data) 54 | if args.multi==1: 55 | model = LSTNet_multi_inputs(args, data.m).make_model() 56 | else: 57 | model = LSTNet(args, data.m).make_model() 58 | 59 | ### Train ### 60 | test_result = [1e6, -1e6] 61 | best_valid = [1e6, -1e6] 62 | pat = 0 63 | bs = int(args.batch_size) 64 | l = len(data.train[0]) 65 | order = np.arange(l) 66 | train_batch_num = int(l/bs) 67 | for e in range(1,args.epochs+1): 68 | tt = time.time() 69 | np.random.shuffle(order) 70 | if args.multi: 71 | x1, x2, y = data.train[0][order].copy(), data.train[1][order].copy(), data.train[2][order].copy() 72 | else: 73 | x, y = data.train[0][order].copy(), data.train[1][order].copy() 74 | for b in range(train_batch_num): 75 | print("\r%d/%d" %(b+1,train_batch_num), end='') 76 | if args.multi: 77 | b_x1, b_x2, b_y = x1[b*bs:(b+1)*bs], x2[b*bs:(b+1)*bs], y[b*bs:(b+1)*bs] 78 | model.train_on_batch([b_x1, b_x2], b_y) 79 | else: 80 | b_x, b_y = x[b*bs:(b+1)*bs], y[b*bs:(b+1)*bs] 81 | model.train_on_batch(b_x, b_y) 82 | rrse, corr = evaluate(data.valid[-1], model.predict(data.valid[:-1], batch_size=bs)) 83 | et = time.time()-tt 84 | print("\r%d | Valid | rrse: %.4f | corr: %.4f | time: %.2fs" %(e, rrse, corr, et)) 85 | 86 | if (corr-rrse) >= (best_valid[1]-best_valid[0]): 87 | best_valid = [rrse, corr] 88 | pat = 0 89 | # test 90 | rrse, corr = evaluate(data.test[-1], model.predict(data.test[:-1], batch_size=bs)) 91 | s = "{} | Test | rrse: {:.4f} | corr: {:.4f} | approx epoch time: {:.2f}s".format(e, rrse, corr, et) 92 | print("\t"+s) 93 | flog.write(s+"\n") 94 | flog.flush() 95 | test_result = [rrse, corr] 96 | #can't use model.save(args.save) due to JSON Serializable error, so need to save like this: 97 | yaml = model.to_yaml() 98 | W = model.get_weights() 99 | with open(args.save, "wb") as fw: 100 | pk.dump(yaml, fw, protocol=pk.HIGHEST_PROTOCOL) 101 | pk.dump(W, fw, protocol=pk.HIGHEST_PROTOCOL) 102 | ''' 103 | # Test loaded model 104 | with open(args.save, "rb") as fp: 105 | new_yaml = pk.load(fp) 106 | new_W = pk.load(fp) 107 | new_model = model_from_yaml(new_yaml) 108 | new_model.set_weights(new_W) 109 | rrse, corr, rmse = evaluate(data.test[1], new_model.predict(data.test[0]), data.col_max[0]) 110 | print("\tLoaded Test | rrse: %.4f | corr: %.4f | rmse: %.4f" %(rrse, corr, rmse)) 111 | ''' 112 | else: 113 | pat += 1 114 | if pat==args.patience: # early stopping 115 | break 116 | 117 | s = "End of Exp {}".format(exp) 118 | print(s) 119 | flog.write(s+"\n") 120 | flog.flush() 121 | flog.close() 122 | return test_result 123 | 124 | if __name__ == '__main__': 125 | parser = argparse.ArgumentParser(description='Keras Time series forecasting') 126 | parser.add_argument('--data', type=str, required=True, help='location of the data file') 127 | parser.add_argument('--hidCNN', type=int, default=100, help='number of CNN hidden units') 128 | parser.add_argument('--hidRNN', type=int, default=100, help='number of RNN hidden units') 129 | parser.add_argument('--hidSkip', type=int, default=10) 130 | parser.add_argument('--window', type=int, default=24*7, help='window size') 131 | parser.add_argument('--horizon', type=int, default=3) 132 | parser.add_argument('--skip', type=int, default=24, help='period') 133 | parser.add_argument('--ps', type=int, default=3, help='number of skip (periods)') 134 | parser.add_argument('--CNN_kernel', type=int, default=6, help='the kernel size of the CNN layers') 135 | parser.add_argument('--highway_window', type=int, default=3, help='The window size of the highway component') 136 | parser.add_argument('--clip', type=float, default=10., help='gradient clipping') 137 | parser.add_argument('--epochs', type=int, default=1000, help='upper epoch limit') 138 | parser.add_argument('--batch_size', type=int, default=128, metavar='N', help='batch size') 139 | parser.add_argument('--dropout', type=float, default=0.2, help='dropout applied to layers (0 = no dropout)') 140 | parser.add_argument('--seed', type=int, default=54321, help='random seed') 141 | #parser.add_argument('--gpu', type=int, default=None) 142 | parser.add_argument('--multi', type=int, default=0, help='original(0) or multi-input(1) LSTNet') 143 | parser.add_argument('--log_interval', type=int, default=2000, metavar='N', help='report interval') 144 | parser.add_argument('--save', type=str, default='save/model.pt', help='path to save the final model') 145 | parser.add_argument('--log', type=str, default='logs/model.pt', help='path to save the testing logs') 146 | #parser.add_argument('--cuda', type=str, default=True) 147 | parser.add_argument('--optim', type=str, default='adam') 148 | parser.add_argument('--lr', type=float, default=0.0005) 149 | parser.add_argument('--loss', type=str, default='mae') 150 | parser.add_argument('--normalize', type=int, default=2) 151 | parser.add_argument('--output_fun', type=str, default='sigmoid') 152 | parser.add_argument('--exps', type=int, default=1, help='number of experiments') 153 | parser.add_argument('--patience', type=int, default=10, help='patience of early stopping') 154 | args = parser.parse_args() 155 | 156 | test = [] 157 | for exp in range(1,args.exps+1): 158 | test.append(main(args, exp)) 159 | test = np.array(test) 160 | avg = np.mean(test, axis=0) 161 | best = test[np.argmax(test[:,1]-test[:,0]), :] 162 | s = 'Average result | rrse: {:.4f} | corr: {:.4f}'.format(avg[0], avg[1]) 163 | ss = 'Best result | rrse: {:.4f} | corr: {:.4f}'.format(best[0], best[1]) 164 | with open(args.log, "a") as flog: 165 | flog.write(s+"\n") 166 | flog.write(ss+"\n") 167 | 168 | -------------------------------------------------------------------------------- /solar.sh: -------------------------------------------------------------------------------- 1 | 2 | time python main.py --data ./data/solar.npz --save ./save/solar.pk --log ./logs/solar.log --exps 3 --patience 10 \ 3 | --normalize 1 --loss mae --hidCNN 100 --hidRNN 100 --hidSkip 50 --output_fun linear \ 4 | --multi 1 --horizon 3 --highway_window 12 --window 36 --skip 144 --ps 3 5 | 6 | -------------------------------------------------------------------------------- /traffic.sh: -------------------------------------------------------------------------------- 1 | 2 | time python main.py --data ./data/traffic.npz --save ./save/traffic.pk --log ./logs/traffic.log --exps 3 --patience 10 \ 3 | --normalize 1 --loss mae --hidCNN 100 --hidRNN 100 --hidSkip 50 --output_fun linear \ 4 | --multi 1 --horizon 3 --highway_window 12 --window 48 --skip 24 --ps 3 5 | 6 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pickle as pk 4 | 5 | def raw_to_npz(fn): 6 | df = pd.read_csv("./raw/"+fn, header=None) 7 | A = df.values.astype(np.float32) 8 | fn = fn.split(".")[0] 9 | np.savez_compressed('./data/'+fn, a=A) 10 | 11 | class Data(object): 12 | def __init__(self, args, tn=0.6, vd=0.2): 13 | self.h, self.w, self.skip, self.ps, self.Ck = args.horizon, args.window, args.skip, args.ps, args.CNN_kernel 14 | self.raw = np.load(args.data)['a'] 15 | self.n, self.m = self.raw.shape 16 | self.tn, self.vd = tn, vd 17 | 18 | if args.normalize==1: 19 | self.mx = np.max(np.abs(self.raw)) 20 | self.raw /= self.mx 21 | elif args.normalize==2: 22 | self.col_max = np.max(np.abs(self.raw), axis=0)+1 23 | self.raw /= self.col_max 24 | 25 | if args.multi: 26 | self._split(self._slice_multi()) 27 | else: 28 | self._split(self._slice()) 29 | 30 | def _slice(self): 31 | s = self.w+self.h-1 32 | X = np.zeros((self.n-s, self.w, self.m)) 33 | Y = np.zeros((self.n-s, self.m)) 34 | for i in range(s, self.n): 35 | #X[i-s] = self.raw[i-s:i-s+self.w].copy() 36 | X[i-s] = self.raw[i-self.h+1-self.w:i-self.h+1].copy() 37 | Y[i-s] = self.raw[i].copy() 38 | return X, Y 39 | 40 | def _slice_multi(self): 41 | s = self.ps*self.skip+self.Ck-1 + self.h-1 42 | X1 = np.zeros((self.n-s, self.w, self.m)) 43 | X2 = np.zeros((self.n-s, self.ps*self.Ck, self.m)) 44 | Y = np.zeros((self.n-s, self.m)) 45 | for i in range(s, self.n): 46 | t = i-self.h+1 47 | X1[i-s] = self.raw[t-self.w:t].copy() 48 | idx = [] 49 | for k in range(self.ps): 50 | idx = list(range(t-self.Ck-k*self.skip, t-k*self.skip)) + idx 51 | idx = np.array(idx, dtype=int) 52 | X2[i-s] = self.raw[idx].copy() 53 | Y[i-s] = self.raw[i].copy() 54 | return X1, X2, Y 55 | 56 | def _split(self, *args): 57 | tn = int(self.n*self.tn) 58 | vd = int(self.n*(self.tn+self.vd)) 59 | self.train, self.valid, self.test = [], [], [] 60 | arg = args[0] 61 | for A in arg: 62 | self.train.append(A[:tn].copy()) 63 | self.valid.append(A[tn:vd].copy()) 64 | self.test.append(A[vd:].copy()) 65 | 66 | --------------------------------------------------------------------------------