├── .gitignore
├── LSTNet.py
├── README.md
├── data.zip
├── ele.sh
├── er.sh
├── main.py
├── solar.sh
├── traffic.sh
└── utils.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | *.tmp
3 | raw/
4 | data/
5 | save/
6 | logs/
7 | __pycache__/
8 |
--------------------------------------------------------------------------------
/LSTNet.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import keras
3 | from keras.layers import Input, Dense, Conv1D, GRU, Dropout, Flatten, Activation
4 | from keras.layers import concatenate, add, Lambda
5 | from keras.models import Model, Sequential
6 | from keras.optimizers import Adam
7 | import keras.backend as K
8 |
class LSTNet(object):
    """Keras implementation of LSTNet (Lai et al., SIGIR 2018):
    a CNN + GRU + skip-GRU + autoregressive highway network for
    multivariate time-series forecasting.
    """
    def __init__(self, args, dims):
        super(LSTNet, self).__init__()
        self.P = args.window           # length of the input window
        self.m = dims                  # number of variables (series)
        self.hidR = args.hidRNN        # GRU hidden units
        self.hidC = args.hidCNN        # number of CNN filters
        self.hidS = args.hidSkip       # skip-GRU hidden units
        self.Ck = args.CNN_kernel      # CNN kernel size
        self.skip = args.skip          # skip length (assumed period)
        # number of whole periods that fit in the conv output
        self.pt = int((self.P-self.Ck)/self.skip)
        self.hw = args.highway_window  # highway (AR) window size
        self.dropout = args.dropout
        self.output = args.output_fun  # final activation, or 'no' for linear
        self.lr = args.lr
        self.loss = args.loss
        self.clip = args.clip          # gradient-clipping norm

    def make_model(self):
        """Build and compile the Keras model.

        Input:  (batch, P, m) raw series window.
        Output: (batch, m) forecast at the configured horizon.
        """

        x = Input(shape=(self.P, self.m))

        # CNN: extract short-term local patterns across all variables.
        # 'valid' padding -> output shape (batch, P - Ck + 1, hidC).
        c = Conv1D(self.hidC, self.Ck, activation='relu')(x)
        c = Dropout(self.dropout)(c)

        # RNN over the conv features.

        r = GRU(self.hidR)(c)
        # GRU already returns (batch, hidR); this reshape is an identity
        # kept for shape explicitness.
        r = Lambda(lambda k: K.reshape(k, (-1, self.hidR)))(r)
        r = Dropout(self.dropout)(r)

        # skip-RNN: capture periodic patterns by running a GRU over conv
        # features that are exactly `skip` steps apart.
        if self.skip > 0:
            # c: batch_size*steps*filters; keep the last pt*skip steps so
            # they divide evenly into pt whole periods.
            s = Lambda(lambda k: k[:, int(-self.pt*self.skip):, :])(c)
            # (batch, pt*skip, hidC) -> (batch, pt, skip, hidC)
            s = Lambda(lambda k: K.reshape(k, (-1, self.pt, self.skip, self.hidC)))(s)
            # -> (batch, skip, pt, hidC): one length-pt sequence per phase
            s = Lambda(lambda k: K.permute_dimensions(k, (0,2,1,3)))(s)
            # fold the skip axis into the batch axis: (batch*skip, pt, hidC)
            s = Lambda(lambda k: K.reshape(k, (-1, self.pt, self.hidC)))(s)

            s = GRU(self.hidS)(s)
            # unfold: (batch*skip, hidS) -> (batch, skip*hidS)
            s = Lambda(lambda k: K.reshape(k, (-1, self.skip*self.hidS)))(s)
            s = Dropout(self.dropout)(s)
            r = concatenate([r,s])

        res = Dense(self.m)(r)

        # highway: linear autoregressive term over the last hw raw values
        # of each series, added to the neural prediction.
        if self.hw > 0:
            z = Lambda(lambda k: k[:, -self.hw:, :])(x)
            # (batch, hw, m) -> (batch, m, hw) -> (batch*m, hw)
            z = Lambda(lambda k: K.permute_dimensions(k, (0,2,1)))(z)
            z = Lambda(lambda k: K.reshape(k, (-1, self.hw)))(z)
            z = Dense(1)(z)  # one AR weight vector shared by all series
            z = Lambda(lambda k: K.reshape(k, (-1, self.m)))(z)
            res = add([res, z])

        if self.output != 'no':
            res = Activation(self.output)(res)

        model = Model(inputs=x, outputs=res)
        model.compile(optimizer=Adam(lr=self.lr, clipnorm=self.clip), loss=self.loss)
        return model
71 |
class LSTNet_multi_inputs(object):
    """Two-input variant of LSTNet.

    Instead of one long window, the model receives
      input1: the recent (short-term) window, shape (P, m), and
      input2: `ps` period-aligned snippets of Ck steps each,
              concatenated to shape (ps*Ck, m),
    which keeps the input tensors small when the sampling interval is fine.
    """
    def __init__(self, args, dims):
        super(LSTNet_multi_inputs, self).__init__()
        self.P = args.window           # short-term window length
        self.m = dims                  # number of variables (series)
        self.hidR = args.hidRNN        # GRU hidden units (short-term branch)
        self.hidC = args.hidCNN        # number of CNN filters
        self.hidS = args.hidSkip       # GRU hidden units (long-term branch)
        self.Ck = args.CNN_kernel      # CNN kernel size
        self.skip = args.skip          # period length
        self.pt = args.ps              # number of periods in input2
        self.hw = args.highway_window  # highway (AR) window size
        self.dropout = args.dropout
        self.output = args.output_fun  # final activation, or 'no' for linear
        self.lr = args.lr
        self.loss = args.loss
        self.clip = args.clip          # gradient-clipping norm

    def make_model(self):
        """Build and compile the two-input Keras model.

        Inputs:  [(batch, P, m), (batch, pt*Ck, m)]
        Output:  (batch, m) forecast at the configured horizon.
        """
        # Input1: short-term time series
        input1 = Input(shape=(self.P, self.m))
        # CNN over the dense recent window (stride 1)
        conv1 = Conv1D(self.hidC, self.Ck, strides=1, activation='relu')
        c1 = conv1(input1)  # calling the layer builds its weights
        c1 = Dropout(self.dropout)(c1)
        # RNN
        r1 = GRU(self.hidR)(c1)
        r1 = Dropout(self.dropout)(r1)

        # Input2: long-term time series made of `pt` period-aligned chunks
        # of Ck steps each; stride Ck makes the kernel see exactly one
        # chunk per output step.
        input2 = Input(shape=(self.pt*self.Ck, self.m))
        # The two inputs need different strides, so the same Conv1D layer
        # cannot be reused for both.
        conv2 = Conv1D(self.hidC, self.Ck, strides=self.Ck, activation='relu')
        c2 = conv2(input2)  # builds conv2; kernel shape matches conv1's
        # BUGFIX: copy conv1's weights only AFTER both layers are built.
        # The original code copied them before building, when get_weights()
        # returns an empty list, so the copy was a silent no-op.
        # NOTE: the weights start equal but are NOT tied during training.
        conv2.set_weights(conv1.get_weights())
        c2 = Dropout(self.dropout)(c2)
        # RNN
        r2 = GRU(self.hidS)(c2)
        r2 = Dropout(self.dropout)(r2)

        # merge both views and project back to the m series
        r = concatenate([r1,r2])
        res = Dense(self.m)(r)

        # highway: linear autoregressive term over the last hw raw values
        # of input1, added to the neural prediction.
        if self.hw > 0:
            z = Lambda(lambda k: k[:, -self.hw:, :])(input1)
            # (batch, hw, m) -> (batch, m, hw) -> (batch*m, hw)
            z = Lambda(lambda k: K.permute_dimensions(k, (0,2,1)))(z)
            z = Lambda(lambda k: K.reshape(k, (-1, self.hw)))(z)
            z = Dense(1)(z)  # one AR weight vector shared by all series
            z = Lambda(lambda k: K.reshape(k, (-1, self.m)))(z)
            res = add([res, z])

        if self.output != 'no':
            res = Activation(self.output)(res)

        model = Model(inputs=[input1, input2], outputs=res)
        model.compile(optimizer=Adam(lr=self.lr, clipnorm=self.clip), loss=self.loss)
        return model
137 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Keras version of LSTNet
2 |
3 | ### Environment
4 | * python 3.6.0
5 | * tensorflow 1.12.0
6 | * Keras 2.2.0
7 |
8 | ### Usage
9 | ```
10 | unzip data.zip
11 | mkdir save/ logs/
12 | ./er.sh
13 | ```
14 |
15 | ### Multi-input
The original version is somewhat wasteful, because it has to feed one huge tensor into the model as the input.
If the time interval is small (e.g. 5 or 10 minutes), that input can be too large for memory and slows training down.
Therefore, I wrote a version called **LSTNet_multi_inputs** which decomposes the input into (1) a short-term time series, like (t-3, t-2, t-1, t), and (2) a long-term skip time series, like (t-2xskip, t-skip, t).
The result is as good as the original one, but much faster.
20 |
21 |
22 |
--------------------------------------------------------------------------------
/data.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Lorne0/LSTNet_keras/e28fb6113e47f2fd5984ab2ec759ebfc607ffd5b/data.zip
--------------------------------------------------------------------------------
/ele.sh:
--------------------------------------------------------------------------------

# Train the multi-input LSTNet on the electricity dataset: 3 experiments,
# early-stopping patience 10; metrics are appended to ./logs/ele.log.
time python main.py --data ./data/ele.npz --save ./save/ele.pk --log ./logs/ele.log --exps 3 --patience 10 \
--normalize 1 --loss mae --hidCNN 200 --hidRNN 200 --hidSkip 100 --output_fun linear \
--multi 1 --horizon 3 --highway_window 12 --window 48 --skip 24 --ps 4


--------------------------------------------------------------------------------
/er.sh:
--------------------------------------------------------------------------------

# Train the multi-input LSTNet on the exchange-rate dataset: 5 experiments,
# early-stopping patience 15; metrics are appended to ./logs/er.log.
time python main.py --data ./data/er.npz --save ./save/er.pk --log ./logs/er.log --exps 5 --patience 15 \
--normalize 1 --loss mae --hidCNN 100 --hidRNN 100 --hidSkip 50 --output_fun no \
--multi 1 --horizon 3 --highway_window 7 --window 14 --skip 7 --ps 3


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import time
3 | import datetime
4 | from utils import *
5 | from LSTNet import LSTNet, LSTNet_multi_inputs
6 | import numpy as np
7 | from keras.models import model_from_yaml
8 | import pickle as pk
9 | import keras.backend as K
10 | import tensorflow as tf
11 |
# limit gpu memory
def get_session(gpu_fraction=0.1):
    """Return a TF session capped at `gpu_fraction` of GPU memory
    (TensorFlow 1.x API), so several jobs can share one GPU."""
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction)
    return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
K.set_session(get_session())  # make Keras use the memory-capped session
17 |
def print_shape(data):
    """Print the array shapes of the train, valid and test splits,
    one split per line (space-separated shapes)."""
    for split in (data.train, data.valid, data.test):
        for arr in split:
            print(arr.shape, end=' ')
        print("")
28 |
def evaluate(y, yp):
    """Compute forecasting metrics between targets and predictions.

    Args:
        y:  (samples, m) ground-truth array.
        yp: (samples, m) prediction array of the same shape.

    Returns:
        (rrse, corr) where rrse is the root relative squared error over
        the whole matrix and corr is the mean per-column Pearson
        correlation over columns whose target AND prediction are
        non-constant.
    """
    # rrse: residual energy relative to a predict-the-global-mean baseline
    rrse = np.sqrt(np.sum(np.square(y - yp)) / np.sum(np.square(y - np.mean(y))))
    # per-column Pearson correlation (population std, ddof=0)
    m, mp = y.mean(axis=0), yp.mean(axis=0)
    sig, sigp = y.std(axis=0), yp.std(axis=0)
    # BUGFIX: the original masked only sig != 0; a constant *prediction*
    # column (sigp == 0) divided by zero and turned corr into NaN.
    valid = (sig != 0) & (sigp != 0)
    cov = ((y - m) * (yp - mp)).mean(axis=0)
    corr = (cov[valid] / (sig[valid] * sigp[valid])).mean()
    return rrse, corr
40 |
def main(args, exp):
    """Run a single training experiment and return its test-set result.

    Trains the (multi-input) LSTNet model with early stopping on the
    validation set; whenever the validation score (corr - rrse) improves,
    the test set is evaluated and the model is snapshotted to args.save.

    Args:
        args: parsed command-line arguments (see __main__ below).
        exp:  1-based experiment index, used only for logging.

    Returns:
        [rrse, corr] measured on the test set at the best validation epoch.
    """
    K.clear_session()  # drop any graph left over from a previous experiment
    flog = open(args.log, "a")
    s = "\nExp {}".format(exp)
    print(s)
    flog.write(s+"\n")
    now=str(datetime.datetime.now())
    print(now)
    flog.write(now+"\n")
    flog.flush()

    data = Data(args)
    print_shape(data)
    if args.multi==1:
        model = LSTNet_multi_inputs(args, data.m).make_model()
    else:
        model = LSTNet(args, data.m).make_model()

    ### Train ###
    # Both hold [rrse, corr]; rrse is minimized and corr maximized, so
    # the initial values are effectively +inf / -inf sentinels.
    test_result = [1e6, -1e6]
    best_valid = [1e6, -1e6]
    pat = 0  # epochs since last validation improvement (early stopping)
    bs = int(args.batch_size)
    l = len(data.train[0])
    order = np.arange(l)
    train_batch_num = int(l/bs)  # NOTE: trailing partial batch is dropped
    for e in range(1,args.epochs+1):
        tt = time.time()
        np.random.shuffle(order)  # reshuffle training samples every epoch
        if args.multi:
            x1, x2, y = data.train[0][order].copy(), data.train[1][order].copy(), data.train[2][order].copy()
        else:
            x, y = data.train[0][order].copy(), data.train[1][order].copy()
        for b in range(train_batch_num):
            print("\r%d/%d" %(b+1,train_batch_num), end='')
            if args.multi:
                b_x1, b_x2, b_y = x1[b*bs:(b+1)*bs], x2[b*bs:(b+1)*bs], y[b*bs:(b+1)*bs]
                model.train_on_batch([b_x1, b_x2], b_y)
            else:
                b_x, b_y = x[b*bs:(b+1)*bs], y[b*bs:(b+1)*bs]
                model.train_on_batch(b_x, b_y)
        # data.*[:-1] are the model inputs, data.*[-1] is the target
        rrse, corr = evaluate(data.valid[-1], model.predict(data.valid[:-1], batch_size=bs))
        et = time.time()-tt
        print("\r%d | Valid | rrse: %.4f | corr: %.4f | time: %.2fs" %(e, rrse, corr, et))

        # single scalar validation score: corr - rrse (higher is better)
        if (corr-rrse) >= (best_valid[1]-best_valid[0]):
            best_valid = [rrse, corr]
            pat = 0
            # test
            rrse, corr = evaluate(data.test[-1], model.predict(data.test[:-1], batch_size=bs))
            s = "{} | Test | rrse: {:.4f} | corr: {:.4f} | approx epoch time: {:.2f}s".format(e, rrse, corr, et)
            print("\t"+s)
            flog.write(s+"\n")
            flog.flush()
            test_result = [rrse, corr]
            #can't use model.save(args.save) due to JSON Serializable error, so need to save like this:
            # snapshot: architecture as YAML + weights, pickled together
            yaml = model.to_yaml()
            W = model.get_weights()
            with open(args.save, "wb") as fw:
                pk.dump(yaml, fw, protocol=pk.HIGHEST_PROTOCOL)
                pk.dump(W, fw, protocol=pk.HIGHEST_PROTOCOL)
            '''
            # Test loaded model
            with open(args.save, "rb") as fp:
                new_yaml = pk.load(fp)
                new_W = pk.load(fp)
            new_model = model_from_yaml(new_yaml)
            new_model.set_weights(new_W)
            rrse, corr, rmse = evaluate(data.test[1], new_model.predict(data.test[0]), data.col_max[0])
            print("\tLoaded Test | rrse: %.4f | corr: %.4f | rmse: %.4f" %(rrse, corr, rmse))
            '''
        else:
            pat += 1
            if pat==args.patience: # early stopping
                break

    s = "End of Exp {}".format(exp)
    print(s)
    flog.write(s+"\n")
    flog.flush()
    flog.close()
    return test_result
123 |
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Keras Time series forecasting')
    parser.add_argument('--data', type=str, required=True, help='location of the data file')
    parser.add_argument('--hidCNN', type=int, default=100, help='number of CNN hidden units')
    parser.add_argument('--hidRNN', type=int, default=100, help='number of RNN hidden units')
    parser.add_argument('--hidSkip', type=int, default=10)
    parser.add_argument('--window', type=int, default=24*7, help='window size')
    parser.add_argument('--horizon', type=int, default=3)
    parser.add_argument('--skip', type=int, default=24, help='period')
    parser.add_argument('--ps', type=int, default=3, help='number of skip (periods)')
    parser.add_argument('--CNN_kernel', type=int, default=6, help='the kernel size of the CNN layers')
    parser.add_argument('--highway_window', type=int, default=3, help='The window size of the highway component')
    parser.add_argument('--clip', type=float, default=10., help='gradient clipping')
    parser.add_argument('--epochs', type=int, default=1000, help='upper epoch limit')
    parser.add_argument('--batch_size', type=int, default=128, metavar='N', help='batch size')
    parser.add_argument('--dropout', type=float, default=0.2, help='dropout applied to layers (0 = no dropout)')
    parser.add_argument('--seed', type=int, default=54321, help='random seed')
    #parser.add_argument('--gpu', type=int, default=None)
    parser.add_argument('--multi', type=int, default=0, help='original(0) or multi-input(1) LSTNet')
    parser.add_argument('--log_interval', type=int, default=2000, metavar='N', help='report interval')
    parser.add_argument('--save', type=str, default='save/model.pt', help='path to save the final model')
    parser.add_argument('--log', type=str, default='logs/model.pt', help='path to save the testing logs')
    #parser.add_argument('--cuda', type=str, default=True)
    parser.add_argument('--optim', type=str, default='adam')
    parser.add_argument('--lr', type=float, default=0.0005)
    parser.add_argument('--loss', type=str, default='mae')
    parser.add_argument('--normalize', type=int, default=2)
    parser.add_argument('--output_fun', type=str, default='sigmoid')
    parser.add_argument('--exps', type=int, default=1, help='number of experiments')
    parser.add_argument('--patience', type=int, default=10, help='patience of early stopping')
    args = parser.parse_args()

    # Run args.exps independent experiments; each returns [rrse, corr]
    # measured on the test set at its best validation epoch.
    test = []
    for exp in range(1,args.exps+1):
        test.append(main(args, exp))
    test = np.array(test)
    avg = np.mean(test, axis=0)
    # best run = largest (corr - rrse), matching the validation criterion
    best = test[np.argmax(test[:,1]-test[:,0]), :]
    s = 'Average result | rrse: {:.4f} | corr: {:.4f}'.format(avg[0], avg[1])
    ss = 'Best result | rrse: {:.4f} | corr: {:.4f}'.format(best[0], best[1])
    with open(args.log, "a") as flog:
        flog.write(s+"\n")
        flog.write(ss+"\n")
168 |
--------------------------------------------------------------------------------
/solar.sh:
--------------------------------------------------------------------------------

# Train the multi-input LSTNet on the solar dataset: 3 experiments,
# early-stopping patience 10; metrics are appended to ./logs/solar.log.
time python main.py --data ./data/solar.npz --save ./save/solar.pk --log ./logs/solar.log --exps 3 --patience 10 \
--normalize 1 --loss mae --hidCNN 100 --hidRNN 100 --hidSkip 50 --output_fun linear \
--multi 1 --horizon 3 --highway_window 12 --window 36 --skip 144 --ps 3


--------------------------------------------------------------------------------
/traffic.sh:
--------------------------------------------------------------------------------

# Train the multi-input LSTNet on the traffic dataset: 3 experiments,
# early-stopping patience 10; metrics are appended to ./logs/traffic.log.
time python main.py --data ./data/traffic.npz --save ./save/traffic.pk --log ./logs/traffic.log --exps 3 --patience 10 \
--normalize 1 --loss mae --hidCNN 100 --hidRNN 100 --hidSkip 50 --output_fun linear \
--multi 1 --horizon 3 --highway_window 12 --window 48 --skip 24 --ps 3


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import pickle as pk
4 |
def raw_to_npz(fn):
    """Convert a headerless CSV under ./raw/ into a compressed .npz
    under ./data/, stored as float32 under the key 'a'."""
    frame = pd.read_csv("./raw/" + fn, header=None)
    values = frame.values.astype(np.float32)
    base = fn.split(".")[0]  # strip the extension for the output name
    np.savez_compressed('./data/' + base, a=values)
10 |
class Data(object):
    """Load a .npz series matrix, normalize it, slice it into supervised
    (input, target) pairs, and split chronologically into train/valid/test.

    Expects args to provide: data (npz path with key 'a'), horizon, window,
    skip, ps, CNN_kernel, normalize (0/1/2) and multi (0/1).
    """
    def __init__(self, args, tn=0.6, vd=0.2):
        # model/windowing hyper-parameters
        self.h = args.horizon
        self.w = args.window
        self.skip = args.skip
        self.ps = args.ps
        self.Ck = args.CNN_kernel
        # raw series: (n timesteps, m variables)
        self.raw = np.load(args.data)['a']
        self.n, self.m = self.raw.shape
        # chronological split fractions: train / validation (rest is test)
        self.tn, self.vd = tn, vd

        if args.normalize == 1:
            # scale the whole matrix by its global max magnitude
            self.mx = np.max(np.abs(self.raw))
            self.raw /= self.mx
        elif args.normalize == 2:
            # scale each column by its max magnitude (+1 avoids div-by-zero)
            self.col_max = np.max(np.abs(self.raw), axis=0) + 1
            self.raw /= self.col_max

        sliced = self._slice_multi() if args.multi else self._slice()
        self._split(sliced)

    def _slice(self):
        """Single-input slicing: X[j] is the w-step window ending h steps
        before the target Y[j]."""
        offset = self.w + self.h - 1
        num = self.n - offset
        X = np.zeros((num, self.w, self.m))
        Y = np.zeros((num, self.m))
        for j in range(num):
            end = j + offset - self.h + 1  # exclusive end of input window
            X[j] = self.raw[end - self.w:end].copy()
            Y[j] = self.raw[j + offset].copy()
        return X, Y

    def _slice_multi(self):
        """Two-input slicing: X1[j] is the short-term window; X2[j] stacks
        ps period-aligned chunks of Ck steps (oldest first)."""
        offset = self.ps * self.skip + self.Ck - 1 + self.h - 1
        num = self.n - offset
        X1 = np.zeros((num, self.w, self.m))
        X2 = np.zeros((num, self.ps * self.Ck, self.m))
        Y = np.zeros((num, self.m))
        for j in range(num):
            end = j + offset - self.h + 1  # exclusive end of input windows
            X1[j] = self.raw[end - self.w:end].copy()
            # collect Ck steps ending k periods back, prepended so the
            # oldest chunk comes first
            idx = []
            for k in range(self.ps):
                idx = list(range(end - self.Ck - k * self.skip, end - k * self.skip)) + idx
            X2[j] = self.raw[np.asarray(idx, dtype=int)].copy()
            Y[j] = self.raw[j + offset].copy()
        return X1, X2, Y

    def _split(self, *args):
        """Split every array in args[0] chronologically into
        self.train / self.valid / self.test."""
        cut1 = int(self.n * self.tn)
        cut2 = int(self.n * (self.tn + self.vd))
        self.train, self.valid, self.test = [], [], []
        for A in args[0]:
            self.train.append(A[:cut1].copy())
            self.valid.append(A[cut1:cut2].copy())
            self.test.append(A[cut2:].copy())
65 |
66 |
--------------------------------------------------------------------------------