├── .gitignore
├── LICENSE
├── README.md
├── data
    ├── dataset.png
    └── predictions.png
├── international-airline-passengers.csv
├── main.py
└── requirements.txt


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 
91 | .idea/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2017 Michael Egger
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Time series predictions with Keras
 2 | 
 3 | #### Requirements
 4 | * Theano
 5 | * Keras
 6 | * matplotlib
 7 | * pandas
 8 | * scikit-learn
 9 | * tqdm
10 | * numpy
11 | 
12 | 
13 | #### Usage
14 | ```
15 | git clone https://github.com/gcarq/keras-timeseries-prediction.git
16 | cd keras-timeseries-prediction/
17 | pip install -r requirements.txt
18 | python main.py
19 | ```
20 | 
21 | #### Dataset
22 | 
23 | The dataset is `international-airline-passengers.csv` which contains 144 data points ranging from Jan 1949 to Dec 1960.
24 | Each data point represents monthly passengers in thousands.
25 | 
26 | ![Dataset](data/dataset.png)
27 | 
28 | #### Model
29 | 
30 | 
31 | ```
32 | model = Sequential()
33 | model.add(LSTM(64,
34 |                activation='relu',
35 |                batch_input_shape=(batch_size, look_back, 1),
36 |                stateful=True,
37 |                return_sequences=False))
38 | model.add(Dense(1, activation='linear'))
39 | model.compile(loss='mean_squared_error', optimizer='adam')
40 | ```
41 | 
42 | #### Results
43 | 
44 | ![Predictions](data/predictions.png)


--------------------------------------------------------------------------------
/data/dataset.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gcarq/keras-timeseries-prediction/5d58d174bc1386c5fd60b01de0b9d6c4998a7242/data/dataset.png


--------------------------------------------------------------------------------
/data/predictions.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gcarq/keras-timeseries-prediction/5d58d174bc1386c5fd60b01de0b9d6c4998a7242/data/predictions.png


--------------------------------------------------------------------------------
/international-airline-passengers.csv:
--------------------------------------------------------------------------------
  1 | "month","passengers"
  2 | "1949-01",112
  3 | "1949-02",118
  4 | "1949-03",132
  5 | "1949-04",129
  6 | "1949-05",121
  7 | "1949-06",135
  8 | "1949-07",148
  9 | "1949-08",148
 10 | "1949-09",136
 11 | "1949-10",119
 12 | "1949-11",104
 13 | "1949-12",118
 14 | "1950-01",115
 15 | "1950-02",126
 16 | "1950-03",141
 17 | "1950-04",135
 18 | "1950-05",125
 19 | "1950-06",149
 20 | "1950-07",170
 21 | "1950-08",170
 22 | "1950-09",158
 23 | "1950-10",133
 24 | "1950-11",114
 25 | "1950-12",140
 26 | "1951-01",145
 27 | "1951-02",150
 28 | "1951-03",178
 29 | "1951-04",163
 30 | "1951-05",172
 31 | "1951-06",178
 32 | "1951-07",199
 33 | "1951-08",199
 34 | "1951-09",184
 35 | "1951-10",162
 36 | "1951-11",146
 37 | "1951-12",166
 38 | "1952-01",171
 39 | "1952-02",180
 40 | "1952-03",193
 41 | "1952-04",181
 42 | "1952-05",183
 43 | "1952-06",218
 44 | "1952-07",230
 45 | "1952-08",242
 46 | "1952-09",209
 47 | "1952-10",191
 48 | "1952-11",172
 49 | "1952-12",194
 50 | "1953-01",196
 51 | "1953-02",196
 52 | "1953-03",236
 53 | "1953-04",235
 54 | "1953-05",229
 55 | "1953-06",243
 56 | "1953-07",264
 57 | "1953-08",272
 58 | "1953-09",237
 59 | "1953-10",211
 60 | "1953-11",180
 61 | "1953-12",201
 62 | "1954-01",204
 63 | "1954-02",188
 64 | "1954-03",235
 65 | "1954-04",227
 66 | "1954-05",234
 67 | "1954-06",264
 68 | "1954-07",302
 69 | "1954-08",293
 70 | "1954-09",259
 71 | "1954-10",229
 72 | "1954-11",203
 73 | "1954-12",229
 74 | "1955-01",242
 75 | "1955-02",233
 76 | "1955-03",267
 77 | "1955-04",269
 78 | "1955-05",270
 79 | "1955-06",315
 80 | "1955-07",364
 81 | "1955-08",347
 82 | "1955-09",312
 83 | "1955-10",274
 84 | "1955-11",237
 85 | "1955-12",278
 86 | "1956-01",284
 87 | "1956-02",277
 88 | "1956-03",317
 89 | "1956-04",313
 90 | "1956-05",318
 91 | "1956-06",374
 92 | "1956-07",413
 93 | "1956-08",405
 94 | "1956-09",355
 95 | "1956-10",306
 96 | "1956-11",271
 97 | "1956-12",306
 98 | "1957-01",315
 99 | "1957-02",301
100 | "1957-03",356
101 | "1957-04",348
102 | "1957-05",355
103 | "1957-06",422
104 | "1957-07",465
105 | "1957-08",467
106 | "1957-09",404
107 | "1957-10",347
108 | "1957-11",305
109 | "1957-12",336
110 | "1958-01",340
111 | "1958-02",318
112 | "1958-03",362
113 | "1958-04",348
114 | "1958-05",363
115 | "1958-06",435
116 | "1958-07",491
117 | "1958-08",505
118 | "1958-09",404
119 | "1958-10",359
120 | "1958-11",310
121 | "1958-12",337
122 | "1959-01",360
123 | "1959-02",342
124 | "1959-03",406
125 | "1959-04",396
126 | "1959-05",420
127 | "1959-06",472
128 | "1959-07",548
129 | "1959-08",559
130 | "1959-09",463
131 | "1959-10",407
132 | "1959-11",362
133 | "1959-12",405
134 | "1960-01",417
135 | "1960-02",391
136 | "1960-03",419
137 | "1960-04",461
138 | "1960-05",472
139 | "1960-06",535
140 | "1960-07",622
141 | "1960-08",606
142 | "1960-09",508
143 | "1960-10",461
144 | "1960-11",390
145 | "1960-12",432


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import numpy
  2 | import pandas
  3 | import matplotlib.pyplot as plt
  4 | 
  5 | from keras.layers import Dense, LSTM
  6 | from keras.models import Sequential
  7 | from sklearn.metrics import mean_squared_error
  8 | 
  9 | from sklearn.preprocessing import MinMaxScaler
 10 | 
 11 | from tqdm import trange
 12 | 
 13 | 
# fix random seed for reproducibility
# NOTE(review): only NumPy's global random number generator is seeded here;
# whether that covers every source of randomness in the Keras backend is not visible from this file
numpy.random.seed(7)
 16 | 
 17 | 
 18 | def load_dataset(datasource: str) -> (numpy.ndarray, MinMaxScaler):
 19 |     """
 20 |     The function loads dataset from given file name and uses MinMaxScaler to transform data
 21 |     :param datasource: file name of data source
 22 |     :return: tuple of dataset and the used MinMaxScaler
 23 |     """
 24 |     # load the dataset
 25 |     dataframe = pandas.read_csv(datasource, usecols=[1])
 26 |     dataframe = dataframe.fillna(method='pad')
 27 |     dataset = dataframe.values
 28 |     dataset = dataset.astype('float32')
 29 | 
 30 |     plt.plot(dataset)
 31 |     plt.show()
 32 | 
 33 |     # normalize the dataset
 34 |     scaler = MinMaxScaler(feature_range=(0, 1))
 35 |     dataset = scaler.fit_transform(dataset)
 36 |     return dataset, scaler
 37 | 
 38 | 
 39 | def create_dataset(dataset: numpy.ndarray, look_back: int=1) -> (numpy.ndarray, numpy.ndarray):
 40 |     """
 41 |     The function takes two arguments: the `dataset`, which is a NumPy array that we want to convert into a dataset,
 42 |     and the `look_back`, which is the number of previous time steps to use as input variables
 43 |     to predict the next time period — in this case defaulted to 1.
 44 |     :param dataset: numpy dataset
 45 |     :param look_back: number of previous time steps as int
 46 |     :return: tuple of input and output dataset
 47 |     """
 48 |     data_x, data_y = [], []
 49 |     for i in range(len(dataset)-look_back-1):
 50 |         a = dataset[i:(i+look_back), 0]
 51 |         data_x.append(a)
 52 |         data_y.append(dataset[i + look_back, 0])
 53 |     return numpy.array(data_x), numpy.array(data_y)
 54 | 
 55 | 
 56 | def split_dataset(dataset: numpy.ndarray, train_size, look_back) -> (numpy.ndarray, numpy.ndarray):
 57 |     """
 58 |     Splits dataset into training and test datasets. The last `look_back` rows in train dataset
 59 |     will be used as `look_back` for the test dataset.
 60 |     :param dataset: source dataset
 61 |     :param train_size: specifies the train data size
 62 |     :param look_back: number of previous time steps as int
 63 |     :return: tuple of training data and test dataset
 64 |     """
 65 |     if not train_size > look_back:
 66 |         raise ValueError('train_size must be lager than look_back')
 67 |     train, test = dataset[0:train_size, :], dataset[train_size - look_back:len(dataset), :]
 68 |     print('train_dataset: {}, test_dataset: {}'.format(len(train), len(test)))
 69 |     return train, test
 70 | 
 71 | 
 72 | def build_model(look_back: int, batch_size: int=1) -> Sequential:
 73 |     """
 74 |     The function builds a keras Sequential model
 75 |     :param look_back: number of previous time steps as int
 76 |     :param batch_size: batch_size as int, defaults to 1
 77 |     :return: keras Sequential model
 78 |     """
 79 |     model = Sequential()
 80 |     model.add(LSTM(64,
 81 |                    activation='relu',
 82 |                    batch_input_shape=(batch_size, look_back, 1),
 83 |                    stateful=True,
 84 |                    return_sequences=False))
 85 |     model.add(Dense(1, activation='linear'))
 86 |     model.compile(loss='mean_squared_error', optimizer='adam')
 87 |     return model
 88 | 
 89 | 
 90 | def plot_data(dataset: numpy.ndarray,
 91 |               look_back: int,
 92 |               train_predict: numpy.ndarray,
 93 |               test_predict: numpy.ndarray,
 94 |               forecast_predict: numpy.ndarray):
 95 |     """
 96 |     Plots baseline and predictions.
 97 | 
 98 |     blue: baseline
 99 |     green: prediction with training data
100 |     red: prediction with test data
101 |     cyan: prediction based on predictions
102 | 
103 |     :param dataset: dataset used for predictions
104 |     :param look_back: number of previous time steps as int
105 |     :param train_predict: predicted values based on training data
106 |     :param test_predict: predicted values based on test data
107 |     :param forecast_predict: predicted values based on previous predictions
108 |     :return: None
109 |     """
110 |     plt.plot(dataset)
111 |     plt.plot([None for _ in range(look_back)] +
112 |              [x for x in train_predict])
113 |     plt.plot([None for _ in range(look_back)] +
114 |              [None for _ in train_predict] +
115 |              [x for x in test_predict])
116 |     plt.plot([None for _ in range(look_back)] +
117 |              [None for _ in train_predict] +
118 |              [None for _ in test_predict] +
119 |              [x for x in forecast_predict])
120 |     plt.show()
121 | 
122 | 
def make_forecast(model: Sequential, look_back_buffer: numpy.ndarray, timesteps: int=1, batch_size: int=1):
    """
    The function rolls the model forward for `timesteps` steps. After every step the oldest
    time step is dropped from the input window and the newest prediction is appended,
    so each prediction is based on the predictions made before it.
    :param model: model used for the predictions; `model.predict(window, batch_size)`
                  has to return a 2-D array with one row per sample
    :param look_back_buffer: initial input window with axes [samples, time steps, features];
                             the passed array itself is not modified
    :param timesteps: number of steps to predict, defaults to 1
    :param batch_size: batch_size handed to `model.predict`, defaults to 1
    :return: the predictions of all steps stacked along axis 0
             (an empty float32 array of shape (0, 1) if `timesteps` is 0)
    """
    # the empty seed keeps result shape and dtype well defined even when no step is made
    forecast_steps = [numpy.empty((0, 1), dtype=numpy.float32)]
    for _ in trange(timesteps, desc='predicting data\t', mininterval=1.0):
        # make prediction with current lookback buffer
        cur_predict = model.predict(look_back_buffer, batch_size)
        forecast_steps.append(cur_predict)
        # insert a time axis, [samples, features] -> [samples, 1, features], to make it suitable as input
        newest_step = cur_predict[:, numpy.newaxis, :]
        # drop the oldest time step and append the newest prediction
        look_back_buffer = numpy.concatenate([look_back_buffer[:, 1:, :], newest_step], axis=1)
    return numpy.concatenate(forecast_steps, axis=0)
137 | 
138 | 
def main():
    """
    Runs the whole experiment: loads and scales the passenger data, splits it into train and
    test part, fits the stateful LSTM model, predicts on both parts, forecasts 100 further
    steps from the model's own predictions, prints the RMSE of train and test predictions
    and plots everything.
    :return: None
    """
    datasource = 'international-airline-passengers.csv'
    dataset, scaler = load_dataset(datasource)

    # split into train and test sets
    look_back = int(len(dataset) * 0.20)
    train_size = int(len(dataset) * 0.70)
    train, test = split_dataset(dataset, train_size, look_back)

    # build input windows X (`look_back` steps) and targets Y (the step after each window)
    train_x, train_y = create_dataset(train, look_back)
    test_x, test_y = create_dataset(test, look_back)

    # reshape input to be [samples, time steps, features]
    train_x = numpy.reshape(train_x, (train_x.shape[0], train_x.shape[1], 1))
    test_x = numpy.reshape(test_x, (test_x.shape[0], test_x.shape[1], 1))

    # create and fit the LSTM model; the epochs are driven manually so that the
    # state of the stateful LSTM can be reset after every pass over the training data
    batch_size = 1
    model = build_model(look_back, batch_size=batch_size)
    for _ in trange(100, desc='fitting model\t', mininterval=1.0):
        # `epochs` is the Keras 2 name of the former `nb_epoch` argument
        model.fit(train_x, train_y, epochs=1, batch_size=batch_size, verbose=0, shuffle=False)
        model.reset_states()

    # generate predictions for training and test data
    train_predict = model.predict(train_x, batch_size)
    test_predict = model.predict(test_x, batch_size)

    # generate forecast predictions; the seed window is the last test window moved one step
    # ahead (it ends with the last test target), so the forecast continues right after the
    # last test prediction instead of predicting that step a second time
    forecast_seed = numpy.concatenate([test_x[-1, 1:, 0], test_y[-1:]]).reshape(1, look_back, 1)
    forecast_predict = make_forecast(model, forecast_seed, timesteps=100, batch_size=batch_size)

    # invert dataset and predictions; the targets are passed as a single column,
    # which is the layout the scaler was fitted on
    dataset = scaler.inverse_transform(dataset)
    train_predict = scaler.inverse_transform(train_predict)
    train_y = scaler.inverse_transform(train_y.reshape(-1, 1))[:, 0]
    test_predict = scaler.inverse_transform(test_predict)
    test_y = scaler.inverse_transform(test_y.reshape(-1, 1))[:, 0]
    forecast_predict = scaler.inverse_transform(forecast_predict)

    # calculate root mean squared error
    train_score = numpy.sqrt(mean_squared_error(train_y, train_predict[:, 0]))
    print('Train Score: %.2f RMSE' % train_score)
    test_score = numpy.sqrt(mean_squared_error(test_y, test_predict[:, 0]))
    print('Test Score: %.2f RMSE' % test_score)

    plot_data(dataset, look_back, train_predict, test_predict, forecast_predict)
185 | 
# run the experiment only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()
188 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | git+git://github.com/Theano/Theano@master#egg=theano
2 | git+git://github.com/fchollet/keras@master#egg=keras
3 | matplotlib==1.5.3
4 | pandas==0.19.2
5 | scikit-learn==0.18.1
6 | tqdm==4.11.0
7 | numpy==1.12.0
8 | 


--------------------------------------------------------------------------------