├── .gitattributes ├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── notebooks ├── baseline.ipynb ├── gaussian_process.ipynb └── test_models.ipynb ├── requirements.txt ├── setup.cfg ├── setup.py ├── tests └── test_dataset.py └── time_series ├── __init__.py ├── dataset ├── __init__.py ├── time_series.py └── utils.py ├── models ├── LSTM │ └── __init__.py ├── TCN │ └── __init__.py ├── __init__.py ├── deepar │ ├── README.md │ ├── __init__.py │ ├── imgs │ │ ├── gaussian.png │ │ └── prediction.png │ ├── layers.py │ └── loss.py ├── gaussian_process │ └── __init__.py ├── nbeats │ └── __init__.py └── transformer │ └── __init__.py ├── settings.py └── utils └── __init__.py /.gitattributes: -------------------------------------------------------------------------------- 1 | *.py linguist-detectable=true 2 | *.ipynb linguist-detectable=false 3 | 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | ipynb 3 | # Byte-compiled / optimized / DLL files 4 | __pycache__/ 5 | *.py[cod] 6 | *$py.class 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | .hypothesis/ 50 | .pytest_cache/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | db.sqlite3 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | env.bak/ 93 | venv.bak/ 94 | 95 | # Spyder project settings 96 | .spyderproject 97 | .spyproject 98 | 99 | # Rope project settings 100 | .ropeproject 101 | 102 | # mkdocs documentation 103 | /site 104 | 105 | # mypy 106 | .mypy_cache/ 107 | 108 | # mac 109 | .DS_Store 110 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.6" 4 | # command to install dependencies 5 | install: 6 | - pip install -r requirements.txt 7 | - python setup.py install 8 | # command to run tests 9 | script: 10 | - pytest 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Alberto Arrigoni 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without 
restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Time-Series 2 | 3 | **time-series** is a Python module for machine learning for time-series built on top of tensorflow and is distributed under the MIT license. 4 | 5 | This repository was created as a companion repository for chapter 12, **Multivariate Forecasting**, of the book [Machine Learning for Time-Series with Python](https://amzn.to/3Eb62VH). 6 | 7 | Tensorflow implementations of Time-Series models including these: 8 | * Amazon DeepAR 9 | * Gaussian Processes 10 | * LSTM 11 | * TCN 12 | * Transformer, and 13 | * NBEATS 14 | 15 | The `time_series.dataset` package, part of this library, includes utility functions for loading datasets. 16 | 17 | Please see the example notebook for usage and training results. 18 | 19 | ## Installation 20 | 21 | ```python 22 | pip install git+https://github.com/benman1/time-series.git 23 | ``` 24 | 25 | ## Contribute 26 | 27 | Pull requests welcome! 
28 | 29 | ## List of Contributors 30 | 31 | Contributions from various people have found their way into this repository. Thanks to everyone for their hard work! 32 | 33 | * [Alberto Arrigoni](https://github.com/arrigonialberto86) 34 | * [ketan-b](https://github.com/ketan-b) 35 | * [Philippe Remy](https://github.com/philipperemy) 36 | * [Theodoros Ntakouris](https://github.com/ntakouris) 37 | -------------------------------------------------------------------------------- /notebooks/baseline.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "660fcbda-4d8e-4d4e-b186-d901d50f860d", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "%load_ext autoreload\n", 11 | "%autoreload 2" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 4, 17 | "id": "6a83e95d-402c-4284-b1b6-256d1350fd90", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from time_series.dataset.utils import get_energy_demand\n", 22 | "from time_series.dataset.time_series import TrainingDataSet\n", 23 | "from time_series.utils import evaluate_model\n", 24 | "\n", 25 | "train_df = get_energy_demand()\n", 26 | "tds = TrainingDataSet(train_df)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 16, 32 | "id": "4c6818d4-1394-4e53-b129-9990c2c8d894", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from statsmodels.tsa.api import ExponentialSmoothing\n", 37 | "from joblib import Parallel, delayed\n", 38 | "\n", 39 | "\n", 40 | "def univariate_forecast(column):\n", 41 | " fit1 = ExponentialSmoothing(\n", 42 | " train_df[column].values[:split_point],\n", 43 | " seasonal_periods=4,\n", 44 | " trend=\"add\",\n", 45 | " seasonal=\"add\",\n", 46 | " use_boxcox=False,\n", 47 | " initialization_method=\"estimated\",\n", 48 | " ).fit()\n", 49 | " return fit1.forecast(10)\n", 50 | "\n", 51 | "\n", 52 | "split_point = 
int(len(train_df) * tds.train_split)\n", 53 | "forecasts = Parallel(n_jobs=10)(\n", 54 | " delayed(univariate_forecast)(column) for column in train_df.columns\n", 55 | ")" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 30, 61 | "id": "a7d8248d-52e1-4877-8421-3708f539b710", 62 | "metadata": {}, 63 | "outputs": [ 64 | { 65 | "data": { 66 | "text/plain": [ 67 | "11.28" 68 | ] 69 | }, 70 | "execution_count": 30, 71 | "metadata": {}, 72 | "output_type": "execute_result" 73 | } 74 | ], 75 | "source": [ 76 | "import numpy as np\n", 77 | "from sklearn.metrics import mean_squared_error\n", 78 | "\n", 79 | "actual = train_df.values[split_point:][:10, :]\n", 80 | "round(mean_squared_error(actual, np.transpose(np.array(forecasts))), 2)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "id": "40b7b17c-e9b8-4431-84bd-fd1127f4d724", 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [] 90 | } 91 | ], 92 | "metadata": { 93 | "kernelspec": { 94 | "display_name": "Python 3", 95 | "language": "python", 96 | "name": "python3" 97 | }, 98 | "language_info": { 99 | "codemirror_mode": { 100 | "name": "ipython", 101 | "version": 3 102 | }, 103 | "file_extension": ".py", 104 | "mimetype": "text/x-python", 105 | "name": "python", 106 | "nbconvert_exporter": "python", 107 | "pygments_lexer": "ipython3", 108 | "version": "3.8.8" 109 | } 110 | }, 111 | "nbformat": 4, 112 | "nbformat_minor": 5 113 | } 114 | -------------------------------------------------------------------------------- /notebooks/gaussian_process.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "fd7a4738-8476-45aa-951a-b79bed1ed9a6", 6 | "metadata": {}, 7 | "source": [ 8 | "\"Open" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "01b7dada", 15 | "metadata": { 16 | "collapsed": false, 17 | "jupyter": { 18 | "outputs_hidden": 
false 19 | }, 20 | "pycharm": { 21 | "name": "#%%\n" 22 | } 23 | }, 24 | "outputs": [], 25 | "source": [ 26 | "%load_ext autoreload\n", 27 | "%autoreload 2" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "id": "73a9d219-67c2-41f9-af0e-71f264a70d56", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "# pip install git+https://github.com/benman1/time-series.git" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 2, 43 | "id": "892bb0e5", 44 | "metadata": { 45 | "collapsed": false, 46 | "jupyter": { 47 | "outputs_hidden": false 48 | }, 49 | "pycharm": { 50 | "name": "#%%\n" 51 | } 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "from time_series.dataset.utils import get_energy_demand\n", 56 | "from time_series.dataset.time_series import TrainingDataSet\n", 57 | "from time_series.models.gaussian_process import GaussianProcess\n", 58 | "\n", 59 | "train_df = get_energy_demand()\n", 60 | "tds2d = TrainingDataSet(train_df.head(500), train_split=0.1, two_dim=True)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 8, 66 | "id": "7fc55cf8", 67 | "metadata": { 68 | "collapsed": false, 69 | "jupyter": { 70 | "outputs_hidden": false 71 | }, 72 | "pycharm": { 73 | "name": "#%%\n" 74 | } 75 | }, 76 | "outputs": [ 77 | { 78 | "data": { 79 | "text/html": [ 80 | "
\n", 81 | "\n", 94 | "\n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | "
zoneCTMASSMENEMASSBOSTNHRISEMASSTOTALVTWCMASS
ts
2003-03-01 00:00:00-0.318751-0.574674-0.966582-0.615339-1.003294-0.727829-0.624415-0.591646-0.543866-0.446601
2003-03-01 01:00:00-0.480297-0.731304-1.193852-0.786355-1.145521-0.879446-0.758124-0.756747-0.728727-0.599752
2003-03-01 02:00:00-0.567380-0.813741-1.241200-0.876271-1.209149-0.972373-0.822407-0.838081-0.807953-0.686537
2003-03-01 03:00:00-0.607767-0.838472-1.255404-0.886850-1.220378-0.991936-0.837835-0.866931-0.878376-0.740140
2003-03-01 04:00:00-0.596408-0.812991-1.127565-0.876271-1.171721-0.977264-0.791551-0.835648-0.843165-0.714615
\n", 191 | "
" 192 | ], 193 | "text/plain": [ 194 | "zone CT MASS ME NEMASSBOST NH \\\n", 195 | "ts \n", 196 | "2003-03-01 00:00:00 -0.318751 -0.574674 -0.966582 -0.615339 -1.003294 \n", 197 | "2003-03-01 01:00:00 -0.480297 -0.731304 -1.193852 -0.786355 -1.145521 \n", 198 | "2003-03-01 02:00:00 -0.567380 -0.813741 -1.241200 -0.876271 -1.209149 \n", 199 | "2003-03-01 03:00:00 -0.607767 -0.838472 -1.255404 -0.886850 -1.220378 \n", 200 | "2003-03-01 04:00:00 -0.596408 -0.812991 -1.127565 -0.876271 -1.171721 \n", 201 | "\n", 202 | "zone RI SEMASS TOTAL VT WCMASS \n", 203 | "ts \n", 204 | "2003-03-01 00:00:00 -0.727829 -0.624415 -0.591646 -0.543866 -0.446601 \n", 205 | "2003-03-01 01:00:00 -0.879446 -0.758124 -0.756747 -0.728727 -0.599752 \n", 206 | "2003-03-01 02:00:00 -0.972373 -0.822407 -0.838081 -0.807953 -0.686537 \n", 207 | "2003-03-01 03:00:00 -0.991936 -0.837835 -0.866931 -0.878376 -0.740140 \n", 208 | "2003-03-01 04:00:00 -0.977264 -0.791551 -0.835648 -0.843165 -0.714615 " 209 | ] 210 | }, 211 | "execution_count": 8, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "source": [ 217 | "train_df.head()" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 9, 223 | "id": "39c9aff1", 224 | "metadata": { 225 | "collapsed": false, 226 | "jupyter": { 227 | "outputs_hidden": false 228 | }, 229 | "pycharm": { 230 | "name": "#%%\n" 231 | } 232 | }, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/plain": [ 237 | "Index(['CT', 'MASS', 'ME', 'NEMASSBOST', 'NH', 'RI', 'SEMASS', 'TOTAL', 'VT',\n", 238 | " 'WCMASS'],\n", 239 | " dtype='object', name='zone')" 240 | ] 241 | }, 242 | "execution_count": 9, 243 | "metadata": {}, 244 | "output_type": "execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "train_df.columns" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 10, 254 | "id": "8c133d78", 255 | "metadata": { 256 | "collapsed": false, 257 | "jupyter": { 258 | "outputs_hidden": false 259 | }, 
260 | "pycharm": { 261 | "name": "#%%\n" 262 | } 263 | }, 264 | "outputs": [ 265 | { 266 | "data": { 267 | "text/html": [ 268 | "
\n", 269 | "\n", 282 | "\n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | "
zoneCTMASSMENEMASSBOSTNHRISEMASSTOTALVTWCMASS
count1.241710e+051.241710e+051.241710e+051.241710e+051.241710e+051.241710e+051.241710e+051.241710e+051.241710e+051.241710e+05
mean1.111590e-14-5.550440e-145.267530e-141.964806e-14-1.417297e-14-5.904490e-152.278479e-14-4.836024e-154.680011e-144.847010e-15
std1.000004e+001.000004e+001.000004e+001.000004e+001.000004e+001.000004e+001.000004e+001.000004e+001.000004e+001.000004e+00
min-2.860578e+00-2.078022e+00-2.462776e+00-2.025786e+00-3.001960e+00-2.811343e+00-2.190355e+00-2.353885e+00-2.645718e+00-3.295209e+00
25%-7.251402e-01-7.402971e-01-8.150690e-01-7.387527e-01-7.712391e-01-7.327200e-01-7.269825e-01-7.449296e-01-7.815442e-01-7.549140e-01
50%-3.099671e-025.381168e-031.327082e-011.231042e-028.586684e-02-1.865201e-02-2.786611e-021.592663e-028.114064e-028.986296e-05
75%5.937324e-015.952086e-017.142682e-015.923568e-016.435471e-015.437988e-015.378266e-016.151574e-017.325557e-016.509817e-01
max4.705586e+004.776973e+003.389425e+004.821935e+004.225427e+005.023842e+005.104509e+004.537964e+003.223778e+004.415944e+00
\n", 405 | "
" 406 | ], 407 | "text/plain": [ 408 | "zone CT MASS ME NEMASSBOST NH \\\n", 409 | "count 1.241710e+05 1.241710e+05 1.241710e+05 1.241710e+05 1.241710e+05 \n", 410 | "mean 1.111590e-14 -5.550440e-14 5.267530e-14 1.964806e-14 -1.417297e-14 \n", 411 | "std 1.000004e+00 1.000004e+00 1.000004e+00 1.000004e+00 1.000004e+00 \n", 412 | "min -2.860578e+00 -2.078022e+00 -2.462776e+00 -2.025786e+00 -3.001960e+00 \n", 413 | "25% -7.251402e-01 -7.402971e-01 -8.150690e-01 -7.387527e-01 -7.712391e-01 \n", 414 | "50% -3.099671e-02 5.381168e-03 1.327082e-01 1.231042e-02 8.586684e-02 \n", 415 | "75% 5.937324e-01 5.952086e-01 7.142682e-01 5.923568e-01 6.435471e-01 \n", 416 | "max 4.705586e+00 4.776973e+00 3.389425e+00 4.821935e+00 4.225427e+00 \n", 417 | "\n", 418 | "zone RI SEMASS TOTAL VT WCMASS \n", 419 | "count 1.241710e+05 1.241710e+05 1.241710e+05 1.241710e+05 1.241710e+05 \n", 420 | "mean -5.904490e-15 2.278479e-14 -4.836024e-15 4.680011e-14 4.847010e-15 \n", 421 | "std 1.000004e+00 1.000004e+00 1.000004e+00 1.000004e+00 1.000004e+00 \n", 422 | "min -2.811343e+00 -2.190355e+00 -2.353885e+00 -2.645718e+00 -3.295209e+00 \n", 423 | "25% -7.327200e-01 -7.269825e-01 -7.449296e-01 -7.815442e-01 -7.549140e-01 \n", 424 | "50% -1.865201e-02 -2.786611e-02 1.592663e-02 8.114064e-02 8.986296e-05 \n", 425 | "75% 5.437988e-01 5.378266e-01 6.151574e-01 7.325557e-01 6.509817e-01 \n", 426 | "max 5.023842e+00 5.104509e+00 4.537964e+00 3.223778e+00 4.415944e+00 " 427 | ] 428 | }, 429 | "execution_count": 10, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "train_df.describe()" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "id": "28222b80", 442 | "metadata": { 443 | "collapsed": false, 444 | "jupyter": { 445 | "outputs_hidden": false 446 | }, 447 | "pycharm": { 448 | "name": "#%%\n" 449 | } 450 | }, 451 | "outputs": [], 452 | "source": [ 453 | "# please note this only works in tensorflow Eager 
mode!\n", 454 | "N_EPOCHS = 100\n", 455 | "gp = GaussianProcess(tds2d)\n", 456 | "gp.instantiate_and_fit(maxiter=N_EPOCHS)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": 6, 462 | "id": "9965f68c", 463 | "metadata": { 464 | "collapsed": false, 465 | "jupyter": { 466 | "outputs_hidden": false 467 | }, 468 | "pycharm": { 469 | "name": "#%%\n" 470 | } 471 | }, 472 | "outputs": [ 473 | { 474 | "name": "stdout", 475 | "output_type": "stream", 476 | "text": [ 477 | "MSE: 0.4221\n", 478 | "----------\n", 479 | "CT: 1.68\n", 480 | "MASS: 1.12\n", 481 | "ME: 1.51\n", 482 | "NEMASSBOST: 0.86\n", 483 | "NH: 1.62\n", 484 | "RI: 1.02\n", 485 | "SEMASS: 1.0\n", 486 | "TOTAL: 1.38\n", 487 | "VT: 1.75\n", 488 | "WCMASS: 1.59\n" 489 | ] 490 | }, 491 | { 492 | "data": { 493 | "image/png": "\n", 494 | "text/plain": [ 495 | "
" 496 | ] 497 | }, 498 | "metadata": { 499 | "needs_background": "light" 500 | }, 501 | "output_type": "display_data" 502 | } 503 | ], 504 | "source": [ 505 | "from time_series.utils import evaluate_model\n", 506 | "\n", 507 | "y_predicted = gp.predict(tds2d.X_test)[0].numpy().reshape(-1, tds2d.dimensions, tds2d.n_steps)\n", 508 | "evaluate_model(tds=tds2d, y_predicted=y_predicted, columns=train_df.columns, first_n=10)" 509 | ] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "execution_count": null, 514 | "id": "230c78ac-57b4-400f-bd16-1cafa36a68f2", 515 | "metadata": {}, 516 | "outputs": [], 517 | "source": [] 518 | } 519 | ], 520 | "metadata": { 521 | "kernelspec": { 522 | "display_name": "Python 3", 523 | "language": "python", 524 | "name": "python3" 525 | }, 526 | "language_info": { 527 | "codemirror_mode": { 528 | "name": "ipython", 529 | "version": 3 530 | }, 531 | "file_extension": ".py", 532 | "mimetype": "text/x-python", 533 | "name": "python", 534 | "nbconvert_exporter": "python", 535 | "pygments_lexer": "ipython3", 536 | "version": "3.8.8" 537 | } 538 | }, 539 | "nbformat": 4, 540 | "nbformat_minor": 5 541 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.19.0 2 | pandas~=1.2.4 3 | tensorflow>2.5.0 4 | pytest 5 | requests~=2.25.1 6 | pyreadr~=0.4.2 7 | scikit-learn~=0.24.1 8 | statsmodels~=0.12.2 9 | fastcache~=1.1.0 10 | tensorflow-addons 11 | gpflow~=2.2.1 12 | scipy~=1.6.2 13 | joblib~=1.0.1 14 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | exclude = .git 3 | max-line-length = 99 4 | ignore = W503, W391, E203 5 | max-complexity = 10 6 | -------------------------------------------------------------------------------- /setup.py: 
"""Packaging script for the time-series library."""
from pathlib import Path

from setuptools import setup, find_packages


# Resolve requirements.txt next to this script so the build does not depend
# on the directory the installer happens to be started from.
REQUIREMENTS_FILE = Path(__file__).resolve().parent / "requirements.txt"

with open(REQUIREMENTS_FILE, encoding="utf-8") as f:
    install_requires = [
        req.strip()
        for req in f.read().splitlines()
        # Blank lines and "#" comments are not valid requirement specifiers.
        if req.strip() and not req.strip().startswith("#")
    ]

print(install_requires)

setup(
    name="time-series",
    version="0.2",
    description="Time-Series models with keras and Tensorflow",
    author="Ben Auffarth",
    url="https://github.com/benman1/time-series/tree/master",
    install_requires=install_requires,
    packages=find_packages(),
)
def test_padding_with_one_hot_multiple(self):
    """Pad a frame with two one-hot groups to ten rows and check the group values.

    NOTE(review): the TimeSeries class in time_series/dataset/time_series.py
    takes only (pandas_df, n_steps, batch_size) and defines no ``_pad_ts``;
    this test appears to target an earlier API - confirm before relying on it.
    """
    # NOTE(review): the instance is built from data_to_pad_with_categorical
    # while the frame being padded is data_to_pad_with_multiple_categorical -
    # presumably intentional, since the frame is passed explicitly; verify.
    rec_ts = TimeSeries(
        pandas_df=self.data_to_pad_with_categorical,
        one_hot_root_list=["one_hot", "other"],
    )

    results = rec_ts._pad_ts(
        pandas_df=self.data_to_pad_with_multiple_categorical, desired_len=10
    )

    # The padded frame must have the requested length ...
    self.assertEqual(results.shape[0], 10)
    # ... and its first row must carry the one-hot values of each group.
    self.assertEqual(results.one_hot_yes.values[0], 1)
    self.assertEqual(results.one_hot_no.values[0], 0)
    self.assertEqual(results.other_yes.values[0], 1)
    self.assertEqual(results.other_no.values[0], 0)
class TimeSeries(Dataset):
    """Shuffled, batched sliding windows over a pandas time series.

    The values are converted to ``float32`` and handed to
    ``tf.keras.preprocessing.timeseries_dataset_from_array`` with
    ``targets=None``, so the dataset yields input windows only.

    Arguments:
        pandas_df (pd.DataFrame or pd.Series): time x value columns.
        n_steps (int): length of each window (``sequence_length``).
        batch_size (int): number of windows per batch.

    Attributes:
        batch_size, n_steps, dimensions (number of value columns), ds (the
        underlying tensorflow dataset).
    """

    def __init__(
        self, pandas_df: pd.DataFrame, n_steps: int = 1, batch_size: int = 10,
    ):
        super().__init__()
        assert isinstance(
            pandas_df, (pd.Series, pd.DataFrame)
        ), "Must provide a Pandas df to instantiate this class"
        if isinstance(pandas_df, pd.Series):
            # A Series has no ``columns``; treat it as a one-column frame so
            # that the type accepted by the check above actually works.
            pandas_df = pandas_df.to_frame()
        self.batch_size = batch_size
        self.n_steps = n_steps
        self.dimensions = len(pandas_df.columns)

        data = np.array(pandas_df, dtype=np.float32)
        self.ds = tf.keras.preprocessing.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.n_steps,
            sequence_stride=1,
            shuffle=True,
            batch_size=self.batch_size,
        )
        # Created lazily: a dataset is an iterable, not an iterator.
        self._iterator = None

    def __iter__(self):
        """Start a fresh pass over the dataset and return self."""
        self._iterator = iter(self.ds)
        return self

    def __next__(self):
        """Return the next batch of windows.

        Raises:
            StopIteration: when the current pass over the dataset is exhausted.
        """
        if self._iterator is None:
            # Allow ``next(ts)`` without a preceding ``iter(ts)``.
            self._iterator = iter(self.ds)
        return next(self._iterator)
def __init__(
    self,
    df: pd.DataFrame,
    lag: int = 10,
    train_split: float = 0.8,
    two_dim: bool = False,
):
    """Build lagged inputs with matching targets and split them into train/test.

    Arguments:
        df (pd.DataFrame): time x value columns.
        lag (int): number of previous steps used as predictors; the target
            of a row is the lagged block found ``lag`` rows later.
        train_split (float): fraction of ``len(df)`` used as the split index.
        two_dim (bool): passed on to ``sample_to_input`` (2D instead of 3D).
    """
    self.lag = lag
    self.train_split = train_split
    self.two_dim = two_dim
    lagged = sample_to_input(df, lag, two_dim=two_dim)
    # Pair every window with the window ``lag`` rows ahead. The final ``lag``
    # windows have no future counterpart and are dropped; rolling the array
    # instead would wrap around and pair them with the start of the series.
    n_pairs = max(len(lagged) - lag, 0)
    inputs = lagged[:n_pairs, ...]
    targets = lagged[lag:lag + n_pairs, ...]
    split_point = int(len(df) * train_split)  # points for training
    self.X_train, self.X_test = (
        inputs[:split_point, ...],
        inputs[split_point:, ...],
    )
    self.y_train, self.y_test = (
        targets[:split_point, ...],
        targets[split_point:, ...],
    )
@lru_cache(maxsize=1, typed=False)
def get_energy_demand(scale: bool = True):
    """Download the GEFCom energy demand data as a time x zone frame.

    The R data file is fetched from GitHub, stored as ``gefcom.rda`` in the
    current working directory, and pivoted so that rows are indexed by ``ts``
    and columns by ``zone`` with ``demand`` as values.

    Arguments:
        scale (bool): if true, standardize every column with
            ``StandardScaler``; otherwise return the raw demand values.
    Output:
        pd.DataFrame indexed by ``ts`` with one column per ``zone``.
    Raises:
        requests.HTTPError: if the download does not succeed.
    """
    resp = requests.get(
        "https://github.com/camroach87/gefcom2017data/raw/master/data/gefcom.rda",
        allow_redirects=True,
    )
    # Fail here rather than handing an HTML error page to the R reader.
    resp.raise_for_status()
    # Close (and thereby flush) the file before it is read back below.
    with open("gefcom.rda", "wb") as rda_file:
        rda_file.write(resp.content)
    result = pyreadr.read_r("gefcom.rda")
    df = result["gefcom"].pivot(index="ts", columns="zone", values="demand")
    if not scale:
        return df
    return pd.DataFrame(data=StandardScaler().fit_transform(df), columns=df.columns, index=df.index)
class LSTM(Transformer):
    """Forecasting with an LSTM."""

    def __init__(self, data: TrainingDataSet, lstm_units: Sequence[int] = (100,)):
        """Store the data set and the width of each stacked LSTM layer.

        Arguments:
            data: the training data set; passed on to the parent class.
            lstm_units: number of units for each LSTM layer, in stacking order.
        """
        super().__init__(data)
        self.lstm_units = lstm_units

    def recurrent_layers(self, inputs):
        """Stack the LSTM layers on ``inputs`` and project to the data width.

        Every LSTM keeps ``return_sequences=True``; the closing ``Dense``
        layer has ``self.data.dimensions`` units.
        """
        x = inputs
        for dim in self.lstm_units:
            x = tf.keras.layers.LSTM(units=dim, return_sequences=True)(x)
        x = tf.keras.layers.Dense(self.data.dimensions)(x)
        return x

    def build_model(self):
        """Build and compile the model, then print its summary."""
        inputs = tf.keras.Input(shape=self.data.input_shape)
        lstm_output = self.recurrent_layers(inputs)
        self.model = tf.keras.Model(inputs, lstm_output)
        self.model.compile(
            loss="mse" if self.regression else "sparse_categorical_crossentropy",
            optimizer=tf.keras.optimizers.Adam(),
            metrics=self.metrics,
        )
        # summary() prints the table itself and returns None, so wrapping it
        # in print() only added a stray "None" line.
        self.model.summary()
def is_power_of_two(num: int) -> bool:
    """Return True when ``num`` is a positive power of two (1, 2, 4, ...)."""
    # A power of two has exactly one set bit, so clearing the lowest set bit
    # with num & (num - 1) must leave zero; zero itself is ruled out first.
    return num != 0 and ((num & (num - 1)) == 0)


def adjust_dilations(dilations: list):
    """Normalise a dilation specification to actual dilation rates.

    If every entry already is a power of two, ``dilations`` is returned
    unchanged (the same object). Otherwise the entries are read as exponents
    and a new list ``[2 ** d for d in dilations]`` is returned.
    """
    # Generator instead of a list so all() can stop at the first mismatch.
    if all(is_power_of_two(rate) for rate in dilations):
        return dilations
    return [2 ** exponent for exponent in dilations]
64 | activation: The final activation used in o = Activation(x + F(x)) 65 | dropout_rate: Float between 0 and 1. Fraction of the input units to drop. 66 | kernel_initializer: Initializer for the kernel weights matrix (Conv1D). 67 | use_batch_norm: Whether to use batch normalization in the residual layers or not. 68 | use_layer_norm: Whether to use layer normalization in the residual layers or not. 69 | use_weight_norm: Whether to use weight normalization in the residual layers or not. 70 | kwargs: Any initializers for Layer class. 71 | """ 72 | 73 | self.dilation_rate = dilation_rate 74 | self.nb_filters = nb_filters 75 | self.kernel_size = kernel_size 76 | self.padding = padding 77 | self.activation = activation 78 | self.dropout_rate = dropout_rate 79 | self.use_batch_norm = use_batch_norm 80 | self.use_layer_norm = use_layer_norm 81 | self.use_weight_norm = use_weight_norm 82 | self.kernel_initializer = kernel_initializer 83 | self.layers = [] 84 | self.layers_outputs = [] 85 | self.shape_match_conv = None 86 | self.res_output_shape = None 87 | self.final_activation = None 88 | 89 | super(ResidualBlock, self).__init__(**kwargs) 90 | 91 | def _build_layer(self, layer): 92 | """Helper function for building layer 93 | Args: 94 | layer: Appends layer to internal layer list and builds it based on the current output 95 | shape of ResidualBlocK. Updates current output shape. 
96 | """ 97 | self.layers.append(layer) 98 | self.layers[-1].build(self.res_output_shape) 99 | self.res_output_shape = self.layers[-1].compute_output_shape( 100 | self.res_output_shape 101 | ) 102 | 103 | def build(self, input_shape): 104 | 105 | with K.name_scope( 106 | self.name 107 | ): # name scope used to make sure weights get unique names 108 | self.layers = [] 109 | self.res_output_shape = input_shape 110 | 111 | for k in range(2): 112 | name = "conv1D_{}".format(k) 113 | with K.name_scope( 114 | name 115 | ): # name scope used to make sure weights get unique names 116 | conv = Conv1D( 117 | filters=self.nb_filters, 118 | kernel_size=self.kernel_size, 119 | dilation_rate=self.dilation_rate, 120 | padding=self.padding, 121 | name=name, 122 | kernel_initializer=self.kernel_initializer, 123 | ) 124 | if self.use_weight_norm: 125 | from tensorflow_addons.layers import WeightNormalization 126 | 127 | # wrap it. WeightNormalization API is different than BatchNormalization or LayerNormalization. 128 | with K.name_scope("norm_{}".format(k)): 129 | conv = WeightNormalization(conv) 130 | self._build_layer(conv) 131 | 132 | with K.name_scope("norm_{}".format(k)): 133 | if self.use_batch_norm: 134 | self._build_layer(BatchNormalization()) 135 | elif self.use_layer_norm: 136 | self._build_layer(LayerNormalization()) 137 | elif self.use_weight_norm: 138 | pass # done above. 139 | 140 | self._build_layer(Activation(self.activation)) 141 | self._build_layer(SpatialDropout1D(rate=self.dropout_rate)) 142 | 143 | if self.nb_filters != input_shape[-1]: 144 | # 1x1 conv to match the shapes (channel dimension). 
145 | name = "matching_conv1D" 146 | with K.name_scope(name): 147 | # make and build this layer separately because it directly uses input_shape 148 | self.shape_match_conv = Conv1D( 149 | filters=self.nb_filters, 150 | kernel_size=1, 151 | padding="same", 152 | name=name, 153 | kernel_initializer=self.kernel_initializer, 154 | ) 155 | else: 156 | name = "matching_identity" 157 | self.shape_match_conv = Lambda(lambda x: x, name=name) 158 | 159 | with K.name_scope(name): 160 | self.shape_match_conv.build(input_shape) 161 | self.res_output_shape = self.shape_match_conv.compute_output_shape( 162 | input_shape 163 | ) 164 | 165 | self._build_layer(Activation(self.activation)) 166 | self.final_activation = Activation(self.activation) 167 | self.final_activation.build( 168 | self.res_output_shape 169 | ) # probably isn't necessary 170 | 171 | # this is done to force Keras to add the layers in the list to self._layers 172 | for layer in self.layers: 173 | self.__setattr__(layer.name, layer) 174 | self.__setattr__(self.shape_match_conv.name, self.shape_match_conv) 175 | self.__setattr__(self.final_activation.name, self.final_activation) 176 | 177 | super(ResidualBlock, self).build( 178 | input_shape 179 | ) # done to make sure self.built is set True 180 | 181 | def call(self, inputs, training=None): 182 | """ 183 | Returns: A tuple where the first element is the residual model tensor, and the second 184 | is the skip connection tensor. 
185 | """ 186 | x = inputs 187 | self.layers_outputs = [x] 188 | for layer in self.layers: 189 | training_flag = "training" in dict(inspect.signature(layer.call).parameters) 190 | x = layer(x, training=training) if training_flag else layer(x) 191 | self.layers_outputs.append(x) 192 | x2 = self.shape_match_conv(inputs) 193 | self.layers_outputs.append(x2) 194 | res_x = layers.add([x2, x]) 195 | self.layers_outputs.append(res_x) 196 | 197 | res_act_x = self.final_activation(res_x) 198 | self.layers_outputs.append(res_act_x) 199 | return [res_act_x, x] 200 | 201 | def compute_output_shape(self, input_shape): 202 | return [self.res_output_shape, self.res_output_shape] 203 | 204 | 205 | class TCN(Layer): 206 | """Creates a TCN layer. 207 | 208 | Input shape: 209 | A tensor of shape (batch_size, timesteps, input_dim). 210 | 211 | Args: 212 | nb_filters: The number of filters to use in the convolutional layers. Can be a list. 213 | kernel_size: The size of the kernel to use in each convolutional layer. 214 | dilations: The list of the dilations. Example is: [1, 2, 4, 8, 16, 32, 64]. 215 | nb_stacks : The number of stacks of residual blocks to use. 216 | padding: The padding to use in the convolutional layers, 'causal' or 'same'. 217 | use_skip_connections: Boolean. If we want to add skip connections from input to each residual blocK. 218 | return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. 219 | activation: The activation used in the residual blocks o = Activation(x + F(x)). 220 | dropout_rate: Float between 0 and 1. Fraction of the input units to drop. 221 | kernel_initializer: Initializer for the kernel weights matrix (Conv1D). 222 | use_batch_norm: Whether to use batch normalization in the residual layers or not. 223 | use_layer_norm: Whether to use layer normalization in the residual layers or not. 224 | use_weight_norm: Whether to use weight normalization in the residual layers or not. 
225 | kwargs: Any other arguments for configuring parent class Layer. For example "name=str", Name of the model. 226 | Use unique names when using multiple TCN. 227 | 228 | Returns: 229 | A TCN layer. 230 | """ 231 | 232 | def __init__( 233 | self, 234 | nb_filters=64, 235 | kernel_size=3, 236 | nb_stacks=1, 237 | dilations=(1, 2, 4, 8, 16, 32), 238 | padding="causal", 239 | use_skip_connections=True, 240 | dropout_rate=0.0, 241 | return_sequences=False, 242 | activation="relu", 243 | kernel_initializer="he_normal", 244 | use_batch_norm=False, 245 | use_layer_norm=False, 246 | use_weight_norm=False, 247 | **kwargs 248 | ): 249 | 250 | self.return_sequences = return_sequences 251 | self.dropout_rate = dropout_rate 252 | self.use_skip_connections = use_skip_connections 253 | self.dilations = dilations 254 | self.nb_stacks = nb_stacks 255 | self.kernel_size = kernel_size 256 | self.nb_filters = nb_filters 257 | self.activation = activation 258 | self.padding = padding 259 | self.kernel_initializer = kernel_initializer 260 | self.use_batch_norm = use_batch_norm 261 | self.use_layer_norm = use_layer_norm 262 | self.use_weight_norm = use_weight_norm 263 | self.skip_connections = [] 264 | self.residual_blocks = [] 265 | self.layers_outputs = [] 266 | self.build_output_shape = None 267 | self.slicer_layer = None # in case return_sequence=False 268 | self.output_slice_index = None # in case return_sequence=False 269 | self.padding_same_and_time_dim_unknown = ( 270 | False # edge case if padding='same' and time_dim = None 271 | ) 272 | 273 | if self.use_batch_norm + self.use_layer_norm + self.use_weight_norm > 1: 274 | raise ValueError("Only one normalization can be specified at once.") 275 | 276 | if isinstance(self.nb_filters, list): 277 | assert len(self.nb_filters) == len(self.dilations) 278 | 279 | if padding != "causal" and padding != "same": 280 | raise ValueError( 281 | "Only 'causal' or 'same' padding are compatible for this layer." 
282 | ) 283 | 284 | # initialize parent class 285 | super(TCN, self).__init__(**kwargs) 286 | 287 | @property 288 | def receptive_field(self): 289 | return 1 + 2 * (self.kernel_size - 1) * self.nb_stacks * sum(self.dilations) 290 | 291 | def build(self, input_shape): 292 | 293 | # member to hold current output shape of the layer for building purposes 294 | self.build_output_shape = input_shape 295 | 296 | # list to hold all the member ResidualBlocks 297 | self.residual_blocks = [] 298 | total_num_blocks = self.nb_stacks * len(self.dilations) 299 | if not self.use_skip_connections: 300 | total_num_blocks += 1 # cheap way to do a false case for below 301 | 302 | for s in range(self.nb_stacks): 303 | for i, d in enumerate(self.dilations): 304 | res_block_filters = ( 305 | self.nb_filters[i] 306 | if isinstance(self.nb_filters, list) 307 | else self.nb_filters 308 | ) 309 | self.residual_blocks.append( 310 | ResidualBlock( 311 | dilation_rate=d, 312 | nb_filters=res_block_filters, 313 | kernel_size=self.kernel_size, 314 | padding=self.padding, 315 | activation=self.activation, 316 | dropout_rate=self.dropout_rate, 317 | use_batch_norm=self.use_batch_norm, 318 | use_layer_norm=self.use_layer_norm, 319 | use_weight_norm=self.use_weight_norm, 320 | kernel_initializer=self.kernel_initializer, 321 | name="residual_block_{}".format(len(self.residual_blocks)), 322 | ) 323 | ) 324 | # build newest residual block 325 | self.residual_blocks[-1].build(self.build_output_shape) 326 | self.build_output_shape = self.residual_blocks[-1].res_output_shape 327 | 328 | # this is done to force keras to add the layers in the list to self._layers 329 | for layer in self.residual_blocks: 330 | self.__setattr__(layer.name, layer) 331 | 332 | self.output_slice_index = None 333 | if self.padding == "same": 334 | time = self.build_output_shape.as_list()[1] 335 | if ( 336 | time is not None 337 | ): # if time dimension is defined. e.g. shape = (bs, 500, input_dim). 
338 | self.output_slice_index = int(self.build_output_shape.as_list()[1] / 2) 339 | else: 340 | # It will known at call time. c.f. self.call. 341 | self.padding_same_and_time_dim_unknown = True 342 | 343 | else: 344 | self.output_slice_index = -1 # causal case. 345 | self.slicer_layer = Lambda(lambda tt: tt[:, self.output_slice_index, :]) 346 | 347 | def compute_output_shape(self, input_shape): 348 | """ 349 | Overridden in case keras uses it somewhere... no idea. Just trying to avoid future errors. 350 | """ 351 | if not self.built: 352 | self.build(input_shape) 353 | if not self.return_sequences: 354 | batch_size = self.build_output_shape[0] 355 | batch_size = ( 356 | batch_size.value if hasattr(batch_size, "value") else batch_size 357 | ) 358 | nb_filters = self.build_output_shape[-1] 359 | return [batch_size, nb_filters] 360 | else: 361 | # Compatibility tensorflow 1.x 362 | return [ 363 | v.value if hasattr(v, "value") else v for v in self.build_output_shape 364 | ] 365 | 366 | def call(self, inputs, training=None): 367 | x = inputs 368 | self.layers_outputs = [x] 369 | self.skip_connections = [] 370 | for layer in self.residual_blocks: 371 | try: 372 | x, skip_out = layer(x, training=training) 373 | except TypeError: # compatibility with tensorflow 1.x 374 | x, skip_out = layer(K.cast(x, "float32"), training=training) 375 | self.skip_connections.append(skip_out) 376 | self.layers_outputs.append(x) 377 | 378 | if self.use_skip_connections: 379 | x = layers.add(self.skip_connections) 380 | self.layers_outputs.append(x) 381 | 382 | if not self.return_sequences: 383 | # case: time dimension is unknown. e.g. (bs, None, input_dim). 384 | if self.padding_same_and_time_dim_unknown: 385 | self.output_slice_index = K.shape(self.layers_outputs[-1])[1] // 2 386 | x = self.slicer_layer(x) 387 | self.layers_outputs.append(x) 388 | return x 389 | 390 | def get_config(self): 391 | """ 392 | Returns the config of a the layer. 
def compiled_tcn(
    num_feat: int,
    num_classes: int,
    nb_filters: int,
    kernel_size: int,
    dilations: List[int],
    nb_stacks: int,
    max_len: Optional[int],
    output_len: int = 1,
    padding: str = "causal",
    use_skip_connections: bool = False,
    return_sequences: bool = True,
    regression: bool = False,
    dropout_rate: float = 0.05,
    name: str = "tcn",
    kernel_initializer: str = "he_normal",
    activation: str = "relu",
    opt: str = "adam",
    lr: float = 0.002,
    use_batch_norm: bool = False,
    use_layer_norm: bool = False,
    use_weight_norm: bool = False,
) -> Model:
    """Creates a compiled TCN model for a given task (i.e. regression or classification).

    Classification uses a sparse categorical loss. Please input class ids and not one-hot encodings.

    Args:
        num_feat: The number of features of your input, i.e. the last dimension of: (batch_size, timesteps, input_dim).
        num_classes: The size of the final dense layer, how many classes we are predicting.
        nb_filters: The number of filters to use in the convolutional layers.
        kernel_size: The size of the kernel to use in each convolutional layer.
        dilations: The list of the dilations. Example is: [1, 2, 4, 8, 16, 32, 64].
            Passed through ``adjust_dilations`` first.
        nb_stacks : The number of stacks of residual blocks to use.
        max_len: The maximum sequence length, use None if the sequence length is dynamic.
        output_len: The size of the final dense layer in the regression case.
        padding: The padding to use in the convolutional layers.
        use_skip_connections: Boolean. If we want to add skip connections from input to each residual blocK.
        return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence.
        regression: Whether the output should be continuous or discrete.
        dropout_rate: Float between 0 and 1. Fraction of the input units to drop.
        activation: The activation used in the residual blocks o = Activation(x + F(x)).
        name: Name of the model. Useful when having multiple TCN.
        kernel_initializer: Initializer for the kernel weights matrix (Conv1D).
        opt: Optimizer name, "adam" or "rmsprop".
        lr: Learning rate.
        use_batch_norm: Whether to use batch normalization in the residual layers or not.
        use_layer_norm: Whether to use layer normalization in the residual layers or not.
        use_weight_norm: Whether to use weight normalization in the residual layers or not.

    Returns:
        A compiled keras TCN.

    Raises:
        ValueError: if ``opt`` is neither "adam" nor "rmsprop".
    """
    # Resolve the optimizer first so that a bad name fails before any layer
    # is built. `learning_rate` replaces the deprecated `lr` keyword.
    optimizer_classes = {"adam": optimizers.Adam, "rmsprop": optimizers.RMSprop}
    if opt not in optimizer_classes:
        raise ValueError("Only Adam and RMSProp are available here")
    optimizer = optimizer_classes[opt](learning_rate=lr, clipnorm=1.0)

    dilations = adjust_dilations(dilations)

    input_layer = Input(shape=(max_len, num_feat))

    x = TCN(
        nb_filters=nb_filters,
        kernel_size=kernel_size,
        nb_stacks=nb_stacks,
        dilations=dilations,
        padding=padding,
        use_skip_connections=use_skip_connections,
        dropout_rate=dropout_rate,
        return_sequences=return_sequences,
        activation=activation,
        kernel_initializer=kernel_initializer,
        use_batch_norm=use_batch_norm,
        use_layer_norm=use_layer_norm,
        use_weight_norm=use_weight_norm,
        name=name,
    )(input_layer)

    LOGGER.debug("x.shape=%s", x.shape)

    if not regression:
        # classification
        x = Dense(num_classes)(x)
        x = Activation("softmax")(x)
        output_layer = x
        model = Model(input_layer, output_layer)

        # https://github.com/keras-team/keras/pull/11373
        # It's now in Keras@master but still not available with pip.
        # TODO remove later.
        def accuracy(y_true, y_pred):
            # reshape in case it's in shape (num_samples, 1) instead of (num_samples,)
            if K.ndim(y_true) == K.ndim(y_pred):
                y_true = K.squeeze(y_true, -1)
            # convert dense predictions to labels
            y_pred_labels = K.cast(K.argmax(y_pred, axis=-1), K.floatx())
            # Integer class ids must share the predictions' dtype, otherwise
            # the element-wise comparison is between mismatched types.
            y_true = K.cast(y_true, K.floatx())
            return K.cast(K.equal(y_true, y_pred_labels), K.floatx())

        model.compile(
            optimizer, loss="sparse_categorical_crossentropy", metrics=[accuracy]
        )
    else:
        # regression
        x = Dense(output_len)(x)
        x = Activation("linear")(x)
        output_layer = x
        model = Model(input_layer, output_layer)
        model.compile(optimizer, loss="mean_squared_error")
    LOGGER.debug("model.x = %s", input_layer.shape)
    LOGGER.debug("model.y = %s", output_layer.shape)
    return model
533 | versions = [int(v) for v in tf.__version__.split("-")[0].split(".")] 534 | if versions[0] <= 2 and versions[1] < 5: 535 | layers = model._layers.copy() # store existing layers 536 | model._layers.clear() # clear layers 537 | 538 | for i in range(len(layers)): 539 | if isinstance(layers[i], TCN): 540 | for layer in layers[i]._layers: 541 | if not isinstance(layer, ResidualBlock): 542 | if not hasattr(layer, "__iter__"): 543 | model._layers.append(layer) 544 | else: 545 | if expand_residual_blocks: 546 | for lyr in layer._layers: 547 | if not hasattr(lyr, "__iter__"): 548 | model._layers.append(lyr) 549 | else: 550 | model._layers.append(layer) 551 | else: 552 | model._layers.append(layers[i]) 553 | 554 | model.summary() # print summary 555 | 556 | # restore original layers 557 | model._layers.clear() 558 | [model._layers.append(lyr) for lyr in layers] 559 | else: 560 | print("WARNING: tcn_full_summary: Compatible with tensorflow 2.5.0 or below.") 561 | 562 | 563 | # if time_steps > tcn_layer.receptive_field, then we should not 564 | # be able to solve this task. 565 | batch_size, time_steps, input_dim = None, 20, 1 566 | 567 | 568 | def get_x_y(size=1000): 569 | import numpy as np 570 | 571 | pos_indices = np.random.choice(size, size=int(size // 2), replace=False) 572 | x_train = np.zeros(shape=(size, time_steps, 1)) 573 | y_train = np.zeros(shape=(size, 1)) 574 | x_train[ 575 | pos_indices, 0 576 | ] = 1.0 # we introduce the target in the first timestep of the sequence. 577 | y_train[ 578 | pos_indices, 0 579 | ] = 1.0 # the task is to see if the TCN can go back in time to find it. 
class TCNModel(Transformer):
    """Temporal Convolutional Neural Model."""

    def __init__(self, data: TrainingDataSet):
        """Store the data set; the Keras model is created by ``build_model``."""
        super().__init__(data)
        self.model: Optional[Model] = None

    def build_model(self):
        """Build and compile a TCN -> Dense -> Reshape model.

        The dense layer has one unit per element of ``self.data.output_shape``
        and the result is reshaped back to that shape.
        """
        output_shape = self.data.output_shape
        tcn_layer = TCN(input_shape=self.data.input_shape)
        self.model = Sequential(
            [tcn_layer, Dense(int(np.prod(output_shape))), Reshape(output_shape)]
        )
        self.model.build(self.data.input_shape)
        self.model.compile(
            loss="mse" if self.regression else "sparse_categorical_crossentropy",
            optimizer="adam",
            metrics=self.metrics,
        )
        # summary() returns None and prints to stdout by default; route its
        # lines through the logger instead of logging "None".
        self.model.summary(print_fn=LOGGER.info)
4 | 5 | ## Example usage: 6 | Fit a univariate time series: 7 | 8 | ```python 9 | from tensorflow.python.framework.ops import disable_eager_execution 10 | 11 | disable_eager_execution() 12 | 13 | from time_series.dataset.time_series import MockTs 14 | from time_series.models.deepar import DeepAR 15 | 16 | ts = MockTs(dimensions=1) # you can change this for multivariate time-series! 17 | dp_model = DeepAR(ts, epochs=50) 18 | dp_model.instantiate_and_fit() 19 | ``` 20 | 21 | Plot results with uncertainty bands: 22 | ```python 23 | import tqdm 24 | import pandas as pd 25 | from matplotlib import pyplot as plt 26 | import numpy as np 27 | 28 | batch = ts.next_batch(1, ts.n_steps) 29 | 30 | ress = [] 31 | for i in tqdm.tqdm(range(300)): 32 | ress.append(np.expand_dims( 33 | dp_model.get_sample_prediction( 34 | batch[0] 35 | ), axis=0, 36 | )) 37 | 38 | res_np = np.concatenate(ress, axis=0) 39 | fig = plt.figure(figsize=(12, 10)) 40 | 41 | for dim in range(ts.dimensions): 42 | ax = fig.add_subplot(ts.dimensions, 1, dim+1) 43 | res_df = pd.DataFrame(res_np[:, :, 0]).T 44 | tot_res = res_df 45 | 46 | ax.plot(batch[1].reshape((ts.n_steps, ts.dimensions))[:, dim], linewidth=6) 47 | tot_res['mu'] = tot_res.apply(lambda x: np.mean(x), axis=1) 48 | tot_res['upper'] = tot_res.apply(lambda x: np.mean(x) + np.std(x), axis=1) 49 | tot_res['lower'] = tot_res.apply(lambda x: np.mean(x) - np.std(x), axis=1) 50 | tot_res['two_upper'] = tot_res.apply(lambda x: np.mean(x) + 2*np.std(x), axis=1) 51 | tot_res['two_lower'] = tot_res.apply(lambda x: np.mean(x) - 2*np.std(x), axis=1) 52 | 53 | ax.plot(tot_res.mu, 'bo') 54 | ax.plot(tot_res.mu, linewidth=2) 55 | ax.fill_between(x = tot_res.index, y1=tot_res.lower, y2=tot_res.upper, alpha=0.5) 56 | ax.fill_between(x = tot_res.index, y1=tot_res.two_lower, y2=tot_res.two_upper, alpha=0.5) 57 | fig.suptitle('Prediction uncertainty') 58 | 59 | ``` 60 | 61 | ![Image of gaussian](imgs/prediction.png) 62 | 
class DeepAR(NNModel):
    """DeepAR model."""

    def __init__(
        self, data: TrainingDataSet, loss=gaussian_likelihood, optimizer: str = "adam",
    ):
        """Init.

        Arguments:
            data: the training data set; ``n_steps`` and ``dimensions`` size
                the network, ``X_train``/``y_train`` are used for fitting.
            loss: a factory that takes the predicted scale tensor and returns
                a Keras loss function.
            optimizer: which optimizer to use.
        """
        super().__init__()
        self.data = data
        self.inputs, self.z_sample = None, None
        self.loss = loss
        self.optimizer = optimizer
        self.model: Optional[Model] = None
        self.nn_structure = partial(
            DeepAR.basic_structure, n_steps=data.n_steps, dimensions=data.dimensions
        )
        self._output_layer_name = "main_output"
        self.gaussian_layer: Optional[Model] = None

    @staticmethod
    def basic_structure(n_steps=20, dimensions=1):
        """
        This is the method that needs to be patched when changing NN structure
        :return: inputs_shape (tuple), inputs (Tensor), [loc, scale] (a list of theta parameters
        of the target likelihood).

        Please note that I've made up scaling rules of the hidden layer dimensions.
        """
        input_shape = (n_steps, dimensions)
        inputs = Input(shape=input_shape)
        x = LSTM(
            4,  # int(4 * (1 + math.pow(math.log(dimensions), 4))),
            return_sequences=True,
            dropout=0.1,
        )(inputs)
        # int(4 * (1 + math.log(dimensions))),
        x = Dense(4, activation="relu")(x)
        loc, scale = GaussianLayer(dimensions, name="main_output")(x)
        return input_shape, inputs, [loc, scale]

    def fit(
        self, **fit_kwargs,
    ):
        """Fit models.

        This is called from instantiate and fit().
        """
        self.model.fit(
            self.data.X_train, self.data.y_train, callbacks=self.callbacks, **fit_kwargs
        )

    def build_model(self):
        """Build and compile the mean model and the (mean, scale) model.

        ``self.model`` outputs only the location and is trained with the
        likelihood loss closed over the scale tensor; ``self.gaussian_layer``
        shares the same graph and exposes both outputs for prediction.
        """
        _, inputs, (loc, scale) = self.nn_structure()
        self.model = Model(inputs, loc)
        # summary() returns None; send its lines to the logger instead.
        self.model.summary(print_fn=LOGGER.info)
        self.gaussian_layer = Model(
            self.model.input, self.model.get_layer(self._output_layer_name).output,
        )
        self.model.compile(
            loss=self.loss(scale), optimizer=self.optimizer, metrics=self.metrics
        )
        self.gaussian_layer.compile(loss="mse", optimizer="adam")

    def instantiate_and_fit(self, do_fit: bool = True, **fit_kwargs):
        """Compile and train models."""
        self.build_model()
        if do_fit:
            self.fit(**fit_kwargs)

    def predict_theta_from_input(self, input_list):
        """Predict from GaussianLayer.

        This function takes an input of size equal to the n_steps specified in 'Input' when building the
        network.
        :param input_list:
        :return: [[]], a list of list. E.g. when using Gaussian layer this returns a list of two list,
        corresponding to [[mu_values], [sigma_values]]
        :raises ValueError: if the model has not been built and fitted yet.
        """
        # Guard both "never built" (model is None) and "never fitted"
        # (no history) so callers always get the documented ValueError.
        if self.model is None or not getattr(self.model, "history", None):
            raise ValueError("Model must be trained first!")

        return self.gaussian_layer.predict(input_list)

    def get_sample_prediction(self, sample_df: Union[pd.DataFrame, np.ndarray]):
        """Draw one random sample per predicted value.

        :param sample_df: model input; a two-dimensional input is treated as
            a single window and given a leading batch axis.
        :return: array of samples with ``self.data.dimensions`` columns.
        """
        sample = np.asarray(sample_df)
        if sample.ndim == 2:
            sample = sample[np.newaxis, ...]
        mu, sigma = self.predict_theta_from_input(sample)
        # The likelihood loss uses sigma as a standard deviation
        # (log(sigma) + err^2 / (2 * sigma^2)), so it is the scale as-is.
        draws = normal(loc=mu, scale=sigma)
        return draws.reshape((-1, self.data.dimensions))
class GaussianLayer(Layer):
    """Dense head that emits the two parameters of a Gaussian.

    The layer owns two independent affine maps of its input: one produces the
    mean (``mu``), the other is passed through a softplus so the resulting
    ``sigma`` is strictly positive. ``call`` returns ``[mu, sigma]``.
    """

    def __init__(self, output_dim, **kwargs):
        """Store the output width; weights are created lazily in ``build``.

        :param output_dim: size of the last axis of both outputs.
        :param kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``).
        """
        super().__init__(**kwargs)
        self.output_dim = output_dim
        self.kernel_1 = None
        self.kernel_2 = None
        self.bias_1 = None
        self.bias_2 = None

    def build(self, input_shape):
        """Create one kernel/bias pair for ``mu`` and one for ``sigma``."""
        # The features live on the last axis. For the 3-D input the original
        # code required (it read input_shape[2]) this is the same axis.
        n_weight_rows = input_shape[-1]
        self.kernel_1 = self.add_weight(
            name="kernel_1",
            shape=(n_weight_rows, self.output_dim),
            initializer=glorot_normal(),
            trainable=True,
        )
        self.kernel_2 = self.add_weight(
            name="kernel_2",
            shape=(n_weight_rows, self.output_dim),
            initializer=glorot_normal(),
            trainable=True,
        )
        self.bias_1 = self.add_weight(
            name="bias_1",
            shape=(self.output_dim,),
            initializer=glorot_normal(),
            trainable=True,
        )
        self.bias_2 = self.add_weight(
            name="bias_2",
            shape=(self.output_dim,),
            initializer=glorot_normal(),
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        """Return ``[mu, sigma]`` for input ``x``."""
        output_mu = K.dot(x, self.kernel_1) + self.bias_1
        output_sig = K.dot(x, self.kernel_2) + self.bias_2
        # softplus(x) == log(1 + exp(x)), but the backend implementation does
        # not overflow for large x the way the hand-written form does. The
        # small constant keeps sigma away from exactly zero.
        output_sig_pos = K.softplus(output_sig) + 1e-06
        return [output_mu, output_sig_pos]

    def compute_output_shape(self, input_shape):
        """Return the shapes of ``mu`` and ``sigma``.

        ``K.dot`` only contracts the last axis, so every leading axis of the
        input (batch, time steps, ...) is preserved and the last one becomes
        ``output_dim``.
        """
        out_shape = tuple(input_shape)[:-1] + (self.output_dim,)
        return [out_shape, out_shape]

    def get_config(self):
        """Include ``output_dim`` so the layer can be re-created from config."""
        config = super().get_config()
        config["output_dim"] = self.output_dim
        return config
def __init__(
    self,
    data: TrainingDataSet,
    kernel: Optional[gpflow.kernels.Kernel] = None,
    meanf: Optional[gpflow.mean_functions.MeanFunction] = None,
):
    """Store the data set and the GP building blocks.

    :param data: training data; ``X_train``/``y_train`` are read when the
        model is built.
    :param kernel: covariance function. When omitted, a fresh
        ``gpflow.kernels.Matern52`` is created for this instance.
    :param meanf: optional mean function handed to the GPR model.
    """
    self.data = data
    # A kernel object created in the signature would be built once, at
    # definition time, and then shared by every GaussianProcess that relies
    # on the default. Creating it here gives each instance its own kernel.
    self.kernel = gpflow.kernels.Matern52() if kernel is None else kernel
    print_summary(self.kernel)
    self.meanf = meanf
    # Populated by build_model().
    self.model: Optional[gpflow.models.BayesianModel] = None
    self.opt = gpflow.optimizers.Scipy()
class NBeatsNet(NNModel):
    """NBeats model with exogenous variables.

    Data come in as (num_samples, time_steps, input_dim).

    Every target dimension is sliced out of the input and pushed through the
    same sequence of blocks; each block subtracts its backcast from the
    running residual and adds its forecast to the running prediction.

    We could be moving a window generator here:
    self.ts_obj = WindowGenerator(input_width=10, label_width=10, shift=8, train_df=df)
    """

    cast_type: str = _FORECAST

    def __init__(
        self,
        data: TrainingDataSet,
        backcast_length=10,
        stack_types=(TREND_BLOCK, SEASONALITY_BLOCK),
        nb_blocks_per_stack=3,
        thetas_dim=(4, 8),
        share_weights_in_stack=False,
        hidden_layer_units=256,
        nb_harmonics=None,
    ):
        """Store the hyper-parameters; no Keras objects are created yet.

        :param data: data set providing ``dimensions``, ``exo_dim``,
            ``n_steps`` and the training arrays.
        :param backcast_length: number of input time steps.
        :param stack_types: one block type per stack.
        :param nb_blocks_per_stack: number of blocks in every stack.
        :param thetas_dim: theta width per stack; must be as long as
            ``stack_types``.
        :param share_weights_in_stack: reuse the Dense layers of the first
            block for all blocks of the same stack.
        :param hidden_layer_units: width of the four hidden Dense layers.
        :param nb_harmonics: optional backcast theta width for seasonality
            blocks.
        """
        self.data = data
        self.stack_types = stack_types
        self.nb_blocks_per_stack = nb_blocks_per_stack
        self.thetas_dim = thetas_dim
        self.units = hidden_layer_units
        self.share_weights_in_stack = share_weights_in_stack
        self.backcast_length = backcast_length
        self.input_shape = (self.backcast_length, self.data.dimensions)
        self.exo_shape = (self.backcast_length, self.data.exo_dim)
        self.output_shape = (self.data.n_steps, self.data.dimensions)
        # stack_id -> {layer short name -> layer}; only filled when weights
        # are shared within a stack.
        self.weights = {}
        self.nb_harmonics = nb_harmonics
        assert len(self.stack_types) == len(
            self.thetas_dim
        ), "stack_types and thetas_dim must have the same length"
        self.models: Optional[Dict[str, Model]] = None

    @staticmethod
    def _select_channel(tensor, index):
        """Return a layer output holding ``tensor[..., index]``.

        The index is handed to the Lambda layer through ``arguments`` instead
        of being captured from an enclosing loop variable: a closure would
        look the variable up again whenever the layer is re-executed and
        would then see the loop's final value for every channel.
        """
        return Lambda(lambda z, index: z[..., index], arguments={"index": index})(
            tensor
        )

    def net_structure(self):
        """Build the network structure.

        :return: ``(forecast_model, backcast_model)``; both share the same
            inputs and layers.
        """
        n_dims = self.data.dimensions
        inputs = Input(shape=self.input_shape, name="input_variable")
        residuals = {k: self._select_channel(inputs, k) for k in range(n_dims)}

        exog_inputs = None
        exog = {}
        if self.has_exog():
            exog_inputs = Input(shape=self.exo_shape, name="exos_variables")
            exog = {
                k: self._select_channel(exog_inputs, k)
                for k in range(self.data.exo_dim)
            }

        forecasts = {}
        for stack_id, (stack_type, nb_poly) in enumerate(
            zip(self.stack_types, self.thetas_dim)
        ):
            for block_id in range(self.nb_blocks_per_stack):
                backcast, forecast = self.create_block(
                    residuals, exog, stack_id, block_id, stack_type, nb_poly
                )
                for k in range(n_dims):
                    residuals[k] = Subtract()([residuals[k], backcast[k]])
                    if k in forecasts:
                        forecasts[k] = Add()([forecasts[k], forecast[k]])
                    else:
                        # Very first block: nothing to accumulate onto yet.
                        forecasts[k] = forecast[k]

        for k in range(n_dims):
            forecasts[k] = Reshape(target_shape=(self.data.n_steps, 1))(forecasts[k])
            residuals[k] = Reshape(target_shape=(self.backcast_length, 1))(
                residuals[k]
            )
        if n_dims > 1:
            y_out = Concatenate()([forecasts[k] for k in range(n_dims)])
            x_out = Concatenate()([residuals[k] for k in range(n_dims)])
        else:
            y_out = forecasts[0]
            x_out = residuals[0]

        model_inputs = [inputs, exog_inputs] if self.has_exog() else inputs
        n_beats_forecast = Model(model_inputs, y_out, name=_FORECAST)
        n_beats_backcast = Model(model_inputs, x_out, name=_BACKCAST)
        return n_beats_forecast, n_beats_backcast

    def build_model(self):
        """Build the models and compile the forecast model."""
        n_beats_forecast, n_beats_backcast = self.net_structure()
        self.models = {
            model.name: model for model in [n_beats_backcast, n_beats_forecast]
        }
        self.models[_FORECAST].compile(loss="mae", optimizer="adam")
        # summary() returns None; let it write through the logger instead.
        self.models[_FORECAST].summary(print_fn=LOGGER.info)

    def has_exog(self):
        """Tell whether the data set carries exogenous variables.

        exo/exog is short for 'exogenous variable', i.e. any input features
        other than the target time-series itself.
        """
        return self.data.exo_dim > 0

    def _restore(self, layer_with_weights, stack_id):
        """Return the layer to use for a block, reusing a shared one if any.

        Without weight sharing the given layer is returned untouched. With
        ``share_weights_in_stack=True`` the first layer registered under a
        given short name (the part after the last ``/``) in a stack is
        returned for every later block of that stack.
        """
        if not self.share_weights_in_stack:
            return layer_with_weights
        layer_name = layer_with_weights.name.split("/")[-1]
        stack_layers = self.weights.setdefault(stack_id, {})
        return stack_layers.setdefault(layer_name, layer_with_weights)

    def create_block(self, x, e, stack_id, block_id, stack_type, nb_poly):
        """Create one block and apply it to every target dimension.

        :param x: dict ``dimension index -> residual tensor``.
        :param e: dict ``exogenous index -> tensor`` (empty without exog).
        :param stack_id: index of the stack the block belongs to.
        :param block_id: index of the block inside its stack.
        :param stack_type: block type of the stack.
        :param nb_poly: theta width used by generic and trend blocks.
        :return: ``(backcasts, forecasts)``, two dicts keyed by dimension.
        """

        def reg(layer):
            # Registers the layer, or swaps it for the shared one.
            return self._restore(layer, stack_id)

        def n(layer_name):
            # Unique layer name; its last component is the sharing key.
            return "/".join([str(stack_id), str(block_id), stack_type, layer_name])

        lengths = {
            "backcast_length": self.backcast_length,
            "forecast_length": self.data.n_steps,
        }

        d1 = reg(Dense(self.units, activation="relu", name=n("d1")))
        d2 = reg(Dense(self.units, activation="relu", name=n("d2")))
        d3 = reg(Dense(self.units, activation="relu", name=n("d3")))
        d4 = reg(Dense(self.units, activation="relu", name=n("d4")))
        if stack_type == GENERIC_BLOCK:
            theta_b = reg(
                Dense(nb_poly, activation="linear", use_bias=False, name=n("theta_b"))
            )
            theta_f = reg(
                Dense(nb_poly, activation="linear", use_bias=False, name=n("theta_f"))
            )
            backcast = reg(
                Dense(self.backcast_length, activation="linear", name=n("backcast"))
            )
            forecast = reg(
                Dense(self.data.n_steps, activation="linear", name=n("forecast"))
            )
        elif stack_type == TREND_BLOCK:
            # Trend blocks use one theta layer for both directions.
            theta_f = theta_b = reg(
                Dense(nb_poly, activation="linear", use_bias=False, name=n("theta_f_b"))
            )
            backcast = Lambda(trend_model, arguments={"is_forecast": False, **lengths})
            forecast = Lambda(trend_model, arguments={"is_forecast": True, **lengths})
        else:  # 'seasonality'
            # NOTE(review): nb_harmonics only affects the backcast theta; the
            # forecast theta always has n_steps units - confirm intended.
            theta_b = reg(
                Dense(
                    self.nb_harmonics if self.nb_harmonics else self.data.n_steps,
                    activation="linear",
                    use_bias=False,
                    name=n("theta_b"),
                )
            )
            theta_f = reg(
                Dense(
                    self.data.n_steps,
                    activation="linear",
                    use_bias=False,
                    name=n("theta_f"),
                )
            )
            backcast = Lambda(
                seasonality_model, arguments={"is_forecast": False, **lengths}
            )
            forecast = Lambda(
                seasonality_model, arguments={"is_forecast": True, **lengths}
            )

        backcast_ = {}
        forecast_ = {}
        for k in range(self.data.dimensions):
            if self.has_exog():
                # The number of exogenous series lives on the data set; the
                # model itself has no `exo_dim` attribute.
                d0 = Concatenate()(
                    [x[k]] + [e[ll] for ll in range(self.data.exo_dim)]
                )
            else:
                d0 = x[k]
            hidden = d4(d3(d2(d1(d0))))
            backcast_[k] = backcast(theta_b(hidden))
            forecast_[k] = forecast(theta_f(hidden))

        return backcast_, forecast_

    def fit(self, **fit_kwargs):
        """Fit the forecast model on the training data of ``self.data``."""
        self.models[_FORECAST].fit(
            self.data.X_train, self.data.y_train, callbacks=self.callbacks, **fit_kwargs
        )

    def instantiate_and_fit(self, **fit_kwargs):
        """Build the models, then fit the forecast model."""
        self.build_model()
        LOGGER.info("Model built!")
        self.fit(**fit_kwargs)

    @property
    def model(self):
        """Get the forecast model."""
        return self.models[_FORECAST]
class Transformer(NNModel):
    """Transformer model for time-series.

    The model includes residual connections, layer normalization, and dropout.
    Data come in as (batch size, sequence length, features).
    """

    def __init__(self, data: TrainingDataSet, regression: bool = True):
        """Keep the data set and the task type; the model is built later.

        :param data: data set providing shapes and training arrays.
        :param regression: ``True`` for forecasting (MSE loss), ``False`` for
            classification (sparse categorical cross-entropy).
        """
        self.data = data
        self.model: Optional[Model] = None
        self.regression = regression

    def fit(self, **fit_kwargs):
        """Fit the model on the training arrays of ``self.data``."""
        self.model.fit(
            self.data.X_train, self.data.y_train, callbacks=self.callbacks, **fit_kwargs
        )

    def instantiate_and_fit(self, **fit_kwargs):
        """Create model and fit."""
        self.build_model()
        self.fit(**fit_kwargs)

    @staticmethod
    def transformer_encoder(
        inputs,
        head_size: int,
        num_heads: int,
        ff_dim: int,
        dropout: float = 0.0,
        kernel_size: int = 1,
    ):
        """Encoder: Attention and Normalization and Feed-Forward.

        :param inputs: tensor the block is applied to.
        :param head_size: ``key_dim`` of the attention layer.
        :param num_heads: number of attention heads.
        :param ff_dim: filters of the first feed-forward convolution.
        :param dropout: dropout rate used inside the block.
        :param kernel_size: kernel size of the second convolution.
        :return: output of the second residual connection.
        """
        # 1. Attention and Normalization:
        x = layers.MultiHeadAttention(
            key_dim=head_size, num_heads=num_heads, dropout=dropout
        )(inputs, inputs)
        x = layers.Dropout(dropout)(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        res = x + inputs

        # 2. Feed Forward Part:
        x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(res)
        x = layers.Dropout(dropout)(x)
        # NOTE(review): with the default "valid" padding a kernel_size > 1
        # shortens the time axis before it is added back to `res` - confirm
        # that the resulting broadcast is intended.
        x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=kernel_size)(x)
        x = layers.LayerNormalization(epsilon=1e-6)(x)
        return x + res

    def nn_structure(
        self,
        head_size: int,
        num_heads: int,
        ff_dim: int,
        num_transformer_blocks: int,
        mlp_units: Sequence[int],
        dropout: float = 0.0,
        mlp_dropout: float = 0.0,
        kernel_size: int = 1,
    ):
        """Stack encoder blocks, pool, and add the MLP head.

        :return: ``(inputs, outputs)`` ready to be wrapped in a ``Model``.
        """
        inputs = tf.keras.Input(shape=self.data.input_shape)
        x = inputs
        for _ in range(num_transformer_blocks):
            x = Transformer.transformer_encoder(
                x, head_size, num_heads, ff_dim, dropout, kernel_size
            )

        x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
        for dim in mlp_units:
            x = layers.Dense(dim, activation="relu")(x)
            x = layers.Dropout(mlp_dropout)(x)

        if self.regression:
            # Forecast values are unbounded, so the head must be linear; a
            # softmax would force every prediction vector to sum to one.
            flat = layers.Dense(
                self.data.dimensions * self.data.n_steps, activation="linear"
            )(x)
            # NOTE(review): the Dense width uses n_steps while the reshape
            # uses horizon; both must be equal for this to be shape-safe.
            outputs = tf.reshape(
                flat, (-1, self.data.horizon, self.data.dimensions)
            )
        else:
            # Class probabilities stay flat: (batch, n_classes) is what the
            # sparse categorical cross-entropy loss expects.
            outputs = layers.Dense(self.data.n_classes, activation="softmax")(x)
        return inputs, outputs

    def build_model(self):
        """Create the Keras model with fixed hyper-parameters and compile it."""
        inputs, outputs = self.nn_structure(
            head_size=256,
            num_heads=2,
            ff_dim=self.data.n_steps,
            num_transformer_blocks=1,
            mlp_units=[256],
            mlp_dropout=0.4,
            dropout=0.25,
            kernel_size=self.data.n_steps,
        )
        self.model = Model(inputs, outputs)
        self.model.compile(
            loss="mse" if self.regression else "sparse_categorical_crossentropy",
            optimizer="adam",
            metrics=self.metrics,
        )
        # summary() returns None; route its lines through the logger.
        self.model.summary(print_fn=LOGGER.info)
def evaluate_model(
    tds: TimeSeries,
    y_predicted: np.ndarray,
    columns: Sequence[str] = None,
    first_n: int = 0,
    step: int = 1,
):
    """Print MSE figures and plot predictions against the test targets.

    The overall MSE over all flattened values is printed first, followed by
    one subplot and one MSE line per column.

    :param tds: data set; ``y_test`` supplies the actual values and the rank
        of ``y_train`` decides between 2-D and 3-D indexing.
    :param y_predicted: predictions with the same layout as ``tds.y_test``.
    :param columns: one name per target dimension. When omitted, the indices
        of the last axis of ``y_predicted`` are used as names.
    :param first_n: number of leading samples to plot; ``0`` plots all.
    :param step: index on the forecast-step axis used for 3-D targets.
        NOTE(review): the default keeps the original hard-coded index 1,
        which is the second forecast step - confirm whether 0 was meant.
    """
    print(f"MSE: {mean_squared_error(y_predicted.reshape(-1,), tds.y_test.reshape(-1,)):.4f}")
    print("----------")
    if columns is None:
        names = [str(i) for i in range(y_predicted.shape[-1])]
    else:
        names = list(columns)
    dimensions = len(names)
    # A stop of 0 would select nothing and make the per-column MSE fail on
    # empty arrays, so the "not set" value is mapped to an open-ended slice.
    limit = first_n if first_n else None

    plt.figure(figsize=(12, 18))
    grid = plt.GridSpec(dimensions, 1 if first_n else 2, wspace=0.5, hspace=0.2)
    targets_are_2d = len(tds.y_train.shape) == 2

    for i, name in enumerate(names):
        if targets_are_2d:
            pred = y_predicted[:limit, i]
            y_actual = tds.y_test[:limit, i]
        else:
            pred = y_predicted[:limit, step, i]
            y_actual = tds.y_test[:limit, step, i]

        ax = plt.subplot(grid[i, 0])
        plt.plot(pred, 'r+--', label="predicted")
        plt.plot(y_actual, 'bo-.', label="actual")
        ax.set_title(name)
        print(f"{name}: {round(mean_squared_error(y_actual, pred), 2)}")
        plt.legend()