├── .gitignore ├── Data Analysis with Pandas ├── 01.Dealing with datetime.ipynb ├── 02.Input_Output.ipynb ├── 03. Moving Up and Down.ipynb ├── 04.Window and Lags.ipynb └── 05. corr and autocorr.ipynb ├── Financial Time Series ├── 01.Financial Time Series Analysis.ipynb └── 02. sentiment_score.ipynb ├── ODSC Workshop.pdf ├── README.md ├── Statistical models ├── 01. Time Series components.ipynb ├── 02. Stationarity.ipynb ├── 03.ARMA Process Models.ipynb ├── 04. ARIMA Models.ipynb └── 05.GARCH Models.ipynb ├── Time Series Boosting └── Trees_and_Boosting_with_TS.ipynb ├── Time Series with Deep Learning ├── 01.Time Series Forecasting with MLP.ipynb ├── 02.Time Series Forecasting with LSTM.ipynb └── What went wrong with this LSTM.ipynb ├── data ├── 50words_TEST.csv ├── AirPassengers.csv ├── All-Transactions House Price Index.csv ├── All_India_Area_Weighted_Monthly_Rainfall.csv ├── Earthquakes.csv ├── daily_female_births.csv ├── daily_min_tempratures.csv ├── exercise3.csv ├── exercise_2.csv ├── exercise_4.csv ├── exercise_sample.csv ├── flotation-cell.csv ├── gdp_india.csv ├── gdp_uk.csv ├── pollution.csv ├── reliance_data_day.csv ├── sales.csv ├── sample.csv ├── sample_2.csv ├── stock_data.csv ├── test_data.csv ├── yesbank_data.csv └── yesbank_data_day.csv ├── images └── pandas_dtypes.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .ipynb_checkpoints 3 | -------------------------------------------------------------------------------- /Data Analysis with Pandas/02.Input_Output.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": 
"text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "02.Input_Output.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "LguJbQ55Gcwe", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "# All imports\n", 38 | "import pandas as pd\n", 39 | "import numpy as np" 40 | ], 41 | "execution_count": 0, 42 | "outputs": [] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": { 47 | "id": "dfT4xBKQGcwk", 48 | "colab_type": "text" 49 | }, 50 | "source": [ 51 | "**Data Structures in \"pandas\"**\n", 52 | "\n", 53 | "* Series is a one-dimensional labeled array capable of holding any data type\n", 54 | "\n", 55 | "* DataFrame is a 2-dimensional labeled data structure with columns of potentially different types.\n", 56 | "\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "collapsed": true, 63 | "id": "oU55fsdUGcwl", 64 | "colab_type": "text" 65 | }, 66 | "source": [ 67 | "# Input/Output" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "metadata": { 73 | "id": "4jzFyTSPGcwm", 74 | "colab_type": "code", 75 | "colab": {} 76 | }, 77 | "source": [ 78 | "# Pandas support a lot of formats as input like json, csv, text, html, etc. 
\n", 79 | "# Here we will be taking the input as csv [comma separated values].\n", 80 | "# we can also use various parameters in read_csv like parse_dates [parses string date values]\n", 81 | "\n", 82 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n", 83 | "ts_data = pd.read_csv(colab_path+\"data/stock_data.csv\")" 84 | ], 85 | "execution_count": 0, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "metadata": { 91 | "id": "zVVhbeLHGcwo", 92 | "colab_type": "code", 93 | "colab": {} 94 | }, 95 | "source": [ 96 | "# A look at the head of the dataframe\n", 97 | "ts_data.head()" 98 | ], 99 | "execution_count": 0, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "id": "nibPOAGZGcwr", 106 | "colab_type": "code", 107 | "colab": {} 108 | }, 109 | "source": [ 110 | "# let's check the datatypes of columns in dataframe\n", 111 | "\n", 112 | "ts_data.info()" 113 | ], 114 | "execution_count": 0, 115 | "outputs": [] 116 | }, 117 | { 118 | "cell_type": "markdown", 119 | "metadata": { 120 | "id": "DCXWym0XGcws", 121 | "colab_type": "text" 122 | }, 123 | "source": [ 124 | "### A Quick look at pandas data types\n", 125 | "\n", 126 | "![dtypes](https://github.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/blob/master/images/pandas_dtypes.png?raw=1)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "metadata": { 132 | "id": "hmWRCLqTGcwt", 133 | "colab_type": "code", 134 | "colab": {} 135 | }, 136 | "source": [ 137 | "ts_data.describe()" 138 | ], 139 | "execution_count": 0, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "id": "EgN1T3AuGcwx", 146 | "colab_type": "code", 147 | "colab": {} 148 | }, 149 | "source": [ 150 | "#converts string type to datetime format\n", 151 | "ts_data['Date']= pd.to_datetime(ts_data['date']) \n", 152 | "ts_data.info()" 153 | ], 154 | "execution_count": 0, 155 | "outputs": [] 156 | }, 157 | { 158 | 
"cell_type": "code", 159 | "metadata": { 160 | "id": "SSEKayyIGcwz", 161 | "colab_type": "code", 162 | "colab": {} 163 | }, 164 | "source": [ 165 | "# read csv by parsing dates\n", 166 | "df = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates=['date'], index_col=0)" 167 | ], 168 | "execution_count": 0, 169 | "outputs": [] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "metadata": { 174 | "id": "swgQJVyqGcw1", 175 | "colab_type": "code", 176 | "colab": {} 177 | }, 178 | "source": [ 179 | "df" 180 | ], 181 | "execution_count": 0, 182 | "outputs": [] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "metadata": { 187 | "id": "Jg7eTv-cGcw4", 188 | "colab_type": "code", 189 | "colab": {} 190 | }, 191 | "source": [ 192 | "# getting data using date index\n", 193 | "# [from 2008 october to 2009 january]\n", 194 | "df['2008 10':'2009 01']" 195 | ], 196 | "execution_count": 0, 197 | "outputs": [] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "metadata": { 202 | "id": "2mRSUKISGcw9", 203 | "colab_type": "code", 204 | "colab": {} 205 | }, 206 | "source": [ 207 | "# Truncates a sorted DataFrame/Series before and/or after some\n", 208 | "# particular index value. If the axis contains only datetime values,\n", 209 | "# before/after parameters are converted to datetime values.\n", 210 | "df.truncate?" 
211 | ], 212 | "execution_count": 0, 213 | "outputs": [] 214 | }, 215 | { 216 | "cell_type": "code", 217 | "metadata": { 218 | "scrolled": true, 219 | "id": "MdzEbNp_Gcw_", 220 | "colab_type": "code", 221 | "colab": {} 222 | }, 223 | "source": [ 224 | "df.truncate(before='2008 10', after='2009')" 225 | ], 226 | "execution_count": 0, 227 | "outputs": [] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "metadata": { 232 | "id": "gPvmIF11GcxB", 233 | "colab_type": "code", 234 | "colab": {} 235 | }, 236 | "source": [ 237 | "# parsing date from multiple columns\n", 238 | "pd.read_csv(colab_path+'data/sample_2.csv').head()" 239 | ], 240 | "execution_count": 0, 241 | "outputs": [] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "id": "KCr2ccc0GcxE", 247 | "colab_type": "code", 248 | "colab": {} 249 | }, 250 | "source": [ 251 | "# combining all date columns to get date\n", 252 | "pd.read_csv(colab_path+'data/sample_2.csv', parse_dates={'date':[0,1,2]}, index_col='date').head()" 253 | ], 254 | "execution_count": 0, 255 | "outputs": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "id": "MhF7kXxsGcxG", 261 | "colab_type": "text" 262 | }, 263 | "source": [ 264 | "## dateparser" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "metadata": { 270 | "id": "TLfgdeYlGcxH", 271 | "colab_type": "code", 272 | "colab": {} 273 | }, 274 | "source": [ 275 | "pd.read_csv(colab_path+'data/sample.csv')" 276 | ], 277 | "execution_count": 0, 278 | "outputs": [] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "metadata": { 283 | "id": "YrvYoxNrGcxJ", 284 | "colab_type": "code", 285 | "colab": {} 286 | }, 287 | "source": [ 288 | "pd.read_csv(colab_path+'data/sample.csv', parse_dates= ['x']).info()" 289 | ], 290 | "execution_count": 0, 291 | "outputs": [] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "metadata": { 296 | "id": "fiojLQJJGcxL", 297 | "colab_type": "code", 298 | "colab": {} 299 | }, 300 | "source": [ 301 | "from 
datetime import datetime" 302 | ], 303 | "execution_count": 0, 304 | "outputs": [] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "metadata": { 309 | "id": "3LeVhBPNGcxN", 310 | "colab_type": "code", 311 | "colab": {} 312 | }, 313 | "source": [ 314 | "datetime.strptime('2018-11-01T12-12-00', '%Y-%m-%dT%H-%M-%S')" 315 | ], 316 | "execution_count": 0, 317 | "outputs": [] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "metadata": { 322 | "id": "B4xFq0GjGcxP", 323 | "colab_type": "code", 324 | "colab": {} 325 | }, 326 | "source": [ 327 | "# Specifying the format to parse the datetime\n", 328 | "# Find out various format specifiers here (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior)\n", 329 | "def dateparse(x):\n", 330 | " return datetime.strptime(x, '%Y-%m-%dT%H-%M-%S')" 331 | ], 332 | "execution_count": 0, 333 | "outputs": [] 334 | }, 335 | { 336 | "cell_type": "code", 337 | "metadata": { 338 | "id": "HWDve0otGcxR", 339 | "colab_type": "code", 340 | "colab": {} 341 | }, 342 | "source": [ 343 | "#pd.Timestamp('2018-11-01T0-10-1')" 344 | ], 345 | "execution_count": 0, 346 | "outputs": [] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "metadata": { 351 | "id": "9T64KRa1GcxU", 352 | "colab_type": "code", 353 | "colab": {} 354 | }, 355 | "source": [ 356 | "dateparse('2018-11-01T0-10-1')" 357 | ], 358 | "execution_count": 0, 359 | "outputs": [] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "metadata": { 364 | "id": "DZFv_QjLGcxX", 365 | "colab_type": "code", 366 | "colab": {} 367 | }, 368 | "source": [ 369 | "pd.read_csv(colab_path+'/data/sample.csv', parse_dates=['x'], date_parser=dateparse)" 370 | ], 371 | "execution_count": 0, 372 | "outputs": [] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "metadata": { 377 | "id": "it3UaMhqGcxa", 378 | "colab_type": "text" 379 | }, 380 | "source": [ 381 | "## Write data" 382 | ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "metadata": { 387 | "id": "M7VPMaLCGcxa", 388 | 
"colab_type": "code", 389 | "colab": {} 390 | }, 391 | "source": [ 392 | "# Creating date index with start and end having a frequency of one second\n", 393 | "date_index = pd.date_range(start='20181217', freq='S', end='20181221')" 394 | ], 395 | "execution_count": 0, 396 | "outputs": [] 397 | }, 398 | { 399 | "cell_type": "code", 400 | "metadata": { 401 | "id": "Dc6LrVgnGcxc", 402 | "colab_type": "code", 403 | "colab": {} 404 | }, 405 | "source": [ 406 | "len(date_index)" 407 | ], 408 | "execution_count": 0, 409 | "outputs": [] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "metadata": { 414 | "id": "aQw18bwYGcxe", 415 | "colab_type": "code", 416 | "colab": {} 417 | }, 418 | "source": [ 419 | "# Generated Index\n", 420 | "date_index" 421 | ], 422 | "execution_count": 0, 423 | "outputs": [] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "metadata": { 428 | "id": "AoCFA6ExGcxi", 429 | "colab_type": "code", 430 | "colab": {} 431 | }, 432 | "source": [ 433 | "# Creating the dataframe with the above generated index\n", 434 | "df = pd.DataFrame(data=np.random.randint(0,100, len(date_index)), index=date_index)" 435 | ], 436 | "execution_count": 0, 437 | "outputs": [] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "metadata": { 442 | "id": "ZyFUG2u8Gcxj", 443 | "colab_type": "code", 444 | "colab": {} 445 | }, 446 | "source": [ 447 | "#changing name of the column \n", 448 | "df.columns = ['Value']" 449 | ], 450 | "execution_count": 0, 451 | "outputs": [] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "metadata": { 456 | "id": "SBhrabp3Gcxl", 457 | "colab_type": "code", 458 | "colab": {} 459 | }, 460 | "source": [ 461 | "#df.head()" 462 | ], 463 | "execution_count": 0, 464 | "outputs": [] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "metadata": { 469 | "id": "hVqzhAo_Gcxn", 470 | "colab_type": "code", 471 | "colab": {} 472 | }, 473 | "source": [ 474 | "# Exporting data to csv\n", 475 | "df.to_csv('test_data.csv')" 476 | ], 477 | "execution_count": 0, 478 | 
"outputs": [] 479 | }, 480 | { 481 | "cell_type": "code", 482 | "metadata": { 483 | "id": "maOX4A1RGcxo", 484 | "colab_type": "code", 485 | "colab": {} 486 | }, 487 | "source": [ 488 | "" 489 | ], 490 | "execution_count": 0, 491 | "outputs": [] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": { 496 | "id": "IKO6sQNyGcxq", 497 | "colab_type": "text" 498 | }, 499 | "source": [ 500 | "# Exercise" 501 | ] 502 | }, 503 | { 504 | "cell_type": "code", 505 | "metadata": { 506 | "id": "P2xZrO7fGcxr", 507 | "colab_type": "code", 508 | "colab": {} 509 | }, 510 | "source": [ 511 | "# Q1:\n", 512 | "# 1. read \"../data/exercise_sample.csv\"\n", 513 | "# 2. change first column datatype to datetime\n", 514 | "# 4. Make the first column index\n", 515 | "# 5. Reverse the index order\n", 516 | "# 6. export it to cleaned_sample.csv" 517 | ], 518 | "execution_count": 0, 519 | "outputs": [] 520 | }, 521 | { 522 | "cell_type": "code", 523 | "metadata": { 524 | "id": "Jbb6vflJGcxt", 525 | "colab_type": "code", 526 | "colab": {} 527 | }, 528 | "source": [ 529 | "" 530 | ], 531 | "execution_count": 0, 532 | "outputs": [] 533 | } 534 | ] 535 | } -------------------------------------------------------------------------------- /Data Analysis with Pandas/03. Moving Up and Down.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "03. 
Moving Up and Down.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "2lkctECUGda5", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import pandas as pd" 38 | ], 39 | "execution_count": 0, 40 | "outputs": [] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": { 45 | "id": "cCWBO4KTGda-", 46 | "colab_type": "text" 47 | }, 48 | "source": [ 49 | "# Moving Up and Down\n", 50 | "\n", 51 | "\n", 52 | "## asfreq\n", 53 | "\n", 54 | " Convert TimeSeries to specified frequency. Optionally provide a filling method to pad/backfill missing values.\n", 55 | "\n", 56 | "Frequency strings can be found here:\n", 57 | "* https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases\n" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": "cH5pugSnGda_", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n", 69 | "df = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates=[0], index_col=0)" 70 | ], 71 | "execution_count": 0, 72 | "outputs": [] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "metadata": { 77 | "id": "FC2-C0EFGdbB", 78 | "colab_type": "code", 79 | "colab": {} 80 | }, 81 | "source": [ 82 | "#df" 83 | ], 84 | "execution_count": 0, 85 | "outputs": [] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "metadata": { 90 | "id": "Of_K8p5PGdbD", 91 | "colab_type": "code", 92 | "colab": {} 93 | }, 94 | "source": [ 95 | "df.index" 96 | ], 97 | "execution_count": 0, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "metadata": { 103 | "id": "-PyQcksuGdbG", 104 | "colab_type": "code", 105 | "colab": {} 106 | }, 107 | "source": [ 108 | "#12-hourly frequency\n", 109 | "df.asfreq('12H').index" 110 | ], 111 | "execution_count": 0, 112 | "outputs": [] 113 | }, 114 | { 115 
| "cell_type": "code", 116 | "metadata": { 117 | "id": "yvS6NMkcGdbJ", 118 | "colab_type": "code", 119 | "colab": {} 120 | }, 121 | "source": [ 122 | "df.asfreq?" 123 | ], 124 | "execution_count": 0, 125 | "outputs": [] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "metadata": { 130 | "id": "aSfeVALsGdbL", 131 | "colab_type": "code", 132 | "colab": {} 133 | }, 134 | "source": [ 135 | "df = df.asfreq('12H')" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "metadata": { 143 | "id": "AgACx7iqGdbO", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "source": [ 148 | "df" 149 | ], 150 | "execution_count": 0, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "metadata": { 156 | "id": "GmNPGuxNGdbR", 157 | "colab_type": "code", 158 | "colab": {} 159 | }, 160 | "source": [ 161 | "df.loc['2011-10-13 00:00']" 162 | ], 163 | "execution_count": 0, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "metadata": { 169 | "id": "YE4Y-AOYGdbU", 170 | "colab_type": "code", 171 | "colab": {} 172 | }, 173 | "source": [ 174 | "# Hours to Days --> Downsampling\n", 175 | "df.asfreq('D')" 176 | ], 177 | "execution_count": 0, 178 | "outputs": [] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "metadata": { 183 | "id": "8AoCzUlXGdbd", 184 | "colab_type": "code", 185 | "colab": {} 186 | }, 187 | "source": [ 188 | "# Days to hours --> Upsampling\n", 189 | "df.asfreq('8H')" 190 | ], 191 | "execution_count": 0, 192 | "outputs": [] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "metadata": { 197 | "id": "727TYZP8Gdbh", 198 | "colab_type": "code", 199 | "colab": {} 200 | }, 201 | "source": [ 202 | "# ffill propagate last valid observation forward to next valid\n", 203 | "# Note: this does not fill NaNs that already were present\n", 204 | "df.asfreq('4H', method='ffill')" 205 | ], 206 | "execution_count": 0, 207 | "outputs": [] 208 | }, 209 | { 210 | "cell_type": "code", 211 | 
"metadata": { 212 | "id": "LPp1_GhsGdbj", 213 | "colab_type": "code", 214 | "colab": {} 215 | }, 216 | "source": [ 217 | "# bfill uses the NEXT valid observation to fill\n", 218 | "# Note: this does not fill NaNs that already were present\n", 219 | "df.asfreq('2H',method='bfill')" 220 | ], 221 | "execution_count": 0, 222 | "outputs": [] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "metadata": { 227 | "id": "ayuofNoLGdbm", 228 | "colab_type": "code", 229 | "colab": {} 230 | }, 231 | "source": [ 232 | "# To fill with a certain default value\n", 233 | "# Note: this does not fill NaNs that already were present\n", 234 | "df.asfreq('H',fill_value=9.0)" 235 | ], 236 | "execution_count": 0, 237 | "outputs": [] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "id": "1pANcBTEGdbo", 243 | "colab_type": "text" 244 | }, 245 | "source": [ 246 | "## resample\n", 247 | "\n", 248 | "Convenience method for frequency conversion and resampling of time\n", 249 | "series." 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "metadata": { 255 | "id": "lKPkDOl5Gdbo", 256 | "colab_type": "code", 257 | "colab": {} 258 | }, 259 | "source": [ 260 | "df.resample?" 
261 | ], 262 | "execution_count": 0, 263 | "outputs": [] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "metadata": { 268 | "id": "hH_s-GohGdbq", 269 | "colab_type": "code", 270 | "colab": {} 271 | }, 272 | "source": [ 273 | "df.resample('2H')" 274 | ], 275 | "execution_count": 0, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "id": "NfkOW9P2Gdbs", 282 | "colab_type": "code", 283 | "colab": {} 284 | }, 285 | "source": [ 286 | "df.resample('2H').sum()" 287 | ], 288 | "execution_count": 0, 289 | "outputs": [] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "metadata": { 294 | "id": "Krgy735lGdbv", 295 | "colab_type": "code", 296 | "colab": {} 297 | }, 298 | "source": [ 299 | "def get_range(x):\n", 300 | " return x.max() - x.min()" 301 | ], 302 | "execution_count": 0, 303 | "outputs": [] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "metadata": { 308 | "id": "fNCHOV65Gdbx", 309 | "colab_type": "code", 310 | "colab": {} 311 | }, 312 | "source": [ 313 | "df.resample('M').agg({'mean', 'max', get_range})" 314 | ], 315 | "execution_count": 0, 316 | "outputs": [] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": { 321 | "id": "md1T2hWQGdb1", 322 | "colab_type": "text" 323 | }, 324 | "source": [ 325 | "## fillna\n", 326 | "\n", 327 | "Fill NA/NaN values using the specified method" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "metadata": { 333 | "id": "g4Ybxew4Gdb3", 334 | "colab_type": "code", 335 | "colab": {} 336 | }, 337 | "source": [ 338 | "df1 = df.asfreq('6M')" 339 | ], 340 | "execution_count": 0, 341 | "outputs": [] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "metadata": { 346 | "id": "EqYnq0IvGdb6", 347 | "colab_type": "code", 348 | "colab": {} 349 | }, 350 | "source": [ 351 | "df1" 352 | ], 353 | "execution_count": 0, 354 | "outputs": [] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "metadata": { 359 | "id": "hsbbZd_SGdb9", 360 | "colab_type": "code", 361 | "colab": {} 362 | 
}, 363 | "source": [ 364 | "df1.fillna?" 365 | ], 366 | "execution_count": 0, 367 | "outputs": [] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "metadata": { 372 | "id": "ulGkbCJRGdcA", 373 | "colab_type": "code", 374 | "colab": {} 375 | }, 376 | "source": [ 377 | "df1.fillna(method='ffill', inplace=True)" 378 | ], 379 | "execution_count": 0, 380 | "outputs": [] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "metadata": { 385 | "id": "aVh8UNQqGdcE", 386 | "colab_type": "code", 387 | "colab": {} 388 | }, 389 | "source": [ 390 | "df1" 391 | ], 392 | "execution_count": 0, 393 | "outputs": [] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": { 398 | "id": "UH2gCQXbGdcG", 399 | "colab_type": "text" 400 | }, 401 | "source": [ 402 | "## reindex\n", 403 | "\n", 404 | " Change Series to new index with optional filling logic" 405 | ] 406 | }, 407 | { 408 | "cell_type": "code", 409 | "metadata": { 410 | "id": "noGRluXxGdcH", 411 | "colab_type": "code", 412 | "colab": {} 413 | }, 414 | "source": [ 415 | "df2 = df.resample('Y').mean()" 416 | ], 417 | "execution_count": 0, 418 | "outputs": [] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "metadata": { 423 | "id": "RrKqFBcOGdcL", 424 | "colab_type": "code", 425 | "colab": {} 426 | }, 427 | "source": [ 428 | "df2" 429 | ], 430 | "execution_count": 0, 431 | "outputs": [] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "metadata": { 436 | "id": "j1rBA2ziGdcN", 437 | "colab_type": "code", 438 | "colab": {} 439 | }, 440 | "source": [ 441 | "date_index = pd.date_range('20071201', end='20111231', freq='7D')" 442 | ], 443 | "execution_count": 0, 444 | "outputs": [] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "metadata": { 449 | "id": "VRf9hIl0GdcQ", 450 | "colab_type": "code", 451 | "colab": {} 452 | }, 453 | "source": [ 454 | "date_index" 455 | ], 456 | "execution_count": 0, 457 | "outputs": [] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "metadata": { 462 | "id": "iT2LwyldGdcS", 463 | 
"colab_type": "code", 464 | "colab": {} 465 | }, 466 | "source": [ 467 | "df2 = df2.reindex(date_index,method='ffill')" 468 | ], 469 | "execution_count": 0, 470 | "outputs": [] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "metadata": { 475 | "id": "Vrwe5z4EGdcX", 476 | "colab_type": "code", 477 | "colab": {} 478 | }, 479 | "source": [ 480 | "df2" 481 | ], 482 | "execution_count": 0, 483 | "outputs": [] 484 | }, 485 | { 486 | "cell_type": "code", 487 | "metadata": { 488 | "id": "Kl69m-2QGdcZ", 489 | "colab_type": "code", 490 | "colab": {} 491 | }, 492 | "source": [ 493 | "df2.fillna(method='ffill', inplace=True)" 494 | ], 495 | "execution_count": 0, 496 | "outputs": [] 497 | }, 498 | { 499 | "cell_type": "code", 500 | "metadata": { 501 | "id": "TQ2T9TpOGdcb", 502 | "colab_type": "code", 503 | "colab": {} 504 | }, 505 | "source": [ 506 | "df2" 507 | ], 508 | "execution_count": 0, 509 | "outputs": [] 510 | }, 511 | { 512 | "cell_type": "code", 513 | "metadata": { 514 | "id": "fRiw0lqdGdcd", 515 | "colab_type": "code", 516 | "colab": {} 517 | }, 518 | "source": [ 519 | "df2.fillna(value='0')" 520 | ], 521 | "execution_count": 0, 522 | "outputs": [] 523 | }, 524 | { 525 | "cell_type": "markdown", 526 | "metadata": { 527 | "id": "jniNyCQKGdcf", 528 | "colab_type": "text" 529 | }, 530 | "source": [ 531 | "# Exercise" 532 | ] 533 | }, 534 | { 535 | "cell_type": "code", 536 | "metadata": { 537 | "id": "Sn_nwmOgGdcg", 538 | "colab_type": "code", 539 | "colab": {} 540 | }, 541 | "source": [ 542 | "# Q3: \n", 543 | "# read \"../data/exercise3.csv\"\n", 544 | "# change 'Date' column to datetime\n", 545 | "# set 'Date' as index\n", 546 | "# fillna using 'bfill'\n", 547 | "# resample to one month" 548 | ], 549 | "execution_count": 0, 550 | "outputs": [] 551 | }, 552 | { 553 | "cell_type": "code", 554 | "metadata": { 555 | "id": "zWjDMXsbGdci", 556 | "colab_type": "code", 557 | "colab": {} 558 | }, 559 | "source": [ 560 | "" 561 | ], 562 | "execution_count": 0, 563 | 
"outputs": [] 564 | } 565 | ] 566 | } -------------------------------------------------------------------------------- /Data Analysis with Pandas/04.Window and Lags.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "04.Window and Lags.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "KpowcnYnGerc", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import pandas as pd\n", 38 | "import numpy as np\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n", 41 | "import warnings\n", 42 | "warnings.filterwarnings('ignore')\n", 43 | "import seaborn as sns\n", 44 | "import datetime" 45 | ], 46 | "execution_count": 0, 47 | "outputs": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "p18JZJ1UGerj", 53 | "colab_type": "text" 54 | }, 55 | "source": [ 56 | "# Window and Lags\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "id": "2LvSOgUZGerl", 63 | "colab_type": "text" 64 | }, 65 | "source": [ 66 | "## shift & diff\n", 67 | "\n", 68 | " Shift index by desired number of periods with an optional time freq." 
69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "metadata": { 74 | "id": "D0emyIc6Germ", 75 | "colab_type": "code", 76 | "colab": {} 77 | }, 78 | "source": [ 79 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n", 80 | "df = pd.read_csv(colab_path+\"data/stock_data.csv\")" 81 | ], 82 | "execution_count": 0, 83 | "outputs": [] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "metadata": { 88 | "id": "lHxLL_qpGerr", 89 | "colab_type": "code", 90 | "colab": {} 91 | }, 92 | "source": [ 93 | "df['AAPL_return'] = df['AAPL'] / df['AAPL'].shift(1)" 94 | ], 95 | "execution_count": 0, 96 | "outputs": [] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "metadata": { 101 | "id": "RapVGXpUGeru", 102 | "colab_type": "code", 103 | "colab": {} 104 | }, 105 | "source": [ 106 | "df.head()" 107 | ], 108 | "execution_count": 0, 109 | "outputs": [] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "metadata": { 114 | "id": "VYsMGKm7Gerw", 115 | "colab_type": "code", 116 | "colab": {} 117 | }, 118 | "source": [ 119 | "#df" 120 | ], 121 | "execution_count": 0, 122 | "outputs": [] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "metadata": { 127 | "id": "CgpwEyOnGery", 128 | "colab_type": "code", 129 | "colab": {} 130 | }, 131 | "source": [ 132 | "df.diff?" 
133 | ], 134 | "execution_count": 0, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "metadata": { 140 | "id": "bMO5GwKNGer1", 141 | "colab_type": "code", 142 | "colab": {} 143 | }, 144 | "source": [ 145 | "df['AAPL_5D_range'] = df['AAPL'].diff(5)" 146 | ], 147 | "execution_count": 0, 148 | "outputs": [] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "metadata": { 153 | "id": "jhN3Ze_UGer4", 154 | "colab_type": "code", 155 | "colab": {} 156 | }, 157 | "source": [ 158 | "df[['AAPL', 'AAPL_5D_range']].plot(figsize=(20,10), secondary_y='AAPL_5D_range')\n" 159 | ], 160 | "execution_count": 0, 161 | "outputs": [] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": { 166 | "id": "k6YFJCaVGer7", 167 | "colab_type": "text" 168 | }, 169 | "source": [ 170 | "## Rolling\n", 171 | "\n", 172 | " Provide rolling window calculations" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "metadata": { 178 | "id": "LPfldfyOGer9", 179 | "colab_type": "code", 180 | "colab": {} 181 | }, 182 | "source": [ 183 | "df.rolling?\n" 184 | ], 185 | "execution_count": 0, 186 | "outputs": [] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "metadata": { 191 | "id": "50w9aOKRGesB", 192 | "colab_type": "code", 193 | "colab": {} 194 | }, 195 | "source": [ 196 | "df.rolling(30)" 197 | ], 198 | "execution_count": 0, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "metadata": { 204 | "id": "VOModRMxGesE", 205 | "colab_type": "code", 206 | "colab": {} 207 | }, 208 | "source": [ 209 | "df['AAPL_rollmean'] = df['AAPL'].rolling(30).mean()" 210 | ], 211 | "execution_count": 0, 212 | "outputs": [] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "metadata": { 217 | "id": "MNUp1JY8GesH", 218 | "colab_type": "code", 219 | "colab": {} 220 | }, 221 | "source": [ 222 | "df.head(50)" 223 | ], 224 | "execution_count": 0, 225 | "outputs": [] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "metadata": { 230 | "id": "tfgaP_EVGesK", 231 | 
"colab_type": "code", 232 | "colab": {} 233 | }, 234 | "source": [ 235 | "df[['AAPL', 'AAPL_rollmean']].plot(figsize=(20,10))" 236 | ], 237 | "execution_count": 0, 238 | "outputs": [] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": { 243 | "id": "2-gzcku6GesN", 244 | "colab_type": "text" 245 | }, 246 | "source": [ 247 | "## expanding" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "FJO9PVM8GesN", 254 | "colab_type": "code", 255 | "colab": {} 256 | }, 257 | "source": [ 258 | "df['AAPL'].expanding().max()" 259 | ], 260 | "execution_count": 0, 261 | "outputs": [] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "metadata": { 266 | "id": "xSzd7crwGesP", 267 | "colab_type": "code", 268 | "colab": {} 269 | }, 270 | "source": [ 271 | "#cumulative product\n", 272 | "df['AAPL_return'].cumprod()\n" 273 | ], 274 | "execution_count": 0, 275 | "outputs": [] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "metadata": { 280 | "id": "10PrE_vDGesR", 281 | "colab_type": "code", 282 | "colab": {} 283 | }, 284 | "source": [ 285 | "#df['AAPL_return'].cumprod" 286 | ], 287 | "execution_count": 0, 288 | "outputs": [] 289 | }, 290 | { 291 | "cell_type": "markdown", 292 | "metadata": { 293 | "id": "QVfoNrmtGesW", 294 | "colab_type": "text" 295 | }, 296 | "source": [ 297 | "# Exercise" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "metadata": { 303 | "id": "laq-mlElGesX", 304 | "colab_type": "code", 305 | "colab": {} 306 | }, 307 | "source": [ 308 | "# Q4:\n", 309 | "# read \"../data/exercise_4.csv\"\n", 310 | "# create temp_1 column with shift 1 \n", 311 | "# create temp_diff column with diff 2\n", 312 | "# create a temp_roll column by calculating the rolling mean with a 10-sample window\n", 313 | "# delete all NaN values" 314 | ], 315 | "execution_count": 0, 316 | "outputs": [] 317 | } 318 | ] 319 | } -------------------------------------------------------------------------------- /Data Analysis with Pandas/05. 
corr and autocorr.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "05. corr and autocorr.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "-vDPbuyJGd6x", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import pandas as pd\n", 38 | "import numpy as np\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n", 41 | "import warnings\n", 42 | "warnings.filterwarnings('ignore')\n", 43 | "import seaborn as sns\n", 44 | "import datetime" 45 | ], 46 | "execution_count": 0, 47 | "outputs": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "-onwMnOEGd64", 53 | "colab_type": "text" 54 | }, 55 | "source": [ 56 | "# Correlation and Auto Correlation\n", 57 | "\n", 58 | "\n", 59 | "## correlations\n", 60 | " What is correlation ?\n", 61 | " Correlation is a statistical measure that indicates the extent to which two or more variables fluctuate together. 
A positive correlation indicates the extent to which those variables increase or decrease in parallel; a negative correlation indicates the extent to which one variable increases as the other decreases.\n", 62 | "\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "metadata": { 68 | "id": "8krbyZ6vGd65", 69 | "colab_type": "code", 70 | "colab": {} 71 | }, 72 | "source": [ 73 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n", 74 | "s = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates= [0], index_col=[0])" 75 | ], 76 | "execution_count": 0, 77 | "outputs": [] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "metadata": { 82 | "id": "OcUeUINdGd68", 83 | "colab_type": "code", 84 | "colab": {} 85 | }, 86 | "source": [ 87 | "#Investigate if the data has missing values\n", 88 | "# If less drop or else try imputing using some method" 89 | ], 90 | "execution_count": 0, 91 | "outputs": [] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "metadata": { 96 | "id": "S2EYk_mQGd7A", 97 | "colab_type": "code", 98 | "colab": {} 99 | }, 100 | "source": [ 101 | "# Computes pairwise correlation of columns, excluding NA/null values.\n", 102 | "# various methods available are: {‘pearson’, ‘kendall’, ‘spearman’} or callable\n", 103 | "# Pearson correlation: measures the linear association between continuous variables.\n", 104 | "# Spearman's rank correlation: measures monotonic association (only strictly increasing or decreasing, but not mixed) \n", 105 | "# This makes it appropriate to use with both continuous and discrete data.\n", 106 | "# Kendall correlation: works well for discrete data\n", 107 | "s.corr()" 108 | ], 109 | "execution_count": 0, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "metadata": { 115 | "id": "s3DGnpSxGd7F", 116 | "colab_type": "code", 117 | "colab": {} 118 | }, 119 | "source": [ 120 | "#Plotting the values in a heatmap\n", 121 | "sns.heatmap(s.corr())" 122 | ], 123 | 
"execution_count": 0, 124 | "outputs": [] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "metadata": { 129 | "id": "3mma-AH9Gd7J", 130 | "colab_type": "code", 131 | "colab": {} 132 | }, 133 | "source": [ 134 | "#relation between two can be found as\n", 135 | "s['AAPL'].corr(s['AAPL'])" 136 | ], 137 | "execution_count": 0, 138 | "outputs": [] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "metadata": { 143 | "id": "RnRp1vsgGd7a", 144 | "colab_type": "code", 145 | "colab": {} 146 | }, 147 | "source": [ 148 | "" 149 | ], 150 | "execution_count": 0, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "metadata": { 156 | "id": "kq2G_3ydGd7e", 157 | "colab_type": "text" 158 | }, 159 | "source": [ 160 | "## auto correlation\n", 161 | "\n", 162 | " correlation between the elements of a series and others from the same series separated from them by a given interval." 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "metadata": { 168 | "id": "8k01gqk1Gd7f", 169 | "colab_type": "code", 170 | "colab": {} 171 | }, 172 | "source": [ 173 | "from statsmodels.tsa.stattools import *\n", 174 | "from statsmodels.graphics.tsaplots import *" 175 | ], 176 | "execution_count": 0, 177 | "outputs": [] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "metadata": { 182 | "id": "9RVV-HMtGd7k", 183 | "colab_type": "code", 184 | "colab": {} 185 | }, 186 | "source": [ 187 | "#The NumPy linspace function (sometimes called np.linspace) is a tool in Python for creating numeric sequences.\n", 188 | "# Trigonometric sine\n", 189 | "data = np.sin(np.linspace(start=0,stop=100,num=100))" 190 | ], 191 | "execution_count": 0, 192 | "outputs": [] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "metadata": { 197 | "id": "Is1W5ckJGd7o", 198 | "colab_type": "code", 199 | "colab": {} 200 | }, 201 | "source": [ 202 | "# Autocorrelation function for 1d arrays.\n", 203 | "# correlation between the elements of a series and others from the same series separated from them by a 
given interval.\n", 204 | "ac_res = acf(data, nlags=100)\n" 205 | ], 206 | "execution_count": 0, 207 | "outputs": [] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "metadata": { 212 | "id": "JbPo9wF5Gd7t", 213 | "colab_type": "code", 214 | "colab": {} 215 | }, 216 | "source": [ 217 | "plt.plot(ac_res)\n", 218 | "plt.axhline(y=-1.96/np.sqrt(800), linestyle='--')\n", 219 | "plt.axhline(y=1.96/np.sqrt(800), linestyle='--')" 220 | ], 221 | "execution_count": 0, 222 | "outputs": [] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "metadata": { 227 | "id": "euwVA5Y8Gd7z", 228 | "colab_type": "code", 229 | "colab": {} 230 | }, 231 | "source": [ 232 | "ap = pd.read_csv(colab_path+'data/AirPassengers.csv', header=0, parse_dates=[0], index_col=0)" 233 | ], 234 | "execution_count": 0, 235 | "outputs": [] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "metadata": { 240 | "id": "SlPuvI6QGd72", 241 | "colab_type": "code", 242 | "colab": {} 243 | }, 244 | "source": [ 245 | "ap.plot()" 246 | ], 247 | "execution_count": 0, 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "oCDdTuMGGd77", 254 | "colab_type": "code", 255 | "colab": {} 256 | }, 257 | "source": [ 258 | "plt.plot(acf(ap))" 259 | ], 260 | "execution_count": 0, 261 | "outputs": [] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "metadata": { 266 | "id": "3b-3ktcRGd7-", 267 | "colab_type": "code", 268 | "colab": {} 269 | }, 270 | "source": [ 271 | "ac_plot = plot_acf(ap)" 272 | ], 273 | "execution_count": 0, 274 | "outputs": [] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "metadata": { 279 | "id": "-Oc0Eb8wGd8C", 280 | "colab_type": "code", 281 | "colab": {} 282 | }, 283 | "source": [ 284 | "dtrend_ap = np.log(ap).diff().dropna()" 285 | ], 286 | "execution_count": 0, 287 | "outputs": [] 288 | }, 289 | { 290 | "cell_type": "code", 291 | "metadata": { 292 | "id": "ekAHgfhlGd8F", 293 | "colab_type": "code", 294 | "colab": {} 295 | }, 296 | "source": [ 297 | 
"dtrend_ap.plot()" 298 | ], 299 | "execution_count": 0, 300 | "outputs": [] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "metadata": { 305 | "id": "Y73uSCj4Gd8I", 306 | "colab_type": "code", 307 | "colab": {} 308 | }, 309 | "source": [ 310 | "acf_plot = plot_acf(dtrend_ap)" 311 | ], 312 | "execution_count": 0, 313 | "outputs": [] 314 | }, 315 | { 316 | "cell_type": "code", 317 | "metadata": { 318 | "id": "kuatoefiGd8L", 319 | "colab_type": "code", 320 | "colab": {} 321 | }, 322 | "source": [ 323 | "# Q5:\n", 324 | "# read \"../data/stock_data.csv\"\n", 325 | "# Plot acf using statsmodels for IBM stock data\n", 326 | "# detrend \n", 327 | "# plot the detrend data" 328 | ], 329 | "execution_count": 0, 330 | "outputs": [] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "metadata": { 335 | "id": "ORKAF9yQGd8O", 336 | "colab_type": "code", 337 | "colab": {} 338 | }, 339 | "source": [ 340 | "" 341 | ], 342 | "execution_count": 0, 343 | "outputs": [] 344 | } 345 | ] 346 | } -------------------------------------------------------------------------------- /Financial Time Series/01.Financial Time Series Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.1" 21 | }, 22 | "colab": { 23 | "name": "01.Financial Time Series Analysis.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "uqgDqJ64GevB", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "## Imports" 37 | ] 38 | }, 
39 | { 40 | "cell_type": "code", 41 | "metadata": { 42 | "id": "V7qlTXfFGevD", 43 | "colab_type": "code", 44 | "colab": {} 45 | }, 46 | "source": [ 47 | "import math\n", 48 | "import numpy as np\n", 49 | "import pandas as pd\n", 50 | "from pylab import plt\n", 51 | "\n", 52 | "plt.style.use('seaborn')\n", 53 | "%matplotlib inline" 54 | ], 55 | "execution_count": 0, 56 | "outputs": [] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": { 61 | "id": "qiER5T0sGevI", 62 | "colab_type": "text" 63 | }, 64 | "source": [ 65 | "## Data Preprocessing" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "metadata": { 71 | "id": "gRqcgvlLGevJ", 72 | "colab_type": "code", 73 | "colab": {} 74 | }, 75 | "source": [ 76 | "%%time\n", 77 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n", 78 | "df = pd.read_csv(colab_path+'data/yesbank_data.csv', index_col=0, parse_dates=True)" 79 | ], 80 | "execution_count": 0, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "metadata": { 86 | "id": "eYot4tJJGevO", 87 | "colab_type": "code", 88 | "colab": {} 89 | }, 90 | "source": [ 91 | "df.rename(columns={'close': 'c', 'open':'o', 'high':'h', 'low':'l', 'volume':'v'}, inplace=True)" 92 | ], 93 | "execution_count": 0, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "3ab7Q9DLGevS", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "df.head()" 105 | ], 106 | "execution_count": 0, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "metadata": { 112 | "id": "EkMzXxSZGevV", 113 | "colab_type": "code", 114 | "colab": {} 115 | }, 116 | "source": [ 117 | "df['c'].plot()" 118 | ], 119 | "execution_count": 0, 120 | "outputs": [] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": { 125 | "id": "do7ljwgkGevY", 126 | "colab_type": "text" 127 | }, 128 | "source": [ 129 | "## Implementing a simple crossover strategy" 130 | ] 
131 | }, 132 | { 133 | "cell_type": "code", 134 | "metadata": { 135 | "id": "HQjXYvTLGeva", 136 | "colab_type": "code", 137 | "colab": {} 138 | }, 139 | "source": [ 140 | "df['r'] = np.log(df['c'] / df['c'].shift(1))\n", 141 | "df['sma1'] = df['c'].rolling(15).mean()\n", 142 | "df['sma2'] = df['c'].rolling(30).mean()\n", 143 | "df['sma3'] = df['c'].rolling(60).mean()" 144 | ], 145 | "execution_count": 0, 146 | "outputs": [] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "pnXo5aOsGeve", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "df.dropna(inplace=True)" 157 | ], 158 | "execution_count": 0, 159 | "outputs": [] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "metadata": { 164 | "id": "ysBgZsT2Gevi", 165 | "colab_type": "code", 166 | "colab": {} 167 | }, 168 | "source": [ 169 | "df['market_dir'] = np.where(df['r'] > 0, 1, -1)" 170 | ], 171 | "execution_count": 0, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "15OEulyTGevm", 178 | "colab_type": "code", 179 | "colab": {} 180 | }, 181 | "source": [ 182 | "df['strat_dir'] = np.where(df['sma1'] > df['sma3'], 1, -1)" 183 | ], 184 | "execution_count": 0, 185 | "outputs": [] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "metadata": { 190 | "id": "-Fv5cyBbGevp", 191 | "colab_type": "code", 192 | "colab": {} 193 | }, 194 | "source": [ 195 | "df.head(10)" 196 | ], 197 | "execution_count": 0, 198 | "outputs": [] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "G3JT2IadGevu", 204 | "colab_type": "code", 205 | "colab": {} 206 | }, 207 | "source": [ 208 | "df['strat_dir'].diff().value_counts()" 209 | ], 210 | "execution_count": 0, 211 | "outputs": [] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "metadata": { 216 | "id": "nNsN3QDeGev0", 217 | "colab_type": "code", 218 | "colab": {} 219 | }, 220 | "source": [ 221 | "df['s'] = df['strat_dir'] * df['r']" 222 | ], 223 | 
"execution_count": 0, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "metadata": { 229 | "id": "TTkhLu3hGev4", 230 | "colab_type": "code", 231 | "colab": {} 232 | }, 233 | "source": [ 234 | "df[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 10));" 235 | ], 236 | "execution_count": 0, 237 | "outputs": [] 238 | }, 239 | { 240 | "cell_type": "markdown", 241 | "metadata": { 242 | "id": "TpPHBq0BGev6", 243 | "colab_type": "text" 244 | }, 245 | "source": [ 246 | "## Add a machine learning model and perform accuracy testing" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "metadata": { 252 | "id": "XWZ3Kyk5Gev8", 253 | "colab_type": "code", 254 | "colab": {} 255 | }, 256 | "source": [ 257 | "from sklearn.linear_model import LogisticRegression\n", 258 | "from sklearn.metrics import accuracy_score" 259 | ], 260 | "execution_count": 0, 261 | "outputs": [] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "metadata": { 266 | "id": "xPFqYhcrGewB", 267 | "colab_type": "code", 268 | "colab": {} 269 | }, 270 | "source": [ 271 | "mdf = df.copy()" 272 | ], 273 | "execution_count": 0, 274 | "outputs": [] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "metadata": { 279 | "id": "K1ShkwN4GewE", 280 | "colab_type": "code", 281 | "colab": {} 282 | }, 283 | "source": [ 284 | "for i in range(1,6):\n", 285 | " mdf[f'lag_{i}'] = mdf['market_dir'].shift(i)" 286 | ], 287 | "execution_count": 0, 288 | "outputs": [] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "metadata": { 293 | "id": "eKFawK2jGewH", 294 | "colab_type": "code", 295 | "colab": {} 296 | }, 297 | "source": [ 298 | "mdf.dropna(inplace=True)\n", 299 | "mdf.head()" 300 | ], 301 | "execution_count": 0, 302 | "outputs": [] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "metadata": { 307 | "id": "zMYT9E6UGewJ", 308 | "colab_type": "code", 309 | "colab": {} 310 | }, 311 | "source": [ 312 | "X = mdf[['lag_1','lag_2', 'lag_3', 'lag_4','lag_5']]" 313 | ], 314 | "execution_count": 0, 315 | 
"outputs": [] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "metadata": { 320 | "id": "bGTqJVD3GewM", 321 | "colab_type": "code", 322 | "colab": {} 323 | }, 324 | "source": [ 325 | "y = mdf['market_dir']" 326 | ], 327 | "execution_count": 0, 328 | "outputs": [] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "metadata": { 333 | "id": "v6Nt5G6wGewQ", 334 | "colab_type": "code", 335 | "colab": {} 336 | }, 337 | "source": [ 338 | "X_train = X[:500]\n", 339 | "X_test = X[500:]\n", 340 | "y_train = y[:500]\n", 341 | "y_test = y[500:]" 342 | ], 343 | "execution_count": 0, 344 | "outputs": [] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "metadata": { 349 | "id": "QcLdo1KpGewU", 350 | "colab_type": "code", 351 | "colab": {} 352 | }, 353 | "source": [ 354 | "classifier = LogisticRegression()" 355 | ], 356 | "execution_count": 0, 357 | "outputs": [] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "metadata": { 362 | "id": "qcCSy8ksGewX", 363 | "colab_type": "code", 364 | "colab": {} 365 | }, 366 | "source": [ 367 | "from sklearn.svm import SVC\n", 368 | "classifier = SVC(kernel='linear')" 369 | ], 370 | "execution_count": 0, 371 | "outputs": [] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "metadata": { 376 | "id": "FUOeiiNzGewZ", 377 | "colab_type": "code", 378 | "colab": {} 379 | }, 380 | "source": [ 381 | "classifier.fit(X_train,y_train)" 382 | ], 383 | "execution_count": 0, 384 | "outputs": [] 385 | }, 386 | { 387 | "cell_type": "markdown", 388 | "metadata": { 389 | "id": "2G5Nx8g2Gewc", 390 | "colab_type": "text" 391 | }, 392 | "source": [ 393 | "## In Sample testing" 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "metadata": { 399 | "id": "xez0UptIGewd", 400 | "colab_type": "code", 401 | "colab": {} 402 | }, 403 | "source": [ 404 | "y_pred = classifier.predict(X_train)\n", 405 | "accuracy_score(y_pred, y_train)" 406 | ], 407 | "execution_count": 0, 408 | "outputs": [] 409 | }, 410 | { 411 | "cell_type": "markdown", 412 | "metadata": { 413 | 
"id": "3RnPjrjpGewg", 414 | "colab_type": "text" 415 | }, 416 | "source": [ 417 | "## Holdout Testing" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "metadata": { 423 | "id": "1kseIkV4Gewh", 424 | "colab_type": "code", 425 | "colab": {} 426 | }, 427 | "source": [ 428 | "y_pred = classifier.predict(X_test)\n", 429 | "accuracy_score(y_test,y_pred)" 430 | ], 431 | "execution_count": 0, 432 | "outputs": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "metadata": { 437 | "id": "3L55wa-rGewk", 438 | "colab_type": "code", 439 | "colab": {} 440 | }, 441 | "source": [ 442 | "" 443 | ], 444 | "execution_count": 0, 445 | "outputs": [] 446 | } 447 | ] 448 | } -------------------------------------------------------------------------------- /Financial Time Series/02. sentiment_score.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import requests\n", 11 | "import pandas as pd\n", 12 | "pd.options.display.max_colwidth = 1000" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "response = requests.get(\"https://api.bseindia.com/BseIndiaAPI/api/AnnGetData/w?strCat=-1&strPrevDate=20190730&strScrip=&strSearch=P&strToDate=20190730&strType=C\")" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "bse_news_json = json.loads(response.text)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": 8, 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "name": "stdout", 40 | "output_type": "stream", 41 | "text": [ 42 | "WARNING:tensorflow:From /miniconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is 
deprecated and will be removed in a future version.\n", 43 | "Instructions for updating:\n", 44 | "Colocations handled automatically by placer.\n", 45 | "WARNING:tensorflow:From /Users/ram/OneDrive/Talks/scipy/generating-reviews-discovering-sentiment/encoder.py:59: calling l2_normalize (from tensorflow.python.ops.nn_impl) with dim is deprecated and will be removed in a future version.\n", 46 | "Instructions for updating:\n", 47 | "dim is deprecated, use axis instead\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "from encoder import Model\n", 53 | "\n", 54 | "model = Model()" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 9, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "import re\n", 64 | "def clean_headline(headline):\n", 65 | " return ' '.join(re.sub(\"(@[A-Za-z0-9]+)|([^0-9A-Za-z \\t])|(\\w+:\\/\\/\\S+)|[0-9]\", \" \", headline).split())" 66 | ] 67 | }, 68 | { 69 | "cell_type": "markdown", 70 | "metadata": {}, 71 | "source": [ 72 | "reference: https://modeldepot.io/afowler/sentiment-neuron" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 21, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "ccd_news = [clean_headline(news['HEADLINE']) for news in bse_news_json['Table'] if 'Siddhartha' in news['HEADLINE']]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 22, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "name": "stdout", 91 | "output_type": "stream", 92 | "text": [ 93 | "16.797 seconds to transform 5 examples\n", 94 | "-0.14523332\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "text_features = model.transform(ccd_news)\n", 100 | "print(text_features[:,2388][0])" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": 33, 106 | "metadata": {}, 107 | "outputs": [ 108 | { 109 | "data": { 110 | "text/html": [ 111 | "
\n", 112 | "\n", 125 | "\n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | "
01
00.009919This is to inform you that Mr V G Siddhartha Chairman and Managing Director of Coffee Day Enterprises Limited has not reachable since yesterday evening We are taking the help of concerned authorities Company is professionally managed and led by competent leadership team which will ensure continuity of business BR BR We will keep you posted as and when we receive further updates BR
1-0.068979The board of directors of the company held an emergency meeting today to discuss developments following its earlier communication to the stock exchanges BR BR A press release is attached Also attached is a copy of the letter purportedly signed by Mr V G Siddhartha dated July BR BR This is for your information and records BR
2-0.102269The Exchange has sought clarification from Sical Logistics Ltd on July with reference to news appeared in www moneycontrol com dated July quoting VG Siddhartha missing Helicopters Coast Guard called to find Cafe Coffee Day founder BR BR The reply is awaited
3-0.145233The Exchange has sought clarification from Coffee Day Enterprises Ltd with reference to the media reports titled Authenticity of Siddhartha s last note doubtful claims I T source BR BR Link Day Enterprises Ltd response is awaited
4-0.353844Certain media reports have published a document purportedly written by Mr V G Siddhartha and which is addressed to the Board and Employees of Coffee Data Enterprises However it has been observed that the Company has not disclosed the same with the Exchange BR BR In this regard a clarification has been sought from Coffee Day Enterprises Ltd with regard to non disclosure of material information related to Mr V G Siddhartha who is reportedly missing Company response is awaited
\n", 161 | "
" 162 | ], 163 | "text/plain": [ 164 | " 0 \\\n", 165 | "0 0.009919 \n", 166 | "1 -0.068979 \n", 167 | "2 -0.102269 \n", 168 | "3 -0.145233 \n", 169 | "4 -0.353844 \n", 170 | "\n", 171 | " 1 \n", 172 | "0 This is to inform you that Mr V G Siddhartha Chairman and Managing Director of Coffee Day Enterprises Limited has not reachable since yesterday evening We are taking the help of concerned authorities Company is professionally managed and led by competent leadership team which will ensure continuity of business BR BR We will keep you posted as and when we receive further updates BR \n", 173 | "1 The board of directors of the company held an emergency meeting today to discuss developments following its earlier communication to the stock exchanges BR BR A press release is attached Also attached is a copy of the letter purportedly signed by Mr V G Siddhartha dated July BR BR This is for your information and records BR \n", 174 | "2 The Exchange has sought clarification from Sical Logistics Ltd on July with reference to news appeared in www moneycontrol com dated July quoting VG Siddhartha missing Helicopters Coast Guard called to find Cafe Coffee Day founder BR BR The reply is awaited \n", 175 | "3 The Exchange has sought clarification from Coffee Day Enterprises Ltd with reference to the media reports titled Authenticity of Siddhartha s last note doubtful claims I T source BR BR Link Day Enterprises Ltd response is awaited \n", 176 | "4 Certain media reports have published a document purportedly written by Mr V G Siddhartha and which is addressed to the Board and Employees of Coffee Data Enterprises However it has been observed that the Company has not disclosed the same with the Exchange BR BR In this regard a clarification has been sought from Coffee Day Enterprises Ltd with regard to non disclosure of material information related to Mr V G Siddhartha who is reportedly missing Company response is awaited " 177 | ] 178 | }, 179 | "execution_count": 33, 180 | 
"metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "pd.DataFrame(set(zip(text_features[:,2388], ccd_news)))" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": {}, 192 | "outputs": [], 193 | "source": [] 194 | } 195 | ], 196 | "metadata": { 197 | "kernelspec": { 198 | "display_name": "Python 3", 199 | "language": "python", 200 | "name": "python3" 201 | }, 202 | "language_info": { 203 | "codemirror_mode": { 204 | "name": "ipython", 205 | "version": 3 206 | }, 207 | "file_extension": ".py", 208 | "mimetype": "text/x-python", 209 | "name": "python", 210 | "nbconvert_exporter": "python", 211 | "pygments_lexer": "ipython3", 212 | "version": "3.6.1" 213 | } 214 | }, 215 | "nbformat": 4, 216 | "nbformat_minor": 2 217 | } 218 | -------------------------------------------------------------------------------- /ODSC Workshop.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/562b5b1189d6d5c93f8b1c89fb8ecbc42350024b/ODSC Workshop.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![open in colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/poornagurram/TimeSeriesAnalysis_ODSC_2019/) 2 | 3 | # ODSC 2019 4 | 5 | ## Proposal 6 | 7 | # Time Series Analysis in Python Workshop 8 | Time is precious so is Time Series Analysis” 9 | 10 | Time series analysis has been around for centuries helping us to solve from astronomical problems to business problems and advanced scientific research around us now. Time stores precious information, which most machine learning algorithms don’t deal with. But time series analysis, which is a mix of machine learning and statistics helps us to get useful insights. 
Time series can be applied to various fields like economy forecasting, budgetary analysis, sales forecasting, census analysis and much more. In this workshop, we will look at how to dive deep into time series data and make use of deep learning to make accurate predictions. 11 | 12 | **Structure of the workshop goes like this** 13 | 14 | * Introduction to Time series analysis 15 | * Time Series Exploratory Data Analysis and Data manipulation with pandas 16 | * Forecast Time series data with some classical methods (AR, MA, ARMA, ARIMA, GARCH, E-GARCH) 17 | * Introduction to Deep Learning and Time series forecasting using MLP and LSTM 18 | * Forecasting using XGBoost 19 | * Financial Time Series data 20 | 21 | ## Libraries Used: 22 | > install libraries using pip install -r requirements.txt 23 | 24 | * Keras (with Tensorflow backend) 25 | * jupyter 26 | * matplotlib 27 | * pandas 28 | * statsmodels 29 | * sklearn 30 | * seaborn 31 | * arch 32 | * xgboost 33 | 34 | ## Outline/Structure of the Workshop 35 | 36 | * Introduction to Time series analysis (10 mins) 37 | * Time Series Exploratory Data Analysis and Data manipulation with pandas (45 mins) 38 | * Forecast Time series data with some classical methods (AR, MA, ARMA, ARIMA, GARCH, E-GARCH) (60 mins) 39 | * Introduction to Deep Learning and Time series forecasting using MLP and LSTM (60 mins) 40 | * Forecasting using XGBoost - (20 mins) 41 | * Financial Time Series data - (30 mins) 42 | 43 | **Note: Session timings include exercises for attendees to work on** 44 | 45 | ## Prerequisites for Attendees 46 | * Basics of Python 47 | * Basics of Time series analysis 48 | * Basics of Pandas 49 | * Introduction to Deep Neural Networks 50 | 51 | ## Credits : 52 | 53 | * Immensely thankful to the great workshop by [Aileen Nielsen](https://github.com/AileenNielsen/TimeSeriesAnalysisWithPython) 54 | * Amazing blogs on Time Series by [Jason Brownlee](https://machinelearningmastery.com/) 55 | 56 | ## Tasks : 57 | * Non-Linear Time Series 
58 | * Dataset Curation 59 | * Other Examples 60 | * Optimize pandas tutorial 61 | * Improve LSTM 62 | -------------------------------------------------------------------------------- /Statistical models/01. Time Series components.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.1" 21 | }, 22 | "colab": { 23 | "name": "01. Time Series components.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "riHGQ2LjGf89", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "## Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "metadata": { 42 | "id": "e2z51lilGf8_", 43 | "colab_type": "code", 44 | "colab": {} 45 | }, 46 | "source": [ 47 | "import pandas as pd\n", 48 | "import numpy as np\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "plt.rcParams[\"figure.figsize\"] = (20,8)" 51 | ], 52 | "execution_count": 0, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": { 58 | "id": "7Fl-lX7lGf9C", 59 | "colab_type": "text" 60 | }, 61 | "source": [ 62 | "## Some Theory to take care of" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "id": "Qaxs6TtsGf9C", 69 | "colab_type": "text" 70 | }, 71 | "source": [ 72 | "#### What is white noise?\n", 73 | " - No Correlation \n", 74 | " - Unable to fit a model\n", 75 | "\n", 76 | "#### Need for Forecasting\n", 77 | "\n", 78 | "#### Components of Time Series Analysis\n", 79 | " - 
Trend\n", 80 | " - Seasonality\n", 81 | " - Additive vs Multiplicative\n", 82 | " \n", 83 | "#### Residuals\n" 84 | ] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "metadata": { 89 | "id": "CcKsFjqYGf9D", 90 | "colab_type": "text" 91 | }, 92 | "source": [ 93 | "## Time Series Decomposition" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "Jo5YDW_QGf9E", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n", 105 | "df = pd.read_csv(colab_path+'data/AirPassengers.csv', parse_dates=True, index_col=0)" 106 | ], 107 | "execution_count": 0, 108 | "outputs": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "metadata": { 113 | "id": "OCCXUzllGf9G", 114 | "colab_type": "code", 115 | "colab": {} 116 | }, 117 | "source": [ 118 | "df.plot()" 119 | ], 120 | "execution_count": 0, 121 | "outputs": [] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "metadata": { 126 | "id": "UDTQE9FqGf9J", 127 | "colab_type": "code", 128 | "colab": {} 129 | }, 130 | "source": [ 131 | "df.head()" 132 | ], 133 | "execution_count": 0, 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "metadata": { 139 | "id": "SoUPBxvQGf9M", 140 | "colab_type": "code", 141 | "colab": {} 142 | }, 143 | "source": [ 144 | "from statsmodels.tsa.seasonal import seasonal_decompose" 145 | ], 146 | "execution_count": 0, 147 | "outputs": [] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "metadata": { 152 | "id": "CLrs9pY1Gf9P", 153 | "colab_type": "code", 154 | "colab": {} 155 | }, 156 | "source": [ 157 | "components = seasonal_decompose(df)" 158 | ], 159 | "execution_count": 0, 160 | "outputs": [] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "metadata": { 165 | "id": "MIaSdYBXGf9R", 166 | "colab_type": "code", 167 | "colab": {} 168 | }, 169 | "source": [ 170 | "components.plot()" 171 | ], 172 | "execution_count": 0, 173 | 
"outputs": [] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "metadata": { 178 | "id": "VjbYDqtaGf9T", 179 | "colab_type": "text" 180 | }, 181 | "source": [ 182 | "## Exercise" 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "metadata": { 188 | "id": "defWuJihGf9U", 189 | "colab_type": "text" 190 | }, 191 | "source": [ 192 | "### Read the file 'data/daily_min_tempratures.csv' and find out the time series components.\n", 193 | "\n", 194 | "1. The index does not have a frequency set. Set the frequency. Hint: use the 'asfreq' method.\n", 195 | "2. There are some missing values. Fill the missing values using the 'bfill' method. Hint: use the fillna method.\n", 196 | "3. Perform decomposition" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "metadata": { 202 | "id": "8bB2103nM4A2", 203 | "colab_type": "text" 204 | }, 205 | "source": [ 206 | "## Exponential Smoothing" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "metadata": { 212 | "id": "A92qFwKQM7Zy", 213 | "colab_type": "code", 214 | "colab": {} 215 | }, 216 | "source": [ 217 | "from statsmodels.tsa.api import ExponentialSmoothing" 218 | ], 219 | "execution_count": 0, 220 | "outputs": [] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "metadata": { 225 | "id": "DiqmNt52OT-v", 226 | "colab_type": "code", 227 | "colab": {} 228 | }, 229 | "source": [ 230 | "ExponentialSmoothing?" 
231 | ], 232 | "execution_count": 0, 233 | "outputs": [] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "metadata": { 238 | "id": "YGonaKD5NAM4", 239 | "colab_type": "code", 240 | "colab": {} 241 | }, 242 | "source": [ 243 | "model = ExponentialSmoothing(df['#Passengers'].values, \n", 244 | " trend='mul', \n", 245 | " damped=False,\n", 246 | " seasonal='add',\n", 247 | " seasonal_periods=12)" 248 | ], 249 | "execution_count": 0, 250 | "outputs": [] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "metadata": { 255 | "id": "lPM1Gng5Odwe", 256 | "colab_type": "code", 257 | "colab": {} 258 | }, 259 | "source": [ 260 | "res = model.fit()" 261 | ], 262 | "execution_count": 0, 263 | "outputs": [] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "metadata": { 268 | "id": "ZFGvmOtoPZe7", 269 | "colab_type": "code", 270 | "colab": {} 271 | }, 272 | "source": [ 273 | "res.params" 274 | ], 275 | "execution_count": 0, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "id": "eHldR55cPeh6", 282 | "colab_type": "code", 283 | "colab": {} 284 | }, 285 | "source": [ 286 | "res.forecast?" 
287 | ], 288 | "execution_count": 0, 289 | "outputs": [] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "metadata": { 294 | "id": "e613NQMWPiQj", 295 | "colab_type": "code", 296 | "colab": {} 297 | }, 298 | "source": [ 299 | "fc = res.forecast(60).astype(int)" 300 | ], 301 | "execution_count": 0, 302 | "outputs": [] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "metadata": { 307 | "id": "jW8z4vEtPm2C", 308 | "colab_type": "code", 309 | "colab": {} 310 | }, 311 | "source": [ 312 | "data = list(df['#Passengers'].values)\n", 313 | "data.extend(fc)" 314 | ], 315 | "execution_count": 0, 316 | "outputs": [] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "metadata": { 321 | "id": "ZVBQLZhDPs--", 322 | "colab_type": "code", 323 | "colab": {} 324 | }, 325 | "source": [ 326 | "df1 = pd.DataFrame(data)" 327 | ], 328 | "execution_count": 0, 329 | "outputs": [] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "metadata": { 334 | "id": "zlHmOFfnPwHW", 335 | "colab_type": "code", 336 | "colab": {} 337 | }, 338 | "source": [ 339 | "" 340 | ], 341 | "execution_count": 0, 342 | "outputs": [] 343 | } 344 | ] 345 | } -------------------------------------------------------------------------------- /Statistical models/02. Stationarity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.1" 21 | }, 22 | "colab": { 23 | "name": "02. 
Stationarity.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "ggjzGxCwGhyt", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "## Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "metadata": { 42 | "id": "KCRNY4R3Ghyu", 43 | "colab_type": "code", 44 | "colab": {} 45 | }, 46 | "source": [ 47 | "import pandas as pd\n", 48 | "import numpy as np\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n", 51 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"" 52 | ], 53 | "execution_count": 0, 54 | "outputs": [] 55 | }, 56 | { 57 | "cell_type": "markdown", 58 | "metadata": { 59 | "id": "JPBgkpOxGhyy", 60 | "colab_type": "text" 61 | }, 62 | "source": [ 63 | "## Stationarity" 64 | ] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": { 69 | "id": "_7NhORiSGhyz", 70 | "colab_type": "text" 71 | }, 72 | "source": [ 73 | "### Constant Mean, variance, autocorr\n", 74 | "\n", 75 | "Y(t) = rho * Y(t-1) + epsilon\n", 76 | "\n", 77 | "Data is not stationary if rho == 1\n" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "metadata": { 83 | "id": "LvAIYob5Ghy1", 84 | "colab_type": "code", 85 | "colab": {} 86 | }, 87 | "source": [ 88 | "min_temp_df = pd.read_csv(colab_path+'data/daily_min_tempratures.csv', parse_dates=[0], index_col=0)\n", 89 | "min_temp_df['temp'] = min_temp_df['temp'].astype(float)" 90 | ], 91 | "execution_count": 0, 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "metadata": { 97 | "id": "RytriVc-Ghy4", 98 | "colab_type": "code", 99 | "colab": {} 100 | }, 101 | "source": [ 102 | "min_temp_df[min_temp_df.temp.isna()]" 103 | ], 104 | "execution_count": 0, 105 | "outputs": [] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "metadata": { 110 | "id": "euczIvV6Ghy8", 111 | "colab_type": "code", 112 | "colab": {} 113 | }, 114 
| "source": [ 115 | "min_temp_df.head()" 116 | ], 117 | "execution_count": 0, 118 | "outputs": [] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "metadata": { 123 | "id": "TwuOJAoVGhy-", 124 | "colab_type": "code", 125 | "colab": {} 126 | }, 127 | "source": [ 128 | "min_temp_df['roll_mean'] = min_temp_df['temp'].rolling(50).mean()\n", 129 | "min_temp_df['roll_std'] = min_temp_df['temp'].rolling(50).std()" 130 | ], 131 | "execution_count": 0, 132 | "outputs": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "metadata": { 137 | "id": "PSEEHoPVGhzA", 138 | "colab_type": "code", 139 | "colab": {} 140 | }, 141 | "source": [ 142 | "min_temp_df.plot(figsize=(20,10))\n", 143 | "\n" 144 | ], 145 | "execution_count": 0, 146 | "outputs": [] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "Xb5GZcPGGhzC", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "air_pass_df = pd.read_csv(colab_path+'data/AirPassengers.csv', parse_dates=[0], index_col=0)\n", 157 | "air_pass_df['#Passengers'].fillna(method='ffill', inplace=True)\n", 158 | "\n" 159 | ], 160 | "execution_count": 0, 161 | "outputs": [] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "metadata": { 166 | "id": "tFuzq0eZGhzF", 167 | "colab_type": "code", 168 | "colab": {} 169 | }, 170 | "source": [ 171 | "air_pass_df.head()" 172 | ], 173 | "execution_count": 0, 174 | "outputs": [] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "metadata": { 179 | "id": "D_3kWwm7GhzI", 180 | "colab_type": "code", 181 | "colab": {} 182 | }, 183 | "source": [ 184 | "air_pass_df.plot(figsize=(20,10))" 185 | ], 186 | "execution_count": 0, 187 | "outputs": [] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": { 192 | "id": "YCXf7I9IGhzL", 193 | "colab_type": "text" 194 | }, 195 | "source": [ 196 | "## ADFuller test" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "metadata": { 202 | "id": "irQp4BgPGhzM", 203 | "colab_type": "code", 204 | "colab": {} 205 | 
}, 206 | "source": [ 207 | "from statsmodels.tsa.stattools import adfuller" 208 | ], 209 | "execution_count": 0, 210 | "outputs": [] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "metadata": { 215 | "id": "WNj6E8fNGhzQ", 216 | "colab_type": "code", 217 | "colab": {} 218 | }, 219 | "source": [ 220 | "adfuller(min_temp_df['temp'])" 221 | ], 222 | "execution_count": 0, 223 | "outputs": [] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "metadata": { 228 | "id": "9Cg-oigbGhzT", 229 | "colab_type": "code", 230 | "colab": {} 231 | }, 232 | "source": [ 233 | "adfuller(air_pass_df['#Passengers'])" 234 | ], 235 | "execution_count": 0, 236 | "outputs": [] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "metadata": { 241 | "id": "XBIZgDdLGhzW", 242 | "colab_type": "text" 243 | }, 244 | "source": [ 245 | "## Detrend" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "metadata": { 251 | "id": "6I1cewpfGhzX", 252 | "colab_type": "code", 253 | "colab": {} 254 | }, 255 | "source": [ 256 | "sd = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates=True, index_col=0)" 257 | ], 258 | "execution_count": 0, 259 | "outputs": [] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "metadata": { 264 | "id": "_4mwpEkbGhzb", 265 | "colab_type": "code", 266 | "colab": {} 267 | }, 268 | "source": [ 269 | "sd['AAPL'].plot(figsize=(10,8))" 270 | ], 271 | "execution_count": 0, 272 | "outputs": [] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "metadata": { 277 | "id": "bpqk4ELhGhzg", 278 | "colab_type": "code", 279 | "colab": {} 280 | }, 281 | "source": [ 282 | "sd['AAPL_d'] = sd['AAPL'].diff()" 283 | ], 284 | "execution_count": 0, 285 | "outputs": [] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "metadata": { 290 | "id": "nrfaj4AmGhzk", 291 | "colab_type": "code", 292 | "colab": {} 293 | }, 294 | "source": [ 295 | "sd['AAPL_d'].plot(figsize=(10,8))" 296 | ], 297 | "execution_count": 0, 298 | "outputs": [] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "metadata": 
{ 303 | "id": "wRUVSocjGhzo", 304 | "colab_type": "code", 305 | "colab": {} 306 | }, 307 | "source": [ 308 | "adfuller(sd['AAPL_d'].dropna())" 309 | ], 310 | "execution_count": 0, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "metadata": { 316 | "id": "ehWY5e0WGhzr", 317 | "colab_type": "text" 318 | }, 319 | "source": [ 320 | "## Deseasonalize" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "metadata": { 326 | "id": "Up8bJwYwGhzt", 327 | "colab_type": "code", 328 | "colab": {} 329 | }, 330 | "source": [ 331 | "min_temp_df['temp'].plot(figsize=(10,8))" 332 | ], 333 | "execution_count": 0, 334 | "outputs": [] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "metadata": { 339 | "id": "PigSU4rtGhz0", 340 | "colab_type": "code", 341 | "colab": {} 342 | }, 343 | "source": [ 344 | "min_temp_df['temp'].diff(12).plot(figsize=(10,8))" 345 | ], 346 | "execution_count": 0, 347 | "outputs": [] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "metadata": { 352 | "id": "0vas-gwjGhz9", 353 | "colab_type": "code", 354 | "colab": {} 355 | }, 356 | "source": [ 357 | "adfuller(min_temp_df['temp'].diff(12).dropna())" 358 | ], 359 | "execution_count": 0, 360 | "outputs": [] 361 | }, 362 | { 363 | "cell_type": "markdown", 364 | "metadata": { 365 | "id": "pjJsrmQbGh0B", 366 | "colab_type": "text" 367 | }, 368 | "source": [ 369 | "## Exercise\n", 370 | "\n", 371 | "Detrend & Deseasonalize the AirPassengers data" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "metadata": { 377 | "id": "n6mY1VLZGh0C", 378 | "colab_type": "code", 379 | "colab": {} 380 | }, 381 | "source": [ 382 | "air_pass_df['#Passengers'].diff().diff(12).plot()" 383 | ], 384 | "execution_count": 0, 385 | "outputs": [] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "metadata": { 390 | "id": "bbtkk4TUGh0F", 391 | "colab_type": "code", 392 | "colab": {} 393 | }, 394 | "source": [ 395 | "adfuller(air_pass_df['#Passengers'].diff().diff(12).dropna())" 396 | ], 397 | 
"execution_count": 0, 398 | "outputs": [] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "metadata": { 403 | "id": "hpWM1AeuGh0H", 404 | "colab_type": "code", 405 | "colab": {} 406 | }, 407 | "source": [ 408 | "" 409 | ], 410 | "execution_count": 0, 411 | "outputs": [] 412 | } 413 | ] 414 | } -------------------------------------------------------------------------------- /Statistical models/03.ARMA Process Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.1" 21 | }, 22 | "colab": { 23 | "name": "03.ARMA Process Models.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "xgOIlFrwqzZn", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import pandas as pd\n", 38 | "import numpy as np\n", 39 | "import matplotlib.pyplot as plt\n", 40 | "plt.rcParams[\"figure.figsize\"] = (20,10)" 41 | ], 42 | "execution_count": 0, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "vxgtRPp1qzZu", 49 | "colab_type": "text" 50 | }, 51 | "source": [ 52 | "## AR Models\n", 53 | "\n", 54 | "X(t) = phi * X(t-1) + epsilon " 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "metadata": { 60 | "id": "CTs297buqzZv", 61 | "colab_type": "code", 62 | "colab": {} 63 | }, 64 | "source": [ 65 | "from statsmodels.tsa.arima_process import ArmaProcess" 66 | ], 67 | "execution_count": 0, 68 | "outputs": [] 69 | }, 70 | { 71 | "cell_type": "code", 
72 | "metadata": { 73 | "id": "BC5TDYDIqzZy", 74 | "colab_type": "code", 75 | "colab": {} 76 | }, 77 | "source": [ 78 | "ArmaProcess?" 79 | ], 80 | "execution_count": 0, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "metadata": { 86 | "id": "dpSBFZzkqzZ1", 87 | "colab_type": "code", 88 | "colab": {} 89 | }, 90 | "source": [ 91 | "ar1 = ArmaProcess(ar=(1, -.9, 0.7, -0.42))" 92 | ], 93 | "execution_count": 0, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "metadata": { 99 | "id": "2yiNWFZpqzZ6", 100 | "colab_type": "code", 101 | "colab": {} 102 | }, 103 | "source": [ 104 | "ar1_data = ar1.generate_sample(nsample=1000)" 105 | ], 106 | "execution_count": 0, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "metadata": { 112 | "id": "NQXXhZ6YqzZ8", 113 | "colab_type": "code", 114 | "colab": {} 115 | }, 116 | "source": [ 117 | "plt.plot(ar1_data)" 118 | ], 119 | "execution_count": 0, 120 | "outputs": [] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "metadata": { 125 | "id": "-b3iN5owqzaA", 126 | "colab_type": "code", 127 | "colab": {} 128 | }, 129 | "source": [ 130 | "from statsmodels.tsa.stattools import adfuller" 131 | ], 132 | "execution_count": 0, 133 | "outputs": [] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "metadata": { 138 | "id": "BPU7FwenqzaC", 139 | "colab_type": "code", 140 | "colab": {} 141 | }, 142 | "source": [ 143 | "adfuller(ar1_data)" 144 | ], 145 | "execution_count": 0, 146 | "outputs": [] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "DSXU0-idqzaF", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "from statsmodels.tsa.arima_model import ARMA" 157 | ], 158 | "execution_count": 0, 159 | "outputs": [] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "metadata": { 164 | "id": "9UVPvKUKqzaI", 165 | "colab_type": "code", 166 | "colab": {} 167 | }, 168 | "source": [ 169 | "ar_model = ARMA(ar1_data, order=(3,0))" 170 | ], 171 | 
"execution_count": 0, 172 | "outputs": [] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "metadata": { 177 | "id": "w5SHG3SiqzaM", 178 | "colab_type": "code", 179 | "colab": {} 180 | }, 181 | "source": [ 182 | "res = ar_model.fit()" 183 | ], 184 | "execution_count": 0, 185 | "outputs": [] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "metadata": { 190 | "id": "3Wd8BGvRqzaQ", 191 | "colab_type": "code", 192 | "colab": {} 193 | }, 194 | "source": [ 195 | "res.params" 196 | ], 197 | "execution_count": 0, 198 | "outputs": [] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "metadata": { 203 | "id": "_8NikJENqzaT", 204 | "colab_type": "code", 205 | "colab": {} 206 | }, 207 | "source": [ 208 | "res.k_ar" 209 | ], 210 | "execution_count": 0, 211 | "outputs": [] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "metadata": { 216 | "id": "OLglRPH0qzaV", 217 | "colab_type": "code", 218 | "colab": {} 219 | }, 220 | "source": [ 221 | "print(res.summary())" 222 | ], 223 | "execution_count": 0, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "metadata": { 229 | "id": "l1C8jigdqzaX", 230 | "colab_type": "code", 231 | "colab": {} 232 | }, 233 | "source": [ 234 | "plt.plot(ar1_data)\n", 235 | "plt.plot(res.fittedvalues)\n", 236 | "plt.show()" 237 | ], 238 | "execution_count": 0, 239 | "outputs": [] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "metadata": { 244 | "id": "rkI9NGrOqzaa", 245 | "colab_type": "code", 246 | "colab": {} 247 | }, 248 | "source": [ 249 | "r = res.predict(start=3, end=1003)" 250 | ], 251 | "execution_count": 0, 252 | "outputs": [] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "metadata": { 257 | "id": "fuHxsE-pqzae", 258 | "colab_type": "code", 259 | "colab": {} 260 | }, 261 | "source": [ 262 | "r" 263 | ], 264 | "execution_count": 0, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "metadata": { 270 | "id": "RhHdflraqzah", 271 | "colab_type": "code", 272 | "colab": {} 273 | }, 274 | "source": [ 275 
| "# plt.plot(ar1_data)\n", 276 | "plt.plot(ar1_data)\n", 277 | "plt.plot(r)\n", 278 | "plt.show()" 279 | ], 280 | "execution_count": 0, 281 | "outputs": [] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": { 286 | "id": "RcVJZ2vYqzak", 287 | "colab_type": "text" 288 | }, 289 | "source": [ 290 | "## MA Models\n", 291 | "\n", 292 | "X(T) = MEAN + E + theta1 * E(t-1)" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "metadata": { 298 | "id": "LOKNnpzMqzal", 299 | "colab_type": "code", 300 | "colab": {} 301 | }, 302 | "source": [ 303 | "ma1 = ArmaProcess(ma=(1, 0.9))" 304 | ], 305 | "execution_count": 0, 306 | "outputs": [] 307 | }, 308 | { 309 | "cell_type": "code", 310 | "metadata": { 311 | "id": "GA-PrLeXqzap", 312 | "colab_type": "code", 313 | "colab": {} 314 | }, 315 | "source": [ 316 | "ma1_data = ma1.generate_sample(nsample=1000)" 317 | ], 318 | "execution_count": 0, 319 | "outputs": [] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "metadata": { 324 | "id": "QJpNiw8Qqzaw", 325 | "colab_type": "code", 326 | "colab": {} 327 | }, 328 | "source": [ 329 | "ma1_model = ARMA(ma1_data, order=(0,1))" 330 | ], 331 | "execution_count": 0, 332 | "outputs": [] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "metadata": { 337 | "id": "Wdpt6CWwqza4", 338 | "colab_type": "code", 339 | "colab": {} 340 | }, 341 | "source": [ 342 | "res = ma1_model.fit()" 343 | ], 344 | "execution_count": 0, 345 | "outputs": [] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "metadata": { 350 | "id": "KkLnMajMqza8", 351 | "colab_type": "code", 352 | "colab": {} 353 | }, 354 | "source": [ 355 | "res.params" 356 | ], 357 | "execution_count": 0, 358 | "outputs": [] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "metadata": { 363 | "id": "MX8JlQedqzbL", 364 | "colab_type": "code", 365 | "colab": {} 366 | }, 367 | "source": [ 368 | "res.summary()" 369 | ], 370 | "execution_count": 0, 371 | "outputs": [] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "metadata": { 
376 | "id": "J_aKi_JrqzbO", 377 | "colab_type": "code", 378 | "colab": {} 379 | }, 380 | "source": [ 381 | "plt.plot(ma1_data)\n", 382 | "plt.plot(res.fittedvalues)" 383 | ], 384 | "execution_count": 0, 385 | "outputs": [] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "metadata": { 390 | "id": "xsFU8a1tqzbQ", 391 | "colab_type": "text" 392 | }, 393 | "source": [ 394 | "## ARMA Models" 395 | ] 396 | }, 397 | { 398 | "cell_type": "code", 399 | "metadata": { 400 | "id": "_KSWEHngqzbQ", 401 | "colab_type": "code", 402 | "colab": {} 403 | }, 404 | "source": [ 405 | "alphas = np.array([0.5, -0.25])\n", 406 | "betas = np.array([0.5, -0.3])\n", 407 | "ar = np.r_[1, -alphas]\n", 408 | "ma = np.r_[1, betas]\n", 409 | "ar_ma = ArmaProcess(ar=ar, ma=ma)" 410 | ], 411 | "execution_count": 0, 412 | "outputs": [] 413 | }, 414 | { 415 | "cell_type": "code", 416 | "metadata": { 417 | "id": "1i7BMfo3qzbS", 418 | "colab_type": "code", 419 | "colab": {} 420 | }, 421 | "source": [ 422 | "ar_ma_data = ar_ma.generate_sample(nsample=5000)" 423 | ], 424 | "execution_count": 0, 425 | "outputs": [] 426 | }, 427 | { 428 | "cell_type": "code", 429 | "metadata": { 430 | "id": "elIZGH8PqzbU", 431 | "colab_type": "code", 432 | "colab": {} 433 | }, 434 | "source": [ 435 | "arma_model = ARMA(ar_ma_data, order=(2,2))" 436 | ], 437 | "execution_count": 0, 438 | "outputs": [] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "metadata": { 443 | "id": "M3dTzh7IqzbY", 444 | "colab_type": "code", 445 | "colab": {} 446 | }, 447 | "source": [ 448 | "res = arma_model.fit()" 449 | ], 450 | "execution_count": 0, 451 | "outputs": [] 452 | }, 453 | { 454 | "cell_type": "code", 455 | "metadata": { 456 | "id": "OdHbkzEWqzbb", 457 | "colab_type": "code", 458 | "colab": {} 459 | }, 460 | "source": [ 461 | "res.params" 462 | ], 463 | "execution_count": 0, 464 | "outputs": [] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "metadata": { 469 | "id": "CMDTWqG-qzbd", 470 | "colab_type": "code", 471 | "colab": 
{} 472 | }, 473 | "source": [ 474 | "res.summary()" 475 | ], 476 | "execution_count": 0, 477 | "outputs": [] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "metadata": { 482 | "id": "r9Miba8Fqzbh", 483 | "colab_type": "code", 484 | "colab": {} 485 | }, 486 | "source": [ 487 | "plt.plot(ar_ma_data)\n", 488 | "plt.plot(res.fittedvalues)" 489 | ], 490 | "execution_count": 0, 491 | "outputs": [] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "metadata": { 496 | "id": "gE_bOeCzqzbk", 497 | "colab_type": "code", 498 | "colab": {} 499 | }, 500 | "source": [ 501 | "res.k_ar" 502 | ], 503 | "execution_count": 0, 504 | "outputs": [] 505 | }, 506 | { 507 | "cell_type": "code", 508 | "metadata": { 509 | "id": "Z0n9nmRuqzbm", 510 | "colab_type": "code", 511 | "colab": {} 512 | }, 513 | "source": [ 514 | "res.k_ma" 515 | ], 516 | "execution_count": 0, 517 | "outputs": [] 518 | }, 519 | { 520 | "cell_type": "code", 521 | "metadata": { 522 | "id": "vNw4xFyJqzbp", 523 | "colab_type": "code", 524 | "colab": {} 525 | }, 526 | "source": [ 527 | "res.k_trend" 528 | ], 529 | "execution_count": 0, 530 | "outputs": [] 531 | }, 532 | { 533 | "cell_type": "code", 534 | "metadata": { 535 | "id": "-RxRZOdZqzbr", 536 | "colab_type": "code", 537 | "colab": {} 538 | }, 539 | "source": [ 540 | "" 541 | ], 542 | "execution_count": 0, 543 | "outputs": [] 544 | } 545 | ] 546 | } -------------------------------------------------------------------------------- /Statistical models/04. 
ARIMA Models.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.7.1" 21 | }, 22 | "colab": { 23 | "name": "04. ARIMA Models.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "OrSdVdoeGhq0", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "## Imports" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "metadata": { 42 | "id": "2bJT4wIsGhq4", 43 | "colab_type": "code", 44 | "colab": {} 45 | }, 46 | "source": [ 47 | "import pandas as pd\n", 48 | "import numpy as np\n", 49 | "import matplotlib.pyplot as plt\n", 50 | "%matplotlib inline\n", 51 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n", 52 | "import warnings\n", 53 | "warnings.filterwarnings(\"ignore\")\n", 54 | "colab_path = \"https://github.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/blob/master/\"" 55 | ], 56 | "execution_count": 0, 57 | "outputs": [] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": { 62 | "id": "2JBjgaLuGhq-", 63 | "colab_type": "text" 64 | }, 65 | "source": [ 66 | "## ARIMA Models" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "metadata": { 72 | "id": "ib_UpU6TGhq_", 73 | "colab_type": "code", 74 | "colab": {} 75 | }, 76 | "source": [ 77 | "from statsmodels.tsa.arima_model import ARIMA" 78 | ], 79 | "execution_count": 0, 80 | "outputs": [] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "metadata": { 85 | "id": "WvLLd7NAGhrD", 86 | "colab_type": "code", 87 | "colab": {} 88 | }, 89 | 
"source": [ 90 | "ARIMA?" 91 | ], 92 | "execution_count": 0, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "metadata": { 98 | "id": "lXSy84QIGhrI", 99 | "colab_type": "code", 100 | "colab": {} 101 | }, 102 | "source": [ 103 | "df = pd.read_csv(colab_path+'data/sales.csv', parse_dates=True, index_col=0)" 104 | ], 105 | "execution_count": 0, 106 | "outputs": [] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "metadata": { 111 | "id": "zk_mUUKoGhrZ", 112 | "colab_type": "code", 113 | "colab": {} 114 | }, 115 | "source": [ 116 | "from sklearn.metrics import mean_squared_error\n", 117 | "from math import sqrt\n", 118 | "\n", 119 | "def evaluate_modelperf(data, arima_order):\n", 120 | " train_size = int(len(data) * 0.66)\n", 121 | " train, test = data[:train_size], data[train_size:]\n", 122 | " history = [x for x in train]\n", 123 | " \n", 124 | "\n", 125 | " predictions = list()\n", 126 | " for t in range(len(test)):\n", 127 | " model = ARIMA(history, order=arima_order)\n", 128 | " try:\n", 129 | " res = model.fit(disp=0)\n", 130 | " \n", 131 | " pred = res.forecast()[0]\n", 132 | "\n", 133 | " predictions.append(pred)\n", 134 | " history.append(test[t])\n", 135 | " except:\n", 136 | " return None\n", 137 | "\n", 138 | " try:\n", 139 | " rmse = sqrt(mean_squared_error(test, predictions))\n", 140 | " return rmse\n", 141 | " except:\n", 142 | " print('Error encountered in RMSE calc')\n", 143 | " return None" 144 | ], 145 | "execution_count": 0, 146 | "outputs": [] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "metadata": { 151 | "id": "_q_6L1HwGhrc", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "data= df['Sales'].values\n", 157 | "data = data.astype('float32')" 158 | ], 159 | "execution_count": 0, 160 | "outputs": [] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "metadata": { 165 | "id": "jFX2bH9CGhre", 166 | "colab_type": "code", 167 | "colab": {} 168 | }, 169 | "source": [ 170 | "evaluate_modelperf(data, (1, 
1, 0))" 171 | ], 172 | "execution_count": 0, 173 | "outputs": [] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "metadata": { 178 | "id": "t8RxhoMGGhrg", 179 | "colab_type": "code", 180 | "colab": {} 181 | }, 182 | "source": [ 183 | "p_values = [0, 1, 2, 4, 6, 8, 10]\n", 184 | "d_values = [0, 1, 2]\n", 185 | "q_values = [0, 1, 2]" 186 | ], 187 | "execution_count": 0, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "metadata": { 193 | "id": "ADqyS9muGhrj", 194 | "colab_type": "code", 195 | "colab": {} 196 | }, 197 | "source": [ 198 | "import itertools\n", 199 | "combinations = list(itertools.product(*[p_values, d_values, q_values]))" 200 | ], 201 | "execution_count": 0, 202 | "outputs": [] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "metadata": { 207 | "id": "9IloTFy5Ghrn", 208 | "colab_type": "code", 209 | "colab": {} 210 | }, 211 | "source": [ 212 | "best, low_rmse = None, None\n", 213 | "for order in combinations:\n", 214 | " rmse = evaluate_modelperf(data, order)\n", 215 | " if rmse is not None:\n", 216 | " print(f'RMSE for order: {order} = {rmse}')\n", 217 | " best, low_rmse = (order, rmse) if low_rmse is None or rmse < low_rmse else (best, low_rmse)\n", 218 | " else:\n", 219 | " print(f'Error encountered for order:{order}')\n", 220 | "\n", 221 | "print(f'Best Order: {best}. 
Low RMSE: {low_rmse}')\n" 222 | ], 223 | "execution_count": 0, 224 | "outputs": [] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": { 229 | "id": "qxkx3CInGhrp", 230 | "colab_type": "text" 231 | }, 232 | "source": [ 233 | "## Exercise\n", 234 | "\n", 235 | " - Try the same with the 'data/daily_female_births.csv' file" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "metadata": { 241 | "id": "Tma9T-sYGhrq", 242 | "colab_type": "code", 243 | "colab": {} 244 | }, 245 | "source": [ 246 | "df = pd.read_csv(colab_path+'data/daily_female_births.csv', parse_dates=True, index_col=0)" 247 | ], 248 | "execution_count": 0, 249 | "outputs": [] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "metadata": { 254 | "id": "1nayFHOGGhru", 255 | "colab_type": "code", 256 | "colab": {} 257 | }, 258 | "source": [ 259 | "data= df['Births'].values\n", 260 | "data = data.astype('float32')" 261 | ], 262 | "execution_count": 0, 263 | "outputs": [] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "metadata": { 268 | "id": "yqrLbDZBGhrv", 269 | "colab_type": "code", 270 | "colab": {} 271 | }, 272 | "source": [ 273 | "" 274 | ], 275 | "execution_count": 0, 276 | "outputs": [] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "metadata": { 281 | "id": "SvbXHwxyGhry", 282 | "colab_type": "code", 283 | "colab": {} 284 | }, 285 | "source": [ 286 | "" 287 | ], 288 | "execution_count": 0, 289 | "outputs": [] 290 | } 291 | ] 292 | } -------------------------------------------------------------------------------- /Time Series Boosting/Trees_and_Boosting_with_TS.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": 
"text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "Trees_and_Boosting_with_TS.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "markdown", 31 | "metadata": { 32 | "id": "Gl0KbU7NJbSz", 33 | "colab_type": "text" 34 | }, 35 | "source": [ 36 | "## Trees, Ensembles and XGBoost" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": { 42 | "id": "PtXnrfkUJbS1", 43 | "colab_type": "text" 44 | }, 45 | "source": [ 46 | " -- Introduction\n", 47 | " -- How they work?\n", 48 | " -- What are Trees useful for?" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": { 54 | "id": "52frCEIbJbS5", 55 | "colab_type": "text" 56 | }, 57 | "source": [ 58 | "## Imports and initialization" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "metadata": { 64 | "id": "HYWxk1z_JbS6", 65 | "colab_type": "code", 66 | "colab": {} 67 | }, 68 | "source": [ 69 | "import pandas as pd\n", 70 | "import numpy as np\n", 71 | "import matplotlib.pyplot as plt\n", 72 | "%matplotlib inline\n", 73 | "import matplotlib\n", 74 | "matplotlib.rcParams['figure.figsize'] = [20, 10]\n", 75 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"" 76 | ], 77 | "execution_count": 0, 78 | "outputs": [] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": { 83 | "id": "yKY7uggAJbS9", 84 | "colab_type": "text" 85 | }, 86 | "source": [ 87 | "## Data Preparation" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "metadata": { 93 | "id": "qY1vDqu8JbS-", 94 | "colab_type": "code", 95 | "colab": {} 96 | }, 97 | "source": [ 98 | "data = pd.read_csv(colab_path+'data/gdp_uk.csv')" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "iyvHmCz2JbTA", 107 | "colab_type": "code", 108 
| "colab": {} 109 | }, 110 | "source": [ 111 | "data[['year', 'value']].plot(x='year', y='value')" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "rmxVpvTzJbTE", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "data['gdp_growth'] = np.log(data.value / data.value.shift(1))\n", 125 | "data['is_inc'] = np.where(data.value / data.value.shift(1) > 1, 1, 0)" 126 | ], 127 | "execution_count": 0, 128 | "outputs": [] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "metadata": { 133 | "id": "ZpqqSug0JbTG", 134 | "colab_type": "code", 135 | "colab": {} 136 | }, 137 | "source": [ 138 | "data.head(10)" 139 | ], 140 | "execution_count": 0, 141 | "outputs": [] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "metadata": { 146 | "id": "UGpkZ15OJbTJ", 147 | "colab_type": "code", 148 | "colab": {} 149 | }, 150 | "source": [ 151 | "for lag in range(1, 6):\n", 152 | " data[f'gdp_growth_lag_{lag}'] = data['gdp_growth'].shift(lag)" 153 | ], 154 | "execution_count": 0, 155 | "outputs": [] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "metadata": { 160 | "id": "EzaKWbB7JbTL", 161 | "colab_type": "code", 162 | "colab": {} 163 | }, 164 | "source": [ 165 | "data.dropna(inplace=True)" 166 | ], 167 | "execution_count": 0, 168 | "outputs": [] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "metadata": { 173 | "id": "UgkdeJXeJbTN", 174 | "colab_type": "code", 175 | "colab": {} 176 | }, 177 | "source": [ 178 | "df = data[['year', \n", 179 | " 'gdp_growth_lag_1', \n", 180 | " 'gdp_growth_lag_2',\n", 181 | " 'gdp_growth_lag_3',\n", 182 | " 'gdp_growth_lag_4',\n", 183 | " 'gdp_growth_lag_5',\n", 184 | " 'gdp_growth',\n", 185 | " 'is_inc']].copy()" 186 | ], 187 | "execution_count": 0, 188 | "outputs": [] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "metadata": { 193 | "id": "EJjSBUWcJbTP", 194 | "colab_type": "code", 195 | "colab": {} 196 | }, 197 | "source": [ 198 | 
"df.head(10)" 199 | ], 200 | "execution_count": 0, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "metadata": { 206 | "id": "5L6kSPzGJbTS", 207 | "colab_type": "code", 208 | "colab": {} 209 | }, 210 | "source": [ 211 | "features_columns = ['gdp_growth_lag_1', 'gdp_growth_lag_2','gdp_growth_lag_3', 'gdp_growth_lag_4', 'gdp_growth_lag_5']\n", 212 | "target = 'is_inc'" 213 | ], 214 | "execution_count": 0, 215 | "outputs": [] 216 | }, 217 | { 218 | "cell_type": "markdown", 219 | "metadata": { 220 | "id": "QIRCUqNGJbTU", 221 | "colab_type": "text" 222 | }, 223 | "source": [ 224 | "## Trees & XGBoost" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "metadata": { 230 | "id": "Kdu8IWN4JbTV", 231 | "colab_type": "code", 232 | "colab": {} 233 | }, 234 | "source": [ 235 | "import xgboost as xgb\n", 236 | "from sklearn.ensemble import RandomForestClassifier\n", 237 | "from sklearn.tree import DecisionTreeClassifier" 238 | ], 239 | "execution_count": 0, 240 | "outputs": [] 241 | }, 242 | { 243 | "cell_type": "code", 244 | "metadata": { 245 | "id": "MDNBPa5sJbTX", 246 | "colab_type": "code", 247 | "colab": {} 248 | }, 249 | "source": [ 250 | "model = xgb.XGBClassifier(max_depth=5)\n", 251 | "# model = RandomForestClassifier(n_estimators=20, max_depth=5)\n", 252 | "# model = DecisionTreeClassifier(max_depth=5)" 253 | ], 254 | "execution_count": 0, 255 | "outputs": [] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "metadata": { 260 | "id": "ceal6ax7JbTa", 261 | "colab_type": "text" 262 | }, 263 | "source": [ 264 | "## Train" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "metadata": { 270 | "id": "KvvYmyHfJbTb", 271 | "colab_type": "code", 272 | "colab": {} 273 | }, 274 | "source": [ 275 | "train_df = df[df.year < 1990].copy()\n", 276 | "test_df = df[df.year >= 1990].copy()" 277 | ], 278 | "execution_count": 0, 279 | "outputs": [] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "metadata": { 284 | "id": "rHgid13QJbTc", 285 | 
"colab_type": "code", 286 | "colab": {} 287 | }, 288 | "source": [ 289 | "model.fit(train_df[features_columns], train_df[target])" 290 | ], 291 | "execution_count": 0, 292 | "outputs": [] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "metadata": { 297 | "id": "DYq8TciHJbTe", 298 | "colab_type": "code", 299 | "colab": {} 300 | }, 301 | "source": [ 302 | "model.feature_importances_" 303 | ], 304 | "execution_count": 0, 305 | "outputs": [] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": { 310 | "id": "6rwnESI9JbTg", 311 | "colab_type": "text" 312 | }, 313 | "source": [ 314 | "## Test" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "metadata": { 320 | "id": "y1SkxdwCJbTh", 321 | "colab_type": "code", 322 | "colab": {} 323 | }, 324 | "source": [ 325 | "df['is_inc_pred'] = model.predict(df[features_columns])\n", 326 | "test_df['is_inc_pred'] = model.predict(test_df[features_columns])" 327 | ], 328 | "execution_count": 0, 329 | "outputs": [] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "metadata": { 334 | "id": "2jhLUFYaJbTj", 335 | "colab_type": "code", 336 | "colab": {} 337 | }, 338 | "source": [ 339 | "from sklearn.metrics import accuracy_score" 340 | ], 341 | "execution_count": 0, 342 | "outputs": [] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "metadata": { 347 | "id": "0pKidA2MJbTm", 348 | "colab_type": "code", 349 | "colab": {} 350 | }, 351 | "source": [ 352 | "accuracy_score(test_df['is_inc'], test_df['is_inc_pred'])" 353 | ], 354 | "execution_count": 0, 355 | "outputs": [] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "metadata": { 360 | "id": "vgIbauk3JbTq", 361 | "colab_type": "code", 362 | "colab": {} 363 | }, 364 | "source": [ 365 | "accuracy_score(df['is_inc'], df['is_inc_pred'])" 366 | ], 367 | "execution_count": 0, 368 | "outputs": [] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": { 373 | "id": "7kdN7fLyJbTt", 374 | "colab_type": "text" 375 | }, 376 | "source": [ 377 | "## Regressor" 378 | ] 
379 | }, 380 | { 381 | "cell_type": "code", 382 | "metadata": { 383 | "id": "hgFDvCzSJbTv", 384 | "colab_type": "code", 385 | "colab": {} 386 | }, 387 | "source": [ 388 | "model = xgb.XGBRegressor()" 389 | ], 390 | "execution_count": 0, 391 | "outputs": [] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "metadata": { 396 | "id": "3U4lpd7PJbTx", 397 | "colab_type": "code", 398 | "colab": {} 399 | }, 400 | "source": [ 401 | "dir(model)" 402 | ], 403 | "execution_count": 0, 404 | "outputs": [] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "metadata": { 409 | "id": "RRalGBkHJbTz", 410 | "colab_type": "code", 411 | "colab": {} 412 | }, 413 | "source": [ 414 | "model.fit(train_df[features_columns], train_df['gdp_growth']) " 415 | ], 416 | "execution_count": 0, 417 | "outputs": [] 418 | }, 419 | { 420 | "cell_type": "code", 421 | "metadata": { 422 | "id": "7b4JvBodJbT1", 423 | "colab_type": "code", 424 | "colab": {} 425 | }, 426 | "source": [ 427 | "df['gdp_growth_pred'] = model.predict(df[features_columns])\n", 428 | "test_df['gdp_growth_pred'] = model.predict(test_df[features_columns])" 429 | ], 430 | "execution_count": 0, 431 | "outputs": [] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "metadata": { 436 | "id": "ORBNP_7iJbT5", 437 | "colab_type": "code", 438 | "colab": {} 439 | }, 440 | "source": [ 441 | "df[['year', 'gdp_growth', 'gdp_growth_pred']].plot(x='year', y=['gdp_growth_pred', 'gdp_growth'])" 442 | ], 443 | "execution_count": 0, 444 | "outputs": [] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "metadata": { 449 | "id": "mYLfxZpzJbT6", 450 | "colab_type": "code", 451 | "colab": {} 452 | }, 453 | "source": [ 454 | "test_df[['year', 'gdp_growth', 'gdp_growth_pred']].plot(x='year', y=['gdp_growth_pred', 'gdp_growth'])" 455 | ], 456 | "execution_count": 0, 457 | "outputs": [] 458 | }, 459 | { 460 | "cell_type": "markdown", 461 | "metadata": { 462 | "id": "TkGBbm4WJbT8", 463 | "colab_type": "text" 464 | }, 465 | "source": [ 466 | "## Gotchas with 
Trees" 467 | ] 468 | }, 469 | { 470 | "cell_type": "markdown", 471 | "metadata": { 472 | "id": "kMALpLcwJbT9", 473 | "colab_type": "text" 474 | }, 475 | "source": [ 476 | " -- Do not capture linear relationships\n", 477 | " -- Trees have no inherent notion of time, so the temporal structure must be supplied explicitly (e.g. as lag features)\n", 478 | " -- Work wonderfully for structured data\n", 479 | " -- One-hot encoding of categorical features is mandatory (otherwise ordinality is assumed)" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "metadata": { 485 | "id": "9ZkeHuZIJbT9", 486 | "colab_type": "code", 487 | "colab": {} 488 | }, 489 | "source": [ 490 | "" 491 | ], 492 | "execution_count": 0, 493 | "outputs": [] 494 | } 495 | ] 496 | } -------------------------------------------------------------------------------- /Time Series with Deep Learning/01.Time Series Forecasting with MLP.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "01.Time Series Forecasting with MLP.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "hpQDh6LuJdMF", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import keras\n", 38 | "import pandas as pd\n", 39 | "from matplotlib import pyplot as plt\n", 40 | "from sklearn.model_selection import TimeSeriesSplit\n", 41 | "from sklearn.preprocessing import MinMaxScaler\n", 42 | "from keras.models import Sequential\n", 43 | "from keras.layers import 
Dense,Dropout\n", 44 | "from keras.optimizers import SGD\n", 45 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"" 46 | ], 47 | "execution_count": 0, 48 | "outputs": [] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "metadata": { 53 | "id": "QNoFd5cNJdMK", 54 | "colab_type": "code", 55 | "colab": {} 56 | }, 57 | "source": [ 58 | "rainfall_data_monthly = pd.read_csv(colab_path+\"data/All_India_Area_Weighted_Monthly_Rainfall.csv\")" 59 | ], 60 | "execution_count": 0, 61 | "outputs": [] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "metadata": { 66 | "id": "FzcMFuBnJdMO", 67 | "colab_type": "code", 68 | "colab": {} 69 | }, 70 | "source": [ 71 | "rainfall_data_monthly.head()" 72 | ], 73 | "execution_count": 0, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "metadata": { 79 | "id": "AXqu0JUZJdMT", 80 | "colab_type": "code", 81 | "colab": {} 82 | }, 83 | "source": [ 84 | "rainfall_data_monthly['Value'].plot()" 85 | ], 86 | "execution_count": 0, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "d3EC8jq7JdMW", 93 | "colab_type": "code", 94 | "colab": {} 95 | }, 96 | "source": [ 97 | "rainfall_data_monthly['Time'] = pd.to_datetime(rainfall_data_monthly['Time'])" 98 | ], 99 | "execution_count": 0, 100 | "outputs": [] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "metadata": { 105 | "id": "WjiDUjTzJdMZ", 106 | "colab_type": "code", 107 | "colab": {} 108 | }, 109 | "source": [ 110 | "rainfall_data_monthly = rainfall_data_monthly.set_index('Time')" 111 | ], 112 | "execution_count": 0, 113 | "outputs": [] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "metadata": { 118 | "id": "LQak2yB4JdMc", 119 | "colab_type": "code", 120 | "colab": {} 121 | }, 122 | "source": [ 123 | "rainfall_data_monthly['Value'].resample('6M').mean().plot()" 124 | ], 125 | "execution_count": 0, 126 | "outputs": [] 127 | }, 128 | { 129 | "cell_type": "code", 130 | "metadata": { 131 | "id": 
"mNf_nBs2JdMf", 132 | "colab_type": "code", 133 | "colab": {} 134 | }, 135 | "source": [ 136 | "rainfall_data_monthly = rainfall_data_monthly.reset_index()" 137 | ], 138 | "execution_count": 0, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "metadata": { 144 | "id": "XxcA5YRoJdMh", 145 | "colab_type": "code", 146 | "colab": {} 147 | }, 148 | "source": [ 149 | "rainfall_data_monthly['Value_s_1']=rainfall_data_monthly['Value'].shift(1)\n", 150 | "rainfall_data_monthly['Value_s_2']= rainfall_data_monthly['Value'].shift(2)\n", 151 | "rainfall_data_monthly['Value_d_1']= rainfall_data_monthly['Value'].diff(1)" 152 | ], 153 | "execution_count": 0, 154 | "outputs": [] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "metadata": { 159 | "id": "-37eKSFPJdMk", 160 | "colab_type": "code", 161 | "colab": {} 162 | }, 163 | "source": [ 164 | "rainfall_data_monthly[['Value','Value_s_1','Value_s_2','Value_d_1']].loc[3]" 165 | ], 166 | "execution_count": 0, 167 | "outputs": [] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "metadata": { 172 | "id": "xSRpciAKJdMn", 173 | "colab_type": "code", 174 | "colab": {} 175 | }, 176 | "source": [ 177 | "# Let's split the data into train and test\n", 178 | "# TimeSeriesSplit(n_splits=5) yields 5 expanding-window splits; the loop overwrites its variables, so only the last split is kept (train ==> first 5 folds, test ==> final fold)\n", 179 | "\n", 180 | "tscv = TimeSeriesSplit(n_splits=5)\n", 181 | "\n", 182 | "for train_index, test_index in tscv.split(rainfall_data_monthly.Time):\n", 183 | " y_train,y_test = rainfall_data_monthly['Value'][train_index],rainfall_data_monthly['Value'][test_index]\n", 184 | " x_train,x_test = rainfall_data_monthly[['Value_s_1','Value_s_2','Value_d_1']].loc[train_index],rainfall_data_monthly[['Value_s_1','Value_s_2','Value_d_1']].loc[test_index]" 185 | ], 186 | "execution_count": 0, 187 | "outputs": [] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "metadata": { 192 | "id": "MRcK9G0JJdMp", 193 | "colab_type": "code", 194 | "colab": {} 195 | }, 196 | "source": [ 197 | "x_train = x_train[3:]\n", 198 | "y_train = 
y_train[3:]" 199 | ], 200 | "execution_count": 0, 201 | "outputs": [] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "metadata": { 206 | "id": "l_MlA6J4JdMs", 207 | "colab_type": "code", 208 | "colab": {} 209 | }, 210 | "source": [ 211 | "" 212 | ], 213 | "execution_count": 0, 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "metadata": { 219 | "id": "loW6c7hVJdMy", 220 | "colab_type": "code", 221 | "colab": {} 222 | }, 223 | "source": [ 224 | "#x_train = x_train.values.reshape(-1,1)\n", 225 | "#x_test = x_test.values.reshape(-1,1)" 226 | ], 227 | "execution_count": 0, 228 | "outputs": [] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "metadata": { 233 | "id": "6wAi2RbTJdM0", 234 | "colab_type": "code", 235 | "colab": {} 236 | }, 237 | "source": [ 238 | "#y_train" 239 | ], 240 | "execution_count": 0, 241 | "outputs": [] 242 | }, 243 | { 244 | "cell_type": "code", 245 | "metadata": { 246 | "id": "oIsu3jPYJdM3", 247 | "colab_type": "code", 248 | "colab": {} 249 | }, 250 | "source": [ 251 | "# Scaling X (the input features)\n", 252 | "\n", 253 | "scaler = MinMaxScaler(feature_range=(0,1))\n", 254 | "\n", 255 | "x_train = scaler.fit_transform(x_train)\n", 256 | "x_test = scaler.fit_transform(x_test)\n" 257 | ], 258 | "execution_count": 0, 259 | "outputs": [] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "metadata": { 264 | "id": "Kf6YLoeSJdM6", 265 | "colab_type": "code", 266 | "colab": {} 267 | }, 268 | "source": [ 269 | "y_train = y_train.values.reshape(-1,1)\n", 270 | "y_test = y_test.values.reshape(-1,1)\n", 271 | "y_train = scaler.fit_transform(y_train)\n", 272 | "y_test = scaler.fit_transform(y_test)\n" 273 | ], 274 | "execution_count": 0, 275 | "outputs": [] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "metadata": { 280 | "id": "9ycJ5KAoJdM8", 281 | "colab_type": "code", 282 | "colab": {} 283 | }, 284 | "source": [ 285 | "mlp_model = Sequential()\n", 286 | "mlp_model.add(Dense(100, activation='sigmoid', input_dim=x_train.shape[1]))\n", 287 | 
"mlp_model.add(Dense(100,activation='sigmoid'))\n", 288 | "mlp_model.add(Dropout(0.2))\n", 289 | "mlp_model.add(Dense(1,activation='sigmoid'))\n", 290 | "opt = SGD(lr=0.001)\n", 291 | "mlp_model.compile(loss='mean_squared_error', optimizer='adam')" 292 | ], 293 | "execution_count": 0, 294 | "outputs": [] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "metadata": { 299 | "id": "wHWJgeHjJdM-", 300 | "colab_type": "code", 301 | "colab": {} 302 | }, 303 | "source": [ 304 | "mlp_model.fit(x_train,y_train,nb_epoch=50, batch_size=50, validation_split=0.2)" 305 | ], 306 | "execution_count": 0, 307 | "outputs": [] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "metadata": { 312 | "id": "wnuRmmEjJdNA", 313 | "colab_type": "code", 314 | "colab": {} 315 | }, 316 | "source": [ 317 | "preds = mlp_model.predict(x_test)" 318 | ], 319 | "execution_count": 0, 320 | "outputs": [] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "metadata": { 325 | "id": "_2XVYZXyJdND", 326 | "colab_type": "code", 327 | "colab": {} 328 | }, 329 | "source": [ 330 | "plt.figure(figsize=(10, 5.5))\n", 331 | "plt.plot(preds,linestyle='-', marker='*',color='b')\n", 332 | "plt.plot(y_test,linestyle='-', marker='.',color='r')\n", 333 | "plt.legend(['Predicted','Actual'], loc=2)\n", 334 | "plt.title('Actual vs Predicted Rainfall')\n", 335 | "plt.ylabel('rainfall in mm')\n", 336 | "plt.xlabel('Index')\n", 337 | "plt.savefig('rain_fall_mlp', format='png', dpi=300)" 338 | ], 339 | "execution_count": 0, 340 | "outputs": [] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "metadata": { 345 | "id": "fMxloxHcJdNH", 346 | "colab_type": "code", 347 | "colab": {} 348 | }, 349 | "source": [ 350 | "from sklearn.metrics import r2_score\n", 351 | "\n", 352 | "r2_score(y_test,preds)" 353 | ], 354 | "execution_count": 0, 355 | "outputs": [] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "metadata": { 360 | "id": "WF9ZHRTjJdNJ", 361 | "colab_type": "code", 362 | "colab": {} 363 | }, 364 | "source": [ 365 | "" 366 | 
], 367 | "execution_count": 0, 368 | "outputs": [] 369 | } 370 | ] 371 | } -------------------------------------------------------------------------------- /Time Series with Deep Learning/02.Time Series Forecasting with LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "02.Time Series Forecasting with LSTM.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "Dggq19RmJeta", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import pandas as pd\n", 38 | "import numpy as np\n", 39 | "from math import sqrt\n", 40 | "from sklearn.metrics import r2_score\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "from keras.models import Sequential\n", 43 | "from keras.layers import Dense, Dropout,LSTM\n", 44 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"" 45 | ], 46 | "execution_count": 0, 47 | "outputs": [] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": { 52 | "id": "ZIMSGUXLJetg", 53 | "colab_type": "text" 54 | }, 55 | "source": [ 56 | "# Univariate Forecasting" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "metadata": { 62 | "id": "mOA7YgKlJeti", 63 | "colab_type": "code", 64 | "colab": {} 65 | }, 66 | "source": [ 67 | "rainfall_data = pd.read_csv(colab_path+\"data/All_India_Area_Weighted_Monthly_Rainfall.csv\")" 68 | ], 69 | "execution_count": 0, 70 
| "outputs": [] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "metadata": { 75 | "id": "QAO3OiHTJetl", 76 | "colab_type": "code", 77 | "colab": {} 78 | }, 79 | "source": [ 80 | "\n", 81 | "def makeXy(ts, nb_timesteps):\n", 82 | " \"\"\"\n", 83 | " Input: \n", 84 | " ts: original time series\n", 85 | " nb_timesteps: number of time steps in the regressors\n", 86 | " Output: \n", 87 | " X: 2-D array of regressors\n", 88 | " y: 1-D array of target \n", 89 | " \"\"\"\n", 90 | " X = []\n", 91 | " y = []\n", 92 | " for i in range(nb_timesteps, ts.shape[0]):\n", 93 | " X.append(list(ts.loc[i-nb_timesteps:i-1]))\n", 94 | " y.append(ts.loc[i])\n", 95 | " X, y = np.array(X), np.array(y)\n", 96 | " return X, y" 97 | ], 98 | "execution_count": 0, 99 | "outputs": [] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "metadata": { 104 | "id": "T_Bt5ePYJeto", 105 | "colab_type": "code", 106 | "colab": {} 107 | }, 108 | "source": [ 109 | "from sklearn.preprocessing import MinMaxScaler\n", 110 | "scaler = MinMaxScaler(feature_range=(0, 1))\n", 111 | "rainfall_data['Value'] = scaler.fit_transform(np.array(rainfall_data['Value']).reshape(-1, 1))" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "qpOmUCoTJetr", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "X,y = makeXy(rainfall_data['Value'],10)\n", 125 | "\n", 126 | "X_train = X[:1100]\n", 127 | "X_test = X[1100:]\n", 128 | "y_train = y[:1100]\n", 129 | "y_test = y[1100:]" 130 | ], 131 | "execution_count": 0, 132 | "outputs": [] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "metadata": { 137 | "id": "7VJTBIMaJetv", 138 | "colab_type": "code", 139 | "colab": {} 140 | }, 141 | "source": [ 142 | "X_train = np.reshape(X_train, (X_train.shape[0],X_train.shape[1],1))\n", 143 | "X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))" 144 | ], 145 | "execution_count": 0, 146 | "outputs": [] 147 | }, 148 | { 149 | 
"cell_type": "code", 150 | "metadata": { 151 | "id": "DBoPapo5Jety", 152 | "colab_type": "code", 153 | "colab": {} 154 | }, 155 | "source": [ 156 | "lstm_model = Sequential()\n", 157 | "lstm_model.add(LSTM(50, input_shape = (X_train.shape[1],1), return_sequences=True, activation = 'relu'))\n", 158 | "lstm_model.add(Dropout(0.1))\n", 159 | "lstm_model.add(LSTM(50,activation='relu'))\n", 160 | "lstm_model.add(Dropout(0.1))\n", 161 | "lstm_model.add(Dense(1))\n", 162 | "lstm_model.compile(loss = 'mean_squared_error',\n", 163 | " optimizer = 'adam',\n", 164 | " metrics = ['mse'])\n", 165 | "lstm_model.fit(X_train, y_train, \n", 166 | " nb_epoch=50, batch_size=50,validation_split=0.2)" 167 | ], 168 | "execution_count": 0, 169 | "outputs": [] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "metadata": { 174 | "id": "WEbaLK_1JeuB", 175 | "colab_type": "code", 176 | "colab": {} 177 | }, 178 | "source": [ 179 | "preds = lstm_model.predict(X_test)" 180 | ], 181 | "execution_count": 0, 182 | "outputs": [] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "metadata": { 187 | "id": "d5DcJCMEJeuG", 188 | "colab_type": "code", 189 | "colab": {} 190 | }, 191 | "source": [ 192 | "def plot_predictions(preds,y_test):\n", 193 | " plt.figure(figsize=(10, 5.5))\n", 194 | " plt.plot(preds,linestyle='-',color='b')\n", 195 | " plt.plot(y_test,linestyle='-',color='r')\n", 196 | " plt.legend(['Predicted','Actual'], loc=2)" 197 | ], 198 | "execution_count": 0, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "metadata": { 204 | "id": "FpAVjSwfJeuN", 205 | "colab_type": "code", 206 | "colab": {} 207 | }, 208 | "source": [ 209 | "r2_score(y_test,preds)" 210 | ], 211 | "execution_count": 0, 212 | "outputs": [] 213 | }, 214 | { 215 | "cell_type": "markdown", 216 | "metadata": { 217 | "id": "A3ki9EkVJeuX", 218 | "colab_type": "text" 219 | }, 220 | "source": [ 221 | "# MultiVariate Forecasting" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "metadata": { 227 | 
"id": "Llz63bnuJeua", 228 | "colab_type": "code", 229 | "colab": {} 230 | }, 231 | "source": [ 232 | "multi_data = pd.read_csv(colab_path+\"data/pollution.csv\",parse_dates=True,index_col=0)" 233 | ], 234 | "execution_count": 0, 235 | "outputs": [] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "metadata": { 240 | "id": "pQVytBclJeuf", 241 | "colab_type": "code", 242 | "colab": {} 243 | }, 244 | "source": [ 245 | "multi_data.head()" 246 | ], 247 | "execution_count": 0, 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "jzVFmMGlJeun", 254 | "colab_type": "code", 255 | "colab": {} 256 | }, 257 | "source": [ 258 | "def create_lags(df):\n", 259 | " for col in df.columns:\n", 260 | " df[col+\"_1\"] = df[col].shift(1)\n", 261 | " return df.dropna()" 262 | ], 263 | "execution_count": 0, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "metadata": { 269 | "id": "Z-F5OeC8Jeus", 270 | "colab_type": "code", 271 | "colab": {} 272 | }, 273 | "source": [ 274 | "reframed_multi_data = create_lags(multi_data)" 275 | ], 276 | "execution_count": 0, 277 | "outputs": [] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "metadata": { 282 | "id": "pbT9oEufJeux", 283 | "colab_type": "code", 284 | "colab": {} 285 | }, 286 | "source": [ 287 | "reframed_multi_data.reset_index(inplace=True)\n", 288 | "del reframed_multi_data['date']" 289 | ], 290 | "execution_count": 0, 291 | "outputs": [] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "metadata": { 296 | "id": "yrKRxDsnJeu2", 297 | "colab_type": "code", 298 | "colab": {} 299 | }, 300 | "source": [ 301 | "reframed_multi_data.columns" 302 | ], 303 | "execution_count": 0, 304 | "outputs": [] 305 | }, 306 | { 307 | "cell_type": "code", 308 | "metadata": { 309 | "id": "5IoDgK2UJeu9", 310 | "colab_type": "code", 311 | "colab": {} 312 | }, 313 | "source": [ 314 | "y = reframed_multi_data['pollution']" 315 | ], 316 | "execution_count": 0, 317 | "outputs": [] 318 | }, 319 | { 
320 | "cell_type": "code", 321 | "metadata": { 322 | "id": "TCJhWDfxJevD", 323 | "colab_type": "code", 324 | "colab": {} 325 | }, 326 | "source": [ 327 | "x = reframed_multi_data[['dew', 'temp', 'press', 'wnd_spd', 'pollution_1', 'dew_1',\n", 328 | " 'temp_1', 'press_1', 'wnd_spd_1']]" 329 | ], 330 | "execution_count": 0, 331 | "outputs": [] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "metadata": { 336 | "id": "ULuguLHpJevJ", 337 | "colab_type": "code", 338 | "colab": {} 339 | }, 340 | "source": [ 341 | "x_train = x[:35799]\n", 342 | "x_test = x[35799:]" 343 | ], 344 | "execution_count": 0, 345 | "outputs": [] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "metadata": { 350 | "id": "WzkZhGKIJevN", 351 | "colab_type": "code", 352 | "colab": {} 353 | }, 354 | "source": [ 355 | "y_train = y[:35799]\n", 356 | "y_test = y[35799:]" 357 | ], 358 | "execution_count": 0, 359 | "outputs": [] 360 | }, 361 | { 362 | "cell_type": "code", 363 | "metadata": { 364 | "id": "Dkry6nXDJevY", 365 | "colab_type": "code", 366 | "colab": {} 367 | }, 368 | "source": [ 369 | "from sklearn.preprocessing import MinMaxScaler\n", 370 | "scaler = MinMaxScaler(feature_range=(0,1))\n", 371 | "x_train = scaler.fit_transform(x_train)\n", 372 | "x_test = scaler.transform(x_test)\n", 373 | "y_train = scaler.fit_transform(y_train.values.reshape(-1,1))\n", 374 | "y_test = scaler.transform(y_test.values.reshape(-1,1))\n", 375 | "\n", 376 | "x_train = x_train.reshape(x_train.shape[0],1,x_train.shape[1])\n", 377 | "x_test = x_test.reshape(x_test.shape[0],1,x_test.shape[1])" 378 | ], 379 | "execution_count": 0, 380 | "outputs": [] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "metadata": { 385 | "id": "xgG_8s8eJeve", 386 | "colab_type": "code", 387 | "colab": {} 388 | }, 389 | "source": [ 390 | "# Samples, Timesteps, features\n", 391 | "x_train.shape" 392 | ], 393 | "execution_count": 0, 394 | "outputs": [] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "metadata": { 399 | "id": 
"SZWbffB-Jevh", 400 | "colab_type": "code", 401 | "colab": {} 402 | }, 403 | "source": [ 404 | "multi_lstm_model = Sequential()\n", 405 | "multi_lstm_model.add(LSTM(100,input_shape=(x_train.shape[1],x_train.shape[2])))\n", 406 | "multi_lstm_model.add(Dense(1))\n", 407 | "multi_lstm_model.compile(loss=\"mse\",optimizer=\"adam\")" 408 | ], 409 | "execution_count": 0, 410 | "outputs": [] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "metadata": { 415 | "id": "Dq6r4Qs0Jevl", 416 | "colab_type": "code", 417 | "colab": {} 418 | }, 419 | "source": [ 420 | "multi_lstm_model.fit(x_train,y_train,epochs=25,batch_size=500)" 421 | ], 422 | "execution_count": 0, 423 | "outputs": [] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "metadata": { 428 | "id": "xEMikB1GJevt", 429 | "colab_type": "code", 430 | "colab": {} 431 | }, 432 | "source": [ 433 | "y_pred = multi_lstm_model.predict(x_test)" 434 | ], 435 | "execution_count": 0, 436 | "outputs": [] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "metadata": { 441 | "id": "P_U53VUcJevy", 442 | "colab_type": "code", 443 | "colab": {} 444 | }, 445 | "source": [ 446 | "def plot_predictions(preds,y_test):\n", 447 | " plt.figure(figsize=(10, 5.5))\n", 448 | " plt.plot(preds,linestyle='-',color='b')\n", 449 | " plt.plot(y_test,linestyle='-',color='r')\n", 450 | " plt.legend(['Predicted','Actual'], loc=2)\n", 451 | " plt.title('Actual vs Predicted')" 452 | ], 453 | "execution_count": 0, 454 | "outputs": [] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "metadata": { 459 | "id": "1oOxrP5QJev2", 460 | "colab_type": "code", 461 | "colab": {} 462 | }, 463 | "source": [ 464 | "plot_predictions(y_pred[:100],y_test[:100])" 465 | ], 466 | "execution_count": 0, 467 | "outputs": [] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "metadata": { 472 | "id": "QpcKbEb8Jev9", 473 | "colab_type": "code", 474 | "colab": {} 475 | }, 476 | "source": [ 477 | "r2_score(y_test,y_pred)" 478 | ], 479 | "execution_count": 0, 480 | "outputs": [] 481 | 
}, 482 | { 483 | "cell_type": "code", 484 | "metadata": { 485 | "id": "J1UUvQU3JewC", 486 | "colab_type": "code", 487 | "colab": {} 488 | }, 489 | "source": [ 490 | "" 491 | ], 492 | "execution_count": 0, 493 | "outputs": [] 494 | } 495 | ] 496 | } -------------------------------------------------------------------------------- /Time Series with Deep Learning/What went wrong with this LSTM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "kernelspec": { 6 | "display_name": "Python 3", 7 | "language": "python", 8 | "name": "python3" 9 | }, 10 | "language_info": { 11 | "codemirror_mode": { 12 | "name": "ipython", 13 | "version": 3 14 | }, 15 | "file_extension": ".py", 16 | "mimetype": "text/x-python", 17 | "name": "python", 18 | "nbconvert_exporter": "python", 19 | "pygments_lexer": "ipython3", 20 | "version": "3.6.1" 21 | }, 22 | "colab": { 23 | "name": "What went wrong with this LSTM.ipynb", 24 | "version": "0.3.2", 25 | "provenance": [] 26 | } 27 | }, 28 | "cells": [ 29 | { 30 | "cell_type": "code", 31 | "metadata": { 32 | "id": "bbupexIPKuso", 33 | "colab_type": "code", 34 | "colab": {} 35 | }, 36 | "source": [ 37 | "import pandas as pd\n", 38 | "from keras.models import Sequential\n", 39 | "from keras.layers import LSTM,Dense\n", 40 | "import matplotlib.pyplot as plt\n", 41 | "from sklearn.metrics import r2_score\n", 42 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"" 43 | ], 44 | "execution_count": 0, 45 | "outputs": [] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "metadata": { 50 | "id": "o8EtOpvLKuss", 51 | "colab_type": "code", 52 | "colab": {} 53 | }, 54 | "source": [ 55 | "multi_data = pd.read_csv(colab_path+\"data/flotation-cell.csv\",parse_dates=True,index_col=0)" 56 | ], 57 | "execution_count": 0, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "metadata": { 63 | "id": 
"u-89zwJ1Kusu", 64 | "colab_type": "code", 65 | "colab": {} 66 | }, 67 | "source": [ 68 | "def create_lags(df):\n", 69 | " for col in df.columns:\n", 70 | " df[col+\"_1\"] = df[col].shift(1)\n", 71 | " return df.dropna()" 72 | ], 73 | "execution_count": 0, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "metadata": { 79 | "id": "9XsOYqMvKusw", 80 | "colab_type": "code", 81 | "colab": {} 82 | }, 83 | "source": [ 84 | "# Look at the dataset because something is wrong ?" 85 | ], 86 | "execution_count": 0, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "GfqdxiAaKusy", 93 | "colab_type": "code", 94 | "colab": {} 95 | }, 96 | "source": [ 97 | "multi_data.reset_index(inplace=True)\n", 98 | "del multi_data['Date and time']" 99 | ], 100 | "execution_count": 0, 101 | "outputs": [] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "metadata": { 106 | "id": "lVp3P7GMKus0", 107 | "colab_type": "code", 108 | "colab": {} 109 | }, 110 | "source": [ 111 | "multi_data.dropna(inplace=True)" 112 | ], 113 | "execution_count": 0, 114 | "outputs": [] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "metadata": { 119 | "id": "wGtrnQFmKus2", 120 | "colab_type": "code", 121 | "colab": {} 122 | }, 123 | "source": [ 124 | "reframed_multi_data = create_lags(multi_data)" 125 | ], 126 | "execution_count": 0, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "metadata": { 132 | "id": "Ssnoxz0tKus3", 133 | "colab_type": "code", 134 | "colab": {} 135 | }, 136 | "source": [ 137 | "reframed_multi_data.columns" 138 | ], 139 | "execution_count": 0, 140 | "outputs": [] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "metadata": { 145 | "id": "i2gGvpjCKus5", 146 | "colab_type": "code", 147 | "colab": {} 148 | }, 149 | "source": [ 150 | "y = reframed_multi_data['Air flow rate']" 151 | ], 152 | "execution_count": 0, 153 | "outputs": [] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "metadata": { 158 | "id": 
"Q722-7ojKus8", 159 | "colab_type": "code", 160 | "colab": {} 161 | }, 162 | "source": [ 163 | "x = reframed_multi_data[['Upstream pH', 'CuSO4 added', 'Pulp level',\n", 164 | " 'Feed rate', 'Feed rate_1', 'Upstream pH_1', 'CuSO4 added_1',\n", 165 | " 'Pulp level_1', 'Air flow rate_1']]" 166 | ], 167 | "execution_count": 0, 168 | "outputs": [] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "metadata": { 173 | "id": "YLwuoN-VKus-", 174 | "colab_type": "code", 175 | "colab": {} 176 | }, 177 | "source": [ 178 | "x_train = x[:2500]\n", 179 | "x_test = x[2500:]" 180 | ], 181 | "execution_count": 0, 182 | "outputs": [] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "metadata": { 187 | "id": "81HOwb0OKutA", 188 | "colab_type": "code", 189 | "colab": {} 190 | }, 191 | "source": [ 192 | "y_train = y[:2500]\n", 193 | "y_test = y[2500:]" 194 | ], 195 | "execution_count": 0, 196 | "outputs": [] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "metadata": { 201 | "id": "8pozYKSPKutD", 202 | "colab_type": "code", 203 | "colab": {} 204 | }, 205 | "source": [ 206 | "# Something fishy here\n", 207 | "from sklearn.preprocessing import MinMaxScaler\n", 208 | "scaler = MinMaxScaler(feature_range=(0,1))\n", 209 | "x_train = scaler.fit_transform(x_train)\n", 210 | "x_test = scaler.fit_transform(x_test)\n", 211 | "y_train = scaler.fit_transform(y_train.values.reshape(-1,1))\n", 212 | "y_test = scaler.fit_transform(y_test.values.reshape(-1,1))\n", 213 | "\n", 214 | "x_train = x_train.reshape(x_train.shape[0],1,x_train.shape[1])\n", 215 | "x_test = x_test.reshape(x_test.shape[0],1,x_test.shape[1])" 216 | ], 217 | "execution_count": 0, 218 | "outputs": [] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "metadata": { 223 | "id": "7qxJGZljKutF", 224 | "colab_type": "code", 225 | "colab": {} 226 | }, 227 | "source": [ 228 | "x_train.shape" 229 | ], 230 | "execution_count": 0, 231 | "outputs": [] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "metadata": { 236 | "id": "lQOJ-dG-KutH", 
237 | "colab_type": "code", 238 | "colab": {} 239 | }, 240 | "source": [ 241 | "# does hyper parameter tuning help ?\n", 242 | "multi_lstm_model = Sequential()\n", 243 | "multi_lstm_model.add(LSTM(100,input_shape=(x_train.shape[1],x_train.shape[2])))\n", 244 | "multi_lstm_model.add(Dense(1))\n", 245 | "multi_lstm_model.compile(loss=\"mse\",optimizer=\"adam\")" 246 | ], 247 | "execution_count": 0, 248 | "outputs": [] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "metadata": { 253 | "id": "QbbWWyU_KutJ", 254 | "colab_type": "code", 255 | "colab": {} 256 | }, 257 | "source": [ 258 | "multi_lstm_model.fit(x_train,y_train,nb_epoch=25,batch_size=100)" 259 | ], 260 | "execution_count": 0, 261 | "outputs": [] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "metadata": { 266 | "id": "lXVNTQkbKutK", 267 | "colab_type": "code", 268 | "colab": {} 269 | }, 270 | "source": [ 271 | "y_pred = multi_lstm_model.predict(x_test)" 272 | ], 273 | "execution_count": 0, 274 | "outputs": [] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "metadata": { 279 | "id": "Xf5x9Cc8KutM", 280 | "colab_type": "code", 281 | "colab": {} 282 | }, 283 | "source": [ 284 | "def plot_predictions(preds,y_test):\n", 285 | " plt.figure(figsize=(10, 5.5))\n", 286 | " plt.plot(preds,linestyle='-',color='b')\n", 287 | " plt.plot(y_test,linestyle='-',color='r')\n", 288 | " plt.legend(['Predicted','Actual'], loc=2)\n", 289 | " plt.title('Actual vs Predicted')" 290 | ], 291 | "execution_count": 0, 292 | "outputs": [] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "metadata": { 297 | "id": "6-uhppEyKutO", 298 | "colab_type": "code", 299 | "colab": {} 300 | }, 301 | "source": [ 302 | "plot_predictions(y_pred[:100],y_test[:100])" 303 | ], 304 | "execution_count": 0, 305 | "outputs": [] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "metadata": { 310 | "id": "2u3zn_ZjKutP", 311 | "colab_type": "code", 312 | "colab": {} 313 | }, 314 | "source": [ 315 | "r2_score(y_test,y_pred)" 316 | ], 317 | 
"execution_count": 0, 318 | "outputs": [] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "metadata": { 323 | "id": "L6dEQAqGKutS", 324 | "colab_type": "code", 325 | "colab": {} 326 | }, 327 | "source": [ 328 | "" 329 | ], 330 | "execution_count": 0, 331 | "outputs": [] 332 | } 333 | ] 334 | } -------------------------------------------------------------------------------- /data/AirPassengers.csv: -------------------------------------------------------------------------------- 1 | Month,#Passengers 1949-01,112 1949-02,118 1949-03,132 1949-04,129 1949-05,121 1949-06,135 1949-07,148 1949-08,148 1949-09,136 1949-10,119 1949-11,104 1949-12,118 1950-01,115 1950-02,126 1950-03,141 1950-04,135 1950-05,125 1950-06,149 1950-07,170 1950-08,170 1950-09,158 1950-10,133 1950-11,114 1950-12,140 1951-01,145 1951-02,150 1951-03,178 1951-04,163 1951-05,172 1951-06,178 1951-07,199 1951-08,199 1951-09,184 1951-10,162 1951-11,146 1951-12,166 1952-01,171 1952-02,180 1952-03,193 1952-04,181 1952-05,183 1952-06,218 1952-07,230 1952-08,242 1952-09,209 1952-10,191 1952-11,172 1952-12,194 1953-01,196 1953-02,196 1953-03,236 1953-04,235 1953-05,229 1953-06,243 1953-07,264 1953-08,272 1953-09,237 1953-10,211 1953-11,180 1953-12,201 1954-01,204 1954-02,188 1954-03,235 1954-04,227 1954-05,234 1954-06,264 1954-07,302 1954-08,293 1954-09,259 1954-10,229 1954-11,203 1954-12,229 1955-01,242 1955-02,233 1955-03,267 1955-04,269 1955-05,270 1955-06,315 1955-07,364 1955-08,347 1955-09,312 1955-10,274 1955-11,237 1955-12,278 1956-01,284 1956-02,277 1956-03,317 1956-04,313 1956-05,318 1956-06,374 1956-07,413 1956-08,405 1956-09,355 1956-10,306 1956-11,271 1956-12,306 1957-01,315 1957-02,301 1957-03,356 1957-04,348 1957-05,355 1957-06,422 1957-07,465 1957-08,467 1957-09,404 1957-10,347 1957-11,305 1957-12,336 1958-01,340 1958-02,318 1958-03,362 1958-04,348 1958-05,363 1958-06,435 1958-07,491 1958-08,505 1958-09,404 1958-10,359 1958-11,310 1958-12,337 1959-01,360 1959-02,342 1959-03,406 1959-04,396 
1959-05,420 1959-06,472 1959-07,548 1959-08,559 1959-09,463 1959-10,407 1959-11,362 1959-12,405 1960-01,417 1960-02,391 1960-03,419 1960-04,461 1960-05,472 1960-06,535 1960-07,622 1960-08,606 1960-09,508 1960-10,461 1960-11,390 1960-12,432 -------------------------------------------------------------------------------- /data/All-Transactions House Price Index.csv: -------------------------------------------------------------------------------- 1 | Date,Value 2 | 1990-01-01,164.17 3 | 1990-04-01,164.84 4 | 1990-07-01,165.83 5 | 1990-10-01,165.19 6 | 1991-01-01,166.56 7 | 1991-04-01,167.68 8 | 1991-07-01,167.98 9 | 1991-10-01,170.34 10 | 1992-01-01,171.66 11 | 1992-04-01,171.62 12 | 1992-07-01,173.58 13 | 1992-10-01,174.46 14 | 1993-01-01,174.53 15 | 1993-04-01,176.19 16 | 1993-07-01,177.61 17 | 1993-10-01,179.08 18 | 1994-01-01,180.22 19 | 1994-04-01,181.32 20 | 1994-07-01,182.06 21 | 1994-10-01,181.9 22 | 1995-01-01,182.87 23 | 1995-04-01,185.86 24 | 1995-07-01,188.71 25 | 1995-10-01,190.2 26 | 1996-01-01,192.34 27 | 1996-04-01,192.68 28 | 1996-07-01,193.53 29 | 1996-10-01,195.05 30 | 1997-01-01,196.67 31 | 1997-04-01,198.32 32 | 1997-07-01,201.04 33 | 1997-10-01,203.66 34 | 1998-01-01,206.69 35 | 1998-04-01,208.56 36 | 1998-07-01,211.46 37 | 1998-10-01,213.97 38 | 1999-01-01,216.22 39 | 1999-04-01,219.12 40 | 1999-07-01,222.23 41 | 1999-10-01,224.52 42 | 2000-01-01,228.81 43 | 2000-04-01,232.54 44 | 2000-07-01,236.77 45 | 2000-10-01,240.42 46 | 2001-01-01,246.34 47 | 2001-04-01,250.47 48 | 2001-07-01,254.36 49 | 2001-10-01,257.51 50 | 2002-01-01,261.21 51 | 2002-04-01,265.65 52 | 2002-07-01,270.92 53 | 2002-10-01,274.76 54 | 2003-01-01,278.11 55 | 2003-04-01,281.49 56 | 2003-07-01,285.8 57 | 2003-10-01,293.88 58 | 2004-01-01,298.47 59 | 2004-04-01,305.7 60 | 2004-07-01,317.4 61 | 2004-10-01,324.16 62 | 2005-01-01,331.63 63 | 2005-04-01,342.23 64 | 2005-07-01,352.54 65 | 2005-10-01,360.53 66 | 2006-01-01,366.27 67 | 2006-04-01,370 68 | 2006-07-01,373.08 69 | 
2006-10-01,376.87 70 | 2007-01-01,378.23 71 | 2007-04-01,377.98 72 | 2007-07-01,373.79 73 | 2007-10-01,372.64 74 | 2008-01-01,369.98 75 | 2008-04-01,360.71 76 | 2008-07-01,349.42 77 | 2008-10-01,346.25 78 | 2009-01-01,348.8 79 | 2009-04-01,339.58 80 | 2009-07-01,330.58 81 | 2009-10-01,328.16 82 | 2010-01-01,324.21 83 | 2010-04-01,321.32 84 | 2010-07-01,324.39 85 | 2010-10-01,322.03 86 | 2011-01-01,313.19 87 | 2011-04-01,307.78 88 | 2011-07-01,310.13 89 | 2011-10-01,311.5 90 | 2012-01-01,308.25 91 | 2012-04-01,307.02 92 | 2012-07-01,310.98 93 | 2012-10-01,313.51 94 | 2013-01-01,315.16 95 | 2013-04-01,320.16 96 | 2013-07-01,325.31 97 | 2013-10-01,328.2 98 | 2014-01-01,331.12 99 | 2014-04-01,337.56 100 | 2014-07-01,342.53 101 | 2014-10-01,345.41 102 | 2015-01-01,348.86 103 | 2015-04-01,355.22 104 | 2015-07-01,360.9 105 | 2015-10-01,364.34 106 | 2016-01-01,367.93 107 | 2016-04-01,375.33 108 | 2016-07-01,381.93 109 | 2016-10-01,385.38 110 | 2017-01-01,388.91 111 | 2017-04-01,399.18 -------------------------------------------------------------------------------- /data/daily_female_births.csv: -------------------------------------------------------------------------------- 1 | "Date","Births" 2 | "1959-01-01",35 3 | "1959-01-02",32 4 | "1959-01-03",30 5 | "1959-01-04",31 6 | "1959-01-05",44 7 | "1959-01-06",29 8 | "1959-01-07",45 9 | "1959-01-08",43 10 | "1959-01-09",38 11 | "1959-01-10",27 12 | "1959-01-11",38 13 | "1959-01-12",33 14 | "1959-01-13",55 15 | "1959-01-14",47 16 | "1959-01-15",45 17 | "1959-01-16",37 18 | "1959-01-17",50 19 | "1959-01-18",43 20 | "1959-01-19",41 21 | "1959-01-20",52 22 | "1959-01-21",34 23 | "1959-01-22",53 24 | "1959-01-23",39 25 | "1959-01-24",32 26 | "1959-01-25",37 27 | "1959-01-26",43 28 | "1959-01-27",39 29 | "1959-01-28",35 30 | "1959-01-29",44 31 | "1959-01-30",38 32 | "1959-01-31",24 33 | "1959-02-01",23 34 | "1959-02-02",31 35 | "1959-02-03",44 36 | "1959-02-04",38 37 | "1959-02-05",50 38 | "1959-02-06",38 39 | "1959-02-07",51 40 | 
"1959-02-08",31 41 | "1959-02-09",31 42 | "1959-02-10",51 43 | "1959-02-11",36 44 | "1959-02-12",45 45 | "1959-02-13",51 46 | "1959-02-14",34 47 | "1959-02-15",52 48 | "1959-02-16",47 49 | "1959-02-17",45 50 | "1959-02-18",46 51 | "1959-02-19",39 52 | "1959-02-20",48 53 | "1959-02-21",37 54 | "1959-02-22",35 55 | "1959-02-23",52 56 | "1959-02-24",42 57 | "1959-02-25",45 58 | "1959-02-26",39 59 | "1959-02-27",37 60 | "1959-02-28",30 61 | "1959-03-01",35 62 | "1959-03-02",28 63 | "1959-03-03",45 64 | "1959-03-04",34 65 | "1959-03-05",36 66 | "1959-03-06",50 67 | "1959-03-07",44 68 | "1959-03-08",39 69 | "1959-03-09",32 70 | "1959-03-10",39 71 | "1959-03-11",45 72 | "1959-03-12",43 73 | "1959-03-13",39 74 | "1959-03-14",31 75 | "1959-03-15",27 76 | "1959-03-16",30 77 | "1959-03-17",42 78 | "1959-03-18",46 79 | "1959-03-19",41 80 | "1959-03-20",36 81 | "1959-03-21",45 82 | "1959-03-22",46 83 | "1959-03-23",43 84 | "1959-03-24",38 85 | "1959-03-25",34 86 | "1959-03-26",35 87 | "1959-03-27",56 88 | "1959-03-28",36 89 | "1959-03-29",32 90 | "1959-03-30",50 91 | "1959-03-31",41 92 | "1959-04-01",39 93 | "1959-04-02",41 94 | "1959-04-03",47 95 | "1959-04-04",34 96 | "1959-04-05",36 97 | "1959-04-06",33 98 | "1959-04-07",35 99 | "1959-04-08",38 100 | "1959-04-09",38 101 | "1959-04-10",34 102 | "1959-04-11",53 103 | "1959-04-12",34 104 | "1959-04-13",34 105 | "1959-04-14",38 106 | "1959-04-15",35 107 | "1959-04-16",32 108 | "1959-04-17",42 109 | "1959-04-18",34 110 | "1959-04-19",46 111 | "1959-04-20",30 112 | "1959-04-21",46 113 | "1959-04-22",45 114 | "1959-04-23",54 115 | "1959-04-24",34 116 | "1959-04-25",37 117 | "1959-04-26",35 118 | "1959-04-27",40 119 | "1959-04-28",42 120 | "1959-04-29",58 121 | "1959-04-30",51 122 | "1959-05-01",32 123 | "1959-05-02",35 124 | "1959-05-03",38 125 | "1959-05-04",33 126 | "1959-05-05",39 127 | "1959-05-06",47 128 | "1959-05-07",38 129 | "1959-05-08",52 130 | "1959-05-09",30 131 | "1959-05-10",34 132 | "1959-05-11",40 133 | 
"1959-05-12",35 134 | "1959-05-13",42 135 | "1959-05-14",41 136 | "1959-05-15",42 137 | "1959-05-16",38 138 | "1959-05-17",24 139 | "1959-05-18",34 140 | "1959-05-19",43 141 | "1959-05-20",36 142 | "1959-05-21",55 143 | "1959-05-22",41 144 | "1959-05-23",45 145 | "1959-05-24",41 146 | "1959-05-25",37 147 | "1959-05-26",43 148 | "1959-05-27",39 149 | "1959-05-28",33 150 | "1959-05-29",43 151 | "1959-05-30",40 152 | "1959-05-31",38 153 | "1959-06-01",45 154 | "1959-06-02",46 155 | "1959-06-03",34 156 | "1959-06-04",35 157 | "1959-06-05",48 158 | "1959-06-06",51 159 | "1959-06-07",36 160 | "1959-06-08",33 161 | "1959-06-09",46 162 | "1959-06-10",42 163 | "1959-06-11",48 164 | "1959-06-12",34 165 | "1959-06-13",41 166 | "1959-06-14",35 167 | "1959-06-15",40 168 | "1959-06-16",34 169 | "1959-06-17",30 170 | "1959-06-18",36 171 | "1959-06-19",40 172 | "1959-06-20",39 173 | "1959-06-21",45 174 | "1959-06-22",38 175 | "1959-06-23",47 176 | "1959-06-24",33 177 | "1959-06-25",30 178 | "1959-06-26",42 179 | "1959-06-27",43 180 | "1959-06-28",41 181 | "1959-06-29",41 182 | "1959-06-30",59 183 | "1959-07-01",43 184 | "1959-07-02",45 185 | "1959-07-03",38 186 | "1959-07-04",37 187 | "1959-07-05",45 188 | "1959-07-06",42 189 | "1959-07-07",57 190 | "1959-07-08",46 191 | "1959-07-09",51 192 | "1959-07-10",41 193 | "1959-07-11",47 194 | "1959-07-12",26 195 | "1959-07-13",35 196 | "1959-07-14",44 197 | "1959-07-15",41 198 | "1959-07-16",42 199 | "1959-07-17",36 200 | "1959-07-18",45 201 | "1959-07-19",45 202 | "1959-07-20",45 203 | "1959-07-21",47 204 | "1959-07-22",38 205 | "1959-07-23",42 206 | "1959-07-24",35 207 | "1959-07-25",36 208 | "1959-07-26",39 209 | "1959-07-27",45 210 | "1959-07-28",43 211 | "1959-07-29",47 212 | "1959-07-30",36 213 | "1959-07-31",41 214 | "1959-08-01",50 215 | "1959-08-02",39 216 | "1959-08-03",41 217 | "1959-08-04",46 218 | "1959-08-05",64 219 | "1959-08-06",45 220 | "1959-08-07",34 221 | "1959-08-08",38 222 | "1959-08-09",44 223 | "1959-08-10",48 224 
| "1959-08-11",46 225 | "1959-08-12",44 226 | "1959-08-13",37 227 | "1959-08-14",39 228 | "1959-08-15",44 229 | "1959-08-16",45 230 | "1959-08-17",33 231 | "1959-08-18",44 232 | "1959-08-19",38 233 | "1959-08-20",46 234 | "1959-08-21",46 235 | "1959-08-22",40 236 | "1959-08-23",39 237 | "1959-08-24",44 238 | "1959-08-25",48 239 | "1959-08-26",50 240 | "1959-08-27",41 241 | "1959-08-28",42 242 | "1959-08-29",51 243 | "1959-08-30",41 244 | "1959-08-31",44 245 | "1959-09-01",38 246 | "1959-09-02",68 247 | "1959-09-03",40 248 | "1959-09-04",42 249 | "1959-09-05",51 250 | "1959-09-06",44 251 | "1959-09-07",45 252 | "1959-09-08",36 253 | "1959-09-09",57 254 | "1959-09-10",44 255 | "1959-09-11",42 256 | "1959-09-12",53 257 | "1959-09-13",42 258 | "1959-09-14",34 259 | "1959-09-15",40 260 | "1959-09-16",56 261 | "1959-09-17",44 262 | "1959-09-18",53 263 | "1959-09-19",55 264 | "1959-09-20",39 265 | "1959-09-21",59 266 | "1959-09-22",55 267 | "1959-09-23",73 268 | "1959-09-24",55 269 | "1959-09-25",44 270 | "1959-09-26",43 271 | "1959-09-27",40 272 | "1959-09-28",47 273 | "1959-09-29",51 274 | "1959-09-30",56 275 | "1959-10-01",49 276 | "1959-10-02",54 277 | "1959-10-03",56 278 | "1959-10-04",47 279 | "1959-10-05",44 280 | "1959-10-06",43 281 | "1959-10-07",42 282 | "1959-10-08",45 283 | "1959-10-09",50 284 | "1959-10-10",48 285 | "1959-10-11",43 286 | "1959-10-12",40 287 | "1959-10-13",59 288 | "1959-10-14",41 289 | "1959-10-15",42 290 | "1959-10-16",51 291 | "1959-10-17",49 292 | "1959-10-18",45 293 | "1959-10-19",43 294 | "1959-10-20",42 295 | "1959-10-21",38 296 | "1959-10-22",47 297 | "1959-10-23",38 298 | "1959-10-24",36 299 | "1959-10-25",42 300 | "1959-10-26",35 301 | "1959-10-27",28 302 | "1959-10-28",44 303 | "1959-10-29",36 304 | "1959-10-30",45 305 | "1959-10-31",46 306 | "1959-11-01",48 307 | "1959-11-02",49 308 | "1959-11-03",43 309 | "1959-11-04",42 310 | "1959-11-05",59 311 | "1959-11-06",45 312 | "1959-11-07",52 313 | "1959-11-08",46 314 | "1959-11-09",42 
315 | "1959-11-10",40 316 | "1959-11-11",40 317 | "1959-11-12",45 318 | "1959-11-13",35 319 | "1959-11-14",35 320 | "1959-11-15",40 321 | "1959-11-16",39 322 | "1959-11-17",33 323 | "1959-11-18",42 324 | "1959-11-19",47 325 | "1959-11-20",51 326 | "1959-11-21",44 327 | "1959-11-22",40 328 | "1959-11-23",57 329 | "1959-11-24",49 330 | "1959-11-25",45 331 | "1959-11-26",49 332 | "1959-11-27",51 333 | "1959-11-28",46 334 | "1959-11-29",44 335 | "1959-11-30",52 336 | "1959-12-01",45 337 | "1959-12-02",32 338 | "1959-12-03",46 339 | "1959-12-04",41 340 | "1959-12-05",34 341 | "1959-12-06",33 342 | "1959-12-07",36 343 | "1959-12-08",49 344 | "1959-12-09",43 345 | "1959-12-10",43 346 | "1959-12-11",34 347 | "1959-12-12",39 348 | "1959-12-13",35 349 | "1959-12-14",52 350 | "1959-12-15",47 351 | "1959-12-16",52 352 | "1959-12-17",39 353 | "1959-12-18",40 354 | "1959-12-19",42 355 | "1959-12-20",42 356 | "1959-12-21",53 357 | "1959-12-22",39 358 | "1959-12-23",40 359 | "1959-12-24",38 360 | "1959-12-25",44 361 | "1959-12-26",34 362 | "1959-12-27",37 363 | "1959-12-28",52 364 | "1959-12-29",48 365 | "1959-12-30",55 366 | "1959-12-31",50 -------------------------------------------------------------------------------- /data/exercise3.csv: -------------------------------------------------------------------------------- 1 | Date,Value 2 | 1990-01-01,164.17 3 | 1990-04-01,164.84 4 | 1990-07-01,165.83 5 | 1990-10-01,NaN 6 | 1991-01-01,166.56 7 | 1991-04-01,167.68 8 | 1991-07-01,167.98 9 | 1991-10-01,170.34 10 | 1992-01-01,NaN 11 | 1992-04-01,171.62 12 | 1992-07-01,173.58 13 | 1992-10-01,NaN 14 | 1993-01-01,174.53 15 | 1993-04-01,176.19 16 | 1993-07-01,177.61 17 | 1993-10-01,179.08 18 | 1994-01-01,NaN 19 | 1994-04-01,181.32 20 | 1994-07-01,182.06 21 | 1994-10-01,181.9 22 | 1995-01-01,182.87 23 | 1995-04-01,185.86 24 | 1995-07-01,188.71 25 | 1995-10-01,190.2 26 | 1996-01-01,192.34 27 | 1996-04-01,192.68 28 | 1996-07-01,NaN 29 | 1996-10-01,195.05 30 | 1997-01-01,196.67 31 | 
1997-04-01,198.32 32 | 1997-07-01,201.04 33 | 1997-10-01,203.66 34 | 1998-01-01,206.69 35 | 1998-04-01,208.56 36 | 1998-07-01,211.46 37 | 1998-10-01,213.97 38 | 1999-01-01,216.22 39 | 1999-04-01,NaN 40 | 1999-07-01,222.23 41 | 1999-10-01,224.52 42 | 2000-01-01,228.81 43 | 2000-04-01,232.54 44 | 2000-07-01,236.77 45 | 2000-10-01,240.42 -------------------------------------------------------------------------------- /data/exercise_4.csv: -------------------------------------------------------------------------------- 1 | date,temp 2 | 01/01/81,20.70 3 | 02/01/81,17.90 4 | 03/01/81,18.80 5 | 04/01/81,14.60 6 | 05/01/81,15.80 7 | 06/01/81,15.80 8 | 07/01/81,15.80 9 | 08/01/81,17.40 10 | 09/01/81,21.80 11 | 10/01/81,20.00 12 | 11/01/81,16.20 13 | 12/01/81,13.30 14 | 13/01/81,16.70 15 | 14/01/81,21.50 16 | 15/01/81,25.00 17 | 16/01/81,20.70 18 | 17/01/81,20.60 19 | 18/01/81,24.80 20 | 19/01/81,17.70 21 | 20/01/81,15.50 22 | 21/01/81,18.20 23 | 22/01/81,12.10 24 | 23/01/81,14.40 25 | 24/01/81,16.00 26 | 25/01/81,16.50 27 | 26/01/81,18.70 28 | 27/01/81,19.40 29 | 28/01/81,17.20 30 | 29/01/81,15.50 31 | 30/01/81,15.10 32 | 31/01/81,15.40 33 | 01/02/81,15.30 34 | 02/02/81,18.80 35 | 03/02/81,21.90 36 | 04/02/81,19.90 37 | 05/02/81,16.60 38 | 06/02/81,16.80 39 | 07/02/81,14.60 40 | 08/02/81,17.10 41 | 09/02/81,25.00 42 | 10/02/81,15.00 43 | 11/02/81,13.70 44 | 12/02/81,13.90 45 | 13/02/81,18.30 46 | 14/02/81,22.00 47 | 15/02/81,22.10 48 | 16/02/81,21.20 49 | 17/02/81,18.40 50 | 18/02/81,16.60 51 | 19/02/81,16.10 52 | 20/02/81,15.70 53 | 21/02/81,16.60 54 | 22/02/81,16.50 55 | 23/02/81,14.40 56 | 24/02/81,14.40 57 | 25/02/81,18.50 58 | 26/02/81,16.90 59 | 27/02/81,17.50 60 | 28/02/81,21.20 61 | 01/03/81,17.80 62 | 02/03/81,18.60 63 | 03/03/81,17.00 64 | 04/03/81,16.00 65 | 05/03/81,13.30 66 | 06/03/81,14.30 67 | 07/03/81,11.40 68 | 08/03/81,16.30 69 | 09/03/81,16.10 70 | 10/03/81,11.80 71 | 11/03/81,12.20 72 | 12/03/81,14.70 73 | 13/03/81,11.80 74 | 14/03/81,11.30 75 | 
15/03/81,10.60 76 | 16/03/81,11.70 77 | 17/03/81,14.20 78 | 18/03/81,11.20 79 | 19/03/81,16.90 80 | 20/03/81,16.70 81 | 21/03/81,8.10 82 | 22/03/81,8.00 83 | 23/03/81,8.80 84 | 24/03/81,13.40 85 | 25/03/81,10.90 86 | 26/03/81,13.40 87 | 27/03/81,11.00 88 | 28/03/81,15.00 89 | 29/03/81,15.70 90 | 30/03/81,14.50 91 | 31/03/81,15.80 92 | 01/04/81,16.70 93 | 02/04/81,16.80 94 | 03/04/81,17.50 95 | 04/04/81,17.10 96 | 05/04/81,18.10 97 | 06/04/81,16.60 98 | 07/04/81,10.00 99 | 08/04/81,14.90 100 | 09/04/81,15.90 101 | 10/04/81,13.00 102 | 11/04/81,7.60 103 | 12/04/81,11.50 104 | 13/04/81,13.50 105 | 14/04/81,13.00 106 | 15/04/81,13.30 107 | 16/04/81,12.10 108 | 17/04/81,12.40 109 | 18/04/81,13.20 110 | 19/04/81,13.80 111 | 20/04/81,10.60 112 | 21/04/81,9.00 113 | 22/04/81,10.00 114 | 23/04/81,9.80 115 | 24/04/81,11.50 116 | 25/04/81,8.90 117 | 26/04/81,7.40 118 | 27/04/81,9.90 119 | 28/04/81,9.30 120 | 29/04/81,9.90 121 | 30/04/81,7.40 122 | 01/05/81,8.60 123 | 02/05/81,11.90 124 | 03/05/81,14.00 125 | 04/05/81,8.60 126 | 05/05/81,10.00 127 | 06/05/81,13.50 128 | 07/05/81,12.00 129 | 08/05/81,10.50 130 | 09/05/81,10.70 131 | 10/05/81,8.10 132 | 11/05/81,10.10 133 | 12/05/81,10.60 134 | 13/05/81,5.30 135 | 14/05/81,6.60 136 | 15/05/81,8.50 137 | 16/05/81,11.20 138 | 17/05/81,9.80 139 | 18/05/81,5.90 140 | 19/05/81,3.20 141 | 20/05/81,2.10 142 | 21/05/81,3.40 143 | 22/05/81,5.40 144 | 23/05/81,9.60 145 | 24/05/81,11.50 146 | 25/05/81,12.30 147 | 26/05/81,12.60 148 | 27/05/81,11.00 149 | 28/05/81,11.20 150 | 29/05/81,11.40 151 | 30/05/81,11.80 152 | 31/05/81,12.80 153 | 01/06/81,11.60 154 | 02/06/81,10.60 155 | 03/06/81,9.80 156 | 04/06/81,11.20 157 | 05/06/81,5.70 158 | 06/06/81,7.10 159 | 07/06/81,2.50 160 | 08/06/81,3.50 161 | 09/06/81,4.60 162 | 10/06/81,11.00 163 | 11/06/81,5.70 164 | 12/06/81,7.70 165 | 13/06/81,10.40 166 | 14/06/81,11.40 167 | 15/06/81,9.20 168 | 16/06/81,6.10 169 | 17/06/81,2.70 170 | 18/06/81,4.30 171 | 19/06/81,6.30 172 | 20/06/81,3.80 173 | 
21/06/81,4.40 174 | 22/06/81,7.10 175 | 23/06/81,4.80 176 | 24/06/81,5.80 177 | 25/06/81,6.20 178 | 26/06/81,7.30 179 | 27/06/81,9.20 180 | 28/06/81,10.20 181 | 29/06/81,9.50 182 | 30/06/81,9.50 183 | -------------------------------------------------------------------------------- /data/exercise_sample.csv: -------------------------------------------------------------------------------- 1 | x,value 2 | 2018-11-01T0-10-1,-0.803908052 3 | 2018-11-02T3-45-3,1.586827482 4 | 2018-11-03T0-34-10,0.224111754 5 | 2018-11-01T0-10-1,-0.803908052 6 | 2018-11-02T3-45-3,1.586827482 7 | 2018-11-03T0-34-10,0.224111754 8 | 2018-11-04T11-3-4,-0.715936787 -------------------------------------------------------------------------------- /data/gdp_india.csv: -------------------------------------------------------------------------------- 1 | Country Name,Country Code,Indicator Name,Indicator Code,Attribute,year,value 2 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1951,25.9378 3 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1952,26.90571429 4 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1953,25.90192308 5 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1954,25.81030928 6 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1955,27.7407767 7 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1956,26.38644068 8 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1957,28.75166667 9 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1958,30.16119403 10 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1959,34.115 11 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1960,36.46 12 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1961,38.69375 13 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1962,36.16648649 14 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1963,35.49433962 15 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1964,33.77056452 16 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1965,35.99348659 17 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1966,35.99348659 18 | India,534,Debt to GDP 
Ratio,GGXWDG_GDP,Value,1967,38.50895954 19 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1968,40.27629428 20 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1969,38.7700495 21 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1970,38.14814815 22 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1971,37.55053996 23 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1972,37.16431373 24 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1973,32.69516129 25 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1974,36.66120219 26 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1975,38.60532995 27 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1976,36.2426384 28 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1977,38.60561915 29 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1978,38.34932821 30 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1979,40.85664336 31 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1980,41.25 32 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1981,40.40050063 33 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1982,41.98764739 34 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1983,38.97453213 35 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1984,40.87122449 36 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1985,35.8421 37 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1986,39.0676 38 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1987,41.4087 39 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1988,41.9725 40 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1989,47.1218 41 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1990,49.90853192 42 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1991,75.33224179 43 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1992,77.40538509 44 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1993,76.97819715 45 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1994,73.46440213 46 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1995,69.65384282 47 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1996,65.97664655 48 | India,534,Debt to GDP 
Ratio,GGXWDG_GDP,Value,1997,67.8180393 49 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1998,68.0898994 50 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1999,70.03924264 51 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2000,73.64899508 52 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2001,78.72846968 53 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2002,82.84996788 54 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2003,84.24303728 55 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2004,83.28859967 56 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2005,80.89367746 57 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2006,77.10831335 58 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2007,74.02651796 59 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2008,74.53613636 60 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2009,72.52660659 -------------------------------------------------------------------------------- /data/reliance_data_day.csv: -------------------------------------------------------------------------------- 1 | date,high,low,open,close,volume 2 | 2018-01-01 00:00:00+05:30,922.7,907.5,922.7,909.75,4321686 3 | 2018-01-02 00:00:00+05:30,919.55,906.4,913,911.15,4342815 4 | 2018-01-03 00:00:00+05:30,926,913.05,925,914.8,6175312 5 | 2018-01-04 00:00:00+05:30,921.8,915.7,918.15,920.3,4118581 6 | 2018-01-05 00:00:00+05:30,926.9,920.25,921.8,923.25,3401905 7 | 2018-01-08 00:00:00+05:30,931,923.5,926.1,928.55,4035417 8 | 2018-01-09 00:00:00+05:30,943.9,924,928.15,940.95,6534997 9 | 2018-01-10 00:00:00+05:30,947.4,935.5,943,942.35,5361502 10 | 2018-01-11 00:00:00+05:30,942.65,935,941.8,937.75,3588727 11 | 2018-01-12 00:00:00+05:30,952.8,938.25,943,949,6890028 12 | 2018-01-15 00:00:00+05:30,958.5,945.2,950,949.15,5084113 13 | 2018-01-16 00:00:00+05:30,947.7,920,947.7,922.95,4948895 14 | 2018-01-17 00:00:00+05:30,929,907,926,924.5,6036432 15 | 2018-01-18 00:00:00+05:30,929.6,915.1,929,919.7,4289053 16 | 2018-01-19 00:00:00+05:30,934.4,922.1,923,931.3,4559564 17 | 
2018-01-22 00:00:00+05:30,974.3,940,948,971.5,20892838 18 | 2018-01-23 00:00:00+05:30,990.95,975,975,983.25,10298500 19 | 2018-01-24 00:00:00+05:30,981.75,962.65,981.75,966.35,6532672 20 | 2018-01-25 00:00:00+05:30,972,956.15,967.9,965.9,6249419 21 | 2018-01-29 00:00:00+05:30,977.3,959.3,966.2,964.5,4524605 22 | 2018-01-30 00:00:00+05:30,965.8,949,965.8,950.4,4198139 23 | 2018-01-31 00:00:00+05:30,964.5,941.55,950,961.3,5738209 24 | 2018-02-01 00:00:00+05:30,972.6,936.6,963.25,943.85,7554587 25 | 2018-02-02 00:00:00+05:30,943.95,901,935,904.35,13242095 26 | 2018-02-05 00:00:00+05:30,912.5,881.8,894,902.15,8773525 27 | 2018-02-06 00:00:00+05:30,898,871,877,892.1,11000567 28 | 2018-02-07 00:00:00+05:30,905.4,890,902.55,894.4,7100838 29 | 2018-02-08 00:00:00+05:30,910.65,892.35,897.9,904.55,6320734 30 | 2018-02-09 00:00:00+05:30,902,886,889,897.85,5294279 31 | 2018-02-12 00:00:00+05:30,918.2,902.05,903,915.5,5429792 32 | 2018-02-14 00:00:00+05:30,942.5,921,923,933.65,10170959 33 | 2018-02-15 00:00:00+05:30,945.75,928.15,937.65,935.65,6109454 34 | 2018-02-16 00:00:00+05:30,942.6,918.1,939.6,921.3,4401672 35 | 2018-02-19 00:00:00+05:30,932,913.55,921,927,4087936 36 | 2018-02-20 00:00:00+05:30,929.55,918.8,929,921.1,3866919 37 | 2018-02-21 00:00:00+05:30,932.5,922.15,930,929.35,5448230 38 | 2018-02-22 00:00:00+05:30,931,920,922,924.35,5118850 39 | 2018-02-23 00:00:00+05:30,938,922.75,925.4,934.25,4850839 40 | 2018-02-26 00:00:00+05:30,944.9,936.6,939,938.7,3856478 41 | 2018-02-27 00:00:00+05:30,955.8,938.35,939,950.5,6410097 42 | 2018-02-28 00:00:00+05:30,957.1,943.2,945.95,954.55,25898694 43 | 2018-03-01 00:00:00+05:30,959.9,945.95,949.75,948.4,4094652 44 | 2018-03-05 00:00:00+05:30,950.15,921.6,943,924.1,5634893 45 | 2018-03-06 00:00:00+05:30,936.2,905.1,929,910.85,7323680 46 | 2018-03-07 00:00:00+05:30,909.25,888.05,909.25,889.75,7247708 47 | 2018-03-08 00:00:00+05:30,914.5,893.3,897.65,911.45,6072983 48 | 2018-03-09 00:00:00+05:30,919.35,911.05,914.9,913.1,4863577 49 
| 2018-03-12 00:00:00+05:30,933.1,917.55,920,930.3,3801407 50 | 2018-03-13 00:00:00+05:30,936,921.35,931.9,931.85,5096825 51 | 2018-03-14 00:00:00+05:30,932.05,919.2,928.8,929.05,6731530 52 | 2018-03-15 00:00:00+05:30,929.45,910,925,911.8,5811424 53 | 2018-03-16 00:00:00+05:30,909.7,886.05,909,900.05,14087564 54 | 2018-03-19 00:00:00+05:30,906.9,892,901,895.5,5774392 55 | 2018-03-20 00:00:00+05:30,904,887.05,893.3,889.8,6945721 56 | 2018-03-21 00:00:00+05:30,902.35,893.3,894.85,896.9,6727261 57 | 2018-03-22 00:00:00+05:30,911.95,895.75,896,908.15,10116965 58 | 2018-03-23 00:00:00+05:30,900,889.1,890.6,893.9,10729694 59 | 2018-03-26 00:00:00+05:30,904.3,892.5,893.95,901.1,6047262 60 | 2018-03-27 00:00:00+05:30,910.75,897,907.2,899.8,6939577 61 | 2018-03-28 00:00:00+05:30,895.85,880,895.8,882.7,8759586 62 | 2018-04-02 00:00:00+05:30,900.95,886.5,893,892.95,5712065 63 | 2018-04-03 00:00:00+05:30,901.7,885.25,891,899.55,6364728 64 | 2018-04-04 00:00:00+05:30,911.8,892.05,904.7,894.9,6498407 65 | 2018-04-05 00:00:00+05:30,910,903.5,905.1,908.2,3898676 66 | 2018-04-06 00:00:00+05:30,918.5,905,908,911,4418462 67 | 2018-04-09 00:00:00+05:30,919,912.55,912.55,916,3572251 68 | 2018-04-10 00:00:00+05:30,923.9,914.15,918.4,917.05,3865402 69 | 2018-04-11 00:00:00+05:30,932.75,916.05,921.8,930.85,6007539 70 | 2018-04-12 00:00:00+05:30,936.65,924.2,929.85,928.7,4947725 71 | 2018-04-13 00:00:00+05:30,941.7,928.7,932.7,938.85,5767984 72 | 2018-04-16 00:00:00+05:30,940,928.5,934.45,937.05,4084450 73 | 2018-04-17 00:00:00+05:30,947.2,936,940,944.25,4388713 74 | 2018-04-18 00:00:00+05:30,948.2,935.05,945.25,938,3365196 75 | 2018-04-19 00:00:00+05:30,945,935,939.1,942.3,4481413 76 | 2018-04-20 00:00:00+05:30,941.85,921.2,938,927.9,3349519 77 | 2018-04-23 00:00:00+05:30,944.35,928.5,930,936,3508515 78 | 2018-04-24 00:00:00+05:30,975.7,935,935.8,970.05,9107264 79 | 2018-04-25 00:00:00+05:30,981.8,963.55,973.65,970.5,6436620 80 | 2018-04-26 00:00:00+05:30,988,962.25,973,975.35,15588001 81 
| 2018-04-27 00:00:00+05:30,1011,985.5,989.8,996.3,11666267 82 | 2018-04-30 00:00:00+05:30,992.6,960,982,963.3,8871802 83 | 2018-05-02 00:00:00+05:30,979.2,965.05,967,972.7,6117761 84 | 2018-05-03 00:00:00+05:30,977.9,959.25,977.9,961.6,3947546 85 | 2018-05-04 00:00:00+05:30,964.7,950,962.85,953.95,3795692 86 | 2018-05-07 00:00:00+05:30,973.25,956.35,958.85,971,3512532 87 | 2018-05-08 00:00:00+05:30,982,965.2,977.5,967.1,4934036 88 | 2018-05-09 00:00:00+05:30,984.3,962.25,965.9,976.7,4494423 89 | 2018-05-10 00:00:00+05:30,984.6,974.25,980,981.15,4014645 90 | 2018-05-11 00:00:00+05:30,991.3,977.75,981.25,989.25,4011516 91 | 2018-05-14 00:00:00+05:30,991.95,976.7,991.95,986.6,2579561 92 | 2018-05-15 00:00:00+05:30,1000,976.65,983.05,978.95,4800923 93 | 2018-05-16 00:00:00+05:30,974,952.45,974,956.15,8613363 94 | 2018-05-17 00:00:00+05:30,960,941.5,959.35,945.35,4733477 95 | 2018-05-18 00:00:00+05:30,953.5,932,945.6,933.85,5230037 96 | 2018-05-21 00:00:00+05:30,935.7,922.1,930.75,931.35,6624893 97 | 2018-05-22 00:00:00+05:30,934,925,933.15,926.75,5170585 98 | 2018-05-23 00:00:00+05:30,926,909.15,926,912.25,4567562 99 | 2018-05-24 00:00:00+05:30,921,910.15,914,915.85,5317983 100 | 2018-05-25 00:00:00+05:30,923.4,914.45,916,920.8,8129520 101 | 2018-05-28 00:00:00+05:30,927,918.1,924.9,920.4,5197835 102 | 2018-05-29 00:00:00+05:30,928,914.4,921.5,917.45,8173396 103 | 2018-05-30 00:00:00+05:30,923.2,906.6,914.85,916.1,5991222 104 | 2018-05-31 00:00:00+05:30,922.3,914.55,921,921.35,16951801 105 | 2018-06-01 00:00:00+05:30,935.75,918.45,920,930.55,8765835 106 | 2018-06-04 00:00:00+05:30,944,930.55,933.4,940.85,5600357 107 | 2018-06-05 00:00:00+05:30,955,941,942.8,947.65,7971524 108 | 2018-06-06 00:00:00+05:30,956,942.65,947,954.4,4189483 109 | 2018-06-07 00:00:00+05:30,976,954.45,954.9,971.35,6857300 110 | 2018-06-08 00:00:00+05:30,986.55,960.6,966.35,984.35,6532599 111 | 2018-06-11 00:00:00+05:30,999.95,979.05,987.1,982.65,6866053 112 | 2018-06-12 
00:00:00+05:30,1000,980.3,983.4,997.15,7445221 113 | 2018-06-13 00:00:00+05:30,1005.9,989.5,1000,1002.5,5926020 114 | 2018-06-14 00:00:00+05:30,1011.8,987.4,997.45,1007.95,5484867 115 | 2018-06-15 00:00:00+05:30,1023.5,999.25,1006,1014.2,11294702 116 | 2018-06-18 00:00:00+05:30,1020.45,1007,1008.8,1015.55,5652374 117 | 2018-06-19 00:00:00+05:30,1014.8,993.65,1013.9,996.05,6346101 118 | 2018-06-20 00:00:00+05:30,1022.9,994.85,996,1019.25,7193824 119 | 2018-06-21 00:00:00+05:30,1036,1021.3,1023.45,1031.95,10509756 120 | 2018-06-22 00:00:00+05:30,1029.2,1008.45,1028.4,1012.5,16169978 121 | 2018-06-25 00:00:00+05:30,1016.5,1002.3,1011.4,1004.45,5197694 122 | 2018-06-26 00:00:00+05:30,1006,975.25,1006,978.95,8068585 123 | 2018-06-27 00:00:00+05:30,985.4,962.7,978.5,965.85,8958555 124 | 2018-06-28 00:00:00+05:30,968.5,940.3,962,944.95,8581234 125 | 2018-06-29 00:00:00+05:30,975.5,949,949.1,972.45,7966401 126 | 2018-07-02 00:00:00+05:30,972.35,957,972.35,960.6,6660916 127 | 2018-07-03 00:00:00+05:30,978.45,960.35,965,971.3,5067261 128 | 2018-07-04 00:00:00+05:30,992,971,971.8,990.1,5000724 129 | 2018-07-05 00:00:00+05:30,1008.4,958.6,997,964.5,18719353 130 | 2018-07-06 00:00:00+05:30,983,964.5,964.75,977.55,9161266 131 | 2018-07-09 00:00:00+05:30,1000,983.5,987.1,997.1,5170189 132 | 2018-07-10 00:00:00+05:30,1028.5,1000,1002.75,1025.7,10444513 133 | 2018-07-11 00:00:00+05:30,1040.9,1017.85,1025,1038.8,8665072 134 | 2018-07-12 00:00:00+05:30,1099,1042.6,1044.35,1082.35,24109297 135 | 2018-07-13 00:00:00+05:30,1109,1080,1080.5,1099.8,17967282 136 | 2018-07-16 00:00:00+05:30,1108.9,1073.05,1099.8,1076.2,6595781 137 | 2018-07-17 00:00:00+05:30,1095.95,1069.6,1078.3,1092.35,6469256 138 | 2018-07-18 00:00:00+05:30,1104.6,1077.3,1098.4,1092.55,4682099 139 | 2018-07-19 00:00:00+05:30,1115,1086.35,1093.3,1104.85,5433617 140 | 2018-07-20 00:00:00+05:30,1138.5,1111.9,1113.4,1128.65,8018150 141 | 2018-07-23 00:00:00+05:30,1129.8,1113.1,1129.8,1120.3,5842496 142 | 2018-07-24 
00:00:00+05:30,1126.75,1107.4,1122,1110.95,4469144 143 | 2018-07-25 00:00:00+05:30,1123,1104.2,1110,1115.15,5949680 144 | 2018-07-26 00:00:00+05:30,1121.5,1107.8,1110.1,1110.65,6424867 145 | 2018-07-27 00:00:00+05:30,1137.4,1112.15,1119.5,1129.85,6679468 146 | 2018-07-30 00:00:00+05:30,1157.3,1128,1130,1151.4,13872467 147 | 2018-07-31 00:00:00+05:30,1190.4,1147.15,1151,1186,13875377 148 | 2018-08-01 00:00:00+05:30,1202.9,1185.4,1190.4,1192.35,9704295 149 | 2018-08-02 00:00:00+05:30,1191.5,1165.95,1191.5,1168.35,6248975 150 | 2018-08-03 00:00:00+05:30,1183.5,1169.55,1175,1176.95,6645397 151 | 2018-08-06 00:00:00+05:30,1196,1175.35,1176.7,1192.6,4016418 152 | 2018-08-07 00:00:00+05:30,1201.4,1177,1194.9,1184.2,4059460 153 | 2018-08-08 00:00:00+05:30,1222.5,1185,1185,1217.7,7912437 154 | 2018-08-09 00:00:00+05:30,1231.7,1212,1221.5,1217.95,6846928 155 | 2018-08-10 00:00:00+05:30,1215,1202.3,1211,1204.2,3581210 156 | 2018-08-13 00:00:00+05:30,1199.95,1182.7,1197.55,1187.7,4668159 157 | 2018-08-14 00:00:00+05:30,1217.2,1184.3,1190,1210.6,5299536 158 | 2018-08-16 00:00:00+05:30,1216,1195.5,1203.25,1200.8,5982490 159 | 2018-08-17 00:00:00+05:30,1215.4,1200.05,1209,1203.75,3830992 160 | 2018-08-20 00:00:00+05:30,1236.8,1207.65,1207.9,1234.9,7607036 161 | 2018-08-21 00:00:00+05:30,1251.5,1227,1237.25,1247.2,7279641 162 | 2018-08-23 00:00:00+05:30,1274,1238.4,1246.3,1269.45,7968123 163 | 2018-08-24 00:00:00+05:30,1279.95,1260.05,1265.25,1277.5,6652005 164 | 2018-08-27 00:00:00+05:30,1295.85,1278.75,1279.4,1291.5,5572177 165 | 2018-08-28 00:00:00+05:30,1323,1295,1296,1319,8694751 166 | 2018-08-29 00:00:00+05:30,1329,1290.2,1316.3,1294.2,11077203 167 | 2018-08-30 00:00:00+05:30,1297.15,1270.45,1293.6,1274.45,9850042 168 | 2018-08-31 00:00:00+05:30,1270,1236.6,1270,1241.65,15305857 169 | 2018-09-03 00:00:00+05:30,1257.9,1226.05,1245,1229.15,12200022 170 | 2018-09-04 00:00:00+05:30,1254.8,1229,1235.15,1241.95,8838770 171 | 2018-09-05 
00:00:00+05:30,1252.65,1208.5,1242.5,1227,10324923 172 | 2018-09-06 00:00:00+05:30,1265.7,1225.35,1230.15,1261.4,11195043 173 | 2018-09-07 00:00:00+05:30,1281.35,1254.5,1264.9,1278.6,10692787 174 | 2018-09-10 00:00:00+05:30,1277,1250.1,1275,1255.85,7059008 175 | 2018-09-11 00:00:00+05:30,1268.4,1234.25,1259,1237.7,10874768 176 | 2018-09-12 00:00:00+05:30,1258.7,1228.85,1251.8,1252.5,8065553 177 | 2018-09-14 00:00:00+05:30,1275.6,1247.65,1259.4,1253.15,7878583 178 | 2018-09-17 00:00:00+05:30,1250.75,1221.6,1250.75,1225.9,6754086 179 | 2018-09-18 00:00:00+05:30,1239.4,1214.35,1226,1217.15,7616412 180 | 2018-09-19 00:00:00+05:30,1224,1201,1224,1210.75,11265786 181 | 2018-09-21 00:00:00+05:30,1237.4,1183.5,1219,1217.5,19070172 182 | 2018-09-24 00:00:00+05:30,1239.9,1204.5,1214.45,1232.05,10102589 183 | 2018-09-25 00:00:00+05:30,1245,1195,1235.5,1230.6,9715020 184 | 2018-09-26 00:00:00+05:30,1253.95,1227.15,1239,1251.4,6396828 185 | 2018-09-27 00:00:00+05:30,1263,1237.5,1255,1253.75,10808749 186 | 2018-09-28 00:00:00+05:30,1271.7,1247.3,1259.9,1257.95,11073023 187 | 2018-10-01 00:00:00+05:30,1257.7,1204,1257.7,1231.7,7332693 188 | 2018-10-03 00:00:00+05:30,1239.6,1200.05,1229.5,1205.1,7376575 189 | 2018-10-04 00:00:00+05:30,1190.4,1106,1189.5,1122.25,23212443 190 | 2018-10-05 00:00:00+05:30,1115.3,1038.15,1099.5,1048.85,21943190 191 | 2018-10-08 00:00:00+05:30,1117,1025.55,1050,1109.4,24679428 192 | 2018-10-09 00:00:00+05:30,1123.3,1081.2,1121.65,1090.05,10991196 193 | 2018-10-10 00:00:00+05:30,1115.85,1084.3,1093.9,1102.1,9502176 194 | 2018-10-11 00:00:00+05:30,1114.75,1043,1064,1087.8,13224172 195 | 2018-10-12 00:00:00+05:30,1129,1101.9,1104.95,1126.55,9058536 196 | 2018-10-15 00:00:00+05:30,1146.9,1118.15,1129.9,1139.75,8871776 197 | 2018-10-16 00:00:00+05:30,1167.4,1140,1146,1163.8,7478601 198 | 2018-10-17 00:00:00+05:30,1179.35,1140,1179,1151.3,12785646 199 | 2018-10-19 00:00:00+05:30,1111,1070.25,1100,1101.3,25421229 200 | 2018-10-22 
00:00:00+05:30,1114.8,1057.35,1110,1062.65,13165542 201 | 2018-10-23 00:00:00+05:30,1064.7,1043.65,1052.75,1054.7,10573730 202 | 2018-10-24 00:00:00+05:30,1075.95,1031.4,1074.9,1045.75,13655337 203 | 2018-10-25 00:00:00+05:30,1040.65,1016.4,1036,1030.8,14096237 204 | 2018-10-26 00:00:00+05:30,1057,1022,1032.9,1044.9,9182613 205 | 2018-10-29 00:00:00+05:30,1093.85,1046,1046,1087.8,7671807 206 | 2018-10-30 00:00:00+05:30,1085.5,1052.1,1085.5,1057,8806701 207 | 2018-10-31 00:00:00+05:30,1065.85,1047,1059,1061.25,16457691 208 | 2018-11-01 00:00:00+05:30,1067.9,1050,1067.9,1056.25,9396027 209 | 2018-11-02 00:00:00+05:30,1081,1063.15,1067,1074.9,16796200 210 | 2018-11-05 00:00:00+05:30,1092.7,1065,1073.45,1090.3,7582518 211 | 2018-11-06 00:00:00+05:30,1112,1091.6,1094,1104.1,8634266 212 | 2018-11-07 00:00:00+05:30,1115,1106.7,1115,1110.7,1283997 213 | 2018-11-09 00:00:00+05:30,1113.8,1085,1106.25,1093.45,10840462 214 | 2018-11-12 00:00:00+05:30,1098.8,1074.55,1097.7,1080,5801720 215 | 2018-11-13 00:00:00+05:30,1103,1070.05,1074,1099.45,6811598 216 | 2018-11-14 00:00:00+05:30,1112.65,1085.5,1105.05,1097.95,7787530 217 | 2018-11-15 00:00:00+05:30,1103,1086.5,1096.95,1096.85,6831740 218 | 2018-11-16 00:00:00+05:30,1129.95,1097.1,1097.95,1127.4,11031889 219 | 2018-11-19 00:00:00+05:30,1151.7,1131,1132.9,1150,7308981 220 | 2018-11-20 00:00:00+05:30,1155.9,1130,1145,1137.4,5719901 221 | 2018-11-21 00:00:00+05:30,1145.5,1107.05,1137.5,1112.9,7507812 222 | 2018-11-22 00:00:00+05:30,1125.35,1100.1,1114.5,1102.85,5792568 223 | 2018-11-26 00:00:00+05:30,1117.5,1097.4,1109,1109.75,6386556 224 | 2018-11-27 00:00:00+05:30,1130,1105.45,1107.9,1128.2,6810228 225 | 2018-11-28 00:00:00+05:30,1157.75,1126.3,1132.45,1152.65,8003964 226 | 2018-11-29 00:00:00+05:30,1179.9,1160,1161,1168.5,12636169 227 | 2018-11-30 00:00:00+05:30,1186,1162.3,1172.3,1167.55,10717448 228 | 2018-12-03 00:00:00+05:30,1174.9,1148.5,1174.9,1156.45,5569578 229 | 2018-12-04 
00:00:00+05:30,1158,1142.2,1158,1152.3,5192825 230 | 2018-12-05 00:00:00+05:30,1158.8,1139.3,1143.75,1155.15,5819618 231 | 2018-12-06 00:00:00+05:30,1149,1118.25,1149,1123.65,5866108 232 | 2018-12-07 00:00:00+05:30,1136.95,1108.35,1129.45,1133.8,7214078 233 | 2018-12-10 00:00:00+05:30,1114.3,1083.1,1110,1090.25,9381544 234 | 2018-12-11 00:00:00+05:30,1102.1,1055,1066,1097.55,13494665 235 | 2018-12-12 00:00:00+05:30,1113.5,1087.75,1099,1110.5,12605431 236 | 2018-12-13 00:00:00+05:30,1128,1095.05,1128,1107.05,9297518 237 | 2018-12-14 00:00:00+05:30,1114.9,1091.8,1103.1,1112.2,7221324 238 | 2018-12-17 00:00:00+05:30,1133,1111.5,1115.95,1129.9,5064357 239 | 2018-12-18 00:00:00+05:30,1139,1120.5,1123.3,1136.4,5870928 240 | 2018-12-19 00:00:00+05:30,1147.3,1133.2,1141.5,1136.8,7368799 241 | 2018-12-20 00:00:00+05:30,1132.5,1116.15,1121,1128.45,5764731 242 | 2018-12-21 00:00:00+05:30,1131.25,1096,1125,1100.2,8281941 243 | 2018-12-24 00:00:00+05:30,1103.2,1086.55,1100.9,1089.15,4144535 244 | 2018-12-26 00:00:00+05:30,1100,1065.65,1085.8,1098.35,7210059 245 | 2018-12-27 00:00:00+05:30,1127,1104,1107.2,1120.2,12478000 246 | 2018-12-28 00:00:00+05:30,1135.25,1121.2,1125.5,1125.55,7666016 247 | 2018-12-31 00:00:00+05:30,1133.85,1116.2,1130.95,1121.25,7222817 -------------------------------------------------------------------------------- /data/sales.csv: -------------------------------------------------------------------------------- 1 | "Month","Sales" 2 | "2001-01",266.0 3 | "2001-02",145.9 4 | "2001-03",183.1 5 | "2001-04",119.3 6 | "2001-05",180.3 7 | "2001-06",168.5 8 | "2001-07",231.8 9 | "2001-08",224.5 10 | "2001-09",192.8 11 | "2001-10",122.9 12 | "2001-11",336.5 13 | "2001-12",185.9 14 | "2002-01",194.3 15 | "2002-02",149.5 16 | "2002-03",210.1 17 | "2002-04",273.3 18 | "2002-05",191.4 19 | "2002-06",287.0 20 | "2002-07",226.0 21 | "2002-08",303.6 22 | "2002-09",289.9 23 | "2002-10",421.6 24 | "2002-11",264.5 25 | "2002-12",342.3 26 | "2003-01",339.7 27 | 
"2003-02",440.4 28 | "2003-03",315.9 29 | "2003-04",439.3 30 | "2003-05",401.3 31 | "2003-06",437.4 32 | "2003-07",575.5 33 | "2003-08",407.6 34 | "2003-09",682.0 35 | "2003-10",475.3 36 | "2003-11",581.3 37 | "2003-12",646.9 -------------------------------------------------------------------------------- /data/sample.csv: -------------------------------------------------------------------------------- 1 | x,random 2 | 2018-11-01T0-10-1,-0.803908052 3 | 2018-11-02T3-45-3,1.586827482 4 | 2018-11-03T0-34-10,0.224111754 5 | 2018-11-04T11-3-4,-0.715936787 -------------------------------------------------------------------------------- /data/sample_2.csv: -------------------------------------------------------------------------------- 1 | year,month,day,random 2 | 2018,11,1,-0.803908052 3 | 2018,11,2,1.586827482 4 | 2018,11,3,0.224111754 5 | 2018,11,4,-0.715936787 6 | 2018,11,5,0.010597412 7 | 2018,11,6,0.782264074 8 | 2018,11,7,-1.046751639 9 | 2018,11,8,0.825305985 10 | 2018,11,9,-0.658712868 11 | 2018,11,10,0.226168727 12 | 2018,11,11,-0.569545596 13 | 2018,11,12,1.591374293 14 | 2018,11,13,0.470490328 15 | 2018,11,14,-0.363814613 16 | 2018,11,15,-1.70058898 17 | 2018,11,16,0.994420266 18 | 2018,11,17,0.26923845 19 | 2018,11,18,-0.946182168 20 | 2018,11,19,0.753910186 21 | 2018,11,20,1.895683364 22 | 2018,11,21,-1.653269029 23 | 2018,11,22,1.398829468 24 | 2018,11,23,0.064970122 25 | 2018,11,24,-0.579068492 26 | 2018,11,25,0.761552316 27 | 2018,11,26,-0.859265507 28 | 2018,11,27,-0.544670526 29 | 2018,11,28,0.020763882 30 | 2018,11,29,-0.266145079 31 | 2018,11,30,1.298599805 -------------------------------------------------------------------------------- /data/yesbank_data_day.csv: -------------------------------------------------------------------------------- 1 | date,high,low,open,close,volume 2 | 2018-01-01 00:00:00+05:30,317.75,311.3,315.5,312.6,4019878 3 | 2018-01-02 00:00:00+05:30,314,307.15,313.4,311.65,5224976 4 | 2018-01-03 
00:00:00+05:30,316.5,311.15,312,315.85,5672263 5 | 2018-01-04 00:00:00+05:30,318.4,313,316,317.1,5667580 6 | 2018-01-05 00:00:00+05:30,337.9,317.45,317.5,332.85,30720675 7 | 2018-01-08 00:00:00+05:30,341.3,331.3,336,333.6,12747890 8 | 2018-01-09 00:00:00+05:30,342.8,327.55,334.9,341.35,13282560 9 | 2018-01-10 00:00:00+05:30,342.35,335.45,341.5,339.8,10385044 10 | 2018-01-11 00:00:00+05:30,344.25,335.3,339,343.15,8266266 11 | 2018-01-12 00:00:00+05:30,344.7,337.55,344.1,340.9,5688676 12 | 2018-01-15 00:00:00+05:30,343.7,335.1,341.9,336,7142164 13 | 2018-01-16 00:00:00+05:30,338.75,328,336,334.85,7296505 14 | 2018-01-17 00:00:00+05:30,343.5,331.4,335.1,342.4,7985222 15 | 2018-01-18 00:00:00+05:30,356.9,332.35,350,341.2,35465087 16 | 2018-01-19 00:00:00+05:30,352.25,339.1,347.5,349.35,21425789 17 | 2018-01-22 00:00:00+05:30,358.25,348.75,349.95,355.35,13456538 18 | 2018-01-23 00:00:00+05:30,360.4,352.3,359.85,359.55,10196645 19 | 2018-01-24 00:00:00+05:30,366.3,356,357,364.8,11258771 20 | 2018-01-25 00:00:00+05:30,364.5,355.65,364.5,361.6,8963188 21 | 2018-01-29 00:00:00+05:30,363.7,355.55,361.2,358,7931235 22 | 2018-01-30 00:00:00+05:30,360.8,351.85,358,353.35,7890491 23 | 2018-01-31 00:00:00+05:30,356.55,350.45,353,354.4,8527044 24 | 2018-02-01 00:00:00+05:30,367.25,352.65,355,359.9,15217926 25 | 2018-02-02 00:00:00+05:30,356,341.8,354.2,349.05,16298953 26 | 2018-02-05 00:00:00+05:30,349,333.6,340,343.6,13407059 27 | 2018-02-06 00:00:00+05:30,342.9,324,325,338.75,12557261 28 | 2018-02-07 00:00:00+05:30,344,330.6,344,332.9,11681640 29 | 2018-02-08 00:00:00+05:30,340.35,331.5,332.9,335,7785799 30 | 2018-02-09 00:00:00+05:30,331.45,324,330,325.55,9395513 31 | 2018-02-12 00:00:00+05:30,337.2,326.6,326.6,335.4,12049356 32 | 2018-02-14 00:00:00+05:30,337.85,318.95,336,320.35,13548524 33 | 2018-02-15 00:00:00+05:30,328.8,317.7,321.2,319.8,15482667 34 | 2018-02-16 00:00:00+05:30,325,309.65,324,311.8,18611798 35 | 2018-02-19 00:00:00+05:30,315,307.55,313.85,312.05,9311433 36 
| 2018-02-20 00:00:00+05:30,316.1,307.5,314.35,308.7,11389041 37 | 2018-02-21 00:00:00+05:30,313.9,304.5,311,312.35,13342678 38 | 2018-02-22 00:00:00+05:30,316.9,308.15,311.9,316.1,14787238 39 | 2018-02-23 00:00:00+05:30,326,316.45,316.45,323.45,11399732 40 | 2018-02-26 00:00:00+05:30,328.2,319.35,326.5,326.15,9225197 41 | 2018-02-27 00:00:00+05:30,334.25,325,325.3,327.15,15104405 42 | 2018-02-28 00:00:00+05:30,325.2,318.05,323,322.3,10985771 43 | 2018-03-01 00:00:00+05:30,326,318.35,322.1,321.05,7333939 44 | 2018-03-05 00:00:00+05:30,319,311.15,318.5,312.95,7973146 45 | 2018-03-06 00:00:00+05:30,320.5,309.85,317,312.15,8299126 46 | 2018-03-07 00:00:00+05:30,314.9,308,312,311.95,8420190 47 | 2018-03-08 00:00:00+05:30,313.25,294.7,312.3,308.55,24027679 48 | 2018-03-09 00:00:00+05:30,310.95,301.3,310,303.25,11596136 49 | 2018-03-12 00:00:00+05:30,314.5,301.35,305.2,311.15,12860205 50 | 2018-03-13 00:00:00+05:30,315.4,309.8,310,312.8,11436348 51 | 2018-03-14 00:00:00+05:30,321.9,308.2,311.8,318.85,12905495 52 | 2018-03-15 00:00:00+05:30,321,310.85,318.95,311.85,9999620 53 | 2018-03-16 00:00:00+05:30,316.85,310,312.4,312.9,17094363 54 | 2018-03-19 00:00:00+05:30,316.5,302.5,316,304.8,11267355 55 | 2018-03-20 00:00:00+05:30,305.8,299.7,303,302.4,9874137 56 | 2018-03-21 00:00:00+05:30,309.05,300.05,305,300.75,13383435 57 | 2018-03-22 00:00:00+05:30,303.7,295.75,301.8,298.25,19356591 58 | 2018-03-23 00:00:00+05:30,293.05,285,293,286.65,21617995 59 | 2018-03-26 00:00:00+05:30,304.9,286,286.5,303.35,24240033 60 | 2018-03-27 00:00:00+05:30,309.25,300.7,307,303.5,15267419 61 | 2018-03-28 00:00:00+05:30,307.5,299.1,300.15,304.85,14952643 62 | 2018-04-02 00:00:00+05:30,308.5,303.6,307,306.25,8844027 63 | 2018-04-03 00:00:00+05:30,314.2,304,306.45,313.1,10392997 64 | 2018-04-04 00:00:00+05:30,320,304,313,305.45,13516525 65 | 2018-04-05 00:00:00+05:30,315.6,307.8,311.8,313.05,11449637 66 | 2018-04-06 00:00:00+05:30,316,311.6,314.7,314.5,8598435 67 | 2018-04-09 
00:00:00+05:30,319.9,314.4,316,316.25,14002415 68 | 2018-04-10 00:00:00+05:30,320.5,315,320.1,319.25,9976629 69 | 2018-04-11 00:00:00+05:30,320,310.5,320,312,9921987 70 | 2018-04-12 00:00:00+05:30,313.35,308,312.2,311.75,11359462 71 | 2018-04-13 00:00:00+05:30,314.45,305.7,312.6,309.55,12849690 72 | 2018-04-16 00:00:00+05:30,311.1,306.45,307,310.35,10808237 73 | 2018-04-17 00:00:00+05:30,313.35,307.25,311.9,307.9,12509310 74 | 2018-04-18 00:00:00+05:30,313.7,308.75,311,309.55,11323144 75 | 2018-04-19 00:00:00+05:30,319.5,309,311,318.5,13498117 76 | 2018-04-20 00:00:00+05:30,317.7,306.05,317.7,308.55,21800867 77 | 2018-04-23 00:00:00+05:30,317,309,310.9,313.05,10927239 78 | 2018-04-24 00:00:00+05:30,328.8,314,314,324,24392058 79 | 2018-04-25 00:00:00+05:30,328.5,317.45,323.75,325.25,15370686 80 | 2018-04-26 00:00:00+05:30,360.25,323.6,327.95,352.4,62364794 81 | 2018-04-27 00:00:00+05:30,369,345.1,365,348.9,52230752 82 | 2018-04-30 00:00:00+05:30,367.2,350.3,350.9,362,28400085 83 | 2018-05-02 00:00:00+05:30,364.7,351.7,361.95,354.25,13058846 84 | 2018-05-03 00:00:00+05:30,359.25,346.5,354,353.95,15431044 85 | 2018-05-04 00:00:00+05:30,357.75,344.2,354.5,345.1,12241946 86 | 2018-05-07 00:00:00+05:30,350,342.65,347.8,347.9,6781161 87 | 2018-05-08 00:00:00+05:30,353.95,340.5,350,343.3,11670852 88 | 2018-05-09 00:00:00+05:30,349.75,343.2,344.45,347.85,9802803 89 | 2018-05-10 00:00:00+05:30,350.95,342.65,350.95,344.65,7330619 90 | 2018-05-11 00:00:00+05:30,351.8,341.8,346.5,350.25,7642090 91 | 2018-05-14 00:00:00+05:30,353.8,343.1,350.8,344.8,7468232 92 | 2018-05-15 00:00:00+05:30,354.6,343.6,346,345.75,12805393 93 | 2018-05-16 00:00:00+05:30,356.45,342,344,349.75,12708109 94 | 2018-05-17 00:00:00+05:30,352,344,351.5,345.3,6702419 95 | 2018-05-18 00:00:00+05:30,348.8,343.1,346.5,345.5,8938377 96 | 2018-05-21 00:00:00+05:30,349.2,333.05,346,334.15,8748958 97 | 2018-05-22 00:00:00+05:30,340.75,331,334.1,334.7,10869535 98 | 2018-05-23 
00:00:00+05:30,338.75,331.5,335,333.7,7738850 99 | 2018-05-24 00:00:00+05:30,337,325.6,336.7,330,8077461 100 | 2018-05-25 00:00:00+05:30,341.9,332,333,339.4,6957024 101 | 2018-05-28 00:00:00+05:30,348.25,340.75,341.5,345.2,8982939 102 | 2018-05-29 00:00:00+05:30,345.9,335.1,344.4,338.05,7870400 103 | 2018-05-30 00:00:00+05:30,344,332.65,335.75,343.15,10036644 104 | 2018-05-31 00:00:00+05:30,349.4,338,345.4,346.2,17910411 105 | 2018-06-01 00:00:00+05:30,348.7,340.25,347,342.7,9470256 106 | 2018-06-04 00:00:00+05:30,350.9,342,343.9,342.95,14501600 107 | 2018-06-05 00:00:00+05:30,342.75,333.65,342.5,336.75,8492155 108 | 2018-06-06 00:00:00+05:30,340.55,332.55,337.95,339.15,8211104 109 | 2018-06-07 00:00:00+05:30,345.5,338,341.2,339.55,9630695 110 | 2018-06-08 00:00:00+05:30,340.25,335.05,337.5,337.65,5851442 111 | 2018-06-11 00:00:00+05:30,344.85,334.7,338.6,335.7,11980621 112 | 2018-06-12 00:00:00+05:30,337.95,331.2,335.7,332.2,9851496 113 | 2018-06-13 00:00:00+05:30,336.9,331.5,336.9,333.2,7530350 114 | 2018-06-14 00:00:00+05:30,338.35,330.55,333,337.2,6819965 115 | 2018-06-15 00:00:00+05:30,336.2,328.65,333,330.65,8518379 116 | 2018-06-18 00:00:00+05:30,333.85,327.7,330.8,331.65,6785348 117 | 2018-06-19 00:00:00+05:30,334.5,328.5,330.95,330.9,8566372 118 | 2018-06-20 00:00:00+05:30,337,331.4,331.9,336.1,7720519 119 | 2018-06-21 00:00:00+05:30,337.6,331.2,336.8,332.95,5133865 120 | 2018-06-22 00:00:00+05:30,335.7,329.5,333.6,335.1,6006046 121 | 2018-06-25 00:00:00+05:30,338.95,332.9,334.75,334.15,5792052 122 | 2018-06-26 00:00:00+05:30,337.75,332.05,333.1,335.95,4706265 123 | 2018-06-27 00:00:00+05:30,337.75,331.05,336,335.6,6930662 124 | 2018-06-28 00:00:00+05:30,336.4,327.3,336.4,329.3,8130250 125 | 2018-06-29 00:00:00+05:30,341.75,329.65,330.7,339.65,8963268 126 | 2018-07-02 00:00:00+05:30,341.7,332.35,338.05,337.05,8091469 127 | 2018-07-03 00:00:00+05:30,340.5,335.3,337.7,336.9,4998778 128 | 2018-07-04 00:00:00+05:30,337.5,332.5,337,335.55,4507001 129 | 
2018-07-05 00:00:00+05:30,350.8,338.6,338.75,348.65,21239763 130 | 2018-07-06 00:00:00+05:30,357.7,347,348.3,352.6,17790500 131 | 2018-07-09 00:00:00+05:30,366.9,355,356.1,363.3,16834621 132 | 2018-07-10 00:00:00+05:30,373.1,360,363.9,371.4,20404168 133 | 2018-07-11 00:00:00+05:30,373.35,367.45,370.95,371.6,9423127 134 | 2018-07-12 00:00:00+05:30,385,372.5,372.5,374.8,17868601 135 | 2018-07-13 00:00:00+05:30,380.3,367.3,376.9,376,15448753 136 | 2018-07-16 00:00:00+05:30,380.8,369.6,376,378.5,9093938 137 | 2018-07-17 00:00:00+05:30,385.9,375.6,377.9,380.8,12586691 138 | 2018-07-18 00:00:00+05:30,388,378,381.9,383.65,10472260 139 | 2018-07-19 00:00:00+05:30,394.35,379.4,380.95,392.3,17050256 140 | 2018-07-20 00:00:00+05:30,392.5,385.3,391.1,386.5,10076070 141 | 2018-07-23 00:00:00+05:30,392.35,382.75,387.9,386.6,9511403 142 | 2018-07-24 00:00:00+05:30,390,382.7,389.2,384.35,9785989 143 | 2018-07-25 00:00:00+05:30,392.7,381.5,386,382.9,12467278 144 | 2018-07-26 00:00:00+05:30,386.3,358.55,385,369.75,35251243 145 | 2018-07-27 00:00:00+05:30,374.9,362.4,374.9,370,17100821 146 | 2018-07-30 00:00:00+05:30,373,365.25,372,369.3,11469162 147 | 2018-07-31 00:00:00+05:30,374.7,366.4,370,367.95,9734450 148 | 2018-08-01 00:00:00+05:30,371.55,362,369.9,365.85,7742871 149 | 2018-08-02 00:00:00+05:30,365.65,356.65,364.95,361.8,8365895 150 | 2018-08-03 00:00:00+05:30,374,364,364,373.2,10625834 151 | 2018-08-06 00:00:00+05:30,380.7,373.45,374,377.9,12018227 152 | 2018-08-07 00:00:00+05:30,386.4,376.5,380,381.65,13526041 153 | 2018-08-08 00:00:00+05:30,385.75,379.55,382.6,382.75,6767532 154 | 2018-08-09 00:00:00+05:30,384.7,377.65,383.5,381.05,5854134 155 | 2018-08-10 00:00:00+05:30,391,381,382,382.85,13916077 156 | 2018-08-13 00:00:00+05:30,383,368.3,379,370.7,9205915 157 | 2018-08-14 00:00:00+05:30,384.3,368.15,369,382.5,12351125 158 | 2018-08-16 00:00:00+05:30,384.85,374.2,381.2,378.45,11567658 159 | 2018-08-17 00:00:00+05:30,395.65,380.3,382.4,393.2,16094026 160 | 2018-08-20 
00:00:00+05:30,404,392.55,396.8,394,15247130 161 | 2018-08-21 00:00:00+05:30,397.45,389.5,396,391.35,9991245 162 | 2018-08-23 00:00:00+05:30,395.5,383.35,395.25,388.6,9262319 163 | 2018-08-24 00:00:00+05:30,388.7,372.85,387,374.2,16301075 164 | 2018-08-27 00:00:00+05:30,384.65,377,378,383,9563369 165 | 2018-08-28 00:00:00+05:30,386,362.8,386,370.6,26309238 166 | 2018-08-29 00:00:00+05:30,378.55,362,373.65,365.15,30544751 167 | 2018-08-30 00:00:00+05:30,369,358,369,361.75,28157288 168 | 2018-08-31 00:00:00+05:30,350,336.25,343,343.5,84019022 169 | 2018-09-03 00:00:00+05:30,348,337.2,347.95,339.05,26137760 170 | 2018-09-04 00:00:00+05:30,343.4,332.55,340.75,334.05,22057643 171 | 2018-09-05 00:00:00+05:30,344.9,332.25,332.9,343.8,24809578 172 | 2018-09-06 00:00:00+05:30,347.8,337.9,346.55,339.2,17527530 173 | 2018-09-07 00:00:00+05:30,341.05,321.8,341,323.4,48830425 174 | 2018-09-10 00:00:00+05:30,328.9,316,323.65,323.65,43295622 175 | 2018-09-11 00:00:00+05:30,327.85,314.6,326,316.6,28553718 176 | 2018-09-12 00:00:00+05:30,319.95,310.6,318,314.3,26121445 177 | 2018-09-14 00:00:00+05:30,328.5,316.7,318.5,323.1,36624891 178 | 2018-09-17 00:00:00+05:30,321.85,315.95,317.5,318.6,18554055 179 | 2018-09-18 00:00:00+05:30,328.95,319.1,319.35,323.55,30243505 180 | 2018-09-19 00:00:00+05:30,328.75,318.05,326,319.2,16185496 181 | 2018-09-21 00:00:00+05:30,287.3,218.1,287.3,226.5,293552756 182 | 2018-09-24 00:00:00+05:30,237.5,215.85,236.5,226.4,105701372 183 | 2018-09-25 00:00:00+05:30,238.85,197.25,230,219.7,110217421 184 | 2018-09-26 00:00:00+05:30,229.8,217,223,223.75,62814786 185 | 2018-09-27 00:00:00+05:30,227,202.05,226,203.25,91197198 186 | 2018-09-28 00:00:00+05:30,204,165,203.6,183.65,148654874 187 | 2018-10-01 00:00:00+05:30,203.8,170.6,180,200.85,130992623 188 | 2018-10-03 00:00:00+05:30,222.95,202.25,205,212.75,97945695 189 | 2018-10-04 00:00:00+05:30,220,205.05,209.4,215,65604016 190 | 2018-10-05 00:00:00+05:30,218.5,203.1,215,206,40546788 191 | 2018-10-08 
00:00:00+05:30,226.6,207.4,210,221.2,62349640 192 | 2018-10-09 00:00:00+05:30,233.8,222.6,225.5,224.65,49480210 193 | 2018-10-10 00:00:00+05:30,237,219.4,229,233.9,52351177 194 | 2018-10-11 00:00:00+05:30,268.95,216.65,222,240.2,94679663 195 | 2018-10-12 00:00:00+05:30,254.4,244.5,247,246.45,43535321 196 | 2018-10-15 00:00:00+05:30,250.6,241.55,248.95,246,24694397 197 | 2018-10-16 00:00:00+05:30,251.4,245.6,247,248.9,18099074 198 | 2018-10-17 00:00:00+05:30,254.85,228.5,252.5,231.9,37767849 199 | 2018-10-19 00:00:00+05:30,230.55,213,225,217.9,50336402 200 | 2018-10-22 00:00:00+05:30,222.35,208.5,221,211.5,33633639 201 | 2018-10-23 00:00:00+05:30,216.7,206.5,209,213.2,28293781 202 | 2018-10-24 00:00:00+05:30,220,202,217.5,204,38009322 203 | 2018-10-25 00:00:00+05:30,207.5,195,202.6,198.35,59170164 204 | 2018-10-26 00:00:00+05:30,191.65,168.6,178.55,180.7,95760078 205 | 2018-10-29 00:00:00+05:30,185.9,174.75,185,181.3,52331613 206 | 2018-10-30 00:00:00+05:30,189.9,180.65,182,182.05,43428186 207 | 2018-10-31 00:00:00+05:30,189.5,179.65,185,188.1,53264490 208 | 2018-11-01 00:00:00+05:30,205.75,189.05,192,204.05,69202992 209 | 2018-11-02 00:00:00+05:30,215.9,207.65,212.75,209.1,51686587 210 | 2018-11-05 00:00:00+05:30,212.35,206.1,209,210.1,21134350 211 | 2018-11-06 00:00:00+05:30,217.9,211,212.25,214.45,26924066 212 | 2018-11-07 00:00:00+05:30,218,215.5,217.45,215.95,3962712 213 | 2018-11-09 00:00:00+05:30,229.4,213.2,214,227.9,40754087 214 | 2018-11-12 00:00:00+05:30,230.85,221.1,230,223.05,29069172 215 | 2018-11-13 00:00:00+05:30,226.85,220,222,225.45,19014119 216 | 2018-11-14 00:00:00+05:30,229,221,226.9,222.4,19780790 217 | 2018-11-15 00:00:00+05:30,217.95,202.25,215,205.85,60944632 218 | 2018-11-16 00:00:00+05:30,201.75,187.8,201.75,191,88101871 219 | 2018-11-19 00:00:00+05:30,205.95,194.35,198.1,204.8,64329717 220 | 2018-11-20 00:00:00+05:30,199.75,191,198,192.1,77354754 221 | 2018-11-21 00:00:00+05:30,200.8,194.6,196.1,198.15,47627190 222 | 2018-11-22 
00:00:00+05:30,203.85,194.65,200,195.55,44350629 223 | 2018-11-26 00:00:00+05:30,191.95,182.2,189,187.9,60418383 224 | 2018-11-27 00:00:00+05:30,193.8,180.35,193,182.65,70065939 225 | 2018-11-28 00:00:00+05:30,181,160.3,180.4,162.1,140720495 226 | 2018-11-29 00:00:00+05:30,167,146.75,157.05,160.45,292043464 227 | 2018-11-30 00:00:00+05:30,171.35,160.45,164,169.8,120153166 228 | 2018-12-03 00:00:00+05:30,179.5,165.3,174,178,104720593 229 | 2018-12-04 00:00:00+05:30,182.45,175.25,180.35,176.5,68038860 230 | 2018-12-05 00:00:00+05:30,178.45,172.2,173.5,173.4,52289539 231 | 2018-12-06 00:00:00+05:30,172.5,167.35,169,168.45,38867660 232 | 2018-12-07 00:00:00+05:30,172,162,170,166.2,49750931 233 | 2018-12-10 00:00:00+05:30,169.5,160,162,165.65,50379613 234 | 2018-12-11 00:00:00+05:30,179.85,163.8,164.5,177.85,85103084 235 | 2018-12-12 00:00:00+05:30,187.5,181.55,182,186.6,71832334 236 | 2018-12-13 00:00:00+05:30,193.2,172.3,193,174.7,108920029 237 | 2018-12-14 00:00:00+05:30,181.6,168.25,172.7,180.35,79566067 238 | 2018-12-17 00:00:00+05:30,184.45,179.45,183.25,181,35269769 239 | 2018-12-18 00:00:00+05:30,180.9,176.75,180,179,35729865 240 | 2018-12-19 00:00:00+05:30,183.15,178.6,180.5,179.55,40247602 241 | 2018-12-20 00:00:00+05:30,187.6,178,179,186.75,61970822 242 | 2018-12-21 00:00:00+05:30,188.45,180.85,188,182.95,40864241 243 | 2018-12-24 00:00:00+05:30,185.5,181.25,184,182.3,32861722 244 | 2018-12-26 00:00:00+05:30,182.65,174.25,182.5,179.45,47421246 245 | 2018-12-27 00:00:00+05:30,183.45,177.05,183,178.1,44303877 246 | 2018-12-28 00:00:00+05:30,183.3,180,180,181.45,27142501 247 | 2018-12-31 00:00:00+05:30,183.85,181,183.2,181.8,18970865 -------------------------------------------------------------------------------- /images/pandas_dtypes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/562b5b1189d6d5c93f8b1c89fb8ecbc42350024b/images/pandas_dtypes.png 
-------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jupyter 2 | pandas 3 | matplotlib 4 | statsmodels 5 | keras 6 | scikit-learn 7 | seaborn 8 | xgboost 9 | arch 10 | --------------------------------------------------------------------------------