├── .gitignore
├── Data Analysis with Pandas
├── 01.Dealing with datetime.ipynb
├── 02.Input_Output.ipynb
├── 03. Moving Up and Down.ipynb
├── 04.Window and Lags.ipynb
└── 05. corr and autocorr.ipynb
├── Financial Time Series
├── 01.Financial Time Series Analysis.ipynb
└── 02. sentiment_score.ipynb
├── ODSC Workshop.pdf
├── README.md
├── Statistical models
├── 01. Time Series components.ipynb
├── 02. Stationarity.ipynb
├── 03.ARMA Process Models.ipynb
├── 04. ARIMA Models.ipynb
└── 05.GARCH Models.ipynb
├── Time Series Boosting
└── Trees_and_Boosting_with_TS.ipynb
├── Time Series with Deep Learning
├── 01.Time Series Forecasting with MLP.ipynb
├── 02.Time Series Forecasting with LSTM.ipynb
└── What went wrong with this LSTM.ipynb
├── data
├── 50words_TEST.csv
├── AirPassengers.csv
├── All-Transactions House Price Index.csv
├── All_India_Area_Weighted_Monthly_Rainfall.csv
├── Earthquakes.csv
├── daily_female_births.csv
├── daily_min_tempratures.csv
├── exercise3.csv
├── exercise_2.csv
├── exercise_4.csv
├── exercise_sample.csv
├── flotation-cell.csv
├── gdp_india.csv
├── gdp_uk.csv
├── pollution.csv
├── reliance_data_day.csv
├── sales.csv
├── sample.csv
├── sample_2.csv
├── stock_data.csv
├── test_data.csv
├── yesbank_data.csv
└── yesbank_data_day.csv
├── images
└── pandas_dtypes.png
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | .ipynb_checkpoints
3 |
--------------------------------------------------------------------------------
/Data Analysis with Pandas/02.Input_Output.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "02.Input_Output.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "LguJbQ55Gcwe",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "# All imports\n",
38 | "import pandas as pd\n",
39 | "import numpy as np"
40 | ],
41 | "execution_count": 0,
42 | "outputs": []
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {
47 | "id": "dfT4xBKQGcwk",
48 | "colab_type": "text"
49 | },
50 | "source": [
51 | "**Data Structures in \"pandas\"**\n",
52 | "\n",
53 | "* Series is a one-dimensional labeled array capable of holding any data type\n",
54 | "\n",
55 | "* DataFrame is a 2-dimensional labeled data structure with columns of potentially different types.\n",
56 | "\n"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "collapsed": true,
63 | "id": "oU55fsdUGcwl",
64 | "colab_type": "text"
65 | },
66 | "source": [
67 | "# Input/Output"
68 | ]
69 | },
70 | {
71 | "cell_type": "code",
72 | "metadata": {
73 | "id": "4jzFyTSPGcwm",
74 | "colab_type": "code",
75 | "colab": {}
76 | },
77 | "source": [
78 | "# Pandas supports a lot of formats as input like json, csv, text, html, etc. \n",
79 | "# Here we will be taking the input as csv [comma separated values].\n",
80 | "# we can also use various parameters in read_csv like parse_dates [parses string date values]\n",
81 | "\n",
82 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n",
83 | "ts_data = pd.read_csv(colab_path+\"data/stock_data.csv\")"
84 | ],
85 | "execution_count": 0,
86 | "outputs": []
87 | },
88 | {
89 | "cell_type": "code",
90 | "metadata": {
91 | "id": "zVVhbeLHGcwo",
92 | "colab_type": "code",
93 | "colab": {}
94 | },
95 | "source": [
96 | "# A look at the head of the dataframe\n",
97 | "ts_data.head()"
98 | ],
99 | "execution_count": 0,
100 | "outputs": []
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "id": "nibPOAGZGcwr",
106 | "colab_type": "code",
107 | "colab": {}
108 | },
109 | "source": [
110 | "# let's check the datatypes of the columns in the dataframe\n",
111 | "\n",
112 | "ts_data.info()"
113 | ],
114 | "execution_count": 0,
115 | "outputs": []
116 | },
117 | {
118 | "cell_type": "markdown",
119 | "metadata": {
120 | "id": "DCXWym0XGcws",
121 | "colab_type": "text"
122 | },
123 | "source": [
124 | "### A Quick look at pandas data types\n",
125 | "\n",
126 | ""
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "metadata": {
132 | "id": "hmWRCLqTGcwt",
133 | "colab_type": "code",
134 | "colab": {}
135 | },
136 | "source": [
137 | "ts_data.describe()"
138 | ],
139 | "execution_count": 0,
140 | "outputs": []
141 | },
142 | {
143 | "cell_type": "code",
144 | "metadata": {
145 | "id": "EgN1T3AuGcwx",
146 | "colab_type": "code",
147 | "colab": {}
148 | },
149 | "source": [
150 | "#converts string type to datetime format\n",
151 | "ts_data['Date']= pd.to_datetime(ts_data['date']) \n",
152 | "ts_data.info()"
153 | ],
154 | "execution_count": 0,
155 | "outputs": []
156 | },
157 | {
158 | "cell_type": "code",
159 | "metadata": {
160 | "id": "SSEKayyIGcwz",
161 | "colab_type": "code",
162 | "colab": {}
163 | },
164 | "source": [
165 | "# read csv by parsing dates\n",
166 | "df = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates=['date'], index_col=0)"
167 | ],
168 | "execution_count": 0,
169 | "outputs": []
170 | },
171 | {
172 | "cell_type": "code",
173 | "metadata": {
174 | "id": "swgQJVyqGcw1",
175 | "colab_type": "code",
176 | "colab": {}
177 | },
178 | "source": [
179 | "df"
180 | ],
181 | "execution_count": 0,
182 | "outputs": []
183 | },
184 | {
185 | "cell_type": "code",
186 | "metadata": {
187 | "id": "Jg7eTv-cGcw4",
188 | "colab_type": "code",
189 | "colab": {}
190 | },
191 | "source": [
192 | "# getting data using date index\n",
193 | "# [from October 2008 to January 2009]\n",
194 | "df['2008 10':'2009 01']"
195 | ],
196 | "execution_count": 0,
197 | "outputs": []
198 | },
199 | {
200 | "cell_type": "code",
201 | "metadata": {
202 | "id": "2mRSUKISGcw9",
203 | "colab_type": "code",
204 | "colab": {}
205 | },
206 | "source": [
207 | "# Truncates a sorted DataFrame/Series before and/or after some\n",
208 | "# particular index value. If the axis contains only datetime values,\n",
209 | "# before/after parameters are converted to datetime values.\n",
210 | "df.truncate?"
211 | ],
212 | "execution_count": 0,
213 | "outputs": []
214 | },
215 | {
216 | "cell_type": "code",
217 | "metadata": {
218 | "scrolled": true,
219 | "id": "MdzEbNp_Gcw_",
220 | "colab_type": "code",
221 | "colab": {}
222 | },
223 | "source": [
224 | "df.truncate(before='2008 10', after='2009')"
225 | ],
226 | "execution_count": 0,
227 | "outputs": []
228 | },
229 | {
230 | "cell_type": "code",
231 | "metadata": {
232 | "id": "gPvmIF11GcxB",
233 | "colab_type": "code",
234 | "colab": {}
235 | },
236 | "source": [
237 | "# parsing date from multiple columns\n",
238 | "pd.read_csv(colab_path+'data/sample_2.csv').head()"
239 | ],
240 | "execution_count": 0,
241 | "outputs": []
242 | },
243 | {
244 | "cell_type": "code",
245 | "metadata": {
246 | "id": "KCr2ccc0GcxE",
247 | "colab_type": "code",
248 | "colab": {}
249 | },
250 | "source": [
251 | "# combining all date columns to get date\n",
252 | "pd.read_csv(colab_path+'data/sample_2.csv', parse_dates={'date':[0,1,2]}, index_col='date').head()"
253 | ],
254 | "execution_count": 0,
255 | "outputs": []
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {
260 | "id": "MhF7kXxsGcxG",
261 | "colab_type": "text"
262 | },
263 | "source": [
264 | "## dateparser"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "metadata": {
270 | "id": "TLfgdeYlGcxH",
271 | "colab_type": "code",
272 | "colab": {}
273 | },
274 | "source": [
275 | "pd.read_csv(colab_path+'data/sample.csv')"
276 | ],
277 | "execution_count": 0,
278 | "outputs": []
279 | },
280 | {
281 | "cell_type": "code",
282 | "metadata": {
283 | "id": "YrvYoxNrGcxJ",
284 | "colab_type": "code",
285 | "colab": {}
286 | },
287 | "source": [
288 | "pd.read_csv(colab_path+'data/sample.csv', parse_dates= ['x']).info()"
289 | ],
290 | "execution_count": 0,
291 | "outputs": []
292 | },
293 | {
294 | "cell_type": "code",
295 | "metadata": {
296 | "id": "fiojLQJJGcxL",
297 | "colab_type": "code",
298 | "colab": {}
299 | },
300 | "source": [
301 | "from datetime import datetime"
302 | ],
303 | "execution_count": 0,
304 | "outputs": []
305 | },
306 | {
307 | "cell_type": "code",
308 | "metadata": {
309 | "id": "3LeVhBPNGcxN",
310 | "colab_type": "code",
311 | "colab": {}
312 | },
313 | "source": [
314 | "datetime.strptime('2018-11-01T12-12-00', '%Y-%m-%dT%H-%M-%S')"
315 | ],
316 | "execution_count": 0,
317 | "outputs": []
318 | },
319 | {
320 | "cell_type": "code",
321 | "metadata": {
322 | "id": "B4xFq0GjGcxP",
323 | "colab_type": "code",
324 | "colab": {}
325 | },
326 | "source": [
327 | "# Specifying the format to parse the datetime\n",
328 | "# Find out various format specifiers here (https://docs.python.org/3/library/datetime.html#strftime-and-strptime-behavior)\n",
329 | "def dateparse(x):\n",
330 | " return datetime.strptime(x, '%Y-%m-%dT%H-%M-%S')"
331 | ],
332 | "execution_count": 0,
333 | "outputs": []
334 | },
335 | {
336 | "cell_type": "code",
337 | "metadata": {
338 | "id": "HWDve0otGcxR",
339 | "colab_type": "code",
340 | "colab": {}
341 | },
342 | "source": [
343 | "#pd.Timestamp('2018-11-01T0-10-1')"
344 | ],
345 | "execution_count": 0,
346 | "outputs": []
347 | },
348 | {
349 | "cell_type": "code",
350 | "metadata": {
351 | "id": "9T64KRa1GcxU",
352 | "colab_type": "code",
353 | "colab": {}
354 | },
355 | "source": [
356 | "dateparse('2018-11-01T0-10-1')"
357 | ],
358 | "execution_count": 0,
359 | "outputs": []
360 | },
361 | {
362 | "cell_type": "code",
363 | "metadata": {
364 | "id": "DZFv_QjLGcxX",
365 | "colab_type": "code",
366 | "colab": {}
367 | },
368 | "source": [
369 | "pd.read_csv(colab_path+'/data/sample.csv', parse_dates=['x'], date_parser=dateparse)"
370 | ],
371 | "execution_count": 0,
372 | "outputs": []
373 | },
374 | {
375 | "cell_type": "markdown",
376 | "metadata": {
377 | "id": "it3UaMhqGcxa",
378 | "colab_type": "text"
379 | },
380 | "source": [
381 | "## Write data"
382 | ]
383 | },
384 | {
385 | "cell_type": "code",
386 | "metadata": {
387 | "id": "M7VPMaLCGcxa",
388 | "colab_type": "code",
389 | "colab": {}
390 | },
391 | "source": [
392 | "# Creating date index with start and end having frequency of second\n",
393 | "date_index = pd.date_range(start='20181217', freq='S', end='20181221')"
394 | ],
395 | "execution_count": 0,
396 | "outputs": []
397 | },
398 | {
399 | "cell_type": "code",
400 | "metadata": {
401 | "id": "Dc6LrVgnGcxc",
402 | "colab_type": "code",
403 | "colab": {}
404 | },
405 | "source": [
406 | "len(date_index)"
407 | ],
408 | "execution_count": 0,
409 | "outputs": []
410 | },
411 | {
412 | "cell_type": "code",
413 | "metadata": {
414 | "id": "aQw18bwYGcxe",
415 | "colab_type": "code",
416 | "colab": {}
417 | },
418 | "source": [
419 | "# Generated Index\n",
420 | "date_index"
421 | ],
422 | "execution_count": 0,
423 | "outputs": []
424 | },
425 | {
426 | "cell_type": "code",
427 | "metadata": {
428 | "id": "AoCFA6ExGcxi",
429 | "colab_type": "code",
430 | "colab": {}
431 | },
432 | "source": [
433 | "# Creating the dataframe with the above generated index\n",
434 | "df = pd.DataFrame(data=np.random.randint(0,100, len(date_index)), index=date_index)"
435 | ],
436 | "execution_count": 0,
437 | "outputs": []
438 | },
439 | {
440 | "cell_type": "code",
441 | "metadata": {
442 | "id": "ZyFUG2u8Gcxj",
443 | "colab_type": "code",
444 | "colab": {}
445 | },
446 | "source": [
447 | "# changing the name of the column \n",
448 | "df.columns = ['Value']"
449 | ],
450 | "execution_count": 0,
451 | "outputs": []
452 | },
453 | {
454 | "cell_type": "code",
455 | "metadata": {
456 | "id": "SBhrabp3Gcxl",
457 | "colab_type": "code",
458 | "colab": {}
459 | },
460 | "source": [
461 | "#df.head()"
462 | ],
463 | "execution_count": 0,
464 | "outputs": []
465 | },
466 | {
467 | "cell_type": "code",
468 | "metadata": {
469 | "id": "hVqzhAo_Gcxn",
470 | "colab_type": "code",
471 | "colab": {}
472 | },
473 | "source": [
474 | "# Exporting data to csv\n",
475 | "df.to_csv('test_data.csv')"
476 | ],
477 | "execution_count": 0,
478 | "outputs": []
479 | },
480 | {
481 | "cell_type": "code",
482 | "metadata": {
483 | "id": "maOX4A1RGcxo",
484 | "colab_type": "code",
485 | "colab": {}
486 | },
487 | "source": [
488 | ""
489 | ],
490 | "execution_count": 0,
491 | "outputs": []
492 | },
493 | {
494 | "cell_type": "markdown",
495 | "metadata": {
496 | "id": "IKO6sQNyGcxq",
497 | "colab_type": "text"
498 | },
499 | "source": [
500 | "# Exercise"
501 | ]
502 | },
503 | {
504 | "cell_type": "code",
505 | "metadata": {
506 | "id": "P2xZrO7fGcxr",
507 | "colab_type": "code",
508 | "colab": {}
509 | },
510 | "source": [
511 | "# Q1:\n",
512 | "# 1. read \"../data/exercise_sample.csv\"\n",
513 | "# 2. change first column datatype to datetime\n",
514 | "# 3. Make the first column index\n",
515 | "# 4. Reverse the index order\n",
516 | "# 5. export it to cleaned_sample.csv"
517 | ],
518 | "execution_count": 0,
519 | "outputs": []
520 | },
521 | {
522 | "cell_type": "code",
523 | "metadata": {
524 | "id": "Jbb6vflJGcxt",
525 | "colab_type": "code",
526 | "colab": {}
527 | },
528 | "source": [
529 | ""
530 | ],
531 | "execution_count": 0,
532 | "outputs": []
533 | }
534 | ]
535 | }
--------------------------------------------------------------------------------
/Data Analysis with Pandas/03. Moving Up and Down.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "03. Moving Up and Down.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "2lkctECUGda5",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import pandas as pd"
38 | ],
39 | "execution_count": 0,
40 | "outputs": []
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {
45 | "id": "cCWBO4KTGda-",
46 | "colab_type": "text"
47 | },
48 | "source": [
49 | "# Moving Up and Down\n",
50 | "\n",
51 | "\n",
52 | "## asfreq\n",
53 | "\n",
54 | " Convert TimeSeries to specified frequency. Optionally provide filling method to pad/backfill missing values.\n",
55 | "\n",
56 | "Frequency strings can be found here:\n",
57 | "* https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases\n"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "cH5pugSnGda_",
64 | "colab_type": "code",
65 | "colab": {}
66 | },
67 | "source": [
68 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n",
69 | "df = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates=[0], index_col=0)"
70 | ],
71 | "execution_count": 0,
72 | "outputs": []
73 | },
74 | {
75 | "cell_type": "code",
76 | "metadata": {
77 | "id": "FC2-C0EFGdbB",
78 | "colab_type": "code",
79 | "colab": {}
80 | },
81 | "source": [
82 | "#df"
83 | ],
84 | "execution_count": 0,
85 | "outputs": []
86 | },
87 | {
88 | "cell_type": "code",
89 | "metadata": {
90 | "id": "Of_K8p5PGdbD",
91 | "colab_type": "code",
92 | "colab": {}
93 | },
94 | "source": [
95 | "df.index"
96 | ],
97 | "execution_count": 0,
98 | "outputs": []
99 | },
100 | {
101 | "cell_type": "code",
102 | "metadata": {
103 | "id": "-PyQcksuGdbG",
104 | "colab_type": "code",
105 | "colab": {}
106 | },
107 | "source": [
108 | "# 12-hour frequency\n",
109 | "df.asfreq('12H').index"
110 | ],
111 | "execution_count": 0,
112 | "outputs": []
113 | },
114 | {
115 | "cell_type": "code",
116 | "metadata": {
117 | "id": "yvS6NMkcGdbJ",
118 | "colab_type": "code",
119 | "colab": {}
120 | },
121 | "source": [
122 | "df.asfreq?"
123 | ],
124 | "execution_count": 0,
125 | "outputs": []
126 | },
127 | {
128 | "cell_type": "code",
129 | "metadata": {
130 | "id": "aSfeVALsGdbL",
131 | "colab_type": "code",
132 | "colab": {}
133 | },
134 | "source": [
135 | "df = df.asfreq('12H')"
136 | ],
137 | "execution_count": 0,
138 | "outputs": []
139 | },
140 | {
141 | "cell_type": "code",
142 | "metadata": {
143 | "id": "AgACx7iqGdbO",
144 | "colab_type": "code",
145 | "colab": {}
146 | },
147 | "source": [
148 | "df"
149 | ],
150 | "execution_count": 0,
151 | "outputs": []
152 | },
153 | {
154 | "cell_type": "code",
155 | "metadata": {
156 | "id": "GmNPGuxNGdbR",
157 | "colab_type": "code",
158 | "colab": {}
159 | },
160 | "source": [
161 | "df.loc['2011-10-13 00:00']"
162 | ],
163 | "execution_count": 0,
164 | "outputs": []
165 | },
166 | {
167 | "cell_type": "code",
168 | "metadata": {
169 | "id": "YE4Y-AOYGdbU",
170 | "colab_type": "code",
171 | "colab": {}
172 | },
173 | "source": [
174 | "# Hours to Days --> Downsampling\n",
175 | "df.asfreq('D')"
176 | ],
177 | "execution_count": 0,
178 | "outputs": []
179 | },
180 | {
181 | "cell_type": "code",
182 | "metadata": {
183 | "id": "8AoCzUlXGdbd",
184 | "colab_type": "code",
185 | "colab": {}
186 | },
187 | "source": [
188 | "# Days to hours --> Upsampling\n",
189 | "df.asfreq('8H')"
190 | ],
191 | "execution_count": 0,
192 | "outputs": []
193 | },
194 | {
195 | "cell_type": "code",
196 | "metadata": {
197 | "id": "727TYZP8Gdbh",
198 | "colab_type": "code",
199 | "colab": {}
200 | },
201 | "source": [
202 | "# ffill: propagate the last valid observation forward to the next valid one\n",
203 | "# Note: this does not fill NaNs that already were present\n",
204 | "df.asfreq('4H', method='ffill')"
205 | ],
206 | "execution_count": 0,
207 | "outputs": []
208 | },
209 | {
210 | "cell_type": "code",
211 | "metadata": {
212 | "id": "LPp1_GhsGdbj",
213 | "colab_type": "code",
214 | "colab": {}
215 | },
216 | "source": [
217 | "# bfill: use the NEXT valid observation to fill\n",
218 | "# Note: this does not fill NaNs that were already present\n",
219 | "df.asfreq('2H',method='bfill')"
220 | ],
221 | "execution_count": 0,
222 | "outputs": []
223 | },
224 | {
225 | "cell_type": "code",
226 | "metadata": {
227 | "id": "ayuofNoLGdbm",
228 | "colab_type": "code",
229 | "colab": {}
230 | },
231 | "source": [
232 | "# To fill with a certain default value\n",
233 | "# Note: this does not fill NaNs that were already present\n",
234 | "df.asfreq('H',fill_value=9.0)"
235 | ],
236 | "execution_count": 0,
237 | "outputs": []
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "id": "1pANcBTEGdbo",
243 | "colab_type": "text"
244 | },
245 | "source": [
246 | "## resample\n",
247 | "\n",
248 | "Convenience method for frequency conversion and resampling of time\n",
249 | "series."
250 | ]
251 | },
252 | {
253 | "cell_type": "code",
254 | "metadata": {
255 | "id": "lKPkDOl5Gdbo",
256 | "colab_type": "code",
257 | "colab": {}
258 | },
259 | "source": [
260 | "df.resample?"
261 | ],
262 | "execution_count": 0,
263 | "outputs": []
264 | },
265 | {
266 | "cell_type": "code",
267 | "metadata": {
268 | "id": "hH_s-GohGdbq",
269 | "colab_type": "code",
270 | "colab": {}
271 | },
272 | "source": [
273 | "df.resample('2H')"
274 | ],
275 | "execution_count": 0,
276 | "outputs": []
277 | },
278 | {
279 | "cell_type": "code",
280 | "metadata": {
281 | "id": "NfkOW9P2Gdbs",
282 | "colab_type": "code",
283 | "colab": {}
284 | },
285 | "source": [
286 | "df.resample('2H').sum()"
287 | ],
288 | "execution_count": 0,
289 | "outputs": []
290 | },
291 | {
292 | "cell_type": "code",
293 | "metadata": {
294 | "id": "Krgy735lGdbv",
295 | "colab_type": "code",
296 | "colab": {}
297 | },
298 | "source": [
299 | "def get_range(x):\n",
300 | " return x.max() - x.min()"
301 | ],
302 | "execution_count": 0,
303 | "outputs": []
304 | },
305 | {
306 | "cell_type": "code",
307 | "metadata": {
308 | "id": "fNCHOV65Gdbx",
309 | "colab_type": "code",
310 | "colab": {}
311 | },
312 | "source": [
313 | "df.resample('M').agg({'mean', 'max', get_range})"
314 | ],
315 | "execution_count": 0,
316 | "outputs": []
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "metadata": {
321 | "id": "md1T2hWQGdb1",
322 | "colab_type": "text"
323 | },
324 | "source": [
325 | "## fillna\n",
326 | "\n",
327 | "Fill NA/NaN values using the specified method"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "metadata": {
333 | "id": "g4Ybxew4Gdb3",
334 | "colab_type": "code",
335 | "colab": {}
336 | },
337 | "source": [
338 | "df1 = df.asfreq('6M')"
339 | ],
340 | "execution_count": 0,
341 | "outputs": []
342 | },
343 | {
344 | "cell_type": "code",
345 | "metadata": {
346 | "id": "EqYnq0IvGdb6",
347 | "colab_type": "code",
348 | "colab": {}
349 | },
350 | "source": [
351 | "df1"
352 | ],
353 | "execution_count": 0,
354 | "outputs": []
355 | },
356 | {
357 | "cell_type": "code",
358 | "metadata": {
359 | "id": "hsbbZd_SGdb9",
360 | "colab_type": "code",
361 | "colab": {}
362 | },
363 | "source": [
364 | "df1.fillna?"
365 | ],
366 | "execution_count": 0,
367 | "outputs": []
368 | },
369 | {
370 | "cell_type": "code",
371 | "metadata": {
372 | "id": "ulGkbCJRGdcA",
373 | "colab_type": "code",
374 | "colab": {}
375 | },
376 | "source": [
377 | "df1.fillna(method='ffill', inplace=True)"
378 | ],
379 | "execution_count": 0,
380 | "outputs": []
381 | },
382 | {
383 | "cell_type": "code",
384 | "metadata": {
385 | "id": "aVh8UNQqGdcE",
386 | "colab_type": "code",
387 | "colab": {}
388 | },
389 | "source": [
390 | "df1"
391 | ],
392 | "execution_count": 0,
393 | "outputs": []
394 | },
395 | {
396 | "cell_type": "markdown",
397 | "metadata": {
398 | "id": "UH2gCQXbGdcG",
399 | "colab_type": "text"
400 | },
401 | "source": [
402 | "## reindex\n",
403 | "\n",
404 | " Change Series to new index with optional filling logic"
405 | ]
406 | },
407 | {
408 | "cell_type": "code",
409 | "metadata": {
410 | "id": "noGRluXxGdcH",
411 | "colab_type": "code",
412 | "colab": {}
413 | },
414 | "source": [
415 | "df2 = df.resample('Y').mean()"
416 | ],
417 | "execution_count": 0,
418 | "outputs": []
419 | },
420 | {
421 | "cell_type": "code",
422 | "metadata": {
423 | "id": "RrKqFBcOGdcL",
424 | "colab_type": "code",
425 | "colab": {}
426 | },
427 | "source": [
428 | "df2"
429 | ],
430 | "execution_count": 0,
431 | "outputs": []
432 | },
433 | {
434 | "cell_type": "code",
435 | "metadata": {
436 | "id": "j1rBA2ziGdcN",
437 | "colab_type": "code",
438 | "colab": {}
439 | },
440 | "source": [
441 | "date_index = pd.date_range('20071201', end='20111231', freq='7D')"
442 | ],
443 | "execution_count": 0,
444 | "outputs": []
445 | },
446 | {
447 | "cell_type": "code",
448 | "metadata": {
449 | "id": "VRf9hIl0GdcQ",
450 | "colab_type": "code",
451 | "colab": {}
452 | },
453 | "source": [
454 | "date_index"
455 | ],
456 | "execution_count": 0,
457 | "outputs": []
458 | },
459 | {
460 | "cell_type": "code",
461 | "metadata": {
462 | "id": "iT2LwyldGdcS",
463 | "colab_type": "code",
464 | "colab": {}
465 | },
466 | "source": [
467 | "df2 = df2.reindex(date_index,method='ffill')"
468 | ],
469 | "execution_count": 0,
470 | "outputs": []
471 | },
472 | {
473 | "cell_type": "code",
474 | "metadata": {
475 | "id": "Vrwe5z4EGdcX",
476 | "colab_type": "code",
477 | "colab": {}
478 | },
479 | "source": [
480 | "df2"
481 | ],
482 | "execution_count": 0,
483 | "outputs": []
484 | },
485 | {
486 | "cell_type": "code",
487 | "metadata": {
488 | "id": "Kl69m-2QGdcZ",
489 | "colab_type": "code",
490 | "colab": {}
491 | },
492 | "source": [
493 | "df2.fillna(method='ffill', inplace=True)"
494 | ],
495 | "execution_count": 0,
496 | "outputs": []
497 | },
498 | {
499 | "cell_type": "code",
500 | "metadata": {
501 | "id": "TQ2T9TpOGdcb",
502 | "colab_type": "code",
503 | "colab": {}
504 | },
505 | "source": [
506 | "df2"
507 | ],
508 | "execution_count": 0,
509 | "outputs": []
510 | },
511 | {
512 | "cell_type": "code",
513 | "metadata": {
514 | "id": "fRiw0lqdGdcd",
515 | "colab_type": "code",
516 | "colab": {}
517 | },
518 | "source": [
519 | "df2.fillna(value='0')"
520 | ],
521 | "execution_count": 0,
522 | "outputs": []
523 | },
524 | {
525 | "cell_type": "markdown",
526 | "metadata": {
527 | "id": "jniNyCQKGdcf",
528 | "colab_type": "text"
529 | },
530 | "source": [
531 | "# Exercise"
532 | ]
533 | },
534 | {
535 | "cell_type": "code",
536 | "metadata": {
537 | "id": "Sn_nwmOgGdcg",
538 | "colab_type": "code",
539 | "colab": {}
540 | },
541 | "source": [
542 | "# Q3: \n",
543 | "# read \"../data/exercise3.csv\"\n",
544 | "# change 'Date' column to datetime\n",
545 | "# set 'Date' as index\n",
546 | "# fillna using 'bfill'\n",
547 | "# resample to one month"
548 | ],
549 | "execution_count": 0,
550 | "outputs": []
551 | },
552 | {
553 | "cell_type": "code",
554 | "metadata": {
555 | "id": "zWjDMXsbGdci",
556 | "colab_type": "code",
557 | "colab": {}
558 | },
559 | "source": [
560 | ""
561 | ],
562 | "execution_count": 0,
563 | "outputs": []
564 | }
565 | ]
566 | }
--------------------------------------------------------------------------------
/Data Analysis with Pandas/04.Window and Lags.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "04.Window and Lags.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "KpowcnYnGerc",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import pandas as pd\n",
38 | "import numpy as np\n",
39 | "import matplotlib.pyplot as plt\n",
40 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n",
41 | "import warnings\n",
42 | "warnings.filterwarnings('ignore')\n",
43 | "import seaborn as sns\n",
44 | "import datetime"
45 | ],
46 | "execution_count": 0,
47 | "outputs": []
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "id": "p18JZJ1UGerj",
53 | "colab_type": "text"
54 | },
55 | "source": [
56 | "# Window and Lags\n"
57 | ]
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "id": "2LvSOgUZGerl",
63 | "colab_type": "text"
64 | },
65 | "source": [
66 | "## shift & diff\n",
67 | "\n",
68 | " Shift index by desired number of periods with an optional time freq."
69 | ]
70 | },
71 | {
72 | "cell_type": "code",
73 | "metadata": {
74 | "id": "D0emyIc6Germ",
75 | "colab_type": "code",
76 | "colab": {}
77 | },
78 | "source": [
79 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n",
80 | "df = pd.read_csv(colab_path+\"data/stock_data.csv\")"
81 | ],
82 | "execution_count": 0,
83 | "outputs": []
84 | },
85 | {
86 | "cell_type": "code",
87 | "metadata": {
88 | "id": "lHxLL_qpGerr",
89 | "colab_type": "code",
90 | "colab": {}
91 | },
92 | "source": [
93 | "df['AAPL_return'] = df['AAPL'] / df['AAPL'].shift(1)"
94 | ],
95 | "execution_count": 0,
96 | "outputs": []
97 | },
98 | {
99 | "cell_type": "code",
100 | "metadata": {
101 | "id": "RapVGXpUGeru",
102 | "colab_type": "code",
103 | "colab": {}
104 | },
105 | "source": [
106 | "df.head()"
107 | ],
108 | "execution_count": 0,
109 | "outputs": []
110 | },
111 | {
112 | "cell_type": "code",
113 | "metadata": {
114 | "id": "VYsMGKm7Gerw",
115 | "colab_type": "code",
116 | "colab": {}
117 | },
118 | "source": [
119 | "#df"
120 | ],
121 | "execution_count": 0,
122 | "outputs": []
123 | },
124 | {
125 | "cell_type": "code",
126 | "metadata": {
127 | "id": "CgpwEyOnGery",
128 | "colab_type": "code",
129 | "colab": {}
130 | },
131 | "source": [
132 | "df.diff?"
133 | ],
134 | "execution_count": 0,
135 | "outputs": []
136 | },
137 | {
138 | "cell_type": "code",
139 | "metadata": {
140 | "id": "bMO5GwKNGer1",
141 | "colab_type": "code",
142 | "colab": {}
143 | },
144 | "source": [
145 | "df['AAPL_5D_range'] = df['AAPL'].diff(5)"
146 | ],
147 | "execution_count": 0,
148 | "outputs": []
149 | },
150 | {
151 | "cell_type": "code",
152 | "metadata": {
153 | "id": "jhN3Ze_UGer4",
154 | "colab_type": "code",
155 | "colab": {}
156 | },
157 | "source": [
158 | "df[['AAPL', 'AAPL_5D_range']].plot(figsize=(20,10), secondary_y='AAPL_5D_range')\n"
159 | ],
160 | "execution_count": 0,
161 | "outputs": []
162 | },
163 | {
164 | "cell_type": "markdown",
165 | "metadata": {
166 | "id": "k6YFJCaVGer7",
167 | "colab_type": "text"
168 | },
169 | "source": [
170 | "## Rolling\n",
171 | "\n",
172 | " Provide rolling window calculations"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "metadata": {
178 | "id": "LPfldfyOGer9",
179 | "colab_type": "code",
180 | "colab": {}
181 | },
182 | "source": [
183 | "df.rolling?\n"
184 | ],
185 | "execution_count": 0,
186 | "outputs": []
187 | },
188 | {
189 | "cell_type": "code",
190 | "metadata": {
191 | "id": "50w9aOKRGesB",
192 | "colab_type": "code",
193 | "colab": {}
194 | },
195 | "source": [
196 | "df.rolling(30)"
197 | ],
198 | "execution_count": 0,
199 | "outputs": []
200 | },
201 | {
202 | "cell_type": "code",
203 | "metadata": {
204 | "id": "VOModRMxGesE",
205 | "colab_type": "code",
206 | "colab": {}
207 | },
208 | "source": [
209 | "df['AAPL_rollmean'] = df['AAPL'].rolling(30).mean()"
210 | ],
211 | "execution_count": 0,
212 | "outputs": []
213 | },
214 | {
215 | "cell_type": "code",
216 | "metadata": {
217 | "id": "MNUp1JY8GesH",
218 | "colab_type": "code",
219 | "colab": {}
220 | },
221 | "source": [
222 | "df.head(50)"
223 | ],
224 | "execution_count": 0,
225 | "outputs": []
226 | },
227 | {
228 | "cell_type": "code",
229 | "metadata": {
230 | "id": "tfgaP_EVGesK",
231 | "colab_type": "code",
232 | "colab": {}
233 | },
234 | "source": [
235 | "df[['AAPL', 'AAPL_rollmean']].plot(figsize=(20,10))"
236 | ],
237 | "execution_count": 0,
238 | "outputs": []
239 | },
240 | {
241 | "cell_type": "markdown",
242 | "metadata": {
243 | "id": "2-gzcku6GesN",
244 | "colab_type": "text"
245 | },
246 | "source": [
247 | "## expanding"
248 | ]
249 | },
250 | {
251 | "cell_type": "code",
252 | "metadata": {
253 | "id": "FJO9PVM8GesN",
254 | "colab_type": "code",
255 | "colab": {}
256 | },
257 | "source": [
258 | "df['AAPL'].expanding().max()"
259 | ],
260 | "execution_count": 0,
261 | "outputs": []
262 | },
263 | {
264 | "cell_type": "code",
265 | "metadata": {
266 | "id": "xSzd7crwGesP",
267 | "colab_type": "code",
268 | "colab": {}
269 | },
270 | "source": [
271 | "#cumulative product\n",
272 | "df['AAPL_return'].cumprod()\n"
273 | ],
274 | "execution_count": 0,
275 | "outputs": []
276 | },
277 | {
278 | "cell_type": "code",
279 | "metadata": {
280 | "id": "10PrE_vDGesR",
281 | "colab_type": "code",
282 | "colab": {}
283 | },
284 | "source": [
285 | "#df['AAPL_return'].cumprod"
286 | ],
287 | "execution_count": 0,
288 | "outputs": []
289 | },
290 | {
291 | "cell_type": "markdown",
292 | "metadata": {
293 | "id": "QVfoNrmtGesW",
294 | "colab_type": "text"
295 | },
296 | "source": [
297 | "# Exercise"
298 | ]
299 | },
300 | {
301 | "cell_type": "code",
302 | "metadata": {
303 | "id": "laq-mlElGesX",
304 | "colab_type": "code",
305 | "colab": {}
306 | },
307 | "source": [
308 | "# Q4:\n",
309 | "# read \"../data/exercise_4.csv\"\n",
310 | "# create temp_1 column with shift 1 \n",
311 | "# create temp_diff column with diff 2\n",
312 | "# create a temp_roll column by calculating the rolling mean with a 10-sample window\n",
313 | "# delete all NaN values"
314 | ],
315 | "execution_count": 0,
316 | "outputs": []
317 | }
318 | ]
319 | }
--------------------------------------------------------------------------------
/Data Analysis with Pandas/05. corr and autocorr.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "05. corr and autocorr.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "-vDPbuyJGd6x",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import pandas as pd\n",
38 | "import numpy as np\n",
39 | "import matplotlib.pyplot as plt\n",
40 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n",
41 | "import warnings\n",
42 | "warnings.filterwarnings('ignore')\n",
43 | "import seaborn as sns\n",
44 | "import datetime"
45 | ],
46 | "execution_count": 0,
47 | "outputs": []
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "id": "-onwMnOEGd64",
53 | "colab_type": "text"
54 | },
55 | "source": [
56 | "# Correlation and Auto Correlation\n",
57 | "\n",
58 | "\n",
59 | "## correlations\n",
60 | " What is correlation?\n",
61 | " Correlation is a statistical measure that indicates the extent to which two or more variables fluctuate together. A positive correlation indicates the extent to which those variables increase or decrease in parallel; a negative correlation indicates the extent to which one variable increases as the other decreases.\n",
62 | "\n"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "metadata": {
68 | "id": "8krbyZ6vGd65",
69 | "colab_type": "code",
70 | "colab": {}
71 | },
72 | "source": [
73 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n",
74 | "s = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates= [0], index_col=[0])"
75 | ],
76 | "execution_count": 0,
77 | "outputs": []
78 | },
79 | {
80 | "cell_type": "code",
81 | "metadata": {
82 | "id": "OcUeUINdGd68",
83 | "colab_type": "code",
84 | "colab": {}
85 | },
86 | "source": [
87 | "#Investigate if the data has missing values\n",
88 | "# If only a few values are missing, drop them; otherwise try imputing them using some method"
89 | ],
90 | "execution_count": 0,
91 | "outputs": []
92 | },
93 | {
94 | "cell_type": "code",
95 | "metadata": {
96 | "id": "S2EYk_mQGd7A",
97 | "colab_type": "code",
98 | "colab": {}
99 | },
100 | "source": [
101 | "# Computes pairwise correlation of columns, excluding NA/null values.\n",
102 | "# various methods available are: {‘pearson’, ‘kendall’, ‘spearman’} or callable\n",
103 | "# Pearson correlation: measures the linear association between continuous variables.\n",
104 | "# Spearman's rank correlation: measures monotonic association (only strictly increasing or decreasing, but not mixed) \n",
105 | "# This makes it appropriate to use with both continuous and discrete data.\n",
106 | "# Kendall correlation: works well for discrete data\n",
107 | "s.corr()"
108 | ],
109 | "execution_count": 0,
110 | "outputs": []
111 | },
112 | {
113 | "cell_type": "code",
114 | "metadata": {
115 | "id": "s3DGnpSxGd7F",
116 | "colab_type": "code",
117 | "colab": {}
118 | },
119 | "source": [
120 | "#Plotting the values in a heatmap\n",
121 | "sns.heatmap(s.corr())"
122 | ],
123 | "execution_count": 0,
124 | "outputs": []
125 | },
126 | {
127 | "cell_type": "code",
128 | "metadata": {
129 | "id": "3mma-AH9Gd7J",
130 | "colab_type": "code",
131 | "colab": {}
132 | },
133 | "source": [
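134 | "# correlation between two columns can be computed as below (here AAPL against itself, which is trivially 1.0; pass a different column to compare two stocks)\n",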
135 | "s['AAPL'].corr(s['AAPL'])"
136 | ],
137 | "execution_count": 0,
138 | "outputs": []
139 | },
140 | {
141 | "cell_type": "code",
142 | "metadata": {
143 | "id": "RnRp1vsgGd7a",
144 | "colab_type": "code",
145 | "colab": {}
146 | },
147 | "source": [
148 | ""
149 | ],
150 | "execution_count": 0,
151 | "outputs": []
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {
156 | "id": "kq2G_3ydGd7e",
157 | "colab_type": "text"
158 | },
159 | "source": [
160 | "## auto correlation\n",
161 | "\n",
162 | " Autocorrelation is the correlation between the elements of a series and other elements of the same series separated from them by a given interval (the lag)."
163 | ]
164 | },
165 | {
166 | "cell_type": "code",
167 | "metadata": {
168 | "id": "8k01gqk1Gd7f",
169 | "colab_type": "code",
170 | "colab": {}
171 | },
172 | "source": [
173 | "from statsmodels.tsa.stattools import *\n",
174 | "from statsmodels.graphics.tsaplots import *"
175 | ],
176 | "execution_count": 0,
177 | "outputs": []
178 | },
179 | {
180 | "cell_type": "code",
181 | "metadata": {
182 | "id": "9RVV-HMtGd7k",
183 | "colab_type": "code",
184 | "colab": {}
185 | },
186 | "source": [
187 | "#The NumPy linspace function (sometimes called np.linspace) is a tool in Python for creating numeric sequences.\n",
188 | "# Trigonometric sine\n",
189 | "data = np.sin(np.linspace(start=0,stop=100,num=100))"
190 | ],
191 | "execution_count": 0,
192 | "outputs": []
193 | },
194 | {
195 | "cell_type": "code",
196 | "metadata": {
197 | "id": "Is1W5ckJGd7o",
198 | "colab_type": "code",
199 | "colab": {}
200 | },
201 | "source": [
202 | "# Autocorrelation function for 1d arrays.\n",
203 | "# correlation between the elements of a series and others from the same series separated from them by a given interval.\n",
204 | "ac_res = acf(data, nlags=100)\n"
205 | ],
206 | "execution_count": 0,
207 | "outputs": []
208 | },
209 | {
210 | "cell_type": "code",
211 | "metadata": {
212 | "id": "JbPo9wF5Gd7t",
213 | "colab_type": "code",
214 | "colab": {}
215 | },
216 | "source": [
217 | "plt.plot(ac_res)  # autocorrelation of the sine series at each lag\n",
218 | "plt.axhline(y=-1.96/np.sqrt(len(data)), linestyle='--')  # lower approx. 95% bound: -1.96/sqrt(N), N = number of samples\n",
219 | "plt.axhline(y=1.96/np.sqrt(len(data)), linestyle='--')  # upper approx. 95% bound: +1.96/sqrt(N)"
220 | ],
221 | "execution_count": 0,
222 | "outputs": []
223 | },
224 | {
225 | "cell_type": "code",
226 | "metadata": {
227 | "id": "euwVA5Y8Gd7z",
228 | "colab_type": "code",
229 | "colab": {}
230 | },
231 | "source": [
232 | "ap = pd.read_csv(colab_path+'data/AirPassengers.csv', header=0, parse_dates=[0], index_col=0)"
233 | ],
234 | "execution_count": 0,
235 | "outputs": []
236 | },
237 | {
238 | "cell_type": "code",
239 | "metadata": {
240 | "id": "SlPuvI6QGd72",
241 | "colab_type": "code",
242 | "colab": {}
243 | },
244 | "source": [
245 | "ap.plot()"
246 | ],
247 | "execution_count": 0,
248 | "outputs": []
249 | },
250 | {
251 | "cell_type": "code",
252 | "metadata": {
253 | "id": "oCDdTuMGGd77",
254 | "colab_type": "code",
255 | "colab": {}
256 | },
257 | "source": [
258 | "plt.plot(acf(ap))"
259 | ],
260 | "execution_count": 0,
261 | "outputs": []
262 | },
263 | {
264 | "cell_type": "code",
265 | "metadata": {
266 | "id": "3b-3ktcRGd7-",
267 | "colab_type": "code",
268 | "colab": {}
269 | },
270 | "source": [
271 | "ac_plot = plot_acf(ap)"
272 | ],
273 | "execution_count": 0,
274 | "outputs": []
275 | },
276 | {
277 | "cell_type": "code",
278 | "metadata": {
279 | "id": "-Oc0Eb8wGd8C",
280 | "colab_type": "code",
281 | "colab": {}
282 | },
283 | "source": [
284 | "dtrend_ap = np.log(ap).diff().dropna()"
285 | ],
286 | "execution_count": 0,
287 | "outputs": []
288 | },
289 | {
290 | "cell_type": "code",
291 | "metadata": {
292 | "id": "ekAHgfhlGd8F",
293 | "colab_type": "code",
294 | "colab": {}
295 | },
296 | "source": [
297 | "dtrend_ap.plot()"
298 | ],
299 | "execution_count": 0,
300 | "outputs": []
301 | },
302 | {
303 | "cell_type": "code",
304 | "metadata": {
305 | "id": "Y73uSCj4Gd8I",
306 | "colab_type": "code",
307 | "colab": {}
308 | },
309 | "source": [
310 | "acf_plot = plot_acf(dtrend_ap)"
311 | ],
312 | "execution_count": 0,
313 | "outputs": []
314 | },
315 | {
316 | "cell_type": "code",
317 | "metadata": {
318 | "id": "kuatoefiGd8L",
319 | "colab_type": "code",
320 | "colab": {}
321 | },
322 | "source": [
323 | "# Q5:\n",
324 | "# read \"../data/stock_data.csv\"\n",
325 | "# Plot acf using statsmodels for IBM stock data\n",
326 | "# detrend the series\n",
327 | "# plot the detrended data"
328 | ],
329 | "execution_count": 0,
330 | "outputs": []
331 | },
332 | {
333 | "cell_type": "code",
334 | "metadata": {
335 | "id": "ORKAF9yQGd8O",
336 | "colab_type": "code",
337 | "colab": {}
338 | },
339 | "source": [
340 | ""
341 | ],
342 | "execution_count": 0,
343 | "outputs": []
344 | }
345 | ]
346 | }
--------------------------------------------------------------------------------
/Financial Time Series/01.Financial Time Series Analysis.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.7.1"
21 | },
22 | "colab": {
23 | "name": "01.Financial Time Series Analysis.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "uqgDqJ64GevB",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "## Imports"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "id": "V7qlTXfFGevD",
43 | "colab_type": "code",
44 | "colab": {}
45 | },
46 | "source": [
47 | "import math\n",
48 | "import numpy as np\n",
49 | "import pandas as pd\n",
50 | "from pylab import plt\n",
51 | "\n",
52 | "plt.style.use('seaborn')\n",
53 | "%matplotlib inline"
54 | ],
55 | "execution_count": 0,
56 | "outputs": []
57 | },
58 | {
59 | "cell_type": "markdown",
60 | "metadata": {
61 | "id": "qiER5T0sGevI",
62 | "colab_type": "text"
63 | },
64 | "source": [
65 | "## Data Preprocessing"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "metadata": {
71 | "id": "gRqcgvlLGevJ",
72 | "colab_type": "code",
73 | "colab": {}
74 | },
75 | "source": [
76 | "%%time\n",
77 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n",
78 | "df = pd.read_csv(colab_path+'data/yesbank_data.csv', index_col=0, parse_dates=True)"
79 | ],
80 | "execution_count": 0,
81 | "outputs": []
82 | },
83 | {
84 | "cell_type": "code",
85 | "metadata": {
86 | "id": "eYot4tJJGevO",
87 | "colab_type": "code",
88 | "colab": {}
89 | },
90 | "source": [
91 | "df.rename(columns={'close': 'c', 'open':'o', 'high':'h', 'low':'l', 'volume':'v'}, inplace=True)"
92 | ],
93 | "execution_count": 0,
94 | "outputs": []
95 | },
96 | {
97 | "cell_type": "code",
98 | "metadata": {
99 | "id": "3ab7Q9DLGevS",
100 | "colab_type": "code",
101 | "colab": {}
102 | },
103 | "source": [
104 | "df.head()"
105 | ],
106 | "execution_count": 0,
107 | "outputs": []
108 | },
109 | {
110 | "cell_type": "code",
111 | "metadata": {
112 | "id": "EkMzXxSZGevV",
113 | "colab_type": "code",
114 | "colab": {}
115 | },
116 | "source": [
117 | "df['c'].plot()"
118 | ],
119 | "execution_count": 0,
120 | "outputs": []
121 | },
122 | {
123 | "cell_type": "markdown",
124 | "metadata": {
125 | "id": "do7ljwgkGevY",
126 | "colab_type": "text"
127 | },
128 | "source": [
129 | "## Implementing a simple crossover strategy"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "metadata": {
135 | "id": "HQjXYvTLGeva",
136 | "colab_type": "code",
137 | "colab": {}
138 | },
139 | "source": [
140 | "df['r'] = np.log(df['c'] / df['c'].shift(1))\n",
141 | "df['sma1'] = df['c'].rolling(15).mean()\n",
142 | "df['sma2'] = df['c'].rolling(30).mean()\n",
143 | "df['sma3'] = df['c'].rolling(60).mean()"
144 | ],
145 | "execution_count": 0,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "pnXo5aOsGeve",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "df.dropna(inplace=True)"
157 | ],
158 | "execution_count": 0,
159 | "outputs": []
160 | },
161 | {
162 | "cell_type": "code",
163 | "metadata": {
164 | "id": "ysBgZsT2Gevi",
165 | "colab_type": "code",
166 | "colab": {}
167 | },
168 | "source": [
169 | "df['market_dir'] = np.where(df['r'] > 0, 1, -1)"
170 | ],
171 | "execution_count": 0,
172 | "outputs": []
173 | },
174 | {
175 | "cell_type": "code",
176 | "metadata": {
177 | "id": "15OEulyTGevm",
178 | "colab_type": "code",
179 | "colab": {}
180 | },
181 | "source": [
182 | "df['strat_dir'] = np.where(df['sma1'] > df['sma3'], 1, -1)"
183 | ],
184 | "execution_count": 0,
185 | "outputs": []
186 | },
187 | {
188 | "cell_type": "code",
189 | "metadata": {
190 | "id": "-Fv5cyBbGevp",
191 | "colab_type": "code",
192 | "colab": {}
193 | },
194 | "source": [
195 | "df.head(10)"
196 | ],
197 | "execution_count": 0,
198 | "outputs": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "metadata": {
203 | "id": "G3JT2IadGevu",
204 | "colab_type": "code",
205 | "colab": {}
206 | },
207 | "source": [
208 | "df['strat_dir'].diff().value_counts()"
209 | ],
210 | "execution_count": 0,
211 | "outputs": []
212 | },
213 | {
214 | "cell_type": "code",
215 | "metadata": {
216 | "id": "nNsN3QDeGev0",
217 | "colab_type": "code",
218 | "colab": {}
219 | },
220 | "source": [
221 | "df['s'] = df['strat_dir'] * df['r']"
222 | ],
223 | "execution_count": 0,
224 | "outputs": []
225 | },
226 | {
227 | "cell_type": "code",
228 | "metadata": {
229 | "id": "TTkhLu3hGev4",
230 | "colab_type": "code",
231 | "colab": {}
232 | },
233 | "source": [
234 | "df[['r', 's']].cumsum().apply(np.exp).plot(figsize=(10, 10));"
235 | ],
236 | "execution_count": 0,
237 | "outputs": []
238 | },
239 | {
240 | "cell_type": "markdown",
241 | "metadata": {
242 | "id": "TpPHBq0BGev6",
243 | "colab_type": "text"
244 | },
245 | "source": [
246 | "## Add a machine learning model and perform accuracy testing"
247 | ]
248 | },
249 | {
250 | "cell_type": "code",
251 | "metadata": {
252 | "id": "XWZ3Kyk5Gev8",
253 | "colab_type": "code",
254 | "colab": {}
255 | },
256 | "source": [
257 | "from sklearn.linear_model import LogisticRegression\n",
258 | "from sklearn.metrics import accuracy_score"
259 | ],
260 | "execution_count": 0,
261 | "outputs": []
262 | },
263 | {
264 | "cell_type": "code",
265 | "metadata": {
266 | "id": "xPFqYhcrGewB",
267 | "colab_type": "code",
268 | "colab": {}
269 | },
270 | "source": [
271 | "mdf = df.copy()"
272 | ],
273 | "execution_count": 0,
274 | "outputs": []
275 | },
276 | {
277 | "cell_type": "code",
278 | "metadata": {
279 | "id": "K1ShkwN4GewE",
280 | "colab_type": "code",
281 | "colab": {}
282 | },
283 | "source": [
284 | "for i in range(1,6):\n",
285 | " mdf[f'lag_{i}'] = mdf['market_dir'].shift(i)"
286 | ],
287 | "execution_count": 0,
288 | "outputs": []
289 | },
290 | {
291 | "cell_type": "code",
292 | "metadata": {
293 | "id": "eKFawK2jGewH",
294 | "colab_type": "code",
295 | "colab": {}
296 | },
297 | "source": [
298 | "mdf.dropna(inplace=True)\n",
299 | "mdf.head()"
300 | ],
301 | "execution_count": 0,
302 | "outputs": []
303 | },
304 | {
305 | "cell_type": "code",
306 | "metadata": {
307 | "id": "zMYT9E6UGewJ",
308 | "colab_type": "code",
309 | "colab": {}
310 | },
311 | "source": [
312 | "X = mdf[['lag_1','lag_2', 'lag_3', 'lag_4','lag_5']]"
313 | ],
314 | "execution_count": 0,
315 | "outputs": []
316 | },
317 | {
318 | "cell_type": "code",
319 | "metadata": {
320 | "id": "bGTqJVD3GewM",
321 | "colab_type": "code",
322 | "colab": {}
323 | },
324 | "source": [
325 | "y = mdf['market_dir']"
326 | ],
327 | "execution_count": 0,
328 | "outputs": []
329 | },
330 | {
331 | "cell_type": "code",
332 | "metadata": {
333 | "id": "v6Nt5G6wGewQ",
334 | "colab_type": "code",
335 | "colab": {}
336 | },
337 | "source": [
338 | "X_train = X[:500]\n",
339 | "X_test = X[500:]\n",
340 | "y_train = y[:500]\n",
341 | "y_test = y[500:]"
342 | ],
343 | "execution_count": 0,
344 | "outputs": []
345 | },
346 | {
347 | "cell_type": "code",
348 | "metadata": {
349 | "id": "QcLdo1KpGewU",
350 | "colab_type": "code",
351 | "colab": {}
352 | },
353 | "source": [
354 | "classifier = LogisticRegression()"
355 | ],
356 | "execution_count": 0,
357 | "outputs": []
358 | },
359 | {
360 | "cell_type": "code",
361 | "metadata": {
362 | "id": "qcCSy8ksGewX",
363 | "colab_type": "code",
364 | "colab": {}
365 | },
366 | "source": [
367 | "from sklearn.svm import SVC\n",
368 | "classifier = SVC(kernel='linear')"
369 | ],
370 | "execution_count": 0,
371 | "outputs": []
372 | },
373 | {
374 | "cell_type": "code",
375 | "metadata": {
376 | "id": "FUOeiiNzGewZ",
377 | "colab_type": "code",
378 | "colab": {}
379 | },
380 | "source": [
381 | "classifier.fit(X_train,y_train)"
382 | ],
383 | "execution_count": 0,
384 | "outputs": []
385 | },
386 | {
387 | "cell_type": "markdown",
388 | "metadata": {
389 | "id": "2G5Nx8g2Gewc",
390 | "colab_type": "text"
391 | },
392 | "source": [
393 | "## In Sample testing"
394 | ]
395 | },
396 | {
397 | "cell_type": "code",
398 | "metadata": {
399 | "id": "xez0UptIGewd",
400 | "colab_type": "code",
401 | "colab": {}
402 | },
403 | "source": [
404 | "y_pred = classifier.predict(X_train)\n",
405 | "accuracy_score(y_pred, y_train)"
406 | ],
407 | "execution_count": 0,
408 | "outputs": []
409 | },
410 | {
411 | "cell_type": "markdown",
412 | "metadata": {
413 | "id": "3RnPjrjpGewg",
414 | "colab_type": "text"
415 | },
416 | "source": [
417 | "## Holdout Testing"
418 | ]
419 | },
420 | {
421 | "cell_type": "code",
422 | "metadata": {
423 | "id": "1kseIkV4Gewh",
424 | "colab_type": "code",
425 | "colab": {}
426 | },
427 | "source": [
428 | "y_pred = classifier.predict(X_test)\n",
429 | "accuracy_score(y_test,y_pred)"
430 | ],
431 | "execution_count": 0,
432 | "outputs": []
433 | },
434 | {
435 | "cell_type": "code",
436 | "metadata": {
437 | "id": "3L55wa-rGewk",
438 | "colab_type": "code",
439 | "colab": {}
440 | },
441 | "source": [
442 | ""
443 | ],
444 | "execution_count": 0,
445 | "outputs": []
446 | }
447 | ]
448 | }
--------------------------------------------------------------------------------
/Financial Time Series/02. sentiment_score.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import json\n",
10 | "import requests\n",
11 | "import pandas as pd\n",
12 | "pd.options.display.max_colwidth = 1000"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "response = requests.get(\"https://api.bseindia.com/BseIndiaAPI/api/AnnGetData/w?strCat=-1&strPrevDate=20190730&strScrip=&strSearch=P&strToDate=20190730&strType=C\")"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "bse_news_json = json.loads(response.text)"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 8,
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "name": "stdout",
40 | "output_type": "stream",
41 | "text": [
42 | "WARNING:tensorflow:From /miniconda3/lib/python3.7/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
43 | "Instructions for updating:\n",
44 | "Colocations handled automatically by placer.\n",
45 | "WARNING:tensorflow:From /Users/ram/OneDrive/Talks/scipy/generating-reviews-discovering-sentiment/encoder.py:59: calling l2_normalize (from tensorflow.python.ops.nn_impl) with dim is deprecated and will be removed in a future version.\n",
46 | "Instructions for updating:\n",
47 | "dim is deprecated, use axis instead\n"
48 | ]
49 | }
50 | ],
51 | "source": [
52 | "from encoder import Model\n",
53 | "\n",
54 | "model = Model()"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 9,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "import re\n",
64 | "def clean_headline(headline):\n",
65 | " return ' '.join(re.sub(\"(@[A-Za-z0-9]+)|([^0-9A-Za-z \\t])|(\\w+:\\/\\/\\S+)|[0-9]\", \" \", headline).split())"
66 | ]
67 | },
68 | {
69 | "cell_type": "markdown",
70 | "metadata": {},
71 | "source": [
72 | "reference: https://modeldepot.io/afowler/sentiment-neuron"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 21,
78 | "metadata": {},
79 | "outputs": [],
80 | "source": [
81 | "ccd_news = [clean_headline(news['HEADLINE']) for news in bse_news_json['Table'] if 'Siddhartha' in news['HEADLINE']]"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 22,
87 | "metadata": {},
88 | "outputs": [
89 | {
90 | "name": "stdout",
91 | "output_type": "stream",
92 | "text": [
93 | "16.797 seconds to transform 5 examples\n",
94 | "-0.14523332\n"
95 | ]
96 | }
97 | ],
98 | "source": [
99 | "text_features = model.transform(ccd_news)\n",
100 | "print(text_features[:,2388][0])"
101 | ]
102 | },
103 | {
104 | "cell_type": "code",
105 | "execution_count": 33,
106 | "metadata": {},
107 | "outputs": [
108 | {
109 | "data": {
110 | "text/html": [
111 | "
\n",
112 | "\n",
125 | "
\n",
126 | " \n",
127 | " \n",
128 | " | \n",
129 | " 0 | \n",
130 | " 1 | \n",
131 | "
\n",
132 | " \n",
133 | " \n",
134 | " \n",
135 | " 0 | \n",
136 | " 0.009919 | \n",
137 | " This is to inform you that Mr V G Siddhartha Chairman and Managing Director of Coffee Day Enterprises Limited has not reachable since yesterday evening We are taking the help of concerned authorities Company is professionally managed and led by competent leadership team which will ensure continuity of business BR BR We will keep you posted as and when we receive further updates BR | \n",
138 | "
\n",
139 | " \n",
140 | " 1 | \n",
141 | " -0.068979 | \n",
142 | " The board of directors of the company held an emergency meeting today to discuss developments following its earlier communication to the stock exchanges BR BR A press release is attached Also attached is a copy of the letter purportedly signed by Mr V G Siddhartha dated July BR BR This is for your information and records BR | \n",
143 | "
\n",
144 | " \n",
145 | " 2 | \n",
146 | " -0.102269 | \n",
147 | " The Exchange has sought clarification from Sical Logistics Ltd on July with reference to news appeared in www moneycontrol com dated July quoting VG Siddhartha missing Helicopters Coast Guard called to find Cafe Coffee Day founder BR BR The reply is awaited | \n",
148 | "
\n",
149 | " \n",
150 | " 3 | \n",
151 | " -0.145233 | \n",
152 | " The Exchange has sought clarification from Coffee Day Enterprises Ltd with reference to the media reports titled Authenticity of Siddhartha s last note doubtful claims I T source BR BR Link Day Enterprises Ltd response is awaited | \n",
153 | "
\n",
154 | " \n",
155 | " 4 | \n",
156 | " -0.353844 | \n",
157 | " Certain media reports have published a document purportedly written by Mr V G Siddhartha and which is addressed to the Board and Employees of Coffee Data Enterprises However it has been observed that the Company has not disclosed the same with the Exchange BR BR In this regard a clarification has been sought from Coffee Day Enterprises Ltd with regard to non disclosure of material information related to Mr V G Siddhartha who is reportedly missing Company response is awaited | \n",
158 | "
\n",
159 | " \n",
160 | "
\n",
161 | "
"
162 | ],
163 | "text/plain": [
164 | " 0 \\\n",
165 | "0 0.009919 \n",
166 | "1 -0.068979 \n",
167 | "2 -0.102269 \n",
168 | "3 -0.145233 \n",
169 | "4 -0.353844 \n",
170 | "\n",
171 | " 1 \n",
172 | "0 This is to inform you that Mr V G Siddhartha Chairman and Managing Director of Coffee Day Enterprises Limited has not reachable since yesterday evening We are taking the help of concerned authorities Company is professionally managed and led by competent leadership team which will ensure continuity of business BR BR We will keep you posted as and when we receive further updates BR \n",
173 | "1 The board of directors of the company held an emergency meeting today to discuss developments following its earlier communication to the stock exchanges BR BR A press release is attached Also attached is a copy of the letter purportedly signed by Mr V G Siddhartha dated July BR BR This is for your information and records BR \n",
174 | "2 The Exchange has sought clarification from Sical Logistics Ltd on July with reference to news appeared in www moneycontrol com dated July quoting VG Siddhartha missing Helicopters Coast Guard called to find Cafe Coffee Day founder BR BR The reply is awaited \n",
175 | "3 The Exchange has sought clarification from Coffee Day Enterprises Ltd with reference to the media reports titled Authenticity of Siddhartha s last note doubtful claims I T source BR BR Link Day Enterprises Ltd response is awaited \n",
176 | "4 Certain media reports have published a document purportedly written by Mr V G Siddhartha and which is addressed to the Board and Employees of Coffee Data Enterprises However it has been observed that the Company has not disclosed the same with the Exchange BR BR In this regard a clarification has been sought from Coffee Day Enterprises Ltd with regard to non disclosure of material information related to Mr V G Siddhartha who is reportedly missing Company response is awaited "
177 | ]
178 | },
179 | "execution_count": 33,
180 | "metadata": {},
181 | "output_type": "execute_result"
182 | }
183 | ],
184 | "source": [
185 | "pd.DataFrame(set(zip(text_features[:,2388], ccd_news)))"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": null,
191 | "metadata": {},
192 | "outputs": [],
193 | "source": []
194 | }
195 | ],
196 | "metadata": {
197 | "kernelspec": {
198 | "display_name": "Python 3",
199 | "language": "python",
200 | "name": "python3"
201 | },
202 | "language_info": {
203 | "codemirror_mode": {
204 | "name": "ipython",
205 | "version": 3
206 | },
207 | "file_extension": ".py",
208 | "mimetype": "text/x-python",
209 | "name": "python",
210 | "nbconvert_exporter": "python",
211 | "pygments_lexer": "ipython3",
212 | "version": "3.6.1"
213 | }
214 | },
215 | "nbformat": 4,
216 | "nbformat_minor": 2
217 | }
218 |
--------------------------------------------------------------------------------
/ODSC Workshop.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/562b5b1189d6d5c93f8b1c89fb8ecbc42350024b/ODSC Workshop.pdf
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/poornagurram/TimeSeriesAnalysis_ODSC_2019/)
2 |
3 | # ODSC 2019
4 |
5 | ## Proposal
6 |
7 | # Time Series Analysis in Python Workshop
8 | “Time is precious, and so is Time Series Analysis”
9 |
10 | Time series analysis has been around for centuries, helping us solve everything from astronomical problems to business problems and today’s advanced scientific research. Time stores precious information that most machine learning algorithms don’t deal with. Time series analysis, which is a mix of machine learning and statistics, helps us extract useful insights from it. It can be applied to various fields such as economic forecasting, budgetary analysis, sales forecasting, census analysis and much more. In this workshop, we will dive deep into time series data and make use of deep learning to make accurate predictions.
11 |
12 | **Structure of the workshop goes like this**
13 |
14 | * Introduction to Time series analysis
15 | * Time Series Exploratory Data Analysis and Data manipulation with pandas
16 | * Forecast Time series data with some classical methods (AR, MA, ARMA, ARIMA, GARCH, E-GARCH)
17 | * Introduction to Deep Learning and Time series forecasting using MLP and LSTM
18 | * Forecasting using XGBoost
19 | * Financial Time Series data
20 |
21 | ## Libraries Used:
22 | > Install the libraries using `pip install -r requirements.txt`
23 |
24 | * Keras (with Tensorflow backend)
25 | * jupyter
26 | * matplotlib
27 | * pandas
28 | * statsmodels
29 | * sklearn
30 | * seaborn
31 | * arch
32 | * xgboost
33 |
34 | ## Outline/Structure of the Workshop
35 |
36 | * Introduction to Time series analysis (10 mins)
37 | * Time Series Exploratory Data Analysis and Data manipulation with pandas (45 mins)
38 | * Forecast Time series data with some classical methods (AR, MA, ARMA, ARIMA, GARCH, E-GARCH) (60 mins)
39 | * Introduction to Deep Learning and Time series forecasting using MLP and LSTM (60 mins)
40 | * Forecasting using XGBoost - (20 mins)
41 | * Financial Time Series data - (30 Mins)
42 |
43 | **Note: Session timings include exercises for attendees to work on**
44 |
45 | ## Prerequisites for Attendees
46 | * Basics of Python
47 | * Basics of Time series analysis
48 | * Basics of Pandas
49 | * Introduction to Deep Neural Networks
50 |
51 | ## Credits :
52 |
53 | * Immensely thankful for the great workshop by [Aileen Nielsen](https://github.com/AileenNielsen/TimeSeriesAnalysisWithPython)
54 | * Amazing blogs on Time Series by [Jason Brownlee](https://machinelearningmastery.com/)
55 |
56 | ## Tasks :
57 | * Non-Linear Time Series
58 | * Dataset Curation
59 | * Other Examples
60 | * Optimize pandas tutorial
61 | * Improve LSTM
62 |
--------------------------------------------------------------------------------
/Statistical models/01. Time Series components.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.7.1"
21 | },
22 | "colab": {
23 | "name": "01. Time Series components.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "riHGQ2LjGf89",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "## Imports"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "id": "e2z51lilGf8_",
43 | "colab_type": "code",
44 | "colab": {}
45 | },
46 | "source": [
47 | "import pandas as pd\n",
48 | "import numpy as np\n",
49 | "import matplotlib.pyplot as plt\n",
50 | "plt.rcParams[\"figure.figsize\"] = (20,8)"
51 | ],
52 | "execution_count": 0,
53 | "outputs": []
54 | },
55 | {
56 | "cell_type": "markdown",
57 | "metadata": {
58 | "id": "7Fl-lX7lGf9C",
59 | "colab_type": "text"
60 | },
61 | "source": [
62 | "## Some Theory to take care of"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "metadata": {
68 | "id": "Qaxs6TtsGf9C",
69 | "colab_type": "text"
70 | },
71 | "source": [
72 | "#### What is white noise?\n",
73 | " - No Correlation \n",
74 | " - Unable to fit a model\n",
75 | "\n",
76 | "#### Need for Forecasting\n",
77 | "\n",
78 | "#### Components of Time Series Analysis\n",
79 | " - Trend\n",
80 | " - Seasonality\n",
81 | " - Additive vs Multiplicative\n",
82 | " \n",
83 | "#### Residuals\n"
84 | ]
85 | },
86 | {
87 | "cell_type": "markdown",
88 | "metadata": {
89 | "id": "CcKsFjqYGf9D",
90 | "colab_type": "text"
91 | },
92 | "source": [
93 | "## Time Series Decomposition"
94 | ]
95 | },
96 | {
97 | "cell_type": "code",
98 | "metadata": {
99 | "id": "Jo5YDW_QGf9E",
100 | "colab_type": "code",
101 | "colab": {}
102 | },
103 | "source": [
104 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\"\n",
105 | "df = pd.read_csv(colab_path+'data/AirPassengers.csv', parse_dates=True, index_col=0)"
106 | ],
107 | "execution_count": 0,
108 | "outputs": []
109 | },
110 | {
111 | "cell_type": "code",
112 | "metadata": {
113 | "id": "OCCXUzllGf9G",
114 | "colab_type": "code",
115 | "colab": {}
116 | },
117 | "source": [
118 | "df.plot()"
119 | ],
120 | "execution_count": 0,
121 | "outputs": []
122 | },
123 | {
124 | "cell_type": "code",
125 | "metadata": {
126 | "id": "UDTQE9FqGf9J",
127 | "colab_type": "code",
128 | "colab": {}
129 | },
130 | "source": [
131 | "df.head()"
132 | ],
133 | "execution_count": 0,
134 | "outputs": []
135 | },
136 | {
137 | "cell_type": "code",
138 | "metadata": {
139 | "id": "SoUPBxvQGf9M",
140 | "colab_type": "code",
141 | "colab": {}
142 | },
143 | "source": [
144 | "from statsmodels.tsa.seasonal import seasonal_decompose"
145 | ],
146 | "execution_count": 0,
147 | "outputs": []
148 | },
149 | {
150 | "cell_type": "code",
151 | "metadata": {
152 | "id": "CLrs9pY1Gf9P",
153 | "colab_type": "code",
154 | "colab": {}
155 | },
156 | "source": [
157 | "components = seasonal_decompose(df)"
158 | ],
159 | "execution_count": 0,
160 | "outputs": []
161 | },
162 | {
163 | "cell_type": "code",
164 | "metadata": {
165 | "id": "MIaSdYBXGf9R",
166 | "colab_type": "code",
167 | "colab": {}
168 | },
169 | "source": [
170 | "components.plot()"
171 | ],
172 | "execution_count": 0,
173 | "outputs": []
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {
178 | "id": "VjbYDqtaGf9T",
179 | "colab_type": "text"
180 | },
181 | "source": [
182 | "## Exercise"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {
188 | "id": "defWuJihGf9U",
189 | "colab_type": "text"
190 | },
191 | "source": [
192 | "### Read the file 'data/daily_min_tempratures.csv' and find out the time series components.\n",
193 | "\n",
194 | "1. The index does not have a frequency set. Set the frequency. Hint: use the 'asfreq' method.\n",
195 | "2. There are some missing values. Fill the missing values using the 'bfill' method. Hint: use the 'fillna' method.\n",
196 | "3. Perform decomposition"
197 | ]
198 | },
199 | {
200 | "cell_type": "markdown",
201 | "metadata": {
202 | "id": "8bB2103nM4A2",
203 | "colab_type": "text"
204 | },
205 | "source": [
206 | "## Exponential Smoothing"
207 | ]
208 | },
209 | {
210 | "cell_type": "code",
211 | "metadata": {
212 | "id": "A92qFwKQM7Zy",
213 | "colab_type": "code",
214 | "colab": {}
215 | },
216 | "source": [
217 | "from statsmodels.tsa.api import ExponentialSmoothing"
218 | ],
219 | "execution_count": 0,
220 | "outputs": []
221 | },
222 | {
223 | "cell_type": "code",
224 | "metadata": {
225 | "id": "DiqmNt52OT-v",
226 | "colab_type": "code",
227 | "colab": {}
228 | },
229 | "source": [
230 | "ExponentialSmoothing?"
231 | ],
232 | "execution_count": 0,
233 | "outputs": []
234 | },
235 | {
236 | "cell_type": "code",
237 | "metadata": {
238 | "id": "YGonaKD5NAM4",
239 | "colab_type": "code",
240 | "colab": {}
241 | },
242 | "source": [
243 | "model = ExponentialSmoothing(df['#Passengers'].values, \n",
244 | " trend='mul', \n",
245 | " damped=False,\n",
246 | " seasonal='add',\n",
247 | " seasonal_periods=12)"
248 | ],
249 | "execution_count": 0,
250 | "outputs": []
251 | },
252 | {
253 | "cell_type": "code",
254 | "metadata": {
255 | "id": "lPM1Gng5Odwe",
256 | "colab_type": "code",
257 | "colab": {}
258 | },
259 | "source": [
260 | "res = model.fit()"
261 | ],
262 | "execution_count": 0,
263 | "outputs": []
264 | },
265 | {
266 | "cell_type": "code",
267 | "metadata": {
268 | "id": "ZFGvmOtoPZe7",
269 | "colab_type": "code",
270 | "colab": {}
271 | },
272 | "source": [
273 | "res.params"
274 | ],
275 | "execution_count": 0,
276 | "outputs": []
277 | },
278 | {
279 | "cell_type": "code",
280 | "metadata": {
281 | "id": "eHldR55cPeh6",
282 | "colab_type": "code",
283 | "colab": {}
284 | },
285 | "source": [
286 | "res.forecast?"
287 | ],
288 | "execution_count": 0,
289 | "outputs": []
290 | },
291 | {
292 | "cell_type": "code",
293 | "metadata": {
294 | "id": "e613NQMWPiQj",
295 | "colab_type": "code",
296 | "colab": {}
297 | },
298 | "source": [
299 | "fc = res.forecast(60).astype(int)"
300 | ],
301 | "execution_count": 0,
302 | "outputs": []
303 | },
304 | {
305 | "cell_type": "code",
306 | "metadata": {
307 | "id": "jW8z4vEtPm2C",
308 | "colab_type": "code",
309 | "colab": {}
310 | },
311 | "source": [
312 | "data = list(df['#Passengers'].values)\n",
313 | "data.extend(fc)"
314 | ],
315 | "execution_count": 0,
316 | "outputs": []
317 | },
318 | {
319 | "cell_type": "code",
320 | "metadata": {
321 | "id": "ZVBQLZhDPs--",
322 | "colab_type": "code",
323 | "colab": {}
324 | },
325 | "source": [
326 | "df1 = pd.DataFrame(data)"
327 | ],
328 | "execution_count": 0,
329 | "outputs": []
330 | },
331 | {
332 | "cell_type": "code",
333 | "metadata": {
334 | "id": "zlHmOFfnPwHW",
335 | "colab_type": "code",
336 | "colab": {}
337 | },
338 | "source": [
339 | ""
340 | ],
341 | "execution_count": 0,
342 | "outputs": []
343 | }
344 | ]
345 | }
--------------------------------------------------------------------------------
/Statistical models/02. Stationarity.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.7.1"
21 | },
22 | "colab": {
23 | "name": "02. Stationarity.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "ggjzGxCwGhyt",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "## Imports"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "id": "KCRNY4R3Ghyu",
43 | "colab_type": "code",
44 | "colab": {}
45 | },
46 | "source": [
47 | "import pandas as pd\n",
48 | "import numpy as np\n",
49 | "import matplotlib.pyplot as plt\n",
50 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n",
51 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\""
52 | ],
53 | "execution_count": 0,
54 | "outputs": []
55 | },
56 | {
57 | "cell_type": "markdown",
58 | "metadata": {
59 | "id": "JPBgkpOxGhyy",
60 | "colab_type": "text"
61 | },
62 | "source": [
63 | "## Stationarity"
64 | ]
65 | },
66 | {
67 | "cell_type": "markdown",
68 | "metadata": {
69 | "id": "_7NhORiSGhyz",
70 | "colab_type": "text"
71 | },
72 | "source": [
73 | "### Constant Mean, variance, autocorr\n",
74 | "\n",
75 | "Y(t) = rho * Y(t-1) + epsilon\n",
76 | "\n",
77 | "Data is not stationary if rho == 1\n"
78 | ]
79 | },
80 | {
81 | "cell_type": "code",
82 | "metadata": {
83 | "id": "LvAIYob5Ghy1",
84 | "colab_type": "code",
85 | "colab": {}
86 | },
87 | "source": [
88 | "min_temp_df = pd.read_csv(colab_path+'data/daily_min_tempratures.csv', parse_dates=[0], index_col=0)\n",
89 | "min_temp_df['temp'] = min_temp_df['temp'].astype(float)"
90 | ],
91 | "execution_count": 0,
92 | "outputs": []
93 | },
94 | {
95 | "cell_type": "code",
96 | "metadata": {
97 | "id": "RytriVc-Ghy4",
98 | "colab_type": "code",
99 | "colab": {}
100 | },
101 | "source": [
102 | "min_temp_df[min_temp_df.temp.isna()]"
103 | ],
104 | "execution_count": 0,
105 | "outputs": []
106 | },
107 | {
108 | "cell_type": "code",
109 | "metadata": {
110 | "id": "euczIvV6Ghy8",
111 | "colab_type": "code",
112 | "colab": {}
113 | },
114 | "source": [
115 | "min_temp_df.head()"
116 | ],
117 | "execution_count": 0,
118 | "outputs": []
119 | },
120 | {
121 | "cell_type": "code",
122 | "metadata": {
123 | "id": "TwuOJAoVGhy-",
124 | "colab_type": "code",
125 | "colab": {}
126 | },
127 | "source": [
128 | "min_temp_df['roll_mean'] = min_temp_df['temp'].rolling(50).mean()\n",
129 | "min_temp_df['roll_std'] = min_temp_df['temp'].rolling(50).std()"
130 | ],
131 | "execution_count": 0,
132 | "outputs": []
133 | },
134 | {
135 | "cell_type": "code",
136 | "metadata": {
137 | "id": "PSEEHoPVGhzA",
138 | "colab_type": "code",
139 | "colab": {}
140 | },
141 | "source": [
142 | "min_temp_df.plot(figsize=(20,10))\n",
143 | "\n"
144 | ],
145 | "execution_count": 0,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "Xb5GZcPGGhzC",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "air_pass_df = pd.read_csv(colab_path+'data/AirPassengers.csv', parse_dates=[0], index_col=0)\n",
157 | "air_pass_df['#Passengers'].fillna(method='ffill', inplace=True)\n",
158 | "\n"
159 | ],
160 | "execution_count": 0,
161 | "outputs": []
162 | },
163 | {
164 | "cell_type": "code",
165 | "metadata": {
166 | "id": "tFuzq0eZGhzF",
167 | "colab_type": "code",
168 | "colab": {}
169 | },
170 | "source": [
171 | "air_pass_df.head()"
172 | ],
173 | "execution_count": 0,
174 | "outputs": []
175 | },
176 | {
177 | "cell_type": "code",
178 | "metadata": {
179 | "id": "D_3kWwm7GhzI",
180 | "colab_type": "code",
181 | "colab": {}
182 | },
183 | "source": [
184 | "air_pass_df.plot(figsize=(20,10))"
185 | ],
186 | "execution_count": 0,
187 | "outputs": []
188 | },
189 | {
190 | "cell_type": "markdown",
191 | "metadata": {
192 | "id": "YCXf7I9IGhzL",
193 | "colab_type": "text"
194 | },
195 | "source": [
196 | "## ADFuller test"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "metadata": {
202 | "id": "irQp4BgPGhzM",
203 | "colab_type": "code",
204 | "colab": {}
205 | },
206 | "source": [
207 | "from statsmodels.tsa.stattools import adfuller"
208 | ],
209 | "execution_count": 0,
210 | "outputs": []
211 | },
212 | {
213 | "cell_type": "code",
214 | "metadata": {
215 | "id": "WNj6E8fNGhzQ",
216 | "colab_type": "code",
217 | "colab": {}
218 | },
219 | "source": [
220 | "adfuller(min_temp_df['temp'])"
221 | ],
222 | "execution_count": 0,
223 | "outputs": []
224 | },
225 | {
226 | "cell_type": "code",
227 | "metadata": {
228 | "id": "9Cg-oigbGhzT",
229 | "colab_type": "code",
230 | "colab": {}
231 | },
232 | "source": [
233 | "adfuller(air_pass_df['#Passengers'])"
234 | ],
235 | "execution_count": 0,
236 | "outputs": []
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "metadata": {
241 | "id": "XBIZgDdLGhzW",
242 | "colab_type": "text"
243 | },
244 | "source": [
245 | "## Detrend"
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "metadata": {
251 | "id": "6I1cewpfGhzX",
252 | "colab_type": "code",
253 | "colab": {}
254 | },
255 | "source": [
256 | "sd = pd.read_csv(colab_path+'data/stock_data.csv', parse_dates=True, index_col=0)"
257 | ],
258 | "execution_count": 0,
259 | "outputs": []
260 | },
261 | {
262 | "cell_type": "code",
263 | "metadata": {
264 | "id": "_4mwpEkbGhzb",
265 | "colab_type": "code",
266 | "colab": {}
267 | },
268 | "source": [
269 | "sd['AAPL'].plot(figsize=(10,8))"
270 | ],
271 | "execution_count": 0,
272 | "outputs": []
273 | },
274 | {
275 | "cell_type": "code",
276 | "metadata": {
277 | "id": "bpqk4ELhGhzg",
278 | "colab_type": "code",
279 | "colab": {}
280 | },
281 | "source": [
282 | "sd['AAPL_d'] = sd['AAPL'].diff()"
283 | ],
284 | "execution_count": 0,
285 | "outputs": []
286 | },
287 | {
288 | "cell_type": "code",
289 | "metadata": {
290 | "id": "nrfaj4AmGhzk",
291 | "colab_type": "code",
292 | "colab": {}
293 | },
294 | "source": [
295 | "sd['AAPL_d'].plot(figsize=(10,8))"
296 | ],
297 | "execution_count": 0,
298 | "outputs": []
299 | },
300 | {
301 | "cell_type": "code",
302 | "metadata": {
303 | "id": "wRUVSocjGhzo",
304 | "colab_type": "code",
305 | "colab": {}
306 | },
307 | "source": [
308 | "adfuller(sd['AAPL_d'].dropna())"
309 | ],
310 | "execution_count": 0,
311 | "outputs": []
312 | },
313 | {
314 | "cell_type": "markdown",
315 | "metadata": {
316 | "id": "ehWY5e0WGhzr",
317 | "colab_type": "text"
318 | },
319 | "source": [
320 | "## Deseasonalize"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "metadata": {
326 | "id": "Up8bJwYwGhzt",
327 | "colab_type": "code",
328 | "colab": {}
329 | },
330 | "source": [
331 | "min_temp_df['temp'].plot(figsize=(10,8))"
332 | ],
333 | "execution_count": 0,
334 | "outputs": []
335 | },
336 | {
337 | "cell_type": "code",
338 | "metadata": {
339 | "id": "PigSU4rtGhz0",
340 | "colab_type": "code",
341 | "colab": {}
342 | },
343 | "source": [
344 | "min_temp_df['temp'].diff(12).plot(figsize=(10,8))"
345 | ],
346 | "execution_count": 0,
347 | "outputs": []
348 | },
349 | {
350 | "cell_type": "code",
351 | "metadata": {
352 | "id": "0vas-gwjGhz9",
353 | "colab_type": "code",
354 | "colab": {}
355 | },
356 | "source": [
357 | "adfuller(min_temp_df['temp'].diff(12).dropna())"
358 | ],
359 | "execution_count": 0,
360 | "outputs": []
361 | },
362 | {
363 | "cell_type": "markdown",
364 | "metadata": {
365 | "id": "pjJsrmQbGh0B",
366 | "colab_type": "text"
367 | },
368 | "source": [
369 | "## Exercise\n",
370 | "\n",
371 | "Detrend & Deseasonalize the AirPassengers data"
372 | ]
373 | },
374 | {
375 | "cell_type": "code",
376 | "metadata": {
377 | "id": "n6mY1VLZGh0C",
378 | "colab_type": "code",
379 | "colab": {}
380 | },
381 | "source": [
382 | "air_pass_df['#Passengers'].diff().diff(12).plot()"
383 | ],
384 | "execution_count": 0,
385 | "outputs": []
386 | },
387 | {
388 | "cell_type": "code",
389 | "metadata": {
390 | "id": "bbtkk4TUGh0F",
391 | "colab_type": "code",
392 | "colab": {}
393 | },
394 | "source": [
395 | "adfuller(air_pass_df['#Passengers'].diff().diff(12).dropna())"
396 | ],
397 | "execution_count": 0,
398 | "outputs": []
399 | },
400 | {
401 | "cell_type": "code",
402 | "metadata": {
403 | "id": "hpWM1AeuGh0H",
404 | "colab_type": "code",
405 | "colab": {}
406 | },
407 | "source": [
408 | ""
409 | ],
410 | "execution_count": 0,
411 | "outputs": []
412 | }
413 | ]
414 | }
--------------------------------------------------------------------------------
/Statistical models/03.ARMA Process Models.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.7.1"
21 | },
22 | "colab": {
23 | "name": "03.ARMA Process Models.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "xgOIlFrwqzZn",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import pandas as pd\n",
38 | "import numpy as np\n",
39 | "import matplotlib.pyplot as plt\n",
40 | "plt.rcParams[\"figure.figsize\"] = (20,10)"
41 | ],
42 | "execution_count": 0,
43 | "outputs": []
44 | },
45 | {
46 | "cell_type": "markdown",
47 | "metadata": {
48 | "id": "vxgtRPp1qzZu",
49 | "colab_type": "text"
50 | },
51 | "source": [
52 | "## AR Models\n",
53 | "\n",
54 | "X(t) = phi * X(t-1) + epsilon "
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "metadata": {
60 | "id": "CTs297buqzZv",
61 | "colab_type": "code",
62 | "colab": {}
63 | },
64 | "source": [
65 | "from statsmodels.tsa.arima_process import ArmaProcess"
66 | ],
67 | "execution_count": 0,
68 | "outputs": []
69 | },
70 | {
71 | "cell_type": "code",
72 | "metadata": {
73 | "id": "BC5TDYDIqzZy",
74 | "colab_type": "code",
75 | "colab": {}
76 | },
77 | "source": [
78 | "ArmaProcess?"
79 | ],
80 | "execution_count": 0,
81 | "outputs": []
82 | },
83 | {
84 | "cell_type": "code",
85 | "metadata": {
86 | "id": "dpSBFZzkqzZ1",
87 | "colab_type": "code",
88 | "colab": {}
89 | },
90 | "source": [
91 | "ar1 = ArmaProcess(ar=(1, -.9, 0.7, -0.42))"
92 | ],
93 | "execution_count": 0,
94 | "outputs": []
95 | },
96 | {
97 | "cell_type": "code",
98 | "metadata": {
99 | "id": "2yiNWFZpqzZ6",
100 | "colab_type": "code",
101 | "colab": {}
102 | },
103 | "source": [
104 | "ar1_data = ar1.generate_sample(nsample=1000)"
105 | ],
106 | "execution_count": 0,
107 | "outputs": []
108 | },
109 | {
110 | "cell_type": "code",
111 | "metadata": {
112 | "id": "NQXXhZ6YqzZ8",
113 | "colab_type": "code",
114 | "colab": {}
115 | },
116 | "source": [
117 | "plt.plot(ar1_data)"
118 | ],
119 | "execution_count": 0,
120 | "outputs": []
121 | },
122 | {
123 | "cell_type": "code",
124 | "metadata": {
125 | "id": "-b3iN5owqzaA",
126 | "colab_type": "code",
127 | "colab": {}
128 | },
129 | "source": [
130 | "from statsmodels.tsa.stattools import adfuller"
131 | ],
132 | "execution_count": 0,
133 | "outputs": []
134 | },
135 | {
136 | "cell_type": "code",
137 | "metadata": {
138 | "id": "BPU7FwenqzaC",
139 | "colab_type": "code",
140 | "colab": {}
141 | },
142 | "source": [
143 | "adfuller(ar1_data)"
144 | ],
145 | "execution_count": 0,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "DSXU0-idqzaF",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "from statsmodels.tsa.arima_model import ARMA"
157 | ],
158 | "execution_count": 0,
159 | "outputs": []
160 | },
161 | {
162 | "cell_type": "code",
163 | "metadata": {
164 | "id": "9UVPvKUKqzaI",
165 | "colab_type": "code",
166 | "colab": {}
167 | },
168 | "source": [
169 | "ar_model = ARMA(ar1_data, order=(3,0))"
170 | ],
171 | "execution_count": 0,
172 | "outputs": []
173 | },
174 | {
175 | "cell_type": "code",
176 | "metadata": {
177 | "id": "w5SHG3SiqzaM",
178 | "colab_type": "code",
179 | "colab": {}
180 | },
181 | "source": [
182 | "res = ar_model.fit()"
183 | ],
184 | "execution_count": 0,
185 | "outputs": []
186 | },
187 | {
188 | "cell_type": "code",
189 | "metadata": {
190 | "id": "3Wd8BGvRqzaQ",
191 | "colab_type": "code",
192 | "colab": {}
193 | },
194 | "source": [
195 | "res.params"
196 | ],
197 | "execution_count": 0,
198 | "outputs": []
199 | },
200 | {
201 | "cell_type": "code",
202 | "metadata": {
203 | "id": "_8NikJENqzaT",
204 | "colab_type": "code",
205 | "colab": {}
206 | },
207 | "source": [
208 | "res.k_ar"
209 | ],
210 | "execution_count": 0,
211 | "outputs": []
212 | },
213 | {
214 | "cell_type": "code",
215 | "metadata": {
216 | "id": "OLglRPH0qzaV",
217 | "colab_type": "code",
218 | "colab": {}
219 | },
220 | "source": [
221 | "print(res.summary())"
222 | ],
223 | "execution_count": 0,
224 | "outputs": []
225 | },
226 | {
227 | "cell_type": "code",
228 | "metadata": {
229 | "id": "l1C8jigdqzaX",
230 | "colab_type": "code",
231 | "colab": {}
232 | },
233 | "source": [
234 | "plt.plot(ar1_data)\n",
235 | "plt.plot(res.fittedvalues)\n",
236 | "plt.show()"
237 | ],
238 | "execution_count": 0,
239 | "outputs": []
240 | },
241 | {
242 | "cell_type": "code",
243 | "metadata": {
244 | "id": "rkI9NGrOqzaa",
245 | "colab_type": "code",
246 | "colab": {}
247 | },
248 | "source": [
249 | "r = res.predict(start=3, end=1003)"
250 | ],
251 | "execution_count": 0,
252 | "outputs": []
253 | },
254 | {
255 | "cell_type": "code",
256 | "metadata": {
257 | "id": "fuHxsE-pqzae",
258 | "colab_type": "code",
259 | "colab": {}
260 | },
261 | "source": [
262 | "r"
263 | ],
264 | "execution_count": 0,
265 | "outputs": []
266 | },
267 | {
268 | "cell_type": "code",
269 | "metadata": {
270 | "id": "RhHdflraqzah",
271 | "colab_type": "code",
272 | "colab": {}
273 | },
274 | "source": [
275 | "# plt.plot(ar1_data)\n",
276 | "plt.plot(ar1_data)\n",
277 | "plt.plot(r)\n",
278 | "plt.show()"
279 | ],
280 | "execution_count": 0,
281 | "outputs": []
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "metadata": {
286 | "id": "RcVJZ2vYqzak",
287 | "colab_type": "text"
288 | },
289 | "source": [
290 | "## MA Models\n",
291 | "\n",
292 | "X(t) = mean + epsilon(t) + theta1 * epsilon(t-1)"
293 | ]
294 | },
295 | {
296 | "cell_type": "code",
297 | "metadata": {
298 | "id": "LOKNnpzMqzal",
299 | "colab_type": "code",
300 | "colab": {}
301 | },
302 | "source": [
303 | "ma1 = ArmaProcess(ma=(1, 0.9))"
304 | ],
305 | "execution_count": 0,
306 | "outputs": []
307 | },
308 | {
309 | "cell_type": "code",
310 | "metadata": {
311 | "id": "GA-PrLeXqzap",
312 | "colab_type": "code",
313 | "colab": {}
314 | },
315 | "source": [
316 | "ma1_data = ma1.generate_sample(nsample=1000)"
317 | ],
318 | "execution_count": 0,
319 | "outputs": []
320 | },
321 | {
322 | "cell_type": "code",
323 | "metadata": {
324 | "id": "QJpNiw8Qqzaw",
325 | "colab_type": "code",
326 | "colab": {}
327 | },
328 | "source": [
329 | "ma1_model = ARMA(ma1_data, order=(0,1))"
330 | ],
331 | "execution_count": 0,
332 | "outputs": []
333 | },
334 | {
335 | "cell_type": "code",
336 | "metadata": {
337 | "id": "Wdpt6CWwqza4",
338 | "colab_type": "code",
339 | "colab": {}
340 | },
341 | "source": [
342 | "res = ma1_model.fit()"
343 | ],
344 | "execution_count": 0,
345 | "outputs": []
346 | },
347 | {
348 | "cell_type": "code",
349 | "metadata": {
350 | "id": "KkLnMajMqza8",
351 | "colab_type": "code",
352 | "colab": {}
353 | },
354 | "source": [
355 | "res.params"
356 | ],
357 | "execution_count": 0,
358 | "outputs": []
359 | },
360 | {
361 | "cell_type": "code",
362 | "metadata": {
363 | "id": "MX8JlQedqzbL",
364 | "colab_type": "code",
365 | "colab": {}
366 | },
367 | "source": [
368 | "res.summary()"
369 | ],
370 | "execution_count": 0,
371 | "outputs": []
372 | },
373 | {
374 | "cell_type": "code",
375 | "metadata": {
376 | "id": "J_aKi_JrqzbO",
377 | "colab_type": "code",
378 | "colab": {}
379 | },
380 | "source": [
381 | "plt.plot(ma1_data)\n",
382 | "plt.plot(res.fittedvalues)"
383 | ],
384 | "execution_count": 0,
385 | "outputs": []
386 | },
387 | {
388 | "cell_type": "markdown",
389 | "metadata": {
390 | "id": "xsFU8a1tqzbQ",
391 | "colab_type": "text"
392 | },
393 | "source": [
394 | "## ARMA Models"
395 | ]
396 | },
397 | {
398 | "cell_type": "code",
399 | "metadata": {
400 | "id": "_KSWEHngqzbQ",
401 | "colab_type": "code",
402 | "colab": {}
403 | },
404 | "source": [
405 | "alphas = np.array([0.5, -0.25])\n",
406 | "betas = np.array([0.5, -0.3])\n",
407 | "ar = np.r_[1, -alphas]\n",
408 | "ma = np.r_[1, betas]\n",
409 | "ar_ma = ArmaProcess(ar=ar, ma=ma)"
410 | ],
411 | "execution_count": 0,
412 | "outputs": []
413 | },
414 | {
415 | "cell_type": "code",
416 | "metadata": {
417 | "id": "1i7BMfo3qzbS",
418 | "colab_type": "code",
419 | "colab": {}
420 | },
421 | "source": [
422 | "ar_ma_data = ar_ma.generate_sample(nsample=5000)"
423 | ],
424 | "execution_count": 0,
425 | "outputs": []
426 | },
427 | {
428 | "cell_type": "code",
429 | "metadata": {
430 | "id": "elIZGH8PqzbU",
431 | "colab_type": "code",
432 | "colab": {}
433 | },
434 | "source": [
435 | "arma_model = ARMA(ar_ma_data, order=(2,2))"
436 | ],
437 | "execution_count": 0,
438 | "outputs": []
439 | },
440 | {
441 | "cell_type": "code",
442 | "metadata": {
443 | "id": "M3dTzh7IqzbY",
444 | "colab_type": "code",
445 | "colab": {}
446 | },
447 | "source": [
448 | "res = arma_model.fit()"
449 | ],
450 | "execution_count": 0,
451 | "outputs": []
452 | },
453 | {
454 | "cell_type": "code",
455 | "metadata": {
456 | "id": "OdHbkzEWqzbb",
457 | "colab_type": "code",
458 | "colab": {}
459 | },
460 | "source": [
461 | "res.params"
462 | ],
463 | "execution_count": 0,
464 | "outputs": []
465 | },
466 | {
467 | "cell_type": "code",
468 | "metadata": {
469 | "id": "CMDTWqG-qzbd",
470 | "colab_type": "code",
471 | "colab": {}
472 | },
473 | "source": [
474 | "res.summary()"
475 | ],
476 | "execution_count": 0,
477 | "outputs": []
478 | },
479 | {
480 | "cell_type": "code",
481 | "metadata": {
482 | "id": "r9Miba8Fqzbh",
483 | "colab_type": "code",
484 | "colab": {}
485 | },
486 | "source": [
487 | "plt.plot(ar_ma_data)\n",
488 | "plt.plot(res.fittedvalues)"
489 | ],
490 | "execution_count": 0,
491 | "outputs": []
492 | },
493 | {
494 | "cell_type": "code",
495 | "metadata": {
496 | "id": "gE_bOeCzqzbk",
497 | "colab_type": "code",
498 | "colab": {}
499 | },
500 | "source": [
501 | "res.k_ar"
502 | ],
503 | "execution_count": 0,
504 | "outputs": []
505 | },
506 | {
507 | "cell_type": "code",
508 | "metadata": {
509 | "id": "Z0n9nmRuqzbm",
510 | "colab_type": "code",
511 | "colab": {}
512 | },
513 | "source": [
514 | "res.k_ma"
515 | ],
516 | "execution_count": 0,
517 | "outputs": []
518 | },
519 | {
520 | "cell_type": "code",
521 | "metadata": {
522 | "id": "vNw4xFyJqzbp",
523 | "colab_type": "code",
524 | "colab": {}
525 | },
526 | "source": [
527 | "res.k_trend"
528 | ],
529 | "execution_count": 0,
530 | "outputs": []
531 | },
532 | {
533 | "cell_type": "code",
534 | "metadata": {
535 | "id": "-RxRZOdZqzbr",
536 | "colab_type": "code",
537 | "colab": {}
538 | },
539 | "source": [
540 | ""
541 | ],
542 | "execution_count": 0,
543 | "outputs": []
544 | }
545 | ]
546 | }
--------------------------------------------------------------------------------
/Statistical models/04. ARIMA Models.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.7.1"
21 | },
22 | "colab": {
23 | "name": "04. ARIMA Models.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "OrSdVdoeGhq0",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "## Imports"
37 | ]
38 | },
39 | {
40 | "cell_type": "code",
41 | "metadata": {
42 | "id": "2bJT4wIsGhq4",
43 | "colab_type": "code",
44 | "colab": {}
45 | },
46 | "source": [
47 | "import pandas as pd\n",
48 | "import numpy as np\n",
49 | "import matplotlib.pyplot as plt\n",
50 | "%matplotlib inline\n",
51 | "plt.rcParams[\"figure.figsize\"] = (20,8)\n",
52 | "import warnings\n",
53 | "warnings.filterwarnings(\"ignore\")\n",
54 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\""
55 | ],
56 | "execution_count": 0,
57 | "outputs": []
58 | },
59 | {
60 | "cell_type": "markdown",
61 | "metadata": {
62 | "id": "2JBjgaLuGhq-",
63 | "colab_type": "text"
64 | },
65 | "source": [
66 | "## ARIMA Models"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "metadata": {
72 | "id": "ib_UpU6TGhq_",
73 | "colab_type": "code",
74 | "colab": {}
75 | },
76 | "source": [
77 | "from statsmodels.tsa.arima_model import ARIMA"
78 | ],
79 | "execution_count": 0,
80 | "outputs": []
81 | },
82 | {
83 | "cell_type": "code",
84 | "metadata": {
85 | "id": "WvLLd7NAGhrD",
86 | "colab_type": "code",
87 | "colab": {}
88 | },
89 | "source": [
90 | "ARIMA?"
91 | ],
92 | "execution_count": 0,
93 | "outputs": []
94 | },
95 | {
96 | "cell_type": "code",
97 | "metadata": {
98 | "id": "lXSy84QIGhrI",
99 | "colab_type": "code",
100 | "colab": {}
101 | },
102 | "source": [
103 | "df = pd.read_csv(colab_path+'data/sales.csv', parse_dates=True, index_col=0)"
104 | ],
105 | "execution_count": 0,
106 | "outputs": []
107 | },
108 | {
109 | "cell_type": "code",
110 | "metadata": {
111 | "id": "zk_mUUKoGhrZ",
112 | "colab_type": "code",
113 | "colab": {}
114 | },
115 | "source": [
116 | "from sklearn.metrics import mean_squared_error\n",
117 | "from math import sqrt\n",
118 | "\n",
119 | "def evaluate_modelperf(data, arima_order):\n",
120 | "    \"\"\"Walk-forward validation: RMSE of one-step ARIMA forecasts, or None on failure.\"\"\"\n",
121 | "    # The first 66% of the observations seed the history; the rest are forecast one at a time.\n",
122 | "    train_size = int(len(data) * 0.66)\n",
123 | "    train, test = data[:train_size], data[train_size:]\n",
124 | "    history = list(train)\n",
125 | "    predictions = []\n",
126 | "\n",
127 | "    for actual in test:\n",
128 | "        try:\n",
129 | "            # Construct inside the try so an order rejected at build time also yields None.\n",
130 | "            res = ARIMA(history, order=arima_order).fit(disp=0)\n",
131 | "            pred = res.forecast()[0]\n",
132 | "        except Exception:\n",
133 | "            # Not a bare except: KeyboardInterrupt must still be able to stop a long grid search.\n",
134 | "            return None\n",
135 | "        predictions.append(pred)\n",
136 | "        history.append(actual)\n",
137 | "\n",
138 | "    # Score every one-step forecast against the held-out observations.\n",
139 | "    try:\n",
140 | "        return sqrt(mean_squared_error(test, predictions))\n",
141 | "    except Exception:\n",
142 | "        print('Error encountered in RMSE calc')\n",
143 | "        return None"
144 | ],
145 | "execution_count": 0,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "_q_6L1HwGhrc",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "data= df['Sales'].values\n",
157 | "data = data.astype('float32')"
158 | ],
159 | "execution_count": 0,
160 | "outputs": []
161 | },
162 | {
163 | "cell_type": "code",
164 | "metadata": {
165 | "id": "jFX2bH9CGhre",
166 | "colab_type": "code",
167 | "colab": {}
168 | },
169 | "source": [
170 | "evaluate_modelperf(data, (1, 1, 0))"
171 | ],
172 | "execution_count": 0,
173 | "outputs": []
174 | },
175 | {
176 | "cell_type": "code",
177 | "metadata": {
178 | "id": "t8RxhoMGGhrg",
179 | "colab_type": "code",
180 | "colab": {}
181 | },
182 | "source": [
183 | "p_values = [0, 1, 2, 4, 6, 8, 10]\n",
184 | "d_values = [0, 1, 2]\n",
185 | "q_values = [0, 1, 2]"
186 | ],
187 | "execution_count": 0,
188 | "outputs": []
189 | },
190 | {
191 | "cell_type": "code",
192 | "metadata": {
193 | "id": "ADqyS9muGhrj",
194 | "colab_type": "code",
195 | "colab": {}
196 | },
197 | "source": [
198 | "import itertools\n",
199 | "combinations = list(itertools.product(*[p_values, d_values, q_values]))"
200 | ],
201 | "execution_count": 0,
202 | "outputs": []
203 | },
204 | {
205 | "cell_type": "code",
206 | "metadata": {
207 | "id": "9IloTFy5Ghrn",
208 | "colab_type": "code",
209 | "colab": {}
210 | },
211 | "source": [
212 | "best, low_rmse = None, None\n",
213 | "for order in combinations:\n",
214 | " rmse = evaluate_modelperf(data, order)\n",
215 | " if rmse is not None:\n",
216 | " print(f'RMSE for order: {order} = {rmse}')\n",
217 | " best, low_rmse = (order, rmse) if low_rmse is None or rmse < low_rmse else (best, low_rmse)\n",
218 | " else:\n",
219 | " print(f'Error encountered for order:{order}')\n",
220 | "\n",
221 | "print(f'Best Order: {best}. Low RMSE: {low_rmse}')\n"
222 | ],
223 | "execution_count": 0,
224 | "outputs": []
225 | },
226 | {
227 | "cell_type": "markdown",
228 | "metadata": {
229 | "id": "qxkx3CInGhrp",
230 | "colab_type": "text"
231 | },
232 | "source": [
233 | "## Exercise\n",
234 | "\n",
235 | " - Try the same with the 'data/daily_female_births.csv' file"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "metadata": {
241 | "id": "Tma9T-sYGhrq",
242 | "colab_type": "code",
243 | "colab": {}
244 | },
245 | "source": [
246 | "df = pd.read_csv(colab_path+'data/daily_female_births.csv', parse_dates=True, index_col=0)"
247 | ],
248 | "execution_count": 0,
249 | "outputs": []
250 | },
251 | {
252 | "cell_type": "code",
253 | "metadata": {
254 | "id": "1nayFHOGGhru",
255 | "colab_type": "code",
256 | "colab": {}
257 | },
258 | "source": [
259 | "data= df['Births'].values\n",
260 | "data = data.astype('float32')"
261 | ],
262 | "execution_count": 0,
263 | "outputs": []
264 | },
265 | {
266 | "cell_type": "code",
267 | "metadata": {
268 | "id": "yqrLbDZBGhrv",
269 | "colab_type": "code",
270 | "colab": {}
271 | },
272 | "source": [
273 | ""
274 | ],
275 | "execution_count": 0,
276 | "outputs": []
277 | },
278 | {
279 | "cell_type": "code",
280 | "metadata": {
281 | "id": "SvbXHwxyGhry",
282 | "colab_type": "code",
283 | "colab": {}
284 | },
285 | "source": [
286 | ""
287 | ],
288 | "execution_count": 0,
289 | "outputs": []
290 | }
291 | ]
292 | }
--------------------------------------------------------------------------------
/Time Series Boosting/Trees_and_Boosting_with_TS.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "Trees_and_Boosting_with_TS.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {
32 | "id": "Gl0KbU7NJbSz",
33 | "colab_type": "text"
34 | },
35 | "source": [
36 | "## Trees, Ensembles and XGBoost"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "metadata": {
42 | "id": "PtXnrfkUJbS1",
43 | "colab_type": "text"
44 | },
45 | "source": [
46 | " -- Introduction\n",
47 | " -- How do they work?\n",
48 | " -- What are Trees useful for?"
49 | ]
50 | },
51 | {
52 | "cell_type": "markdown",
53 | "metadata": {
54 | "id": "52frCEIbJbS5",
55 | "colab_type": "text"
56 | },
57 | "source": [
58 | "## Imports and initialization"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "metadata": {
64 | "id": "HYWxk1z_JbS6",
65 | "colab_type": "code",
66 | "colab": {}
67 | },
68 | "source": [
69 | "import pandas as pd\n",
70 | "import numpy as np\n",
71 | "import matplotlib.pyplot as plt\n",
72 | "%matplotlib inline\n",
73 | "import matplotlib\n",
74 | "matplotlib.rcParams['figure.figsize'] = [20, 10]\n",
75 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\""
76 | ],
77 | "execution_count": 0,
78 | "outputs": []
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {
83 | "id": "yKY7uggAJbS9",
84 | "colab_type": "text"
85 | },
86 | "source": [
87 | "## Data Preparation"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "metadata": {
93 | "id": "qY1vDqu8JbS-",
94 | "colab_type": "code",
95 | "colab": {}
96 | },
97 | "source": [
98 | "data = pd.read_csv(colab_path+'data/gdp_uk.csv')"
99 | ],
100 | "execution_count": 0,
101 | "outputs": []
102 | },
103 | {
104 | "cell_type": "code",
105 | "metadata": {
106 | "id": "iyvHmCz2JbTA",
107 | "colab_type": "code",
108 | "colab": {}
109 | },
110 | "source": [
111 | "data[['year', 'value']].plot(x='year', y='value')"
112 | ],
113 | "execution_count": 0,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "metadata": {
119 | "id": "rmxVpvTzJbTE",
120 | "colab_type": "code",
121 | "colab": {}
122 | },
123 | "source": [
124 | "data['gdp_growth'] = np.log(data.value / data.value.shift(1))\n",
125 | "data['is_inc'] = np.where(data.value / data.value.shift(1) > 1, 1, 0)"
126 | ],
127 | "execution_count": 0,
128 | "outputs": []
129 | },
130 | {
131 | "cell_type": "code",
132 | "metadata": {
133 | "id": "ZpqqSug0JbTG",
134 | "colab_type": "code",
135 | "colab": {}
136 | },
137 | "source": [
138 | "data.head(10)"
139 | ],
140 | "execution_count": 0,
141 | "outputs": []
142 | },
143 | {
144 | "cell_type": "code",
145 | "metadata": {
146 | "id": "UGpkZ15OJbTJ",
147 | "colab_type": "code",
148 | "colab": {}
149 | },
150 | "source": [
151 | "for lag in range(1, 6):\n",
152 | " data[f'gdp_growth_lag_{lag}'] = data['gdp_growth'].shift(lag)"
153 | ],
154 | "execution_count": 0,
155 | "outputs": []
156 | },
157 | {
158 | "cell_type": "code",
159 | "metadata": {
160 | "id": "EzaKWbB7JbTL",
161 | "colab_type": "code",
162 | "colab": {}
163 | },
164 | "source": [
165 | "data.dropna(inplace=True)"
166 | ],
167 | "execution_count": 0,
168 | "outputs": []
169 | },
170 | {
171 | "cell_type": "code",
172 | "metadata": {
173 | "id": "UgkdeJXeJbTN",
174 | "colab_type": "code",
175 | "colab": {}
176 | },
177 | "source": [
178 | "df = data[['year', \n",
179 | " 'gdp_growth_lag_1', \n",
180 | " 'gdp_growth_lag_2',\n",
181 | " 'gdp_growth_lag_3',\n",
182 | " 'gdp_growth_lag_4',\n",
183 | " 'gdp_growth_lag_5',\n",
184 | " 'gdp_growth',\n",
185 | " 'is_inc']].copy()"
186 | ],
187 | "execution_count": 0,
188 | "outputs": []
189 | },
190 | {
191 | "cell_type": "code",
192 | "metadata": {
193 | "id": "EJjSBUWcJbTP",
194 | "colab_type": "code",
195 | "colab": {}
196 | },
197 | "source": [
198 | "df.head(10)"
199 | ],
200 | "execution_count": 0,
201 | "outputs": []
202 | },
203 | {
204 | "cell_type": "code",
205 | "metadata": {
206 | "id": "5L6kSPzGJbTS",
207 | "colab_type": "code",
208 | "colab": {}
209 | },
210 | "source": [
211 | "features_columns = ['gdp_growth_lag_1', 'gdp_growth_lag_2','gdp_growth_lag_3', 'gdp_growth_lag_4', 'gdp_growth_lag_5']\n",
212 | "target = 'is_inc'"
213 | ],
214 | "execution_count": 0,
215 | "outputs": []
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {
220 | "id": "QIRCUqNGJbTU",
221 | "colab_type": "text"
222 | },
223 | "source": [
224 | "## Trees & XGBoost"
225 | ]
226 | },
227 | {
228 | "cell_type": "code",
229 | "metadata": {
230 | "id": "Kdu8IWN4JbTV",
231 | "colab_type": "code",
232 | "colab": {}
233 | },
234 | "source": [
235 | "import xgboost as xgb\n",
236 | "from sklearn.ensemble import RandomForestClassifier\n",
237 | "from sklearn.tree import DecisionTreeClassifier"
238 | ],
239 | "execution_count": 0,
240 | "outputs": []
241 | },
242 | {
243 | "cell_type": "code",
244 | "metadata": {
245 | "id": "MDNBPa5sJbTX",
246 | "colab_type": "code",
247 | "colab": {}
248 | },
249 | "source": [
250 | "model = xgb.XGBClassifier(max_depth=5)\n",
251 | "# model = RandomForestClassifier(n_estimators=20, max_depth=5)\n",
252 | "# model = DecisionTreeClassifier(max_depth=5)"
253 | ],
254 | "execution_count": 0,
255 | "outputs": []
256 | },
257 | {
258 | "cell_type": "markdown",
259 | "metadata": {
260 | "id": "ceal6ax7JbTa",
261 | "colab_type": "text"
262 | },
263 | "source": [
264 | "## Train"
265 | ]
266 | },
267 | {
268 | "cell_type": "code",
269 | "metadata": {
270 | "id": "KvvYmyHfJbTb",
271 | "colab_type": "code",
272 | "colab": {}
273 | },
274 | "source": [
275 | "train_df = df[df.year < 1990].copy()\n",
276 | "test_df = df[df.year >= 1990].copy()"
277 | ],
278 | "execution_count": 0,
279 | "outputs": []
280 | },
281 | {
282 | "cell_type": "code",
283 | "metadata": {
284 | "id": "rHgid13QJbTc",
285 | "colab_type": "code",
286 | "colab": {}
287 | },
288 | "source": [
289 | "model.fit(train_df[features_columns], train_df[target])"
290 | ],
291 | "execution_count": 0,
292 | "outputs": []
293 | },
294 | {
295 | "cell_type": "code",
296 | "metadata": {
297 | "id": "DYq8TciHJbTe",
298 | "colab_type": "code",
299 | "colab": {}
300 | },
301 | "source": [
302 | "model.feature_importances_"
303 | ],
304 | "execution_count": 0,
305 | "outputs": []
306 | },
307 | {
308 | "cell_type": "markdown",
309 | "metadata": {
310 | "id": "6rwnESI9JbTg",
311 | "colab_type": "text"
312 | },
313 | "source": [
314 | "## Test"
315 | ]
316 | },
317 | {
318 | "cell_type": "code",
319 | "metadata": {
320 | "id": "y1SkxdwCJbTh",
321 | "colab_type": "code",
322 | "colab": {}
323 | },
324 | "source": [
325 | "df['is_inc_pred'] = model.predict(df[features_columns])\n",
326 | "test_df['is_inc_pred'] = model.predict(test_df[features_columns])"
327 | ],
328 | "execution_count": 0,
329 | "outputs": []
330 | },
331 | {
332 | "cell_type": "code",
333 | "metadata": {
334 | "id": "2jhLUFYaJbTj",
335 | "colab_type": "code",
336 | "colab": {}
337 | },
338 | "source": [
339 | "from sklearn.metrics import accuracy_score"
340 | ],
341 | "execution_count": 0,
342 | "outputs": []
343 | },
344 | {
345 | "cell_type": "code",
346 | "metadata": {
347 | "id": "0pKidA2MJbTm",
348 | "colab_type": "code",
349 | "colab": {}
350 | },
351 | "source": [
352 | "accuracy_score(test_df['is_inc'], test_df['is_inc_pred'])"
353 | ],
354 | "execution_count": 0,
355 | "outputs": []
356 | },
357 | {
358 | "cell_type": "code",
359 | "metadata": {
360 | "id": "vgIbauk3JbTq",
361 | "colab_type": "code",
362 | "colab": {}
363 | },
364 | "source": [
365 | "accuracy_score(df['is_inc'], df['is_inc_pred'])"
366 | ],
367 | "execution_count": 0,
368 | "outputs": []
369 | },
370 | {
371 | "cell_type": "markdown",
372 | "metadata": {
373 | "id": "7kdN7fLyJbTt",
374 | "colab_type": "text"
375 | },
376 | "source": [
377 | "## Regressor"
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "metadata": {
383 | "id": "hgFDvCzSJbTv",
384 | "colab_type": "code",
385 | "colab": {}
386 | },
387 | "source": [
388 | "model = xgb.XGBRegressor()"
389 | ],
390 | "execution_count": 0,
391 | "outputs": []
392 | },
393 | {
394 | "cell_type": "code",
395 | "metadata": {
396 | "id": "3U4lpd7PJbTx",
397 | "colab_type": "code",
398 | "colab": {}
399 | },
400 | "source": [
401 | "dir(model)"
402 | ],
403 | "execution_count": 0,
404 | "outputs": []
405 | },
406 | {
407 | "cell_type": "code",
408 | "metadata": {
409 | "id": "RRalGBkHJbTz",
410 | "colab_type": "code",
411 | "colab": {}
412 | },
413 | "source": [
414 | "model.fit(train_df[features_columns], train_df['gdp_growth']) "
415 | ],
416 | "execution_count": 0,
417 | "outputs": []
418 | },
419 | {
420 | "cell_type": "code",
421 | "metadata": {
422 | "id": "7b4JvBodJbT1",
423 | "colab_type": "code",
424 | "colab": {}
425 | },
426 | "source": [
427 | "df['gdp_growth_pred'] = model.predict(df[features_columns])\n",
428 | "test_df['gdp_growth_pred'] = model.predict(test_df[features_columns])"
429 | ],
430 | "execution_count": 0,
431 | "outputs": []
432 | },
433 | {
434 | "cell_type": "code",
435 | "metadata": {
436 | "id": "ORBNP_7iJbT5",
437 | "colab_type": "code",
438 | "colab": {}
439 | },
440 | "source": [
441 | "df[['year', 'gdp_growth', 'gdp_growth_pred']].plot(x='year', y=['gdp_growth_pred', 'gdp_growth'])"
442 | ],
443 | "execution_count": 0,
444 | "outputs": []
445 | },
446 | {
447 | "cell_type": "code",
448 | "metadata": {
449 | "id": "mYLfxZpzJbT6",
450 | "colab_type": "code",
451 | "colab": {}
452 | },
453 | "source": [
454 | "test_df[['year', 'gdp_growth', 'gdp_growth_pred']].plot(x='year', y=['gdp_growth_pred', 'gdp_growth'])"
455 | ],
456 | "execution_count": 0,
457 | "outputs": []
458 | },
459 | {
460 | "cell_type": "markdown",
461 | "metadata": {
462 | "id": "TkGBbm4WJbT8",
463 | "colab_type": "text"
464 | },
465 | "source": [
466 | "## Gotchas with Trees"
467 | ]
468 | },
469 | {
470 | "cell_type": "markdown",
471 | "metadata": {
472 | "id": "kMALpLcwJbT9",
473 | "colab_type": "text"
474 | },
475 | "source": [
476 | " -- Do not capture linear relationships\n",
477 | " -- Time ordering is not inherent to tree models, so the time-series flavour has to be supplied explicitly (e.g. through lag features)\n",
478 | " -- Work wonderfully for structured data\n",
479 | " -- One-hot encoding of categorical features is mandatory (otherwise ordinality is assumed)"
480 | ]
481 | },
482 | {
483 | "cell_type": "code",
484 | "metadata": {
485 | "id": "9ZkeHuZIJbT9",
486 | "colab_type": "code",
487 | "colab": {}
488 | },
489 | "source": [
490 | ""
491 | ],
492 | "execution_count": 0,
493 | "outputs": []
494 | }
495 | ]
496 | }
--------------------------------------------------------------------------------
/Time Series with Deep Learning/01.Time Series Forecasting with MLP.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "01.Time Series Forecasting with MLP.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "hpQDh6LuJdMF",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import keras\n",
38 | "import pandas as pd\n",
39 | "from matplotlib import pyplot as plt\n",
40 | "from sklearn.model_selection import TimeSeriesSplit\n",
41 | "from sklearn.preprocessing import MinMaxScaler\n",
42 | "from keras.models import Sequential\n",
43 | "from keras.layers import Dense,Dropout\n",
44 | "from keras.optimizers import SGD\n",
45 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\""
46 | ],
47 | "execution_count": 0,
48 | "outputs": []
49 | },
50 | {
51 | "cell_type": "code",
52 | "metadata": {
53 | "id": "QNoFd5cNJdMK",
54 | "colab_type": "code",
55 | "colab": {}
56 | },
57 | "source": [
58 | "rainfall_data_monthly = pd.read_csv(colab_path+\"data/All_India_Area_Weighted_Monthly_Rainfall.csv\")"
59 | ],
60 | "execution_count": 0,
61 | "outputs": []
62 | },
63 | {
64 | "cell_type": "code",
65 | "metadata": {
66 | "id": "FzcMFuBnJdMO",
67 | "colab_type": "code",
68 | "colab": {}
69 | },
70 | "source": [
71 | "rainfall_data_monthly.head()"
72 | ],
73 | "execution_count": 0,
74 | "outputs": []
75 | },
76 | {
77 | "cell_type": "code",
78 | "metadata": {
79 | "id": "AXqu0JUZJdMT",
80 | "colab_type": "code",
81 | "colab": {}
82 | },
83 | "source": [
84 | "rainfall_data_monthly['Value'].plot()"
85 | ],
86 | "execution_count": 0,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "id": "d3EC8jq7JdMW",
93 | "colab_type": "code",
94 | "colab": {}
95 | },
96 | "source": [
97 | "rainfall_data_monthly['Time'] = pd.to_datetime(rainfall_data_monthly['Time'])"
98 | ],
99 | "execution_count": 0,
100 | "outputs": []
101 | },
102 | {
103 | "cell_type": "code",
104 | "metadata": {
105 | "id": "WjiDUjTzJdMZ",
106 | "colab_type": "code",
107 | "colab": {}
108 | },
109 | "source": [
110 | "rainfall_data_monthly = rainfall_data_monthly.set_index('Time')"
111 | ],
112 | "execution_count": 0,
113 | "outputs": []
114 | },
115 | {
116 | "cell_type": "code",
117 | "metadata": {
118 | "id": "LQak2yB4JdMc",
119 | "colab_type": "code",
120 | "colab": {}
121 | },
122 | "source": [
123 | "rainfall_data_monthly['Value'].resample('6M').mean().plot()"
124 | ],
125 | "execution_count": 0,
126 | "outputs": []
127 | },
128 | {
129 | "cell_type": "code",
130 | "metadata": {
131 | "id": "mNf_nBs2JdMf",
132 | "colab_type": "code",
133 | "colab": {}
134 | },
135 | "source": [
136 | "rainfall_data_monthly = rainfall_data_monthly.reset_index()"
137 | ],
138 | "execution_count": 0,
139 | "outputs": []
140 | },
141 | {
142 | "cell_type": "code",
143 | "metadata": {
144 | "id": "XxcA5YRoJdMh",
145 | "colab_type": "code",
146 | "colab": {}
147 | },
148 | "source": [
149 | "rainfall_data_monthly['Value_s_1']=rainfall_data_monthly['Value'].shift(1)\n",
150 | "rainfall_data_monthly['Value_s_2']= rainfall_data_monthly['Value'].shift(2)\n",
151 | "rainfall_data_monthly['Value_d_1']= rainfall_data_monthly['Value'].diff(1)"
152 | ],
153 | "execution_count": 0,
154 | "outputs": []
155 | },
156 | {
157 | "cell_type": "code",
158 | "metadata": {
159 | "id": "-37eKSFPJdMk",
160 | "colab_type": "code",
161 | "colab": {}
162 | },
163 | "source": [
164 | "rainfall_data_monthly[['Value','Value_s_1','Value_s_2','Value_d_1']].loc[3]"
165 | ],
166 | "execution_count": 0,
167 | "outputs": []
168 | },
169 | {
170 | "cell_type": "code",
171 | "metadata": {
172 | "id": "xSRpciAKJdMn",
173 | "colab_type": "code",
174 | "colab": {}
175 | },
176 | "source": [
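177 | "# Let's split the data into train and test.\n",
178 | "# TimeSeriesSplit(n_splits=5) yields 5 expanding-window folds; the loop keeps only the last one (train ==> roughly the first 5/6 of the rows, test ==> the final 1/6)\n",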
179 | "\n",
180 | "tscv = TimeSeriesSplit(n_splits=5)\n",
181 | "\n",
182 | "for train_index, test_index in tscv.split(rainfall_data_monthly.Time):\n",
183 | " y_train,y_test = rainfall_data_monthly['Value'][train_index],rainfall_data_monthly['Value'][test_index]\n",
184 | " x_train,x_test = rainfall_data_monthly[['Value_s_1','Value_s_2','Value_d_1']].loc[train_index],rainfall_data_monthly[['Value_s_1','Value_s_2','Value_d_1']].loc[test_index]"
185 | ],
186 | "execution_count": 0,
187 | "outputs": []
188 | },
189 | {
190 | "cell_type": "code",
191 | "metadata": {
192 | "id": "MRcK9G0JJdMp",
193 | "colab_type": "code",
194 | "colab": {}
195 | },
196 | "source": [
197 | "x_train = x_train[3:]\n",
198 | "y_train = y_train[3:]"
199 | ],
200 | "execution_count": 0,
201 | "outputs": []
202 | },
203 | {
204 | "cell_type": "code",
205 | "metadata": {
206 | "id": "l_MlA6J4JdMs",
207 | "colab_type": "code",
208 | "colab": {}
209 | },
210 | "source": [
211 | ""
212 | ],
213 | "execution_count": 0,
214 | "outputs": []
215 | },
216 | {
217 | "cell_type": "code",
218 | "metadata": {
219 | "id": "loW6c7hVJdMy",
220 | "colab_type": "code",
221 | "colab": {}
222 | },
223 | "source": [
224 | "#x_train = x_train.values.reshape(-1,1)\n",
225 | "#x_test = x_test.values.reshape(-1,1)"
226 | ],
227 | "execution_count": 0,
228 | "outputs": []
229 | },
230 | {
231 | "cell_type": "code",
232 | "metadata": {
233 | "id": "6wAi2RbTJdM0",
234 | "colab_type": "code",
235 | "colab": {}
236 | },
237 | "source": [
238 | "#y_train"
239 | ],
240 | "execution_count": 0,
241 | "outputs": []
242 | },
243 | {
244 | "cell_type": "code",
245 | "metadata": {
246 | "id": "oIsu3jPYJdM3",
247 | "colab_type": "code",
248 | "colab": {}
249 | },
250 | "source": [
251 | "# Scaling X (the input features)\n",
252 | "\n",
253 | "scaler = MinMaxScaler(feature_range=(0,1))\n",
254 | "\n",
255 | "x_train = scaler.fit_transform(x_train)\n",
256 | "x_test = scaler.fit_transform(x_test)\n"
257 | ],
258 | "execution_count": 0,
259 | "outputs": []
260 | },
261 | {
262 | "cell_type": "code",
263 | "metadata": {
264 | "id": "Kf6YLoeSJdM6",
265 | "colab_type": "code",
266 | "colab": {}
267 | },
268 | "source": [
269 | "y_train = y_train.values.reshape(-1,1)\n",
270 | "y_test = y_test.values.reshape(-1,1)\n",
271 | "y_train = scaler.fit_transform(y_train)\n",
272 | "y_test = scaler.fit_transform(y_test)\n"
273 | ],
274 | "execution_count": 0,
275 | "outputs": []
276 | },
277 | {
278 | "cell_type": "code",
279 | "metadata": {
280 | "id": "9ycJ5KAoJdM8",
281 | "colab_type": "code",
282 | "colab": {}
283 | },
284 | "source": [
285 | "mlp_model = Sequential()\n",
286 | "mlp_model.add(Dense(100, activation='sigmoid', input_dim=x_train.shape[1]))\n",
287 | "mlp_model.add(Dense(100,activation='sigmoid'))\n",
288 | "mlp_model.add(Dropout(0.2))\n",
289 | "mlp_model.add(Dense(1,activation='sigmoid'))\n",
290 | "opt = SGD(lr=0.001)\n",
291 | "mlp_model.compile(loss='mean_squared_error', optimizer='adam')"
292 | ],
293 | "execution_count": 0,
294 | "outputs": []
295 | },
296 | {
297 | "cell_type": "code",
298 | "metadata": {
299 | "id": "wHWJgeHjJdM-",
300 | "colab_type": "code",
301 | "colab": {}
302 | },
303 | "source": [
304 | "mlp_model.fit(x_train,y_train,nb_epoch=50, batch_size=50, validation_split=0.2)"
305 | ],
306 | "execution_count": 0,
307 | "outputs": []
308 | },
309 | {
310 | "cell_type": "code",
311 | "metadata": {
312 | "id": "wnuRmmEjJdNA",
313 | "colab_type": "code",
314 | "colab": {}
315 | },
316 | "source": [
317 | "preds = mlp_model.predict(x_test)"
318 | ],
319 | "execution_count": 0,
320 | "outputs": []
321 | },
322 | {
323 | "cell_type": "code",
324 | "metadata": {
325 | "id": "_2XVYZXyJdND",
326 | "colab_type": "code",
327 | "colab": {}
328 | },
329 | "source": [
330 | "plt.figure(figsize=(10, 5.5))\n",
331 | "plt.plot(preds,linestyle='-', marker='*',color='b')\n",
332 | "plt.plot(y_test,linestyle='-', marker='.',color='r')\n",
333 | "plt.legend(['Predicted','Actual'], loc=2)\n",
334 | "plt.title('Actual vs Predicted Rainfall')\n",
335 | "plt.ylabel('rainfall in mm')\n",
336 | "plt.xlabel('Index')\n",
337 | "plt.savefig('rain_fall_mlp', format='png', dpi=300)"
338 | ],
339 | "execution_count": 0,
340 | "outputs": []
341 | },
342 | {
343 | "cell_type": "code",
344 | "metadata": {
345 | "id": "fMxloxHcJdNH",
346 | "colab_type": "code",
347 | "colab": {}
348 | },
349 | "source": [
350 | "from sklearn.metrics import r2_score\n",
351 | "\n",
352 | "r2_score(y_test,preds)"
353 | ],
354 | "execution_count": 0,
355 | "outputs": []
356 | },
357 | {
358 | "cell_type": "code",
359 | "metadata": {
360 | "id": "WF9ZHRTjJdNJ",
361 | "colab_type": "code",
362 | "colab": {}
363 | },
364 | "source": [
365 | ""
366 | ],
367 | "execution_count": 0,
368 | "outputs": []
369 | }
370 | ]
371 | }
--------------------------------------------------------------------------------
/Time Series with Deep Learning/02.Time Series Forecasting with LSTM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "02.Time Series Forecasting with LSTM.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "Dggq19RmJeta",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import pandas as pd\n",
38 | "import numpy as np\n",
39 | "from math import sqrt\n",
40 | "from sklearn.metrics import r2_score\n",
41 | "import matplotlib.pyplot as plt\n",
42 | "from keras.models import Sequential\n",
43 | "from keras.layers import Dense, Dropout,LSTM\n",
44 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\""
45 | ],
46 | "execution_count": 0,
47 | "outputs": []
48 | },
49 | {
50 | "cell_type": "markdown",
51 | "metadata": {
52 | "id": "ZIMSGUXLJetg",
53 | "colab_type": "text"
54 | },
55 | "source": [
56 | "# Univariate Forecasting"
57 | ]
58 | },
59 | {
60 | "cell_type": "code",
61 | "metadata": {
62 | "id": "mOA7YgKlJeti",
63 | "colab_type": "code",
64 | "colab": {}
65 | },
66 | "source": [
67 | "rainfall_data = pd.read_csv(colab_path+\"data/All_India_Area_Weighted_Monthly_Rainfall.csv\")"
68 | ],
69 | "execution_count": 0,
70 | "outputs": []
71 | },
72 | {
73 | "cell_type": "code",
74 | "metadata": {
75 | "id": "QAO3OiHTJetl",
76 | "colab_type": "code",
77 | "colab": {}
78 | },
79 | "source": [
80 | "\n",
81 | "def makeXy(ts, nb_timesteps):\n",
82 | " \"\"\"\n",
83 | " Input: \n",
84 | " ts: original time series\n",
85 | " nb_timesteps: number of time steps in the regressors\n",
86 | " Output: \n",
87 | " X: 2-D array of regressors\n",
88 | " y: 1-D array of target \n",
89 | " \"\"\"\n",
90 | " X = []\n",
91 | " y = []\n",
92 | " for i in range(nb_timesteps, ts.shape[0]):\n",
93 | " X.append(list(ts.loc[i-nb_timesteps:i-1]))\n",
94 | " y.append(ts.loc[i])\n",
95 | " X, y = np.array(X), np.array(y)\n",
96 | " return X, y"
97 | ],
98 | "execution_count": 0,
99 | "outputs": []
100 | },
101 | {
102 | "cell_type": "code",
103 | "metadata": {
104 | "id": "T_Bt5ePYJeto",
105 | "colab_type": "code",
106 | "colab": {}
107 | },
108 | "source": [
109 | "from sklearn.preprocessing import MinMaxScaler\n",
110 | "scaler = MinMaxScaler(feature_range=(0, 1))\n",
111 | "rainfall_data['Value'] = scaler.fit_transform(np.array(rainfall_data['Value']).reshape(-1, 1))"
112 | ],
113 | "execution_count": 0,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "metadata": {
119 | "id": "qpOmUCoTJetr",
120 | "colab_type": "code",
121 | "colab": {}
122 | },
123 | "source": [
124 | "X,y = makeXy(rainfall_data['Value'],10)\n",
125 | "\n",
126 | "X_train = X[:1100]\n",
127 | "X_test = X[1100:]\n",
128 | "y_train = y[:1100]\n",
129 | "y_test = y[1100:]"
130 | ],
131 | "execution_count": 0,
132 | "outputs": []
133 | },
134 | {
135 | "cell_type": "code",
136 | "metadata": {
137 | "id": "7VJTBIMaJetv",
138 | "colab_type": "code",
139 | "colab": {}
140 | },
141 | "source": [
142 | "X_train = np.reshape(X_train, (X_train.shape[0],X_train.shape[1],1))\n",
143 | "X_test = np.reshape(X_test, (X_test.shape[0],X_test.shape[1],1))"
144 | ],
145 | "execution_count": 0,
146 | "outputs": []
147 | },
148 | {
149 | "cell_type": "code",
150 | "metadata": {
151 | "id": "DBoPapo5Jety",
152 | "colab_type": "code",
153 | "colab": {}
154 | },
155 | "source": [
156 | "lstm_model = Sequential()\n",
157 | "lstm_model.add(LSTM(50, input_shape = (X_train.shape[1],1), return_sequences=True, activation = 'relu'))\n",
158 | "lstm_model.add(Dropout(0.1))\n",
159 | "lstm_model.add(LSTM(50,activation='relu'))\n",
160 | "lstm_model.add(Dropout(0.1))\n",
161 | "lstm_model.add(Dense(1))\n",
162 | "lstm_model.compile(loss = 'mean_squared_error',\n",
163 | " optimizer = 'adam',\n",
164 | " metrics = ['mse'])\n",
165 | "lstm_model.fit(X_train, y_train, \n",
166 | " nb_epoch=50, batch_size=50,validation_split=0.2)"
167 | ],
168 | "execution_count": 0,
169 | "outputs": []
170 | },
171 | {
172 | "cell_type": "code",
173 | "metadata": {
174 | "id": "WEbaLK_1JeuB",
175 | "colab_type": "code",
176 | "colab": {}
177 | },
178 | "source": [
179 | "preds = lstm_model.predict(X_test)"
180 | ],
181 | "execution_count": 0,
182 | "outputs": []
183 | },
184 | {
185 | "cell_type": "code",
186 | "metadata": {
187 | "id": "d5DcJCMEJeuG",
188 | "colab_type": "code",
189 | "colab": {}
190 | },
191 | "source": [
192 | "def plot_predictions(preds,y_test):\n",
193 | " plt.figure(figsize=(10, 5.5))\n",
194 | " plt.plot(preds,linestyle='-',color='b')\n",
195 | " plt.plot(y_test,linestyle='-',color='r')\n",
196 | " plt.legend(['Predicted','Actual'], loc=2)"
197 | ],
198 | "execution_count": 0,
199 | "outputs": []
200 | },
201 | {
202 | "cell_type": "code",
203 | "metadata": {
204 | "id": "FpAVjSwfJeuN",
205 | "colab_type": "code",
206 | "colab": {}
207 | },
208 | "source": [
209 | "r2_score(y_test,preds)"
210 | ],
211 | "execution_count": 0,
212 | "outputs": []
213 | },
214 | {
215 | "cell_type": "markdown",
216 | "metadata": {
217 | "id": "A3ki9EkVJeuX",
218 | "colab_type": "text"
219 | },
220 | "source": [
221 | "# Multivariate Forecasting"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "metadata": {
227 | "id": "Llz63bnuJeua",
228 | "colab_type": "code",
229 | "colab": {}
230 | },
231 | "source": [
232 | "multi_data = pd.read_csv(colab_path+\"data/pollution.csv\",parse_dates=True,index_col=0)"
233 | ],
234 | "execution_count": 0,
235 | "outputs": []
236 | },
237 | {
238 | "cell_type": "code",
239 | "metadata": {
240 | "id": "pQVytBclJeuf",
241 | "colab_type": "code",
242 | "colab": {}
243 | },
244 | "source": [
245 | "multi_data.head()"
246 | ],
247 | "execution_count": 0,
248 | "outputs": []
249 | },
250 | {
251 | "cell_type": "code",
252 | "metadata": {
253 | "id": "jzVFmMGlJeun",
254 | "colab_type": "code",
255 | "colab": {}
256 | },
257 | "source": [
258 | "def create_lags(df):\n",
259 | " for col in df.columns:\n",
260 | " df[col+\"_1\"] = df[col].shift(1)\n",
261 | " return df.dropna()"
262 | ],
263 | "execution_count": 0,
264 | "outputs": []
265 | },
266 | {
267 | "cell_type": "code",
268 | "metadata": {
269 | "id": "Z-F5OeC8Jeus",
270 | "colab_type": "code",
271 | "colab": {}
272 | },
273 | "source": [
274 | "reframed_multi_data = create_lags(multi_data)"
275 | ],
276 | "execution_count": 0,
277 | "outputs": []
278 | },
279 | {
280 | "cell_type": "code",
281 | "metadata": {
282 | "id": "pbT9oEufJeux",
283 | "colab_type": "code",
284 | "colab": {}
285 | },
286 | "source": [
287 | "reframed_multi_data.reset_index(inplace=True)\n",
288 | "del reframed_multi_data['date']"
289 | ],
290 | "execution_count": 0,
291 | "outputs": []
292 | },
293 | {
294 | "cell_type": "code",
295 | "metadata": {
296 | "id": "yrKRxDsnJeu2",
297 | "colab_type": "code",
298 | "colab": {}
299 | },
300 | "source": [
301 | "reframed_multi_data.columns"
302 | ],
303 | "execution_count": 0,
304 | "outputs": []
305 | },
306 | {
307 | "cell_type": "code",
308 | "metadata": {
309 | "id": "5IoDgK2UJeu9",
310 | "colab_type": "code",
311 | "colab": {}
312 | },
313 | "source": [
314 | "y = reframed_multi_data['pollution']"
315 | ],
316 | "execution_count": 0,
317 | "outputs": []
318 | },
319 | {
320 | "cell_type": "code",
321 | "metadata": {
322 | "id": "TCJhWDfxJevD",
323 | "colab_type": "code",
324 | "colab": {}
325 | },
326 | "source": [
327 | "x = reframed_multi_data[['dew', 'temp', 'press', 'wnd_spd', 'pollution_1', 'dew_1',\n",
328 | " 'temp_1', 'press_1', 'wnd_spd_1']]"
329 | ],
330 | "execution_count": 0,
331 | "outputs": []
332 | },
333 | {
334 | "cell_type": "code",
335 | "metadata": {
336 | "id": "ULuguLHpJevJ",
337 | "colab_type": "code",
338 | "colab": {}
339 | },
340 | "source": [
341 | "x_train = x[:35799]\n",
342 | "x_test = x[35799:]"
343 | ],
344 | "execution_count": 0,
345 | "outputs": []
346 | },
347 | {
348 | "cell_type": "code",
349 | "metadata": {
350 | "id": "WzkZhGKIJevN",
351 | "colab_type": "code",
352 | "colab": {}
353 | },
354 | "source": [
355 | "y_train = y[:35799]\n",
356 | "y_test = y[35799:]"
357 | ],
358 | "execution_count": 0,
359 | "outputs": []
360 | },
361 | {
362 | "cell_type": "code",
363 | "metadata": {
364 | "id": "Dkry6nXDJevY",
365 | "colab_type": "code",
366 | "colab": {}
367 | },
368 | "source": [
369 | "from sklearn.preprocessing import MinMaxScaler\n",
370 | "scaler = MinMaxScaler(feature_range=(0,1))\n",
371 | "x_train = scaler.fit_transform(x_train)\n",
372 | "x_test = scaler.transform(x_test)\n",
373 | "y_train = scaler.fit_transform(y_train.values.reshape(-1,1))\n",
374 | "y_test = scaler.transform(y_test.values.reshape(-1,1))\n",
375 | "\n",
376 | "x_train = x_train.reshape(x_train.shape[0],1,x_train.shape[1])\n",
377 | "x_test = x_test.reshape(x_test.shape[0],1,x_test.shape[1])"
378 | ],
379 | "execution_count": 0,
380 | "outputs": []
381 | },
382 | {
383 | "cell_type": "code",
384 | "metadata": {
385 | "id": "xgG_8s8eJeve",
386 | "colab_type": "code",
387 | "colab": {}
388 | },
389 | "source": [
390 | "# Samples, Timesteps, features\n",
391 | "x_train.shape"
392 | ],
393 | "execution_count": 0,
394 | "outputs": []
395 | },
396 | {
397 | "cell_type": "code",
398 | "metadata": {
399 | "id": "SZWbffB-Jevh",
400 | "colab_type": "code",
401 | "colab": {}
402 | },
403 | "source": [
404 | "multi_lstm_model = Sequential()\n",
405 | "multi_lstm_model.add(LSTM(100,input_shape=(x_train.shape[1],x_train.shape[2])))\n",
406 | "multi_lstm_model.add(Dense(1))\n",
407 | "multi_lstm_model.compile(loss=\"mse\",optimizer=\"adam\")"
408 | ],
409 | "execution_count": 0,
410 | "outputs": []
411 | },
412 | {
413 | "cell_type": "code",
414 | "metadata": {
415 | "id": "Dq6r4Qs0Jevl",
416 | "colab_type": "code",
417 | "colab": {}
418 | },
419 | "source": [
420 | "multi_lstm_model.fit(x_train,y_train,epochs=25,batch_size=500)"
421 | ],
422 | "execution_count": 0,
423 | "outputs": []
424 | },
425 | {
426 | "cell_type": "code",
427 | "metadata": {
428 | "id": "xEMikB1GJevt",
429 | "colab_type": "code",
430 | "colab": {}
431 | },
432 | "source": [
433 | "y_pred = multi_lstm_model.predict(x_test)"
434 | ],
435 | "execution_count": 0,
436 | "outputs": []
437 | },
438 | {
439 | "cell_type": "code",
440 | "metadata": {
441 | "id": "P_U53VUcJevy",
442 | "colab_type": "code",
443 | "colab": {}
444 | },
445 | "source": [
446 | "def plot_predictions(preds,y_test):\n",
447 | " plt.figure(figsize=(10, 5.5))\n",
448 | " plt.plot(preds,linestyle='-',color='b')\n",
449 | " plt.plot(y_test,linestyle='-',color='r')\n",
450 | " plt.legend(['Predicted','Actual'], loc=2)\n",
451 | " plt.title('Actual vs Predicted')"
452 | ],
453 | "execution_count": 0,
454 | "outputs": []
455 | },
456 | {
457 | "cell_type": "code",
458 | "metadata": {
459 | "id": "1oOxrP5QJev2",
460 | "colab_type": "code",
461 | "colab": {}
462 | },
463 | "source": [
464 | "plot_predictions(y_pred[:100],y_test[:100])"
465 | ],
466 | "execution_count": 0,
467 | "outputs": []
468 | },
469 | {
470 | "cell_type": "code",
471 | "metadata": {
472 | "id": "QpcKbEb8Jev9",
473 | "colab_type": "code",
474 | "colab": {}
475 | },
476 | "source": [
477 | "r2_score(y_test,y_pred)"
478 | ],
479 | "execution_count": 0,
480 | "outputs": []
481 | },
482 | {
483 | "cell_type": "code",
484 | "metadata": {
485 | "id": "J1UUvQU3JewC",
486 | "colab_type": "code",
487 | "colab": {}
488 | },
489 | "source": [
490 | ""
491 | ],
492 | "execution_count": 0,
493 | "outputs": []
494 | }
495 | ]
496 | }
--------------------------------------------------------------------------------
/Time Series with Deep Learning/What went wrong with this LSTM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "kernelspec": {
6 | "display_name": "Python 3",
7 | "language": "python",
8 | "name": "python3"
9 | },
10 | "language_info": {
11 | "codemirror_mode": {
12 | "name": "ipython",
13 | "version": 3
14 | },
15 | "file_extension": ".py",
16 | "mimetype": "text/x-python",
17 | "name": "python",
18 | "nbconvert_exporter": "python",
19 | "pygments_lexer": "ipython3",
20 | "version": "3.6.1"
21 | },
22 | "colab": {
23 | "name": "What went wrong with this LSTM.ipynb",
24 | "version": "0.3.2",
25 | "provenance": []
26 | }
27 | },
28 | "cells": [
29 | {
30 | "cell_type": "code",
31 | "metadata": {
32 | "id": "bbupexIPKuso",
33 | "colab_type": "code",
34 | "colab": {}
35 | },
36 | "source": [
37 | "import pandas as pd\n",
38 | "from keras.models import Sequential\n",
39 | "from keras.layers import LSTM,Dense\n",
40 | "import matplotlib.pyplot as plt\n",
41 | "from sklearn.metrics import r2_score\n",
42 | "colab_path = \"https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/master/\""
43 | ],
44 | "execution_count": 0,
45 | "outputs": []
46 | },
47 | {
48 | "cell_type": "code",
49 | "metadata": {
50 | "id": "o8EtOpvLKuss",
51 | "colab_type": "code",
52 | "colab": {}
53 | },
54 | "source": [
55 | "multi_data = pd.read_csv(colab_path+\"data/flotation-cell.csv\",parse_dates=True,index_col=0)"
56 | ],
57 | "execution_count": 0,
58 | "outputs": []
59 | },
60 | {
61 | "cell_type": "code",
62 | "metadata": {
63 | "id": "u-89zwJ1Kusu",
64 | "colab_type": "code",
65 | "colab": {}
66 | },
67 | "source": [
68 | "def create_lags(df):\n",
69 | " for col in df.columns:\n",
70 | " df[col+\"_1\"] = df[col].shift(1)\n",
71 | " return df.dropna()"
72 | ],
73 | "execution_count": 0,
74 | "outputs": []
75 | },
76 | {
77 | "cell_type": "code",
78 | "metadata": {
79 | "id": "9XsOYqMvKusw",
80 | "colab_type": "code",
81 | "colab": {}
82 | },
83 | "source": [
84 | "# Look at the dataset first -- is something wrong with it?"
85 | ],
86 | "execution_count": 0,
87 | "outputs": []
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "id": "GfqdxiAaKusy",
93 | "colab_type": "code",
94 | "colab": {}
95 | },
96 | "source": [
97 | "multi_data.reset_index(inplace=True)\n",
98 | "del multi_data['Date and time']"
99 | ],
100 | "execution_count": 0,
101 | "outputs": []
102 | },
103 | {
104 | "cell_type": "code",
105 | "metadata": {
106 | "id": "lVp3P7GMKus0",
107 | "colab_type": "code",
108 | "colab": {}
109 | },
110 | "source": [
111 | "multi_data.dropna(inplace=True)"
112 | ],
113 | "execution_count": 0,
114 | "outputs": []
115 | },
116 | {
117 | "cell_type": "code",
118 | "metadata": {
119 | "id": "wGtrnQFmKus2",
120 | "colab_type": "code",
121 | "colab": {}
122 | },
123 | "source": [
124 | "reframed_multi_data = create_lags(multi_data)"
125 | ],
126 | "execution_count": 0,
127 | "outputs": []
128 | },
129 | {
130 | "cell_type": "code",
131 | "metadata": {
132 | "id": "Ssnoxz0tKus3",
133 | "colab_type": "code",
134 | "colab": {}
135 | },
136 | "source": [
137 | "reframed_multi_data.columns"
138 | ],
139 | "execution_count": 0,
140 | "outputs": []
141 | },
142 | {
143 | "cell_type": "code",
144 | "metadata": {
145 | "id": "i2gGvpjCKus5",
146 | "colab_type": "code",
147 | "colab": {}
148 | },
149 | "source": [
150 | "y = reframed_multi_data['Air flow rate']"
151 | ],
152 | "execution_count": 0,
153 | "outputs": []
154 | },
155 | {
156 | "cell_type": "code",
157 | "metadata": {
158 | "id": "Q722-7ojKus8",
159 | "colab_type": "code",
160 | "colab": {}
161 | },
162 | "source": [
163 | "x = reframed_multi_data[['Upstream pH', 'CuSO4 added', 'Pulp level',\n",
164 | " 'Feed rate', 'Feed rate_1', 'Upstream pH_1', 'CuSO4 added_1',\n",
165 | " 'Pulp level_1', 'Air flow rate_1']]"
166 | ],
167 | "execution_count": 0,
168 | "outputs": []
169 | },
170 | {
171 | "cell_type": "code",
172 | "metadata": {
173 | "id": "YLwuoN-VKus-",
174 | "colab_type": "code",
175 | "colab": {}
176 | },
177 | "source": [
178 | "x_train = x[:2500]\n",
179 | "x_test = x[2500:]"
180 | ],
181 | "execution_count": 0,
182 | "outputs": []
183 | },
184 | {
185 | "cell_type": "code",
186 | "metadata": {
187 | "id": "81HOwb0OKutA",
188 | "colab_type": "code",
189 | "colab": {}
190 | },
191 | "source": [
192 | "y_train = y[:2500]\n",
193 | "y_test = y[2500:]"
194 | ],
195 | "execution_count": 0,
196 | "outputs": []
197 | },
198 | {
199 | "cell_type": "code",
200 | "metadata": {
201 | "id": "8pozYKSPKutD",
202 | "colab_type": "code",
203 | "colab": {}
204 | },
205 | "source": [
206 | "# Something fishy here\n",
207 | "from sklearn.preprocessing import MinMaxScaler\n",
208 | "scaler = MinMaxScaler(feature_range=(0,1))\n",
209 | "x_train = scaler.fit_transform(x_train)\n",
210 | "x_test = scaler.fit_transform(x_test)\n",
211 | "y_train = scaler.fit_transform(y_train.values.reshape(-1,1))\n",
212 | "y_test = scaler.fit_transform(y_test.values.reshape(-1,1))\n",
213 | "\n",
214 | "x_train = x_train.reshape(x_train.shape[0],1,x_train.shape[1])\n",
215 | "x_test = x_test.reshape(x_test.shape[0],1,x_test.shape[1])"
216 | ],
217 | "execution_count": 0,
218 | "outputs": []
219 | },
220 | {
221 | "cell_type": "code",
222 | "metadata": {
223 | "id": "7qxJGZljKutF",
224 | "colab_type": "code",
225 | "colab": {}
226 | },
227 | "source": [
228 | "x_train.shape"
229 | ],
230 | "execution_count": 0,
231 | "outputs": []
232 | },
233 | {
234 | "cell_type": "code",
235 | "metadata": {
236 | "id": "lQOJ-dG-KutH",
237 | "colab_type": "code",
238 | "colab": {}
239 | },
240 | "source": [
241 | "# Does hyperparameter tuning help?\n",
242 | "multi_lstm_model = Sequential()\n",
243 | "multi_lstm_model.add(LSTM(100,input_shape=(x_train.shape[1],x_train.shape[2])))\n",
244 | "multi_lstm_model.add(Dense(1))\n",
245 | "multi_lstm_model.compile(loss=\"mse\",optimizer=\"adam\")"
246 | ],
247 | "execution_count": 0,
248 | "outputs": []
249 | },
250 | {
251 | "cell_type": "code",
252 | "metadata": {
253 | "id": "QbbWWyU_KutJ",
254 | "colab_type": "code",
255 | "colab": {}
256 | },
257 | "source": [
258 | "multi_lstm_model.fit(x_train,y_train,nb_epoch=25,batch_size=100)"
259 | ],
260 | "execution_count": 0,
261 | "outputs": []
262 | },
263 | {
264 | "cell_type": "code",
265 | "metadata": {
266 | "id": "lXVNTQkbKutK",
267 | "colab_type": "code",
268 | "colab": {}
269 | },
270 | "source": [
271 | "y_pred = multi_lstm_model.predict(x_test)"
272 | ],
273 | "execution_count": 0,
274 | "outputs": []
275 | },
276 | {
277 | "cell_type": "code",
278 | "metadata": {
279 | "id": "Xf5x9Cc8KutM",
280 | "colab_type": "code",
281 | "colab": {}
282 | },
283 | "source": [
284 | "def plot_predictions(preds,y_test):\n",
285 | " plt.figure(figsize=(10, 5.5))\n",
286 | " plt.plot(preds,linestyle='-',color='b')\n",
287 | " plt.plot(y_test,linestyle='-',color='r')\n",
288 | " plt.legend(['Predicted','Actual'], loc=2)\n",
289 | " plt.title('Actual vs Predicted')"
290 | ],
291 | "execution_count": 0,
292 | "outputs": []
293 | },
294 | {
295 | "cell_type": "code",
296 | "metadata": {
297 | "id": "6-uhppEyKutO",
298 | "colab_type": "code",
299 | "colab": {}
300 | },
301 | "source": [
302 | "plot_predictions(y_pred[:100],y_test[:100])"
303 | ],
304 | "execution_count": 0,
305 | "outputs": []
306 | },
307 | {
308 | "cell_type": "code",
309 | "metadata": {
310 | "id": "2u3zn_ZjKutP",
311 | "colab_type": "code",
312 | "colab": {}
313 | },
314 | "source": [
315 | "r2_score(y_test,y_pred)"
316 | ],
317 | "execution_count": 0,
318 | "outputs": []
319 | },
320 | {
321 | "cell_type": "code",
322 | "metadata": {
323 | "id": "L6dEQAqGKutS",
324 | "colab_type": "code",
325 | "colab": {}
326 | },
327 | "source": [
328 | ""
329 | ],
330 | "execution_count": 0,
331 | "outputs": []
332 | }
333 | ]
334 | }
--------------------------------------------------------------------------------
/data/AirPassengers.csv:
--------------------------------------------------------------------------------
1 | Month,#Passengers
1949-01,112
1949-02,118
1949-03,132
1949-04,129
1949-05,121
1949-06,135
1949-07,148
1949-08,148
1949-09,136
1949-10,119
1949-11,104
1949-12,118
1950-01,115
1950-02,126
1950-03,141
1950-04,135
1950-05,125
1950-06,149
1950-07,170
1950-08,170
1950-09,158
1950-10,133
1950-11,114
1950-12,140
1951-01,145
1951-02,150
1951-03,178
1951-04,163
1951-05,172
1951-06,178
1951-07,199
1951-08,199
1951-09,184
1951-10,162
1951-11,146
1951-12,166
1952-01,171
1952-02,180
1952-03,193
1952-04,181
1952-05,183
1952-06,218
1952-07,230
1952-08,242
1952-09,209
1952-10,191
1952-11,172
1952-12,194
1953-01,196
1953-02,196
1953-03,236
1953-04,235
1953-05,229
1953-06,243
1953-07,264
1953-08,272
1953-09,237
1953-10,211
1953-11,180
1953-12,201
1954-01,204
1954-02,188
1954-03,235
1954-04,227
1954-05,234
1954-06,264
1954-07,302
1954-08,293
1954-09,259
1954-10,229
1954-11,203
1954-12,229
1955-01,242
1955-02,233
1955-03,267
1955-04,269
1955-05,270
1955-06,315
1955-07,364
1955-08,347
1955-09,312
1955-10,274
1955-11,237
1955-12,278
1956-01,284
1956-02,277
1956-03,317
1956-04,313
1956-05,318
1956-06,374
1956-07,413
1956-08,405
1956-09,355
1956-10,306
1956-11,271
1956-12,306
1957-01,315
1957-02,301
1957-03,356
1957-04,348
1957-05,355
1957-06,422
1957-07,465
1957-08,467
1957-09,404
1957-10,347
1957-11,305
1957-12,336
1958-01,340
1958-02,318
1958-03,362
1958-04,348
1958-05,363
1958-06,435
1958-07,491
1958-08,505
1958-09,404
1958-10,359
1958-11,310
1958-12,337
1959-01,360
1959-02,342
1959-03,406
1959-04,396
1959-05,420
1959-06,472
1959-07,548
1959-08,559
1959-09,463
1959-10,407
1959-11,362
1959-12,405
1960-01,417
1960-02,391
1960-03,419
1960-04,461
1960-05,472
1960-06,535
1960-07,622
1960-08,606
1960-09,508
1960-10,461
1960-11,390
1960-12,432
--------------------------------------------------------------------------------
/data/All-Transactions House Price Index.csv:
--------------------------------------------------------------------------------
1 | Date,Value
2 | 1990-01-01,164.17
3 | 1990-04-01,164.84
4 | 1990-07-01,165.83
5 | 1990-10-01,165.19
6 | 1991-01-01,166.56
7 | 1991-04-01,167.68
8 | 1991-07-01,167.98
9 | 1991-10-01,170.34
10 | 1992-01-01,171.66
11 | 1992-04-01,171.62
12 | 1992-07-01,173.58
13 | 1992-10-01,174.46
14 | 1993-01-01,174.53
15 | 1993-04-01,176.19
16 | 1993-07-01,177.61
17 | 1993-10-01,179.08
18 | 1994-01-01,180.22
19 | 1994-04-01,181.32
20 | 1994-07-01,182.06
21 | 1994-10-01,181.9
22 | 1995-01-01,182.87
23 | 1995-04-01,185.86
24 | 1995-07-01,188.71
25 | 1995-10-01,190.2
26 | 1996-01-01,192.34
27 | 1996-04-01,192.68
28 | 1996-07-01,193.53
29 | 1996-10-01,195.05
30 | 1997-01-01,196.67
31 | 1997-04-01,198.32
32 | 1997-07-01,201.04
33 | 1997-10-01,203.66
34 | 1998-01-01,206.69
35 | 1998-04-01,208.56
36 | 1998-07-01,211.46
37 | 1998-10-01,213.97
38 | 1999-01-01,216.22
39 | 1999-04-01,219.12
40 | 1999-07-01,222.23
41 | 1999-10-01,224.52
42 | 2000-01-01,228.81
43 | 2000-04-01,232.54
44 | 2000-07-01,236.77
45 | 2000-10-01,240.42
46 | 2001-01-01,246.34
47 | 2001-04-01,250.47
48 | 2001-07-01,254.36
49 | 2001-10-01,257.51
50 | 2002-01-01,261.21
51 | 2002-04-01,265.65
52 | 2002-07-01,270.92
53 | 2002-10-01,274.76
54 | 2003-01-01,278.11
55 | 2003-04-01,281.49
56 | 2003-07-01,285.8
57 | 2003-10-01,293.88
58 | 2004-01-01,298.47
59 | 2004-04-01,305.7
60 | 2004-07-01,317.4
61 | 2004-10-01,324.16
62 | 2005-01-01,331.63
63 | 2005-04-01,342.23
64 | 2005-07-01,352.54
65 | 2005-10-01,360.53
66 | 2006-01-01,366.27
67 | 2006-04-01,370
68 | 2006-07-01,373.08
69 | 2006-10-01,376.87
70 | 2007-01-01,378.23
71 | 2007-04-01,377.98
72 | 2007-07-01,373.79
73 | 2007-10-01,372.64
74 | 2008-01-01,369.98
75 | 2008-04-01,360.71
76 | 2008-07-01,349.42
77 | 2008-10-01,346.25
78 | 2009-01-01,348.8
79 | 2009-04-01,339.58
80 | 2009-07-01,330.58
81 | 2009-10-01,328.16
82 | 2010-01-01,324.21
83 | 2010-04-01,321.32
84 | 2010-07-01,324.39
85 | 2010-10-01,322.03
86 | 2011-01-01,313.19
87 | 2011-04-01,307.78
88 | 2011-07-01,310.13
89 | 2011-10-01,311.5
90 | 2012-01-01,308.25
91 | 2012-04-01,307.02
92 | 2012-07-01,310.98
93 | 2012-10-01,313.51
94 | 2013-01-01,315.16
95 | 2013-04-01,320.16
96 | 2013-07-01,325.31
97 | 2013-10-01,328.2
98 | 2014-01-01,331.12
99 | 2014-04-01,337.56
100 | 2014-07-01,342.53
101 | 2014-10-01,345.41
102 | 2015-01-01,348.86
103 | 2015-04-01,355.22
104 | 2015-07-01,360.9
105 | 2015-10-01,364.34
106 | 2016-01-01,367.93
107 | 2016-04-01,375.33
108 | 2016-07-01,381.93
109 | 2016-10-01,385.38
110 | 2017-01-01,388.91
111 | 2017-04-01,399.18
--------------------------------------------------------------------------------
/data/daily_female_births.csv:
--------------------------------------------------------------------------------
1 | "Date","Births"
2 | "1959-01-01",35
3 | "1959-01-02",32
4 | "1959-01-03",30
5 | "1959-01-04",31
6 | "1959-01-05",44
7 | "1959-01-06",29
8 | "1959-01-07",45
9 | "1959-01-08",43
10 | "1959-01-09",38
11 | "1959-01-10",27
12 | "1959-01-11",38
13 | "1959-01-12",33
14 | "1959-01-13",55
15 | "1959-01-14",47
16 | "1959-01-15",45
17 | "1959-01-16",37
18 | "1959-01-17",50
19 | "1959-01-18",43
20 | "1959-01-19",41
21 | "1959-01-20",52
22 | "1959-01-21",34
23 | "1959-01-22",53
24 | "1959-01-23",39
25 | "1959-01-24",32
26 | "1959-01-25",37
27 | "1959-01-26",43
28 | "1959-01-27",39
29 | "1959-01-28",35
30 | "1959-01-29",44
31 | "1959-01-30",38
32 | "1959-01-31",24
33 | "1959-02-01",23
34 | "1959-02-02",31
35 | "1959-02-03",44
36 | "1959-02-04",38
37 | "1959-02-05",50
38 | "1959-02-06",38
39 | "1959-02-07",51
40 | "1959-02-08",31
41 | "1959-02-09",31
42 | "1959-02-10",51
43 | "1959-02-11",36
44 | "1959-02-12",45
45 | "1959-02-13",51
46 | "1959-02-14",34
47 | "1959-02-15",52
48 | "1959-02-16",47
49 | "1959-02-17",45
50 | "1959-02-18",46
51 | "1959-02-19",39
52 | "1959-02-20",48
53 | "1959-02-21",37
54 | "1959-02-22",35
55 | "1959-02-23",52
56 | "1959-02-24",42
57 | "1959-02-25",45
58 | "1959-02-26",39
59 | "1959-02-27",37
60 | "1959-02-28",30
61 | "1959-03-01",35
62 | "1959-03-02",28
63 | "1959-03-03",45
64 | "1959-03-04",34
65 | "1959-03-05",36
66 | "1959-03-06",50
67 | "1959-03-07",44
68 | "1959-03-08",39
69 | "1959-03-09",32
70 | "1959-03-10",39
71 | "1959-03-11",45
72 | "1959-03-12",43
73 | "1959-03-13",39
74 | "1959-03-14",31
75 | "1959-03-15",27
76 | "1959-03-16",30
77 | "1959-03-17",42
78 | "1959-03-18",46
79 | "1959-03-19",41
80 | "1959-03-20",36
81 | "1959-03-21",45
82 | "1959-03-22",46
83 | "1959-03-23",43
84 | "1959-03-24",38
85 | "1959-03-25",34
86 | "1959-03-26",35
87 | "1959-03-27",56
88 | "1959-03-28",36
89 | "1959-03-29",32
90 | "1959-03-30",50
91 | "1959-03-31",41
92 | "1959-04-01",39
93 | "1959-04-02",41
94 | "1959-04-03",47
95 | "1959-04-04",34
96 | "1959-04-05",36
97 | "1959-04-06",33
98 | "1959-04-07",35
99 | "1959-04-08",38
100 | "1959-04-09",38
101 | "1959-04-10",34
102 | "1959-04-11",53
103 | "1959-04-12",34
104 | "1959-04-13",34
105 | "1959-04-14",38
106 | "1959-04-15",35
107 | "1959-04-16",32
108 | "1959-04-17",42
109 | "1959-04-18",34
110 | "1959-04-19",46
111 | "1959-04-20",30
112 | "1959-04-21",46
113 | "1959-04-22",45
114 | "1959-04-23",54
115 | "1959-04-24",34
116 | "1959-04-25",37
117 | "1959-04-26",35
118 | "1959-04-27",40
119 | "1959-04-28",42
120 | "1959-04-29",58
121 | "1959-04-30",51
122 | "1959-05-01",32
123 | "1959-05-02",35
124 | "1959-05-03",38
125 | "1959-05-04",33
126 | "1959-05-05",39
127 | "1959-05-06",47
128 | "1959-05-07",38
129 | "1959-05-08",52
130 | "1959-05-09",30
131 | "1959-05-10",34
132 | "1959-05-11",40
133 | "1959-05-12",35
134 | "1959-05-13",42
135 | "1959-05-14",41
136 | "1959-05-15",42
137 | "1959-05-16",38
138 | "1959-05-17",24
139 | "1959-05-18",34
140 | "1959-05-19",43
141 | "1959-05-20",36
142 | "1959-05-21",55
143 | "1959-05-22",41
144 | "1959-05-23",45
145 | "1959-05-24",41
146 | "1959-05-25",37
147 | "1959-05-26",43
148 | "1959-05-27",39
149 | "1959-05-28",33
150 | "1959-05-29",43
151 | "1959-05-30",40
152 | "1959-05-31",38
153 | "1959-06-01",45
154 | "1959-06-02",46
155 | "1959-06-03",34
156 | "1959-06-04",35
157 | "1959-06-05",48
158 | "1959-06-06",51
159 | "1959-06-07",36
160 | "1959-06-08",33
161 | "1959-06-09",46
162 | "1959-06-10",42
163 | "1959-06-11",48
164 | "1959-06-12",34
165 | "1959-06-13",41
166 | "1959-06-14",35
167 | "1959-06-15",40
168 | "1959-06-16",34
169 | "1959-06-17",30
170 | "1959-06-18",36
171 | "1959-06-19",40
172 | "1959-06-20",39
173 | "1959-06-21",45
174 | "1959-06-22",38
175 | "1959-06-23",47
176 | "1959-06-24",33
177 | "1959-06-25",30
178 | "1959-06-26",42
179 | "1959-06-27",43
180 | "1959-06-28",41
181 | "1959-06-29",41
182 | "1959-06-30",59
183 | "1959-07-01",43
184 | "1959-07-02",45
185 | "1959-07-03",38
186 | "1959-07-04",37
187 | "1959-07-05",45
188 | "1959-07-06",42
189 | "1959-07-07",57
190 | "1959-07-08",46
191 | "1959-07-09",51
192 | "1959-07-10",41
193 | "1959-07-11",47
194 | "1959-07-12",26
195 | "1959-07-13",35
196 | "1959-07-14",44
197 | "1959-07-15",41
198 | "1959-07-16",42
199 | "1959-07-17",36
200 | "1959-07-18",45
201 | "1959-07-19",45
202 | "1959-07-20",45
203 | "1959-07-21",47
204 | "1959-07-22",38
205 | "1959-07-23",42
206 | "1959-07-24",35
207 | "1959-07-25",36
208 | "1959-07-26",39
209 | "1959-07-27",45
210 | "1959-07-28",43
211 | "1959-07-29",47
212 | "1959-07-30",36
213 | "1959-07-31",41
214 | "1959-08-01",50
215 | "1959-08-02",39
216 | "1959-08-03",41
217 | "1959-08-04",46
218 | "1959-08-05",64
219 | "1959-08-06",45
220 | "1959-08-07",34
221 | "1959-08-08",38
222 | "1959-08-09",44
223 | "1959-08-10",48
224 | "1959-08-11",46
225 | "1959-08-12",44
226 | "1959-08-13",37
227 | "1959-08-14",39
228 | "1959-08-15",44
229 | "1959-08-16",45
230 | "1959-08-17",33
231 | "1959-08-18",44
232 | "1959-08-19",38
233 | "1959-08-20",46
234 | "1959-08-21",46
235 | "1959-08-22",40
236 | "1959-08-23",39
237 | "1959-08-24",44
238 | "1959-08-25",48
239 | "1959-08-26",50
240 | "1959-08-27",41
241 | "1959-08-28",42
242 | "1959-08-29",51
243 | "1959-08-30",41
244 | "1959-08-31",44
245 | "1959-09-01",38
246 | "1959-09-02",68
247 | "1959-09-03",40
248 | "1959-09-04",42
249 | "1959-09-05",51
250 | "1959-09-06",44
251 | "1959-09-07",45
252 | "1959-09-08",36
253 | "1959-09-09",57
254 | "1959-09-10",44
255 | "1959-09-11",42
256 | "1959-09-12",53
257 | "1959-09-13",42
258 | "1959-09-14",34
259 | "1959-09-15",40
260 | "1959-09-16",56
261 | "1959-09-17",44
262 | "1959-09-18",53
263 | "1959-09-19",55
264 | "1959-09-20",39
265 | "1959-09-21",59
266 | "1959-09-22",55
267 | "1959-09-23",73
268 | "1959-09-24",55
269 | "1959-09-25",44
270 | "1959-09-26",43
271 | "1959-09-27",40
272 | "1959-09-28",47
273 | "1959-09-29",51
274 | "1959-09-30",56
275 | "1959-10-01",49
276 | "1959-10-02",54
277 | "1959-10-03",56
278 | "1959-10-04",47
279 | "1959-10-05",44
280 | "1959-10-06",43
281 | "1959-10-07",42
282 | "1959-10-08",45
283 | "1959-10-09",50
284 | "1959-10-10",48
285 | "1959-10-11",43
286 | "1959-10-12",40
287 | "1959-10-13",59
288 | "1959-10-14",41
289 | "1959-10-15",42
290 | "1959-10-16",51
291 | "1959-10-17",49
292 | "1959-10-18",45
293 | "1959-10-19",43
294 | "1959-10-20",42
295 | "1959-10-21",38
296 | "1959-10-22",47
297 | "1959-10-23",38
298 | "1959-10-24",36
299 | "1959-10-25",42
300 | "1959-10-26",35
301 | "1959-10-27",28
302 | "1959-10-28",44
303 | "1959-10-29",36
304 | "1959-10-30",45
305 | "1959-10-31",46
306 | "1959-11-01",48
307 | "1959-11-02",49
308 | "1959-11-03",43
309 | "1959-11-04",42
310 | "1959-11-05",59
311 | "1959-11-06",45
312 | "1959-11-07",52
313 | "1959-11-08",46
314 | "1959-11-09",42
315 | "1959-11-10",40
316 | "1959-11-11",40
317 | "1959-11-12",45
318 | "1959-11-13",35
319 | "1959-11-14",35
320 | "1959-11-15",40
321 | "1959-11-16",39
322 | "1959-11-17",33
323 | "1959-11-18",42
324 | "1959-11-19",47
325 | "1959-11-20",51
326 | "1959-11-21",44
327 | "1959-11-22",40
328 | "1959-11-23",57
329 | "1959-11-24",49
330 | "1959-11-25",45
331 | "1959-11-26",49
332 | "1959-11-27",51
333 | "1959-11-28",46
334 | "1959-11-29",44
335 | "1959-11-30",52
336 | "1959-12-01",45
337 | "1959-12-02",32
338 | "1959-12-03",46
339 | "1959-12-04",41
340 | "1959-12-05",34
341 | "1959-12-06",33
342 | "1959-12-07",36
343 | "1959-12-08",49
344 | "1959-12-09",43
345 | "1959-12-10",43
346 | "1959-12-11",34
347 | "1959-12-12",39
348 | "1959-12-13",35
349 | "1959-12-14",52
350 | "1959-12-15",47
351 | "1959-12-16",52
352 | "1959-12-17",39
353 | "1959-12-18",40
354 | "1959-12-19",42
355 | "1959-12-20",42
356 | "1959-12-21",53
357 | "1959-12-22",39
358 | "1959-12-23",40
359 | "1959-12-24",38
360 | "1959-12-25",44
361 | "1959-12-26",34
362 | "1959-12-27",37
363 | "1959-12-28",52
364 | "1959-12-29",48
365 | "1959-12-30",55
366 | "1959-12-31",50
--------------------------------------------------------------------------------
/data/exercise3.csv:
--------------------------------------------------------------------------------
1 | Date,Value
2 | 1990-01-01,164.17
3 | 1990-04-01,164.84
4 | 1990-07-01,165.83
5 | 1990-10-01,NaN
6 | 1991-01-01,166.56
7 | 1991-04-01,167.68
8 | 1991-07-01,167.98
9 | 1991-10-01,170.34
10 | 1992-01-01,NaN
11 | 1992-04-01,171.62
12 | 1992-07-01,173.58
13 | 1992-10-01,NaN
14 | 1993-01-01,174.53
15 | 1993-04-01,176.19
16 | 1993-07-01,177.61
17 | 1993-10-01,179.08
18 | 1994-01-01,NaN
19 | 1994-04-01,181.32
20 | 1994-07-01,182.06
21 | 1994-10-01,181.9
22 | 1995-01-01,182.87
23 | 1995-04-01,185.86
24 | 1995-07-01,188.71
25 | 1995-10-01,190.2
26 | 1996-01-01,192.34
27 | 1996-04-01,192.68
28 | 1996-07-01,NaN
29 | 1996-10-01,195.05
30 | 1997-01-01,196.67
31 | 1997-04-01,198.32
32 | 1997-07-01,201.04
33 | 1997-10-01,203.66
34 | 1998-01-01,206.69
35 | 1998-04-01,208.56
36 | 1998-07-01,211.46
37 | 1998-10-01,213.97
38 | 1999-01-01,216.22
39 | 1999-04-01,NaN
40 | 1999-07-01,222.23
41 | 1999-10-01,224.52
42 | 2000-01-01,228.81
43 | 2000-04-01,232.54
44 | 2000-07-01,236.77
45 | 2000-10-01,240.42
--------------------------------------------------------------------------------
/data/exercise_4.csv:
--------------------------------------------------------------------------------
1 | date,temp
2 | 01/01/81,20.70
3 | 02/01/81,17.90
4 | 03/01/81,18.80
5 | 04/01/81,14.60
6 | 05/01/81,15.80
7 | 06/01/81,15.80
8 | 07/01/81,15.80
9 | 08/01/81,17.40
10 | 09/01/81,21.80
11 | 10/01/81,20.00
12 | 11/01/81,16.20
13 | 12/01/81,13.30
14 | 13/01/81,16.70
15 | 14/01/81,21.50
16 | 15/01/81,25.00
17 | 16/01/81,20.70
18 | 17/01/81,20.60
19 | 18/01/81,24.80
20 | 19/01/81,17.70
21 | 20/01/81,15.50
22 | 21/01/81,18.20
23 | 22/01/81,12.10
24 | 23/01/81,14.40
25 | 24/01/81,16.00
26 | 25/01/81,16.50
27 | 26/01/81,18.70
28 | 27/01/81,19.40
29 | 28/01/81,17.20
30 | 29/01/81,15.50
31 | 30/01/81,15.10
32 | 31/01/81,15.40
33 | 01/02/81,15.30
34 | 02/02/81,18.80
35 | 03/02/81,21.90
36 | 04/02/81,19.90
37 | 05/02/81,16.60
38 | 06/02/81,16.80
39 | 07/02/81,14.60
40 | 08/02/81,17.10
41 | 09/02/81,25.00
42 | 10/02/81,15.00
43 | 11/02/81,13.70
44 | 12/02/81,13.90
45 | 13/02/81,18.30
46 | 14/02/81,22.00
47 | 15/02/81,22.10
48 | 16/02/81,21.20
49 | 17/02/81,18.40
50 | 18/02/81,16.60
51 | 19/02/81,16.10
52 | 20/02/81,15.70
53 | 21/02/81,16.60
54 | 22/02/81,16.50
55 | 23/02/81,14.40
56 | 24/02/81,14.40
57 | 25/02/81,18.50
58 | 26/02/81,16.90
59 | 27/02/81,17.50
60 | 28/02/81,21.20
61 | 01/03/81,17.80
62 | 02/03/81,18.60
63 | 03/03/81,17.00
64 | 04/03/81,16.00
65 | 05/03/81,13.30
66 | 06/03/81,14.30
67 | 07/03/81,11.40
68 | 08/03/81,16.30
69 | 09/03/81,16.10
70 | 10/03/81,11.80
71 | 11/03/81,12.20
72 | 12/03/81,14.70
73 | 13/03/81,11.80
74 | 14/03/81,11.30
75 | 15/03/81,10.60
76 | 16/03/81,11.70
77 | 17/03/81,14.20
78 | 18/03/81,11.20
79 | 19/03/81,16.90
80 | 20/03/81,16.70
81 | 21/03/81,8.10
82 | 22/03/81,8.00
83 | 23/03/81,8.80
84 | 24/03/81,13.40
85 | 25/03/81,10.90
86 | 26/03/81,13.40
87 | 27/03/81,11.00
88 | 28/03/81,15.00
89 | 29/03/81,15.70
90 | 30/03/81,14.50
91 | 31/03/81,15.80
92 | 01/04/81,16.70
93 | 02/04/81,16.80
94 | 03/04/81,17.50
95 | 04/04/81,17.10
96 | 05/04/81,18.10
97 | 06/04/81,16.60
98 | 07/04/81,10.00
99 | 08/04/81,14.90
100 | 09/04/81,15.90
101 | 10/04/81,13.00
102 | 11/04/81,7.60
103 | 12/04/81,11.50
104 | 13/04/81,13.50
105 | 14/04/81,13.00
106 | 15/04/81,13.30
107 | 16/04/81,12.10
108 | 17/04/81,12.40
109 | 18/04/81,13.20
110 | 19/04/81,13.80
111 | 20/04/81,10.60
112 | 21/04/81,9.00
113 | 22/04/81,10.00
114 | 23/04/81,9.80
115 | 24/04/81,11.50
116 | 25/04/81,8.90
117 | 26/04/81,7.40
118 | 27/04/81,9.90
119 | 28/04/81,9.30
120 | 29/04/81,9.90
121 | 30/04/81,7.40
122 | 01/05/81,8.60
123 | 02/05/81,11.90
124 | 03/05/81,14.00
125 | 04/05/81,8.60
126 | 05/05/81,10.00
127 | 06/05/81,13.50
128 | 07/05/81,12.00
129 | 08/05/81,10.50
130 | 09/05/81,10.70
131 | 10/05/81,8.10
132 | 11/05/81,10.10
133 | 12/05/81,10.60
134 | 13/05/81,5.30
135 | 14/05/81,6.60
136 | 15/05/81,8.50
137 | 16/05/81,11.20
138 | 17/05/81,9.80
139 | 18/05/81,5.90
140 | 19/05/81,3.20
141 | 20/05/81,2.10
142 | 21/05/81,3.40
143 | 22/05/81,5.40
144 | 23/05/81,9.60
145 | 24/05/81,11.50
146 | 25/05/81,12.30
147 | 26/05/81,12.60
148 | 27/05/81,11.00
149 | 28/05/81,11.20
150 | 29/05/81,11.40
151 | 30/05/81,11.80
152 | 31/05/81,12.80
153 | 01/06/81,11.60
154 | 02/06/81,10.60
155 | 03/06/81,9.80
156 | 04/06/81,11.20
157 | 05/06/81,5.70
158 | 06/06/81,7.10
159 | 07/06/81,2.50
160 | 08/06/81,3.50
161 | 09/06/81,4.60
162 | 10/06/81,11.00
163 | 11/06/81,5.70
164 | 12/06/81,7.70
165 | 13/06/81,10.40
166 | 14/06/81,11.40
167 | 15/06/81,9.20
168 | 16/06/81,6.10
169 | 17/06/81,2.70
170 | 18/06/81,4.30
171 | 19/06/81,6.30
172 | 20/06/81,3.80
173 | 21/06/81,4.40
174 | 22/06/81,7.10
175 | 23/06/81,4.80
176 | 24/06/81,5.80
177 | 25/06/81,6.20
178 | 26/06/81,7.30
179 | 27/06/81,9.20
180 | 28/06/81,10.20
181 | 29/06/81,9.50
182 | 30/06/81,9.50
183 |
--------------------------------------------------------------------------------
/data/exercise_sample.csv:
--------------------------------------------------------------------------------
1 | x,value
2 | 2018-11-01T0-10-1,-0.803908052
3 | 2018-11-02T3-45-3,1.586827482
4 | 2018-11-03T0-34-10,0.224111754
5 | 2018-11-01T0-10-1,-0.803908052
6 | 2018-11-02T3-45-3,1.586827482
7 | 2018-11-03T0-34-10,0.224111754
8 | 2018-11-04T11-3-4,-0.715936787
--------------------------------------------------------------------------------
/data/gdp_india.csv:
--------------------------------------------------------------------------------
1 | Country Name,Country Code,Indicator Name,Indicator Code,Attribute,year,value
2 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1951,25.9378
3 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1952,26.90571429
4 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1953,25.90192308
5 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1954,25.81030928
6 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1955,27.7407767
7 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1956,26.38644068
8 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1957,28.75166667
9 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1958,30.16119403
10 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1959,34.115
11 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1960,36.46
12 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1961,38.69375
13 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1962,36.16648649
14 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1963,35.49433962
15 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1964,33.77056452
16 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1965,35.99348659
17 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1966,35.99348659
18 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1967,38.50895954
19 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1968,40.27629428
20 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1969,38.7700495
21 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1970,38.14814815
22 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1971,37.55053996
23 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1972,37.16431373
24 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1973,32.69516129
25 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1974,36.66120219
26 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1975,38.60532995
27 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1976,36.2426384
28 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1977,38.60561915
29 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1978,38.34932821
30 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1979,40.85664336
31 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1980,41.25
32 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1981,40.40050063
33 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1982,41.98764739
34 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1983,38.97453213
35 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1984,40.87122449
36 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1985,35.8421
37 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1986,39.0676
38 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1987,41.4087
39 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1988,41.9725
40 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1989,47.1218
41 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1990,49.90853192
42 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1991,75.33224179
43 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1992,77.40538509
44 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1993,76.97819715
45 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1994,73.46440213
46 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1995,69.65384282
47 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1996,65.97664655
48 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1997,67.8180393
49 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1998,68.0898994
50 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,1999,70.03924264
51 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2000,73.64899508
52 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2001,78.72846968
53 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2002,82.84996788
54 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2003,84.24303728
55 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2004,83.28859967
56 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2005,80.89367746
57 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2006,77.10831335
58 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2007,74.02651796
59 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2008,74.53613636
60 | India,534,Debt to GDP Ratio,GGXWDG_GDP,Value,2009,72.52660659
--------------------------------------------------------------------------------
/data/reliance_data_day.csv:
--------------------------------------------------------------------------------
1 | date,high,low,open,close,volume
2 | 2018-01-01 00:00:00+05:30,922.7,907.5,922.7,909.75,4321686
3 | 2018-01-02 00:00:00+05:30,919.55,906.4,913,911.15,4342815
4 | 2018-01-03 00:00:00+05:30,926,913.05,925,914.8,6175312
5 | 2018-01-04 00:00:00+05:30,921.8,915.7,918.15,920.3,4118581
6 | 2018-01-05 00:00:00+05:30,926.9,920.25,921.8,923.25,3401905
7 | 2018-01-08 00:00:00+05:30,931,923.5,926.1,928.55,4035417
8 | 2018-01-09 00:00:00+05:30,943.9,924,928.15,940.95,6534997
9 | 2018-01-10 00:00:00+05:30,947.4,935.5,943,942.35,5361502
10 | 2018-01-11 00:00:00+05:30,942.65,935,941.8,937.75,3588727
11 | 2018-01-12 00:00:00+05:30,952.8,938.25,943,949,6890028
12 | 2018-01-15 00:00:00+05:30,958.5,945.2,950,949.15,5084113
13 | 2018-01-16 00:00:00+05:30,947.7,920,947.7,922.95,4948895
14 | 2018-01-17 00:00:00+05:30,929,907,926,924.5,6036432
15 | 2018-01-18 00:00:00+05:30,929.6,915.1,929,919.7,4289053
16 | 2018-01-19 00:00:00+05:30,934.4,922.1,923,931.3,4559564
17 | 2018-01-22 00:00:00+05:30,974.3,940,948,971.5,20892838
18 | 2018-01-23 00:00:00+05:30,990.95,975,975,983.25,10298500
19 | 2018-01-24 00:00:00+05:30,981.75,962.65,981.75,966.35,6532672
20 | 2018-01-25 00:00:00+05:30,972,956.15,967.9,965.9,6249419
21 | 2018-01-29 00:00:00+05:30,977.3,959.3,966.2,964.5,4524605
22 | 2018-01-30 00:00:00+05:30,965.8,949,965.8,950.4,4198139
23 | 2018-01-31 00:00:00+05:30,964.5,941.55,950,961.3,5738209
24 | 2018-02-01 00:00:00+05:30,972.6,936.6,963.25,943.85,7554587
25 | 2018-02-02 00:00:00+05:30,943.95,901,935,904.35,13242095
26 | 2018-02-05 00:00:00+05:30,912.5,881.8,894,902.15,8773525
27 | 2018-02-06 00:00:00+05:30,898,871,877,892.1,11000567
28 | 2018-02-07 00:00:00+05:30,905.4,890,902.55,894.4,7100838
29 | 2018-02-08 00:00:00+05:30,910.65,892.35,897.9,904.55,6320734
30 | 2018-02-09 00:00:00+05:30,902,886,889,897.85,5294279
31 | 2018-02-12 00:00:00+05:30,918.2,902.05,903,915.5,5429792
32 | 2018-02-14 00:00:00+05:30,942.5,921,923,933.65,10170959
33 | 2018-02-15 00:00:00+05:30,945.75,928.15,937.65,935.65,6109454
34 | 2018-02-16 00:00:00+05:30,942.6,918.1,939.6,921.3,4401672
35 | 2018-02-19 00:00:00+05:30,932,913.55,921,927,4087936
36 | 2018-02-20 00:00:00+05:30,929.55,918.8,929,921.1,3866919
37 | 2018-02-21 00:00:00+05:30,932.5,922.15,930,929.35,5448230
38 | 2018-02-22 00:00:00+05:30,931,920,922,924.35,5118850
39 | 2018-02-23 00:00:00+05:30,938,922.75,925.4,934.25,4850839
40 | 2018-02-26 00:00:00+05:30,944.9,936.6,939,938.7,3856478
41 | 2018-02-27 00:00:00+05:30,955.8,938.35,939,950.5,6410097
42 | 2018-02-28 00:00:00+05:30,957.1,943.2,945.95,954.55,25898694
43 | 2018-03-01 00:00:00+05:30,959.9,945.95,949.75,948.4,4094652
44 | 2018-03-05 00:00:00+05:30,950.15,921.6,943,924.1,5634893
45 | 2018-03-06 00:00:00+05:30,936.2,905.1,929,910.85,7323680
46 | 2018-03-07 00:00:00+05:30,909.25,888.05,909.25,889.75,7247708
47 | 2018-03-08 00:00:00+05:30,914.5,893.3,897.65,911.45,6072983
48 | 2018-03-09 00:00:00+05:30,919.35,911.05,914.9,913.1,4863577
49 | 2018-03-12 00:00:00+05:30,933.1,917.55,920,930.3,3801407
50 | 2018-03-13 00:00:00+05:30,936,921.35,931.9,931.85,5096825
51 | 2018-03-14 00:00:00+05:30,932.05,919.2,928.8,929.05,6731530
52 | 2018-03-15 00:00:00+05:30,929.45,910,925,911.8,5811424
53 | 2018-03-16 00:00:00+05:30,909.7,886.05,909,900.05,14087564
54 | 2018-03-19 00:00:00+05:30,906.9,892,901,895.5,5774392
55 | 2018-03-20 00:00:00+05:30,904,887.05,893.3,889.8,6945721
56 | 2018-03-21 00:00:00+05:30,902.35,893.3,894.85,896.9,6727261
57 | 2018-03-22 00:00:00+05:30,911.95,895.75,896,908.15,10116965
58 | 2018-03-23 00:00:00+05:30,900,889.1,890.6,893.9,10729694
59 | 2018-03-26 00:00:00+05:30,904.3,892.5,893.95,901.1,6047262
60 | 2018-03-27 00:00:00+05:30,910.75,897,907.2,899.8,6939577
61 | 2018-03-28 00:00:00+05:30,895.85,880,895.8,882.7,8759586
62 | 2018-04-02 00:00:00+05:30,900.95,886.5,893,892.95,5712065
63 | 2018-04-03 00:00:00+05:30,901.7,885.25,891,899.55,6364728
64 | 2018-04-04 00:00:00+05:30,911.8,892.05,904.7,894.9,6498407
65 | 2018-04-05 00:00:00+05:30,910,903.5,905.1,908.2,3898676
66 | 2018-04-06 00:00:00+05:30,918.5,905,908,911,4418462
67 | 2018-04-09 00:00:00+05:30,919,912.55,912.55,916,3572251
68 | 2018-04-10 00:00:00+05:30,923.9,914.15,918.4,917.05,3865402
69 | 2018-04-11 00:00:00+05:30,932.75,916.05,921.8,930.85,6007539
70 | 2018-04-12 00:00:00+05:30,936.65,924.2,929.85,928.7,4947725
71 | 2018-04-13 00:00:00+05:30,941.7,928.7,932.7,938.85,5767984
72 | 2018-04-16 00:00:00+05:30,940,928.5,934.45,937.05,4084450
73 | 2018-04-17 00:00:00+05:30,947.2,936,940,944.25,4388713
74 | 2018-04-18 00:00:00+05:30,948.2,935.05,945.25,938,3365196
75 | 2018-04-19 00:00:00+05:30,945,935,939.1,942.3,4481413
76 | 2018-04-20 00:00:00+05:30,941.85,921.2,938,927.9,3349519
77 | 2018-04-23 00:00:00+05:30,944.35,928.5,930,936,3508515
78 | 2018-04-24 00:00:00+05:30,975.7,935,935.8,970.05,9107264
79 | 2018-04-25 00:00:00+05:30,981.8,963.55,973.65,970.5,6436620
80 | 2018-04-26 00:00:00+05:30,988,962.25,973,975.35,15588001
81 | 2018-04-27 00:00:00+05:30,1011,985.5,989.8,996.3,11666267
82 | 2018-04-30 00:00:00+05:30,992.6,960,982,963.3,8871802
83 | 2018-05-02 00:00:00+05:30,979.2,965.05,967,972.7,6117761
84 | 2018-05-03 00:00:00+05:30,977.9,959.25,977.9,961.6,3947546
85 | 2018-05-04 00:00:00+05:30,964.7,950,962.85,953.95,3795692
86 | 2018-05-07 00:00:00+05:30,973.25,956.35,958.85,971,3512532
87 | 2018-05-08 00:00:00+05:30,982,965.2,977.5,967.1,4934036
88 | 2018-05-09 00:00:00+05:30,984.3,962.25,965.9,976.7,4494423
89 | 2018-05-10 00:00:00+05:30,984.6,974.25,980,981.15,4014645
90 | 2018-05-11 00:00:00+05:30,991.3,977.75,981.25,989.25,4011516
91 | 2018-05-14 00:00:00+05:30,991.95,976.7,991.95,986.6,2579561
92 | 2018-05-15 00:00:00+05:30,1000,976.65,983.05,978.95,4800923
93 | 2018-05-16 00:00:00+05:30,974,952.45,974,956.15,8613363
94 | 2018-05-17 00:00:00+05:30,960,941.5,959.35,945.35,4733477
95 | 2018-05-18 00:00:00+05:30,953.5,932,945.6,933.85,5230037
96 | 2018-05-21 00:00:00+05:30,935.7,922.1,930.75,931.35,6624893
97 | 2018-05-22 00:00:00+05:30,934,925,933.15,926.75,5170585
98 | 2018-05-23 00:00:00+05:30,926,909.15,926,912.25,4567562
99 | 2018-05-24 00:00:00+05:30,921,910.15,914,915.85,5317983
100 | 2018-05-25 00:00:00+05:30,923.4,914.45,916,920.8,8129520
101 | 2018-05-28 00:00:00+05:30,927,918.1,924.9,920.4,5197835
102 | 2018-05-29 00:00:00+05:30,928,914.4,921.5,917.45,8173396
103 | 2018-05-30 00:00:00+05:30,923.2,906.6,914.85,916.1,5991222
104 | 2018-05-31 00:00:00+05:30,922.3,914.55,921,921.35,16951801
105 | 2018-06-01 00:00:00+05:30,935.75,918.45,920,930.55,8765835
106 | 2018-06-04 00:00:00+05:30,944,930.55,933.4,940.85,5600357
107 | 2018-06-05 00:00:00+05:30,955,941,942.8,947.65,7971524
108 | 2018-06-06 00:00:00+05:30,956,942.65,947,954.4,4189483
109 | 2018-06-07 00:00:00+05:30,976,954.45,954.9,971.35,6857300
110 | 2018-06-08 00:00:00+05:30,986.55,960.6,966.35,984.35,6532599
111 | 2018-06-11 00:00:00+05:30,999.95,979.05,987.1,982.65,6866053
112 | 2018-06-12 00:00:00+05:30,1000,980.3,983.4,997.15,7445221
113 | 2018-06-13 00:00:00+05:30,1005.9,989.5,1000,1002.5,5926020
114 | 2018-06-14 00:00:00+05:30,1011.8,987.4,997.45,1007.95,5484867
115 | 2018-06-15 00:00:00+05:30,1023.5,999.25,1006,1014.2,11294702
116 | 2018-06-18 00:00:00+05:30,1020.45,1007,1008.8,1015.55,5652374
117 | 2018-06-19 00:00:00+05:30,1014.8,993.65,1013.9,996.05,6346101
118 | 2018-06-20 00:00:00+05:30,1022.9,994.85,996,1019.25,7193824
119 | 2018-06-21 00:00:00+05:30,1036,1021.3,1023.45,1031.95,10509756
120 | 2018-06-22 00:00:00+05:30,1029.2,1008.45,1028.4,1012.5,16169978
121 | 2018-06-25 00:00:00+05:30,1016.5,1002.3,1011.4,1004.45,5197694
122 | 2018-06-26 00:00:00+05:30,1006,975.25,1006,978.95,8068585
123 | 2018-06-27 00:00:00+05:30,985.4,962.7,978.5,965.85,8958555
124 | 2018-06-28 00:00:00+05:30,968.5,940.3,962,944.95,8581234
125 | 2018-06-29 00:00:00+05:30,975.5,949,949.1,972.45,7966401
126 | 2018-07-02 00:00:00+05:30,972.35,957,972.35,960.6,6660916
127 | 2018-07-03 00:00:00+05:30,978.45,960.35,965,971.3,5067261
128 | 2018-07-04 00:00:00+05:30,992,971,971.8,990.1,5000724
129 | 2018-07-05 00:00:00+05:30,1008.4,958.6,997,964.5,18719353
130 | 2018-07-06 00:00:00+05:30,983,964.5,964.75,977.55,9161266
131 | 2018-07-09 00:00:00+05:30,1000,983.5,987.1,997.1,5170189
132 | 2018-07-10 00:00:00+05:30,1028.5,1000,1002.75,1025.7,10444513
133 | 2018-07-11 00:00:00+05:30,1040.9,1017.85,1025,1038.8,8665072
134 | 2018-07-12 00:00:00+05:30,1099,1042.6,1044.35,1082.35,24109297
135 | 2018-07-13 00:00:00+05:30,1109,1080,1080.5,1099.8,17967282
136 | 2018-07-16 00:00:00+05:30,1108.9,1073.05,1099.8,1076.2,6595781
137 | 2018-07-17 00:00:00+05:30,1095.95,1069.6,1078.3,1092.35,6469256
138 | 2018-07-18 00:00:00+05:30,1104.6,1077.3,1098.4,1092.55,4682099
139 | 2018-07-19 00:00:00+05:30,1115,1086.35,1093.3,1104.85,5433617
140 | 2018-07-20 00:00:00+05:30,1138.5,1111.9,1113.4,1128.65,8018150
141 | 2018-07-23 00:00:00+05:30,1129.8,1113.1,1129.8,1120.3,5842496
142 | 2018-07-24 00:00:00+05:30,1126.75,1107.4,1122,1110.95,4469144
143 | 2018-07-25 00:00:00+05:30,1123,1104.2,1110,1115.15,5949680
144 | 2018-07-26 00:00:00+05:30,1121.5,1107.8,1110.1,1110.65,6424867
145 | 2018-07-27 00:00:00+05:30,1137.4,1112.15,1119.5,1129.85,6679468
146 | 2018-07-30 00:00:00+05:30,1157.3,1128,1130,1151.4,13872467
147 | 2018-07-31 00:00:00+05:30,1190.4,1147.15,1151,1186,13875377
148 | 2018-08-01 00:00:00+05:30,1202.9,1185.4,1190.4,1192.35,9704295
149 | 2018-08-02 00:00:00+05:30,1191.5,1165.95,1191.5,1168.35,6248975
150 | 2018-08-03 00:00:00+05:30,1183.5,1169.55,1175,1176.95,6645397
151 | 2018-08-06 00:00:00+05:30,1196,1175.35,1176.7,1192.6,4016418
152 | 2018-08-07 00:00:00+05:30,1201.4,1177,1194.9,1184.2,4059460
153 | 2018-08-08 00:00:00+05:30,1222.5,1185,1185,1217.7,7912437
154 | 2018-08-09 00:00:00+05:30,1231.7,1212,1221.5,1217.95,6846928
155 | 2018-08-10 00:00:00+05:30,1215,1202.3,1211,1204.2,3581210
156 | 2018-08-13 00:00:00+05:30,1199.95,1182.7,1197.55,1187.7,4668159
157 | 2018-08-14 00:00:00+05:30,1217.2,1184.3,1190,1210.6,5299536
158 | 2018-08-16 00:00:00+05:30,1216,1195.5,1203.25,1200.8,5982490
159 | 2018-08-17 00:00:00+05:30,1215.4,1200.05,1209,1203.75,3830992
160 | 2018-08-20 00:00:00+05:30,1236.8,1207.65,1207.9,1234.9,7607036
161 | 2018-08-21 00:00:00+05:30,1251.5,1227,1237.25,1247.2,7279641
162 | 2018-08-23 00:00:00+05:30,1274,1238.4,1246.3,1269.45,7968123
163 | 2018-08-24 00:00:00+05:30,1279.95,1260.05,1265.25,1277.5,6652005
164 | 2018-08-27 00:00:00+05:30,1295.85,1278.75,1279.4,1291.5,5572177
165 | 2018-08-28 00:00:00+05:30,1323,1295,1296,1319,8694751
166 | 2018-08-29 00:00:00+05:30,1329,1290.2,1316.3,1294.2,11077203
167 | 2018-08-30 00:00:00+05:30,1297.15,1270.45,1293.6,1274.45,9850042
168 | 2018-08-31 00:00:00+05:30,1270,1236.6,1270,1241.65,15305857
169 | 2018-09-03 00:00:00+05:30,1257.9,1226.05,1245,1229.15,12200022
170 | 2018-09-04 00:00:00+05:30,1254.8,1229,1235.15,1241.95,8838770
171 | 2018-09-05 00:00:00+05:30,1252.65,1208.5,1242.5,1227,10324923
172 | 2018-09-06 00:00:00+05:30,1265.7,1225.35,1230.15,1261.4,11195043
173 | 2018-09-07 00:00:00+05:30,1281.35,1254.5,1264.9,1278.6,10692787
174 | 2018-09-10 00:00:00+05:30,1277,1250.1,1275,1255.85,7059008
175 | 2018-09-11 00:00:00+05:30,1268.4,1234.25,1259,1237.7,10874768
176 | 2018-09-12 00:00:00+05:30,1258.7,1228.85,1251.8,1252.5,8065553
177 | 2018-09-14 00:00:00+05:30,1275.6,1247.65,1259.4,1253.15,7878583
178 | 2018-09-17 00:00:00+05:30,1250.75,1221.6,1250.75,1225.9,6754086
179 | 2018-09-18 00:00:00+05:30,1239.4,1214.35,1226,1217.15,7616412
180 | 2018-09-19 00:00:00+05:30,1224,1201,1224,1210.75,11265786
181 | 2018-09-21 00:00:00+05:30,1237.4,1183.5,1219,1217.5,19070172
182 | 2018-09-24 00:00:00+05:30,1239.9,1204.5,1214.45,1232.05,10102589
183 | 2018-09-25 00:00:00+05:30,1245,1195,1235.5,1230.6,9715020
184 | 2018-09-26 00:00:00+05:30,1253.95,1227.15,1239,1251.4,6396828
185 | 2018-09-27 00:00:00+05:30,1263,1237.5,1255,1253.75,10808749
186 | 2018-09-28 00:00:00+05:30,1271.7,1247.3,1259.9,1257.95,11073023
187 | 2018-10-01 00:00:00+05:30,1257.7,1204,1257.7,1231.7,7332693
188 | 2018-10-03 00:00:00+05:30,1239.6,1200.05,1229.5,1205.1,7376575
189 | 2018-10-04 00:00:00+05:30,1190.4,1106,1189.5,1122.25,23212443
190 | 2018-10-05 00:00:00+05:30,1115.3,1038.15,1099.5,1048.85,21943190
191 | 2018-10-08 00:00:00+05:30,1117,1025.55,1050,1109.4,24679428
192 | 2018-10-09 00:00:00+05:30,1123.3,1081.2,1121.65,1090.05,10991196
193 | 2018-10-10 00:00:00+05:30,1115.85,1084.3,1093.9,1102.1,9502176
194 | 2018-10-11 00:00:00+05:30,1114.75,1043,1064,1087.8,13224172
195 | 2018-10-12 00:00:00+05:30,1129,1101.9,1104.95,1126.55,9058536
196 | 2018-10-15 00:00:00+05:30,1146.9,1118.15,1129.9,1139.75,8871776
197 | 2018-10-16 00:00:00+05:30,1167.4,1140,1146,1163.8,7478601
198 | 2018-10-17 00:00:00+05:30,1179.35,1140,1179,1151.3,12785646
199 | 2018-10-19 00:00:00+05:30,1111,1070.25,1100,1101.3,25421229
200 | 2018-10-22 00:00:00+05:30,1114.8,1057.35,1110,1062.65,13165542
201 | 2018-10-23 00:00:00+05:30,1064.7,1043.65,1052.75,1054.7,10573730
202 | 2018-10-24 00:00:00+05:30,1075.95,1031.4,1074.9,1045.75,13655337
203 | 2018-10-25 00:00:00+05:30,1040.65,1016.4,1036,1030.8,14096237
204 | 2018-10-26 00:00:00+05:30,1057,1022,1032.9,1044.9,9182613
205 | 2018-10-29 00:00:00+05:30,1093.85,1046,1046,1087.8,7671807
206 | 2018-10-30 00:00:00+05:30,1085.5,1052.1,1085.5,1057,8806701
207 | 2018-10-31 00:00:00+05:30,1065.85,1047,1059,1061.25,16457691
208 | 2018-11-01 00:00:00+05:30,1067.9,1050,1067.9,1056.25,9396027
209 | 2018-11-02 00:00:00+05:30,1081,1063.15,1067,1074.9,16796200
210 | 2018-11-05 00:00:00+05:30,1092.7,1065,1073.45,1090.3,7582518
211 | 2018-11-06 00:00:00+05:30,1112,1091.6,1094,1104.1,8634266
212 | 2018-11-07 00:00:00+05:30,1115,1106.7,1115,1110.7,1283997
213 | 2018-11-09 00:00:00+05:30,1113.8,1085,1106.25,1093.45,10840462
214 | 2018-11-12 00:00:00+05:30,1098.8,1074.55,1097.7,1080,5801720
215 | 2018-11-13 00:00:00+05:30,1103,1070.05,1074,1099.45,6811598
216 | 2018-11-14 00:00:00+05:30,1112.65,1085.5,1105.05,1097.95,7787530
217 | 2018-11-15 00:00:00+05:30,1103,1086.5,1096.95,1096.85,6831740
218 | 2018-11-16 00:00:00+05:30,1129.95,1097.1,1097.95,1127.4,11031889
219 | 2018-11-19 00:00:00+05:30,1151.7,1131,1132.9,1150,7308981
220 | 2018-11-20 00:00:00+05:30,1155.9,1130,1145,1137.4,5719901
221 | 2018-11-21 00:00:00+05:30,1145.5,1107.05,1137.5,1112.9,7507812
222 | 2018-11-22 00:00:00+05:30,1125.35,1100.1,1114.5,1102.85,5792568
223 | 2018-11-26 00:00:00+05:30,1117.5,1097.4,1109,1109.75,6386556
224 | 2018-11-27 00:00:00+05:30,1130,1105.45,1107.9,1128.2,6810228
225 | 2018-11-28 00:00:00+05:30,1157.75,1126.3,1132.45,1152.65,8003964
226 | 2018-11-29 00:00:00+05:30,1179.9,1160,1161,1168.5,12636169
227 | 2018-11-30 00:00:00+05:30,1186,1162.3,1172.3,1167.55,10717448
228 | 2018-12-03 00:00:00+05:30,1174.9,1148.5,1174.9,1156.45,5569578
229 | 2018-12-04 00:00:00+05:30,1158,1142.2,1158,1152.3,5192825
230 | 2018-12-05 00:00:00+05:30,1158.8,1139.3,1143.75,1155.15,5819618
231 | 2018-12-06 00:00:00+05:30,1149,1118.25,1149,1123.65,5866108
232 | 2018-12-07 00:00:00+05:30,1136.95,1108.35,1129.45,1133.8,7214078
233 | 2018-12-10 00:00:00+05:30,1114.3,1083.1,1110,1090.25,9381544
234 | 2018-12-11 00:00:00+05:30,1102.1,1055,1066,1097.55,13494665
235 | 2018-12-12 00:00:00+05:30,1113.5,1087.75,1099,1110.5,12605431
236 | 2018-12-13 00:00:00+05:30,1128,1095.05,1128,1107.05,9297518
237 | 2018-12-14 00:00:00+05:30,1114.9,1091.8,1103.1,1112.2,7221324
238 | 2018-12-17 00:00:00+05:30,1133,1111.5,1115.95,1129.9,5064357
239 | 2018-12-18 00:00:00+05:30,1139,1120.5,1123.3,1136.4,5870928
240 | 2018-12-19 00:00:00+05:30,1147.3,1133.2,1141.5,1136.8,7368799
241 | 2018-12-20 00:00:00+05:30,1132.5,1116.15,1121,1128.45,5764731
242 | 2018-12-21 00:00:00+05:30,1131.25,1096,1125,1100.2,8281941
243 | 2018-12-24 00:00:00+05:30,1103.2,1086.55,1100.9,1089.15,4144535
244 | 2018-12-26 00:00:00+05:30,1100,1065.65,1085.8,1098.35,7210059
245 | 2018-12-27 00:00:00+05:30,1127,1104,1107.2,1120.2,12478000
246 | 2018-12-28 00:00:00+05:30,1135.25,1121.2,1125.5,1125.55,7666016
247 | 2018-12-31 00:00:00+05:30,1133.85,1116.2,1130.95,1121.25,7222817
--------------------------------------------------------------------------------
/data/sales.csv:
--------------------------------------------------------------------------------
1 | "Month","Sales"
2 | "2001-01",266.0
3 | "2001-02",145.9
4 | "2001-03",183.1
5 | "2001-04",119.3
6 | "2001-05",180.3
7 | "2001-06",168.5
8 | "2001-07",231.8
9 | "2001-08",224.5
10 | "2001-09",192.8
11 | "2001-10",122.9
12 | "2001-11",336.5
13 | "2001-12",185.9
14 | "2002-01",194.3
15 | "2002-02",149.5
16 | "2002-03",210.1
17 | "2002-04",273.3
18 | "2002-05",191.4
19 | "2002-06",287.0
20 | "2002-07",226.0
21 | "2002-08",303.6
22 | "2002-09",289.9
23 | "2002-10",421.6
24 | "2002-11",264.5
25 | "2002-12",342.3
26 | "2003-01",339.7
27 | "2003-02",440.4
28 | "2003-03",315.9
29 | "2003-04",439.3
30 | "2003-05",401.3
31 | "2003-06",437.4
32 | "2003-07",575.5
33 | "2003-08",407.6
34 | "2003-09",682.0
35 | "2003-10",475.3
36 | "2003-11",581.3
37 | "2003-12",646.9
--------------------------------------------------------------------------------
/data/sample.csv:
--------------------------------------------------------------------------------
1 | x,random
2 | 2018-11-01T0-10-1,-0.803908052
3 | 2018-11-02T3-45-3,1.586827482
4 | 2018-11-03T0-34-10,0.224111754
5 | 2018-11-04T11-3-4,-0.715936787
--------------------------------------------------------------------------------
/data/sample_2.csv:
--------------------------------------------------------------------------------
1 | year,month,day,random
2 | 2018,11,1,-0.803908052
3 | 2018,11,2,1.586827482
4 | 2018,11,3,0.224111754
5 | 2018,11,4,-0.715936787
6 | 2018,11,5,0.010597412
7 | 2018,11,6,0.782264074
8 | 2018,11,7,-1.046751639
9 | 2018,11,8,0.825305985
10 | 2018,11,9,-0.658712868
11 | 2018,11,10,0.226168727
12 | 2018,11,11,-0.569545596
13 | 2018,11,12,1.591374293
14 | 2018,11,13,0.470490328
15 | 2018,11,14,-0.363814613
16 | 2018,11,15,-1.70058898
17 | 2018,11,16,0.994420266
18 | 2018,11,17,0.26923845
19 | 2018,11,18,-0.946182168
20 | 2018,11,19,0.753910186
21 | 2018,11,20,1.895683364
22 | 2018,11,21,-1.653269029
23 | 2018,11,22,1.398829468
24 | 2018,11,23,0.064970122
25 | 2018,11,24,-0.579068492
26 | 2018,11,25,0.761552316
27 | 2018,11,26,-0.859265507
28 | 2018,11,27,-0.544670526
29 | 2018,11,28,0.020763882
30 | 2018,11,29,-0.266145079
31 | 2018,11,30,1.298599805
--------------------------------------------------------------------------------
/data/yesbank_data_day.csv:
--------------------------------------------------------------------------------
1 | date,high,low,open,close,volume
2 | 2018-01-01 00:00:00+05:30,317.75,311.3,315.5,312.6,4019878
3 | 2018-01-02 00:00:00+05:30,314,307.15,313.4,311.65,5224976
4 | 2018-01-03 00:00:00+05:30,316.5,311.15,312,315.85,5672263
5 | 2018-01-04 00:00:00+05:30,318.4,313,316,317.1,5667580
6 | 2018-01-05 00:00:00+05:30,337.9,317.45,317.5,332.85,30720675
7 | 2018-01-08 00:00:00+05:30,341.3,331.3,336,333.6,12747890
8 | 2018-01-09 00:00:00+05:30,342.8,327.55,334.9,341.35,13282560
9 | 2018-01-10 00:00:00+05:30,342.35,335.45,341.5,339.8,10385044
10 | 2018-01-11 00:00:00+05:30,344.25,335.3,339,343.15,8266266
11 | 2018-01-12 00:00:00+05:30,344.7,337.55,344.1,340.9,5688676
12 | 2018-01-15 00:00:00+05:30,343.7,335.1,341.9,336,7142164
13 | 2018-01-16 00:00:00+05:30,338.75,328,336,334.85,7296505
14 | 2018-01-17 00:00:00+05:30,343.5,331.4,335.1,342.4,7985222
15 | 2018-01-18 00:00:00+05:30,356.9,332.35,350,341.2,35465087
16 | 2018-01-19 00:00:00+05:30,352.25,339.1,347.5,349.35,21425789
17 | 2018-01-22 00:00:00+05:30,358.25,348.75,349.95,355.35,13456538
18 | 2018-01-23 00:00:00+05:30,360.4,352.3,359.85,359.55,10196645
19 | 2018-01-24 00:00:00+05:30,366.3,356,357,364.8,11258771
20 | 2018-01-25 00:00:00+05:30,364.5,355.65,364.5,361.6,8963188
21 | 2018-01-29 00:00:00+05:30,363.7,355.55,361.2,358,7931235
22 | 2018-01-30 00:00:00+05:30,360.8,351.85,358,353.35,7890491
23 | 2018-01-31 00:00:00+05:30,356.55,350.45,353,354.4,8527044
24 | 2018-02-01 00:00:00+05:30,367.25,352.65,355,359.9,15217926
25 | 2018-02-02 00:00:00+05:30,356,341.8,354.2,349.05,16298953
26 | 2018-02-05 00:00:00+05:30,349,333.6,340,343.6,13407059
27 | 2018-02-06 00:00:00+05:30,342.9,324,325,338.75,12557261
28 | 2018-02-07 00:00:00+05:30,344,330.6,344,332.9,11681640
29 | 2018-02-08 00:00:00+05:30,340.35,331.5,332.9,335,7785799
30 | 2018-02-09 00:00:00+05:30,331.45,324,330,325.55,9395513
31 | 2018-02-12 00:00:00+05:30,337.2,326.6,326.6,335.4,12049356
32 | 2018-02-14 00:00:00+05:30,337.85,318.95,336,320.35,13548524
33 | 2018-02-15 00:00:00+05:30,328.8,317.7,321.2,319.8,15482667
34 | 2018-02-16 00:00:00+05:30,325,309.65,324,311.8,18611798
35 | 2018-02-19 00:00:00+05:30,315,307.55,313.85,312.05,9311433
36 | 2018-02-20 00:00:00+05:30,316.1,307.5,314.35,308.7,11389041
37 | 2018-02-21 00:00:00+05:30,313.9,304.5,311,312.35,13342678
38 | 2018-02-22 00:00:00+05:30,316.9,308.15,311.9,316.1,14787238
39 | 2018-02-23 00:00:00+05:30,326,316.45,316.45,323.45,11399732
40 | 2018-02-26 00:00:00+05:30,328.2,319.35,326.5,326.15,9225197
41 | 2018-02-27 00:00:00+05:30,334.25,325,325.3,327.15,15104405
42 | 2018-02-28 00:00:00+05:30,325.2,318.05,323,322.3,10985771
43 | 2018-03-01 00:00:00+05:30,326,318.35,322.1,321.05,7333939
44 | 2018-03-05 00:00:00+05:30,319,311.15,318.5,312.95,7973146
45 | 2018-03-06 00:00:00+05:30,320.5,309.85,317,312.15,8299126
46 | 2018-03-07 00:00:00+05:30,314.9,308,312,311.95,8420190
47 | 2018-03-08 00:00:00+05:30,313.25,294.7,312.3,308.55,24027679
48 | 2018-03-09 00:00:00+05:30,310.95,301.3,310,303.25,11596136
49 | 2018-03-12 00:00:00+05:30,314.5,301.35,305.2,311.15,12860205
50 | 2018-03-13 00:00:00+05:30,315.4,309.8,310,312.8,11436348
51 | 2018-03-14 00:00:00+05:30,321.9,308.2,311.8,318.85,12905495
52 | 2018-03-15 00:00:00+05:30,321,310.85,318.95,311.85,9999620
53 | 2018-03-16 00:00:00+05:30,316.85,310,312.4,312.9,17094363
54 | 2018-03-19 00:00:00+05:30,316.5,302.5,316,304.8,11267355
55 | 2018-03-20 00:00:00+05:30,305.8,299.7,303,302.4,9874137
56 | 2018-03-21 00:00:00+05:30,309.05,300.05,305,300.75,13383435
57 | 2018-03-22 00:00:00+05:30,303.7,295.75,301.8,298.25,19356591
58 | 2018-03-23 00:00:00+05:30,293.05,285,293,286.65,21617995
59 | 2018-03-26 00:00:00+05:30,304.9,286,286.5,303.35,24240033
60 | 2018-03-27 00:00:00+05:30,309.25,300.7,307,303.5,15267419
61 | 2018-03-28 00:00:00+05:30,307.5,299.1,300.15,304.85,14952643
62 | 2018-04-02 00:00:00+05:30,308.5,303.6,307,306.25,8844027
63 | 2018-04-03 00:00:00+05:30,314.2,304,306.45,313.1,10392997
64 | 2018-04-04 00:00:00+05:30,320,304,313,305.45,13516525
65 | 2018-04-05 00:00:00+05:30,315.6,307.8,311.8,313.05,11449637
66 | 2018-04-06 00:00:00+05:30,316,311.6,314.7,314.5,8598435
67 | 2018-04-09 00:00:00+05:30,319.9,314.4,316,316.25,14002415
68 | 2018-04-10 00:00:00+05:30,320.5,315,320.1,319.25,9976629
69 | 2018-04-11 00:00:00+05:30,320,310.5,320,312,9921987
70 | 2018-04-12 00:00:00+05:30,313.35,308,312.2,311.75,11359462
71 | 2018-04-13 00:00:00+05:30,314.45,305.7,312.6,309.55,12849690
72 | 2018-04-16 00:00:00+05:30,311.1,306.45,307,310.35,10808237
73 | 2018-04-17 00:00:00+05:30,313.35,307.25,311.9,307.9,12509310
74 | 2018-04-18 00:00:00+05:30,313.7,308.75,311,309.55,11323144
75 | 2018-04-19 00:00:00+05:30,319.5,309,311,318.5,13498117
76 | 2018-04-20 00:00:00+05:30,317.7,306.05,317.7,308.55,21800867
77 | 2018-04-23 00:00:00+05:30,317,309,310.9,313.05,10927239
78 | 2018-04-24 00:00:00+05:30,328.8,314,314,324,24392058
79 | 2018-04-25 00:00:00+05:30,328.5,317.45,323.75,325.25,15370686
80 | 2018-04-26 00:00:00+05:30,360.25,323.6,327.95,352.4,62364794
81 | 2018-04-27 00:00:00+05:30,369,345.1,365,348.9,52230752
82 | 2018-04-30 00:00:00+05:30,367.2,350.3,350.9,362,28400085
83 | 2018-05-02 00:00:00+05:30,364.7,351.7,361.95,354.25,13058846
84 | 2018-05-03 00:00:00+05:30,359.25,346.5,354,353.95,15431044
85 | 2018-05-04 00:00:00+05:30,357.75,344.2,354.5,345.1,12241946
86 | 2018-05-07 00:00:00+05:30,350,342.65,347.8,347.9,6781161
87 | 2018-05-08 00:00:00+05:30,353.95,340.5,350,343.3,11670852
88 | 2018-05-09 00:00:00+05:30,349.75,343.2,344.45,347.85,9802803
89 | 2018-05-10 00:00:00+05:30,350.95,342.65,350.95,344.65,7330619
90 | 2018-05-11 00:00:00+05:30,351.8,341.8,346.5,350.25,7642090
91 | 2018-05-14 00:00:00+05:30,353.8,343.1,350.8,344.8,7468232
92 | 2018-05-15 00:00:00+05:30,354.6,343.6,346,345.75,12805393
93 | 2018-05-16 00:00:00+05:30,356.45,342,344,349.75,12708109
94 | 2018-05-17 00:00:00+05:30,352,344,351.5,345.3,6702419
95 | 2018-05-18 00:00:00+05:30,348.8,343.1,346.5,345.5,8938377
96 | 2018-05-21 00:00:00+05:30,349.2,333.05,346,334.15,8748958
97 | 2018-05-22 00:00:00+05:30,340.75,331,334.1,334.7,10869535
98 | 2018-05-23 00:00:00+05:30,338.75,331.5,335,333.7,7738850
99 | 2018-05-24 00:00:00+05:30,337,325.6,336.7,330,8077461
100 | 2018-05-25 00:00:00+05:30,341.9,332,333,339.4,6957024
101 | 2018-05-28 00:00:00+05:30,348.25,340.75,341.5,345.2,8982939
102 | 2018-05-29 00:00:00+05:30,345.9,335.1,344.4,338.05,7870400
103 | 2018-05-30 00:00:00+05:30,344,332.65,335.75,343.15,10036644
104 | 2018-05-31 00:00:00+05:30,349.4,338,345.4,346.2,17910411
105 | 2018-06-01 00:00:00+05:30,348.7,340.25,347,342.7,9470256
106 | 2018-06-04 00:00:00+05:30,350.9,342,343.9,342.95,14501600
107 | 2018-06-05 00:00:00+05:30,342.75,333.65,342.5,336.75,8492155
108 | 2018-06-06 00:00:00+05:30,340.55,332.55,337.95,339.15,8211104
109 | 2018-06-07 00:00:00+05:30,345.5,338,341.2,339.55,9630695
110 | 2018-06-08 00:00:00+05:30,340.25,335.05,337.5,337.65,5851442
111 | 2018-06-11 00:00:00+05:30,344.85,334.7,338.6,335.7,11980621
112 | 2018-06-12 00:00:00+05:30,337.95,331.2,335.7,332.2,9851496
113 | 2018-06-13 00:00:00+05:30,336.9,331.5,336.9,333.2,7530350
114 | 2018-06-14 00:00:00+05:30,338.35,330.55,333,337.2,6819965
115 | 2018-06-15 00:00:00+05:30,336.2,328.65,333,330.65,8518379
116 | 2018-06-18 00:00:00+05:30,333.85,327.7,330.8,331.65,6785348
117 | 2018-06-19 00:00:00+05:30,334.5,328.5,330.95,330.9,8566372
118 | 2018-06-20 00:00:00+05:30,337,331.4,331.9,336.1,7720519
119 | 2018-06-21 00:00:00+05:30,337.6,331.2,336.8,332.95,5133865
120 | 2018-06-22 00:00:00+05:30,335.7,329.5,333.6,335.1,6006046
121 | 2018-06-25 00:00:00+05:30,338.95,332.9,334.75,334.15,5792052
122 | 2018-06-26 00:00:00+05:30,337.75,332.05,333.1,335.95,4706265
123 | 2018-06-27 00:00:00+05:30,337.75,331.05,336,335.6,6930662
124 | 2018-06-28 00:00:00+05:30,336.4,327.3,336.4,329.3,8130250
125 | 2018-06-29 00:00:00+05:30,341.75,329.65,330.7,339.65,8963268
126 | 2018-07-02 00:00:00+05:30,341.7,332.35,338.05,337.05,8091469
127 | 2018-07-03 00:00:00+05:30,340.5,335.3,337.7,336.9,4998778
128 | 2018-07-04 00:00:00+05:30,337.5,332.5,337,335.55,4507001
129 | 2018-07-05 00:00:00+05:30,350.8,338.6,338.75,348.65,21239763
130 | 2018-07-06 00:00:00+05:30,357.7,347,348.3,352.6,17790500
131 | 2018-07-09 00:00:00+05:30,366.9,355,356.1,363.3,16834621
132 | 2018-07-10 00:00:00+05:30,373.1,360,363.9,371.4,20404168
133 | 2018-07-11 00:00:00+05:30,373.35,367.45,370.95,371.6,9423127
134 | 2018-07-12 00:00:00+05:30,385,372.5,372.5,374.8,17868601
135 | 2018-07-13 00:00:00+05:30,380.3,367.3,376.9,376,15448753
136 | 2018-07-16 00:00:00+05:30,380.8,369.6,376,378.5,9093938
137 | 2018-07-17 00:00:00+05:30,385.9,375.6,377.9,380.8,12586691
138 | 2018-07-18 00:00:00+05:30,388,378,381.9,383.65,10472260
139 | 2018-07-19 00:00:00+05:30,394.35,379.4,380.95,392.3,17050256
140 | 2018-07-20 00:00:00+05:30,392.5,385.3,391.1,386.5,10076070
141 | 2018-07-23 00:00:00+05:30,392.35,382.75,387.9,386.6,9511403
142 | 2018-07-24 00:00:00+05:30,390,382.7,389.2,384.35,9785989
143 | 2018-07-25 00:00:00+05:30,392.7,381.5,386,382.9,12467278
144 | 2018-07-26 00:00:00+05:30,386.3,358.55,385,369.75,35251243
145 | 2018-07-27 00:00:00+05:30,374.9,362.4,374.9,370,17100821
146 | 2018-07-30 00:00:00+05:30,373,365.25,372,369.3,11469162
147 | 2018-07-31 00:00:00+05:30,374.7,366.4,370,367.95,9734450
148 | 2018-08-01 00:00:00+05:30,371.55,362,369.9,365.85,7742871
149 | 2018-08-02 00:00:00+05:30,365.65,356.65,364.95,361.8,8365895
150 | 2018-08-03 00:00:00+05:30,374,364,364,373.2,10625834
151 | 2018-08-06 00:00:00+05:30,380.7,373.45,374,377.9,12018227
152 | 2018-08-07 00:00:00+05:30,386.4,376.5,380,381.65,13526041
153 | 2018-08-08 00:00:00+05:30,385.75,379.55,382.6,382.75,6767532
154 | 2018-08-09 00:00:00+05:30,384.7,377.65,383.5,381.05,5854134
155 | 2018-08-10 00:00:00+05:30,391,381,382,382.85,13916077
156 | 2018-08-13 00:00:00+05:30,383,368.3,379,370.7,9205915
157 | 2018-08-14 00:00:00+05:30,384.3,368.15,369,382.5,12351125
158 | 2018-08-16 00:00:00+05:30,384.85,374.2,381.2,378.45,11567658
159 | 2018-08-17 00:00:00+05:30,395.65,380.3,382.4,393.2,16094026
160 | 2018-08-20 00:00:00+05:30,404,392.55,396.8,394,15247130
161 | 2018-08-21 00:00:00+05:30,397.45,389.5,396,391.35,9991245
162 | 2018-08-23 00:00:00+05:30,395.5,383.35,395.25,388.6,9262319
163 | 2018-08-24 00:00:00+05:30,388.7,372.85,387,374.2,16301075
164 | 2018-08-27 00:00:00+05:30,384.65,377,378,383,9563369
165 | 2018-08-28 00:00:00+05:30,386,362.8,386,370.6,26309238
166 | 2018-08-29 00:00:00+05:30,378.55,362,373.65,365.15,30544751
167 | 2018-08-30 00:00:00+05:30,369,358,369,361.75,28157288
168 | 2018-08-31 00:00:00+05:30,350,336.25,343,343.5,84019022
169 | 2018-09-03 00:00:00+05:30,348,337.2,347.95,339.05,26137760
170 | 2018-09-04 00:00:00+05:30,343.4,332.55,340.75,334.05,22057643
171 | 2018-09-05 00:00:00+05:30,344.9,332.25,332.9,343.8,24809578
172 | 2018-09-06 00:00:00+05:30,347.8,337.9,346.55,339.2,17527530
173 | 2018-09-07 00:00:00+05:30,341.05,321.8,341,323.4,48830425
174 | 2018-09-10 00:00:00+05:30,328.9,316,323.65,323.65,43295622
175 | 2018-09-11 00:00:00+05:30,327.85,314.6,326,316.6,28553718
176 | 2018-09-12 00:00:00+05:30,319.95,310.6,318,314.3,26121445
177 | 2018-09-14 00:00:00+05:30,328.5,316.7,318.5,323.1,36624891
178 | 2018-09-17 00:00:00+05:30,321.85,315.95,317.5,318.6,18554055
179 | 2018-09-18 00:00:00+05:30,328.95,319.1,319.35,323.55,30243505
180 | 2018-09-19 00:00:00+05:30,328.75,318.05,326,319.2,16185496
181 | 2018-09-21 00:00:00+05:30,287.3,218.1,287.3,226.5,293552756
182 | 2018-09-24 00:00:00+05:30,237.5,215.85,236.5,226.4,105701372
183 | 2018-09-25 00:00:00+05:30,238.85,197.25,230,219.7,110217421
184 | 2018-09-26 00:00:00+05:30,229.8,217,223,223.75,62814786
185 | 2018-09-27 00:00:00+05:30,227,202.05,226,203.25,91197198
186 | 2018-09-28 00:00:00+05:30,204,165,203.6,183.65,148654874
187 | 2018-10-01 00:00:00+05:30,203.8,170.6,180,200.85,130992623
188 | 2018-10-03 00:00:00+05:30,222.95,202.25,205,212.75,97945695
189 | 2018-10-04 00:00:00+05:30,220,205.05,209.4,215,65604016
190 | 2018-10-05 00:00:00+05:30,218.5,203.1,215,206,40546788
191 | 2018-10-08 00:00:00+05:30,226.6,207.4,210,221.2,62349640
192 | 2018-10-09 00:00:00+05:30,233.8,222.6,225.5,224.65,49480210
193 | 2018-10-10 00:00:00+05:30,237,219.4,229,233.9,52351177
194 | 2018-10-11 00:00:00+05:30,268.95,216.65,222,240.2,94679663
195 | 2018-10-12 00:00:00+05:30,254.4,244.5,247,246.45,43535321
196 | 2018-10-15 00:00:00+05:30,250.6,241.55,248.95,246,24694397
197 | 2018-10-16 00:00:00+05:30,251.4,245.6,247,248.9,18099074
198 | 2018-10-17 00:00:00+05:30,254.85,228.5,252.5,231.9,37767849
199 | 2018-10-19 00:00:00+05:30,230.55,213,225,217.9,50336402
200 | 2018-10-22 00:00:00+05:30,222.35,208.5,221,211.5,33633639
201 | 2018-10-23 00:00:00+05:30,216.7,206.5,209,213.2,28293781
202 | 2018-10-24 00:00:00+05:30,220,202,217.5,204,38009322
203 | 2018-10-25 00:00:00+05:30,207.5,195,202.6,198.35,59170164
204 | 2018-10-26 00:00:00+05:30,191.65,168.6,178.55,180.7,95760078
205 | 2018-10-29 00:00:00+05:30,185.9,174.75,185,181.3,52331613
206 | 2018-10-30 00:00:00+05:30,189.9,180.65,182,182.05,43428186
207 | 2018-10-31 00:00:00+05:30,189.5,179.65,185,188.1,53264490
208 | 2018-11-01 00:00:00+05:30,205.75,189.05,192,204.05,69202992
209 | 2018-11-02 00:00:00+05:30,215.9,207.65,212.75,209.1,51686587
210 | 2018-11-05 00:00:00+05:30,212.35,206.1,209,210.1,21134350
211 | 2018-11-06 00:00:00+05:30,217.9,211,212.25,214.45,26924066
212 | 2018-11-07 00:00:00+05:30,218,215.5,217.45,215.95,3962712
213 | 2018-11-09 00:00:00+05:30,229.4,213.2,214,227.9,40754087
214 | 2018-11-12 00:00:00+05:30,230.85,221.1,230,223.05,29069172
215 | 2018-11-13 00:00:00+05:30,226.85,220,222,225.45,19014119
216 | 2018-11-14 00:00:00+05:30,229,221,226.9,222.4,19780790
217 | 2018-11-15 00:00:00+05:30,217.95,202.25,215,205.85,60944632
218 | 2018-11-16 00:00:00+05:30,201.75,187.8,201.75,191,88101871
219 | 2018-11-19 00:00:00+05:30,205.95,194.35,198.1,204.8,64329717
220 | 2018-11-20 00:00:00+05:30,199.75,191,198,192.1,77354754
221 | 2018-11-21 00:00:00+05:30,200.8,194.6,196.1,198.15,47627190
222 | 2018-11-22 00:00:00+05:30,203.85,194.65,200,195.55,44350629
223 | 2018-11-26 00:00:00+05:30,191.95,182.2,189,187.9,60418383
224 | 2018-11-27 00:00:00+05:30,193.8,180.35,193,182.65,70065939
225 | 2018-11-28 00:00:00+05:30,181,160.3,180.4,162.1,140720495
226 | 2018-11-29 00:00:00+05:30,167,146.75,157.05,160.45,292043464
227 | 2018-11-30 00:00:00+05:30,171.35,160.45,164,169.8,120153166
228 | 2018-12-03 00:00:00+05:30,179.5,165.3,174,178,104720593
229 | 2018-12-04 00:00:00+05:30,182.45,175.25,180.35,176.5,68038860
230 | 2018-12-05 00:00:00+05:30,178.45,172.2,173.5,173.4,52289539
231 | 2018-12-06 00:00:00+05:30,172.5,167.35,169,168.45,38867660
232 | 2018-12-07 00:00:00+05:30,172,162,170,166.2,49750931
233 | 2018-12-10 00:00:00+05:30,169.5,160,162,165.65,50379613
234 | 2018-12-11 00:00:00+05:30,179.85,163.8,164.5,177.85,85103084
235 | 2018-12-12 00:00:00+05:30,187.5,181.55,182,186.6,71832334
236 | 2018-12-13 00:00:00+05:30,193.2,172.3,193,174.7,108920029
237 | 2018-12-14 00:00:00+05:30,181.6,168.25,172.7,180.35,79566067
238 | 2018-12-17 00:00:00+05:30,184.45,179.45,183.25,181,35269769
239 | 2018-12-18 00:00:00+05:30,180.9,176.75,180,179,35729865
240 | 2018-12-19 00:00:00+05:30,183.15,178.6,180.5,179.55,40247602
241 | 2018-12-20 00:00:00+05:30,187.6,178,179,186.75,61970822
242 | 2018-12-21 00:00:00+05:30,188.45,180.85,188,182.95,40864241
243 | 2018-12-24 00:00:00+05:30,185.5,181.25,184,182.3,32861722
244 | 2018-12-26 00:00:00+05:30,182.65,174.25,182.5,179.45,47421246
245 | 2018-12-27 00:00:00+05:30,183.45,177.05,183,178.1,44303877
246 | 2018-12-28 00:00:00+05:30,183.3,180,180,181.45,27142501
247 | 2018-12-31 00:00:00+05:30,183.85,181,183.2,181.8,18970865
--------------------------------------------------------------------------------
/images/pandas_dtypes.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/poornagurram/TimeSeriesAnalysis_ODSC_2019/562b5b1189d6d5c93f8b1c89fb8ecbc42350024b/images/pandas_dtypes.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | jupyter
2 | pandas
3 | matplotlib
4 | statsmodels
5 | keras
6 | scikit-learn
7 | seaborn
8 | xgboost
9 | arch
10 |
--------------------------------------------------------------------------------