├── Demo.gif
├── Preprocessed_data
└── final_data.csv
├── README.md
├── app.py
├── forecast.ipynb
├── forecast_CNN.ipynb
├── forecast_LSTM.ipynb
├── model
├── arima_model.pkl
├── arma_model.pkl
├── auto_arima_model.pkl
├── exp_smoothing_model.pkl
└── prophet_model.pkl
├── req.txt
├── static
├── arima_plot_1591687174.918447.png
├── arma_plot_1591687215.727288.png
├── auto_arima_plot_1591687207.990299.png
├── exp_plot_1591687185.701119.png
├── plot.png
├── prophet_plot_1591687195.249861.png
└── style.css
└── templates
└── index.html
/Demo.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/Demo.gif
--------------------------------------------------------------------------------
/Preprocessed_data/final_data.csv:
--------------------------------------------------------------------------------
1 | Date,Views
2 | 2015-07-01,20.38
3 | 2015-07-02,20.75
4 | 2015-07-03,19.57
5 | 2015-07-04,20.44
6 | 2015-07-05,20.77
7 | 2015-07-06,22.54
8 | 2015-07-07,21.21
9 | 2015-07-08,19.11
10 | 2015-07-09,19.99
11 | 2015-07-10,20.2
12 | 2015-07-11,19.7
13 | 2015-07-12,20.23
14 | 2015-07-13,20.52
15 | 2015-07-14,19.7
16 | 2015-07-15,19.56
17 | 2015-07-16,19.66
18 | 2015-07-17,18.99
19 | 2015-07-18,18.36
20 | 2015-07-19,18.83
21 | 2015-07-20,20.34
22 | 2015-07-21,19.74
23 | 2015-07-22,19.54
24 | 2015-07-23,19.62
25 | 2015-07-24,18.64
26 | 2015-07-25,19.32
27 | 2015-07-26,20.4
28 | 2015-07-27,20.16
29 | 2015-07-28,19.89
30 | 2015-07-29,27.56
31 | 2015-07-30,18.5
32 | 2015-07-31,17.93
33 | 2015-08-01,18.18
34 | 2015-08-02,19.41
35 | 2015-08-03,18.95
36 | 2015-08-04,20.53
37 | 2015-08-05,20.25
38 | 2015-08-06,20.06
39 | 2015-08-07,19.12
40 | 2015-08-08,19.44
41 | 2015-08-09,19.71
42 | 2015-08-10,19.85
43 | 2015-08-11,18.37
44 | 2015-08-12,19.39
45 | 2015-08-13,19.19
46 | 2015-08-14,19.45
47 | 2015-08-15,19.31
48 | 2015-08-16,19.68
49 | 2015-08-17,20.31
50 | 2015-08-18,20.08
51 | 2015-08-19,18.9
52 | 2015-08-20,19.09
53 | 2015-08-21,18.9
54 | 2015-08-22,18.8
55 | 2015-08-23,19.89
56 | 2015-08-24,20.21
57 | 2015-08-25,18.56
58 | 2015-08-26,18.09
59 | 2015-08-27,17.86
60 | 2015-08-28,17.29
61 | 2015-08-29,17.84
62 | 2015-08-30,18.2
63 | 2015-08-31,20.55
64 | 2015-09-01,18.08
65 | 2015-09-02,18.42
66 | 2015-09-03,17.85
67 | 2015-09-04,17.48
68 | 2015-09-05,17.9
69 | 2015-09-06,19.0
70 | 2015-09-07,19.74
71 | 2015-09-08,19.65
72 | 2015-09-09,18.61
73 | 2015-09-10,18.07
74 | 2015-09-11,18.56
75 | 2015-09-12,18.93
76 | 2015-09-13,19.06
77 | 2015-09-14,19.22
78 | 2015-09-15,19.27
79 | 2015-09-16,18.23
80 | 2015-09-17,18.2
81 | 2015-09-18,17.71
82 | 2015-09-19,17.85
83 | 2015-09-20,19.61
84 | 2015-09-21,20.8
85 | 2015-09-22,18.83
86 | 2015-09-23,19.59
87 | 2015-09-24,17.95
88 | 2015-09-25,17.81
89 | 2015-09-26,18.1
90 | 2015-09-27,18.7
91 | 2015-09-28,20.31
92 | 2015-09-29,17.54
93 | 2015-09-30,19.7
94 | 2015-10-01,18.84
95 | 2015-10-02,18.54
96 | 2015-10-03,18.3
97 | 2015-10-04,18.94
98 | 2015-10-05,19.28
99 | 2015-10-06,18.27
100 | 2015-10-07,18.84
101 | 2015-10-08,19.05
102 | 2015-10-09,18.44
103 | 2015-10-10,18.94
104 | 2015-10-11,19.12
105 | 2015-10-12,20.29
106 | 2015-10-13,19.56
107 | 2015-10-14,18.8
108 | 2015-10-15,18.82
109 | 2015-10-16,17.87
110 | 2015-10-17,18.04
111 | 2015-10-18,19.03
112 | 2015-10-19,21.35
113 | 2015-10-20,20.67
114 | 2015-10-21,21.55
115 | 2015-10-22,21.47
116 | 2015-10-23,18.92
117 | 2015-10-24,17.53
118 | 2015-10-25,20.1
119 | 2015-10-26,20.55
120 | 2015-10-27,20.89
121 | 2015-10-28,20.54
122 | 2015-10-29,18.82
123 | 2015-10-30,18.18
124 | 2015-10-31,18.12
125 | 2015-11-01,17.97
126 | 2015-11-02,21.33
127 | 2015-11-03,21.05
128 | 2015-11-04,20.02
129 | 2015-11-05,19.78
130 | 2015-11-06,18.06
131 | 2015-11-07,17.54
132 | 2015-11-08,18.54
133 | 2015-11-09,20.78
134 | 2015-11-10,19.63
135 | 2015-11-11,19.2
136 | 2015-11-12,18.8
137 | 2015-11-13,18.12
138 | 2015-11-14,18.07
139 | 2015-11-15,19.12
140 | 2015-11-16,20.88
141 | 2015-11-17,19.79
142 | 2015-11-18,19.89
143 | 2015-11-19,19.15
144 | 2015-11-20,17.57
145 | 2015-11-21,18.95
146 | 2015-11-22,19.46
147 | 2015-11-23,20.56
148 | 2015-11-24,19.45
149 | 2015-11-25,18.82
150 | 2015-11-26,18.14
151 | 2015-11-27,17.9
152 | 2015-11-28,19.15
153 | 2015-11-29,19.28
154 | 2015-11-30,21.37
155 | 2015-12-01,20.1
156 | 2015-12-02,18.21
157 | 2015-12-03,18.57
158 | 2015-12-04,17.96
159 | 2015-12-05,18.17
160 | 2015-12-06,18.41
161 | 2015-12-07,21.11
162 | 2015-12-08,20.08
163 | 2015-12-09,18.03
164 | 2015-12-10,16.57
165 | 2015-12-11,17.55
166 | 2015-12-12,16.26
167 | 2015-12-13,18.34
168 | 2015-12-14,19.25
169 | 2015-12-15,17.27
170 | 2015-12-16,17.62
171 | 2015-12-17,16.54
172 | 2015-12-18,16.28
173 | 2015-12-19,16.65
174 | 2015-12-20,16.68
175 | 2015-12-21,17.8
176 | 2015-12-22,17.44
177 | 2015-12-23,17.78
178 | 2015-12-24,17.21
179 | 2015-12-25,15.98
180 | 2015-12-26,17.89
181 | 2015-12-27,17.4
182 | 2015-12-28,18.94
183 | 2015-12-29,17.57
184 | 2015-12-30,17.38
185 | 2015-12-31,16.96
186 | 2016-01-01,16.36
187 | 2016-01-02,17.61
188 | 2016-01-03,17.54
189 | 2016-01-04,19.23
190 | 2016-01-05,18.14
191 | 2016-01-06,18.38
192 | 2016-01-07,17.28
193 | 2016-01-08,16.44
194 | 2016-01-09,16.44
195 | 2016-01-10,17.28
196 | 2016-01-11,19.1
197 | 2016-01-12,18.45
198 | 2016-01-13,17.2
199 | 2016-01-14,18.14
200 | 2016-01-15,17.46
201 | 2016-01-16,18.22
202 | 2016-01-17,18.7
203 | 2016-01-18,19.39
204 | 2016-01-19,19.66
205 | 2016-01-20,18.72
206 | 2016-01-21,18.93
207 | 2016-01-22,16.9
208 | 2016-01-23,17.53
209 | 2016-01-24,17.62
210 | 2016-01-25,20.1
211 | 2016-01-26,18.64
212 | 2016-01-27,19.26
213 | 2016-01-28,17.27
214 | 2016-01-29,17.16
215 | 2016-01-30,17.09
216 | 2016-01-31,18.36
217 | 2016-02-01,20.28
218 | 2016-02-02,18.92
219 | 2016-02-03,18.86
220 | 2016-02-04,18.48
221 | 2016-02-05,17.84
222 | 2016-02-06,18.59
223 | 2016-02-07,19.31
224 | 2016-02-08,20.74
225 | 2016-02-09,20.4
226 | 2016-02-10,17.8
227 | 2016-02-11,15.99
228 | 2016-02-12,16.62
229 | 2016-02-13,17.48
230 | 2016-02-14,18.73
231 | 2016-02-15,20.03
232 | 2016-02-16,20.58
233 | 2016-02-17,20.48
234 | 2016-02-18,18.84
235 | 2016-02-19,17.77
236 | 2016-02-20,17.82
237 | 2016-02-21,18.61
238 | 2016-02-22,20.42
239 | 2016-02-23,19.12
240 | 2016-02-24,18.96
241 | 2016-02-25,19.34
242 | 2016-02-26,18.11
243 | 2016-02-27,18.72
244 | 2016-02-28,19.8
245 | 2016-02-29,21.58
246 | 2016-03-01,19.91
247 | 2016-03-02,19.66
248 | 2016-03-03,21.09
249 | 2016-03-04,17.95
250 | 2016-03-05,19.49
251 | 2016-03-06,18.44
252 | 2016-03-07,19.34
253 | 2016-03-08,18.44
254 | 2016-03-09,18.14
255 | 2016-03-10,17.28
256 | 2016-03-11,15.68
257 | 2016-03-12,16.38
258 | 2016-03-13,14.68
259 | 2016-03-14,18.62
260 | 2016-03-15,16.79
261 | 2016-03-16,16.29
262 | 2016-03-17,16.04
263 | 2016-03-18,15.87
264 | 2016-03-19,14.5
265 | 2016-03-20,18.17482758620689
266 | 2016-03-21,15.32
267 | 2016-03-22,18.069999999999993
268 | 2016-03-23,15.11
269 | 2016-03-24,14.59
270 | 2016-03-25,17.672592592592586
271 | 2016-03-26,14.59
272 | 2016-03-27,15.33
273 | 2016-03-28,17.45
274 | 2016-03-29,15.72
275 | 2016-03-30,15.31
276 | 2016-03-31,14.51
277 | 2016-04-01,16.65192307692307
278 | 2016-04-02,15.13
279 | 2016-04-03,16.17
280 | 2016-04-04,18.15
281 | 2016-04-05,17.0
282 | 2016-04-06,17.03
283 | 2016-04-07,16.82
284 | 2016-04-08,16.18
285 | 2016-04-09,17.17
286 | 2016-04-10,17.37
287 | 2016-04-11,19.21
288 | 2016-04-12,18.44
289 | 2016-04-13,17.13
290 | 2016-04-14,17.17
291 | 2016-04-15,16.61
292 | 2016-04-16,17.18
293 | 2016-04-17,17.53
294 | 2016-04-18,20.15
295 | 2016-04-19,18.55
296 | 2016-04-20,17.68
297 | 2016-04-21,15.7
298 | 2016-04-22,15.58
299 | 2016-04-23,16.67
300 | 2016-04-24,17.78
301 | 2016-04-25,20.18
302 | 2016-04-26,17.81
303 | 2016-04-27,17.62
304 | 2016-04-28,17.92
305 | 2016-04-29,16.02
306 | 2016-04-30,17.19
307 | 2016-05-01,18.2
308 | 2016-05-02,19.71
309 | 2016-05-03,19.15
310 | 2016-05-04,18.69
311 | 2016-05-05,16.77
312 | 2016-05-06,16.41
313 | 2016-05-07,17.23
314 | 2016-05-08,17.88
315 | 2016-05-09,19.26
316 | 2016-05-10,18.92
317 | 2016-05-11,18.15
318 | 2016-05-12,18.43
319 | 2016-05-13,16.81
320 | 2016-05-14,18.16
321 | 2016-05-15,19.18
322 | 2016-05-16,21.1
323 | 2016-05-17,18.96
324 | 2016-05-18,17.96
325 | 2016-05-19,17.64
326 | 2016-05-20,16.16
327 | 2016-05-21,17.89
328 | 2016-05-22,15.63
329 | 2016-05-23,19.55
330 | 2016-05-24,18.48
331 | 2016-05-25,18.78
332 | 2016-05-26,17.9
333 | 2016-05-27,16.35
334 | 2016-05-28,18.1
335 | 2016-05-29,19.0
336 | 2016-05-30,19.55
337 | 2016-05-31,19.03
338 | 2016-06-01,18.73
339 | 2016-06-02,17.31
340 | 2016-06-03,15.92
341 | 2016-06-04,15.62
342 | 2016-06-05,18.88
343 | 2016-06-06,21.89
344 | 2016-06-07,19.1
345 | 2016-06-08,20.11
346 | 2016-06-09,18.97
347 | 2016-06-10,17.79
348 | 2016-06-11,20.3
349 | 2016-06-12,21.56
350 | 2016-06-13,22.99
351 | 2016-06-14,21.6
352 | 2016-06-15,19.54
353 | 2016-06-16,18.2
354 | 2016-06-17,16.32
355 | 2016-06-18,18.41
356 | 2016-06-19,19.03
357 | 2016-06-20,20.93
358 | 2016-06-21,19.77
359 | 2016-06-22,19.88
360 | 2016-06-23,19.59
361 | 2016-06-24,17.87
362 | 2016-06-25,19.31
363 | 2016-06-26,20.25
364 | 2016-06-27,18.67
365 | 2016-06-28,20.02
366 | 2016-06-29,19.58
367 | 2016-06-30,23.21
368 | 2016-07-01,17.24
369 | 2016-07-02,20.22
370 | 2016-07-03,20.83
371 | 2016-07-04,21.62
372 | 2016-07-05,18.72
373 | 2016-07-06,14.51
374 | 2016-07-07,14.88
375 | 2016-07-08,16.1
376 | 2016-07-09,18.41
377 | 2016-07-10,20.36
378 | 2016-07-11,21.42
379 | 2016-07-12,20.02
380 | 2016-07-13,19.22
381 | 2016-07-14,19.21
382 | 2016-07-15,17.86
383 | 2016-07-16,20.21
384 | 2016-07-17,20.72
385 | 2016-07-18,22.11
386 | 2016-07-19,20.95
387 | 2016-07-20,19.40551724137932
388 | 2016-07-21,19.392500000000013
389 | 2016-07-22,19.374444444444457
390 | 2016-07-23,19.36615384615386
391 | 2016-07-24,19.426000000000016
392 | 2016-07-25,19.43083333333335
393 | 2016-07-26,19.395217391304364
394 | 2016-07-27,19.428181818181834
395 | 2016-07-28,19.400000000000016
396 | 2016-07-29,19.39100000000002
397 | 2016-07-30,19.190000000000023
398 | 2016-07-31,19.298333333333357
399 | 2016-08-01,19.24411764705885
400 | 2016-08-02,19.145000000000028
401 | 2016-08-03,18.98000000000003
402 | 2016-08-04,18.998571428571463
403 | 2016-08-05,19.34384615384619
404 | 2016-08-06,19.715833333333375
405 | 2016-08-07,20.0445454545455
406 | 2016-08-08,20.208000000000048
407 | 2016-08-09,20.191111111111166
408 | 2016-08-10,20.03750000000006
409 | 2016-08-11,20.040000000000067
410 | 2016-08-12,20.176666666666744
411 | 2016-08-13,20.37000000000009
412 | 2016-08-14,20.997500000000112
413 | 2016-08-15,21.26000000000015
414 | 2016-08-16,21.530000000000225
415 | 2016-08-17,24.93
416 | 2016-08-18,22.3
417 | 2016-08-19,20.31
418 | 2016-08-20,23.25
419 | 2016-08-21,23.95
420 | 2016-08-22,25.94
421 | 2016-08-23,25.35
422 | 2016-08-24,24.11
423 | 2016-08-25,22.25
424 | 2016-08-26,20.79
425 | 2016-08-27,25.42
426 | 2016-08-28,26.54
427 | 2016-08-29,23.761666666666702
428 | 2016-08-30,23.761666666666702
429 | 2016-08-31,23.761666666666702
430 | 2016-09-01,23.761666666666702
431 | 2016-09-02,23.761666666666702
432 | 2016-09-03,23.761666666666702
433 | 2016-09-04,23.761666666666702
434 | 2016-09-05,23.761666666666702
435 | 2016-09-06,27.95
436 | 2016-09-07,24.72
437 | 2016-09-08,22.95
438 | 2016-09-09,20.75
439 | 2016-09-10,23.23
440 | 2016-09-11,23.61
441 | 2016-09-12,21.11
442 | 2016-09-13,24.11
443 | 2016-09-14,27.33
444 | 2016-09-15,27.96
445 | 2016-09-16,21.48
446 | 2016-09-17,24.61
447 | 2016-09-18,24.44
448 | 2016-09-19,25.88
449 | 2016-09-20,21.12
450 | 2016-09-21,23.38
451 | 2016-09-22,22.83
452 | 2016-09-23,20.51
453 | 2016-09-24,23.02
454 | 2016-09-25,23.81
455 | 2016-09-26,24.71
456 | 2016-09-27,25.2
457 | 2016-09-28,24.11
458 | 2016-09-29,22.86
459 | 2016-09-30,20.99
460 | 2016-10-01,24.14
461 | 2016-10-02,23.88
462 | 2016-10-03,26.07
463 | 2016-10-04,26.38
464 | 2016-10-05,23.901379310344854
465 | 2016-10-06,23.75678571428574
466 | 2016-10-07,20.85
467 | 2016-10-08,23.05
468 | 2016-10-09,23.95
469 | 2016-10-10,24.91
470 | 2016-10-11,19.7
471 | 2016-10-12,17.72
472 | 2016-10-13,21.41
473 | 2016-10-14,21.27
474 | 2016-10-15,24.25
475 | 2016-10-16,24.59
476 | 2016-10-17,26.99
477 | 2016-10-18,25.23
478 | 2016-10-19,23.66
479 | 2016-10-20,22.12
480 | 2016-10-21,20.25
481 | 2016-10-22,23.4
482 | 2016-10-23,23.62
483 | 2016-10-24,25.39
484 | 2016-10-25,24.28
485 | 2016-10-26,23.48
486 | 2016-10-27,22.31
487 | 2016-10-28,19.9
488 | 2016-10-29,22.61
489 | 2016-10-30,22.99
490 | 2016-10-31,25.23
491 | 2016-11-01,24.03
492 | 2016-11-02,23.74
493 | 2016-11-03,22.67
494 | 2016-11-04,20.86
495 | 2016-11-05,22.98
496 | 2016-11-06,23.32
497 | 2016-11-07,25.02
498 | 2016-11-08,24.6
499 | 2016-11-09,25.3
500 | 2016-11-10,24.22
501 | 2016-11-11,20.82
502 | 2016-11-12,23.48
503 | 2016-11-13,24.37
504 | 2016-11-14,25.56
505 | 2016-11-15,24.23
506 | 2016-11-16,24.03
507 | 2016-11-17,22.34
508 | 2016-11-18,20.33
509 | 2016-11-19,22.77
510 | 2016-11-20,20.38
511 | 2016-11-21,24.42
512 | 2016-11-22,23.95
513 | 2016-11-23,22.7
514 | 2016-11-24,21.13
515 | 2016-11-25,20.06
516 | 2016-11-26,22.9
517 | 2016-11-27,22.25
518 | 2016-11-28,21.39
519 | 2016-11-29,22.78
520 | 2016-11-30,20.29
521 | 2016-12-01,21.17
522 | 2016-12-02,19.55
523 | 2016-12-03,22.7
524 | 2016-12-04,22.66
525 | 2016-12-05,25.2
526 | 2016-12-06,23.75
527 | 2016-12-07,22.92
528 | 2016-12-08,21.9
529 | 2016-12-09,19.53
530 | 2016-12-10,22.86
531 | 2016-12-11,22.93
532 | 2016-12-12,24.77
533 | 2016-12-13,23.66
534 | 2016-12-14,22.66
535 | 2016-12-15,20.83
536 | 2016-12-16,19.65
537 | 2016-12-17,18.57
538 | 2016-12-18,22.42
539 | 2016-12-19,25.07
540 | 2016-12-20,25.04
541 | 2016-12-21,26.28
542 | 2016-12-22,24.2
543 | 2016-12-23,22.54
544 | 2016-12-24,25.06
545 | 2016-12-25,25.87
546 | 2016-12-26,22.70689655172414
547 | 2016-12-27,26.92
548 | 2016-12-28,27.03
549 | 2016-12-29,26.07
550 | 2016-12-30,24.36
551 | 2016-12-31,26.15
552 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Time-Series-Forecasting-on-Web-Traffic-Dataset
2 |
3 | Performed time-series analysis and forecasting on Google's web traffic dataset to forecast the number of views of Wikipedia web pages. This can help Google take effective measures to handle the web traffic.
4 |
5 | **Dataset:** Kaggle Web Traffic Time Series Forecasting (https://www.kaggle.com/c/web-traffic-time-series-forecasting)
6 |
7 | **Technologies and libraries used:** Python, AWS EC2, AWS S3, Flask, Statsmodels, Prophet, Tensorflow, Matplotlib.
8 |
9 | 
10 |
11 | ## Key highlights
12 | - Performed time series analysis, anomaly detection using `Isolation Forest` and interpolation using `rolling mean`
13 | - Explored various time series forecasting models including ARMA, ARIMA, Exponential Smoothing, Prophet, Auto-arima, CNN and LSTM and compared performance using `RMSE`
14 | - Developed `flask` app to render forecast plots generated using saved models
15 | - Practiced fetching data from `AWS S3` using boto3 and deployed the flask app on `AWS EC2` instance using nginx and gunicorn
16 |
17 | ## Execution
18 |
19 | Use the `req.txt` file to install the dependencies.
20 |
21 | ```
22 | pip install -r req.txt
23 | ```
24 |
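25 | To run the application on a local system, comment out lines 19-23 in app.py (the block that reads the data from the S3 bucket) and uncomment line 25 (the local `pd.read_csv('Preprocessed_data/final_data.csv')` call). Then enter the following command in a terminal.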
26 | ```
27 | python app.py
28 | ```
29 |
30 | ## Time series analysis, anomaly detection and forecasting
31 |
32 | To view the various analyses of the web traffic time series data, the steps taken to detect and handle anomalies, and the model training, testing and forecasting, see the following Jupyter notebooks:
33 |
34 | - [forecast.ipynb](forecast.ipynb)
35 | - [forecast_CNN.ipynb](forecast_CNN.ipynb)
36 | - [forecast_LSTM.ipynb](forecast_LSTM.ipynb)
37 |
38 |
39 | ## Model performance
40 |
41 | Model | RMSE
42 | ------------- | -------------
43 | ARMA | 1.732618
44 | ARIMA | 1.734711
45 | Auto-arima | 2.148956
46 | Exponential Smoothing | 2.186610
47 | Prophet | 3.525529
48 |
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | # Import packages
2 | import os
3 | import pickle
4 | from flask import Flask, request, jsonify, render_template
5 | import pandas as pd
6 | import boto3
7 | from statsmodels.tsa.statespace.sarimax import SARIMAXResults
8 | import matplotlib
9 | matplotlib.use('Agg')
10 | import matplotlib.pyplot as plt
11 | from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
12 | from matplotlib.figure import Figure
13 | import time
14 |
app = Flask(__name__)
SECRET_KEY = os.urandom(24)  # random 24-byte key; only assigned to app.secret_key in the __main__ block

# Read data from S3 bucket (YOUR_ACCESS_KEY / YOUR_SECRET_KEY are not defined in the visible code and must be supplied)
bucket = "flaskforecast"
file_name = "final_data.csv"
s3 = boto3.client('s3',aws_access_key_id = YOUR_ACCESS_KEY,aws_secret_access_key=YOUR_SECRET_KEY)
obj = s3.get_object(Bucket= bucket, Key= file_name)
data_df = pd.read_csv(obj['Body'])

# data_df = pd.read_csv('Preprocessed_data/final_data.csv')
# (The commented-out line above is the local-file alternative to the S3 read; the README refers to it by line number.)
# Index the frame by its 'Date' column, parsed as datetimes, so it can be sliced with date strings below.
data_df = data_df.set_index('Date')
data_df.index = pd.DatetimeIndex(data_df.index)

# Train, test split: rows up to and including September 2016 are train, rows from October 2016 onwards are test
train = data_df[:'2016-09']
test = data_df['2016-10':]
33 |
34 |
# Display plots based on user's selection of model

def _load_pickle(path):
    """Return the object unpickled from ``path``, closing the file afterwards.

    NOTE: pickle must only be used on trusted files; the paths passed here
    are the model files shipped in the ``model/`` directory.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


def _forecast_sarimax(model_path):
    """Forecast the test period with a saved statsmodels results object.

    Returns a ``(forecast, band)`` pair: ``forecast`` is a one-column
    ('Forecast') frame indexed by date, ``band`` is the
    ``(lower, upper)`` confidence bounds to shade around it.
    """
    result = SARIMAXResults.load(model_path)
    forecast_values = result.get_forecast(steps=test.shape[0])
    conf_interval = forecast_values.conf_int()

    forecast_df = pd.DataFrame({'Date': test.index, 'Views': forecast_values.predicted_mean, 'lower_views': conf_interval['lower Views'].values, 'upper_views': conf_interval['upper Views'].values})
    forecast_df = forecast_df.set_index('Date')

    forecast = forecast_df[['Views']].rename(columns={'Views': 'Forecast'})
    return forecast, (forecast_df.lower_views, forecast_df.upper_views)


def _forecast_exp_smoothing():
    """Fit the saved Exponential Smoothing model and forecast the test period.

    Returns ``(forecast, None)``; this model draws no confidence band.
    """
    exp_model = _load_pickle('model/exp_smoothing_model.pkl')
    exp_smoothing_result = exp_model.fit(smoothing_level=0.5, optimized=True)
    # The module-level ``test`` frame already carries a DatetimeIndex, so it
    # is read here without re-assigning its index (shared state stays untouched).
    forecast = exp_smoothing_result.forecast(test.shape[0])
    forecast = forecast.reset_index().rename(columns={'index': 'Date', 0: 'Views'}).set_index('Date')
    return forecast.rename(columns={'Views': 'Forecast'}), None


def _forecast_prophet():
    """Forecast the test period with the saved Prophet model.

    Returns ``(forecast, band)`` where ``band`` is Prophet's
    ``(yhat_lower, yhat_upper)`` columns for the forecast rows.
    """
    prophet_model = _load_pickle('model/prophet_model.pkl')
    future = prophet_model.make_future_dataframe(periods=test.shape[0])
    prophet_forecast = prophet_model.predict(future)

    # Keep only the trailing rows, i.e. the periods added for the test span.
    forecast_prophet = prophet_forecast[['ds', 'yhat_lower', 'yhat_upper', 'yhat']][-test.shape[0]:]
    forecast_prophet = forecast_prophet.set_index('ds')

    pd.plotting.register_matplotlib_converters()
    forecast = forecast_prophet.rename(columns={'yhat': 'Forecast'})[['Forecast']]
    return forecast, (forecast_prophet['yhat_lower'], forecast_prophet['yhat_upper'])


def _forecast_auto_arima():
    """Forecast the test period with the saved auto-arima model.

    Returns ``(forecast, None)``; this model draws no confidence band.
    """
    auto_arima_result = SARIMAXResults.load('model/auto_arima_model.pkl')
    predictions = auto_arima_result.predict(n_periods=test.shape[0])
    return pd.DataFrame(predictions, index=test.index, columns=['Forecast']), None


def _render_plot(forecast, band, actual_label, prefix):
    """Plot actual vs. forecast values and save the image under ``static/``.

    Parameters:
        forecast: frame with a single 'Forecast' column indexed by date.
        band: ``(lower, upper)`` bounds to shade, or ``None`` for no band.
        actual_label: legend label used for the actual test values.
        prefix: file-name prefix of this model's plots (e.g. 'arma_plot_').

    Returns the relative path of the newly written PNG.
    """
    fig, ax = plt.subplots(figsize=(15, 4))
    try:
        # Draw on the explicit fig/ax objects rather than pyplot's global
        # "current figure", which is shared by every request in the process.
        test.rename(columns={'Views': actual_label}).plot(ax=ax, color='blue')
        forecast.plot(ax=ax, color='red')
        if band is not None:
            lower, upper = band
            ax.fill_between(forecast.index, lower, upper, color='pink', alpha=0.5)
        ax.set_xlabel('Date')
        ax.set_ylabel('Views')
        ax.legend(loc='best')

        # The timestamp makes the file name unique, so the new image is not
        # confused with an earlier plot of the same model.
        new_plot = prefix + str(time.time()) + ".png"

        # Remove earlier plots of the same model before writing the new one.
        for filename in os.listdir('static/'):
            if filename.startswith(prefix):
                os.remove('static/' + filename)

        fig.savefig('static/' + new_plot)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each request would leak one figure.
        plt.close(fig)
    return 'static/' + new_plot


# Form value -> (display name, plot file prefix, legend label for actuals, forecaster)
_FORECASTS = {
    'arma': ('ARMA', 'arma_plot_', 'Actual value',
             lambda: _forecast_sarimax('model/arma_model.pkl')),
    'arima': ('ARIMA', 'arima_plot_', 'Actual value',
              lambda: _forecast_sarimax('model/arima_model.pkl')),
    'exp': ('Exponential Smoothing', 'exp_plot_', 'Actual data',
            _forecast_exp_smoothing),
    'prophet': ('Prophet', 'prophet_plot_', 'Actual data',
                _forecast_prophet),
    'auto_arima': ('Auto-arima', 'auto_arima_plot_', 'Actual value',
                   _forecast_auto_arima),
}


@app.route('/')
@app.route('/result', methods=['POST'])
def home():
    """Render the index page, with a forecast plot when a model was posted.

    On GET, or on a POST whose 'forecast' form value is not a known model,
    ``index.html`` is rendered with an empty ``fcast``. On a POST naming a
    known model, the saved model forecasts the test period, the plot is
    written to ``static/`` and ``index.html`` is rendered with the model's
    display name (``forecast``) and the plot path (``fcast``).

    A POST without a 'forecast' field fails at the ``request.form`` lookup
    (Flask reports this as a 400 Bad Request).
    """
    fcast = ""
    if request.method == 'POST':
        method = request.form['forecast']
        selected = _FORECASTS.get(method)
        if selected is not None:
            title, prefix, actual_label, make_forecast = selected
            forecast, band = make_forecast()
            plot_path = _render_plot(forecast, band, actual_label, prefix)
            return render_template('index.html', forecast=title, fcast=plot_path)
    return render_template('index.html', fcast=fcast)
175 |
# Entry point when this file is executed directly (e.g. `python app.py`).
if __name__ == '__main__':
    # The secret key is only assigned on this path, not when the module is imported.
    app.secret_key = SECRET_KEY
    # NOTE(review): debug=True turns on Flask's debug mode; presumably meant for local runs only - confirm before deploying.
    app.run(debug=True)
179 |
--------------------------------------------------------------------------------
/forecast_CNN.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
13 | " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
14 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
15 | " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
16 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
17 | " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
18 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
19 | " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
20 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
21 | " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
22 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
23 | " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "# Import libraries\n",
29 | "import pandas as pd\n",
30 | "import tensorflow as tf\n",
31 | "from tensorflow import keras\n",
32 | "import numpy as np\n",
33 | "import matplotlib.pyplot as plt\n",
34 | "plt.style.use('fivethirtyeight')\n",
35 | "from datetime import datetime\n",
36 | "from sklearn.ensemble import IsolationForest\n",
37 | "from statsmodels.graphics import tsaplots\n",
38 | "import statsmodels.api as sm\n",
39 | "from pylab import rcParams\n",
40 | "import warnings \n",
41 | "warnings.filterwarnings(\"ignore\")"
42 | ]
43 | },
44 | {
45 | "cell_type": "code",
46 | "execution_count": 2,
47 | "metadata": {},
48 | "outputs": [],
49 | "source": [
50 | "# Read pre-processed data\n",
51 | "top_page_df = pd.read_csv('Preprocessed_data/final_data.csv')\n",
52 | "top_page_df = top_page_df.set_index('Date')"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 6,
58 | "metadata": {},
59 | "outputs": [
60 | {
61 | "data": {
62 | "text/html": [
63 | "
\n",
64 | "\n",
77 | "
\n",
78 | " \n",
79 | " \n",
80 | " | \n",
81 | " Views | \n",
82 | "
\n",
83 | " \n",
84 | " Date | \n",
85 | " | \n",
86 | "
\n",
87 | " \n",
88 | " \n",
89 | " \n",
90 | " 2015-07-01 | \n",
91 | " 20.38 | \n",
92 | "
\n",
93 | " \n",
94 | " 2015-07-02 | \n",
95 | " 20.75 | \n",
96 | "
\n",
97 | " \n",
98 | " 2015-07-03 | \n",
99 | " 19.57 | \n",
100 | "
\n",
101 | " \n",
102 | " 2015-07-04 | \n",
103 | " 20.44 | \n",
104 | "
\n",
105 | " \n",
106 | " 2015-07-05 | \n",
107 | " 20.77 | \n",
108 | "
\n",
109 | " \n",
110 | "
\n",
111 | "
"
112 | ],
113 | "text/plain": [
114 | " Views\n",
115 | "Date \n",
116 | "2015-07-01 20.38\n",
117 | "2015-07-02 20.75\n",
118 | "2015-07-03 19.57\n",
119 | "2015-07-04 20.44\n",
120 | "2015-07-05 20.77"
121 | ]
122 | },
123 | "execution_count": 6,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "# Time-series of page with maximum views\n",
130 | "top_pages = df_reshaped.groupby('Page')['Views'].sum().reset_index()\n",
131 | "top_pages_list = top_pages.nlargest(5,'Views')['Page'].tolist()\n",
132 | "\n",
133 | "top_page_df = df_reshaped[df_reshaped.Page == top_pages_list[0]]\n",
134 | "top_page_df = top_page_df[['Views']]\n",
135 | "top_page_df['Views'] = top_page_df['Views'].div(1000000).round(2)\n",
136 | "top_page_df.head()"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "metadata": {},
142 | "source": [
143 | "## Anomaly detection"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 7,
149 | "metadata": {},
150 | "outputs": [],
151 | "source": [
152 | "# Detecting anomalies in the data, removing anomalies and filling missing values with rolling mean\n",
153 | "isolation_forest_model = IsolationForest(contamination=0.08)\n",
154 | "isolation_forest_model.fit(top_page_df)\n",
155 | "top_page_df['anomaly'] = isolation_forest_model.predict(top_page_df)\n",
156 | "\n",
157 | "top_page_df['new_views'] = top_page_df.apply(lambda row: row.Views if row.anomaly == 1 else None, axis='columns')\n",
158 | "top_page_df = top_page_df.assign(rolling_mean=top_page_df.new_views.fillna(top_page_df.new_views.rolling(30,min_periods=1).mean()))\n",
159 | "top_page_df = top_page_df.drop(columns=['Views','anomaly','new_views']).rename(columns={'rolling_mean':'Views'})"
160 | ]
161 | },
162 | {
163 | "cell_type": "code",
164 | "execution_count": 3,
165 | "metadata": {},
166 | "outputs": [],
167 | "source": [
168 | "# Convert dataset into suitable form to train the model\n",
169 | "train = top_page_df[:'2016-09']\n",
170 | "test = top_page_df['2016-10':]\n",
171 | "\n",
172 | "def transform_dataset(X, y, time_steps=1):\n",
173 | " Xs, ys = [], []\n",
174 | " for i in range(len(X) - time_steps):\n",
175 | " v = X.iloc[i:(i + time_steps)].values\n",
176 | " Xs.append(v)\n",
177 | " ys.append(y.iloc[i + time_steps])\n",
178 | " return np.array(Xs), np.array(ys)\n",
179 | "\n",
180 | "time_steps = 7\n",
181 | "\n",
182 | "X_train, y_train = transform_dataset(train, train.Views, time_steps)\n",
183 | "X_test, y_test = transform_dataset(test, test.Views, time_steps)"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 4,
189 | "metadata": {},
190 | "outputs": [
191 | {
192 | "name": "stdout",
193 | "output_type": "stream",
194 | "text": [
195 | "WARNING:tensorflow:From /Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/resource_variable_ops.py:435: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
196 | "Instructions for updating:\n",
197 | "Colocations handled automatically by placer.\n",
198 | "WARNING:tensorflow:From /Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/utils/losses_utils.py:170: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
199 | "Instructions for updating:\n",
200 | "Use tf.cast instead.\n"
201 | ]
202 | }
203 | ],
204 | "source": [
205 | "# Building the model\n",
206 | "n_features=1\n",
207 | "model = keras.Sequential()\n",
208 | "model.add(keras.layers.Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=(time_steps, n_features)))\n",
209 | "model.add(keras.layers.MaxPooling1D(pool_size=2))\n",
210 | "model.add(keras.layers.Flatten())\n",
211 | "model.add(keras.layers.Dense(50, activation='relu'))\n",
212 | "model.add(keras.layers.Dense(1))\n",
213 | "model.compile(optimizer='adam', loss='mse')"
214 | ]
215 | },
216 | {
217 | "cell_type": "code",
218 | "execution_count": 5,
219 | "metadata": {},
220 | "outputs": [
221 | {
222 | "name": "stdout",
223 | "output_type": "stream",
224 | "text": [
225 | "WARNING:tensorflow:From /Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
226 | "Instructions for updating:\n",
227 | "Use tf.cast instead.\n",
228 | "Epoch 1/20\n",
229 | "421/421 [==============================] - 0s 568us/sample - loss: 301.7494\n",
230 | "Epoch 2/20\n",
231 | "421/421 [==============================] - 0s 47us/sample - loss: 86.0850\n",
232 | "Epoch 3/20\n",
233 | "421/421 [==============================] - 0s 45us/sample - loss: 8.9935\n",
234 | "Epoch 4/20\n",
235 | "421/421 [==============================] - 0s 39us/sample - loss: 5.1175\n",
236 | "Epoch 5/20\n",
237 | "421/421 [==============================] - 0s 42us/sample - loss: 2.7050\n",
238 | "Epoch 6/20\n",
239 | "421/421 [==============================] - 0s 43us/sample - loss: 2.4182\n",
240 | "Epoch 7/20\n",
241 | "421/421 [==============================] - 0s 39us/sample - loss: 2.3232\n",
242 | "Epoch 8/20\n",
243 | "421/421 [==============================] - 0s 37us/sample - loss: 2.2971\n",
244 | "Epoch 9/20\n",
245 | "421/421 [==============================] - 0s 40us/sample - loss: 2.2671\n",
246 | "Epoch 10/20\n",
247 | "421/421 [==============================] - 0s 37us/sample - loss: 2.2649\n",
248 | "Epoch 11/20\n",
249 | "421/421 [==============================] - 0s 40us/sample - loss: 2.2528\n",
250 | "Epoch 12/20\n",
251 | "421/421 [==============================] - 0s 39us/sample - loss: 2.2417\n",
252 | "Epoch 13/20\n",
253 | "421/421 [==============================] - 0s 39us/sample - loss: 2.2324\n",
254 | "Epoch 14/20\n",
255 | "421/421 [==============================] - 0s 40us/sample - loss: 2.2214\n",
256 | "Epoch 15/20\n",
257 | "421/421 [==============================] - 0s 38us/sample - loss: 2.2111\n",
258 | "Epoch 16/20\n",
259 | "421/421 [==============================] - 0s 39us/sample - loss: 2.2003\n",
260 | "Epoch 17/20\n",
261 | "421/421 [==============================] - 0s 40us/sample - loss: 2.1893\n",
262 | "Epoch 18/20\n",
263 | "421/421 [==============================] - 0s 41us/sample - loss: 2.1780\n",
264 | "Epoch 19/20\n",
265 | "421/421 [==============================] - 0s 39us/sample - loss: 2.1653\n",
266 | "Epoch 20/20\n",
267 | "421/421 [==============================] - 0s 41us/sample - loss: 2.1450\n"
268 | ]
269 | }
270 | ],
271 | "source": [
272 | "# Training the model\n",
273 | "history = model.fit(\n",
274 | " X_train, y_train,\n",
275 | " epochs=20,\n",
276 | " verbose=1,\n",
277 | " shuffle=False\n",
278 | ")"
279 | ]
280 | },
281 | {
282 | "cell_type": "code",
283 | "execution_count": 6,
284 | "metadata": {},
285 | "outputs": [],
286 | "source": [
287 | "# Forecasting using test data. Test data is used for forecasting to validate the results and calculate RMSE\n",
288 | "y_pred = model.predict(X_test)"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 7,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "data": {
298 | "image/png": "\n",
299 | "text/plain": [
300 | ""
301 | ]
302 | },
303 | "metadata": {},
304 | "output_type": "display_data"
305 | }
306 | ],
307 | "source": [
308 | "fig, ax = plt.subplots()  # target/prediction i is dated test.index[i + time_steps], so skip the first window\n",
309 | "pd.DataFrame({'Date': test.index[time_steps:], 'Views': y_test}).plot('Date', ax=ax)\n",
310 | "pd.DataFrame({'Date': test.index[time_steps:], 'Views': y_pred.flatten()}).plot('Date', ax=ax)\n",
311 | "plt.legend(['test', 'forecast'], loc='upper left')\n",
312 | "plt.ylabel('Views (in million)')\n",
313 | "plt.show()"
314 | ]
315 | },
316 | {
317 | "cell_type": "code",
318 | "execution_count": 8,
319 | "metadata": {},
320 | "outputs": [
321 | {
322 | "name": "stdout",
323 | "output_type": "stream",
324 | "text": [
325 | "RMSE:2.0027657329476893\n"
326 | ]
327 | }
328 | ],
329 | "source": [
330 | "print('RMSE:'+str(np.sqrt(np.mean(np.square(y_pred.flatten() - y_test)))))"
331 | ]
332 | }
333 | ],
334 | "metadata": {
335 | "kernelspec": {
336 | "display_name": "Python 3",
337 | "language": "python",
338 | "name": "python3"
339 | },
340 | "language_info": {
341 | "codemirror_mode": {
342 | "name": "ipython",
343 | "version": 3
344 | },
345 | "file_extension": ".py",
346 | "mimetype": "text/x-python",
347 | "name": "python",
348 | "nbconvert_exporter": "python",
349 | "pygments_lexer": "ipython3",
350 | "version": "3.7.6"
351 | }
352 | },
353 | "nbformat": 4,
354 | "nbformat_minor": 2
355 | }
356 |
--------------------------------------------------------------------------------
/forecast_LSTM.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [
8 | {
9 | "name": "stderr",
10 | "output_type": "stream",
11 | "text": [
12 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
13 | " _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
14 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
15 | " _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
16 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:528: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
17 | " _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
18 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:529: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
19 | " _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
20 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:530: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
21 | " _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
22 | "/Users/subikshaa/anaconda3/lib/python3.7/site-packages/tensorflow/python/framework/dtypes.py:535: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
23 | " np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
24 | ]
25 | }
26 | ],
27 | "source": [
28 | "# Import libraries\n",
29 | "import pandas as pd\n",
30 | "import tensorflow as tf\n",
31 | "from tensorflow import keras\n",
32 | "import numpy as np\n",
33 | "import matplotlib.pyplot as plt\n",
34 | "plt.style.use('fivethirtyeight')\n",
35 | "from datetime import datetime\n",
36 | "from sklearn.ensemble import IsolationForest\n",
37 | "from statsmodels.graphics import tsaplots\n",
38 | "import statsmodels.api as sm\n",
39 | "from pylab import rcParams\n",
40 | "import warnings \n",
41 | "warnings.filterwarnings(\"ignore\")\n",
42 | "import pickle"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 24,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "# Read pre-processed data; parse Date so the index holds real timestamps and date slicing is chronological\n",
52 | "top_page_df = pd.read_csv('Preprocessed_data/final_data.csv', parse_dates=['Date'])\n",
53 | "top_page_df = top_page_df.set_index('Date')"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "execution_count": 26,
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "# Convert dataset into suitable form to train the model.\n",
63 | "# Day-level bounds keep all of September 2016 in train; a bare '2016-09' bound on string dates stops at 2016-08-31.\n",
64 | "train = top_page_df[:'2016-09-30']\n",
65 | "test = top_page_df['2016-10-01':]\n",
66 | "\n",
67 | "def transform_dataset(X, y, time_steps=1):\n",
68 | "    # Sliding windows: rows i..i+time_steps-1 of X are paired with the value of y at position i+time_steps.\n",
69 | "    Xs, ys = [], []\n",
70 | "    for i in range(len(X) - time_steps):\n",
71 | "        Xs.append(X.iloc[i:(i + time_steps)].values)\n",
72 | "        ys.append(y.iloc[i + time_steps])\n",
73 | "    return np.array(Xs), np.array(ys)\n",
74 | "\n",
75 | "time_steps = 7\n",
76 | "X_train, y_train = transform_dataset(train, train.Views, time_steps)\n",
77 | "X_test, y_test = transform_dataset(test, test.Views, time_steps)"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "## LSTM"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 27,
90 | "metadata": {},
91 | "outputs": [],
92 | "source": [
93 | "# Build the model\n",
94 | "model = keras.Sequential()\n",
95 | "model.add(keras.layers.LSTM(\n",
96 | " units=128,\n",
97 | " input_shape=(X_train.shape[1], X_train.shape[2])\n",
98 | "))\n",
99 | "model.add(keras.layers.Dense(units=1))\n",
100 | "model.compile(\n",
101 | " loss='mean_squared_error',\n",
102 | " optimizer=keras.optimizers.RMSprop()\n",
103 | ")"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 28,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "name": "stdout",
113 | "output_type": "stream",
114 | "text": [
115 | "Train on 378 samples, validate on 43 samples\n",
116 | "Epoch 1/20\n",
117 | "378/378 [==============================] - 1s 3ms/sample - loss: 137.4239 - val_loss: 64.4879\n",
118 | "Epoch 2/20\n",
119 | "378/378 [==============================] - 0s 432us/sample - loss: 13.3225 - val_loss: 23.4985\n",
120 | "Epoch 3/20\n",
121 | "378/378 [==============================] - 0s 424us/sample - loss: 3.8077 - val_loss: 13.6965\n",
122 | "Epoch 4/20\n",
123 | "378/378 [==============================] - 0s 444us/sample - loss: 2.5473 - val_loss: 10.9317\n",
124 | "Epoch 5/20\n",
125 | "378/378 [==============================] - 0s 434us/sample - loss: 2.3783 - val_loss: 10.1192\n",
126 | "Epoch 6/20\n",
127 | "378/378 [==============================] - 0s 424us/sample - loss: 2.3542 - val_loss: 9.7383\n",
128 | "Epoch 7/20\n",
129 | "378/378 [==============================] - 0s 432us/sample - loss: 2.2620 - val_loss: 9.2423\n",
130 | "Epoch 8/20\n",
131 | "378/378 [==============================] - 0s 437us/sample - loss: 2.2058 - val_loss: 8.8267\n",
132 | "Epoch 9/20\n",
133 | "378/378 [==============================] - 0s 424us/sample - loss: 2.1622 - val_loss: 8.4865\n",
134 | "Epoch 10/20\n",
135 | "378/378 [==============================] - 0s 411us/sample - loss: 2.1475 - val_loss: 8.2733\n",
136 | "Epoch 11/20\n",
137 | "378/378 [==============================] - 0s 410us/sample - loss: 2.1462 - val_loss: 8.2827\n",
138 | "Epoch 12/20\n",
139 | "378/378 [==============================] - 0s 419us/sample - loss: 2.1442 - val_loss: 8.2541\n",
140 | "Epoch 13/20\n",
141 | "378/378 [==============================] - 0s 422us/sample - loss: 2.1335 - val_loss: 8.0268\n",
142 | "Epoch 14/20\n",
143 | "378/378 [==============================] - 0s 429us/sample - loss: 2.1043 - val_loss: 7.8695\n",
144 | "Epoch 15/20\n",
145 | "378/378 [==============================] - 0s 440us/sample - loss: 2.0836 - val_loss: 7.8140\n",
146 | "Epoch 16/20\n",
147 | "378/378 [==============================] - 0s 426us/sample - loss: 2.0589 - val_loss: 7.6039\n",
148 | "Epoch 17/20\n",
149 | "378/378 [==============================] - 0s 433us/sample - loss: 2.0475 - val_loss: 7.5233\n",
150 | "Epoch 18/20\n",
151 | "378/378 [==============================] - 0s 445us/sample - loss: 2.0286 - val_loss: 7.4461\n",
152 | "Epoch 19/20\n",
153 | "378/378 [==============================] - 0s 432us/sample - loss: 2.0171 - val_loss: 7.4894\n",
154 | "Epoch 20/20\n",
155 | "378/378 [==============================] - 0s 429us/sample - loss: 2.0133 - val_loss: 7.7140\n"
156 | ]
157 | }
158 | ],
159 | "source": [
160 | "# Training the model\n",
161 | "history = model.fit(\n",
162 | " X_train, y_train,\n",
163 | " epochs=20,\n",
164 | " batch_size=16,\n",
165 | " validation_split=0.1,\n",
166 | " verbose=1,\n",
167 | " shuffle=False\n",
168 | ")"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 29,
174 | "metadata": {},
175 | "outputs": [
176 | {
177 | "data": {
178 | "image/png": "\n",
179 | "text/plain": [
180 | ""
181 | ]
182 | },
183 | "metadata": {},
184 | "output_type": "display_data"
185 | }
186 | ],
187 | "source": [
188 | "plt.plot(history.history['loss'])\n",
189 | "plt.plot(history.history['val_loss'])\n",
190 | "plt.title('model loss')\n",
191 | "plt.ylabel('loss')\n",
192 | "plt.xlabel('epoch')\n",
193 | "plt.legend(['train', 'validation'], loc='upper left')  # val_loss comes from validation_split of the training data, not the test set\n",
194 | "plt.show()"
195 | ]
196 | },
197 | {
198 | "cell_type": "code",
199 | "execution_count": 30,
200 | "metadata": {},
201 | "outputs": [],
202 | "source": [
203 | "# Forecasting using test data\n",
204 | "y_pred = model.predict(X_test)"
205 | ]
206 | },
207 | {
208 | "cell_type": "code",
209 | "execution_count": 31,
210 | "metadata": {},
211 | "outputs": [
212 | {
213 | "data": {
214 | "image/png": "\n",
215 | "text/plain": [
216 | ""
217 | ]
218 | },
219 | "metadata": {},
220 | "output_type": "display_data"
221 | }
222 | ],
223 | "source": [
224 | "fig, ax = plt.subplots()  # target/prediction i is dated test.index[i + time_steps], so skip the first window\n",
225 | "pd.DataFrame({'Date': test.index[time_steps:], 'Views': y_test}).plot('Date', ax=ax)\n",
226 | "pd.DataFrame({'Date': test.index[time_steps:], 'Views': y_pred.flatten()}).plot('Date', ax=ax)\n",
227 | "plt.legend(['test', 'forecast'], loc='upper left')\n",
228 | "plt.ylabel('Views (in million)')\n",
229 | "plt.show()"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 32,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stdout",
239 | "output_type": "stream",
240 | "text": [
241 | "RMSE:4.173544036830901\n"
242 | ]
243 | }
244 | ],
245 | "source": [
246 | "print('RMSE:'+str(np.sqrt(np.mean(np.square(y_pred.flatten() - y_test)))))"
247 | ]
248 | }
249 | ],
250 | "metadata": {
251 | "kernelspec": {
252 | "display_name": "Python 3",
253 | "language": "python",
254 | "name": "python3"
255 | },
256 | "language_info": {
257 | "codemirror_mode": {
258 | "name": "ipython",
259 | "version": 3
260 | },
261 | "file_extension": ".py",
262 | "mimetype": "text/x-python",
263 | "name": "python",
264 | "nbconvert_exporter": "python",
265 | "pygments_lexer": "ipython3",
266 | "version": "3.7.6"
267 | }
268 | },
269 | "nbformat": 4,
270 | "nbformat_minor": 2
271 | }
272 |
--------------------------------------------------------------------------------
/model/arima_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/model/arima_model.pkl
--------------------------------------------------------------------------------
/model/arma_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/model/arma_model.pkl
--------------------------------------------------------------------------------
/model/auto_arima_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/model/auto_arima_model.pkl
--------------------------------------------------------------------------------
/model/exp_smoothing_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/model/exp_smoothing_model.pkl
--------------------------------------------------------------------------------
/model/prophet_model.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/model/prophet_model.pkl
--------------------------------------------------------------------------------
/req.txt:
--------------------------------------------------------------------------------
1 | # This file may be used to create an environment using:
2 | # $ conda create --name <env> --file <this file>
3 | # platform: osx-64
4 | blas=1.0=mkl
5 | ca-certificates=2020.1.1=0
6 | cctools=927.0.2=h5ba7a2e_4
7 | certifi=2020.4.5.2=py37_0
8 | clang=9.0.1=default_hf57f61e_0
9 | clang-tools=9.0.1=default_hf57f61e_0
10 | clang_osx-64=9.0.1=h05bbb7f_0
11 | clangxx=9.0.1=default_hf57f61e_0
12 | clangxx_osx-64=9.0.1=h05bbb7f_2
13 | click=7.1.2=py_0
14 | compiler-rt=9.0.1=h6a512c6_3
15 | compiler-rt_osx-64=9.0.1=h99342c6_3
16 | convertdate=2.2.1=pyh9f0ad1d_0
17 | cycler=0.10.0=py_2
18 | cython=0.29.20=py37h570ac47_0
19 | ephem=3.7.7.1=py37h0b31af3_0
20 | fbprophet=0.6=py37h4a8c4bd_0
21 | flask=1.1.2=py_0
22 | freetype=2.10.2=h8da9a1a_0
23 | holidays=0.10.2=pyh9f0ad1d_0
24 | intel-openmp=2019.4=233
25 | itsdangerous=1.1.0=py37_0
26 | jinja2=2.11.2=py_0
27 | joblib=0.15.1=py_0
28 | kiwisolver=1.2.0=py37ha1cc60f_0
29 | korean_lunar_calendar=0.2.1=pyh9f0ad1d_0
30 | ld64=450.3=h3c32e8a_4
31 | libcxx=10.0.0=1
32 | libedit=3.1.20191231=haf1e3a3_0
33 | libffi=3.3=h0a44026_1
34 | libgfortran=3.0.1=h93005f0_2
35 | libllvm9=9.0.1=h7475705_1
36 | libpng=1.6.37=hbbe82c9_1
37 | llvm-openmp=10.0.0=h28b9765_0
38 | lunarcalendar=0.0.9=py_0
39 | markupsafe=1.1.1=py37h1de35cc_0
40 | matplotlib=3.2.1=0
41 | matplotlib-base=3.2.1=py37hddda452_0
42 | mkl=2019.4=233
43 | mkl-service=2.3.0=py37hfbe908c_0
44 | mkl_fft=1.1.0=py37hc64f4ea_0
45 | mkl_random=1.1.1=py37h959d312_0
46 | ncurses=6.2=h0a44026_1
47 | numpy=1.18.1=py37h7241aed_0
48 | numpy-base=1.18.1=py37h3304bdc_1
49 | openssl=1.1.1g=h1de35cc_0
50 | pandas=1.0.4=py37h959d312_0
51 | patsy=0.5.1=py37_0
52 | pip=20.1.1=py37_1
53 | pmdarima=1.2.0=py37h56e435c_7
54 | pymeeus=0.3.7=pyh9f0ad1d_0
55 | pyparsing=2.4.7=pyh9f0ad1d_0
56 | pystan=2.17.1.0=py37h1702cab_1003
57 | python=3.7.7=hf48f09d_4
58 | python-dateutil=2.8.1=py_0
59 | python_abi=3.7=1_cp37m
60 | pytz=2020.1=py_0
61 | readline=8.0=h1de35cc_0
62 | scikit-learn=0.22.1=py37h27c97d8_0
63 | scipy=1.4.1=py37h9fa6033_0
64 | setuptools=47.3.0=py37_0
65 | six=1.15.0=py_0
66 | sqlite=3.32.2=hffcf06c_0
67 | statsmodels=0.11.1=py37haf1e3a3_0
68 | tapi=1000.10.8=ha1b3eb9_4
69 | tk=8.6.10=hb0a8c7a_0
70 | tornado=6.0.4=py37h9bfed18_1
71 | werkzeug=1.0.1=py_0
72 | wheel=0.34.2=py37_0
73 | xz=5.2.5=h1de35cc_0
74 | zlib=1.2.11=h1de35cc_3
75 |
--------------------------------------------------------------------------------
/static/arima_plot_1591687174.918447.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/static/arima_plot_1591687174.918447.png
--------------------------------------------------------------------------------
/static/arma_plot_1591687215.727288.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/static/arma_plot_1591687215.727288.png
--------------------------------------------------------------------------------
/static/auto_arima_plot_1591687207.990299.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/static/auto_arima_plot_1591687207.990299.png
--------------------------------------------------------------------------------
/static/exp_plot_1591687185.701119.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/static/exp_plot_1591687185.701119.png
--------------------------------------------------------------------------------
/static/plot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/static/plot.png
--------------------------------------------------------------------------------
/static/prophet_plot_1591687195.249861.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Subikshaa/Time-Series-Forecasting-on-Web-Traffic-Dataset/bffc37233ef1f913a9e1e6c8bbb6a1d77008f2c6/static/prophet_plot_1591687195.249861.png
--------------------------------------------------------------------------------
/static/style.css:
--------------------------------------------------------------------------------
1 | /* Style inputs, select elements and textareas */
2 | input[type=text], select, textarea{
3 | font-size: 16px;
4 | width: 100%;
5 | padding: 12px;
6 | border: 1px solid #ccc;
7 | border-radius: 4px;
8 | box-sizing: border-box;
9 | resize: vertical;
10 | }
11 |
12 | /* Style the label to display next to the inputs */
13 | label {
14 | font-size: 12px;
15 | padding: 12px 12px 12px 0;
16 | display: inline-block;
17 | }
18 |
19 | /* Style the submit button */
20 | input[type=submit] {
21 | background-color: #4CAF50;
22 | color: white;
23 | padding: 12px 20px;
24 | border: none;
25 | border-radius: 4px;
26 | cursor: pointer;
27 | float: none; /* "center" is not a valid float value and was ignored by browsers; "none" is the value that was in effect */
28 | }
29 |
30 | /* Style the container */
31 | .container {
32 | border-radius: 5px;
33 | background-color: #f2f2f2;
34 | padding: 20px;
35 | width: 90%;
36 | margin: auto;
37 | }
38 |
39 | /* Floating column for labels: 25% width */
40 | .col-25 {
41 | float: left;
42 | width: 25%;
43 | margin-top: 6px;
44 | }
45 |
46 | /* Floating column for inputs: 75% width */
47 | .col-75 {
48 | float: left;
49 | width: 75%;
50 | margin-top: 6px;
51 | }
52 |
53 | /* Clear floats after the columns */
54 | .row:after {
55 | content: "";
56 | display: table;
57 | clear: both;
58 | }
59 |
60 | /* Responsive layout - when the screen is less than 600px wide, make the two columns stack on top of each other instead of next to each other */
61 | @media screen and (max-width: 600px) {
62 | .col-25, .col-75, input[type=submit] {
63 | width: 100%;
64 | margin-top: 0;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/templates/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | ML API
6 |
7 |
13 |
14 |
15 |
16 |
17 |
Time series forecasting
18 |
33 |
34 | {% if fcast %}
35 | Forecasting using {{ forecast }}
36 |
37 |
38 |

39 |
40 | {% endif %}
41 |
42 |
43 |
--------------------------------------------------------------------------------