├── README.md
└── First Innings Score Prediction - IPL.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # IPL-score-prediction
--------------------------------------------------------------------------------
/First Innings Score Prediction - IPL.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "nbformat": 4,
3 | "nbformat_minor": 0,
4 | "metadata": {
5 | "colab": {
6 | "name": "First Innings Score Predictor - IPL.ipynb",
7 | "provenance": [],
8 | "collapsed_sections": [],
9 | "toc_visible": true
10 | },
11 | "kernelspec": {
12 | "name": "python3",
13 | "display_name": "Python 3"
14 | }
15 | },
16 | "cells": [
17 | {
18 | "cell_type": "code",
19 | "metadata": {
20 | "id": "Ye3oEMGgcWhp",
21 | "colab_type": "code",
22 | "colab": {}
23 | },
24 | "source": [
25 | "# Importing essential libraries\n",
26 | "import pandas as pd\n",
27 | "import numpy as np"
28 | ],
29 | "execution_count": 1,
30 | "outputs": []
31 | },
32 | {
33 | "cell_type": "code",
34 | "metadata": {
35 | "id": "Z8utzElrdPub",
36 | "colab_type": "code",
37 | "colab": {}
38 | },
39 | "source": [
40 | "# Loading the dataset\n",
41 | "df = pd.read_csv('ipl.csv')"
42 | ],
43 | "execution_count": 2,
44 | "outputs": []
45 | },
46 | {
47 | "cell_type": "markdown",
48 | "metadata": {
49 | "id": "5vWE2y4ldZC4",
50 | "colab_type": "text"
51 | },
52 | "source": [
53 | "## **Exploring the dataset**"
54 | ]
55 | },
56 | {
57 | "cell_type": "code",
58 | "metadata": {
59 | "id": "LEkrx9IWdYHh",
60 | "colab_type": "code",
61 | "colab": {
62 | "base_uri": "https://localhost:8080/",
63 | "height": 87
64 | },
65 | "outputId": "71a453fa-df59-40b6-81b5-3407a240b3f0"
66 | },
67 | "source": [
68 | "df.columns"
69 | ],
70 | "execution_count": 3,
71 | "outputs": [
72 | {
73 | "output_type": "execute_result",
74 | "data": {
75 | "text/plain": [
76 | "Index(['mid', 'date', 'venue', 'bat_team', 'bowl_team', 'batsman', 'bowler',\n",
77 | " 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5', 'striker',\n",
78 | " 'non-striker', 'total'],\n",
79 | " dtype='object')"
80 | ]
81 | },
82 | "metadata": {
83 | "tags": []
84 | },
85 | "execution_count": 3
86 | }
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "metadata": {
92 | "id": "3OZDpVGvddx6",
93 | "colab_type": "code",
94 | "colab": {
95 | "base_uri": "https://localhost:8080/",
96 | "height": 34
97 | },
98 | "outputId": "14bdb7d8-4de8-42d5-820e-a4151ff3533d"
99 | },
100 | "source": [
101 | "df.shape"
102 | ],
103 | "execution_count": 4,
104 | "outputs": [
105 | {
106 | "output_type": "execute_result",
107 | "data": {
108 | "text/plain": [
109 | "(76014, 15)"
110 | ]
111 | },
112 | "metadata": {
113 | "tags": []
114 | },
115 | "execution_count": 4
116 | }
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "metadata": {
122 | "id": "JNodK4n7dpep",
123 | "colab_type": "code",
124 | "colab": {
125 | "base_uri": "https://localhost:8080/",
126 | "height": 301
127 | },
128 | "outputId": "55a278d8-cbc8-467a-892f-c33ed14fd8c1"
129 | },
130 | "source": [
131 | "df.dtypes"
132 | ],
133 | "execution_count": 5,
134 | "outputs": [
135 | {
136 | "output_type": "execute_result",
137 | "data": {
138 | "text/plain": [
139 | "mid int64\n",
140 | "date object\n",
141 | "venue object\n",
142 | "bat_team object\n",
143 | "bowl_team object\n",
144 | "batsman object\n",
145 | "bowler object\n",
146 | "runs int64\n",
147 | "wickets int64\n",
148 | "overs float64\n",
149 | "runs_last_5 int64\n",
150 | "wickets_last_5 int64\n",
151 | "striker int64\n",
152 | "non-striker int64\n",
153 | "total int64\n",
154 | "dtype: object"
155 | ]
156 | },
157 | "metadata": {
158 | "tags": []
159 | },
160 | "execution_count": 5
161 | }
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "metadata": {
167 | "id": "p8VJefH9drgh",
168 | "colab_type": "code",
169 | "colab": {
170 | "base_uri": "https://localhost:8080/",
171 | "height": 381
172 | },
173 | "outputId": "1bd26e10-6d71-4e6d-c747-9a4d014f1adc"
174 | },
175 | "source": [
176 | "df.head()"
177 | ],
178 | "execution_count": 6,
179 | "outputs": [
180 | {
181 | "output_type": "execute_result",
182 | "data": {
183 | "text/html": [
184 | "
\n",
185 | "\n",
198 | "
\n",
199 | " \n",
200 | " \n",
201 | " | \n",
202 | " mid | \n",
203 | " date | \n",
204 | " venue | \n",
205 | " bat_team | \n",
206 | " bowl_team | \n",
207 | " batsman | \n",
208 | " bowler | \n",
209 | " runs | \n",
210 | " wickets | \n",
211 | " overs | \n",
212 | " runs_last_5 | \n",
213 | " wickets_last_5 | \n",
214 | " striker | \n",
215 | " non-striker | \n",
216 | " total | \n",
217 | "
\n",
218 | " \n",
219 | " \n",
220 | " \n",
221 | " | 0 | \n",
222 | " 1 | \n",
223 | " 2008-04-18 | \n",
224 | " M Chinnaswamy Stadium | \n",
225 | " Kolkata Knight Riders | \n",
226 | " Royal Challengers Bangalore | \n",
227 | " SC Ganguly | \n",
228 | " P Kumar | \n",
229 | " 1 | \n",
230 | " 0 | \n",
231 | " 0.1 | \n",
232 | " 1 | \n",
233 | " 0 | \n",
234 | " 0 | \n",
235 | " 0 | \n",
236 | " 222 | \n",
237 | "
\n",
238 | " \n",
239 | " | 1 | \n",
240 | " 1 | \n",
241 | " 2008-04-18 | \n",
242 | " M Chinnaswamy Stadium | \n",
243 | " Kolkata Knight Riders | \n",
244 | " Royal Challengers Bangalore | \n",
245 | " BB McCullum | \n",
246 | " P Kumar | \n",
247 | " 1 | \n",
248 | " 0 | \n",
249 | " 0.2 | \n",
250 | " 1 | \n",
251 | " 0 | \n",
252 | " 0 | \n",
253 | " 0 | \n",
254 | " 222 | \n",
255 | "
\n",
256 | " \n",
257 | " | 2 | \n",
258 | " 1 | \n",
259 | " 2008-04-18 | \n",
260 | " M Chinnaswamy Stadium | \n",
261 | " Kolkata Knight Riders | \n",
262 | " Royal Challengers Bangalore | \n",
263 | " BB McCullum | \n",
264 | " P Kumar | \n",
265 | " 2 | \n",
266 | " 0 | \n",
267 | " 0.2 | \n",
268 | " 2 | \n",
269 | " 0 | \n",
270 | " 0 | \n",
271 | " 0 | \n",
272 | " 222 | \n",
273 | "
\n",
274 | " \n",
275 | " | 3 | \n",
276 | " 1 | \n",
277 | " 2008-04-18 | \n",
278 | " M Chinnaswamy Stadium | \n",
279 | " Kolkata Knight Riders | \n",
280 | " Royal Challengers Bangalore | \n",
281 | " BB McCullum | \n",
282 | " P Kumar | \n",
283 | " 2 | \n",
284 | " 0 | \n",
285 | " 0.3 | \n",
286 | " 2 | \n",
287 | " 0 | \n",
288 | " 0 | \n",
289 | " 0 | \n",
290 | " 222 | \n",
291 | "
\n",
292 | " \n",
293 | " | 4 | \n",
294 | " 1 | \n",
295 | " 2008-04-18 | \n",
296 | " M Chinnaswamy Stadium | \n",
297 | " Kolkata Knight Riders | \n",
298 | " Royal Challengers Bangalore | \n",
299 | " BB McCullum | \n",
300 | " P Kumar | \n",
301 | " 2 | \n",
302 | " 0 | \n",
303 | " 0.4 | \n",
304 | " 2 | \n",
305 | " 0 | \n",
306 | " 0 | \n",
307 | " 0 | \n",
308 | " 222 | \n",
309 | "
\n",
310 | " \n",
311 | "
\n",
312 | "
"
313 | ],
314 | "text/plain": [
315 | " mid date venue ... striker non-striker total\n",
316 | "0 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n",
317 | "1 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n",
318 | "2 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n",
319 | "3 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n",
320 | "4 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n",
321 | "\n",
322 | "[5 rows x 15 columns]"
323 | ]
324 | },
325 | "metadata": {
326 | "tags": []
327 | },
328 | "execution_count": 6
329 | }
330 | ]
331 | },
332 | {
333 | "cell_type": "markdown",
334 | "metadata": {
335 | "id": "srvJazxud7BB",
336 | "colab_type": "text"
337 | },
338 | "source": [
339 | "## **Data Cleaning**\n",
340 | "Points covered under this section:
\n",
341 | "*• Removing unwanted columns*
\n",
342 | "*• Keeping only consistent teams*
\n",
343 | "*• Removing the first 5 overs data in every match*
\n",
344 | "*• Converting the column 'date' from string into datetime object*
"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "metadata": {
350 | "id": "t3w2hRVbekfq",
351 | "colab_type": "code",
352 | "colab": {
353 | "base_uri": "https://localhost:8080/",
354 | "height": 87
355 | },
356 | "outputId": "dbc88fd6-19ed-43b1-94b2-8c19bb9bbd6a"
357 | },
358 | "source": [
359 | "df.columns"
360 | ],
361 | "execution_count": 7,
362 | "outputs": [
363 | {
364 | "output_type": "execute_result",
365 | "data": {
366 | "text/plain": [
367 | "Index(['mid', 'date', 'venue', 'bat_team', 'bowl_team', 'batsman', 'bowler',\n",
368 | " 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5', 'striker',\n",
369 | " 'non-striker', 'total'],\n",
370 | " dtype='object')"
371 | ]
372 | },
373 | "metadata": {
374 | "tags": []
375 | },
376 | "execution_count": 7
377 | }
378 | ]
379 | },
380 | {
381 | "cell_type": "code",
382 | "metadata": {
383 | "id": "bOyVrvsSdyFx",
384 | "colab_type": "code",
385 | "colab": {
386 | "base_uri": "https://localhost:8080/",
387 | "height": 52
388 | },
389 | "outputId": "4a70f21e-7f2d-439a-bfb1-d0f7037cca49"
390 | },
391 | "source": [
392 | "# Dropping the identifier / player-level columns that are not used as model features\n",
393 | "columns_to_remove = ['mid', 'venue', 'batsman', 'bowler', 'striker', 'non-striker']\n",
394 | "\n",
395 | "print('Before removing unwanted columns: {}'.format(df.shape))\n",
396 | "df = df.drop(columns=columns_to_remove)  # rebind to the reduced frame rather than mutating in place\n",
397 | "print('After removing unwanted columns: {}'.format(df.shape))"
398 | ],
399 | "execution_count": 8,
400 | "outputs": [
401 | {
402 | "output_type": "stream",
403 | "text": [
404 | "Before removing unwanted columns: (76014, 15)\n",
405 | "After removing unwanted columns: (76014, 9)\n"
406 | ],
407 | "name": "stdout"
408 | }
409 | ]
410 | },
411 | {
412 | "cell_type": "code",
413 | "metadata": {
414 | "id": "VeYhEaB6fQdC",
415 | "colab_type": "code",
416 | "colab": {
417 | "base_uri": "https://localhost:8080/",
418 | "height": 70
419 | },
420 | "outputId": "93c091c8-16ce-47be-a79b-46e5dbe1f691"
421 | },
422 | "source": [
423 | "df.columns"
424 | ],
425 | "execution_count": 9,
426 | "outputs": [
427 | {
428 | "output_type": "execute_result",
429 | "data": {
430 | "text/plain": [
431 | "Index(['date', 'bat_team', 'bowl_team', 'runs', 'wickets', 'overs',\n",
432 | " 'runs_last_5', 'wickets_last_5', 'total'],\n",
433 | " dtype='object')"
434 | ]
435 | },
436 | "metadata": {
437 | "tags": []
438 | },
439 | "execution_count": 9
440 | }
441 | ]
442 | },
443 | {
444 | "cell_type": "code",
445 | "metadata": {
446 | "id": "FCfuyal8hvXt",
447 | "colab_type": "code",
448 | "colab": {
449 | "base_uri": "https://localhost:8080/",
450 | "height": 197
451 | },
452 | "outputId": "b128f1c8-dd1d-4283-bd84-f371fc50982b"
453 | },
454 | "source": [
455 | "df.head()"
456 | ],
457 | "execution_count": 10,
458 | "outputs": [
459 | {
460 | "output_type": "execute_result",
461 | "data": {
462 | "text/html": [
463 | "\n",
464 | "\n",
477 | "
\n",
478 | " \n",
479 | " \n",
480 | " | \n",
481 | " date | \n",
482 | " bat_team | \n",
483 | " bowl_team | \n",
484 | " runs | \n",
485 | " wickets | \n",
486 | " overs | \n",
487 | " runs_last_5 | \n",
488 | " wickets_last_5 | \n",
489 | " total | \n",
490 | "
\n",
491 | " \n",
492 | " \n",
493 | " \n",
494 | " | 0 | \n",
495 | " 2008-04-18 | \n",
496 | " Kolkata Knight Riders | \n",
497 | " Royal Challengers Bangalore | \n",
498 | " 1 | \n",
499 | " 0 | \n",
500 | " 0.1 | \n",
501 | " 1 | \n",
502 | " 0 | \n",
503 | " 222 | \n",
504 | "
\n",
505 | " \n",
506 | " | 1 | \n",
507 | " 2008-04-18 | \n",
508 | " Kolkata Knight Riders | \n",
509 | " Royal Challengers Bangalore | \n",
510 | " 1 | \n",
511 | " 0 | \n",
512 | " 0.2 | \n",
513 | " 1 | \n",
514 | " 0 | \n",
515 | " 222 | \n",
516 | "
\n",
517 | " \n",
518 | " | 2 | \n",
519 | " 2008-04-18 | \n",
520 | " Kolkata Knight Riders | \n",
521 | " Royal Challengers Bangalore | \n",
522 | " 2 | \n",
523 | " 0 | \n",
524 | " 0.2 | \n",
525 | " 2 | \n",
526 | " 0 | \n",
527 | " 222 | \n",
528 | "
\n",
529 | " \n",
530 | " | 3 | \n",
531 | " 2008-04-18 | \n",
532 | " Kolkata Knight Riders | \n",
533 | " Royal Challengers Bangalore | \n",
534 | " 2 | \n",
535 | " 0 | \n",
536 | " 0.3 | \n",
537 | " 2 | \n",
538 | " 0 | \n",
539 | " 222 | \n",
540 | "
\n",
541 | " \n",
542 | " | 4 | \n",
543 | " 2008-04-18 | \n",
544 | " Kolkata Knight Riders | \n",
545 | " Royal Challengers Bangalore | \n",
546 | " 2 | \n",
547 | " 0 | \n",
548 | " 0.4 | \n",
549 | " 2 | \n",
550 | " 0 | \n",
551 | " 222 | \n",
552 | "
\n",
553 | " \n",
554 | "
\n",
555 | "
"
556 | ],
557 | "text/plain": [
558 | " date bat_team ... wickets_last_5 total\n",
559 | "0 2008-04-18 Kolkata Knight Riders ... 0 222\n",
560 | "1 2008-04-18 Kolkata Knight Riders ... 0 222\n",
561 | "2 2008-04-18 Kolkata Knight Riders ... 0 222\n",
562 | "3 2008-04-18 Kolkata Knight Riders ... 0 222\n",
563 | "4 2008-04-18 Kolkata Knight Riders ... 0 222\n",
564 | "\n",
565 | "[5 rows x 9 columns]"
566 | ]
567 | },
568 | "metadata": {
569 | "tags": []
570 | },
571 | "execution_count": 10
572 | }
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "metadata": {
578 | "id": "W2jQTWJhmIrt",
579 | "colab_type": "code",
580 | "colab": {
581 | "base_uri": "https://localhost:8080/",
582 | "height": 34
583 | },
584 | "outputId": "23463ddc-6c24-43f7-d60d-f43f64ad1a6b"
585 | },
586 | "source": [
587 | "df.index"
588 | ],
589 | "execution_count": 11,
590 | "outputs": [
591 | {
592 | "output_type": "execute_result",
593 | "data": {
594 | "text/plain": [
595 | "RangeIndex(start=0, stop=76014, step=1)"
596 | ]
597 | },
598 | "metadata": {
599 | "tags": []
600 | },
601 | "execution_count": 11
602 | }
603 | ]
604 | },
605 | {
606 | "cell_type": "code",
607 | "metadata": {
608 | "id": "HD-7eP27iWab",
609 | "colab_type": "code",
610 | "colab": {
611 | "base_uri": "https://localhost:8080/",
612 | "height": 123
613 | },
614 | "outputId": "2477db91-0a4e-41e3-9689-2abf870a2d4d"
615 | },
616 | "source": [
617 | "df['bat_team'].unique()"
618 | ],
619 | "execution_count": 12,
620 | "outputs": [
621 | {
622 | "output_type": "execute_result",
623 | "data": {
624 | "text/plain": [
625 | "array(['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',\n",
626 | " 'Mumbai Indians', 'Deccan Chargers', 'Kings XI Punjab',\n",
627 | " 'Royal Challengers Bangalore', 'Delhi Daredevils',\n",
628 | " 'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',\n",
629 | " 'Rising Pune Supergiants', 'Gujarat Lions',\n",
630 | " 'Rising Pune Supergiant'], dtype=object)"
631 | ]
632 | },
633 | "metadata": {
634 | "tags": []
635 | },
636 | "execution_count": 12
637 | }
638 | ]
639 | },
640 | {
641 | "cell_type": "code",
642 | "metadata": {
643 | "id": "VcC_Crmhih1r",
644 | "colab_type": "code",
645 | "colab": {}
646 | },
647 | "source": [
648 | "consistent_teams = ['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',\n",
649 | " 'Mumbai Indians', 'Kings XI Punjab', 'Royal Challengers Bangalore',\n",
650 | " 'Delhi Daredevils', 'Sunrisers Hyderabad']"
651 | ],
652 | "execution_count": 13,
653 | "outputs": []
654 | },
655 | {
656 | "cell_type": "code",
657 | "metadata": {
658 | "id": "MXmAt3Colmdu",
659 | "colab_type": "code",
660 | "colab": {
661 | "base_uri": "https://localhost:8080/",
662 | "height": 52
663 | },
664 | "outputId": "9874be5e-a71f-418a-8d90-45268c6c634f"
665 | },
666 | "source": [
667 | "# Keeping only consistent teams\n",
668 | "print('Before removing inconsistent teams: {}'.format(df.shape))\n",
669 | "df = df[(df['bat_team'].isin(consistent_teams)) & (df['bowl_team'].isin(consistent_teams))]\n",
670 | "print('After removing inconsistent teams: {}'.format(df.shape))"
671 | ],
672 | "execution_count": 14,
673 | "outputs": [
674 | {
675 | "output_type": "stream",
676 | "text": [
677 | "Before removing inconsistent teams: (76014, 9)\n",
678 | "After removing inconsistent teams: (53811, 9)\n"
679 | ],
680 | "name": "stdout"
681 | }
682 | ]
683 | },
684 | {
685 | "cell_type": "code",
686 | "metadata": {
687 | "id": "AB8uOLpHoKtB",
688 | "colab_type": "code",
689 | "colab": {
690 | "base_uri": "https://localhost:8080/",
691 | "height": 70
692 | },
693 | "outputId": "89633380-9611-4552-e964-2d7fdbe8839c"
694 | },
695 | "source": [
696 | "df['bat_team'].unique()"
697 | ],
698 | "execution_count": 15,
699 | "outputs": [
700 | {
701 | "output_type": "execute_result",
702 | "data": {
703 | "text/plain": [
704 | "array(['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',\n",
705 | " 'Mumbai Indians', 'Kings XI Punjab', 'Royal Challengers Bangalore',\n",
706 | " 'Delhi Daredevils', 'Sunrisers Hyderabad'], dtype=object)"
707 | ]
708 | },
709 | "metadata": {
710 | "tags": []
711 | },
712 | "execution_count": 15
713 | }
714 | ]
715 | },
716 | {
717 | "cell_type": "code",
718 | "metadata": {
719 | "id": "HVouIS8QqFwX",
720 | "colab_type": "code",
721 | "colab": {
722 | "base_uri": "https://localhost:8080/",
723 | "height": 52
724 | },
725 | "outputId": "5cf9861c-e3e4-41c4-937e-bf1de77c6556"
726 | },
727 | "source": [
728 | "# Removing the first 5 overs data in every match\n",
729 | "print('Before removing first 5 overs data: {}'.format(df.shape))\n",
730 | "df = df[df['overs']>=5.0]\n",
731 | "print('After removing first 5 overs data: {}'.format(df.shape))"
732 | ],
733 | "execution_count": 16,
734 | "outputs": [
735 | {
736 | "output_type": "stream",
737 | "text": [
738 | "Before removing first 5 overs data: (53811, 9)\n",
739 | "After removing first 5 overs data: (40108, 9)\n"
740 | ],
741 | "name": "stdout"
742 | }
743 | ]
744 | },
745 | {
746 | "cell_type": "code",
747 | "metadata": {
748 | "id": "J_R7UNqkf3Mt",
749 | "colab_type": "code",
750 | "colab": {
751 | "base_uri": "https://localhost:8080/",
752 | "height": 52
753 | },
754 | "outputId": "e2383f98-4771-41d9-c6a4-ebd96a7aa575"
755 | },
756 | "source": [
757 | "# Converting the column 'date' from string into datetime (vectorized; safe to re-run)\n",
758 | "from datetime import datetime  # not used in this cell any more; kept in case a later cell relies on it\n",
759 | "print(\"Before converting 'date' column from string to datetime object: {}\".format(type(df.iloc[0,0])))\n",
760 | "df = df.assign(date=pd.to_datetime(df['date'], format='%Y-%m-%d'))  # new frame: no assignment into a filtered slice\n",
761 | "print(\"After converting 'date' column from string to datetime object: {}\".format(type(df.iloc[0,0])))"
762 | ],
763 | "execution_count": 17,
764 | "outputs": [
765 | {
766 | "output_type": "stream",
767 | "text": [
768 | "Before converting 'date' column from string to datetime object: \n",
769 | "After converting 'date' column from string to datetime object: \n"
770 | ],
771 | "name": "stdout"
772 | }
773 | ]
774 | },
775 | {
776 | "cell_type": "code",
777 | "metadata": {
778 | "id": "try0O4SSxjBE",
779 | "colab_type": "code",
780 | "colab": {
781 | "base_uri": "https://localhost:8080/",
782 | "height": 651
783 | },
784 | "outputId": "3bde5d22-653f-49a4-965b-f992b2a6374d"
785 | },
786 | "source": [
787 | "# Selecting correlated features using Heatmap\n",
788 | "import matplotlib.pyplot as plt\n",
789 | "import seaborn as sns\n",
790 | "\n",
791 | "# Correlate numeric columns only: team names are strings and 'date' is a datetime, so they are excluded\n",
792 | "corr_matrix = df.select_dtypes(include='number').corr()\n",
793 | "top_corr_features = corr_matrix.index\n",
794 | "\n",
795 | "# Plotting the heatmap (reuses corr_matrix instead of computing the same correlation a second time)\n",
796 | "plt.figure(figsize=(13,10))\n",
797 | "g = sns.heatmap(data=corr_matrix, annot=True, cmap='RdYlGn')"
798 | ],
799 | "execution_count": 18,
800 | "outputs": [
801 | {
802 | "output_type": "stream",
803 | "text": [
804 | "/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
805 | " import pandas.util.testing as tm\n"
806 | ],
807 | "name": "stderr"
808 | },
809 | {
810 | "output_type": "display_data",
811 | "data": {
812 | "image/png": "\n",
813 | "text/plain": [
814 | ""
815 | ]
816 | },
817 | "metadata": {
818 | "tags": [],
819 | "needs_background": "light"
820 | }
821 | }
822 | ]
823 | },
824 | {
825 | "cell_type": "markdown",
826 | "metadata": {
827 | "id": "o9Qt4bOGpKh3",
828 | "colab_type": "text"
829 | },
830 | "source": [
831 | "## **Data Preprocessing**\n",
832 | "*• Handling categorical features*
\n",
833 | "*• Splitting dataset into train and test set on the basis of date*
"
834 | ]
835 | },
836 | {
837 | "cell_type": "code",
838 | "metadata": {
839 | "id": "ZpJLe1YVfrr6",
840 | "colab_type": "code",
841 | "colab": {
842 | "base_uri": "https://localhost:8080/",
843 | "height": 212
844 | },
845 | "outputId": "ced0b921-0d9e-4f51-f3ce-841d16fcff0f"
846 | },
847 | "source": [
848 | "# Converting categorical features using OneHotEncoding method\n",
849 | "encoded_df = pd.get_dummies(data=df, columns=['bat_team', 'bowl_team'])\n",
850 | "encoded_df.columns"
851 | ],
852 | "execution_count": 19,
853 | "outputs": [
854 | {
855 | "output_type": "execute_result",
856 | "data": {
857 | "text/plain": [
858 | "Index(['date', 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5',\n",
859 | " 'total', 'bat_team_Chennai Super Kings', 'bat_team_Delhi Daredevils',\n",
860 | " 'bat_team_Kings XI Punjab', 'bat_team_Kolkata Knight Riders',\n",
861 | " 'bat_team_Mumbai Indians', 'bat_team_Rajasthan Royals',\n",
862 | " 'bat_team_Royal Challengers Bangalore', 'bat_team_Sunrisers Hyderabad',\n",
863 | " 'bowl_team_Chennai Super Kings', 'bowl_team_Delhi Daredevils',\n",
864 | " 'bowl_team_Kings XI Punjab', 'bowl_team_Kolkata Knight Riders',\n",
865 | " 'bowl_team_Mumbai Indians', 'bowl_team_Rajasthan Royals',\n",
866 | " 'bowl_team_Royal Challengers Bangalore',\n",
867 | " 'bowl_team_Sunrisers Hyderabad'],\n",
868 | " dtype='object')"
869 | ]
870 | },
871 | "metadata": {
872 | "tags": []
873 | },
874 | "execution_count": 19
875 | }
876 | ]
877 | },
878 | {
879 | "cell_type": "code",
880 | "metadata": {
881 | "id": "ZtrtRO7AiLPz",
882 | "colab_type": "code",
883 | "colab": {
884 | "base_uri": "https://localhost:8080/",
885 | "height": 334
886 | },
887 | "outputId": "925d1844-a1b0-49ac-bced-79940195d950"
888 | },
889 | "source": [
890 | "encoded_df.head()"
891 | ],
892 | "execution_count": 20,
893 | "outputs": [
894 | {
895 | "output_type": "execute_result",
896 | "data": {
897 | "text/html": [
898 | "\n",
899 | "\n",
912 | "
\n",
913 | " \n",
914 | " \n",
915 | " | \n",
916 | " date | \n",
917 | " runs | \n",
918 | " wickets | \n",
919 | " overs | \n",
920 | " runs_last_5 | \n",
921 | " wickets_last_5 | \n",
922 | " total | \n",
923 | " bat_team_Chennai Super Kings | \n",
924 | " bat_team_Delhi Daredevils | \n",
925 | " bat_team_Kings XI Punjab | \n",
926 | " bat_team_Kolkata Knight Riders | \n",
927 | " bat_team_Mumbai Indians | \n",
928 | " bat_team_Rajasthan Royals | \n",
929 | " bat_team_Royal Challengers Bangalore | \n",
930 | " bat_team_Sunrisers Hyderabad | \n",
931 | " bowl_team_Chennai Super Kings | \n",
932 | " bowl_team_Delhi Daredevils | \n",
933 | " bowl_team_Kings XI Punjab | \n",
934 | " bowl_team_Kolkata Knight Riders | \n",
935 | " bowl_team_Mumbai Indians | \n",
936 | " bowl_team_Rajasthan Royals | \n",
937 | " bowl_team_Royal Challengers Bangalore | \n",
938 | " bowl_team_Sunrisers Hyderabad | \n",
939 | "
\n",
940 | " \n",
941 | " \n",
942 | " \n",
943 | " | 32 | \n",
944 | " 2008-04-18 | \n",
945 | " 61 | \n",
946 | " 0 | \n",
947 | " 5.1 | \n",
948 | " 59 | \n",
949 | " 0 | \n",
950 | " 222 | \n",
951 | " 0 | \n",
952 | " 0 | \n",
953 | " 0 | \n",
954 | " 1 | \n",
955 | " 0 | \n",
956 | " 0 | \n",
957 | " 0 | \n",
958 | " 0 | \n",
959 | " 0 | \n",
960 | " 0 | \n",
961 | " 0 | \n",
962 | " 0 | \n",
963 | " 0 | \n",
964 | " 0 | \n",
965 | " 1 | \n",
966 | " 0 | \n",
967 | "
\n",
968 | " \n",
969 | " | 33 | \n",
970 | " 2008-04-18 | \n",
971 | " 61 | \n",
972 | " 1 | \n",
973 | " 5.2 | \n",
974 | " 59 | \n",
975 | " 1 | \n",
976 | " 222 | \n",
977 | " 0 | \n",
978 | " 0 | \n",
979 | " 0 | \n",
980 | " 1 | \n",
981 | " 0 | \n",
982 | " 0 | \n",
983 | " 0 | \n",
984 | " 0 | \n",
985 | " 0 | \n",
986 | " 0 | \n",
987 | " 0 | \n",
988 | " 0 | \n",
989 | " 0 | \n",
990 | " 0 | \n",
991 | " 1 | \n",
992 | " 0 | \n",
993 | "
\n",
994 | " \n",
995 | " | 34 | \n",
996 | " 2008-04-18 | \n",
997 | " 61 | \n",
998 | " 1 | \n",
999 | " 5.3 | \n",
1000 | " 59 | \n",
1001 | " 1 | \n",
1002 | " 222 | \n",
1003 | " 0 | \n",
1004 | " 0 | \n",
1005 | " 0 | \n",
1006 | " 1 | \n",
1007 | " 0 | \n",
1008 | " 0 | \n",
1009 | " 0 | \n",
1010 | " 0 | \n",
1011 | " 0 | \n",
1012 | " 0 | \n",
1013 | " 0 | \n",
1014 | " 0 | \n",
1015 | " 0 | \n",
1016 | " 0 | \n",
1017 | " 1 | \n",
1018 | " 0 | \n",
1019 | "
\n",
1020 | " \n",
1021 | " | 35 | \n",
1022 | " 2008-04-18 | \n",
1023 | " 61 | \n",
1024 | " 1 | \n",
1025 | " 5.4 | \n",
1026 | " 59 | \n",
1027 | " 1 | \n",
1028 | " 222 | \n",
1029 | " 0 | \n",
1030 | " 0 | \n",
1031 | " 0 | \n",
1032 | " 1 | \n",
1033 | " 0 | \n",
1034 | " 0 | \n",
1035 | " 0 | \n",
1036 | " 0 | \n",
1037 | " 0 | \n",
1038 | " 0 | \n",
1039 | " 0 | \n",
1040 | " 0 | \n",
1041 | " 0 | \n",
1042 | " 0 | \n",
1043 | " 1 | \n",
1044 | " 0 | \n",
1045 | "
\n",
1046 | " \n",
1047 | " | 36 | \n",
1048 | " 2008-04-18 | \n",
1049 | " 61 | \n",
1050 | " 1 | \n",
1051 | " 5.5 | \n",
1052 | " 58 | \n",
1053 | " 1 | \n",
1054 | " 222 | \n",
1055 | " 0 | \n",
1056 | " 0 | \n",
1057 | " 0 | \n",
1058 | " 1 | \n",
1059 | " 0 | \n",
1060 | " 0 | \n",
1061 | " 0 | \n",
1062 | " 0 | \n",
1063 | " 0 | \n",
1064 | " 0 | \n",
1065 | " 0 | \n",
1066 | " 0 | \n",
1067 | " 0 | \n",
1068 | " 0 | \n",
1069 | " 1 | \n",
1070 | " 0 | \n",
1071 | "
\n",
1072 | " \n",
1073 | "
\n",
1074 | "
"
1075 | ],
1076 | "text/plain": [
1077 | " date ... bowl_team_Sunrisers Hyderabad\n",
1078 | "32 2008-04-18 ... 0\n",
1079 | "33 2008-04-18 ... 0\n",
1080 | "34 2008-04-18 ... 0\n",
1081 | "35 2008-04-18 ... 0\n",
1082 | "36 2008-04-18 ... 0\n",
1083 | "\n",
1084 | "[5 rows x 23 columns]"
1085 | ]
1086 | },
1087 | "metadata": {
1088 | "tags": []
1089 | },
1090 | "execution_count": 20
1091 | }
1092 | ]
1093 | },
1094 | {
1095 | "cell_type": "code",
1096 | "metadata": {
1097 | "id": "dVj9eyGJj5-J",
1098 | "colab_type": "code",
1099 | "colab": {}
1100 | },
1101 | "source": [
1102 | "# Rearranging the columns\n",
1103 | "encoded_df = encoded_df[['date', 'bat_team_Chennai Super Kings', 'bat_team_Delhi Daredevils', 'bat_team_Kings XI Punjab',\n",
1104 | " 'bat_team_Kolkata Knight Riders', 'bat_team_Mumbai Indians', 'bat_team_Rajasthan Royals',\n",
1105 | " 'bat_team_Royal Challengers Bangalore', 'bat_team_Sunrisers Hyderabad',\n",
1106 | " 'bowl_team_Chennai Super Kings', 'bowl_team_Delhi Daredevils', 'bowl_team_Kings XI Punjab',\n",
1107 | " 'bowl_team_Kolkata Knight Riders', 'bowl_team_Mumbai Indians', 'bowl_team_Rajasthan Royals',\n",
1108 | " 'bowl_team_Royal Challengers Bangalore', 'bowl_team_Sunrisers Hyderabad',\n",
1109 | " 'overs', 'runs', 'wickets', 'runs_last_5', 'wickets_last_5', 'total']]"
1110 | ],
1111 | "execution_count": 21,
1112 | "outputs": []
1113 | },
1114 | {
1115 | "cell_type": "code",
1116 | "metadata": {
1117 | "id": "8raq2VVXeHyr",
1118 | "colab_type": "code",
1119 | "colab": {
1120 | "base_uri": "https://localhost:8080/",
1121 | "height": 34
1122 | },
1123 | "outputId": "c89b9399-6397-438f-cd3c-3e7ca0ae63ee"
1124 | },
1125 | "source": [
1126 | "# Splitting the data into train and test set by match year (train: <= 2016, test: >= 2017)\n",
1127 | "train_mask = encoded_df['date'].dt.year <= 2016\n",
1128 | "test_mask = encoded_df['date'].dt.year >= 2017\n",
1129 | "\n",
1130 | "y_train = encoded_df.loc[train_mask, 'total'].values\n",
1131 | "y_test = encoded_df.loc[test_mask, 'total'].values\n",
1132 | "\n",
1133 | "# Features: every column except the target 'total' and 'date' (which is only needed to build the masks)\n",
1134 | "X_train = encoded_df.loc[train_mask].drop(columns=['total', 'date'])\n",
1135 | "X_test = encoded_df.loc[test_mask].drop(columns=['total', 'date'])\n",
1136 | "\n",
1137 | "print(\"Training set: {} and Test set: {}\".format(X_train.shape, X_test.shape))"
1138 | ],
1139 | "execution_count": 22,
1140 | "outputs": [
1141 | {
1142 | "output_type": "stream",
1143 | "text": [
1144 | "Training set: (37330, 21) and Test set: (2778, 21)\n"
1145 | ],
1146 | "name": "stdout"
1147 | }
1148 | ]
1149 | },
1150 | {
1151 | "cell_type": "markdown",
1152 | "metadata": {
1153 | "id": "QRZoptrOlsT5",
1154 | "colab_type": "text"
1155 | },
1156 | "source": [
1157 | "## **Model Building**\n",
1158 | "I will experiment with 4 different algorithms, which are as follows:
\n",
1159 | "*• Linear Regression*
\n",
1160 | "*• Decision Tree Regression*
\n",
1161 | "*• Random Forest Regression*
\n",
1162 | "\n",
1163 | "----- Boosting Algorithm -----
\n",
1164 | "*• Adaptive Boosting (AdaBoost) Algorithm*
"
1165 | ]
1166 | },
1167 | {
1168 | "cell_type": "markdown",
1169 | "metadata": {
1170 | "id": "w5NJZyB8oFEw",
1171 | "colab_type": "text"
1172 | },
1173 | "source": [
1174 | "### *Linear Regression*"
1175 | ]
1176 | },
1177 | {
1178 | "cell_type": "code",
1179 | "metadata": {
1180 | "id": "TAbGSLrVln6Q",
1181 | "colab_type": "code",
1182 | "colab": {
1183 | "base_uri": "https://localhost:8080/",
1184 | "height": 34
1185 | },
1186 | "outputId": "d33e0b5f-511c-42dd-f4e3-da8f8b6b2610"
1187 | },
1188 | "source": [
1189 | "# Linear Regression Model\n",
1190 | "from sklearn.linear_model import LinearRegression\n",
1191 | "linear_regressor = LinearRegression()\n",
1192 | "linear_regressor.fit(X_train,y_train)"
1193 | ],
1194 | "execution_count": 23,
1195 | "outputs": [
1196 | {
1197 | "output_type": "execute_result",
1198 | "data": {
1199 | "text/plain": [
1200 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)"
1201 | ]
1202 | },
1203 | "metadata": {
1204 | "tags": []
1205 | },
1206 | "execution_count": 23
1207 | }
1208 | ]
1209 | },
1210 | {
1211 | "cell_type": "code",
1212 | "metadata": {
1213 | "id": "JeKomR6-nfaX",
1214 | "colab_type": "code",
1215 | "colab": {}
1216 | },
1217 | "source": [
1218 | "# Predicting results\n",
1219 | "y_pred_lr = linear_regressor.predict(X_test)"
1220 | ],
1221 | "execution_count": 24,
1222 | "outputs": []
1223 | },
1224 | {
1225 | "cell_type": "code",
1226 | "metadata": {
1227 | "id": "xRPc6nsmmlbo",
1228 | "colab_type": "code",
1229 | "colab": {
1230 | "base_uri": "https://localhost:8080/",
1231 | "height": 87
1232 | },
1233 | "outputId": "24f2c7ee-a0dd-43c3-9efc-ba1d0856cad7"
1234 | },
1235 | "source": [
1236 | "# Linear Regression - Model Evaluation\n",
1237 | "from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse, accuracy_score\n",
1238 | "print(\"---- Linear Regression - Model Evaluation ----\")\n",
1239 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_lr)))\n",
1240 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_lr)))\n",
1241 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_lr))))"
1242 | ],
1243 | "execution_count": 25,
1244 | "outputs": [
1245 | {
1246 | "output_type": "stream",
1247 | "text": [
1248 | "---- Linear Regression - Model Evaluation ----\n",
1249 | "Mean Absolute Error (MAE): 12.11861754619329\n",
1250 | "Mean Squared Error (MSE): 251.00792310417438\n",
1251 | "Root Mean Squared Error (RMSE): 15.843229566732106\n"
1252 | ],
1253 | "name": "stdout"
1254 | }
1255 | ]
1256 | },
1257 | {
1258 | "cell_type": "markdown",
1259 | "metadata": {
1260 | "colab_type": "text",
1261 | "id": "fuPztrQQoLNg"
1262 | },
1263 | "source": [
1264 | "### *Decision Tree*"
1265 | ]
1266 | },
1267 | {
1268 | "cell_type": "code",
1269 | "metadata": {
1270 | "id": "drFWmrvBoC6x",
1271 | "colab_type": "code",
1272 | "colab": {
1273 | "base_uri": "https://localhost:8080/",
1274 | "height": 123
1275 | },
1276 | "outputId": "83a98e88-4307-4bbd-f941-74bb30dc5170"
1277 | },
1278 | "source": [
1279 | "# Decision Tree Regression Model\n",
1280 | "from sklearn.tree import DecisionTreeRegressor\n",
1281 | "decision_regressor = DecisionTreeRegressor()\n",
1282 | "decision_regressor.fit(X_train,y_train)"
1283 | ],
1284 | "execution_count": 26,
1285 | "outputs": [
1286 | {
1287 | "output_type": "execute_result",
1288 | "data": {
1289 | "text/plain": [
1290 | "DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n",
1291 | " max_features=None, max_leaf_nodes=None,\n",
1292 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
1293 | " min_samples_leaf=1, min_samples_split=2,\n",
1294 | " min_weight_fraction_leaf=0.0, presort='deprecated',\n",
1295 | " random_state=None, splitter='best')"
1296 | ]
1297 | },
1298 | "metadata": {
1299 | "tags": []
1300 | },
1301 | "execution_count": 26
1302 | }
1303 | ]
1304 | },
1305 | {
1306 | "cell_type": "code",
1307 | "metadata": {
1308 | "id": "cCl-LXmpofeq",
1309 | "colab_type": "code",
1310 | "colab": {}
1311 | },
1312 | "source": [
1313 | "# Predicting results\n",
1314 | "y_pred_dt = decision_regressor.predict(X_test)"
1315 | ],
1316 | "execution_count": 27,
1317 | "outputs": []
1318 | },
1319 | {
1320 | "cell_type": "code",
1321 | "metadata": {
1322 | "id": "fxQBkteHoj2Y",
1323 | "colab_type": "code",
1324 | "colab": {
1325 | "base_uri": "https://localhost:8080/",
1326 | "height": 87
1327 | },
1328 | "outputId": "bc4f87bf-0ce4-44fb-c957-11f85d3526e0"
1329 | },
1330 | "source": [
1331 | "# Decision Tree Regression - Model Evaluation\n",
1332 | "print(\"---- Decision Tree Regression - Model Evaluation ----\")\n",
1333 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_dt)))\n",
1334 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_dt)))\n",
1335 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_dt))))"
1336 | ],
1337 | "execution_count": 28,
1338 | "outputs": [
1339 | {
1340 | "output_type": "stream",
1341 | "text": [
1342 | "---- Decision Tree Regression - Model Evaluation ----\n",
1343 | "Mean Absolute Error (MAE): 17.08963282937365\n",
1344 | "Mean Squared Error (MSE): 531.0550755939524\n",
1345 | "Root Mean Squared Error (RMSE): 23.044632251219642\n"
1346 | ],
1347 | "name": "stdout"
1348 | }
1349 | ]
1350 | },
1351 | {
1352 | "cell_type": "markdown",
1353 | "metadata": {
1354 | "colab_type": "text",
1355 | "id": "scpqVJxCpFyB"
1356 | },
1357 | "source": [
1358 | "### *Random Forest*"
1359 | ]
1360 | },
1361 | {
1362 | "cell_type": "code",
1363 | "metadata": {
1364 | "id": "MNNGZMlRpKnq",
1365 | "colab_type": "code",
1366 | "colab": {
1367 | "base_uri": "https://localhost:8080/",
1368 | "height": 141
1369 | },
1370 | "outputId": "8d67934e-f177-4270-b2ba-93feba78fef6"
1371 | },
1372 | "source": [
1373 | "# Random Forest Regression Model\n",
1374 | "from sklearn.ensemble import RandomForestRegressor\n",
1375 | "random_regressor = RandomForestRegressor()\n",
1376 | "random_regressor.fit(X_train,y_train)"
1377 | ],
1378 | "execution_count": 29,
1379 | "outputs": [
1380 | {
1381 | "output_type": "execute_result",
1382 | "data": {
1383 | "text/plain": [
1384 | "RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',\n",
1385 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
1386 | " max_samples=None, min_impurity_decrease=0.0,\n",
1387 | " min_impurity_split=None, min_samples_leaf=1,\n",
1388 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n",
1389 | " n_estimators=100, n_jobs=None, oob_score=False,\n",
1390 | " random_state=None, verbose=0, warm_start=False)"
1391 | ]
1392 | },
1393 | "metadata": {
1394 | "tags": []
1395 | },
1396 | "execution_count": 29
1397 | }
1398 | ]
1399 | },
1400 | {
1401 | "cell_type": "code",
1402 | "metadata": {
1403 | "colab_type": "code",
1404 | "id": "UpqtMy1jpYTd",
1405 | "colab": {}
1406 | },
1407 | "source": [
1408 | "# Predicting results\n",
1409 | "y_pred_rf = random_regressor.predict(X_test)"
1410 | ],
1411 | "execution_count": 30,
1412 | "outputs": []
1413 | },
1414 | {
1415 | "cell_type": "code",
1416 | "metadata": {
1417 | "id": "VPsdbFk_pdrH",
1418 | "colab_type": "code",
1419 | "colab": {
1420 | "base_uri": "https://localhost:8080/",
1421 | "height": 87
1422 | },
1423 | "outputId": "ad050bf7-70e5-4df5-a7ca-7fc0700cf7d5"
1424 | },
1425 | "source": [
1426 | "# Random Forest Regression - Model Evaluation\n",
1427 | "print(\"---- Random Forest Regression - Model Evaluation ----\")\n",
1428 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_rf)))\n",
1429 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_rf)))\n",
1430 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_rf))))"
1431 | ],
1432 | "execution_count": 31,
1433 | "outputs": [
1434 | {
1435 | "output_type": "stream",
1436 | "text": [
1437 | "---- Random Forest Regression - Model Evaluation ----\n",
1438 | "Mean Absolute Error (MAE): 13.76117836573074\n",
1439 | "Mean Squared Error (MSE): 330.21283012058035\n",
1440 | "Root Mean Squared Error (RMSE): 18.171759136654337\n"
1441 | ],
1442 | "name": "stdout"
1443 | }
1444 | ]
1445 | },
1446 | {
1447 | "cell_type": "markdown",
1448 | "metadata": {
1449 | "id": "wMd5-w5Tpv-Y",
1450 | "colab_type": "text"
1451 | },
1452 | "source": [
1453 | "*Note: Since the Linear Regression model performs best compared to the other two, we use this model and try to boost its performance using the AdaBoost algorithm.*"
1454 | ]
1455 | },
1456 | {
1457 | "cell_type": "markdown",
1458 | "metadata": {
1459 | "colab_type": "text",
1460 | "id": "cJNnp9xHssI8"
1461 | },
1462 | "source": [
1463 | "### *AdaBoost Algorithm*"
1464 | ]
1465 | },
1466 | {
1467 | "cell_type": "code",
1468 | "metadata": {
1469 | "colab_type": "code",
1470 | "id": "eua4hjEus0W2",
1471 | "colab": {
1472 | "base_uri": "https://localhost:8080/",
1473 | "height": 123
1474 | },
1475 | "outputId": "2290f73b-67f2-4551-a77a-7f2942e465c4"
1476 | },
1477 | "source": [
1478 | "# AdaBoost Model using Linear Regression as the base learner\n",
1479 | "from sklearn.ensemble import AdaBoostRegressor\n",
1480 | "adb_regressor = AdaBoostRegressor(base_estimator=linear_regressor, n_estimators=100)\n",
1481 | "adb_regressor.fit(X_train, y_train)"
1482 | ],
1483 | "execution_count": 32,
1484 | "outputs": [
1485 | {
1486 | "output_type": "execute_result",
1487 | "data": {
1488 | "text/plain": [
1489 | "AdaBoostRegressor(base_estimator=LinearRegression(copy_X=True,\n",
1490 | " fit_intercept=True,\n",
1491 | " n_jobs=None,\n",
1492 | " normalize=False),\n",
1493 | " learning_rate=1.0, loss='linear', n_estimators=100,\n",
1494 | " random_state=None)"
1495 | ]
1496 | },
1497 | "metadata": {
1498 | "tags": []
1499 | },
1500 | "execution_count": 32
1501 | }
1502 | ]
1503 | },
1504 | {
1505 | "cell_type": "code",
1506 | "metadata": {
1507 | "id": "mBRmYqGvtdaJ",
1508 | "colab_type": "code",
1509 | "colab": {}
1510 | },
1511 | "source": [
1512 | "# Predicting results\n",
1513 | "y_pred_adb = adb_regressor.predict(X_test)"
1514 | ],
1515 | "execution_count": 33,
1516 | "outputs": []
1517 | },
1518 | {
1519 | "cell_type": "code",
1520 | "metadata": {
1521 | "id": "67pZWWEKtgiF",
1522 | "colab_type": "code",
1523 | "colab": {
1524 | "base_uri": "https://localhost:8080/",
1525 | "height": 87
1526 | },
1527 | "outputId": "709cb77d-a0eb-43f5-aded-3c16ff2d0c98"
1528 | },
1529 | "source": [
1530 | "# AdaBoost Regression - Model Evaluation\n",
1531 | "print(\"---- AdaBoost Regression - Model Evaluation ----\")\n",
1532 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_adb)))\n",
1533 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_adb)))\n",
1534 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_adb))))"
1535 | ],
1536 | "execution_count": 34,
1537 | "outputs": [
1538 | {
1539 | "output_type": "stream",
1540 | "text": [
1541 | "---- AdaBoost Regression - Model Evaluation ----\n",
1542 | "Mean Absolute Error (MAE): 12.217362349360771\n",
1543 | "Mean Squared Error (MSE): 249.6020058588232\n",
1544 | "Root Mean Squared Error (RMSE): 15.798797608008757\n"
1545 | ],
1546 | "name": "stdout"
1547 | }
1548 | ]
1549 | },
1550 | {
1551 | "cell_type": "markdown",
1552 | "metadata": {
1553 | "id": "nbvk3I7ZvVG4",
1554 | "colab_type": "text"
1555 | },
1556 | "source": [
1557 | "*Note: Using AdaBoost did not reduce the error to a significant level. Hence, we will use the simple linear regression model for prediction.*"
1558 | ]
1559 | },
1560 | {
1561 | "cell_type": "markdown",
1562 | "metadata": {
1563 | "id": "2Xl7f4FQybFg",
1564 | "colab_type": "text"
1565 | },
1566 | "source": [
1567 | "## **Predictions**\n",
1568 | "• Model *trained on* the data from **IPL Seasons 1 to 9** ie: (2008 to 2016)
\n",
1569 | "• Model *tested on* data from **IPL Season 10** ie: (2017)
\n",
1570 | "• Model *predicts on* data from **IPL Seasons 11 to 12** ie: (2018 to 2019)"
1571 | ]
1572 | },
1573 | {
1574 | "cell_type": "code",
1575 | "metadata": {
1576 | "id": "-C4QKIa-yhYB",
1577 | "colab_type": "code",
1578 | "colab": {}
1579 | },
1580 | "source": [
1581 | "def predict_score(batting_team='Chennai Super Kings', bowling_team='Mumbai Indians', overs=5.1, runs=50, wickets=0, runs_in_prev_5=50, wickets_in_prev_5=0):\n",
1582 | " temp_array = list()\n",
1583 | "\n",
1584 | " # Batting Team\n",
1585 | " if batting_team == 'Chennai Super Kings':\n",
1586 | " temp_array = temp_array + [1,0,0,0,0,0,0,0]\n",
1587 | " elif batting_team == 'Delhi Daredevils':\n",
1588 | " temp_array = temp_array + [0,1,0,0,0,0,0,0]\n",
1589 | " elif batting_team == 'Kings XI Punjab':\n",
1590 | " temp_array = temp_array + [0,0,1,0,0,0,0,0]\n",
1591 | " elif batting_team == 'Kolkata Knight Riders':\n",
1592 | " temp_array = temp_array + [0,0,0,1,0,0,0,0]\n",
1593 | " elif batting_team == 'Mumbai Indians':\n",
1594 | " temp_array = temp_array + [0,0,0,0,1,0,0,0]\n",
1595 | " elif batting_team == 'Rajasthan Royals':\n",
1596 | " temp_array = temp_array + [0,0,0,0,0,1,0,0]\n",
1597 | " elif batting_team == 'Royal Challengers Bangalore':\n",
1598 | " temp_array = temp_array + [0,0,0,0,0,0,1,0]\n",
1599 | " elif batting_team == 'Sunrisers Hyderabad':\n",
1600 | " temp_array = temp_array + [0,0,0,0,0,0,0,1]\n",
1601 | "\n",
1602 | " # Bowling Team\n",
1603 | " if bowling_team == 'Chennai Super Kings':\n",
1604 | " temp_array = temp_array + [1,0,0,0,0,0,0,0]\n",
1605 | " elif bowling_team == 'Delhi Daredevils':\n",
1606 | " temp_array = temp_array + [0,1,0,0,0,0,0,0]\n",
1607 | " elif bowling_team == 'Kings XI Punjab':\n",
1608 | " temp_array = temp_array + [0,0,1,0,0,0,0,0]\n",
1609 | " elif bowling_team == 'Kolkata Knight Riders':\n",
1610 | " temp_array = temp_array + [0,0,0,1,0,0,0,0]\n",
1611 | " elif bowling_team == 'Mumbai Indians':\n",
1612 | " temp_array = temp_array + [0,0,0,0,1,0,0,0]\n",
1613 | " elif bowling_team == 'Rajasthan Royals':\n",
1614 | " temp_array = temp_array + [0,0,0,0,0,1,0,0]\n",
1615 | " elif bowling_team == 'Royal Challengers Bangalore':\n",
1616 | " temp_array = temp_array + [0,0,0,0,0,0,1,0]\n",
1617 | " elif bowling_team == 'Sunrisers Hyderabad':\n",
1618 | " temp_array = temp_array + [0,0,0,0,0,0,0,1]\n",
1619 | "\n",
1620 | " # Overs, Runs, Wickets, Runs_in_prev_5, Wickets_in_prev_5\n",
1621 | " temp_array = temp_array + [overs, runs, wickets, runs_in_prev_5, wickets_in_prev_5]\n",
1622 | "\n",
1623 | " # Converting into numpy array\n",
1624 | " temp_array = np.array([temp_array])\n",
1625 | "\n",
1626 | " # Prediction\n",
1627 | " return int(linear_regressor.predict(temp_array)[0])"
1628 | ],
1629 | "execution_count": 35,
1630 | "outputs": []
1631 | },
1632 | {
1633 | "cell_type": "markdown",
1634 | "metadata": {
1635 | "id": "lRZQW-YGS141",
1636 | "colab_type": "text"
1637 | },
1638 | "source": [
1639 | "### **Prediction 1**\n",
1640 | "• Date: 16th April 2018
\n",
1641 | "• IPL : Season 11
\n",
1642 | "• Match number: 13
\n",
1643 | "• Teams: Kolkata Knight Riders vs. Delhi Daredevils
\n",
1644 | "• First Innings final score: 200/9\n"
1645 | ]
1646 | },
1647 | {
1648 | "cell_type": "code",
1649 | "metadata": {
1650 | "id": "fhgissiE10Bx",
1651 | "colab_type": "code",
1652 | "colab": {
1653 | "base_uri": "https://localhost:8080/",
1654 | "height": 34
1655 | },
1656 | "outputId": "94bfaafb-6305-4187-c813-9c2c9af0a3d9"
1657 | },
1658 | "source": [
1659 | "final_score = predict_score(batting_team='Kolkata Knight Riders', bowling_team='Delhi Daredevils', overs=9.2, runs=79, wickets=2, runs_in_prev_5=60, wickets_in_prev_5=1)\n",
1660 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1661 | ],
1662 | "execution_count": 36,
1663 | "outputs": [
1664 | {
1665 | "output_type": "stream",
1666 | "text": [
1667 | "The final predicted score (range): 159 to 174\n"
1668 | ],
1669 | "name": "stdout"
1670 | }
1671 | ]
1672 | },
1673 | {
1674 | "cell_type": "markdown",
1675 | "metadata": {
1676 | "colab_type": "text",
1677 | "id": "IMAYg53PfbLm"
1678 | },
1679 | "source": [
1680 | "### **Prediction 2**\n",
1681 | "• Date: 7th May 2018
\n",
1682 | "• IPL : Season 11
\n",
1683 | "• Match number: 39
\n",
1684 | "• Teams: Sunrisers Hyderabad vs. Royal Challengers Bangalore
\n",
1685 | "• First Innings final score: 146/10\n"
1686 | ]
1687 | },
1688 | {
1689 | "cell_type": "code",
1690 | "metadata": {
1691 | "id": "eVtlLk3afeDT",
1692 | "colab_type": "code",
1693 | "colab": {
1694 | "base_uri": "https://localhost:8080/",
1695 | "height": 34
1696 | },
1697 | "outputId": "62562997-ce49-41c5-8341-430965293e35"
1698 | },
1699 | "source": [
1700 | "final_score = predict_score(batting_team='Sunrisers Hyderabad', bowling_team='Royal Challengers Bangalore', overs=10.5, runs=67, wickets=3, runs_in_prev_5=29, wickets_in_prev_5=1)\n",
1701 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1702 | ],
1703 | "execution_count": 37,
1704 | "outputs": [
1705 | {
1706 | "output_type": "stream",
1707 | "text": [
1708 | "The final predicted score (range): 138 to 153\n"
1709 | ],
1710 | "name": "stdout"
1711 | }
1712 | ]
1713 | },
1714 | {
1715 | "cell_type": "markdown",
1716 | "metadata": {
1717 | "colab_type": "text",
1718 | "id": "M-3FC7VhUzdK"
1719 | },
1720 | "source": [
1721 | "### **Prediction 3**\n",
1722 | "• Date: 17th May 2018
\n",
1723 | "• IPL : Season 11
\n",
1724 | "• Match number: 50
\n",
1725 | "• Teams: Mumbai Indians vs. Kings XI Punjab
\n",
1726 | "• First Innings final score: 186/8
\n"
1727 | ]
1728 | },
1729 | {
1730 | "cell_type": "code",
1731 | "metadata": {
1732 | "id": "CVgb01MY29NQ",
1733 | "colab_type": "code",
1734 | "colab": {
1735 | "base_uri": "https://localhost:8080/",
1736 | "height": 34
1737 | },
1738 | "outputId": "a1a3dc25-0ba3-489d-ff65-a4a3c4829c22"
1739 | },
1740 | "source": [
1741 | "final_score = predict_score(batting_team='Mumbai Indians', bowling_team='Kings XI Punjab', overs=14.1, runs=136, wickets=4, runs_in_prev_5=50, wickets_in_prev_5=0)\n",
1742 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1743 | ],
1744 | "execution_count": 38,
1745 | "outputs": [
1746 | {
1747 | "output_type": "stream",
1748 | "text": [
1749 | "The final predicted score (range): 180 to 195\n"
1750 | ],
1751 | "name": "stdout"
1752 | }
1753 | ]
1754 | },
1755 | {
1756 | "cell_type": "markdown",
1757 | "metadata": {
1758 | "colab_type": "text",
1759 | "id": "Ix3XW3y2iPgc"
1760 | },
1761 | "source": [
1762 | "### **Prediction 4**\n",
1763 | "• Date: 30th March 2019
\n",
1764 | "• IPL : Season 12
\n",
1765 | "• Match number: 9
\n",
1766 | "• Teams: Mumbai Indians vs. Kings XI Punjab
\n",
1767 | "• First Innings final score: 176/7\n"
1768 | ]
1769 | },
1770 | {
1771 | "cell_type": "code",
1772 | "metadata": {
1773 | "id": "FUWG9gsviudD",
1774 | "colab_type": "code",
1775 | "colab": {
1776 | "base_uri": "https://localhost:8080/",
1777 | "height": 34
1778 | },
1779 | "outputId": "eaf6b5c4-d1ef-4f13-9ba4-8457fbe6dd5d"
1780 | },
1781 | "source": [
1782 | "final_score = predict_score(batting_team='Mumbai Indians', bowling_team='Kings XI Punjab', overs=12.3, runs=113, wickets=2, runs_in_prev_5=55, wickets_in_prev_5=0)\n",
1783 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1784 | ],
1785 | "execution_count": 39,
1786 | "outputs": [
1787 | {
1788 | "output_type": "stream",
1789 | "text": [
1790 | "The final predicted score (range): 179 to 194\n"
1791 | ],
1792 | "name": "stdout"
1793 | }
1794 | ]
1795 | },
1796 | {
1797 | "cell_type": "markdown",
1798 | "metadata": {
1799 | "colab_type": "text",
1800 | "id": "pNs2zIxlW6Ou"
1801 | },
1802 | "source": [
1803 | "### **Prediction 5**\n",
1804 | "• Date: 11th April 2019
\n",
1805 | "• IPL : Season 12
\n",
1806 | "• Match number: 25
\n",
1807 | "• Teams: Rajasthan Royals vs. Chennai Super Kings
\n",
1808 | "• First Innings final score: 151/7\n"
1809 | ]
1810 | },
1811 | {
1812 | "cell_type": "code",
1813 | "metadata": {
1814 | "colab_type": "code",
1815 | "id": "hByMrV6l29YV",
1816 | "colab": {
1817 | "base_uri": "https://localhost:8080/",
1818 | "height": 34
1819 | },
1820 | "outputId": "1cff54c2-0e7b-46f9-dd02-4437bd0d4028"
1821 | },
1822 | "source": [
1823 | "final_score = predict_score(batting_team='Rajasthan Royals', bowling_team='Chennai Super Kings', overs=13.3, runs=92, wickets=5, runs_in_prev_5=27, wickets_in_prev_5=2)\n",
1824 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1825 | ],
1826 | "execution_count": 40,
1827 | "outputs": [
1828 | {
1829 | "output_type": "stream",
1830 | "text": [
1831 | "The final predicted score (range): 128 to 143\n"
1832 | ],
1833 | "name": "stdout"
1834 | }
1835 | ]
1836 | },
1837 | {
1838 | "cell_type": "markdown",
1839 | "metadata": {
1840 | "colab_type": "text",
1841 | "id": "hYLKJMHShBn8"
1842 | },
1843 | "source": [
1844 | "### **Prediction 6**\n",
1845 | "• Date: 14th April 2019
\n",
1846 | "• IPL : Season 12
\n",
1847 | "• Match number: 30
\n",
1848 | "• Teams: Sunrisers Hyderabad vs. Delhi Daredevils
\n",
1849 | "• First Innings final score: 155/7\n"
1850 | ]
1851 | },
1852 | {
1853 | "cell_type": "code",
1854 | "metadata": {
1855 | "id": "dAmNR2WLhD2F",
1856 | "colab_type": "code",
1857 | "colab": {
1858 | "base_uri": "https://localhost:8080/",
1859 | "height": 34
1860 | },
1861 | "outputId": "f6d18cd0-5eeb-4999-a6d9-9b303324fa2a"
1862 | },
1863 | "source": [
1864 | "final_score = predict_score(batting_team='Delhi Daredevils', bowling_team='Sunrisers Hyderabad', overs=11.5, runs=98, wickets=3, runs_in_prev_5=41, wickets_in_prev_5=1)\n",
1865 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1866 | ],
1867 | "execution_count": 41,
1868 | "outputs": [
1869 | {
1870 | "output_type": "stream",
1871 | "text": [
1872 | "The final predicted score (range): 157 to 172\n"
1873 | ],
1874 | "name": "stdout"
1875 | }
1876 | ]
1877 | },
1878 | {
1879 | "cell_type": "markdown",
1880 | "metadata": {
1881 | "colab_type": "text",
1882 | "id": "zxjq482uaQpc"
1883 | },
1884 | "source": [
1885 | "### **Prediction 7**\n",
1886 | "• Date: 10th May 2019
\n",
1887 | "• IPL : Season 12
\n",
1888 | "• Match number: 59 (Qualifier 2)
\n",
1889 | "• Teams: Delhi Daredevils vs. Chennai Super Kings
\n",
1890 | "• First Innings final score: 147/9\n"
1891 | ]
1892 | },
1893 | {
1894 | "cell_type": "code",
1895 | "metadata": {
1896 | "id": "UR6QNPK_aSSj",
1897 | "colab_type": "code",
1898 | "colab": {
1899 | "base_uri": "https://localhost:8080/",
1900 | "height": 34
1901 | },
1902 | "outputId": "1afc6b5b-d1bc-4f55-9c61-362e31cb4e80"
1903 | },
1904 | "source": [
1905 | "final_score = predict_score(batting_team='Delhi Daredevils', bowling_team='Chennai Super Kings', overs=10.2, runs=68, wickets=3, runs_in_prev_5=29, wickets_in_prev_5=1)\n",
1906 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))"
1907 | ],
1908 | "execution_count": 42,
1909 | "outputs": [
1910 | {
1911 | "output_type": "stream",
1912 | "text": [
1913 | "The final predicted score (range): 137 to 152\n"
1914 | ],
1915 | "name": "stdout"
1916 | }
1917 | ]
1918 | },
1919 | {
1920 | "cell_type": "markdown",
1921 | "metadata": {
1922 | "id": "ZaEK5rEmjp8K",
1923 | "colab_type": "text"
1924 | },
1925 | "source": [
1926 | "*Note: In the IPL, it is very difficult to predict the actual score because the game can turn completely upside down in a moment!*\n"
1927 | ]
1928 | }
1929 | ]
1930 | }
--------------------------------------------------------------------------------