├── README.md └── First Innings Score Prediction - IPL.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # IPL-score-prediction -------------------------------------------------------------------------------- /First Innings Score Prediction - IPL.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "name": "First Innings Score Predictor - IPL.ipynb", 7 | "provenance": [], 8 | "collapsed_sections": [], 9 | "toc_visible": true 10 | }, 11 | "kernelspec": { 12 | "name": "python3", 13 | "display_name": "Python 3" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "code", 19 | "metadata": { 20 | "id": "Ye3oEMGgcWhp", 21 | "colab_type": "code", 22 | "colab": {} 23 | }, 24 | "source": [ 25 | "# Importing essential libraries\n", 26 | "import pandas as pd\n", 27 | "import numpy as np" 28 | ], 29 | "execution_count": 1, 30 | "outputs": [] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "metadata": { 35 | "id": "Z8utzElrdPub", 36 | "colab_type": "code", 37 | "colab": {} 38 | }, 39 | "source": [ 40 | "# Loading the dataset\n", 41 | "df = pd.read_csv('ipl.csv')" 42 | ], 43 | "execution_count": 2, 44 | "outputs": [] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": { 49 | "id": "5vWE2y4ldZC4", 50 | "colab_type": "text" 51 | }, 52 | "source": [ 53 | "## **Exploring the dataset**" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "metadata": { 59 | "id": "LEkrx9IWdYHh", 60 | "colab_type": "code", 61 | "colab": { 62 | "base_uri": "https://localhost:8080/", 63 | "height": 87 64 | }, 65 | "outputId": "71a453fa-df59-40b6-81b5-3407a240b3f0" 66 | }, 67 | "source": [ 68 | "df.columns" 69 | ], 70 | "execution_count": 3, 71 | "outputs": [ 72 | { 73 | "output_type": "execute_result", 74 | "data": { 75 | "text/plain": [ 76 | "Index(['mid', 'date', 'venue', 'bat_team', 'bowl_team', 'batsman', 
'bowler',\n", 77 | " 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5', 'striker',\n", 78 | " 'non-striker', 'total'],\n", 79 | " dtype='object')" 80 | ] 81 | }, 82 | "metadata": { 83 | "tags": [] 84 | }, 85 | "execution_count": 3 86 | } 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "metadata": { 92 | "id": "3OZDpVGvddx6", 93 | "colab_type": "code", 94 | "colab": { 95 | "base_uri": "https://localhost:8080/", 96 | "height": 34 97 | }, 98 | "outputId": "14bdb7d8-4de8-42d5-820e-a4151ff3533d" 99 | }, 100 | "source": [ 101 | "df.shape" 102 | ], 103 | "execution_count": 4, 104 | "outputs": [ 105 | { 106 | "output_type": "execute_result", 107 | "data": { 108 | "text/plain": [ 109 | "(76014, 15)" 110 | ] 111 | }, 112 | "metadata": { 113 | "tags": [] 114 | }, 115 | "execution_count": 4 116 | } 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "metadata": { 122 | "id": "JNodK4n7dpep", 123 | "colab_type": "code", 124 | "colab": { 125 | "base_uri": "https://localhost:8080/", 126 | "height": 301 127 | }, 128 | "outputId": "55a278d8-cbc8-467a-892f-c33ed14fd8c1" 129 | }, 130 | "source": [ 131 | "df.dtypes" 132 | ], 133 | "execution_count": 5, 134 | "outputs": [ 135 | { 136 | "output_type": "execute_result", 137 | "data": { 138 | "text/plain": [ 139 | "mid int64\n", 140 | "date object\n", 141 | "venue object\n", 142 | "bat_team object\n", 143 | "bowl_team object\n", 144 | "batsman object\n", 145 | "bowler object\n", 146 | "runs int64\n", 147 | "wickets int64\n", 148 | "overs float64\n", 149 | "runs_last_5 int64\n", 150 | "wickets_last_5 int64\n", 151 | "striker int64\n", 152 | "non-striker int64\n", 153 | "total int64\n", 154 | "dtype: object" 155 | ] 156 | }, 157 | "metadata": { 158 | "tags": [] 159 | }, 160 | "execution_count": 5 161 | } 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "metadata": { 167 | "id": "p8VJefH9drgh", 168 | "colab_type": "code", 169 | "colab": { 170 | "base_uri": "https://localhost:8080/", 171 | "height": 381 172 | }, 
173 | "outputId": "1bd26e10-6d71-4e6d-c747-9a4d014f1adc" 174 | }, 175 | "source": [ 176 | "df.head()" 177 | ], 178 | "execution_count": 6, 179 | "outputs": [ 180 | { 181 | "output_type": "execute_result", 182 | "data": { 183 | "text/html": [ 184 | "
\n", 185 | "\n", 198 | "\n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | "
middatevenuebat_teambowl_teambatsmanbowlerrunswicketsoversruns_last_5wickets_last_5strikernon-strikertotal
012008-04-18M Chinnaswamy StadiumKolkata Knight RidersRoyal Challengers BangaloreSC GangulyP Kumar100.11000222
112008-04-18M Chinnaswamy StadiumKolkata Knight RidersRoyal Challengers BangaloreBB McCullumP Kumar100.21000222
212008-04-18M Chinnaswamy StadiumKolkata Knight RidersRoyal Challengers BangaloreBB McCullumP Kumar200.22000222
312008-04-18M Chinnaswamy StadiumKolkata Knight RidersRoyal Challengers BangaloreBB McCullumP Kumar200.32000222
412008-04-18M Chinnaswamy StadiumKolkata Knight RidersRoyal Challengers BangaloreBB McCullumP Kumar200.42000222
\n", 312 | "
" 313 | ], 314 | "text/plain": [ 315 | " mid date venue ... striker non-striker total\n", 316 | "0 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n", 317 | "1 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n", 318 | "2 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n", 319 | "3 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n", 320 | "4 1 2008-04-18 M Chinnaswamy Stadium ... 0 0 222\n", 321 | "\n", 322 | "[5 rows x 15 columns]" 323 | ] 324 | }, 325 | "metadata": { 326 | "tags": [] 327 | }, 328 | "execution_count": 6 329 | } 330 | ] 331 | }, 332 | { 333 | "cell_type": "markdown", 334 | "metadata": { 335 | "id": "srvJazxud7BB", 336 | "colab_type": "text" 337 | }, 338 | "source": [ 339 | "## **Data Cleaning**\n", 340 | "Points covered under this section:
\n", 341 | "*• Removing unwanted columns*
\n", 342 | "*• Keeping only consistent teams*
\n", 343 | "*• Removing the first 5 overs data in every match*
\n", 344 | "*• Converting the column 'date' from string into datetime object*
" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "metadata": { 350 | "id": "t3w2hRVbekfq", 351 | "colab_type": "code", 352 | "colab": { 353 | "base_uri": "https://localhost:8080/", 354 | "height": 87 355 | }, 356 | "outputId": "dbc88fd6-19ed-43b1-94b2-8c19bb9bbd6a" 357 | }, 358 | "source": [ 359 | "df.columns" 360 | ], 361 | "execution_count": 7, 362 | "outputs": [ 363 | { 364 | "output_type": "execute_result", 365 | "data": { 366 | "text/plain": [ 367 | "Index(['mid', 'date', 'venue', 'bat_team', 'bowl_team', 'batsman', 'bowler',\n", 368 | " 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5', 'striker',\n", 369 | " 'non-striker', 'total'],\n", 370 | " dtype='object')" 371 | ] 372 | }, 373 | "metadata": { 374 | "tags": [] 375 | }, 376 | "execution_count": 7 377 | } 378 | ] 379 | }, 380 | { 381 | "cell_type": "code", 382 | "metadata": { 383 | "id": "bOyVrvsSdyFx", 384 | "colab_type": "code", 385 | "colab": { 386 | "base_uri": "https://localhost:8080/", 387 | "height": 52 388 | }, 389 | "outputId": "4a70f21e-7f2d-439a-bfb1-d0f7037cca49" 390 | }, 391 | "source": [ 392 | "# Removing unwanted columns (reassigning rather than mutating in place; errors='ignore' lets the cell be re-run safely)\n", 393 | "columns_to_remove = ['mid', 'venue', 'batsman', 'bowler', 'striker', 'non-striker']\n", 394 | "\n", 395 | "print('Before removing unwanted columns: {}'.format(df.shape))\n", 396 | "df = df.drop(columns=columns_to_remove, errors='ignore')\n", 397 | "print('After removing unwanted columns: {}'.format(df.shape))" 398 | ], 399 | "execution_count": 8, 400 | "outputs": [ 401 | { 402 | "output_type": "stream", 403 | "text": [ 404 | "Before removing unwanted columns: (76014, 15)\n", 405 | "After removing unwanted columns: (76014, 9)\n" 406 | ], 407 | "name": "stdout" 408 | } 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "metadata": { 414 | "id": "VeYhEaB6fQdC", 415 | "colab_type": "code", 416 | "colab": { 417 | "base_uri": "https://localhost:8080/", 418 | "height": 70 419 | }, 420 | "outputId": "93c091c8-16ce-47be-a79b-46e5dbe1f691" 421 | 
}, 422 | "source": [ 423 | "df.columns" 424 | ], 425 | "execution_count": 9, 426 | "outputs": [ 427 | { 428 | "output_type": "execute_result", 429 | "data": { 430 | "text/plain": [ 431 | "Index(['date', 'bat_team', 'bowl_team', 'runs', 'wickets', 'overs',\n", 432 | " 'runs_last_5', 'wickets_last_5', 'total'],\n", 433 | " dtype='object')" 434 | ] 435 | }, 436 | "metadata": { 437 | "tags": [] 438 | }, 439 | "execution_count": 9 440 | } 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "metadata": { 446 | "id": "FCfuyal8hvXt", 447 | "colab_type": "code", 448 | "colab": { 449 | "base_uri": "https://localhost:8080/", 450 | "height": 197 451 | }, 452 | "outputId": "b128f1c8-dd1d-4283-bd84-f371fc50982b" 453 | }, 454 | "source": [ 455 | "df.head()" 456 | ], 457 | "execution_count": 10, 458 | "outputs": [ 459 | { 460 | "output_type": "execute_result", 461 | "data": { 462 | "text/html": [ 463 | "
\n", 464 | "\n", 477 | "\n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | "
datebat_teambowl_teamrunswicketsoversruns_last_5wickets_last_5total
02008-04-18Kolkata Knight RidersRoyal Challengers Bangalore100.110222
12008-04-18Kolkata Knight RidersRoyal Challengers Bangalore100.210222
22008-04-18Kolkata Knight RidersRoyal Challengers Bangalore200.220222
32008-04-18Kolkata Knight RidersRoyal Challengers Bangalore200.320222
42008-04-18Kolkata Knight RidersRoyal Challengers Bangalore200.420222
\n", 555 | "
" 556 | ], 557 | "text/plain": [ 558 | " date bat_team ... wickets_last_5 total\n", 559 | "0 2008-04-18 Kolkata Knight Riders ... 0 222\n", 560 | "1 2008-04-18 Kolkata Knight Riders ... 0 222\n", 561 | "2 2008-04-18 Kolkata Knight Riders ... 0 222\n", 562 | "3 2008-04-18 Kolkata Knight Riders ... 0 222\n", 563 | "4 2008-04-18 Kolkata Knight Riders ... 0 222\n", 564 | "\n", 565 | "[5 rows x 9 columns]" 566 | ] 567 | }, 568 | "metadata": { 569 | "tags": [] 570 | }, 571 | "execution_count": 10 572 | } 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "metadata": { 578 | "id": "W2jQTWJhmIrt", 579 | "colab_type": "code", 580 | "colab": { 581 | "base_uri": "https://localhost:8080/", 582 | "height": 34 583 | }, 584 | "outputId": "23463ddc-6c24-43f7-d60d-f43f64ad1a6b" 585 | }, 586 | "source": [ 587 | "df.index" 588 | ], 589 | "execution_count": 11, 590 | "outputs": [ 591 | { 592 | "output_type": "execute_result", 593 | "data": { 594 | "text/plain": [ 595 | "RangeIndex(start=0, stop=76014, step=1)" 596 | ] 597 | }, 598 | "metadata": { 599 | "tags": [] 600 | }, 601 | "execution_count": 11 602 | } 603 | ] 604 | }, 605 | { 606 | "cell_type": "code", 607 | "metadata": { 608 | "id": "HD-7eP27iWab", 609 | "colab_type": "code", 610 | "colab": { 611 | "base_uri": "https://localhost:8080/", 612 | "height": 123 613 | }, 614 | "outputId": "2477db91-0a4e-41e3-9689-2abf870a2d4d" 615 | }, 616 | "source": [ 617 | "df['bat_team'].unique()" 618 | ], 619 | "execution_count": 12, 620 | "outputs": [ 621 | { 622 | "output_type": "execute_result", 623 | "data": { 624 | "text/plain": [ 625 | "array(['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',\n", 626 | " 'Mumbai Indians', 'Deccan Chargers', 'Kings XI Punjab',\n", 627 | " 'Royal Challengers Bangalore', 'Delhi Daredevils',\n", 628 | " 'Kochi Tuskers Kerala', 'Pune Warriors', 'Sunrisers Hyderabad',\n", 629 | " 'Rising Pune Supergiants', 'Gujarat Lions',\n", 630 | " 'Rising Pune Supergiant'], dtype=object)" 631 | ] 632 
| }, 633 | "metadata": { 634 | "tags": [] 635 | }, 636 | "execution_count": 12 637 | } 638 | ] 639 | }, 640 | { 641 | "cell_type": "code", 642 | "metadata": { 643 | "id": "VcC_Crmhih1r", 644 | "colab_type": "code", 645 | "colab": {} 646 | }, 647 | "source": [ 648 | "consistent_teams = ['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',\n", 649 | " 'Mumbai Indians', 'Kings XI Punjab', 'Royal Challengers Bangalore',\n", 650 | " 'Delhi Daredevils', 'Sunrisers Hyderabad']" 651 | ], 652 | "execution_count": 13, 653 | "outputs": [] 654 | }, 655 | { 656 | "cell_type": "code", 657 | "metadata": { 658 | "id": "MXmAt3Colmdu", 659 | "colab_type": "code", 660 | "colab": { 661 | "base_uri": "https://localhost:8080/", 662 | "height": 52 663 | }, 664 | "outputId": "9874be5e-a71f-418a-8d90-45268c6c634f" 665 | }, 666 | "source": [ 667 | "# Keep a delivery only when both the batting and the bowling side are in consistent_teams\n", 668 | "print('Before removing inconsistent teams: {}'.format(df.shape))\n", 669 | "df = df[df[['bat_team', 'bowl_team']].isin(consistent_teams).all(axis=1)]\n", 670 | "print('After removing inconsistent teams: {}'.format(df.shape))" 671 | ], 672 | "execution_count": 14, 673 | "outputs": [ 674 | { 675 | "output_type": "stream", 676 | "text": [ 677 | "Before removing inconsistent teams: (76014, 9)\n", 678 | "After removing inconsistent teams: (53811, 9)\n" 679 | ], 680 | "name": "stdout" 681 | } 682 | ] 683 | }, 684 | { 685 | "cell_type": "code", 686 | "metadata": { 687 | "id": "AB8uOLpHoKtB", 688 | "colab_type": "code", 689 | "colab": { 690 | "base_uri": "https://localhost:8080/", 691 | "height": 70 692 | }, 693 | "outputId": "89633380-9611-4552-e964-2d7fdbe8839c" 694 | }, 695 | "source": [ 696 | "df['bat_team'].unique()" 697 | ], 698 | "execution_count": 15, 699 | "outputs": [ 700 | { 701 | "output_type": "execute_result", 702 | "data": { 703 | "text/plain": [ 704 | "array(['Kolkata Knight Riders', 'Chennai Super Kings', 'Rajasthan Royals',\n", 705 | " 'Mumbai Indians', 'Kings 
XI Punjab', 'Royal Challengers Bangalore',\n", 706 | " 'Delhi Daredevils', 'Sunrisers Hyderabad'], dtype=object)" 707 | ] 708 | }, 709 | "metadata": { 710 | "tags": [] 711 | }, 712 | "execution_count": 15 713 | } 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "metadata": { 719 | "id": "HVouIS8QqFwX", 720 | "colab_type": "code", 721 | "colab": { 722 | "base_uri": "https://localhost:8080/", 723 | "height": 52 724 | }, 725 | "outputId": "5cf9861c-e3e4-41c4-937e-bf1de77c6556" 726 | }, 727 | "source": [ 728 | "# Removing the first 5 overs data in every match\n", 729 | "print('Before removing first 5 overs data: {}'.format(df.shape))\n", 730 | "df = df[df['overs']>=5.0]\n", 731 | "print('After removing first 5 overs data: {}'.format(df.shape))" 732 | ], 733 | "execution_count": 16, 734 | "outputs": [ 735 | { 736 | "output_type": "stream", 737 | "text": [ 738 | "Before removing first 5 overs data: (53811, 9)\n", 739 | "After removing first 5 overs data: (40108, 9)\n" 740 | ], 741 | "name": "stdout" 742 | } 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "metadata": { 748 | "id": "J_R7UNqkf3Mt", 749 | "colab_type": "code", 750 | "colab": { 751 | "base_uri": "https://localhost:8080/", 752 | "height": 52 753 | }, 754 | "outputId": "e2383f98-4771-41d9-c6a4-ebd96a7aa575" 755 | }, 756 | "source": [ 757 | "# Converting the column 'date' from string into datetime (vectorised pd.to_datetime; safe to re-run on an already-converted column)\n", 758 | "from datetime import datetime\n", 759 | "print(\"Before converting 'date' column from string to datetime object: {}\".format(type(df.iloc[0,0])))\n", 760 | "df = df.assign(date=pd.to_datetime(df['date'], format='%Y-%m-%d'))\n", 761 | "print(\"After converting 'date' column from string to datetime object: {}\".format(type(df.iloc[0,0])))" 762 | ], 763 | "execution_count": 17, 764 | "outputs": [ 765 | { 766 | "output_type": "stream", 767 | "text": [ 768 | "Before converting 'date' column from string to datetime object: \n", 769 | "After converting 'date' column from string to 
datetime object: \n" 770 | ], 771 | "name": "stdout" 772 | } 773 | ] 774 | }, 775 | { 776 | "cell_type": "code", 777 | "metadata": { 778 | "id": "try0O4SSxjBE", 779 | "colab_type": "code", 780 | "colab": { 781 | "base_uri": "https://localhost:8080/", 782 | "height": 651 783 | }, 784 | "outputId": "3bde5d22-653f-49a4-965b-f992b2a6374d" 785 | }, 786 | "source": [ 787 | "# Selecting correlated features using Heatmap\n", 788 | "import matplotlib.pyplot as plt\n", 789 | "import seaborn as sns\n", 790 | "\n", 791 | "# Correlate the numeric columns only: the team columns are strings and make DataFrame.corr() raise on pandas >= 2.0\n", 792 | "corr_matrix = df.select_dtypes(include='number').corr()\n", 793 | "top_corr_features = corr_matrix.index\n", 794 | "\n", 795 | "# Plotting the heatmap (reuses corr_matrix instead of computing the same correlation a second time)\n", 796 | "plt.figure(figsize=(13,10))\n", 797 | "g = sns.heatmap(data=corr_matrix, annot=True, cmap='RdYlGn')" 798 | ], 799 | "execution_count": 18, 800 | "outputs": [ 801 | { 802 | "output_type": "stream", 803 | "text": [ 804 | "/usr/local/lib/python3.6/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", 805 | " import pandas.util.testing as tm\n" 806 | ], 807 | "name": "stderr" 808 | }, 809 | { 810 | "output_type": "display_data", 811 | "data": { 812 | "image/png": "\n", 813 | "text/plain": [ 814 | "
" 815 | ] 816 | }, 817 | "metadata": { 818 | "tags": [], 819 | "needs_background": "light" 820 | } 821 | } 822 | ] 823 | }, 824 | { 825 | "cell_type": "markdown", 826 | "metadata": { 827 | "id": "o9Qt4bOGpKh3", 828 | "colab_type": "text" 829 | }, 830 | "source": [ 831 | "## **Data Preprocessing**\n", 832 | "*• Handling categorical features*
\n", 833 | "*• Splitting dataset into train and test set on the basis of date*
" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "metadata": { 839 | "id": "ZpJLe1YVfrr6", 840 | "colab_type": "code", 841 | "colab": { 842 | "base_uri": "https://localhost:8080/", 843 | "height": 212 844 | }, 845 | "outputId": "ced0b921-0d9e-4f51-f3ce-841d16fcff0f" 846 | }, 847 | "source": [ 848 | "# Converting categorical features using OneHotEncoding method (dtype=int keeps the dummy columns as 0/1 on every pandas version)\n", 849 | "encoded_df = pd.get_dummies(data=df, columns=['bat_team', 'bowl_team'], dtype=int)\n", 850 | "encoded_df.columns" 851 | ], 852 | "execution_count": 19, 853 | "outputs": [ 854 | { 855 | "output_type": "execute_result", 856 | "data": { 857 | "text/plain": [ 858 | "Index(['date', 'runs', 'wickets', 'overs', 'runs_last_5', 'wickets_last_5',\n", 859 | " 'total', 'bat_team_Chennai Super Kings', 'bat_team_Delhi Daredevils',\n", 860 | " 'bat_team_Kings XI Punjab', 'bat_team_Kolkata Knight Riders',\n", 861 | " 'bat_team_Mumbai Indians', 'bat_team_Rajasthan Royals',\n", 862 | " 'bat_team_Royal Challengers Bangalore', 'bat_team_Sunrisers Hyderabad',\n", 863 | " 'bowl_team_Chennai Super Kings', 'bowl_team_Delhi Daredevils',\n", 864 | " 'bowl_team_Kings XI Punjab', 'bowl_team_Kolkata Knight Riders',\n", 865 | " 'bowl_team_Mumbai Indians', 'bowl_team_Rajasthan Royals',\n", 866 | " 'bowl_team_Royal Challengers Bangalore',\n", 867 | " 'bowl_team_Sunrisers Hyderabad'],\n", 868 | " dtype='object')" 869 | ] 870 | }, 871 | "metadata": { 872 | "tags": [] 873 | }, 874 | "execution_count": 19 875 | } 876 | ] 877 | }, 878 | { 879 | "cell_type": "code", 880 | "metadata": { 881 | "id": "ZtrtRO7AiLPz", 882 | "colab_type": "code", 883 | "colab": { 884 | "base_uri": "https://localhost:8080/", 885 | "height": 334 886 | }, 887 | "outputId": "925d1844-a1b0-49ac-bced-79940195d950" 888 | }, 889 | "source": [ 890 | "encoded_df.head()" 891 | ], 892 | "execution_count": 20, 893 | "outputs": [ 894 | { 895 | "output_type": "execute_result", 896 | "data": { 897 | "text/html": [ 898 | "
\n", 899 | "\n", 912 | "\n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | " \n", 973 | " \n", 974 | " \n", 975 | " \n", 976 | " \n", 977 | " \n", 978 | " \n", 979 | " \n", 980 | " \n", 981 | " \n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 
1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | "
daterunswicketsoversruns_last_5wickets_last_5totalbat_team_Chennai Super Kingsbat_team_Delhi Daredevilsbat_team_Kings XI Punjabbat_team_Kolkata Knight Ridersbat_team_Mumbai Indiansbat_team_Rajasthan Royalsbat_team_Royal Challengers Bangalorebat_team_Sunrisers Hyderabadbowl_team_Chennai Super Kingsbowl_team_Delhi Daredevilsbowl_team_Kings XI Punjabbowl_team_Kolkata Knight Ridersbowl_team_Mumbai Indiansbowl_team_Rajasthan Royalsbowl_team_Royal Challengers Bangalorebowl_team_Sunrisers Hyderabad
322008-04-186105.15902220001000000000010
332008-04-186115.25912220001000000000010
342008-04-186115.35912220001000000000010
352008-04-186115.45912220001000000000010
362008-04-186115.55812220001000000000010
\n", 1074 | "
" 1075 | ], 1076 | "text/plain": [ 1077 | " date ... bowl_team_Sunrisers Hyderabad\n", 1078 | "32 2008-04-18 ... 0\n", 1079 | "33 2008-04-18 ... 0\n", 1080 | "34 2008-04-18 ... 0\n", 1081 | "35 2008-04-18 ... 0\n", 1082 | "36 2008-04-18 ... 0\n", 1083 | "\n", 1084 | "[5 rows x 23 columns]" 1085 | ] 1086 | }, 1087 | "metadata": { 1088 | "tags": [] 1089 | }, 1090 | "execution_count": 20 1091 | } 1092 | ] 1093 | }, 1094 | { 1095 | "cell_type": "code", 1096 | "metadata": { 1097 | "id": "dVj9eyGJj5-J", 1098 | "colab_type": "code", 1099 | "colab": {} 1100 | }, 1101 | "source": [ 1102 | "# Rearranging the columns\n", 1103 | "encoded_df = encoded_df[['date', 'bat_team_Chennai Super Kings', 'bat_team_Delhi Daredevils', 'bat_team_Kings XI Punjab',\n", 1104 | " 'bat_team_Kolkata Knight Riders', 'bat_team_Mumbai Indians', 'bat_team_Rajasthan Royals',\n", 1105 | " 'bat_team_Royal Challengers Bangalore', 'bat_team_Sunrisers Hyderabad',\n", 1106 | " 'bowl_team_Chennai Super Kings', 'bowl_team_Delhi Daredevils', 'bowl_team_Kings XI Punjab',\n", 1107 | " 'bowl_team_Kolkata Knight Riders', 'bowl_team_Mumbai Indians', 'bowl_team_Rajasthan Royals',\n", 1108 | " 'bowl_team_Royal Challengers Bangalore', 'bowl_team_Sunrisers Hyderabad',\n", 1109 | " 'overs', 'runs', 'wickets', 'runs_last_5', 'wickets_last_5', 'total']]" 1110 | ], 1111 | "execution_count": 21, 1112 | "outputs": [] 1113 | }, 1114 | { 1115 | "cell_type": "code", 1116 | "metadata": { 1117 | "id": "8raq2VVXeHyr", 1118 | "colab_type": "code", 1119 | "colab": { 1120 | "base_uri": "https://localhost:8080/", 1121 | "height": 34 1122 | }, 1123 | "outputId": "c89b9399-6397-438f-cd3c-3e7ca0ae63ee" 1124 | }, 1125 | "source": [ 1126 | "# Splitting the data into train and test set\n", 1127 | "X_train = encoded_df.drop(labels='total', axis=1)[encoded_df['date'].dt.year <= 2016]\n", 1128 | "X_test = encoded_df.drop(labels='total', axis=1)[encoded_df['date'].dt.year >= 2017]\n", 1129 | "\n", 1130 | "y_train = 
encoded_df[encoded_df['date'].dt.year <= 2016]['total'].values\n", 1131 | "y_test = encoded_df[encoded_df['date'].dt.year >= 2017]['total'].values\n", 1132 | "\n", 1133 | "# Removing the 'date' column (it was only needed for the year-based split above)\n", 1134 | "X_train = X_train.drop(columns='date')\n", 1135 | "X_test = X_test.drop(columns='date')\n", 1136 | "\n", 1137 | "print(\"Training set: {} and Test set: {}\".format(X_train.shape, X_test.shape))" 1138 | ], 1139 | "execution_count": 22, 1140 | "outputs": [ 1141 | { 1142 | "output_type": "stream", 1143 | "text": [ 1144 | "Training set: (37330, 21) and Test set: (2778, 21)\n" 1145 | ], 1146 | "name": "stdout" 1147 | } 1148 | ] 1149 | }, 1150 | { 1151 | "cell_type": "markdown", 1152 | "metadata": { 1153 | "id": "QRZoptrOlsT5", 1154 | "colab_type": "text" 1155 | }, 1156 | "source": [ 1157 | "## **Model Building**\n", 1158 | "I will experiment with 4 different algorithms, they are as follows:
\n", 1159 | "*• Linear Regression*
\n", 1160 | "*• Decision Tree Regression*
\n", 1161 | "*• Random Forest Regression*
\n", 1162 | "\n", 1163 | "----- Boosting Algorithm -----
\n", 1164 | "*• Adaptive Boosting (AdaBoost) Algorithm*
" 1165 | ] 1166 | }, 1167 | { 1168 | "cell_type": "markdown", 1169 | "metadata": { 1170 | "id": "w5NJZyB8oFEw", 1171 | "colab_type": "text" 1172 | }, 1173 | "source": [ 1174 | "### *Linear Regression*" 1175 | ] 1176 | }, 1177 | { 1178 | "cell_type": "code", 1179 | "metadata": { 1180 | "id": "TAbGSLrVln6Q", 1181 | "colab_type": "code", 1182 | "colab": { 1183 | "base_uri": "https://localhost:8080/", 1184 | "height": 34 1185 | }, 1186 | "outputId": "d33e0b5f-511c-42dd-f4e3-da8f8b6b2610" 1187 | }, 1188 | "source": [ 1189 | "# Linear Regression Model\n", 1190 | "from sklearn.linear_model import LinearRegression\n", 1191 | "linear_regressor = LinearRegression()\n", 1192 | "linear_regressor.fit(X_train,y_train)" 1193 | ], 1194 | "execution_count": 23, 1195 | "outputs": [ 1196 | { 1197 | "output_type": "execute_result", 1198 | "data": { 1199 | "text/plain": [ 1200 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)" 1201 | ] 1202 | }, 1203 | "metadata": { 1204 | "tags": [] 1205 | }, 1206 | "execution_count": 23 1207 | } 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "metadata": { 1213 | "id": "JeKomR6-nfaX", 1214 | "colab_type": "code", 1215 | "colab": {} 1216 | }, 1217 | "source": [ 1218 | "# Predicting results\n", 1219 | "y_pred_lr = linear_regressor.predict(X_test)" 1220 | ], 1221 | "execution_count": 24, 1222 | "outputs": [] 1223 | }, 1224 | { 1225 | "cell_type": "code", 1226 | "metadata": { 1227 | "id": "xRPc6nsmmlbo", 1228 | "colab_type": "code", 1229 | "colab": { 1230 | "base_uri": "https://localhost:8080/", 1231 | "height": 87 1232 | }, 1233 | "outputId": "24f2c7ee-a0dd-43c3-9efc-ba1d0856cad7" 1234 | }, 1235 | "source": [ 1236 | "# Linear Regression - Model Evaluation\n", 1237 | "from sklearn.metrics import mean_absolute_error as mae, mean_squared_error as mse, accuracy_score\n", 1238 | "print(\"---- Linear Regression - Model Evaluation ----\")\n", 1239 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, 
y_pred_lr)))\n", 1240 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_lr)))\n", 1241 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_lr))))" 1242 | ], 1243 | "execution_count": 25, 1244 | "outputs": [ 1245 | { 1246 | "output_type": "stream", 1247 | "text": [ 1248 | "---- Linear Regression - Model Evaluation ----\n", 1249 | "Mean Absolute Error (MAE): 12.11861754619329\n", 1250 | "Mean Squared Error (MSE): 251.00792310417438\n", 1251 | "Root Mean Squared Error (RMSE): 15.843229566732106\n" 1252 | ], 1253 | "name": "stdout" 1254 | } 1255 | ] 1256 | }, 1257 | { 1258 | "cell_type": "markdown", 1259 | "metadata": { 1260 | "colab_type": "text", 1261 | "id": "fuPztrQQoLNg" 1262 | }, 1263 | "source": [ 1264 | "### *Decision Tree*" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "metadata": { 1270 | "id": "drFWmrvBoC6x", 1271 | "colab_type": "code", 1272 | "colab": { 1273 | "base_uri": "https://localhost:8080/", 1274 | "height": 123 1275 | }, 1276 | "outputId": "83a98e88-4307-4bbd-f941-74bb30dc5170" 1277 | }, 1278 | "source": [ 1279 | "# Decision Tree Regression Model\n", 1280 | "from sklearn.tree import DecisionTreeRegressor\n", 1281 | "decision_regressor = DecisionTreeRegressor()\n", 1282 | "decision_regressor.fit(X_train,y_train)" 1283 | ], 1284 | "execution_count": 26, 1285 | "outputs": [ 1286 | { 1287 | "output_type": "execute_result", 1288 | "data": { 1289 | "text/plain": [ 1290 | "DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n", 1291 | " max_features=None, max_leaf_nodes=None,\n", 1292 | " min_impurity_decrease=0.0, min_impurity_split=None,\n", 1293 | " min_samples_leaf=1, min_samples_split=2,\n", 1294 | " min_weight_fraction_leaf=0.0, presort='deprecated',\n", 1295 | " random_state=None, splitter='best')" 1296 | ] 1297 | }, 1298 | "metadata": { 1299 | "tags": [] 1300 | }, 1301 | "execution_count": 26 1302 | } 1303 | ] 1304 | }, 1305 | { 1306 | "cell_type": "code", 1307 | 
"metadata": { 1308 | "id": "cCl-LXmpofeq", 1309 | "colab_type": "code", 1310 | "colab": {} 1311 | }, 1312 | "source": [ 1313 | "# Predicting results\n", 1314 | "y_pred_dt = decision_regressor.predict(X_test)" 1315 | ], 1316 | "execution_count": 27, 1317 | "outputs": [] 1318 | }, 1319 | { 1320 | "cell_type": "code", 1321 | "metadata": { 1322 | "id": "fxQBkteHoj2Y", 1323 | "colab_type": "code", 1324 | "colab": { 1325 | "base_uri": "https://localhost:8080/", 1326 | "height": 87 1327 | }, 1328 | "outputId": "bc4f87bf-0ce4-44fb-c957-11f85d3526e0" 1329 | }, 1330 | "source": [ 1331 | "# Decision Tree Regression - Model Evaluation\n", 1332 | "print(\"---- Decision Tree Regression - Model Evaluation ----\")\n", 1333 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_dt)))\n", 1334 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_dt)))\n", 1335 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_dt))))" 1336 | ], 1337 | "execution_count": 28, 1338 | "outputs": [ 1339 | { 1340 | "output_type": "stream", 1341 | "text": [ 1342 | "---- Decision Tree Regression - Model Evaluation ----\n", 1343 | "Mean Absolute Error (MAE): 17.08963282937365\n", 1344 | "Mean Squared Error (MSE): 531.0550755939524\n", 1345 | "Root Mean Squared Error (RMSE): 23.044632251219642\n" 1346 | ], 1347 | "name": "stdout" 1348 | } 1349 | ] 1350 | }, 1351 | { 1352 | "cell_type": "markdown", 1353 | "metadata": { 1354 | "colab_type": "text", 1355 | "id": "scpqVJxCpFyB" 1356 | }, 1357 | "source": [ 1358 | "### *Random Forest*" 1359 | ] 1360 | }, 1361 | { 1362 | "cell_type": "code", 1363 | "metadata": { 1364 | "id": "MNNGZMlRpKnq", 1365 | "colab_type": "code", 1366 | "colab": { 1367 | "base_uri": "https://localhost:8080/", 1368 | "height": 141 1369 | }, 1370 | "outputId": "8d67934e-f177-4270-b2ba-93feba78fef6" 1371 | }, 1372 | "source": [ 1373 | "# Random Forest Regression Model\n", 1374 | "from sklearn.ensemble import RandomForestRegressor\n", 
1375 | "random_regressor = RandomForestRegressor()\n", 1376 | "random_regressor.fit(X_train,y_train)" 1377 | ], 1378 | "execution_count": 29, 1379 | "outputs": [ 1380 | { 1381 | "output_type": "execute_result", 1382 | "data": { 1383 | "text/plain": [ 1384 | "RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',\n", 1385 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", 1386 | " max_samples=None, min_impurity_decrease=0.0,\n", 1387 | " min_impurity_split=None, min_samples_leaf=1,\n", 1388 | " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", 1389 | " n_estimators=100, n_jobs=None, oob_score=False,\n", 1390 | " random_state=None, verbose=0, warm_start=False)" 1391 | ] 1392 | }, 1393 | "metadata": { 1394 | "tags": [] 1395 | }, 1396 | "execution_count": 29 1397 | } 1398 | ] 1399 | }, 1400 | { 1401 | "cell_type": "code", 1402 | "metadata": { 1403 | "colab_type": "code", 1404 | "id": "UpqtMy1jpYTd", 1405 | "colab": {} 1406 | }, 1407 | "source": [ 1408 | "# Predicting results\n", 1409 | "y_pred_rf = random_regressor.predict(X_test)" 1410 | ], 1411 | "execution_count": 30, 1412 | "outputs": [] 1413 | }, 1414 | { 1415 | "cell_type": "code", 1416 | "metadata": { 1417 | "id": "VPsdbFk_pdrH", 1418 | "colab_type": "code", 1419 | "colab": { 1420 | "base_uri": "https://localhost:8080/", 1421 | "height": 87 1422 | }, 1423 | "outputId": "ad050bf7-70e5-4df5-a7ca-7fc0700cf7d5" 1424 | }, 1425 | "source": [ 1426 | "# Random Forest Regression - Model Evaluation\n", 1427 | "print(\"---- Random Forest Regression - Model Evaluation ----\")\n", 1428 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_rf)))\n", 1429 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_rf)))\n", 1430 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_rf))))" 1431 | ], 1432 | "execution_count": 31, 1433 | "outputs": [ 1434 | { 1435 | "output_type": "stream", 1436 | "text": [ 1437 | "---- Random Forest Regression 
- Model Evaluation ----\n", 1438 | "Mean Absolute Error (MAE): 13.76117836573074\n", 1439 | "Mean Squared Error (MSE): 330.21283012058035\n", 1440 | "Root Mean Squared Error (RMSE): 18.171759136654337\n" 1441 | ], 1442 | "name": "stdout" 1443 | } 1444 | ] 1445 | }, 1446 | { 1447 | "cell_type": "markdown", 1448 | "metadata": { 1449 | "id": "wMd5-w5Tpv-Y", 1450 | "colab_type": "text" 1451 | }, 1452 | "source": [ 1453 | "*Note: Since the Linear Regression model performs best compared to the other two, we use this model and try to boost its performance using the AdaBoost algorithm*" 1454 | ] 1455 | }, 1456 | { 1457 | "cell_type": "markdown", 1458 | "metadata": { 1459 | "colab_type": "text", 1460 | "id": "cJNnp9xHssI8" 1461 | }, 1462 | "source": [ 1463 | "### *AdaBoost Algorithm*" 1464 | ] 1465 | }, 1466 | { 1467 | "cell_type": "code", 1468 | "metadata": { 1469 | "colab_type": "code", 1470 | "id": "eua4hjEus0W2", 1471 | "colab": { 1472 | "base_uri": "https://localhost:8080/", 1473 | "height": 123 1474 | }, 1475 | "outputId": "2290f73b-67f2-4551-a77a-7f2942e465c4" 1476 | }, 1477 | "source": [ 1478 | "# AdaBoost Model using Linear Regression as the base learner\n", 1479 | "from sklearn.ensemble import AdaBoostRegressor\n", 1480 | "adb_regressor = AdaBoostRegressor(base_estimator=linear_regressor, n_estimators=100)\n", 1481 | "adb_regressor.fit(X_train, y_train)" 1482 | ], 1483 | "execution_count": 32, 1484 | "outputs": [ 1485 | { 1486 | "output_type": "execute_result", 1487 | "data": { 1488 | "text/plain": [ 1489 | "AdaBoostRegressor(base_estimator=LinearRegression(copy_X=True,\n", 1490 | " fit_intercept=True,\n", 1491 | " n_jobs=None,\n", 1492 | " normalize=False),\n", 1493 | " learning_rate=1.0, loss='linear', n_estimators=100,\n", 1494 | " random_state=None)" 1495 | ] 1496 | }, 1497 | "metadata": { 1498 | "tags": [] 1499 | }, 1500 | "execution_count": 32 1501 | } 1502 | ] 1503 | }, 1504 | { 1505 | "cell_type": "code", 1506 | "metadata": { 1507 | "id": "mBRmYqGvtdaJ", 1508 | 
"colab_type": "code", 1509 | "colab": {} 1510 | }, 1511 | "source": [ 1512 | "# Predicting results\n", 1513 | "y_pred_adb = adb_regressor.predict(X_test)" 1514 | ], 1515 | "execution_count": 33, 1516 | "outputs": [] 1517 | }, 1518 | { 1519 | "cell_type": "code", 1520 | "metadata": { 1521 | "id": "67pZWWEKtgiF", 1522 | "colab_type": "code", 1523 | "colab": { 1524 | "base_uri": "https://localhost:8080/", 1525 | "height": 87 1526 | }, 1527 | "outputId": "709cb77d-a0eb-43f5-aded-3c16ff2d0c98" 1528 | }, 1529 | "source": [ 1530 | "# AdaBoost Regression - Model Evaluation\n", 1531 | "print(\"---- AdaBoost Regression - Model Evaluation ----\")\n", 1532 | "print(\"Mean Absolute Error (MAE): {}\".format(mae(y_test, y_pred_adb)))\n", 1533 | "print(\"Mean Squared Error (MSE): {}\".format(mse(y_test, y_pred_adb)))\n", 1534 | "print(\"Root Mean Squared Error (RMSE): {}\".format(np.sqrt(mse(y_test, y_pred_adb))))" 1535 | ], 1536 | "execution_count": 34, 1537 | "outputs": [ 1538 | { 1539 | "output_type": "stream", 1540 | "text": [ 1541 | "---- AdaBoost Regression - Model Evaluation ----\n", 1542 | "Mean Absolute Error (MAE): 12.217362349360771\n", 1543 | "Mean Squared Error (MSE): 249.6020058588232\n", 1544 | "Root Mean Squared Error (RMSE): 15.798797608008757\n" 1545 | ], 1546 | "name": "stdout" 1547 | } 1548 | ] 1549 | }, 1550 | { 1551 | "cell_type": "markdown", 1552 | "metadata": { 1553 | "id": "nbvk3I7ZvVG4", 1554 | "colab_type": "text" 1555 | }, 1556 | "source": [ 1557 | "*Note: Using AdaBoost did not reduce the error to a significant level. Hence, we will use the simple linear regression model for prediction*" 1558 | ] 1559 | }, 1560 | { 1561 | "cell_type": "markdown", 1562 | "metadata": { 1563 | "id": "2Xl7f4FQybFg", 1564 | "colab_type": "text" 1565 | }, 1566 | "source": [ 1567 | "## **Predictions**\n", 1568 | "• Model *trained on* the data from **IPL Seasons 1 to 9** ie: (2008 to 2016)
\n", 1569 | "• Model *tested on* data from **IPL Season 10** ie: (2017)
def predict_score(batting_team='Chennai Super Kings', bowling_team='Mumbai Indians', overs=5.1, runs=50, wickets=0, runs_in_prev_5=50, wickets_in_prev_5=0, model=None):
    """Predict the first-innings final score from the current match state.

    Builds a single feature row laid out as
    ``[batting-team one-hot (8), bowling-team one-hot (8), overs, runs,
    wickets, runs_in_prev_5, wickets_in_prev_5]`` and passes it to the
    regressor's ``predict`` method.

    Parameters
    ----------
    batting_team, bowling_team : str
        One of the eight team names listed in ``teams`` below.
    overs : float
        Overs bowled so far; forwarded to the model unchanged.
    runs, wickets : int
        Current score and wickets fallen.
    runs_in_prev_5, wickets_in_prev_5 : int
        Runs scored and wickets lost over the previous five overs.
    model : object, optional
        Any fitted regressor exposing ``predict(X)``. When omitted, the
        notebook-level ``linear_regressor`` is used, as before.

    Returns
    -------
    int
        The model's prediction truncated toward zero with ``int``.

    Raises
    ------
    ValueError
        If either team name is not one of the eight supported teams.
        Previously such a name was silently skipped, producing a short,
        misaligned feature row.
    """
    # Same order as the original hand-written encoding.
    # NOTE(review): presumably this matches the one-hot column order produced
    # when the training frame was built - confirm against that cell.
    teams = (
        'Chennai Super Kings',
        'Delhi Daredevils',
        'Kings XI Punjab',
        'Kolkata Knight Riders',
        'Mumbai Indians',
        'Rajasthan Royals',
        'Royal Challengers Bangalore',
        'Sunrisers Hyderabad',
    )

    def _one_hot(team, role):
        # Fail early with a readable message instead of emitting a row with
        # the wrong number of columns.
        if team not in teams:
            raise ValueError(
                "Unknown {} {!r}; expected one of: {}".format(role, team, ", ".join(teams))
            )
        return [1 if team == name else 0 for name in teams]

    features = (
        _one_hot(batting_team, 'batting_team')
        + _one_hot(bowling_team, 'bowling_team')
        + [overs, runs, wickets, runs_in_prev_5, wickets_in_prev_5]
    )

    # Resolve the default lazily so the function can be defined before the
    # global regressor exists and can be reused with other fitted models.
    if model is None:
        model = linear_regressor

    # predict() receives a 2-D array holding exactly one sample.
    return int(model.predict(np.array([features]))[0])
\n", 1641 | "• IPL : Season 11
\n", 1642 | "• Match number: 13
\n", 1643 | "• Teams: Kolkata Knight Riders vs. Delhi Daredevils
\n", 1644 | "• First Innings final score: 200/9\n" 1645 | ] 1646 | }, 1647 | { 1648 | "cell_type": "code", 1649 | "metadata": { 1650 | "id": "fhgissiE10Bx", 1651 | "colab_type": "code", 1652 | "colab": { 1653 | "base_uri": "https://localhost:8080/", 1654 | "height": 34 1655 | }, 1656 | "outputId": "94bfaafb-6305-4187-c813-9c2c9af0a3d9" 1657 | }, 1658 | "source": [ 1659 | "final_score = predict_score(batting_team='Kolkata Knight Riders', bowling_team='Delhi Daredevils', overs=9.2, runs=79, wickets=2, runs_in_prev_5=60, wickets_in_prev_5=1)\n", 1660 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1661 | ], 1662 | "execution_count": 36, 1663 | "outputs": [ 1664 | { 1665 | "output_type": "stream", 1666 | "text": [ 1667 | "The final predicted score (range): 159 to 174\n" 1668 | ], 1669 | "name": "stdout" 1670 | } 1671 | ] 1672 | }, 1673 | { 1674 | "cell_type": "markdown", 1675 | "metadata": { 1676 | "colab_type": "text", 1677 | "id": "IMAYg53PfbLm" 1678 | }, 1679 | "source": [ 1680 | "### **Prediction 2**\n", 1681 | "• Date: 7th May 2018
\n", 1682 | "• IPL : Season 11
\n", 1683 | "• Match number: 39
\n", 1684 | "• Teams: Sunrisers Hyderabad vs. Royal Challengers Bangalore
\n", 1685 | "• First Innings final score: 146/10\n" 1686 | ] 1687 | }, 1688 | { 1689 | "cell_type": "code", 1690 | "metadata": { 1691 | "id": "eVtlLk3afeDT", 1692 | "colab_type": "code", 1693 | "colab": { 1694 | "base_uri": "https://localhost:8080/", 1695 | "height": 34 1696 | }, 1697 | "outputId": "62562997-ce49-41c5-8341-430965293e35" 1698 | }, 1699 | "source": [ 1700 | "final_score = predict_score(batting_team='Sunrisers Hyderabad', bowling_team='Royal Challengers Bangalore', overs=10.5, runs=67, wickets=3, runs_in_prev_5=29, wickets_in_prev_5=1)\n", 1701 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1702 | ], 1703 | "execution_count": 37, 1704 | "outputs": [ 1705 | { 1706 | "output_type": "stream", 1707 | "text": [ 1708 | "The final predicted score (range): 138 to 153\n" 1709 | ], 1710 | "name": "stdout" 1711 | } 1712 | ] 1713 | }, 1714 | { 1715 | "cell_type": "markdown", 1716 | "metadata": { 1717 | "colab_type": "text", 1718 | "id": "M-3FC7VhUzdK" 1719 | }, 1720 | "source": [ 1721 | "### **Prediction 3**\n", 1722 | "• Date: 17th May 2018
\n", 1723 | "• IPL : Season 11
\n", 1724 | "• Match number: 50
\n", 1725 | "• Teams: Mumbai Indians vs. Kings XI Punjab
\n", 1726 | "• First Innings final score: 186/8
\n" 1727 | ] 1728 | }, 1729 | { 1730 | "cell_type": "code", 1731 | "metadata": { 1732 | "id": "CVgb01MY29NQ", 1733 | "colab_type": "code", 1734 | "colab": { 1735 | "base_uri": "https://localhost:8080/", 1736 | "height": 34 1737 | }, 1738 | "outputId": "a1a3dc25-0ba3-489d-ff65-a4a3c4829c22" 1739 | }, 1740 | "source": [ 1741 | "final_score = predict_score(batting_team='Mumbai Indians', bowling_team='Kings XI Punjab', overs=14.1, runs=136, wickets=4, runs_in_prev_5=50, wickets_in_prev_5=0)\n", 1742 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1743 | ], 1744 | "execution_count": 38, 1745 | "outputs": [ 1746 | { 1747 | "output_type": "stream", 1748 | "text": [ 1749 | "The final predicted score (range): 180 to 195\n" 1750 | ], 1751 | "name": "stdout" 1752 | } 1753 | ] 1754 | }, 1755 | { 1756 | "cell_type": "markdown", 1757 | "metadata": { 1758 | "colab_type": "text", 1759 | "id": "Ix3XW3y2iPgc" 1760 | }, 1761 | "source": [ 1762 | "### **Prediction 4**\n", 1763 | "• Date: 30th March 2019
\n", 1764 | "• IPL : Season 12
\n", 1765 | "• Match number: 9
\n", 1766 | "• Teams: Mumbai Indians vs. Kings XI Punjab
\n", 1767 | "• First Innings final score: 176/7\n" 1768 | ] 1769 | }, 1770 | { 1771 | "cell_type": "code", 1772 | "metadata": { 1773 | "id": "FUWG9gsviudD", 1774 | "colab_type": "code", 1775 | "colab": { 1776 | "base_uri": "https://localhost:8080/", 1777 | "height": 34 1778 | }, 1779 | "outputId": "eaf6b5c4-d1ef-4f13-9ba4-8457fbe6dd5d" 1780 | }, 1781 | "source": [ 1782 | "final_score = predict_score(batting_team='Mumbai Indians', bowling_team='Kings XI Punjab', overs=12.3, runs=113, wickets=2, runs_in_prev_5=55, wickets_in_prev_5=0)\n", 1783 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1784 | ], 1785 | "execution_count": 39, 1786 | "outputs": [ 1787 | { 1788 | "output_type": "stream", 1789 | "text": [ 1790 | "The final predicted score (range): 179 to 194\n" 1791 | ], 1792 | "name": "stdout" 1793 | } 1794 | ] 1795 | }, 1796 | { 1797 | "cell_type": "markdown", 1798 | "metadata": { 1799 | "colab_type": "text", 1800 | "id": "pNs2zIxlW6Ou" 1801 | }, 1802 | "source": [ 1803 | "### **Prediction 5**\n", 1804 | "• Date: 11th April 2019
\n", 1805 | "• IPL : Season 12
\n", 1806 | "• Match number: 25
\n", 1807 | "• Teams: Rajasthan Royals vs. Chennai Super Kings
\n", 1808 | "• First Innings final score: 151/7\n" 1809 | ] 1810 | }, 1811 | { 1812 | "cell_type": "code", 1813 | "metadata": { 1814 | "colab_type": "code", 1815 | "id": "hByMrV6l29YV", 1816 | "colab": { 1817 | "base_uri": "https://localhost:8080/", 1818 | "height": 34 1819 | }, 1820 | "outputId": "1cff54c2-0e7b-46f9-dd02-4437bd0d4028" 1821 | }, 1822 | "source": [ 1823 | "final_score = predict_score(batting_team='Rajasthan Royals', bowling_team='Chennai Super Kings', overs=13.3, runs=92, wickets=5, runs_in_prev_5=27, wickets_in_prev_5=2)\n", 1824 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1825 | ], 1826 | "execution_count": 40, 1827 | "outputs": [ 1828 | { 1829 | "output_type": "stream", 1830 | "text": [ 1831 | "The final predicted score (range): 128 to 143\n" 1832 | ], 1833 | "name": "stdout" 1834 | } 1835 | ] 1836 | }, 1837 | { 1838 | "cell_type": "markdown", 1839 | "metadata": { 1840 | "colab_type": "text", 1841 | "id": "hYLKJMHShBn8" 1842 | }, 1843 | "source": [ 1844 | "### **Prediction 6**\n", 1845 | "• Date: 14th April 2019
\n", 1846 | "• IPL : Season 12
\n", 1847 | "• Match number: 30
\n", 1848 | "• Teams: Delhi Daredevils vs. Sunrisers Hyderabad
\n", 1849 | "• First Innings final score: 155/7\n" 1850 | ] 1851 | }, 1852 | { 1853 | "cell_type": "code", 1854 | "metadata": { 1855 | "id": "dAmNR2WLhD2F", 1856 | "colab_type": "code", 1857 | "colab": { 1858 | "base_uri": "https://localhost:8080/", 1859 | "height": 34 1860 | }, 1861 | "outputId": "f6d18cd0-5eeb-4999-a6d9-9b303324fa2a" 1862 | }, 1863 | "source": [ 1864 | "final_score = predict_score(batting_team='Delhi Daredevils', bowling_team='Sunrisers Hyderabad', overs=11.5, runs=98, wickets=3, runs_in_prev_5=41, wickets_in_prev_5=1)\n", 1865 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1866 | ], 1867 | "execution_count": 41, 1868 | "outputs": [ 1869 | { 1870 | "output_type": "stream", 1871 | "text": [ 1872 | "The final predicted score (range): 157 to 172\n" 1873 | ], 1874 | "name": "stdout" 1875 | } 1876 | ] 1877 | }, 1878 | { 1879 | "cell_type": "markdown", 1880 | "metadata": { 1881 | "colab_type": "text", 1882 | "id": "zxjq482uaQpc" 1883 | }, 1884 | "source": [ 1885 | "### **Prediction 7**\n", 1886 | "• Date: 10th May 2019
\n", 1887 | "• IPL : Season 12
\n", 1888 | "• Match number: 59 (Qualifier 2)
\n", 1889 | "• Teams: Delhi Daredevils vs. Chennai Super Kings
\n", 1890 | "• First Innings final score: 147/9\n" 1891 | ] 1892 | }, 1893 | { 1894 | "cell_type": "code", 1895 | "metadata": { 1896 | "id": "UR6QNPK_aSSj", 1897 | "colab_type": "code", 1898 | "colab": { 1899 | "base_uri": "https://localhost:8080/", 1900 | "height": 34 1901 | }, 1902 | "outputId": "1afc6b5b-d1bc-4f55-9c61-362e31cb4e80" 1903 | }, 1904 | "source": [ 1905 | "final_score = predict_score(batting_team='Delhi Daredevils', bowling_team='Chennai Super Kings', overs=10.2, runs=68, wickets=3, runs_in_prev_5=29, wickets_in_prev_5=1)\n", 1906 | "print(\"The final predicted score (range): {} to {}\".format(final_score-10, final_score+5))" 1907 | ], 1908 | "execution_count": 42, 1909 | "outputs": [ 1910 | { 1911 | "output_type": "stream", 1912 | "text": [ 1913 | "The final predicted score (range): 137 to 152\n" 1914 | ], 1915 | "name": "stdout" 1916 | } 1917 | ] 1918 | }, 1919 | { 1920 | "cell_type": "markdown", 1921 | "metadata": { 1922 | "id": "ZaEK5rEmjp8K", 1923 | "colab_type": "text" 1924 | }, 1925 | "source": [ 1926 | "*Note: In IPL, it is very difficult to predict the actual score because in a moment of time the game can completely turn upside down!*\n" 1927 | ] 1928 | } 1929 | ] 1930 | } --------------------------------------------------------------------------------