└── Dendrogram_linkage.ipynb /Dendrogram_linkage.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyMcNO2yUmh/RFX/M4tqhRYy", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "BePFV0-xpKYW" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import pandas as pd\n", 38 | "from scipy.cluster.hierarchy import dendrogram, linkage\n", 39 | "from sklearn.preprocessing import OneHotEncoder\n", 40 | "import matplotlib.pyplot as plt # Import for plotting\n", 41 | "from tqdm import tqdm # Import for progress bar" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "source": [ 47 | "data = pd.read_csv('IPL_Ball_by_Ball_2008_2022.csv')" 48 | ], 49 | "metadata": { 50 | "id": "QOzZBzqZqMDd" 51 | }, 52 | "execution_count": null, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "source": [ 58 | "print(data.columns.tolist())" 59 | ], 60 | "metadata": { 61 | "colab": { 62 | "base_uri": "https://localhost:8080/" 63 | }, 64 | "id": "1uGFVwUhsoQy", 65 | "outputId": "d119421f-aa46-4187-c815-3dbf7010753a" 66 | }, 67 | "execution_count": null, 68 | "outputs": [ 69 | { 70 | "output_type": "stream", 71 | "name": "stdout", 72 | "text": [ 73 | "['ID', 'innings', 'overs', 'ballnumber', 'batter', 'bowler', 'non-striker', 'extra_type', 'batsman_run', 'extras_run', 'total_run', 'non_boundary', 'isWicketDelivery', 'player_out', 'kind', 'fielders_involved', 'BattingTeam']\n" 74 | ] 75 | } 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "source": [ 81 | "# Select relevant features for clustering\n", 82 | "features = ['batsman_run', 'extras_run', 'total_run', 'non_boundary', 'isWicketDelivery']" 83 | ], 84 | "metadata": { 85 | "id": "L-_LMKCosTSS" 86 | }, 87 | "execution_count": null, 88 | "outputs": [] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "source": [ 93 | "# Check data type of 'isWicketDelivery' (optional)\n", 94 | "print(data['isWicketDelivery'].dtype) # Check if it's already numerical" 95 | ], 96 | "metadata": { 97 | "colab": { 98 | "base_uri": "https://localhost:8080/" 99 | }, 100 | "id": "5NibnoCK4t_I", 101 | "outputId": "f36d55f7-73ea-4de6-fea0-b3ad6abd1764" 102 | }, 103 | "execution_count": null, 104 | "outputs": [ 105 | { 106 | "output_type": "stream", 107 | "name": "stdout", 108 | "text": [ 109 | "int64\n" 110 | ] 111 | } 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "source": [ 117 | "# One-hot encoding (if necessary)\n", 118 | "categorical_feature = ['isWicketDelivery'] # Assuming 'isWicketDelivery' is categorical\n", 119 | "if pd.api.types.is_string_dtype(data[categorical_feature]): # Check if string type\n", 120 | " encoder = OneHotEncoder(sparse=False) # Consider sparse=True for large datasets\n", 121 | " data[categorical_feature] = encoder.fit_transform(data[categorical_feature])" 122 | ], 123 | "metadata": { 124 | "id": "1Of7ZUqCw-6I" 125 | }, 126 | "execution_count": null, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "source": [ 132 | "# Select data for clustering\n", 133 | "X = data[features]" 134 | ], 135 | "metadata": { 136 | "id": "cRt0JAswuZ2h" 137 | }, 138 | "execution_count": null, 139 | "outputs": [] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "source": [ 144 | "# Choose distance metric (consider data types)\n", 145 | "# Options: 'euclidean', 'manhattan', 'cosine' (for normalized data)\n", 146 | "distance_metric = 'euclidean'" 147 | ], 148 | "metadata": { 149 | "id": "5R_QYEHHum_P" 150 | }, 151 | "execution_count": null, 152 | "outputs": [] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "source": [ 157 | "from tqdm import tqdm\n", 158 | "from scipy.spatial.distance import pdist" 159 | ], 160 | "metadata": { 161 | "id": "OPqzm-Ga6IRk" 162 | }, 163 | "execution_count": null, 164 | "outputs": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "source": [ 169 | "!pip install scipy\n", 170 | "import numpy as np\n", 171 | "from scipy.spatial.distance import pdist\n", 172 | "\n", 173 | "def calculate_distance_matrix(X, metric):\n", 174 | " distance_matrix = []\n", 175 | " features = range(X.shape[1])\n", 176 | " for i in features:\n", 177 | " for j in features:\n", 178 | " if i != j:\n", 179 | " # Combine data points into a single argument\n", 180 | " data_point = np.array([X[i], X[j]])\n", 181 | " # Pass only the combined data point and metric\n", 182 | " distance_matrix.append(pdist(data_point, metric))\n", 183 | " return np.asarray(distance_matrix)" 184 | ], 185 | "metadata": { 186 | "colab": { 187 | "base_uri": "https://localhost:8080/" 188 | }, 189 | "id": "2gcLO9XPu6ue", 190 | "outputId": "1de4c378-0c55-4b64-f2b5-266498db5e82" 191 | }, 192 | "execution_count": null, 193 | "outputs": [ 194 | { 195 | "output_type": "stream", 196 | "name": "stdout", 197 | "text": [ 198 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.11.4)\n", 199 | "Requirement already satisfied: numpy<1.28.0,>=1.21.6 in /usr/local/lib/python3.10/dist-packages (from scipy) (1.25.2)\n" 200 | ] 201 | } 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "source": [ 207 | "!pip install scipy\n", 208 | "import scipy.spatial.distance as distance" 209 | ], 210 | "metadata": { 211 | "colab": { 212 | "base_uri": "https://localhost:8080/" 213 | }, 214 | "id": "J--HRkDe98gL", 215 | "outputId": "fcbeea6f-8fca-44f2-848b-27174dbea8a5" 216 | }, 217 | "execution_count": null, 218 | "outputs": [ 219 | { 220 | "output_type": "stream", 221 | "name": "stdout", 222 | "text": [ 223 | "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (1.11.4)\n", 224 | "Requirement already satisfied: numpy<1.28.0,>=1.21.6 in /usr/local/lib/python3.10/dist-packages (from scipy) (1.25.2)\n" 225 | ] 226 | } 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "source": [ 232 | "import numpy as np\n", 233 | "# Define the data points (replace with your actual data)\n", 234 | "X = np.array([[1, 2], [3, 4], [5, 6]])\n", 235 | "\n", 236 | "# Define the distance metric (replace with your desired metric)\n", 237 | "distance_metric = 'euclidean'\n", 238 | "\n", 239 | "# Calculate the distance matrix\n", 240 | "distance_matrix = distance.cdist(X, X, metric=distance_metric)\n", 241 | "\n", 242 | "# Perform hierarchical clustering (Ward's method)\n", 243 | "Z = linkage(distance_matrix, method='ward')\n" 244 | ], 245 | "metadata": { 246 | "colab": { 247 | "base_uri": "https://localhost:8080/" 248 | }, 249 | "id": "Il9uo0W70gaW", 250 | "outputId": "40a775fb-7024-4bb2-81af-c4ce804eae42" 251 | }, 252 | "execution_count": null, 253 | "outputs": [ 254 | { 255 | "output_type": "stream", 256 | "name": "stderr", 257 | "text": [ 258 | ":12: ClusterWarning: scipy.cluster: The symmetric non-negative hollow observation matrix looks suspiciously like an uncondensed distance matrix\n", 259 | " Z = linkage(distance_matrix, method='ward')\n" 260 | ] 261 | } 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "source": [ 267 | "import matplotlib.pyplot as plt\n", 268 | "from scipy.cluster.hierarchy import dendrogram, linkage\n", 269 | "print(Z)" 270 | ], 271 | "metadata": { 272 | "colab": { 273 | "base_uri": "https://localhost:8080/" 274 | }, 275 | "id": "9XiRJy3i-eK9", 276 | "outputId": "255063ee-99c1-4ba3-8d7e-27f7b7dead14" 277 | }, 278 | "execution_count": null, 279 | "outputs": [ 280 | { 281 | "output_type": "stream", 282 | "name": "stdout", 283 | "text": [ 284 | "[[0. 1. 4.89897949 2. ]\n", 285 | " [2. 3. 7.11805217 3. ]]\n" 286 | ] 287 | } 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "source": [ 293 | "print(data.head())" 294 | ], 295 | "metadata": { 296 | "colab": { 297 | "base_uri": "https://localhost:8080/" 298 | }, 299 | "id": "wyIGmcwX-lRw", 300 | "outputId": "97d303c4-9c2a-43dd-99e5-3681a4fb0212" 301 | }, 302 | "execution_count": null, 303 | "outputs": [ 304 | { 305 | "output_type": "stream", 306 | "name": "stdout", 307 | "text": [ 308 | " ID innings overs ballnumber batter bowler \\\n", 309 | "0 1312200 1 0 1 YBK Jaiswal Mohammed Shami \n", 310 | "1 1312200 1 0 2 YBK Jaiswal Mohammed Shami \n", 311 | "2 1312200 1 0 3 JC Buttler Mohammed Shami \n", 312 | "3 1312200 1 0 4 YBK Jaiswal Mohammed Shami \n", 313 | "4 1312200 1 0 5 YBK Jaiswal Mohammed Shami \n", 314 | "\n", 315 | " non-striker extra_type batsman_run extras_run total_run non_boundary \\\n", 316 | "0 JC Buttler NaN 0 0 0 0 \n", 317 | "1 JC Buttler legbyes 0 1 1 0 \n", 318 | "2 YBK Jaiswal NaN 1 0 1 0 \n", 319 | "3 JC Buttler NaN 0 0 0 0 \n", 320 | "4 JC Buttler NaN 0 0 0 0 \n", 321 | "\n", 322 | " isWicketDelivery player_out kind fielders_involved BattingTeam \n", 323 | "0 0 NaN NaN NaN Rajasthan Royals \n", 324 | "1 0 NaN NaN NaN Rajasthan Royals \n", 325 | "2 0 NaN NaN NaN Rajasthan Royals \n", 326 | "3 0 NaN NaN NaN Rajasthan Royals \n", 327 | "4 0 NaN NaN NaN Rajasthan Royals \n" 328 | ] 329 | } 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "source": [ 335 | "print(Z.shape)\n", 336 | "print(data['BattingTeam'].shape)" 337 | ], 338 | "metadata": { 339 | "colab": { 340 | "base_uri": "https://localhost:8080/" 341 | }, 342 | "id": "E3pyH8-t-1dT", 343 | "outputId": "2bc4003f-79b2-4307-82e0-21afaf3d1238" 344 | }, 345 | "execution_count": null, 346 | "outputs": [ 347 | { 348 | "output_type": "stream", 349 | "name": "stdout", 350 | "text": [ 351 | "(2, 4)\n", 352 | "(225954,)\n" 353 | ] 354 | } 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "source": [ 360 | "print(Z.shape[0])\n", 361 | "print(data['BattingTeam'].shape[0])" 362 | ], 363 | "metadata": { 364 | "colab": { 365 | "base_uri": "https://localhost:8080/" 366 | }, 367 | "id": "_YMNJakO_x0F", 368 | "outputId": "9b8a8ff4-a65a-43ad-9a48-f1e5f01c5057" 369 | }, 370 | "execution_count": null, 371 | "outputs": [ 372 | { 373 | "output_type": "stream", 374 | "name": "stdout", 375 | "text": [ 376 | "2\n", 377 | "225954\n" 378 | ] 379 | } 380 | ] 381 | }, 382 | { 383 | "cell_type": "code", 384 | "source": [ 385 | "# Check if there are any missing values in data['BattingTeam']\n", 386 | "missing_values = data['BattingTeam'].isnull().sum()\n", 387 | "if missing_values > 0:\n", 388 | " print(\"There are missing values in data['BattingTeam'].\")" 389 | ], 390 | "metadata": { 391 | "id": "H0YL_2bN_K0E" 392 | }, 393 | "execution_count": null, 394 | "outputs": [] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "source": [ 399 | "import pandas as pd\n", 400 | "import numpy as np" 401 | ], 402 | "metadata": { 403 | "id": "M1ApY2EWAIFL" 404 | }, 405 | "execution_count": null, 406 | "outputs": [] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "source": [ 411 | "data.head()" 412 | ], 413 | "metadata": { 414 | "colab": { 415 | "base_uri": "https://localhost:8080/", 416 | "height": 330 417 | }, 418 | "id": "w14i0V1FADhB", 419 | "outputId": "f53aa06c-ed7b-4f9f-e1d9-0d21bff7173b" 420 | }, 421 | "execution_count": null, 422 | "outputs": [ 423 | { 424 | "output_type": "execute_result", 425 | "data": { 426 | "text/plain": [ 427 | " ID innings overs ballnumber batter bowler \\\n", 428 | "0 1312200 1 0 1 YBK Jaiswal Mohammed Shami \n", 429 | "1 1312200 1 0 2 YBK Jaiswal Mohammed Shami \n", 430 | "2 1312200 1 0 3 JC Buttler Mohammed Shami \n", 431 | "3 1312200 1 0 4 YBK Jaiswal Mohammed Shami \n", 432 | "4 1312200 1 0 5 YBK Jaiswal Mohammed Shami \n", 433 | "\n", 434 | " non-striker extra_type batsman_run extras_run total_run non_boundary \\\n", 435 | "0 JC Buttler NaN 0 0 0 0 \n", 436 | "1 JC Buttler legbyes 0 1 1 0 \n", 437 | "2 YBK Jaiswal NaN 1 0 1 0 \n", 438 | "3 JC Buttler NaN 0 0 0 0 \n", 439 | "4 JC Buttler NaN 0 0 0 0 \n", 440 | "\n", 441 | " isWicketDelivery player_out kind fielders_involved BattingTeam \n", 442 | "0 0 NaN NaN NaN Rajasthan Royals \n", 443 | "1 0 NaN NaN NaN Rajasthan Royals \n", 444 | "2 0 NaN NaN NaN Rajasthan Royals \n", 445 | "3 0 NaN NaN NaN Rajasthan Royals \n", 446 | "4 0 NaN NaN NaN Rajasthan Royals " 447 | ], 448 | "text/html": [ 449 | "\n", 450 | "
\n", 451 | "
\n", 452 | "\n", 465 | "\n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | "
IDinningsoversballnumberbatterbowlernon-strikerextra_typebatsman_runextras_runtotal_runnon_boundaryisWicketDeliveryplayer_outkindfielders_involvedBattingTeam
01312200101YBK JaiswalMohammed ShamiJC ButtlerNaN00000NaNNaNNaNRajasthan Royals
11312200102YBK JaiswalMohammed ShamiJC Buttlerlegbyes01100NaNNaNNaNRajasthan Royals
21312200103JC ButtlerMohammed ShamiYBK JaiswalNaN10100NaNNaNNaNRajasthan Royals
31312200104YBK JaiswalMohammed ShamiJC ButtlerNaN00000NaNNaNNaNRajasthan Royals
41312200105YBK JaiswalMohammed ShamiJC ButtlerNaN00000NaNNaNNaNRajasthan Royals
\n", 591 | "
\n", 592 | "
\n", 593 | "\n", 594 | "
\n", 595 | " \n", 603 | "\n", 604 | " \n", 644 | "\n", 645 | " \n", 669 | "
\n", 670 | "\n", 671 | "\n", 672 | "
\n", 673 | " \n", 684 | "\n", 685 | "\n", 774 | "\n", 775 | " \n", 797 | "
\n", 798 | "\n", 799 | "
\n", 800 | "
\n" 801 | ], 802 | "application/vnd.google.colaboratory.intrinsic+json": { 803 | "type": "dataframe", 804 | "variable_name": "data" 805 | } 806 | }, 807 | "metadata": {}, 808 | "execution_count": 40 809 | } 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "source": [ 815 | "'Cluster' in data.columns" 816 | ], 817 | "metadata": { 818 | "colab": { 819 | "base_uri": "https://localhost:8080/" 820 | }, 821 | "id": "Lex6BdRWACnE", 822 | "outputId": "34b31cde-e9a7-4e7c-f301-b6d2f72e945b" 823 | }, 824 | "execution_count": null, 825 | "outputs": [ 826 | { 827 | "output_type": "execute_result", 828 | "data": { 829 | "text/plain": [ 830 | "False" 831 | ] 832 | }, 833 | "metadata": {}, 834 | "execution_count": 41 835 | } 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "source": [ 841 | "import pandas as pd\n", 842 | "import matplotlib.pyplot as plt\n", 843 | "from scipy.cluster.hierarchy import dendrogram" 844 | ], 845 | "metadata": { 846 | "id": "4HBLNb_DAaF_" 847 | }, 848 | "execution_count": null, 849 | "outputs": [] 850 | }, 851 | { 852 | "cell_type": "code", 853 | "source": [ 854 | "print(Z.shape)\n", 855 | "print(data['BattingTeam'].shape)" 856 | ], 857 | "metadata": { 858 | "colab": { 859 | "base_uri": "https://localhost:8080/" 860 | }, 861 | "id": "a1pMGsNyAeMo", 862 | "outputId": "c976082f-8bb3-4dfe-e21f-8a137aaf5702" 863 | }, 864 | "execution_count": null, 865 | "outputs": [ 866 | { 867 | "output_type": "stream", 868 | "name": "stdout", 869 | "text": [ 870 | "(2, 4)\n", 871 | "(225954,)\n" 872 | ] 873 | } 874 | ] 875 | }, 876 | { 877 | "cell_type": "code", 878 | "source": [ 879 | "print(data.head())" 880 | ], 881 | "metadata": { 882 | "colab": { 883 | "base_uri": "https://localhost:8080/" 884 | }, 885 | "id": "EjuvD3oiBBOD", 886 | "outputId": "6d43b612-051a-4509-fa4c-6d90690198b5" 887 | }, 888 | "execution_count": null, 889 | "outputs": [ 890 | { 891 | "output_type": "stream", 892 | "name": "stdout", 893 | "text": [ 894 | " ID innings overs ballnumber batter bowler \\\n", 895 | "0 1312200 1 0 1 YBK Jaiswal Mohammed Shami \n", 896 | "1 1312200 1 0 2 YBK Jaiswal Mohammed Shami \n", 897 | "2 1312200 1 0 3 JC Buttler Mohammed Shami \n", 898 | "3 1312200 1 0 4 YBK Jaiswal Mohammed Shami \n", 899 | "4 1312200 1 0 5 YBK Jaiswal Mohammed Shami \n", 900 | "\n", 901 | " non-striker extra_type batsman_run extras_run total_run non_boundary \\\n", 902 | "0 JC Buttler NaN 0 0 0 0 \n", 903 | "1 JC Buttler legbyes 0 1 1 0 \n", 904 | "2 YBK Jaiswal NaN 1 0 1 0 \n", 905 | "3 JC Buttler NaN 0 0 0 0 \n", 906 | "4 JC Buttler NaN 0 0 0 0 \n", 907 | "\n", 908 | " isWicketDelivery player_out kind fielders_involved BattingTeam \n", 909 | "0 0 NaN NaN NaN Rajasthan Royals \n", 910 | "1 0 NaN NaN NaN Rajasthan Royals \n", 911 | "2 0 NaN NaN NaN Rajasthan Royals \n", 912 | "3 0 NaN NaN NaN Rajasthan Royals \n", 913 | "4 0 NaN NaN NaN Rajasthan Royals \n" 914 | ] 915 | } 916 | ] 917 | }, 918 | { 919 | "cell_type": "code", 920 | "source": [ 921 | "print(f\"Shape of Z: {Z.shape}\")\n", 922 | "print(f\"Shape of labels: {data['BattingTeam'][:-1].shape}\")" 923 | ], 924 | "metadata": { 925 | "colab": { 926 | "base_uri": "https://localhost:8080/" 927 | }, 928 | "id": "pWpv7P8YBPF6", 929 | "outputId": "80127805-146b-48f0-cc8f-e9437c8c03a5" 930 | }, 931 | "execution_count": null, 932 | "outputs": [ 933 | { 934 | "output_type": "stream", 935 | "name": "stdout", 936 | "text": [ 937 | "Shape of Z: (2, 4)\n", 938 | "Shape of labels: (225953,)\n" 939 | ] 940 | } 941 | ] 942 | }, 943 | { 944 | "cell_type": "code", 945 | "source": [ 946 | "# Assuming you have the distance matrix ('distance_matrix') and linkage matrix ('Z') calculated\n", 947 | "\n", 948 | "# Check if distance_matrix and Z are not empty\n", 949 | "if distance_matrix.size > 0 and Z.size > 0:\n", 950 | "\n", 951 | " # Ensure data['BattingTeam'] has the same length as the number of data points\n", 952 | " if len(data) == Z.shape[0]: # Check data length vs. linkage matrix size\n", 953 | " plt.figure(figsize=(10, 6))\n", 954 | " dendrogram(Z, labels=data['BattingTeam']) # Assuming 'BattingTeam' is the team name column\n", 955 | " plt.title('IPL Match Clustering (Ward\\'s Method)')\n", 956 | " plt.show()\n", 957 | " else:\n", 958 | " print(\"Error: Data size and linkage matrix size don't match. Check data selection.\")\n", 959 | "else:\n", 960 | " print(\"Error: Distance matrix or linkage matrix is empty. Check data and calculations.\")\n" 961 | ], 962 | "metadata": { 963 | "colab": { 964 | "base_uri": "https://localhost:8080/" 965 | }, 966 | "id": "JwHA8cZmAhHo", 967 | "outputId": "a32004f8-6fcf-4ce6-f7e8-0f0fe2bc4a26" 968 | }, 969 | "execution_count": null, 970 | "outputs": [ 971 | { 972 | "output_type": "stream", 973 | "name": "stdout", 974 | "text": [ 975 | "Error: Data size and linkage matrix size don't match. Check data selection.\n" 976 | ] 977 | } 978 | ] 979 | } 980 | ] 981 | } --------------------------------------------------------------------------------