├── 3.Decision Tree Using ID3.ipynb ├── ID3+-+Algorithm+ID3(Examples,+TargetAttribute,+Attributes).jpg ├── PlayTennis.csv ├── README.md └── _config.yml /3.Decision Tree Using ID3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "raw", 5 | "metadata": {}, 6 | "source": [ 7 | "Write a program to demonstrate the working of the decision tree based ID3\n", 8 | "algorithm. Use an appropriate data set for building the decision tree and \n", 9 | "apply this knowledge to classify a new sample." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Import Play Tennis Data " 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 149, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stdout", 26 | "output_type": "stream", 27 | "text": [ 28 | "\n", 29 | " Given Play Tennis Data Set:\n", 30 | "\n", 31 | " PlayTennis Outlook Temperature Humidity Wind\n", 32 | "0 No Sunny Hot High Weak\n", 33 | "1 No Sunny Hot High Strong\n", 34 | "2 Yes Overcast Hot High Weak\n", 35 | "3 Yes Rain Mild High Weak\n", 36 | "4 Yes Rain Cool Normal Weak\n", 37 | "5 No Rain Cool Normal Strong\n", 38 | "6 Yes Overcast Cool Normal Strong\n", 39 | "7 No Sunny Mild High Weak\n", 40 | "8 Yes Sunny Cool Normal Weak\n", 41 | "9 Yes Rain Mild Normal Weak\n", 42 | "10 Yes Sunny Mild Normal Strong\n", 43 | "11 Yes Overcast Mild High Strong\n", 44 | "12 Yes Overcast Hot Normal Weak\n", 45 | "13 No Rain Mild High Strong\n" 46 | ] 47 | } 48 | ], 49 | "source": [ 50 | "# Author : Dr.Thyagaraju G S , Context Innovations Lab , DEpt of CSE , SDMIT - Ujire \n", 51 | "# Date : July 11 2018 \n", 52 | "import pandas as pd\n", 53 | "from pandas import DataFrame \n", 54 | "df_tennis = DataFrame.from_csv('C:\\\\Users\\\\Dr.Thyagaraju\\\\Desktop\\\\Data\\\\PlayTennis.csv')\n", 55 | "print(\"\\n Given Play Tennis Data Set:\\n\\n\", df_tennis)" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 206, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "'PlayTennis'" 67 | ] 68 | }, 69 | "execution_count": 206, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "#df_tennis.columns[0]\n", 76 | "df_tennis.keys()[0]" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "metadata": {}, 82 | "source": [ 83 | "# Entropy of the Training Data Set" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 215, 89 | "metadata": {}, 90 | "outputs": [ 91 | { 92 | "name": "stdout", 93 | "output_type": "stream", 94 | "text": [ 95 | "\n", 96 | " INPUT DATA SET FOR ENTROPY CALCULATION:\n", 97 | " 0 No\n", 98 | "1 No\n", 99 | "2 Yes\n", 100 | "3 Yes\n", 101 | "4 Yes\n", 102 | "5 No\n", 103 | "6 Yes\n", 104 | "7 No\n", 105 | "8 Yes\n", 106 | "9 Yes\n", 107 | "10 Yes\n", 108 | "11 Yes\n", 109 | "12 Yes\n", 110 | "13 No\n", 111 | "Name: PlayTennis, dtype: object\n", 112 | "\n", 113 | " Number of Instances of the Current Sub Class is 14.0:\n", 114 | "\n", 115 | " Classes: No Yes\n", 116 | " \n", 117 | " Probabilities of Class No is 0.35714285714285715:\n", 118 | " \n", 119 | " Probabilities of Class Yes is 0.6428571428571429:\n", 120 | "\n", 121 | " Total Entropy of PlayTennis Data Set: 0.9402859586706309\n" 122 | ] 123 | } 124 | ], 125 | "source": [ 126 | "#Function to calculate the entropy of probaility of observations\n", 127 | "# -p*log2*p\n", 128 | "\n", 129 | "def entropy(probs): \n", 130 | " import math\n", 131 | " return sum( [-prob*math.log(prob, 2) for prob in probs] )\n", 132 | "\n", 133 | "#Function to calulate the entropy of the given Data Sets/List with respect to target attributes\n", 134 | "def entropy_of_list(a_list): \n", 135 | " #print(\"A-list\",a_list)\n", 136 | " from collections import Counter\n", 137 | " cnt = Counter(x for x in a_list) # Counter calculates the propotion of class\n", 138 | " # print(\"\\nClasses:\",cnt)\n", 139 | " #print(\"No and Yes Classes:\",a_list.name,cnt)\n", 140 | " num_instances = len(a_list)*1.0 # = 14\n", 141 | " print(\"\\n Number of Instances of the Current Sub Class is {0}:\".format(num_instances ))\n", 142 | " probs = [x / num_instances for x in cnt.values()] # x means no of YES/NO\n", 143 | " print(\"\\n Classes:\",min(cnt),max(cnt))\n", 144 | " print(\" \\n Probabilities of Class {0} is {1}:\".format(min(cnt),min(probs)))\n", 145 | " print(\" \\n Probabilities of Class {0} is {1}:\".format(max(cnt),max(probs)))\n", 146 | " return entropy(probs) # Call Entropy :\n", 147 | " \n", 148 | "# The initial entropy of the YES/NO attribute for our dataset.\n", 149 | "print(\"\\n INPUT DATA SET FOR ENTROPY CALCULATION:\\n\", df_tennis['PlayTennis'])\n", 150 | "\n", 151 | "total_entropy = entropy_of_list(df_tennis['PlayTennis'])\n", 152 | "\n", 153 | "print(\"\\n Total Entropy of PlayTennis Data Set:\",total_entropy)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "# Information Gain of Attributes " 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 216, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | "Information Gain Calculation of Outlook\n", 173 | "\n", 174 | " Number of Instances of the Current Sub Class is 4.0:\n", 175 | "\n", 176 | " Classes: Yes Yes\n", 177 | " \n", 178 | " Probabilities of Class Yes is 1.0:\n", 179 | " \n", 180 | " Probabilities of Class Yes is 1.0:\n", 181 | "\n", 182 | " Number of Instances of the Current Sub Class is 5.0:\n", 183 | "\n", 184 | " Classes: No Yes\n", 185 | " \n", 186 | " Probabilities of Class No is 0.4:\n", 187 | " \n", 188 | " Probabilities of Class Yes is 0.6:\n", 189 | "\n", 190 | " Number of Instances of the Current Sub Class is 5.0:\n", 191 | "\n", 192 | " Classes: No Yes\n", 193 | " \n", 194 | " Probabilities of Class No is 0.4:\n", 195 | " \n", 196 | " Probabilities of Class Yes is 0.6:\n", 197 | "\n", 198 | " Number of Instances of the Current Sub Class is 14.0:\n", 199 | "\n", 200 | " Classes: No Yes\n", 201 | " \n", 202 | " Probabilities of Class No is 0.35714285714285715:\n", 203 | " \n", 204 | " Probabilities of Class Yes is 0.6428571428571429:\n", 205 | "Info-gain for Outlook is :0.246749819774 \n", 206 | "\n", 207 | "Information Gain Calculation of Humidity\n", 208 | "\n", 209 | " Number of Instances of the Current Sub Class is 7.0:\n", 210 | "\n", 211 | " Classes: No Yes\n", 212 | " \n", 213 | " Probabilities of Class No is 0.42857142857142855:\n", 214 | " \n", 215 | " Probabilities of Class Yes is 0.5714285714285714:\n", 216 | "\n", 217 | " Number of Instances of the Current Sub Class is 7.0:\n", 218 | "\n", 219 | " Classes: No Yes\n", 220 | " \n", 221 | " Probabilities of Class No is 0.14285714285714285:\n", 222 | " \n", 223 | " Probabilities of Class Yes is 0.8571428571428571:\n", 224 | "\n", 225 | " Number of Instances of the Current Sub Class is 14.0:\n", 226 | "\n", 227 | " Classes: No Yes\n", 228 | " \n", 229 | " Probabilities of Class No is 0.35714285714285715:\n", 230 | " \n", 231 | " Probabilities of Class Yes is 0.6428571428571429:\n", 232 | "\n", 233 | " Info-gain for Humidity is: 0.151835501362 \n", 234 | "\n", 235 | "Information Gain Calculation of Wind\n", 236 | "\n", 237 | " Number of Instances of the Current Sub Class is 6.0:\n", 238 | "\n", 239 | " Classes: No Yes\n", 240 | " \n", 241 | " Probabilities of Class No is 0.5:\n", 242 | " \n", 243 | " Probabilities of Class Yes is 0.5:\n", 244 | "\n", 245 | " Number of Instances of the Current Sub Class is 8.0:\n", 246 | "\n", 247 | " Classes: No Yes\n", 248 | " \n", 249 | " Probabilities of Class No is 0.25:\n", 250 | " \n", 251 | " Probabilities of Class Yes is 0.75:\n", 252 | "\n", 253 | " Number of Instances of the Current Sub Class is 14.0:\n", 254 | "\n", 255 | " Classes: No Yes\n", 256 | " \n", 257 | " Probabilities of Class No is 0.35714285714285715:\n", 258 | " \n", 259 | " Probabilities of Class Yes is 0.6428571428571429:\n", 260 | "\n", 261 | " Info-gain for Wind is:0.0481270304083 \n", 262 | "\n", 263 | "Information Gain Calculation of Temperature\n", 264 | "\n", 265 | " Number of Instances of the Current Sub Class is 4.0:\n", 266 | "\n", 267 | " Classes: No Yes\n", 268 | " \n", 269 | " Probabilities of Class No is 0.25:\n", 270 | " \n", 271 | " Probabilities of Class Yes is 0.75:\n", 272 | "\n", 273 | " Number of Instances of the Current Sub Class is 4.0:\n", 274 | "\n", 275 | " Classes: No Yes\n", 276 | " \n", 277 | " Probabilities of Class No is 0.5:\n", 278 | " \n", 279 | " Probabilities of Class Yes is 0.5:\n", 280 | "\n", 281 | " Number of Instances of the Current Sub Class is 6.0:\n", 282 | "\n", 283 | " Classes: No Yes\n", 284 | " \n", 285 | " Probabilities of Class No is 0.3333333333333333:\n", 286 | " \n", 287 | " Probabilities of Class Yes is 0.6666666666666666:\n", 288 | "\n", 289 | " Number of Instances of the Current Sub Class is 14.0:\n", 290 | "\n", 291 | " Classes: No Yes\n", 292 | " \n", 293 | " Probabilities of Class No is 0.35714285714285715:\n", 294 | " \n", 295 | " Probabilities of Class Yes is 0.6428571428571429:\n", 296 | "\n", 297 | " Info-gain for Temperature is:0.029222565659 \n", 298 | "\n" 299 | ] 300 | } 301 | ], 302 | "source": [ 303 | "def information_gain(df, split_attribute_name, target_attribute_name, trace=0):\n", 304 | " print(\"Information Gain Calculation of \",split_attribute_name)\n", 305 | " '''\n", 306 | " Takes a DataFrame of attributes, and quantifies the entropy of a target\n", 307 | " attribute after performing a split along the values of another attribute.\n", 308 | " '''\n", 309 | " # Split Data by Possible Vals of Attribute:\n", 310 | " df_split = df.groupby(split_attribute_name)\n", 311 | " # for name,group in df_split:\n", 312 | " # print(\"Name:\\n\",name)\n", 313 | " # print(\"Group:\\n\",group)\n", 314 | " \n", 315 | " # Calculate Entropy for Target Attribute, as well as\n", 316 | " # Proportion of Obs in Each Data-Split\n", 317 | " nobs = len(df.index) * 1.0\n", 318 | " # print(\"NOBS\",nobs)\n", 319 | " df_agg_ent = df_split.agg({target_attribute_name : [entropy_of_list, lambda x: len(x)/nobs] })[target_attribute_name]\n", 320 | " #print([target_attribute_name])\n", 321 | " #print(\" Entropy List \",entropy_of_list)\n", 322 | " #print(\"DFAGGENT\",df_agg_ent)\n", 323 | " df_agg_ent.columns = ['Entropy', 'PropObservations']\n", 324 | " #if trace: # helps understand what fxn is doing:\n", 325 | " # print(df_agg_ent)\n", 326 | " \n", 327 | " # Calculate Information Gain:\n", 328 | " new_entropy = sum( df_agg_ent['Entropy'] * df_agg_ent['PropObservations'] )\n", 329 | " old_entropy = entropy_of_list(df[target_attribute_name])\n", 330 | " return old_entropy - new_entropy\n", 331 | "\n", 332 | "\n", 333 | "print('Info-gain for Outlook is :'+str( information_gain(df_tennis, 'Outlook', 'PlayTennis')),\"\\n\")\n", 334 | "print('\\n Info-gain for Humidity is: ' + str( information_gain(df_tennis, 'Humidity', 'PlayTennis')),\"\\n\")\n", 335 | "print('\\n Info-gain for Wind is:' + str( information_gain(df_tennis, 'Wind', 'PlayTennis')),\"\\n\")\n", 336 | "print('\\n Info-gain for Temperature is:' + str( information_gain(df_tennis, 'Temperature','PlayTennis')),\"\\n\")" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "# ID3 Algorithm" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 217, 349 | "metadata": { 350 | "collapsed": true 351 | }, 352 | "outputs": [], 353 | "source": [ 354 | "def id3(df, target_attribute_name, attribute_names, default_class=None):\n", 355 | " \n", 356 | " ## Tally target attribute:\n", 357 | " from collections import Counter\n", 358 | " cnt = Counter(x for x in df[target_attribute_name])# class of YES /NO\n", 359 | " \n", 360 | " ## First check: Is this split of the dataset homogeneous?\n", 361 | " if len(cnt) == 1:\n", 362 | " return next(iter(cnt)) # next input data set, or raises StopIteration when EOF is hit.\n", 363 | " \n", 364 | " ## Second check: Is this split of the dataset empty?\n", 365 | " # if yes, return a default value\n", 366 | " elif df.empty or (not attribute_names):\n", 367 | " return default_class # Return None for Empty Data Set\n", 368 | " \n", 369 | " ## Otherwise: This dataset is ready to be devied up!\n", 370 | " else:\n", 371 | " # Get Default Value for next recursive call of this function:\n", 372 | " default_class = max(cnt.keys()) #No of YES and NO Class\n", 373 | " # Compute the Information Gain of the attributes:\n", 374 | " gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names] #\n", 375 | " index_of_max = gainz.index(max(gainz)) # Index of Best Attribute\n", 376 | " # Choose Best Attribute to split on:\n", 377 | " best_attr = attribute_names[index_of_max]\n", 378 | " \n", 379 | " # Create an empty tree, to be populated in a moment\n", 380 | " tree = {best_attr:{}} # Iniiate the tree with best attribute as a node \n", 381 | " remaining_attribute_names = [i for i in attribute_names if i != best_attr]\n", 382 | " \n", 383 | " # Split dataset\n", 384 | " # On each split, recursively call this algorithm.\n", 385 | " # populate the empty tree with subtrees, which\n", 386 | " # are the result of the recursive call\n", 387 | " for attr_val, data_subset in df.groupby(best_attr):\n", 388 | " subtree = id3(data_subset,\n", 389 | " target_attribute_name,\n", 390 | " remaining_attribute_names,\n", 391 | " default_class)\n", 392 | " tree[best_attr][attr_val] = subtree\n", 393 | " return tree" 394 | ] 395 | }, 396 | { 397 | "cell_type": "markdown", 398 | "metadata": {}, 399 | "source": [ 400 | "# Predicting Attributes" 401 | ] 402 | }, 403 | { 404 | "cell_type": "code", 405 | "execution_count": 218, 406 | "metadata": {}, 407 | "outputs": [ 408 | { 409 | "name": "stdout", 410 | "output_type": "stream", 411 | "text": [ 412 | "List of Attributes: ['PlayTennis', 'Outlook', 'Temperature', 'Humidity', 'Wind']\n", 413 | "Predicting Attributes: ['Outlook', 'Temperature', 'Humidity', 'Wind']\n" 414 | ] 415 | } 416 | ], 417 | "source": [ 418 | "# Get Predictor Names (all but 'class')\n", 419 | "attribute_names = list(df_tennis.columns)\n", 420 | "print(\"List of Attributes:\", attribute_names) \n", 421 | "attribute_names.remove('PlayTennis') #Remove the class attribute \n", 422 | "print(\"Predicting Attributes:\", attribute_names)" 423 | ] 424 | }, 425 | { 426 | "cell_type": "raw", 427 | "metadata": {}, 428 | "source": [ 429 | "# Tree Construction" 430 | ] 431 | }, 432 | { 433 | "cell_type": "code", 434 | "execution_count": 219, 435 | "metadata": {}, 436 | "outputs": [ 437 | { 438 | "name": "stdout", 439 | "output_type": "stream", 440 | "text": [ 441 | "Information Gain Calculation of Outlook\n", 442 | "\n", 443 | " Number of Instances of the Current Sub Class is 4.0:\n", 444 | "\n", 445 | " Classes: Yes Yes\n", 446 | " \n", 447 | " Probabilities of Class Yes is 1.0:\n", 448 | " \n", 449 | " Probabilities of Class Yes is 1.0:\n", 450 | "\n", 451 | " Number of Instances of the Current Sub Class is 5.0:\n", 452 | "\n", 453 | " Classes: No Yes\n", 454 | " \n", 455 | " Probabilities of Class No is 0.4:\n", 456 | " \n", 457 | " Probabilities of Class Yes is 0.6:\n", 458 | "\n", 459 | " Number of Instances of the Current Sub Class is 5.0:\n", 460 | "\n", 461 | " Classes: No Yes\n", 462 | " \n", 463 | " Probabilities of Class No is 0.4:\n", 464 | " \n", 465 | " Probabilities of Class Yes is 0.6:\n", 466 | "\n", 467 | " Number of Instances of the Current Sub Class is 14.0:\n", 468 | "\n", 469 | " Classes: No Yes\n", 470 | " \n", 471 | " Probabilities of Class No is 0.35714285714285715:\n", 472 | " \n", 473 | " Probabilities of Class Yes is 0.6428571428571429:\n", 474 | "Information Gain Calculation of Temperature\n", 475 | "\n", 476 | " Number of Instances of the Current Sub Class is 4.0:\n", 477 | "\n", 478 | " Classes: No Yes\n", 479 | " \n", 480 | " Probabilities of Class No is 0.25:\n", 481 | " \n", 482 | " Probabilities of Class Yes is 0.75:\n", 483 | "\n", 484 | " Number of Instances of the Current Sub Class is 4.0:\n", 485 | "\n", 486 | " Classes: No Yes\n", 487 | " \n", 488 | " Probabilities of Class No is 0.5:\n", 489 | " \n", 490 | " Probabilities of Class Yes is 0.5:\n", 491 | "\n", 492 | " Number of Instances of the Current Sub Class is 6.0:\n", 493 | "\n", 494 | " Classes: No Yes\n", 495 | " \n", 496 | " Probabilities of Class No is 0.3333333333333333:\n", 497 | " \n", 498 | " Probabilities of Class Yes is 0.6666666666666666:\n", 499 | "\n", 500 | " Number of Instances of the Current Sub Class is 14.0:\n", 501 | "\n", 502 | " Classes: No Yes\n", 503 | " \n", 504 | " Probabilities of Class No is 0.35714285714285715:\n", 505 | " \n", 506 | " Probabilities of Class Yes is 0.6428571428571429:\n", 507 | "Information Gain Calculation of Humidity\n", 508 | "\n", 509 | " Number of Instances of the Current Sub Class is 7.0:\n", 510 | "\n", 511 | " Classes: No Yes\n", 512 | " \n", 513 | " Probabilities of Class No is 0.42857142857142855:\n", 514 | " \n", 515 | " Probabilities of Class Yes is 0.5714285714285714:\n", 516 | "\n", 517 | " Number of Instances of the Current Sub Class is 7.0:\n", 518 | "\n", 519 | " Classes: No Yes\n", 520 | " \n", 521 | " Probabilities of Class No is 0.14285714285714285:\n", 522 | " \n", 523 | " Probabilities of Class Yes is 0.8571428571428571:\n", 524 | "\n", 525 | " Number of Instances of the Current Sub Class is 14.0:\n", 526 | "\n", 527 | " Classes: No Yes\n", 528 | " \n", 529 | " Probabilities of Class No is 0.35714285714285715:\n", 530 | " \n", 531 | " Probabilities of Class Yes is 0.6428571428571429:\n", 532 | "Information Gain Calculation of Wind\n", 533 | "\n", 534 | " Number of Instances of the Current Sub Class is 6.0:\n", 535 | "\n", 536 | " Classes: No Yes\n", 537 | " \n", 538 | " Probabilities of Class No is 0.5:\n", 539 | " \n", 540 | " Probabilities of Class Yes is 0.5:\n", 541 | "\n", 542 | " Number of Instances of the Current Sub Class is 8.0:\n", 543 | "\n", 544 | " Classes: No Yes\n", 545 | " \n", 546 | " Probabilities of Class No is 0.25:\n", 547 | " \n", 548 | " Probabilities of Class Yes is 0.75:\n", 549 | "\n", 550 | " Number of Instances of the Current Sub Class is 14.0:\n", 551 | "\n", 552 | " Classes: No Yes\n", 553 | " \n", 554 | " Probabilities of Class No is 0.35714285714285715:\n", 555 | " \n", 556 | " Probabilities of Class Yes is 0.6428571428571429:\n", 557 | "Information Gain Calculation of Temperature\n", 558 | "\n", 559 | " Number of Instances of the Current Sub Class is 2.0:\n", 560 | "\n", 561 | " Classes: No Yes\n", 562 | " \n", 563 | " Probabilities of Class No is 0.5:\n", 564 | " \n", 565 | " Probabilities of Class Yes is 0.5:\n", 566 | "\n", 567 | " Number of Instances of the Current Sub Class is 3.0:\n", 568 | "\n", 569 | " Classes: No Yes\n", 570 | " \n", 571 | " Probabilities of Class No is 0.3333333333333333:\n", 572 | " \n", 573 | " Probabilities of Class Yes is 0.6666666666666666:\n", 574 | "\n", 575 | " Number of Instances of the Current Sub Class is 5.0:\n", 576 | "\n", 577 | " Classes: No Yes\n", 578 | " \n", 579 | " Probabilities of Class No is 0.4:\n", 580 | " \n", 581 | " Probabilities of Class Yes is 0.6:\n", 582 | "Information Gain Calculation of Humidity\n", 583 | "\n", 584 | " Number of Instances of the Current Sub Class is 2.0:\n", 585 | "\n", 586 | " Classes: No Yes\n", 587 | " \n", 588 | " Probabilities of Class No is 0.5:\n", 589 | " \n", 590 | " Probabilities of Class Yes is 0.5:\n", 591 | "\n", 592 | " Number of Instances of the Current Sub Class is 3.0:\n", 593 | "\n", 594 | " Classes: No Yes\n", 595 | " \n", 596 | " Probabilities of Class No is 0.3333333333333333:\n", 597 | " \n", 598 | " Probabilities of Class Yes is 0.6666666666666666:\n", 599 | "\n", 600 | " Number of Instances of the Current Sub Class is 5.0:\n", 601 | "\n", 602 | " Classes: No Yes\n", 603 | " \n", 604 | " Probabilities of Class No is 0.4:\n", 605 | " \n", 606 | " Probabilities of Class Yes is 0.6:\n", 607 | "Information Gain Calculation of Wind\n", 608 | "\n", 609 | " Number of Instances of the Current Sub Class is 2.0:\n", 610 | "\n", 611 | " Classes: No No\n", 612 | " \n", 613 | " Probabilities of Class No is 1.0:\n", 614 | " \n", 615 | " Probabilities of Class No is 1.0:\n", 616 | "\n", 617 | " Number of Instances of the Current Sub Class is 3.0:\n", 618 | "\n", 619 | " Classes: Yes Yes\n", 620 | " \n", 621 | " Probabilities of Class Yes is 1.0:\n", 622 | " \n", 623 | " Probabilities of Class Yes is 1.0:\n", 624 | "\n", 625 | " Number of Instances of the Current Sub Class is 5.0:\n", 626 | "\n", 627 | " Classes: No Yes\n", 628 | " \n", 629 | " Probabilities of Class No is 0.4:\n", 630 | " \n", 631 | " Probabilities of Class Yes is 0.6:\n", 632 | "Information Gain Calculation of Temperature\n", 633 | "\n", 634 | " Number of Instances of the Current Sub Class is 1.0:\n", 635 | "\n", 636 | " Classes: Yes Yes\n", 637 | " \n", 638 | " Probabilities of Class Yes is 1.0:\n", 639 | " \n", 640 | " Probabilities of Class Yes is 1.0:\n", 641 | "\n", 642 | " Number of Instances of the Current Sub Class is 2.0:\n", 643 | "\n", 644 | " Classes: No No\n", 645 | " \n", 646 | " Probabilities of Class No is 1.0:\n", 647 | " \n", 648 | " Probabilities of Class No is 1.0:\n", 649 | "\n", 650 | " Number of Instances of the Current Sub Class is 2.0:\n", 651 | "\n", 652 | " Classes: No Yes\n", 653 | " \n", 654 | " Probabilities of Class No is 0.5:\n", 655 | " \n", 656 | " Probabilities of Class Yes is 0.5:\n", 657 | "\n", 658 | " Number of Instances of the Current Sub Class is 5.0:\n", 659 | "\n", 660 | " Classes: No Yes\n", 661 | " \n", 662 | " Probabilities of Class No is 0.4:\n", 663 | " \n", 664 | " Probabilities of Class Yes is 0.6:\n", 665 | "Information Gain Calculation of Humidity\n", 666 | "\n", 667 | " Number of Instances of the Current Sub Class is 3.0:\n", 668 | "\n", 669 | " Classes: No No\n", 670 | " \n", 671 | " Probabilities of Class No is 1.0:\n", 672 | " \n", 673 | " Probabilities of Class No is 1.0:\n", 674 | "\n", 675 | " Number of Instances of the Current Sub Class is 2.0:\n", 676 | "\n", 677 | " Classes: Yes Yes\n", 678 | " \n", 679 | " Probabilities of Class Yes is 1.0:\n", 680 | " \n", 681 | " Probabilities of Class Yes is 1.0:\n", 682 | "\n", 683 | " Number of Instances of the Current Sub Class is 5.0:\n", 684 | "\n", 685 | " Classes: No Yes\n", 686 | " \n", 687 | " Probabilities of Class No is 0.4:\n", 688 | " \n", 689 | " Probabilities of Class Yes is 0.6:\n", 690 | "Information Gain Calculation of Wind\n", 691 | "\n", 692 | " Number of Instances of the Current Sub Class is 2.0:\n", 693 | "\n", 694 | " Classes: No Yes\n", 695 | " \n", 696 | " Probabilities of Class No is 0.5:\n", 697 | " \n", 698 | " Probabilities of Class Yes is 0.5:\n", 699 | "\n", 700 | " Number of Instances of the Current Sub Class is 3.0:\n", 701 | "\n", 702 | " Classes: No Yes\n", 703 | " \n", 704 | " Probabilities of Class No is 0.3333333333333333:\n", 705 | " \n", 706 | " Probabilities of Class Yes is 0.6666666666666666:\n", 707 | "\n", 708 | " Number of Instances of the Current Sub Class is 5.0:\n", 709 | "\n", 710 | " Classes: No Yes\n", 711 | " \n", 712 | " Probabilities of Class No is 0.4:\n", 713 | " \n", 714 | " Probabilities of Class Yes is 0.6:\n", 715 | "\n", 716 | "\n", 717 | "The Resultant Decision Tree is :\n", 718 | "\n", 719 | "{'Outlook': {'Overcast': 'Yes',\n", 720 | " 'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},\n", 721 | " 'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}\n", 722 | "Best Attribute :\n", 723 | " Outlook\n", 724 | "Tree Keys:\n", 725 | " dict_keys(['Overcast', 'Rain', 'Sunny'])\n" 726 | ] 727 | } 728 | ], 729 | "source": [ 730 | "# Run Algorithm:\n", 731 | "from pprint import pprint\n", 732 | "tree = id3(df_tennis,'PlayTennis',attribute_names)\n", 733 | "print(\"\\n\\nThe Resultant Decision Tree is :\\n\")\n", 734 | "#print(tree)\n", 735 | "pprint(tree)\n", 736 | "attribute = next(iter(tree))\n", 737 | "print(\"Best Attribute :\\n\",attribute)\n", 738 | "print(\"Tree Keys:\\n\",tree[attribute].keys())" 739 | ] 740 | }, 741 | { 742 | "cell_type": "markdown", 743 | "metadata": {}, 744 | "source": [ 745 | "# Classification Accuracy" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 220, 751 | "metadata": {}, 752 | "outputs": [], 753 | "source": [ 754 | "def classify(instance, tree, default=None): # Instance of Play Tennis with Predicted \n", 755 | " \n", 756 | " #print(\"Instance:\",instance)\n", 757 | " attribute = next(iter(tree)) # Outlook/Humidity/Wind \n", 758 | " print(\"Key:\",tree.keys()) # [Outlook,Humidity,Wind ]\n", 759 | " print(\"Attribute:\",attribute) # [Key /Attribute Both are same ]\n", 760 | " \n", 761 | " # print(\"Insance of Attribute :\",instance[attribute],attribute)\n", 762 | " if instance[attribute] in tree[attribute].keys(): # Value of the attributs in set of Tree keys \n", 763 | " result = tree[attribute][instance[attribute]]\n", 764 | " print(\"Instance Attribute:\",instance[attribute],\"TreeKeys :\",tree[attribute].keys())\n", 765 | " if isinstance(result, dict): # this is a tree, delve deeper\n", 766 | " return classify(instance, result)\n", 767 | " else:\n", 768 | " return result # this is a label\n", 769 | " else:\n", 770 | " return default" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 138, 776 | "metadata": {}, 777 | "outputs": [ 778 | { 779 | "name": "stdout", 780 | "output_type": "stream", 781 | "text": [ 782 | "Key: dict_keys(['Outlook'])\n", 783 | "Attribute: Outlook\n", 784 | "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 785 | "Key: dict_keys(['Humidity'])\n", 786 | "Attribute: Humidity\n", 787 | "Instance Attribute: High TreeKeys : dict_keys(['High', 'Normal'])\n", 788 | "Key: dict_keys(['Outlook'])\n", 789 | "Attribute: Outlook\n", 790 | "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 791 | "Key: dict_keys(['Humidity'])\n", 792 | "Attribute: Humidity\n", 793 | "Instance Attribute: High TreeKeys : dict_keys(['High', 'Normal'])\n", 794 | "Key: dict_keys(['Outlook'])\n", 795 | "Attribute: Outlook\n", 796 | "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 797 | "Key: dict_keys(['Outlook'])\n", 798 | "Attribute: Outlook\n", 799 | "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 800 | "Key: dict_keys(['Wind'])\n", 801 | "Attribute: Wind\n", 802 | "Instance Attribute: Weak TreeKeys : dict_keys(['Strong', 'Weak'])\n", 803 | "Key: dict_keys(['Outlook'])\n", 804 | "Attribute: Outlook\n", 805 | "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 806 | "Key: dict_keys(['Wind'])\n", 807 | "Attribute: Wind\n", 808 | "Instance Attribute: Weak TreeKeys : dict_keys(['Strong', 'Weak'])\n", 809 | "Key: dict_keys(['Outlook'])\n", 810 | "Attribute: Outlook\n", 811 | "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 812 | "Key: dict_keys(['Wind'])\n", 813 | "Attribute: Wind\n", 814 | "Instance Attribute: Strong TreeKeys : dict_keys(['Strong', 'Weak'])\n", 815 | "Key: dict_keys(['Outlook'])\n", 816 | "Attribute: Outlook\n", 817 | "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 818 | "Key: dict_keys(['Outlook'])\n", 819 | "Attribute: Outlook\n", 820 | "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 821 | "Key: dict_keys(['Humidity'])\n", 822 | "Attribute: Humidity\n", 823 | "Instance Attribute: High TreeKeys : dict_keys(['High', 'Normal'])\n", 824 | "Key: dict_keys(['Outlook'])\n", 825 | "Attribute: Outlook\n", 826 | "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 827 | "Key: dict_keys(['Humidity'])\n", 828 | "Attribute: Humidity\n", 829 | "Instance Attribute: Normal TreeKeys : dict_keys(['High', 'Normal'])\n", 830 | "Key: dict_keys(['Outlook'])\n", 831 | "Attribute: Outlook\n", 832 | "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 833 | "Key: dict_keys(['Wind'])\n", 834 | "Attribute: Wind\n", 835 | "Instance Attribute: Weak TreeKeys : dict_keys(['Strong', 'Weak'])\n", 836 | "Key: dict_keys(['Outlook'])\n", 837 | "Attribute: Outlook\n", 838 | "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 839 | "Key: dict_keys(['Humidity'])\n", 840 | "Attribute: Humidity\n", 841 | "Instance Attribute: Normal TreeKeys : dict_keys(['High', 'Normal'])\n", 842 | "Key: dict_keys(['Outlook'])\n", 843 | "Attribute: Outlook\n", 844 | "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 845 | "Key: dict_keys(['Outlook'])\n", 846 | "Attribute: Outlook\n", 847 | "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 848 | "Key: dict_keys(['Outlook'])\n", 849 | "Attribute: Outlook\n", 850 | "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 851 | "Key: dict_keys(['Wind'])\n", 852 | "Attribute: Wind\n", 853 | "Instance Attribute: Strong TreeKeys : dict_keys(['Strong', 'Weak'])\n", 854 | "0 No\n", 855 | "1 No\n", 856 | "2 Yes\n", 857 | "3 Yes\n", 858 | "4 Yes\n", 859 | "5 No\n", 860 | "6 Yes\n", 861 | "7 No\n", 862 | "8 Yes\n", 863 | "9 Yes\n", 864 | "10 Yes\n", 865 | "11 Yes\n", 866 | "12 Yes\n", 867 | "13 No\n", 868 | "Name: predicted, dtype: object\n", 869 | "\n", 870 | " Accuracy is:\n", 871 | "1.0\n" 872 | ] 873 | }, 874 | { 875 | "data": { 876 | "text/html": [ 877 | "
\n", 878 | "\n", 891 | "\n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | "
PlayTennispredicted
0NoNo
1NoNo
2YesYes
3YesYes
4YesYes
5NoNo
6YesYes
7NoNo
8YesYes
9YesYes
10YesYes
11YesYes
12YesYes
13NoNo
\n", 972 | "
" 973 | ], 974 | "text/plain": [ 975 | " PlayTennis predicted\n", 976 | "0 No No\n", 977 | "1 No No\n", 978 | "2 Yes Yes\n", 979 | "3 Yes Yes\n", 980 | "4 Yes Yes\n", 981 | "5 No No\n", 982 | "6 Yes Yes\n", 983 | "7 No No\n", 984 | "8 Yes Yes\n", 985 | "9 Yes Yes\n", 986 | "10 Yes Yes\n", 987 | "11 Yes Yes\n", 988 | "12 Yes Yes\n", 989 | "13 No No" 990 | ] 991 | }, 992 | "execution_count": 138, 993 | "metadata": {}, 994 | "output_type": "execute_result" 995 | } 996 | ], 997 | "source": [ 998 | "df_tennis['predicted'] = df_tennis.apply(classify, axis=1, args=(tree,'No') ) \n", 999 | " # classify func allows for a default arg: when tree doesn't have answer for a particular\n", 1000 | " # combitation of attribute-values, we can use 'no' as the default guess \n", 1001 | "\n", 1002 | "print(df_tennis['predicted'])\n", 1003 | "\n", 1004 | "print('\\n Accuracy is:\\n' + str( sum(df_tennis['PlayTennis']==df_tennis['predicted'] ) / (1.0*len(df_tennis.index)) ))\n", 1005 | "\n", 1006 | "\n", 1007 | "df_tennis[['PlayTennis', 'predicted']]\n" 1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "markdown", 1012 | "metadata": { 1013 | "collapsed": true 1014 | }, 1015 | "source": [ 1016 | "# Classification Accuracy: Training/Testing Set" 1017 | ] 1018 | }, 1019 | { 1020 | "cell_type": "code", 1021 | "execution_count": 221, 1022 | "metadata": { 1023 | "scrolled": false 1024 | }, 1025 | "outputs": [ 1026 | { 1027 | "name": "stdout", 1028 | "output_type": "stream", 1029 | "text": [ 1030 | "Information Gain Calculation of Outlook\n", 1031 | "\n", 1032 | " Number of Instances of the Current Sub Class is 2.0:\n", 1033 | "\n", 1034 | " Classes: Yes Yes\n", 1035 | " \n", 1036 | " Probabilities of Class Yes is 1.0:\n", 1037 | " \n", 1038 | " Probabilities of Class Yes is 1.0:\n", 1039 | "\n", 1040 | " Number of Instances of the Current Sub Class is 4.0:\n", 1041 | "\n", 1042 | " Classes: No Yes\n", 1043 | " \n", 1044 | " Probabilities of Class No is 0.25:\n", 1045 | " \n", 1046 | " Probabilities of Class Yes is 0.75:\n", 1047 | "\n", 1048 | " Number of Instances of the Current Sub Class is 3.0:\n", 1049 | "\n", 1050 | " Classes: No Yes\n", 1051 | " \n", 1052 | " Probabilities of Class No is 0.3333333333333333:\n", 1053 | " \n", 1054 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1055 | "\n", 1056 | " Number of Instances of the Current Sub Class is 9.0:\n", 1057 | "\n", 1058 | " Classes: No Yes\n", 1059 | " \n", 1060 | " Probabilities of Class No is 0.3333333333333333:\n", 1061 | " \n", 1062 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1063 | "Information Gain Calculation of Temperature\n", 1064 | "\n", 1065 | " Number of Instances of the Current Sub Class is 4.0:\n", 1066 | "\n", 1067 | " Classes: No Yes\n", 1068 | " \n", 1069 | " Probabilities of Class No is 0.25:\n", 1070 | " \n", 1071 | " Probabilities of Class Yes is 0.75:\n", 1072 | "\n", 1073 | " Number of Instances of the Current Sub Class is 2.0:\n", 1074 | "\n", 1075 | " Classes: No Yes\n", 1076 | " \n", 1077 | " Probabilities of Class No is 0.5:\n", 1078 | " \n", 1079 | " Probabilities of Class Yes is 0.5:\n", 1080 | "\n", 1081 | " Number of Instances of the Current Sub Class is 3.0:\n", 1082 | "\n", 1083 | " Classes: No Yes\n", 1084 | " \n", 1085 | " Probabilities of Class No is 0.3333333333333333:\n", 1086 | " \n", 1087 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1088 | "\n", 1089 | " Number of Instances of the Current Sub Class is 9.0:\n", 1090 | "\n", 1091 | " Classes: No Yes\n", 1092 | " \n", 1093 | " Probabilities of Class No is 0.3333333333333333:\n", 1094 | " \n", 1095 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1096 | "Information Gain Calculation of Humidity\n", 1097 | "\n", 1098 | " Number of Instances of the Current Sub Class is 4.0:\n", 1099 | "\n", 1100 | " Classes: No Yes\n", 1101 | " \n", 1102 | " Probabilities of Class No is 0.5:\n", 1103 | " \n", 1104 | " Probabilities of Class Yes is 0.5:\n", 1105 | "\n", 1106 | " Number of Instances of the Current Sub Class is 5.0:\n", 1107 | "\n", 1108 | " Classes: No Yes\n", 1109 | " \n", 1110 | " Probabilities of Class No is 0.2:\n", 1111 | " \n", 1112 | " Probabilities of Class Yes is 0.8:\n", 1113 | "\n", 1114 | " Number of Instances of the Current Sub Class is 9.0:\n", 1115 | "\n", 1116 | " Classes: No Yes\n", 1117 | " \n", 1118 | " Probabilities of Class No is 0.3333333333333333:\n", 1119 | " \n", 1120 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1121 | "Information Gain Calculation of Wind\n", 1122 | "\n", 1123 | " Number of Instances of the Current Sub Class is 3.0:\n", 1124 | "\n", 1125 | " Classes: No Yes\n", 1126 | " \n", 1127 | " Probabilities of Class No is 0.3333333333333333:\n", 1128 | " \n", 1129 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1130 | "\n", 1131 | " Number of Instances of the Current Sub Class is 6.0:\n", 1132 | "\n", 1133 | " Classes: No Yes\n", 1134 | " \n", 1135 | " Probabilities of Class No is 0.16666666666666666:\n", 1136 | " \n", 1137 | " Probabilities of Class Yes is 0.8333333333333334:\n", 1138 | "\n", 1139 | " Number of Instances of the Current Sub Class is 9.0:\n", 1140 | "\n", 1141 | " Classes: No Yes\n", 1142 | " \n", 1143 | " Probabilities of Class No is 0.3333333333333333:\n", 1144 | " \n", 1145 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1146 | "Information Gain Calculation of Temperature\n", 1147 | "\n", 1148 | " Number of Instances of the Current Sub Class is 2.0:\n", 1149 | "\n", 1150 | " Classes: No Yes\n", 1151 | " \n", 1152 | " Probabilities of Class No is 0.5:\n", 1153 | " \n", 1154 | " Probabilities of Class Yes is 0.5:\n", 1155 | "\n", 1156 | " Number of Instances of the Current Sub Class is 2.0:\n", 1157 | "\n", 1158 | " Classes: Yes Yes\n", 1159 | " \n", 1160 | " Probabilities of Class Yes is 1.0:\n", 1161 | " \n", 1162 | " Probabilities of Class Yes is 1.0:\n", 1163 | "\n", 1164 | " Number of Instances of the Current Sub Class is 4.0:\n", 1165 | "\n", 1166 | " Classes: No Yes\n", 1167 | " \n", 1168 | " Probabilities of Class No is 0.25:\n", 1169 | " \n", 1170 | " Probabilities of Class Yes is 0.75:\n", 1171 | "Information Gain Calculation of Humidity\n", 1172 | "\n", 1173 | " Number of Instances of the Current Sub Class is 1.0:\n", 1174 | "\n", 1175 | " Classes: Yes Yes\n", 1176 | " \n", 1177 | " Probabilities of Class Yes is 1.0:\n", 1178 | " \n", 1179 | " Probabilities of Class Yes is 1.0:\n", 1180 | "\n", 1181 | " Number of Instances of the Current Sub Class is 3.0:\n", 1182 | "\n", 1183 | " Classes: No Yes\n", 1184 | " \n", 1185 | " Probabilities of Class No is 0.3333333333333333:\n", 1186 | " \n", 1187 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1188 | "\n", 1189 | " Number of Instances of the Current Sub Class is 4.0:\n", 1190 | "\n", 1191 | " Classes: No Yes\n", 1192 | " \n", 1193 | " Probabilities of Class No is 0.25:\n", 1194 | " \n", 1195 | " Probabilities of Class Yes is 0.75:\n", 1196 | "Information Gain Calculation of Wind\n", 1197 | "\n", 1198 | " Number of Instances of the Current Sub Class is 1.0:\n", 1199 | "\n", 1200 | " Classes: No No\n", 1201 | " \n", 1202 | " Probabilities of Class No is 1.0:\n", 1203 | " \n", 1204 | " Probabilities of Class No is 1.0:\n", 1205 | "\n", 1206 | " Number of Instances of the Current Sub Class is 3.0:\n", 1207 | "\n", 1208 | " Classes: Yes Yes\n", 1209 | " \n", 1210 | " Probabilities of Class Yes is 1.0:\n", 1211 | " \n", 1212 | " Probabilities of Class Yes is 1.0:\n", 1213 | "\n", 1214 | " Number of Instances of the Current Sub Class is 4.0:\n", 1215 | "\n", 1216 | " Classes: No Yes\n", 1217 | " \n", 1218 | " Probabilities of Class No is 0.25:\n", 1219 | " \n", 1220 | " Probabilities of Class Yes is 0.75:\n", 1221 | "Information Gain Calculation of Temperature\n", 1222 | "\n", 1223 | " Number of Instances of the Current Sub Class is 1.0:\n", 1224 | "\n", 1225 | " Classes: Yes Yes\n", 1226 | " \n", 1227 | " Probabilities of Class Yes is 1.0:\n", 1228 | " \n", 1229 | " Probabilities of Class Yes is 1.0:\n", 1230 | "\n", 1231 | " Number of Instances of the Current Sub Class is 1.0:\n", 1232 | "\n", 1233 | " Classes: No No\n", 1234 | " \n", 1235 | " Probabilities of Class No is 1.0:\n", 1236 | " \n", 1237 | " Probabilities of Class No is 1.0:\n", 1238 | "\n", 1239 | " Number of Instances of the Current Sub Class is 1.0:\n", 1240 | "\n", 1241 | " Classes: No No\n", 1242 | " \n", 1243 | " Probabilities of Class No is 1.0:\n", 1244 | " \n", 1245 | " Probabilities of Class No is 1.0:\n", 1246 | "\n", 1247 | " Number of Instances of the Current Sub Class is 3.0:\n", 1248 | "\n", 1249 | " Classes: No Yes\n", 1250 | " \n", 1251 | " Probabilities of Class No is 0.3333333333333333:\n", 1252 | " \n", 1253 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1254 | "Information Gain Calculation of Humidity\n", 1255 | "\n", 1256 | " Number of Instances of the Current Sub Class is 2.0:\n", 1257 | "\n", 1258 | " Classes: No No\n", 1259 | " \n", 1260 | " Probabilities of Class No is 1.0:\n", 1261 | " \n", 1262 | " Probabilities of Class No is 1.0:\n", 1263 | "\n", 1264 | " Number of Instances of the Current Sub Class is 1.0:\n", 1265 | "\n", 1266 | " Classes: Yes Yes\n", 1267 | " \n", 1268 | " Probabilities of Class Yes is 1.0:\n", 1269 | " \n", 1270 | " Probabilities of Class Yes is 1.0:\n", 1271 | "\n", 1272 | " Number of Instances of the Current Sub Class is 3.0:\n", 1273 | "\n", 1274 | " Classes: No Yes\n", 1275 | " \n", 1276 | " Probabilities of Class No is 0.3333333333333333:\n", 1277 | " \n", 1278 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1279 | "Information Gain Calculation of Wind\n", 1280 | "\n", 1281 | " Number of Instances of the Current Sub Class is 1.0:\n", 1282 | "\n", 1283 | " Classes: No No\n", 1284 | " \n", 1285 | " Probabilities of Class No is 1.0:\n", 1286 | " \n", 1287 | " Probabilities of Class No is 1.0:\n", 1288 | "\n", 1289 | " Number of Instances of the Current Sub Class is 2.0:\n", 1290 | "\n", 1291 | " Classes: No Yes\n", 1292 | " \n", 1293 | " Probabilities of Class No is 0.5:\n", 1294 | " \n", 1295 | " Probabilities of Class Yes is 0.5:\n", 1296 | "\n", 1297 | " Number of Instances of the Current Sub Class is 3.0:\n", 1298 | "\n", 1299 | " Classes: No Yes\n", 1300 | " \n", 1301 | " Probabilities of Class No is 0.3333333333333333:\n", 1302 | " \n", 1303 | " Probabilities of Class Yes is 0.6666666666666666:\n", 1304 | "Key: dict_keys(['Outlook'])\n", 1305 | "Attribute: Outlook\n", 1306 | "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 1307 | "Key: dict_keys(['Temperature'])\n", 1308 | "Attribute: Temperature\n", 1309 | "Instance Attribute: Mild TreeKeys : dict_keys(['Cool', 'Hot', 'Mild'])\n", 1310 | "Key: dict_keys(['Outlook'])\n", 1311 | "Attribute: Outlook\n", 1312 | "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 1313 | "Key: dict_keys(['Outlook'])\n", 1314 | "Attribute: Outlook\n", 1315 | "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 1316 | "Key: dict_keys(['Outlook'])\n", 1317 | "Attribute: Outlook\n", 1318 | "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n", 1319 | "Key: dict_keys(['Wind'])\n", 1320 | "Attribute: Wind\n", 1321 | "Instance Attribute: Strong TreeKeys : dict_keys(['Strong', 'Weak'])\n", 1322 | "\n", 1323 | "\n", 1324 | " Accuracy is : 0.75\n" 1325 | ] 1326 | }, 1327 | { 1328 | "name": "stderr", 1329 | "output_type": "stream", 1330 | "text": [ 1331 | "C:\\Users\\Dr.Thyagaraju\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:8: SettingWithCopyWarning: \n", 1332 | "A value is trying to be set on a copy of a slice from a DataFrame.\n", 1333 | "Try using .loc[row_indexer,col_indexer] = value instead\n", 1334 | "\n", 1335 | "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", 1336 | " \n" 1337 | ] 1338 | } 1339 | ], 1340 | "source": [ 1341 | "training_data = df_tennis.iloc[1:-4] # all but last four instances\n", 1342 | "test_data = df_tennis.iloc[-4:] # just the last four\n", 1343 | "train_tree = id3(training_data, 'PlayTennis', attribute_names)\n", 1344 | "\n", 1345 | "test_data['predicted2'] = test_data.apply( # <---- test_data source\n", 1346 | " classify, \n", 1347 | " axis=1, \n", 1348 | " args=(train_tree,'Yes') ) # <---- train_data tree\n", 1349 | "\n", 1350 | "\n", 1351 | "print ('\\n\\n Accuracy is : ' + str( sum(test_data['PlayTennis']==test_data['predicted2'] ) / (1.0*len(test_data.index)) ))" 1352 | ] 1353 | }, 1354 | { 1355 | "cell_type": "markdown", 1356 | "metadata": { 1357 | "collapsed": true 1358 | }, 1359 | "source": [ 1360 | "# End" 1361 | ] 1362 | } 1363 | ], 1364 | "metadata": { 1365 | "kernelspec": { 1366 | "display_name": "Python 3", 1367 | "language": "python", 1368 | "name": "python3" 1369 | }, 1370 | "language_info": { 1371 | "codemirror_mode": { 1372 | "name": "ipython", 1373 | "version": 3 1374 | }, 1375 | "file_extension": ".py", 1376 | "mimetype": "text/x-python", 1377 | "name": "python", 1378 | "nbconvert_exporter": "python", 1379 | "pygments_lexer": "ipython3", 1380 | "version": "3.6.3" 1381 | } 1382 | }, 1383 | "nbformat": 4, 1384 | "nbformat_minor": 2 1385 | } 1386 | -------------------------------------------------------------------------------- /ID3+-+Algorithm+ID3(Examples,+TargetAttribute,+Attributes).jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/profthyagu/Python-Decision-Tree-Using-ID3/abb2d8bb265c6b0435745a01e0cb980a581893ea/ID3+-+Algorithm+ID3(Examples,+TargetAttribute,+Attributes).jpg -------------------------------------------------------------------------------- /PlayTennis.csv: -------------------------------------------------------------------------------- 1 | ,PlayTennis,Outlook,Temperature,Humidity,Wind 2 | 0,No,Sunny,Hot,High,Weak 3 | 1,No,Sunny,Hot,High,Strong 4 | 2,Yes,Overcast,Hot,High,Weak 5 | 3,Yes,Rain,Mild,High,Weak 6 | 4,Yes,Rain,Cool,Normal,Weak 7 | 5,No,Rain,Cool,Normal,Strong 8 | 6,Yes,Overcast,Cool,Normal,Strong 9 | 7,No,Sunny,Mild,High,Weak 10 | 8,Yes,Sunny,Cool,Normal,Weak 11 | 9,Yes,Rain,Mild,Normal,Weak 12 | 10,Yes,Sunny,Mild,Normal,Strong 13 | 11,Yes,Overcast,Mild,High,Strong 14 | 12,Yes,Overcast,Hot,Normal,Weak 15 | 13,No,Rain,Mild,High,Strong 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Decision-Tree-Using-ID3- 2 | Problem : Write a program to demonstrate the working of the decision tree based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample. 3 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-cayman --------------------------------------------------------------------------------