├── 3.Decision Tree Using ID3.ipynb
├── ID3+-+Algorithm+ID3(Examples,+TargetAttribute,+Attributes).jpg
├── PlayTennis.csv
├── README.md
└── _config.yml


/3.Decision Tree Using ID3.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "raw",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "Write a program to demonstrate the working of the decision-tree-based ID3\n",
   8 |     "algorithm. Use an appropriate data set for building the decision tree and\n",
   9 |     "apply this knowledge to classify a new sample."
  10 |    ]
  11 |   },
  12 |   {
  13 |    "cell_type": "markdown",
  14 |    "metadata": {},
  15 |    "source": [
  16 |     "# Import Play Tennis Data "
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "code",
  21 |    "execution_count": 149,
  22 |    "metadata": {},
  23 |    "outputs": [
  24 |     {
  25 |      "name": "stdout",
  26 |      "output_type": "stream",
  27 |      "text": [
  28 |       "\n",
  29 |       " Given Play Tennis Data Set:\n",
  30 |       "\n",
  31 |       "    PlayTennis   Outlook Temperature Humidity    Wind\n",
  32 |       "0          No     Sunny         Hot     High    Weak\n",
  33 |       "1          No     Sunny         Hot     High  Strong\n",
  34 |       "2         Yes  Overcast         Hot     High    Weak\n",
  35 |       "3         Yes      Rain        Mild     High    Weak\n",
  36 |       "4         Yes      Rain        Cool   Normal    Weak\n",
  37 |       "5          No      Rain        Cool   Normal  Strong\n",
  38 |       "6         Yes  Overcast        Cool   Normal  Strong\n",
  39 |       "7          No     Sunny        Mild     High    Weak\n",
  40 |       "8         Yes     Sunny        Cool   Normal    Weak\n",
  41 |       "9         Yes      Rain        Mild   Normal    Weak\n",
  42 |       "10        Yes     Sunny        Mild   Normal  Strong\n",
  43 |       "11        Yes  Overcast        Mild     High  Strong\n",
  44 |       "12        Yes  Overcast         Hot   Normal    Weak\n",
  45 |       "13         No      Rain        Mild     High  Strong\n"
  46 |      ]
  47 |     }
  48 |    ],
  49 |    "source": [
  50 |     "# Author : Dr. Thyagaraju G S, Context Innovations Lab, Dept. of CSE, SDMIT - Ujire\n",
  51 |     "# Date : July 11 2018 \n",
  52 |     "import pandas as pd\n",
  53 |     "from pandas import DataFrame \n",
  54 |     "df_tennis = DataFrame.from_csv('C:\\\\Users\\\\Dr.Thyagaraju\\\\Desktop\\\\Data\\\\PlayTennis.csv')\n",
  55 |     "print(\"\\n Given Play Tennis Data Set:\\n\\n\", df_tennis)"
  56 |    ]
  57 |   },
  58 |   {
  59 |    "cell_type": "code",
  60 |    "execution_count": 206,
  61 |    "metadata": {},
  62 |    "outputs": [
  63 |     {
  64 |      "data": {
  65 |       "text/plain": [
  66 |        "'PlayTennis'"
  67 |       ]
  68 |      },
  69 |      "execution_count": 206,
  70 |      "metadata": {},
  71 |      "output_type": "execute_result"
  72 |     }
  73 |    ],
  74 |    "source": [
  75 |     "#df_tennis.columns[0]\n",
  76 |     "df_tennis.keys()[0]"
  77 |    ]
  78 |   },
  79 |   {
  80 |    "cell_type": "markdown",
  81 |    "metadata": {},
  82 |    "source": [
  83 |     "# Entropy of the Training Data Set"
  84 |    ]
  85 |   },
  86 |   {
  87 |    "cell_type": "code",
  88 |    "execution_count": 215,
  89 |    "metadata": {},
  90 |    "outputs": [
  91 |     {
  92 |      "name": "stdout",
  93 |      "output_type": "stream",
  94 |      "text": [
  95 |       "\n",
  96 |       "  INPUT DATA SET FOR ENTROPY CALCULATION:\n",
  97 |       " 0      No\n",
  98 |       "1      No\n",
  99 |       "2     Yes\n",
 100 |       "3     Yes\n",
 101 |       "4     Yes\n",
 102 |       "5      No\n",
 103 |       "6     Yes\n",
 104 |       "7      No\n",
 105 |       "8     Yes\n",
 106 |       "9     Yes\n",
 107 |       "10    Yes\n",
 108 |       "11    Yes\n",
 109 |       "12    Yes\n",
 110 |       "13     No\n",
 111 |       "Name: PlayTennis, dtype: object\n",
 112 |       "\n",
 113 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 114 |       "\n",
 115 |       " Classes: No Yes\n",
 116 |       " \n",
 117 |       " Probabilities of Class No is 0.35714285714285715:\n",
 118 |       " \n",
 119 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 120 |       "\n",
 121 |       " Total Entropy of PlayTennis Data Set: 0.9402859586706309\n"
 122 |      ]
 123 |     }
 124 |    ],
 125 |    "source": [
 126 |     "# Function to calculate the entropy of a list of class probabilities:\n",
 127 |     "#   H = sum(-p * log2(p)) over every probability p\n",
 128 |     "\n",
 129 |     "def entropy(probs):  \n",
 130 |     "    import math\n",
 131 |     "    return sum( [-prob*math.log(prob, 2) for prob in probs] )\n",
 132 |     "\n",
 133 |     "# Function to calculate the entropy of a given list of target-attribute values (class labels)\n",
 134 |     "def entropy_of_list(a_list):  \n",
 135 |     "    #print(\"A-list\",a_list)\n",
 136 |     "    from collections import Counter\n",
 137 |     "    cnt = Counter(x for x in a_list)   # Counter tallies how many times each class occurs\n",
 138 |     "   # print(\"\\nClasses:\",cnt)\n",
 139 |     "    #print(\"No and Yes Classes:\",a_list.name,cnt)\n",
 140 |     "    num_instances = len(a_list)*1.0   # = 14\n",
 141 |     "    print(\"\\n Number of Instances of the Current Sub Class is {0}:\".format(num_instances ))\n",
 142 |     "    probs = [x / num_instances for x in cnt.values()]  # x means no of YES/NO\n",
 143 |     "    print(\"\\n Classes:\",min(cnt),max(cnt))\n",
 144 |     "    print(\" \\n Probabilities of Class {0} is {1}:\".format(min(cnt),min(probs)))\n",
 145 |     "    print(\" \\n Probabilities of Class {0} is {1}:\".format(max(cnt),max(probs)))\n",
 146 |     "    return entropy(probs) # Call Entropy :\n",
 147 |     "    \n",
 148 |     "# The initial entropy of the YES/NO attribute for our dataset.\n",
 149 |     "print(\"\\n  INPUT DATA SET FOR ENTROPY CALCULATION:\\n\", df_tennis['PlayTennis'])\n",
 150 |     "\n",
 151 |     "total_entropy = entropy_of_list(df_tennis['PlayTennis'])\n",
 152 |     "\n",
 153 |     "print(\"\\n Total Entropy of PlayTennis Data Set:\",total_entropy)"
 154 |    ]
 155 |   },
 156 |   {
 157 |    "cell_type": "markdown",
 158 |    "metadata": {},
 159 |    "source": [
 160 |     "# Information Gain of Attributes "
 161 |    ]
 162 |   },
 163 |   {
 164 |    "cell_type": "code",
 165 |    "execution_count": 216,
 166 |    "metadata": {},
 167 |    "outputs": [
 168 |     {
 169 |      "name": "stdout",
 170 |      "output_type": "stream",
 171 |      "text": [
 172 |       "Information Gain Calculation of  Outlook\n",
 173 |       "\n",
 174 |       " Number of Instances of the Current Sub Class is 4.0:\n",
 175 |       "\n",
 176 |       " Classes: Yes Yes\n",
 177 |       " \n",
 178 |       " Probabilities of Class Yes is 1.0:\n",
 179 |       " \n",
 180 |       " Probabilities of Class Yes is 1.0:\n",
 181 |       "\n",
 182 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 183 |       "\n",
 184 |       " Classes: No Yes\n",
 185 |       " \n",
 186 |       " Probabilities of Class No is 0.4:\n",
 187 |       " \n",
 188 |       " Probabilities of Class Yes is 0.6:\n",
 189 |       "\n",
 190 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 191 |       "\n",
 192 |       " Classes: No Yes\n",
 193 |       " \n",
 194 |       " Probabilities of Class No is 0.4:\n",
 195 |       " \n",
 196 |       " Probabilities of Class Yes is 0.6:\n",
 197 |       "\n",
 198 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 199 |       "\n",
 200 |       " Classes: No Yes\n",
 201 |       " \n",
 202 |       " Probabilities of Class No is 0.35714285714285715:\n",
 203 |       " \n",
 204 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 205 |       "Info-gain for Outlook is :0.246749819774 \n",
 206 |       "\n",
 207 |       "Information Gain Calculation of  Humidity\n",
 208 |       "\n",
 209 |       " Number of Instances of the Current Sub Class is 7.0:\n",
 210 |       "\n",
 211 |       " Classes: No Yes\n",
 212 |       " \n",
 213 |       " Probabilities of Class No is 0.42857142857142855:\n",
 214 |       " \n",
 215 |       " Probabilities of Class Yes is 0.5714285714285714:\n",
 216 |       "\n",
 217 |       " Number of Instances of the Current Sub Class is 7.0:\n",
 218 |       "\n",
 219 |       " Classes: No Yes\n",
 220 |       " \n",
 221 |       " Probabilities of Class No is 0.14285714285714285:\n",
 222 |       " \n",
 223 |       " Probabilities of Class Yes is 0.8571428571428571:\n",
 224 |       "\n",
 225 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 226 |       "\n",
 227 |       " Classes: No Yes\n",
 228 |       " \n",
 229 |       " Probabilities of Class No is 0.35714285714285715:\n",
 230 |       " \n",
 231 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 232 |       "\n",
 233 |       " Info-gain for Humidity is: 0.151835501362 \n",
 234 |       "\n",
 235 |       "Information Gain Calculation of  Wind\n",
 236 |       "\n",
 237 |       " Number of Instances of the Current Sub Class is 6.0:\n",
 238 |       "\n",
 239 |       " Classes: No Yes\n",
 240 |       " \n",
 241 |       " Probabilities of Class No is 0.5:\n",
 242 |       " \n",
 243 |       " Probabilities of Class Yes is 0.5:\n",
 244 |       "\n",
 245 |       " Number of Instances of the Current Sub Class is 8.0:\n",
 246 |       "\n",
 247 |       " Classes: No Yes\n",
 248 |       " \n",
 249 |       " Probabilities of Class No is 0.25:\n",
 250 |       " \n",
 251 |       " Probabilities of Class Yes is 0.75:\n",
 252 |       "\n",
 253 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 254 |       "\n",
 255 |       " Classes: No Yes\n",
 256 |       " \n",
 257 |       " Probabilities of Class No is 0.35714285714285715:\n",
 258 |       " \n",
 259 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 260 |       "\n",
 261 |       " Info-gain for Wind is:0.0481270304083 \n",
 262 |       "\n",
 263 |       "Information Gain Calculation of  Temperature\n",
 264 |       "\n",
 265 |       " Number of Instances of the Current Sub Class is 4.0:\n",
 266 |       "\n",
 267 |       " Classes: No Yes\n",
 268 |       " \n",
 269 |       " Probabilities of Class No is 0.25:\n",
 270 |       " \n",
 271 |       " Probabilities of Class Yes is 0.75:\n",
 272 |       "\n",
 273 |       " Number of Instances of the Current Sub Class is 4.0:\n",
 274 |       "\n",
 275 |       " Classes: No Yes\n",
 276 |       " \n",
 277 |       " Probabilities of Class No is 0.5:\n",
 278 |       " \n",
 279 |       " Probabilities of Class Yes is 0.5:\n",
 280 |       "\n",
 281 |       " Number of Instances of the Current Sub Class is 6.0:\n",
 282 |       "\n",
 283 |       " Classes: No Yes\n",
 284 |       " \n",
 285 |       " Probabilities of Class No is 0.3333333333333333:\n",
 286 |       " \n",
 287 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
 288 |       "\n",
 289 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 290 |       "\n",
 291 |       " Classes: No Yes\n",
 292 |       " \n",
 293 |       " Probabilities of Class No is 0.35714285714285715:\n",
 294 |       " \n",
 295 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 296 |       "\n",
 297 |       " Info-gain for Temperature is:0.029222565659 \n",
 298 |       "\n"
 299 |      ]
 300 |     }
 301 |    ],
 302 |    "source": [
 303 |     "def information_gain(df, split_attribute_name, target_attribute_name, trace=0):\n",
 304 |     "    print(\"Information Gain Calculation of \",split_attribute_name)\n",
 305 |     "    '''\n",
 306 |     "    Takes a DataFrame of attributes, and quantifies the entropy of a target\n",
 307 |     "    attribute after performing a split along the values of another attribute.\n",
 308 |     "    '''\n",
 309 |     "    # Split Data by Possible Vals of Attribute:\n",
 310 |     "    df_split = df.groupby(split_attribute_name)\n",
 311 |     "   # for name,group in df_split:\n",
 312 |     "    #    print(\"Name:\\n\",name)\n",
 313 |     "     #   print(\"Group:\\n\",group)\n",
 314 |     "    \n",
 315 |     "    # Calculate Entropy for Target Attribute, as well as\n",
 316 |     "    # Proportion of Obs in Each Data-Split\n",
 317 |     "    nobs = len(df.index) * 1.0\n",
 318 |     "   # print(\"NOBS\",nobs)\n",
 319 |     "    df_agg_ent = df_split.agg({target_attribute_name : [entropy_of_list, lambda x: len(x)/nobs] })[target_attribute_name]\n",
 320 |     "    #print([target_attribute_name])\n",
 321 |     "    #print(\" Entropy List \",entropy_of_list)\n",
 322 |     "    #print(\"DFAGGENT\",df_agg_ent)\n",
 323 |     "    df_agg_ent.columns = ['Entropy', 'PropObservations']\n",
 324 |     "    #if trace: # helps understand what fxn is doing:\n",
 325 |     "     #   print(df_agg_ent)\n",
 326 |     "    \n",
 327 |     "    # Calculate Information Gain:\n",
 328 |     "    new_entropy = sum( df_agg_ent['Entropy'] * df_agg_ent['PropObservations'] )\n",
 329 |     "    old_entropy = entropy_of_list(df[target_attribute_name])\n",
 330 |     "    return old_entropy - new_entropy\n",
 331 |     "\n",
 332 |     "\n",
 333 |     "print('Info-gain for Outlook is :'+str( information_gain(df_tennis, 'Outlook', 'PlayTennis')),\"\\n\")\n",
 334 |     "print('\\n Info-gain for Humidity is: ' + str( information_gain(df_tennis, 'Humidity', 'PlayTennis')),\"\\n\")\n",
 335 |     "print('\\n Info-gain for Wind is:' + str( information_gain(df_tennis, 'Wind', 'PlayTennis')),\"\\n\")\n",
 336 |     "print('\\n Info-gain for Temperature is:' + str( information_gain(df_tennis, 'Temperature','PlayTennis')),\"\\n\")"
 337 |    ]
 338 |   },
 339 |   {
 340 |    "cell_type": "markdown",
 341 |    "metadata": {},
 342 |    "source": [
 343 |     "# ID3 Algorithm"
 344 |    ]
 345 |   },
 346 |   {
 347 |    "cell_type": "code",
 348 |    "execution_count": 217,
 349 |    "metadata": {
 350 |     "collapsed": true
 351 |    },
 352 |    "outputs": [],
 353 |    "source": [
 354 |     "def id3(df, target_attribute_name, attribute_names, default_class=None):\n",
 355 |     "    \n",
 356 |     "    ## Tally target attribute:\n",
 357 |     "    from collections import Counter\n",
 358 |     "    cnt = Counter(x for x in df[target_attribute_name])# class of YES /NO\n",
 359 |     "    \n",
 360 |     "    ## First check: Is this split of the dataset homogeneous?\n",
 361 |     "    if len(cnt) == 1:\n",
 362 |     "        return next(iter(cnt))  # only one class is present, so return that class label as a leaf\n",
 363 |     "    \n",
 364 |     "    ## Second check: Is this split empty, or are there no attributes left to split on?\n",
 365 |     "    # If so, return the default class supplied by the caller\n",
 366 |     "    elif df.empty or (not attribute_names):\n",
 367 |     "        return default_class  # None unless a default was passed in (recursive calls pass one)\n",
 368 |     "    \n",
 369 |     "    ## Otherwise: This dataset is ready to be divided up!\n",
 370 |     "    else:\n",
 371 |     "        # Get Default Value for next recursive call of this function:\n",
 372 |     "        default_class = max(cnt.keys()) # NOTE(review): max() of the keys is the largest label in sort order, not necessarily the majority class\n",
 373 |     "        # Compute the Information Gain of the attributes:\n",
 374 |     "        gainz = [information_gain(df, attr, target_attribute_name) for attr in attribute_names] #\n",
 375 |     "        index_of_max = gainz.index(max(gainz)) # Index of Best Attribute\n",
 376 |     "        # Choose Best Attribute to split on:\n",
 377 |     "        best_attr = attribute_names[index_of_max]\n",
 378 |     "        \n",
 379 |     "        # Create an empty tree, to be populated in a moment\n",
 380 |     "        tree = {best_attr:{}} # Initiate the tree with the best attribute as its root node\n",
 381 |     "        remaining_attribute_names = [i for i in attribute_names if i != best_attr]\n",
 382 |     "        \n",
 383 |     "        # Split dataset\n",
 384 |     "        # On each split, recursively call this algorithm.\n",
 385 |     "        # populate the empty tree with subtrees, which\n",
 386 |     "        # are the result of the recursive call\n",
 387 |     "        for attr_val, data_subset in df.groupby(best_attr):\n",
 388 |     "            subtree = id3(data_subset,\n",
 389 |     "                        target_attribute_name,\n",
 390 |     "                        remaining_attribute_names,\n",
 391 |     "                        default_class)\n",
 392 |     "            tree[best_attr][attr_val] = subtree\n",
 393 |     "        return tree"
 394 |    ]
 395 |   },
 396 |   {
 397 |    "cell_type": "markdown",
 398 |    "metadata": {},
 399 |    "source": [
 400 |     "# Predicting Attributes"
 401 |    ]
 402 |   },
 403 |   {
 404 |    "cell_type": "code",
 405 |    "execution_count": 218,
 406 |    "metadata": {},
 407 |    "outputs": [
 408 |     {
 409 |      "name": "stdout",
 410 |      "output_type": "stream",
 411 |      "text": [
 412 |       "List of Attributes: ['PlayTennis', 'Outlook', 'Temperature', 'Humidity', 'Wind']\n",
 413 |       "Predicting Attributes: ['Outlook', 'Temperature', 'Humidity', 'Wind']\n"
 414 |      ]
 415 |     }
 416 |    ],
 417 |    "source": [
 418 |     "# Get Predictor Names (all but 'class')\n",
 419 |     "attribute_names = list(df_tennis.columns)\n",
 420 |     "print(\"List of Attributes:\", attribute_names) \n",
 421 |     "attribute_names.remove('PlayTennis') #Remove the class attribute \n",
 422 |     "print(\"Predicting Attributes:\", attribute_names)"
 423 |    ]
 424 |   },
 425 |   {
 426 |    "cell_type": "raw",
 427 |    "metadata": {},
 428 |    "source": [
 429 |     "# Tree Construction"
 430 |    ]
 431 |   },
 432 |   {
 433 |    "cell_type": "code",
 434 |    "execution_count": 219,
 435 |    "metadata": {},
 436 |    "outputs": [
 437 |     {
 438 |      "name": "stdout",
 439 |      "output_type": "stream",
 440 |      "text": [
 441 |       "Information Gain Calculation of  Outlook\n",
 442 |       "\n",
 443 |       " Number of Instances of the Current Sub Class is 4.0:\n",
 444 |       "\n",
 445 |       " Classes: Yes Yes\n",
 446 |       " \n",
 447 |       " Probabilities of Class Yes is 1.0:\n",
 448 |       " \n",
 449 |       " Probabilities of Class Yes is 1.0:\n",
 450 |       "\n",
 451 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 452 |       "\n",
 453 |       " Classes: No Yes\n",
 454 |       " \n",
 455 |       " Probabilities of Class No is 0.4:\n",
 456 |       " \n",
 457 |       " Probabilities of Class Yes is 0.6:\n",
 458 |       "\n",
 459 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 460 |       "\n",
 461 |       " Classes: No Yes\n",
 462 |       " \n",
 463 |       " Probabilities of Class No is 0.4:\n",
 464 |       " \n",
 465 |       " Probabilities of Class Yes is 0.6:\n",
 466 |       "\n",
 467 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 468 |       "\n",
 469 |       " Classes: No Yes\n",
 470 |       " \n",
 471 |       " Probabilities of Class No is 0.35714285714285715:\n",
 472 |       " \n",
 473 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 474 |       "Information Gain Calculation of  Temperature\n",
 475 |       "\n",
 476 |       " Number of Instances of the Current Sub Class is 4.0:\n",
 477 |       "\n",
 478 |       " Classes: No Yes\n",
 479 |       " \n",
 480 |       " Probabilities of Class No is 0.25:\n",
 481 |       " \n",
 482 |       " Probabilities of Class Yes is 0.75:\n",
 483 |       "\n",
 484 |       " Number of Instances of the Current Sub Class is 4.0:\n",
 485 |       "\n",
 486 |       " Classes: No Yes\n",
 487 |       " \n",
 488 |       " Probabilities of Class No is 0.5:\n",
 489 |       " \n",
 490 |       " Probabilities of Class Yes is 0.5:\n",
 491 |       "\n",
 492 |       " Number of Instances of the Current Sub Class is 6.0:\n",
 493 |       "\n",
 494 |       " Classes: No Yes\n",
 495 |       " \n",
 496 |       " Probabilities of Class No is 0.3333333333333333:\n",
 497 |       " \n",
 498 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
 499 |       "\n",
 500 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 501 |       "\n",
 502 |       " Classes: No Yes\n",
 503 |       " \n",
 504 |       " Probabilities of Class No is 0.35714285714285715:\n",
 505 |       " \n",
 506 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 507 |       "Information Gain Calculation of  Humidity\n",
 508 |       "\n",
 509 |       " Number of Instances of the Current Sub Class is 7.0:\n",
 510 |       "\n",
 511 |       " Classes: No Yes\n",
 512 |       " \n",
 513 |       " Probabilities of Class No is 0.42857142857142855:\n",
 514 |       " \n",
 515 |       " Probabilities of Class Yes is 0.5714285714285714:\n",
 516 |       "\n",
 517 |       " Number of Instances of the Current Sub Class is 7.0:\n",
 518 |       "\n",
 519 |       " Classes: No Yes\n",
 520 |       " \n",
 521 |       " Probabilities of Class No is 0.14285714285714285:\n",
 522 |       " \n",
 523 |       " Probabilities of Class Yes is 0.8571428571428571:\n",
 524 |       "\n",
 525 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 526 |       "\n",
 527 |       " Classes: No Yes\n",
 528 |       " \n",
 529 |       " Probabilities of Class No is 0.35714285714285715:\n",
 530 |       " \n",
 531 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 532 |       "Information Gain Calculation of  Wind\n",
 533 |       "\n",
 534 |       " Number of Instances of the Current Sub Class is 6.0:\n",
 535 |       "\n",
 536 |       " Classes: No Yes\n",
 537 |       " \n",
 538 |       " Probabilities of Class No is 0.5:\n",
 539 |       " \n",
 540 |       " Probabilities of Class Yes is 0.5:\n",
 541 |       "\n",
 542 |       " Number of Instances of the Current Sub Class is 8.0:\n",
 543 |       "\n",
 544 |       " Classes: No Yes\n",
 545 |       " \n",
 546 |       " Probabilities of Class No is 0.25:\n",
 547 |       " \n",
 548 |       " Probabilities of Class Yes is 0.75:\n",
 549 |       "\n",
 550 |       " Number of Instances of the Current Sub Class is 14.0:\n",
 551 |       "\n",
 552 |       " Classes: No Yes\n",
 553 |       " \n",
 554 |       " Probabilities of Class No is 0.35714285714285715:\n",
 555 |       " \n",
 556 |       " Probabilities of Class Yes is 0.6428571428571429:\n",
 557 |       "Information Gain Calculation of  Temperature\n",
 558 |       "\n",
 559 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 560 |       "\n",
 561 |       " Classes: No Yes\n",
 562 |       " \n",
 563 |       " Probabilities of Class No is 0.5:\n",
 564 |       " \n",
 565 |       " Probabilities of Class Yes is 0.5:\n",
 566 |       "\n",
 567 |       " Number of Instances of the Current Sub Class is 3.0:\n",
 568 |       "\n",
 569 |       " Classes: No Yes\n",
 570 |       " \n",
 571 |       " Probabilities of Class No is 0.3333333333333333:\n",
 572 |       " \n",
 573 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
 574 |       "\n",
 575 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 576 |       "\n",
 577 |       " Classes: No Yes\n",
 578 |       " \n",
 579 |       " Probabilities of Class No is 0.4:\n",
 580 |       " \n",
 581 |       " Probabilities of Class Yes is 0.6:\n",
 582 |       "Information Gain Calculation of  Humidity\n",
 583 |       "\n",
 584 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 585 |       "\n",
 586 |       " Classes: No Yes\n",
 587 |       " \n",
 588 |       " Probabilities of Class No is 0.5:\n",
 589 |       " \n",
 590 |       " Probabilities of Class Yes is 0.5:\n",
 591 |       "\n",
 592 |       " Number of Instances of the Current Sub Class is 3.0:\n",
 593 |       "\n",
 594 |       " Classes: No Yes\n",
 595 |       " \n",
 596 |       " Probabilities of Class No is 0.3333333333333333:\n",
 597 |       " \n",
 598 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
 599 |       "\n",
 600 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 601 |       "\n",
 602 |       " Classes: No Yes\n",
 603 |       " \n",
 604 |       " Probabilities of Class No is 0.4:\n",
 605 |       " \n",
 606 |       " Probabilities of Class Yes is 0.6:\n",
 607 |       "Information Gain Calculation of  Wind\n",
 608 |       "\n",
 609 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 610 |       "\n",
 611 |       " Classes: No No\n",
 612 |       " \n",
 613 |       " Probabilities of Class No is 1.0:\n",
 614 |       " \n",
 615 |       " Probabilities of Class No is 1.0:\n",
 616 |       "\n",
 617 |       " Number of Instances of the Current Sub Class is 3.0:\n",
 618 |       "\n",
 619 |       " Classes: Yes Yes\n",
 620 |       " \n",
 621 |       " Probabilities of Class Yes is 1.0:\n",
 622 |       " \n",
 623 |       " Probabilities of Class Yes is 1.0:\n",
 624 |       "\n",
 625 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 626 |       "\n",
 627 |       " Classes: No Yes\n",
 628 |       " \n",
 629 |       " Probabilities of Class No is 0.4:\n",
 630 |       " \n",
 631 |       " Probabilities of Class Yes is 0.6:\n",
 632 |       "Information Gain Calculation of  Temperature\n",
 633 |       "\n",
 634 |       " Number of Instances of the Current Sub Class is 1.0:\n",
 635 |       "\n",
 636 |       " Classes: Yes Yes\n",
 637 |       " \n",
 638 |       " Probabilities of Class Yes is 1.0:\n",
 639 |       " \n",
 640 |       " Probabilities of Class Yes is 1.0:\n",
 641 |       "\n",
 642 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 643 |       "\n",
 644 |       " Classes: No No\n",
 645 |       " \n",
 646 |       " Probabilities of Class No is 1.0:\n",
 647 |       " \n",
 648 |       " Probabilities of Class No is 1.0:\n",
 649 |       "\n",
 650 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 651 |       "\n",
 652 |       " Classes: No Yes\n",
 653 |       " \n",
 654 |       " Probabilities of Class No is 0.5:\n",
 655 |       " \n",
 656 |       " Probabilities of Class Yes is 0.5:\n",
 657 |       "\n",
 658 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 659 |       "\n",
 660 |       " Classes: No Yes\n",
 661 |       " \n",
 662 |       " Probabilities of Class No is 0.4:\n",
 663 |       " \n",
 664 |       " Probabilities of Class Yes is 0.6:\n",
 665 |       "Information Gain Calculation of  Humidity\n",
 666 |       "\n",
 667 |       " Number of Instances of the Current Sub Class is 3.0:\n",
 668 |       "\n",
 669 |       " Classes: No No\n",
 670 |       " \n",
 671 |       " Probabilities of Class No is 1.0:\n",
 672 |       " \n",
 673 |       " Probabilities of Class No is 1.0:\n",
 674 |       "\n",
 675 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 676 |       "\n",
 677 |       " Classes: Yes Yes\n",
 678 |       " \n",
 679 |       " Probabilities of Class Yes is 1.0:\n",
 680 |       " \n",
 681 |       " Probabilities of Class Yes is 1.0:\n",
 682 |       "\n",
 683 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 684 |       "\n",
 685 |       " Classes: No Yes\n",
 686 |       " \n",
 687 |       " Probabilities of Class No is 0.4:\n",
 688 |       " \n",
 689 |       " Probabilities of Class Yes is 0.6:\n",
 690 |       "Information Gain Calculation of  Wind\n",
 691 |       "\n",
 692 |       " Number of Instances of the Current Sub Class is 2.0:\n",
 693 |       "\n",
 694 |       " Classes: No Yes\n",
 695 |       " \n",
 696 |       " Probabilities of Class No is 0.5:\n",
 697 |       " \n",
 698 |       " Probabilities of Class Yes is 0.5:\n",
 699 |       "\n",
 700 |       " Number of Instances of the Current Sub Class is 3.0:\n",
 701 |       "\n",
 702 |       " Classes: No Yes\n",
 703 |       " \n",
 704 |       " Probabilities of Class No is 0.3333333333333333:\n",
 705 |       " \n",
 706 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
 707 |       "\n",
 708 |       " Number of Instances of the Current Sub Class is 5.0:\n",
 709 |       "\n",
 710 |       " Classes: No Yes\n",
 711 |       " \n",
 712 |       " Probabilities of Class No is 0.4:\n",
 713 |       " \n",
 714 |       " Probabilities of Class Yes is 0.6:\n",
 715 |       "\n",
 716 |       "\n",
 717 |       "The Resultant Decision Tree is :\n",
 718 |       "\n",
 719 |       "{'Outlook': {'Overcast': 'Yes',\n",
 720 |       "             'Rain': {'Wind': {'Strong': 'No', 'Weak': 'Yes'}},\n",
 721 |       "             'Sunny': {'Humidity': {'High': 'No', 'Normal': 'Yes'}}}}\n",
 722 |       "Best Attribute :\n",
 723 |       " Outlook\n",
 724 |       "Tree Keys:\n",
 725 |       " dict_keys(['Overcast', 'Rain', 'Sunny'])\n"
 726 |      ]
 727 |     }
 728 |    ],
 729 |    "source": [
 730 |     "# Run Algorithm:\n",
 731 |     "from pprint import pprint\n",
 732 |     "tree = id3(df_tennis,'PlayTennis',attribute_names)\n",
 733 |     "print(\"\\n\\nThe Resultant Decision Tree is :\\n\")\n",
 734 |     "#print(tree)\n",
 735 |     "pprint(tree)\n",
 736 |     "attribute = next(iter(tree))\n",
 737 |     "print(\"Best Attribute :\\n\",attribute)\n",
 738 |     "print(\"Tree Keys:\\n\",tree[attribute].keys())"
 739 |    ]
 740 |   },
 741 |   {
 742 |    "cell_type": "markdown",
 743 |    "metadata": {},
 744 |    "source": [
 745 |     "# Classification Accuracy"
 746 |    ]
 747 |   },
 748 |   {
 749 |    "cell_type": "code",
 750 |    "execution_count": 220,
 751 |    "metadata": {},
 752 |    "outputs": [],
 753 |    "source": [
 754 |     "def classify(instance, tree, default=None): # Classify one Play Tennis instance by walking the decision tree\n",
 755 |     "    \n",
 756 |     "    #print(\"Instance:\",instance)\n",
 757 |     "    attribute = next(iter(tree)) # Outlook/Humidity/Wind       \n",
 758 |     "    print(\"Key:\",tree.keys())  # [Outlook,Humidity,Wind ]\n",
 759 |     "    print(\"Attribute:\",attribute) # [Key /Attribute Both are same ]\n",
 760 |     "   \n",
 761 |     "    # print(\"Insance of Attribute :\",instance[attribute],attribute)\n",
 762 |     "    if instance[attribute] in tree[attribute].keys(): # Is the instance's value for this attribute one of the tree's branches?\n",
 763 |     "        result = tree[attribute][instance[attribute]]\n",
 764 |     "        print(\"Instance Attribute:\",instance[attribute],\"TreeKeys :\",tree[attribute].keys())\n",
 765 |     "        if isinstance(result, dict): # this is a tree, delve deeper\n",
 766 |     "            return classify(instance, result)\n",
 767 |     "        else:\n",
 768 |     "            return result # this is a label\n",
 769 |     "    else:\n",
 770 |     "        return default"
 771 |    ]
 772 |   },
 773 |   {
 774 |    "cell_type": "code",
 775 |    "execution_count": 138,
 776 |    "metadata": {},
 777 |    "outputs": [
 778 |     {
 779 |      "name": "stdout",
 780 |      "output_type": "stream",
 781 |      "text": [
 782 |       "Key: dict_keys(['Outlook'])\n",
 783 |       "Attribute: Outlook\n",
 784 |       "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 785 |       "Key: dict_keys(['Humidity'])\n",
 786 |       "Attribute: Humidity\n",
 787 |       "Instance Attribute: High TreeKeys : dict_keys(['High', 'Normal'])\n",
 788 |       "Key: dict_keys(['Outlook'])\n",
 789 |       "Attribute: Outlook\n",
 790 |       "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 791 |       "Key: dict_keys(['Humidity'])\n",
 792 |       "Attribute: Humidity\n",
 793 |       "Instance Attribute: High TreeKeys : dict_keys(['High', 'Normal'])\n",
 794 |       "Key: dict_keys(['Outlook'])\n",
 795 |       "Attribute: Outlook\n",
 796 |       "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 797 |       "Key: dict_keys(['Outlook'])\n",
 798 |       "Attribute: Outlook\n",
 799 |       "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 800 |       "Key: dict_keys(['Wind'])\n",
 801 |       "Attribute: Wind\n",
 802 |       "Instance Attribute: Weak TreeKeys : dict_keys(['Strong', 'Weak'])\n",
 803 |       "Key: dict_keys(['Outlook'])\n",
 804 |       "Attribute: Outlook\n",
 805 |       "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 806 |       "Key: dict_keys(['Wind'])\n",
 807 |       "Attribute: Wind\n",
 808 |       "Instance Attribute: Weak TreeKeys : dict_keys(['Strong', 'Weak'])\n",
 809 |       "Key: dict_keys(['Outlook'])\n",
 810 |       "Attribute: Outlook\n",
 811 |       "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 812 |       "Key: dict_keys(['Wind'])\n",
 813 |       "Attribute: Wind\n",
 814 |       "Instance Attribute: Strong TreeKeys : dict_keys(['Strong', 'Weak'])\n",
 815 |       "Key: dict_keys(['Outlook'])\n",
 816 |       "Attribute: Outlook\n",
 817 |       "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 818 |       "Key: dict_keys(['Outlook'])\n",
 819 |       "Attribute: Outlook\n",
 820 |       "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 821 |       "Key: dict_keys(['Humidity'])\n",
 822 |       "Attribute: Humidity\n",
 823 |       "Instance Attribute: High TreeKeys : dict_keys(['High', 'Normal'])\n",
 824 |       "Key: dict_keys(['Outlook'])\n",
 825 |       "Attribute: Outlook\n",
 826 |       "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 827 |       "Key: dict_keys(['Humidity'])\n",
 828 |       "Attribute: Humidity\n",
 829 |       "Instance Attribute: Normal TreeKeys : dict_keys(['High', 'Normal'])\n",
 830 |       "Key: dict_keys(['Outlook'])\n",
 831 |       "Attribute: Outlook\n",
 832 |       "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 833 |       "Key: dict_keys(['Wind'])\n",
 834 |       "Attribute: Wind\n",
 835 |       "Instance Attribute: Weak TreeKeys : dict_keys(['Strong', 'Weak'])\n",
 836 |       "Key: dict_keys(['Outlook'])\n",
 837 |       "Attribute: Outlook\n",
 838 |       "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 839 |       "Key: dict_keys(['Humidity'])\n",
 840 |       "Attribute: Humidity\n",
 841 |       "Instance Attribute: Normal TreeKeys : dict_keys(['High', 'Normal'])\n",
 842 |       "Key: dict_keys(['Outlook'])\n",
 843 |       "Attribute: Outlook\n",
 844 |       "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 845 |       "Key: dict_keys(['Outlook'])\n",
 846 |       "Attribute: Outlook\n",
 847 |       "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 848 |       "Key: dict_keys(['Outlook'])\n",
 849 |       "Attribute: Outlook\n",
 850 |       "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
 851 |       "Key: dict_keys(['Wind'])\n",
 852 |       "Attribute: Wind\n",
 853 |       "Instance Attribute: Strong TreeKeys : dict_keys(['Strong', 'Weak'])\n",
 854 |       "0      No\n",
 855 |       "1      No\n",
 856 |       "2     Yes\n",
 857 |       "3     Yes\n",
 858 |       "4     Yes\n",
 859 |       "5      No\n",
 860 |       "6     Yes\n",
 861 |       "7      No\n",
 862 |       "8     Yes\n",
 863 |       "9     Yes\n",
 864 |       "10    Yes\n",
 865 |       "11    Yes\n",
 866 |       "12    Yes\n",
 867 |       "13     No\n",
 868 |       "Name: predicted, dtype: object\n",
 869 |       "\n",
 870 |       " Accuracy is:\n",
 871 |       "1.0\n"
 872 |      ]
 873 |     },
 874 |     {
 875 |      "data": {
 876 |       "text/html": [
 877 |        "<div>\n",
 878 |        "<style>\n",
 879 |        "    .dataframe thead tr:only-child th {\n",
 880 |        "        text-align: right;\n",
 881 |        "    }\n",
 882 |        "\n",
 883 |        "    .dataframe thead th {\n",
 884 |        "        text-align: left;\n",
 885 |        "    }\n",
 886 |        "\n",
 887 |        "    .dataframe tbody tr th {\n",
 888 |        "        vertical-align: top;\n",
 889 |        "    }\n",
 890 |        "</style>\n",
 891 |        "<table border=\"1\" class=\"dataframe\">\n",
 892 |        "  <thead>\n",
 893 |        "    <tr style=\"text-align: right;\">\n",
 894 |        "      <th></th>\n",
 895 |        "      <th>PlayTennis</th>\n",
 896 |        "      <th>predicted</th>\n",
 897 |        "    </tr>\n",
 898 |        "  </thead>\n",
 899 |        "  <tbody>\n",
 900 |        "    <tr>\n",
 901 |        "      <th>0</th>\n",
 902 |        "      <td>No</td>\n",
 903 |        "      <td>No</td>\n",
 904 |        "    </tr>\n",
 905 |        "    <tr>\n",
 906 |        "      <th>1</th>\n",
 907 |        "      <td>No</td>\n",
 908 |        "      <td>No</td>\n",
 909 |        "    </tr>\n",
 910 |        "    <tr>\n",
 911 |        "      <th>2</th>\n",
 912 |        "      <td>Yes</td>\n",
 913 |        "      <td>Yes</td>\n",
 914 |        "    </tr>\n",
 915 |        "    <tr>\n",
 916 |        "      <th>3</th>\n",
 917 |        "      <td>Yes</td>\n",
 918 |        "      <td>Yes</td>\n",
 919 |        "    </tr>\n",
 920 |        "    <tr>\n",
 921 |        "      <th>4</th>\n",
 922 |        "      <td>Yes</td>\n",
 923 |        "      <td>Yes</td>\n",
 924 |        "    </tr>\n",
 925 |        "    <tr>\n",
 926 |        "      <th>5</th>\n",
 927 |        "      <td>No</td>\n",
 928 |        "      <td>No</td>\n",
 929 |        "    </tr>\n",
 930 |        "    <tr>\n",
 931 |        "      <th>6</th>\n",
 932 |        "      <td>Yes</td>\n",
 933 |        "      <td>Yes</td>\n",
 934 |        "    </tr>\n",
 935 |        "    <tr>\n",
 936 |        "      <th>7</th>\n",
 937 |        "      <td>No</td>\n",
 938 |        "      <td>No</td>\n",
 939 |        "    </tr>\n",
 940 |        "    <tr>\n",
 941 |        "      <th>8</th>\n",
 942 |        "      <td>Yes</td>\n",
 943 |        "      <td>Yes</td>\n",
 944 |        "    </tr>\n",
 945 |        "    <tr>\n",
 946 |        "      <th>9</th>\n",
 947 |        "      <td>Yes</td>\n",
 948 |        "      <td>Yes</td>\n",
 949 |        "    </tr>\n",
 950 |        "    <tr>\n",
 951 |        "      <th>10</th>\n",
 952 |        "      <td>Yes</td>\n",
 953 |        "      <td>Yes</td>\n",
 954 |        "    </tr>\n",
 955 |        "    <tr>\n",
 956 |        "      <th>11</th>\n",
 957 |        "      <td>Yes</td>\n",
 958 |        "      <td>Yes</td>\n",
 959 |        "    </tr>\n",
 960 |        "    <tr>\n",
 961 |        "      <th>12</th>\n",
 962 |        "      <td>Yes</td>\n",
 963 |        "      <td>Yes</td>\n",
 964 |        "    </tr>\n",
 965 |        "    <tr>\n",
 966 |        "      <th>13</th>\n",
 967 |        "      <td>No</td>\n",
 968 |        "      <td>No</td>\n",
 969 |        "    </tr>\n",
 970 |        "  </tbody>\n",
 971 |        "</table>\n",
 972 |        "</div>"
 973 |       ],
 974 |       "text/plain": [
 975 |        "   PlayTennis predicted\n",
 976 |        "0          No        No\n",
 977 |        "1          No        No\n",
 978 |        "2         Yes       Yes\n",
 979 |        "3         Yes       Yes\n",
 980 |        "4         Yes       Yes\n",
 981 |        "5          No        No\n",
 982 |        "6         Yes       Yes\n",
 983 |        "7          No        No\n",
 984 |        "8         Yes       Yes\n",
 985 |        "9         Yes       Yes\n",
 986 |        "10        Yes       Yes\n",
 987 |        "11        Yes       Yes\n",
 988 |        "12        Yes       Yes\n",
 989 |        "13         No        No"
 990 |       ]
 991 |      },
 992 |      "execution_count": 138,
 993 |      "metadata": {},
 994 |      "output_type": "execute_result"
 995 |     }
 996 |    ],
 997 |    "source": [
 998 |     "df_tennis['predicted'] = df_tennis.apply(classify, axis=1, args=(tree,'No') ) \n",
 999 |     "    # classify func allows for a default arg: when tree doesn't have answer for a particular\n",
1000 |     "    # combitation of attribute-values, we can use 'no' as the default guess \n",
1001 |     "\n",
1002 |     "print(df_tennis['predicted'])\n",
1003 |     "\n",
1004 |     "print('\\n Accuracy is:\\n' + str( sum(df_tennis['PlayTennis']==df_tennis['predicted'] ) / (1.0*len(df_tennis.index)) ))\n",
1005 |     "\n",
1006 |     "\n",
1007 |     "df_tennis[['PlayTennis', 'predicted']]\n"
1008 |    ]
1009 |   },
1010 |   {
1011 |    "cell_type": "markdown",
1012 |    "metadata": {
1013 |     "collapsed": true
1014 |    },
1015 |    "source": [
1016 |     "# Classification Accuracy: Training/Testing Set"
1017 |    ]
1018 |   },
1019 |   {
1020 |    "cell_type": "code",
1021 |    "execution_count": 221,
1022 |    "metadata": {
1023 |     "scrolled": false
1024 |    },
1025 |    "outputs": [
1026 |     {
1027 |      "name": "stdout",
1028 |      "output_type": "stream",
1029 |      "text": [
1030 |       "Information Gain Calculation of  Outlook\n",
1031 |       "\n",
1032 |       " Number of Instances of the Current Sub Class is 2.0:\n",
1033 |       "\n",
1034 |       " Classes: Yes Yes\n",
1035 |       " \n",
1036 |       " Probabilities of Class Yes is 1.0:\n",
1037 |       " \n",
1038 |       " Probabilities of Class Yes is 1.0:\n",
1039 |       "\n",
1040 |       " Number of Instances of the Current Sub Class is 4.0:\n",
1041 |       "\n",
1042 |       " Classes: No Yes\n",
1043 |       " \n",
1044 |       " Probabilities of Class No is 0.25:\n",
1045 |       " \n",
1046 |       " Probabilities of Class Yes is 0.75:\n",
1047 |       "\n",
1048 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1049 |       "\n",
1050 |       " Classes: No Yes\n",
1051 |       " \n",
1052 |       " Probabilities of Class No is 0.3333333333333333:\n",
1053 |       " \n",
1054 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1055 |       "\n",
1056 |       " Number of Instances of the Current Sub Class is 9.0:\n",
1057 |       "\n",
1058 |       " Classes: No Yes\n",
1059 |       " \n",
1060 |       " Probabilities of Class No is 0.3333333333333333:\n",
1061 |       " \n",
1062 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1063 |       "Information Gain Calculation of  Temperature\n",
1064 |       "\n",
1065 |       " Number of Instances of the Current Sub Class is 4.0:\n",
1066 |       "\n",
1067 |       " Classes: No Yes\n",
1068 |       " \n",
1069 |       " Probabilities of Class No is 0.25:\n",
1070 |       " \n",
1071 |       " Probabilities of Class Yes is 0.75:\n",
1072 |       "\n",
1073 |       " Number of Instances of the Current Sub Class is 2.0:\n",
1074 |       "\n",
1075 |       " Classes: No Yes\n",
1076 |       " \n",
1077 |       " Probabilities of Class No is 0.5:\n",
1078 |       " \n",
1079 |       " Probabilities of Class Yes is 0.5:\n",
1080 |       "\n",
1081 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1082 |       "\n",
1083 |       " Classes: No Yes\n",
1084 |       " \n",
1085 |       " Probabilities of Class No is 0.3333333333333333:\n",
1086 |       " \n",
1087 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1088 |       "\n",
1089 |       " Number of Instances of the Current Sub Class is 9.0:\n",
1090 |       "\n",
1091 |       " Classes: No Yes\n",
1092 |       " \n",
1093 |       " Probabilities of Class No is 0.3333333333333333:\n",
1094 |       " \n",
1095 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1096 |       "Information Gain Calculation of  Humidity\n",
1097 |       "\n",
1098 |       " Number of Instances of the Current Sub Class is 4.0:\n",
1099 |       "\n",
1100 |       " Classes: No Yes\n",
1101 |       " \n",
1102 |       " Probabilities of Class No is 0.5:\n",
1103 |       " \n",
1104 |       " Probabilities of Class Yes is 0.5:\n",
1105 |       "\n",
1106 |       " Number of Instances of the Current Sub Class is 5.0:\n",
1107 |       "\n",
1108 |       " Classes: No Yes\n",
1109 |       " \n",
1110 |       " Probabilities of Class No is 0.2:\n",
1111 |       " \n",
1112 |       " Probabilities of Class Yes is 0.8:\n",
1113 |       "\n",
1114 |       " Number of Instances of the Current Sub Class is 9.0:\n",
1115 |       "\n",
1116 |       " Classes: No Yes\n",
1117 |       " \n",
1118 |       " Probabilities of Class No is 0.3333333333333333:\n",
1119 |       " \n",
1120 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1121 |       "Information Gain Calculation of  Wind\n",
1122 |       "\n",
1123 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1124 |       "\n",
1125 |       " Classes: No Yes\n",
1126 |       " \n",
1127 |       " Probabilities of Class No is 0.3333333333333333:\n",
1128 |       " \n",
1129 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1130 |       "\n",
1131 |       " Number of Instances of the Current Sub Class is 6.0:\n",
1132 |       "\n",
1133 |       " Classes: No Yes\n",
1134 |       " \n",
1135 |       " Probabilities of Class No is 0.16666666666666666:\n",
1136 |       " \n",
1137 |       " Probabilities of Class Yes is 0.8333333333333334:\n",
1138 |       "\n",
1139 |       " Number of Instances of the Current Sub Class is 9.0:\n",
1140 |       "\n",
1141 |       " Classes: No Yes\n",
1142 |       " \n",
1143 |       " Probabilities of Class No is 0.3333333333333333:\n",
1144 |       " \n",
1145 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1146 |       "Information Gain Calculation of  Temperature\n",
1147 |       "\n",
1148 |       " Number of Instances of the Current Sub Class is 2.0:\n",
1149 |       "\n",
1150 |       " Classes: No Yes\n",
1151 |       " \n",
1152 |       " Probabilities of Class No is 0.5:\n",
1153 |       " \n",
1154 |       " Probabilities of Class Yes is 0.5:\n",
1155 |       "\n",
1156 |       " Number of Instances of the Current Sub Class is 2.0:\n",
1157 |       "\n",
1158 |       " Classes: Yes Yes\n",
1159 |       " \n",
1160 |       " Probabilities of Class Yes is 1.0:\n",
1161 |       " \n",
1162 |       " Probabilities of Class Yes is 1.0:\n",
1163 |       "\n",
1164 |       " Number of Instances of the Current Sub Class is 4.0:\n",
1165 |       "\n",
1166 |       " Classes: No Yes\n",
1167 |       " \n",
1168 |       " Probabilities of Class No is 0.25:\n",
1169 |       " \n",
1170 |       " Probabilities of Class Yes is 0.75:\n",
1171 |       "Information Gain Calculation of  Humidity\n",
1172 |       "\n",
1173 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1174 |       "\n",
1175 |       " Classes: Yes Yes\n",
1176 |       " \n",
1177 |       " Probabilities of Class Yes is 1.0:\n",
1178 |       " \n",
1179 |       " Probabilities of Class Yes is 1.0:\n",
1180 |       "\n",
1181 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1182 |       "\n",
1183 |       " Classes: No Yes\n",
1184 |       " \n",
1185 |       " Probabilities of Class No is 0.3333333333333333:\n",
1186 |       " \n",
1187 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1188 |       "\n",
1189 |       " Number of Instances of the Current Sub Class is 4.0:\n",
1190 |       "\n",
1191 |       " Classes: No Yes\n",
1192 |       " \n",
1193 |       " Probabilities of Class No is 0.25:\n",
1194 |       " \n",
1195 |       " Probabilities of Class Yes is 0.75:\n",
1196 |       "Information Gain Calculation of  Wind\n",
1197 |       "\n",
1198 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1199 |       "\n",
1200 |       " Classes: No No\n",
1201 |       " \n",
1202 |       " Probabilities of Class No is 1.0:\n",
1203 |       " \n",
1204 |       " Probabilities of Class No is 1.0:\n",
1205 |       "\n",
1206 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1207 |       "\n",
1208 |       " Classes: Yes Yes\n",
1209 |       " \n",
1210 |       " Probabilities of Class Yes is 1.0:\n",
1211 |       " \n",
1212 |       " Probabilities of Class Yes is 1.0:\n",
1213 |       "\n",
1214 |       " Number of Instances of the Current Sub Class is 4.0:\n",
1215 |       "\n",
1216 |       " Classes: No Yes\n",
1217 |       " \n",
1218 |       " Probabilities of Class No is 0.25:\n",
1219 |       " \n",
1220 |       " Probabilities of Class Yes is 0.75:\n",
1221 |       "Information Gain Calculation of  Temperature\n",
1222 |       "\n",
1223 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1224 |       "\n",
1225 |       " Classes: Yes Yes\n",
1226 |       " \n",
1227 |       " Probabilities of Class Yes is 1.0:\n",
1228 |       " \n",
1229 |       " Probabilities of Class Yes is 1.0:\n",
1230 |       "\n",
1231 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1232 |       "\n",
1233 |       " Classes: No No\n",
1234 |       " \n",
1235 |       " Probabilities of Class No is 1.0:\n",
1236 |       " \n",
1237 |       " Probabilities of Class No is 1.0:\n",
1238 |       "\n",
1239 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1240 |       "\n",
1241 |       " Classes: No No\n",
1242 |       " \n",
1243 |       " Probabilities of Class No is 1.0:\n",
1244 |       " \n",
1245 |       " Probabilities of Class No is 1.0:\n",
1246 |       "\n",
1247 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1248 |       "\n",
1249 |       " Classes: No Yes\n",
1250 |       " \n",
1251 |       " Probabilities of Class No is 0.3333333333333333:\n",
1252 |       " \n",
1253 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1254 |       "Information Gain Calculation of  Humidity\n",
1255 |       "\n",
1256 |       " Number of Instances of the Current Sub Class is 2.0:\n",
1257 |       "\n",
1258 |       " Classes: No No\n",
1259 |       " \n",
1260 |       " Probabilities of Class No is 1.0:\n",
1261 |       " \n",
1262 |       " Probabilities of Class No is 1.0:\n",
1263 |       "\n",
1264 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1265 |       "\n",
1266 |       " Classes: Yes Yes\n",
1267 |       " \n",
1268 |       " Probabilities of Class Yes is 1.0:\n",
1269 |       " \n",
1270 |       " Probabilities of Class Yes is 1.0:\n",
1271 |       "\n",
1272 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1273 |       "\n",
1274 |       " Classes: No Yes\n",
1275 |       " \n",
1276 |       " Probabilities of Class No is 0.3333333333333333:\n",
1277 |       " \n",
1278 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1279 |       "Information Gain Calculation of  Wind\n",
1280 |       "\n",
1281 |       " Number of Instances of the Current Sub Class is 1.0:\n",
1282 |       "\n",
1283 |       " Classes: No No\n",
1284 |       " \n",
1285 |       " Probabilities of Class No is 1.0:\n",
1286 |       " \n",
1287 |       " Probabilities of Class No is 1.0:\n",
1288 |       "\n",
1289 |       " Number of Instances of the Current Sub Class is 2.0:\n",
1290 |       "\n",
1291 |       " Classes: No Yes\n",
1292 |       " \n",
1293 |       " Probabilities of Class No is 0.5:\n",
1294 |       " \n",
1295 |       " Probabilities of Class Yes is 0.5:\n",
1296 |       "\n",
1297 |       " Number of Instances of the Current Sub Class is 3.0:\n",
1298 |       "\n",
1299 |       " Classes: No Yes\n",
1300 |       " \n",
1301 |       " Probabilities of Class No is 0.3333333333333333:\n",
1302 |       " \n",
1303 |       " Probabilities of Class Yes is 0.6666666666666666:\n",
1304 |       "Key: dict_keys(['Outlook'])\n",
1305 |       "Attribute: Outlook\n",
1306 |       "Instance Attribute: Sunny TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
1307 |       "Key: dict_keys(['Temperature'])\n",
1308 |       "Attribute: Temperature\n",
1309 |       "Instance Attribute: Mild TreeKeys : dict_keys(['Cool', 'Hot', 'Mild'])\n",
1310 |       "Key: dict_keys(['Outlook'])\n",
1311 |       "Attribute: Outlook\n",
1312 |       "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
1313 |       "Key: dict_keys(['Outlook'])\n",
1314 |       "Attribute: Outlook\n",
1315 |       "Instance Attribute: Overcast TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
1316 |       "Key: dict_keys(['Outlook'])\n",
1317 |       "Attribute: Outlook\n",
1318 |       "Instance Attribute: Rain TreeKeys : dict_keys(['Overcast', 'Rain', 'Sunny'])\n",
1319 |       "Key: dict_keys(['Wind'])\n",
1320 |       "Attribute: Wind\n",
1321 |       "Instance Attribute: Strong TreeKeys : dict_keys(['Strong', 'Weak'])\n",
1322 |       "\n",
1323 |       "\n",
1324 |       " Accuracy is : 0.75\n"
1325 |      ]
1326 |     },
1327 |     {
1328 |      "name": "stderr",
1329 |      "output_type": "stream",
1330 |      "text": [
1331 |       "C:\\Users\\Dr.Thyagaraju\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:8: SettingWithCopyWarning: \n",
1332 |       "A value is trying to be set on a copy of a slice from a DataFrame.\n",
1333 |       "Try using .loc[row_indexer,col_indexer] = value instead\n",
1334 |       "\n",
1335 |       "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
1336 |       "  \n"
1337 |      ]
1338 |     }
1339 |    ],
1340 |    "source": [
1341 |     "# Hold out the last four rows for testing and train on every earlier row, starting from row 0.\n",
1342 |     "training_data = df_tennis.iloc[:-4] # all but last four instances\n",
1343 |     "# .copy() makes test_data an independent frame, so adding a column raises no SettingWithCopyWarning.\n",
1344 |     "test_data = df_tennis.iloc[-4:].copy() # just the last four\n",
1345 |     "train_tree = id3(training_data, 'PlayTennis', attribute_names)\n",
1346 |     "\n",
1347 |     "# Classify the held-out rows with the tree built from training_data; 'Yes' is the fallback label.\n",
1348 |     "test_data['predicted2'] = test_data.apply(classify, axis=1, args=(train_tree, 'Yes'))\n",
1349 |     "\n",
1350 |     "# Fraction of held-out rows whose prediction equals the true PlayTennis label.\n",
1351 |     "print ('\\n\\n Accuracy is : ' + str( sum(test_data['PlayTennis']==test_data['predicted2'] ) / (1.0*len(test_data.index)) ))"
1352 |    ]
1353 |   },
1354 |   {
1355 |    "cell_type": "markdown",
1356 |    "metadata": {
1357 |     "collapsed": true
1358 |    },
1359 |    "source": [
1360 |     "# End"
1361 |    ]
1362 |   }
1363 |  ],
1364 |  "metadata": {
1365 |   "kernelspec": {
1366 |    "display_name": "Python 3",
1367 |    "language": "python",
1368 |    "name": "python3"
1369 |   },
1370 |   "language_info": {
1371 |    "codemirror_mode": {
1372 |     "name": "ipython",
1373 |     "version": 3
1374 |    },
1375 |    "file_extension": ".py",
1376 |    "mimetype": "text/x-python",
1377 |    "name": "python",
1378 |    "nbconvert_exporter": "python",
1379 |    "pygments_lexer": "ipython3",
1380 |    "version": "3.6.3"
1381 |   }
1382 |  },
1383 |  "nbformat": 4,
1384 |  "nbformat_minor": 2
1385 | }
1386 | 


--------------------------------------------------------------------------------
/ID3+-+Algorithm+ID3(Examples,+TargetAttribute,+Attributes).jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/profthyagu/Python-Decision-Tree-Using-ID3/abb2d8bb265c6b0435745a01e0cb980a581893ea/ID3+-+Algorithm+ID3(Examples,+TargetAttribute,+Attributes).jpg


--------------------------------------------------------------------------------
/PlayTennis.csv:
--------------------------------------------------------------------------------
 1 | ,PlayTennis,Outlook,Temperature,Humidity,Wind
 2 | 0,No,Sunny,Hot,High,Weak
 3 | 1,No,Sunny,Hot,High,Strong
 4 | 2,Yes,Overcast,Hot,High,Weak
 5 | 3,Yes,Rain,Mild,High,Weak
 6 | 4,Yes,Rain,Cool,Normal,Weak
 7 | 5,No,Rain,Cool,Normal,Strong
 8 | 6,Yes,Overcast,Cool,Normal,Strong
 9 | 7,No,Sunny,Mild,High,Weak
10 | 8,Yes,Sunny,Cool,Normal,Weak
11 | 9,Yes,Rain,Mild,Normal,Weak
12 | 10,Yes,Sunny,Mild,Normal,Strong
13 | 11,Yes,Overcast,Mild,High,Strong
14 | 12,Yes,Overcast,Hot,Normal,Weak
15 | 13,No,Rain,Mild,High,Strong
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Decision-Tree-Using-ID3-
2 | Problem: Write a program to demonstrate the working of the decision-tree-based ID3 algorithm. Use an appropriate data set for building the decision tree and apply this knowledge to classify a new sample.
3 | 


--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman


--------------------------------------------------------------------------------