└── ID3 Algorithm.ipynb


/ID3 Algorithm.ipynb:
--------------------------------------------------------------------------------
 1 | {
 2 |  "cells": [
 3 |   {
 4 |    "cell_type": "code",
 5 |    "execution_count": 1,
 6 |    "id": "36c44b7b",
 7 |    "metadata": {},
 8 |    "outputs": [],
 9 |    "source": [
10 |     "import math\n",
11 |     "import pandas as pd\n",
12 |     "\n",
13 |     "class DecisionTree:\n",
14 |     "    \"\"\"Node of a binary-class ID3 decision tree built from a pandas DataFrame.\n",
15 |     "\n",
16 |     "    Each node keeps the rows that reached it. Rows whose target value equals\n",
17 |     "    ``positive`` count as positive examples and every other row as negative.\n",
18 |     "    Call ``update_nodes()`` on the root to grow the whole tree recursively.\n",
19 |     "\n",
20 |     "    Attributes:\n",
21 |     "        data: DataFrame of the rows (and still-unused columns) at this node.\n",
22 |     "        target: Name of the label column.\n",
23 |     "        positive: Target value treated as the positive class.\n",
24 |     "        parent_val: Value of the parent's split feature that leads here.\n",
25 |     "        parent: Name of the feature the parent node split on.\n",
26 |     "        childs: Child nodes, one per value of ``splitter`` that has rows.\n",
27 |     "        decision: Label predicted at a leaf; empty string while unset.\n",
28 |     "        features, entropy, gains, splitter: Filled in by ``update_nodes()``.\n",
29 |     "    \"\"\"\n",
30 |     "\n",
31 |     "    def __init__(self, df, target, positive, parent_val, parent):\n",
32 |     "        \"\"\"Store the node's inputs; the tree is only grown by ``update_nodes()``.\"\"\"\n",
33 |     "        self.data = df\n",
34 |     "        self.target = target\n",
35 |     "        self.positive = positive\n",
36 |     "        self.parent_val = parent_val\n",
37 |     "        self.parent = parent\n",
38 |     "        self.childs = []\n",
39 |     "        self.decision = \"\"\n",
40 |     "        # Defined up front so leaves, which never compute gains, can still be\n",
41 |     "        # inspected without raising AttributeError.\n",
42 |     "        self.features = []\n",
43 |     "        self.entropy = None\n",
44 |     "        self.gains = []\n",
45 |     "        self.splitter = None\n",
46 |     "\n",
47 |     "    def _get_entropy(self, data):\n",
48 |     "        \"\"\"Return the two-class Shannon entropy, in bits, of ``data``'s target.\n",
49 |     "\n",
50 |     "        Args:\n",
51 |     "            data: DataFrame that contains the ``self.target`` column.\n",
52 |     "\n",
53 |     "        Returns:\n",
54 |     "            0.0 for an empty or single-class frame, up to 1.0 for an even split.\n",
55 |     "        \"\"\"\n",
56 |     "        total = data.shape[0]\n",
57 |     "        if total == 0:\n",
58 |     "            return 0.0\n",
59 |     "        # Vectorized count instead of summing the boolean Series in Python.\n",
60 |     "        p_ratio = int((data[self.target] == self.positive).sum()) / total\n",
61 |     "        entropy = 0.0\n",
62 |     "        for ratio in (p_ratio, 1 - p_ratio):\n",
63 |     "            if ratio > 0:  # 0 * log2(0) is taken to be 0\n",
64 |     "                entropy -= ratio * math.log2(ratio)\n",
65 |     "        return entropy\n",
66 |     "\n",
67 |     "    def _get_gain(self, feat):\n",
68 |     "        \"\"\"Return the information gain of splitting ``self.data`` on ``feat``.\n",
69 |     "\n",
70 |     "        The gain is the node's entropy minus the size-weighted average entropy\n",
71 |     "        of the subsets obtained for each distinct value of ``feat``.\n",
72 |     "        \"\"\"\n",
73 |     "        total = self.data.shape[0]\n",
74 |     "        if total == 0:\n",
75 |     "            return 0.0\n",
76 |     "        column = self.data[feat]\n",
77 |     "        avg_info = 0.0\n",
78 |     "        for val in column.unique():\n",
79 |     "            subset = self.data[column == val]  # mask built once per value\n",
80 |     "            avg_info += self._get_entropy(subset) * subset.shape[0] / total\n",
81 |     "        return self._get_entropy(self.data) - avg_info\n",
82 |     "\n",
83 |     "    def _get_splitter(self):\n",
84 |     "        \"\"\"Set ``self.splitter`` to the highest-gain feature (first one on ties).\"\"\"\n",
85 |     "        self.splitter = max(self.gains, key=lambda x: x[1])[0]\n",
86 |     "\n",
87 |     "    def _majority_label(self):\n",
88 |     "        \"\"\"Return the most frequent target value, or an empty string if none.\"\"\"\n",
89 |     "        modes = self.data[self.target].mode()\n",
90 |     "        return modes.iloc[0] if len(modes) else \"\"\n",
91 |     "\n",
92 |     "    def update_nodes(self):\n",
93 |     "        \"\"\"Grow the subtree below this node and label its leaves.\n",
94 |     "\n",
95 |     "        A node becomes a leaf when its rows are pure (entropy 0) or when no\n",
96 |     "        feature column is left; its ``decision`` is then the majority label.\n",
97 |     "        Otherwise the node splits on the highest-gain feature and recurses\n",
98 |     "        into one child per value of that feature.\n",
99 |     "        \"\"\"\n",
100 |     "        # Start clean so a second call rebuilds the subtree instead of\n",
101 |     "        # appending duplicate children.\n",
102 |     "        self.childs = []\n",
103 |     "        self.decision = \"\"\n",
104 |     "        self.gains = []\n",
105 |     "        self.splitter = None\n",
106 |     "        self.features = [col for col in self.data.columns if col != self.target]\n",
107 |     "        self.entropy = self._get_entropy(self.data)\n",
108 |     "        if self.entropy == 0 or not self.features:\n",
109 |     "            # Without the second test, rows with identical features but\n",
110 |     "            # conflicting labels reached max() with an empty gains list.\n",
111 |     "            self.decision = self._majority_label()\n",
112 |     "            return\n",
113 |     "        self.gains = [(feat, self._get_gain(feat)) for feat in self.features]\n",
114 |     "        self._get_splitter()\n",
115 |     "        residual_columns = [k for k in self.data.columns if k != self.splitter]\n",
116 |     "        split_column = self.data[self.splitter]\n",
117 |     "        for val in split_column.unique():\n",
118 |     "            df_tmp = self.data.loc[split_column == val, residual_columns]\n",
119 |     "            if df_tmp.empty:\n",
120 |     "                # NaN never compares equal, so a NaN value selects no rows.\n",
121 |     "                continue\n",
122 |     "            tmp_node = DecisionTree(df_tmp, self.target, self.positive, val, self.splitter)\n",
123 |     "            tmp_node.update_nodes()\n",
124 |     "            self.childs.append(tmp_node)\n",
125 |     "        if not self.childs:\n",
126 |     "            # Every split value was NaN: fall back to a leaf.\n",
127 |     "            self.decision = self._majority_label()\n",
128 |     "\n",
129 |     "    def print_tree(node, depth=0):\n",
130 |     "        \"\"\"Print the subtree rooted at ``node``, one indented line per node.\n",
131 |     "\n",
132 |     "        Deliberately written without ``self`` so that both\n",
133 |     "        ``DecisionTree.print_tree(root)`` and ``root.print_tree()`` work.\n",
134 |     "        A ``None`` node prints nothing.\n",
135 |     "        \"\"\"\n",
136 |     "        if node is None:\n",
137 |     "            return\n",
138 |     "        line = f\"{' ' * depth}Parent: {node.parent} | Parent Value: {node.parent_val}\"\n",
139 |     "        unset = isinstance(node.decision, str) and node.decision == \"\"\n",
140 |     "        if not unset:\n",
141 |     "            line += f\" | Decision: {node.decision}\"\n",
142 |     "        print(line)\n",
143 |     "        for child in node.childs:\n",
144 |     "            DecisionTree.print_tree(child, depth + 1)\n",
60 |     "\n",
61 |     "# Load the data set, grow the tree and print it.\n",
62 |     "# Assumes the CSV file sits in the same directory as the notebook, since the\n",
63 |     "# path below is relative.\n",
64 |     "# NOTE(review): the positive label is the string \"100\"; it can only match if\n",
65 |     "# the \"state\" column holds exactly that string -- confirm against the CSV.\n",
66 |     "df = pd.read_csv(filepath_or_buffer=\"governors_county.csv\")\n",
67 |     "dt = DecisionTree(df=df, target=\"state\", positive=\"100\", parent_val=\",\", parent=None)\n",
68 |     "dt.update_nodes()\n",
69 |     "dt.print_tree()\n"
66 |    ]
67 |   }
68 |  ],
69 |  "metadata": {
70 |   "kernelspec": {
71 |    "display_name": "Python 3",
72 |    "language": "python",
73 |    "name": "python3"
74 |   },
75 |   "language_info": {
76 |    "codemirror_mode": {
77 |     "name": "ipython",
78 |     "version": 3
79 |    },
80 |    "file_extension": ".py",
81 |    "mimetype": "text/x-python",
82 |    "name": "python",
83 |    "nbconvert_exporter": "python",
84 |    "pygments_lexer": "ipython3",
85 |    "version": "3.8"
86 |   }
87 |  },
88 |  "nbformat": 4,
89 |  "nbformat_minor": 5
90 | }
91 | 


--------------------------------------------------------------------------------