└── GaussianNB.ipynb /GaussianNB.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "authorship_tag": "ABX9TyPdNvGtMa21p6GYkSd0ei45", 8 | "include_colab_link": true 9 | }, 10 | "kernelspec": { 11 | "name": "python3", 12 | "display_name": "Python 3" 13 | }, 14 | "language_info": { 15 | "name": "python" 16 | } 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "metadata": { 22 | "id": "view-in-github", 23 | "colab_type": "text" 24 | }, 25 | "source": [ 26 | "\"Open" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "id": "ipKOzJxvoRby" 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "import pandas as pd\n", 38 | "import numpy as np\n", 39 | "from sklearn.naive_bayes import GaussianNB\n", 40 | "from sklearn.model_selection import train_test_split\n", 41 | "from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "source": [ 47 | "data = dict(  # eight student records, one list position per student\n", 48 | "    GPA=[3.8, 2.5, 3.2, 4.0, 2.8, 3.5, 3.9, 1.9],\n", 49 | "    Attendance_Rate=[0.90, 0.75, 0.82, 0.95, 0.68, 0.88, 0.92, 0.50],\n", 50 | "    Standardized_Test_Score=[1200, 980, 1100, 1350, 850, 1050, 1280, 700],\n", 51 | "    At_Risk=[0, 1, 0, 0, 1, 0, 0, 1],  # 0: Not at risk, 1: At risk\n", 52 | ")" 53 | ], 54 | "metadata": { 55 | "id": "RdkhBmZmonyU" 56 | }, 57 | "execution_count": null, 58 | "outputs": [] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "source": [ 63 | "df = pd.DataFrame(data=data, columns=list(data))  # one column per key, in insertion order" 64 | ], 65 | "metadata": { 66 | "id": "uw2R6dCiotH3" 67 | }, 68 | "execution_count": null, 69 | "outputs": [] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "target = \"At_Risk\"  # label column\n", 75 | "features = [\"GPA\", \"Attendance_Rate\", \"Standardized_Test_Score\"]  # predictor columns" 76 | ], 77 | "metadata": { 78 | "id": "99PBg61bozjF" 79 | 
}, 80 | "execution_count": null, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "source": [ 86 | "# Split data into training and testing sets.\n# random_state pins the shuffle so Restart & Run All reproduces the same split;\n# stratify=df[target] keeps both At_Risk classes in the small test set, so precision/recall/F1 are defined.\n", 87 | "X_train, X_test, y_train, y_test = train_test_split(\n    df[features], df[target], test_size=0.2, random_state=42, stratify=df[target]\n)" 88 | ], 89 | "metadata": { 90 | "id": "8CG7Ozbko1p8" 91 | }, 92 | "execution_count": null, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "source": [ 98 | "clf = GaussianNB()  # Choose GaussianNB for continuous features\n", 99 | "clf.fit(X_train, y_train)" 100 | ], 101 | "metadata": { 102 | "colab": { 103 | "base_uri": "https://localhost:8080/", 104 | "height": 74 105 | }, 106 | "id": "tCIfvTkeo7GX", 107 | "outputId": "73f8fe81-ad5e-46af-c79b-a3b94539bcd7" 108 | }, 109 | "execution_count": null, 110 | "outputs": [ 111 | { 112 | "output_type": "execute_result", 113 | "data": { 114 | "text/plain": [ 115 | "GaussianNB()" 116 | ], 117 | "text/html": [ 118 | "
GaussianNB()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 119 | ] 120 | }, 121 | "metadata": {}, 122 | "execution_count": 28 123 | } 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "source": [ 129 | "# Make predictions on the test set\n", 130 | "y_pred = clf.predict(X_test)" 131 | ], 132 | "metadata": { 133 | "id": "8Lwl-epio_lT" 134 | }, 135 | "execution_count": null, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "# Evaluate model performance.\n# zero_division=0 keeps an undefined precision/recall/F1 at 0.0 (the value the default already reports)\n# without emitting UndefinedMetricWarning when a class is missing from y_test or y_pred.\n", 142 | "accuracy = accuracy_score(y_test, y_pred)\n", 143 | "precision = precision_score(y_test, y_pred, zero_division=0)\n", 144 | "recall = recall_score(y_test, y_pred, zero_division=0)\n", 145 | "f1 = f1_score(y_test, y_pred, zero_division=0)" 146 | ], 147 | "metadata": { 148 | "colab": { 149 | "base_uri": "https://localhost:8080/" 150 | }, 151 | "id": "y67qpYfYpFsu", 152 | "outputId": "d713a027-0422-4da9-bef1-cc65dc4f4ed0" 153 | }, 154 | "execution_count": null, 155 | "outputs": [ 156 | { 157 | "output_type": "stream", 158 | "name": "stderr", 159 | "text": [ 160 | "/usr/local/lib/python3.10/dist-packages/sklearn/metrics/_classification.py:1344: UndefinedMetricWarning: Recall is ill-defined and being set to 0.0 due to no true samples. 
Use `zero_division` parameter to control this behavior.\n", 161 | " _warn_prf(average, modifier, msg_start, len(result))\n" 162 | ] 163 | } 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "source": [ 169 | "print(\"Accuracy:\", accuracy)\n", 170 | "print(\"Precision:\", precision)\n", 171 | "print(\"Recall:\", recall)\n", 172 | "print(\"F1 Score:\", f1)" 173 | ], 174 | "metadata": { 175 | "colab": { 176 | "base_uri": "https://localhost:8080/" 177 | }, 178 | "id": "12SMOKK-pH9D", 179 | "outputId": "3566fd59-7fd4-4d9e-f6bc-f2e774cea5af" 180 | }, 181 | "execution_count": null, 182 | "outputs": [ 183 | { 184 | "output_type": "stream", 185 | "name": "stdout", 186 | "text": [ 187 | "Accuracy: 0.5\n", 188 | "Precision: 0.0\n", 189 | "Recall: 0.0\n", 190 | "F1 Score: 0.0\n" 191 | ] 192 | } 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "source": [ 198 | "new_data = {\"GPA\": 3.0, \"Attendance_Rate\": 0.80, \"Standardized_Test_Score\": 1000}" 199 | ], 200 | "metadata": { 201 | "id": "v1kSDey2sx3k" 202 | }, 203 | "execution_count": null, 204 | "outputs": [] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "source": [ 209 | "if not all(key in new_data for key in features):\n", 210 | "    print(\"Warning: new_data might be missing some feature names.\")" 211 | ], 212 | "metadata": { 213 | "id": "e75mY2KFs4YD" 214 | }, 215 | "execution_count": null, 216 | "outputs": [] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "source": [ 221 | "# Reuse the new_data record defined two cells above, so the feature check and the prediction see the same values.\n", 222 | "\n", 223 | "# Build a one-row DataFrame whose columns follow the training feature order.\n# Selecting by name (not by dict insertion order) keeps each value aligned with the column the model was\n# fitted on, and passing named columns avoids sklearn's 'X does not have valid feature names' warning.\n", 224 | "new_student_features = pd.DataFrame([new_data], columns=features)\n", 225 | "\n", 226 | "# Predict the risk for the new student (clf.predict returns one label per row; take the only one)\n", 227 | "new_student_risk = clf.predict(new_student_features)[0]\n", 228 | "if new_student_risk == 1:\n", 229 | "    print(\"New student is predicted to be at risk.\")\n", 230 | "else:\n", 231 | "    print(\"New student is predicted to be low risk.\")" 232 | ], 233 
| "metadata": { 234 | "colab": { 235 | "base_uri": "https://localhost:8080/" 236 | }, 237 | "id": "j55ArL_8pMZI", 238 | "outputId": "f9541c82-7202-4ed1-8a43-8fc8b2bb49a3" 239 | }, 240 | "execution_count": null, 241 | "outputs": [ 242 | { 243 | "output_type": "stream", 244 | "name": "stdout", 245 | "text": [ 246 | "New student is predicted to be at risk.\n" 247 | ] 248 | }, 249 | { 250 | "output_type": "stream", 251 | "name": "stderr", 252 | "text": [ 253 | "/usr/local/lib/python3.10/dist-packages/sklearn/base.py:439: UserWarning: X does not have valid feature names, but GaussianNB was fitted with feature names\n", 254 | " warnings.warn(\n" 255 | ] 256 | } 257 | ] 258 | } 259 | ] 260 | } --------------------------------------------------------------------------------