├── .DS_Store
├── NaiveBayes
├── mnist_train_small.csv
└── naiveBayes.ipynb
├── Perceptron
├── mnist_train_small.csv
└── perceptron_jf.ipynb
├── README.md
├── data
└── clean_weather.csv
├── gradientDescent
└── gradientDescent.ipynb
├── knn
├── .DS_Store
├── knn.ipynb
└── mnist_train_small.csv
├── linear_regression
├── .DS_Store
├── cal_housing_price_prediciton.ipynb
├── california_housing_test.csv
├── california_housing_train.csv
├── price_prediction.ipynb
└── regression.ipynb
└── logistic_regression
├── logistic_classification.ipynb
└── mnist_train_small.csv
/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taureanjoe/Machine-Learning/2d095810454989fafec46017a4a6d08631c5b999/.DS_Store
--------------------------------------------------------------------------------
/NaiveBayes/naiveBayes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "from sklearn.preprocessing import MinMaxScaler"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 3,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "
\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " \n",
42 | " 0 \n",
43 | " 1 \n",
44 | " 2 \n",
45 | " 3 \n",
46 | " 4 \n",
47 | " 5 \n",
48 | " 6 \n",
49 | " 7 \n",
50 | " 8 \n",
51 | " 9 \n",
52 | " ... \n",
53 | " 775 \n",
54 | " 776 \n",
55 | " 777 \n",
56 | " 778 \n",
57 | " 779 \n",
58 | " 780 \n",
59 | " 781 \n",
60 | " 782 \n",
61 | " 783 \n",
62 | " 784 \n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 19995 \n",
68 | " 0 \n",
69 | " 0 \n",
70 | " 0 \n",
71 | " 0 \n",
72 | " 0 \n",
73 | " 0 \n",
74 | " 0 \n",
75 | " 0 \n",
76 | " 0 \n",
77 | " 0 \n",
78 | " ... \n",
79 | " 0 \n",
80 | " 0 \n",
81 | " 0 \n",
82 | " 0 \n",
83 | " 0 \n",
84 | " 0 \n",
85 | " 0 \n",
86 | " 0 \n",
87 | " 0 \n",
88 | " 0 \n",
89 | " \n",
90 | " \n",
91 | " 19996 \n",
92 | " 1 \n",
93 | " 0 \n",
94 | " 0 \n",
95 | " 0 \n",
96 | " 0 \n",
97 | " 0 \n",
98 | " 0 \n",
99 | " 0 \n",
100 | " 0 \n",
101 | " 0 \n",
102 | " ... \n",
103 | " 0 \n",
104 | " 0 \n",
105 | " 0 \n",
106 | " 0 \n",
107 | " 0 \n",
108 | " 0 \n",
109 | " 0 \n",
110 | " 0 \n",
111 | " 0 \n",
112 | " 0 \n",
113 | " \n",
114 | " \n",
115 | " 19997 \n",
116 | " 2 \n",
117 | " 0 \n",
118 | " 0 \n",
119 | " 0 \n",
120 | " 0 \n",
121 | " 0 \n",
122 | " 0 \n",
123 | " 0 \n",
124 | " 0 \n",
125 | " 0 \n",
126 | " ... \n",
127 | " 0 \n",
128 | " 0 \n",
129 | " 0 \n",
130 | " 0 \n",
131 | " 0 \n",
132 | " 0 \n",
133 | " 0 \n",
134 | " 0 \n",
135 | " 0 \n",
136 | " 0 \n",
137 | " \n",
138 | " \n",
139 | " 19998 \n",
140 | " 9 \n",
141 | " 0 \n",
142 | " 0 \n",
143 | " 0 \n",
144 | " 0 \n",
145 | " 0 \n",
146 | " 0 \n",
147 | " 0 \n",
148 | " 0 \n",
149 | " 0 \n",
150 | " ... \n",
151 | " 0 \n",
152 | " 0 \n",
153 | " 0 \n",
154 | " 0 \n",
155 | " 0 \n",
156 | " 0 \n",
157 | " 0 \n",
158 | " 0 \n",
159 | " 0 \n",
160 | " 0 \n",
161 | " \n",
162 | " \n",
163 | " 19999 \n",
164 | " 5 \n",
165 | " 0 \n",
166 | " 0 \n",
167 | " 0 \n",
168 | " 0 \n",
169 | " 0 \n",
170 | " 0 \n",
171 | " 0 \n",
172 | " 0 \n",
173 | " 0 \n",
174 | " ... \n",
175 | " 0 \n",
176 | " 0 \n",
177 | " 0 \n",
178 | " 0 \n",
179 | " 0 \n",
180 | " 0 \n",
181 | " 0 \n",
182 | " 0 \n",
183 | " 0 \n",
184 | " 0 \n",
185 | " \n",
186 | " \n",
187 | "
\n",
188 | "
5 rows × 785 columns
\n",
189 | "
"
190 | ],
191 | "text/plain": [
192 | " 0 1 2 3 4 5 6 7 8 9 ... 775 776 777 \\\n",
193 | "19995 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
194 | "19996 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
195 | "19997 2 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
196 | "19998 9 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
197 | "19999 5 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
198 | "\n",
199 | " 778 779 780 781 782 783 784 \n",
200 | "19995 0 0 0 0 0 0 0 \n",
201 | "19996 0 0 0 0 0 0 0 \n",
202 | "19997 0 0 0 0 0 0 0 \n",
203 | "19998 0 0 0 0 0 0 0 \n",
204 | "19999 0 0 0 0 0 0 0 \n",
205 | "\n",
206 | "[5 rows x 785 columns]"
207 | ]
208 | },
209 | "execution_count": 3,
210 | "metadata": {},
211 | "output_type": "execute_result"
212 | }
213 | ],
214 | "source": [
215 | "data = pd.read_csv(\"mnist_train_small.csv\", header= None)\n",
216 | "data.tail()"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 4,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "# MNIST - handwritten digit recognition dataset\n",
226 | "# Each image is stored as one row of the csv file\n",
227 | "# 28*28 image -> flattened to 784 pixel values -> one csv row\n",
228 | "\n",
229 | "# 785 columns -> 1 label column (output) + 784 pixel columns (image)\n",
230 | "# pixel range - (0-255)\n",
231 | "# 0 - black\n",
232 | "# 255 - white"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 5,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/plain": [
243 | "False"
244 | ]
245 | },
246 | "execution_count": 5,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "data.isnull().sum().any()\n"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 6,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "name": "stdout",
262 | "output_type": "stream",
263 | "text": [
264 | "\n",
265 | "RangeIndex: 20000 entries, 0 to 19999\n",
266 | "Columns: 785 entries, 0 to 784\n",
267 | "dtypes: int64(785)\n",
268 | "memory usage: 119.8 MB\n"
269 | ]
270 | }
271 | ],
272 | "source": [
273 | "data.info()"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 7,
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/plain": [
284 | "((20000, 784), (20000,))"
285 | ]
286 | },
287 | "execution_count": 7,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "# dividing features and targets\n",
294 | "\n",
295 | "X = data.iloc[:, 1:].values\n",
296 | "Y = data.iloc[:, 0].values\n",
297 | "\n",
298 | "X.shape, Y.shape"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": 8,
304 | "metadata": {},
305 | "outputs": [
306 | {
307 | "data": {
308 | "text/plain": [
309 | "255"
310 | ]
311 | },
312 | "execution_count": 8,
313 | "metadata": {},
314 | "output_type": "execute_result"
315 | }
316 | ],
317 | "source": [
318 | "X.max()"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 9,
324 | "metadata": {},
325 | "outputs": [],
326 | "source": [
327 | "scaler = MinMaxScaler()\n",
328 | "X = scaler.fit_transform(X)"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 10,
334 | "metadata": {},
335 | "outputs": [
336 | {
337 | "data": {
338 | "text/plain": [
339 | "(20000, 784)"
340 | ]
341 | },
342 | "execution_count": 10,
343 | "metadata": {},
344 | "output_type": "execute_result"
345 | }
346 | ],
347 | "source": [
348 | "X.shape"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 11,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "def PlotImage(X, Y):\n",
358 | " idx = np.random.choice(X.shape[0])\n",
359 | " label = Y[idx]\n",
360 | " print(\" Target : \", label)\n",
361 | " x = X[idx, :]\n",
362 | " x = x.reshape(28,28)\n",
363 | " plt.imshow(x) "
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 12,
369 | "metadata": {},
370 | "outputs": [
371 | {
372 | "name": "stdout",
373 | "output_type": "stream",
374 | "text": [
375 | " Target : 8\n"
376 | ]
377 | },
378 | {
379 | "data": {
380 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAGdCAYAAABU0qcqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbyElEQVR4nO3df3DU9b3v8dcGkgUkWQwh2WwJNICKiqS3FGIGpVgyhHjGAeF6Qe294HhhwOAtoNWTjoI/OpMWe6xXJ8I5d1qop+IP5gpUhtKjwYRRA72gXA63mhIaJRySULkn2RAkhORz/+C6upJIv8su7yQ8HzPfGbL7fef74dsdn/2ym298zjknAAAusyTrBQAArkwECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmBhovYCv6+rq0vHjx5Wamiqfz2e9HACAR845tba2KhQKKSmp5+ucXheg48ePKycnx3oZAIBLVF9fr5EjR/b4fK8LUGpqqiTpFt2ugUo2Xg0AwKtz6tC72hH573lPEhag8vJyPfPMM2psbFReXp5eeOEFTZky5aJzX/yz20Ala6CPAAFAn/P/7zB6sbdREvIhhNdee02rVq3SmjVr9MEHHygvL09FRUU6ceJEIg4HAOiDEhKgZ599VosXL9Z9992nG264QevXr9eQIUP061//OhGHAwD0QXEP0NmzZ7V//34VFhZ+eZCkJBUWFqq6uvqC/dvb2xUOh6M2AED/F/cAffbZZ+rs7FRWVlbU41lZWWpsbLxg/7KyMgUCgcjGJ+AA4Mpg/oOopaWlamlpiWz19fXWSwIAXAZx/xRcRkaGBgwYoKampqjHm5qaFAwGL9jf7/fL7/fHexkAgF4u7ldAKSkpmjRpkioqKiKPdXV1qaKiQgUFBfE+HACgj0rIzwGtWrVKCxcu1Pe+9z1NmTJFzz33nNra2nTfffcl4nAAgD4oIQGaP3++/vrXv2r16tVqbGzUd77zHe3cufOCDyYAAK5cPuecs17EV4XDYQUCAU3XbO6EAAB90DnXoUptU0tLi9LS0nrcz/xTcACAKxMBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwMdB6AUCvkjTA80h4/mTPM0OPtXueaRkzyPPMb5/6hecZSVr5/bs9z5z75GhMx8KViysgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAENyNF7+fzeR45ef/NMR1q4codnmcevHp9TMe6HA6e9X5zVUlSZ1d8FwJ0gysgAIAJAgQAMBH3AD3xxBPy+XxR2/jx4+N9GABAH5eQ94BuvPFGvf32218eZCBvNQEAoiWkDAMHDlQwGEzEtwYA9BMJeQ/o8OHDCoVCGjNmjO69914dPdrzr+ptb29XOByO2gAA/V/cA5Sfn6+NGzdq586dWrdunerq6nTrrbeqtbW12/3LysoUCAQiW05OTryXBADoheIeoOLiYt11112aOHGiioqKtGPHDjU3N+v111/vdv/S0lK1tLREtvr6+ngvCQDQCyX80wHDhg3Ttddeq9ra2m6f9/v98vv9iV4GAKCXSfjPAZ06dUpHjhxRdnZ2og8FAOhD4h6ghx9+WFVVVfrkk0/0/vvv684779SAAQN09913x/tQAIA+LO7/BHfs2DHdfffdOnnypEaMGKFbbrlFe/bs0YgRI+J9KABAHxb3AL366qvx/pa4wg0YP87zzL6n1sV0rH85nex55rtPL/M8M3fZO55n
Hsv42PPM9nCe5xlJOvdvDTHNAV5wLzgAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwETCfyEdcKnq7rp8d1J//Kn/6nlmxEvVnmc2zfie55lYbkZ6e+pBzzOS9F5orueZc8f+LaZj4crFFRAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMcDds9Hpnvn3W88wf2ztiOtbwHX/2PnTtWM8j/zJlveeZhnOeR3Tv/3jE+5Ckkcfej2kO8IIrIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABDcjRa/3H8Z96nkm1RfbzUh9Q6/yPHNy8gjPM+lJKZ5nbl9S4nlm5A5uKoreiysgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAENyNFr3f4zWs8z1y/ckhMx/rkH1I9z9z8rUOeZzaEx3qe8e/4X55ngN6MKyAAgAkCBAAw4TlAu3fv1h133KFQKCSfz6etW7dGPe+c0+rVq5Wdna3BgwersLBQhw8fjtd6AQD9hOcAtbW1KS8vT+Xl5d0+v3btWj3//PNav3699u7dq6uuukpFRUU6c+bMJS8WANB/eP4QQnFxsYqLi7t9zjmn5557To899phmz54tSXrppZeUlZWlrVu3asGCBZe2WgBAvxHX94Dq6urU2NiowsLCyGOBQED5+fmqrq7udqa9vV3hcDhqAwD0f3ENUGNjoyQpKysr6vGsrKzIc19XVlamQCAQ2XJycuK5JABAL2X+KbjS0lK1tLREtvr6euslAQAug7gGKBgMSpKampqiHm9qaoo893V+v19paWlRGwCg/4trgHJzcxUMBlVRURF5LBwOa+/evSooKIjnoQAAfZznT8GdOnVKtbW1ka/r6up04MABpaena9SoUVqxYoV++tOf6pprrlFubq4ef/xxhUIhzZkzJ57rBgD0cZ4DtG/fPt12222Rr1etWiVJWrhwoTZu3KhHHnlEbW1tWrJkiZqbm3XLLbdo586dGjRoUPxWDQDo83zOOWe9iK8Kh8MKBAKartka6Eu2Xg56gYGjvX8y8rfvvhbTsa4eENtNTL0qb/b+d/rdDcMTsBIg/s65DlVqm1paWr7xfX3zT8EBAK5MBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMOH51zEAl9u5T73/mvbCnz4U07H+6e//u+eZSf4UzzOTB//F88xv7/k7zzNpm/Z4ngEuF66AAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAAT3IwU/VLGP1bHNHf36B95nvnzonWeZ6b4kz3P/OKnL3qeecj3gOcZSQq8zE1MkXhcAQEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJrgZKfAV59K6PM9sbRvqeWb1uv/ieaZq5S88z6x9er3nGUn62YH5nmc6/09NTMfClYsrIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABDcjBb7irql7Pc80dw7xPJP97PueZ6YlPex55l9Xveh5RpL+8j/f8Tzz+rzbPM9wA9MrG1dAAAATBAgAYMJzgHbv3q077rhDoVBIPp9PW7dujXp+0aJF8vl8UdusWbPitV4AQD/hOUBtbW3Ky8tTeXl5j/vMmjVLDQ0Nke2VV165pEUCAPofzx9CKC4uVnFx8Tfu4/f7FQwGY14UAKD/S8h7QJWVlcrMzNR1112nZcuW6eTJkz3u
297ernA4HLUBAPq/uAdo1qxZeumll1RRUaGf//znqqqqUnFxsTo7O7vdv6ysTIFAILLl5OTEe0kAgF4o7j8HtGDBgsifb7rpJk2cOFFjx45VZWWlZsyYccH+paWlWrVqVeTrcDhMhADgCpDwj2GPGTNGGRkZqq2t7fZ5v9+vtLS0qA0A0P8lPEDHjh3TyZMnlZ2dnehDAQD6EM//BHfq1Kmoq5m6ujodOHBA6enpSk9P15NPPql58+YpGAzqyJEjeuSRRzRu3DgVFRXFdeEAgL7Nc4D27dun22778p5PX7x/s3DhQq1bt04HDx7Ub37zGzU3NysUCmnmzJl6+umn5ff747dqAECf5zlA06dPl3Oux+f/8Ic/XNKCgHgYMC43prmfjNjkeWbLqdExHcurUZvrPc9MLZwb07Hem/iG55nt63v+cYuetN7qeQT9CPeCAwCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgIm4/0puoFdIju2lHUga7HnmH+u839I5TUc8z5z71PvdsIctHeV5RpI27sz0PLM8tMvzzM8n/CfPM12HPvY8g96JKyAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQ3IwUuUdNfMjzPxHIz0lic++RoTHO//vs5nmd2v/hPnmeWrk72PDPa+/1L0UtxBQQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmOBmpAAukHqg0fPMR2dPe56ZOqrO88wxzxPorbgCAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMcDNSABfoShvieSY1qSsBK0F/xhUQAMAEAQIAmPAUoLKyMk2ePFmpqanKzMzUnDlzVFNTE7XPmTNnVFJSouHDh2vo0KGaN2+empqa4rpoAEDf5ylAVVVVKikp0Z49e/TWW2+po6NDM2fOVFtbW2SflStX6s0339TmzZtVVVWl48ePa+7cuXFfOACgb/P0IYSdO3dGfb1x40ZlZmZq//79mjZtmlpaWvSrX/1KmzZt0g9+8ANJ0oYNG3T99ddrz549uvnmm+O3cgBAn3ZJ7wG1tLRIktLT0yVJ+/fvV0dHhwoLCyP7jB8/XqNGjVJ1dXW336O9vV3hcDhqAwD0fzEHqKurSytWrNDUqVM1YcIESVJjY6NSUlI0bNiwqH2zsrLU2Nj975gvKytTIBCIbDk5ObEuCQDQh8QcoJKSEh06dEivvvrqJS2gtLRULS0tka2+vv6Svh8AoG+I6QdRly9fru3bt2v37t0aOXJk5PFgMKizZ8+qubk56iqoqalJwWCw2+/l9/vl9/tjWQYAoA/zdAXknNPy5cu1ZcsW7dq1S7m5uVHPT5o0ScnJyaqoqIg8VlNTo6NHj6qgoCA+KwYA9AueroBKSkq0adMmbdu2TampqZH3dQKBgAYPHqxAIKD7779fq1atUnp6utLS0vTggw+qoKCAT8ABAKJ4CtC6deskSdOnT496fMOGDVq0aJEk6Ze//KWSkpI0b948tbe3q6ioSC+++GJcFgsA6D88Bcg5d9F9Bg0apPLycpWXl8e8KKAveei2HZ5nfqfhCVjJhQaMGBHT3Ef/bajnmZEDvc8c/CzkeSZdf/Y8g96Je8EBAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADAREy/ERXo7To/ro1p7prfLvM88/G93u/8/ruKPM8zja2pnmf++TsbPM9I0sSUQZ5n7jt6q+eZEf/5r55nOj1PoLfiCggAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQA
MMHNSNE/ORfT2DVlH3ueuX7UIs8zvy940fPM2OShnmee/b83eJ6RpP/4r95vLDpu2SeeZzqb/93zDPoProAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABPcjBT4is5/935zzNwF3mce0C2eZy6nXP1vzzOdCVgH+jeugAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJTwEqKyvT5MmTlZqaqszMTM2ZM0c1NTVR+0yfPl0+ny9qW7p0aVwXDQDo+zwFqKqqSiUlJdqzZ4/eeustdXR0aObMmWpra4vab/HixWpoaIhsa9eujeuiAQB9n6ffiLpz586orzdu3KjMzEzt379f06ZNizw+ZMgQBYPB+KwQANAvXdJ7QC0tLZKk9PT0qMdffvllZWRkaMKECSotLdXp06d7/B7t7e0Kh8NRGwCg//N0BfRVXV1dWrFihaZOnaoJEyZEHr/nnns0evRohUIhHTx4UI8++qhqamr0xhtvdPt9ysrK9OSTT8a6DABAH+VzzrlYBpctW6bf//73evfddzVy5Mge99u1a5dmzJih2tpajR079oLn29vb1d7eHvk6HA4rJydH0zVbA33JsSwNAGDonOtQpbappaVFaWlpPe4X0xXQ8uXLtX37du3evfsb4yNJ+fn5ktRjgPx+v/x+fyzLAAD0YZ4C5JzTgw8+qC1btqiyslK5ubkXnTlw4IAkKTs7O6YFAgD6J08BKikp0aZNm7Rt2zalpqaqsbFRkhQIBDR48GAdOXJEmzZt0u23367hw4fr4MGDWrlypaZNm6aJEycm5C8AAOibPL0H5PP5un18w4YNWrRokerr6/XDH/5Qhw4dUltbm3JycnTnnXfqscce+8Z/B/yqcDisQCDAe0AA0Ecl5D2gi7UqJydHVVVVXr4lAOAKxb3gAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmBlov4Oucc5Kkc+qQnPFiAACenVOHpC//e96TXheg1tZWSdK72mG8EgDApWhtbVUgEOjxeZ+7WKIus66uLh0/flypqany+XxRz4XDYeXk5Ki+vl5paWlGK7THeTiP83Ae5+E8zsN5veE8OOfU2tqqUCikpKSe3+npdVdASUlJGjly5Dfuk5aWdkW/wL7AeTiP83Ae5+E8zsN51ufhm658vsCHEAAAJggQAMBEnwqQ3+/XmjVr5Pf7rZdiivNwHufhPM7DeZyH8/rSeeh1H0IAAFwZ+tQVEACg/yBAAAATBAgAYIIAAQBM9JkAlZeX69vf/rYGDRqk/Px8/fGPf7Re0mX3xBNPyOfzRW3jx4+3XlbC7d69W3fccYdCoZB8Pp+2bt0a9bxzTqtXr1Z2drYGDx6swsJCHT582GaxCXSx87Bo0aILXh+zZs2yWWyClJWVafLkyUpNTVVmZqbmzJmjmpqaqH3OnDmjkpISDR8+XEOHDtW8efPU1NRktOLE+FvOw/Tp0y94PSxdutRoxd3rEwF67bXXtGrVKq1Zs0YffPCB8vLyVFRUpBMnTlgv7bK78cYb1dDQENneffdd6yUlXFtbm/Ly8lReXt7t82vXrtXzzz+v9evXa+/evbrqqqtUVFSkM2fOXOaVJtbFzoMkzZo1K+r18corr1zGFSZeVVWVSkpKtGfPHr311lvq6OjQzJkz1dbWFtln5cqVevPNN7V582ZVVVXp+PHjmjt3ruGq4+9vOQ+StHjx4qjXw9q1a41W3APXB0yZMsWVlJREvu7s7HShUMiVlZUZruryW7NmjcvLy7NehilJbsuWLZGvu7q6
XDAYdM8880zksebmZuf3+90rr7xisMLL4+vnwTnnFi5c6GbPnm2yHisnTpxwklxVVZVz7vz/9snJyW7z5s2RfT766CMnyVVXV1stM+G+fh6cc+773/+++9GPfmS3qL9Br78COnv2rPbv36/CwsLIY0lJSSosLFR1dbXhymwcPnxYoVBIY8aM0b333qujR49aL8lUXV2dGhsbo14fgUBA+fn5V+Tro7KyUpmZmbruuuu0bNkynTx50npJCdXS0iJJSk9PlyTt379fHR0dUa+H8ePHa9SoUf369fD18/CFl19+WRkZGZowYYJKS0t1+vRpi+X1qNfdjPTrPvvsM3V2diorKyvq8aysLH388cdGq7KRn5+vjRs36rrrrlNDQ4OefPJJ3XrrrTp06JBSU1Otl2eisbFRkrp9fXzx3JVi1qxZmjt3rnJzc3XkyBH95Cc/UXFxsaqrqzVgwADr5cVdV1eXVqxYoalTp2rChAmSzr8eUlJSNGzYsKh9+/ProbvzIEn33HOPRo8erVAopIMHD+rRRx9VTU2N3njjDcPVRuv1AcKXiouLI3+eOHGi8vPzNXr0aL3++uu6//77DVeG3mDBggWRP990002aOHGixo4dq8rKSs2YMcNwZYlRUlKiQ4cOXRHvg36Tns7DkiVLIn++6aablJ2drRkzZujIkSMaO3bs5V5mt3r9P8FlZGRowIABF3yKpampScFg0GhVvcOwYcN07bXXqra21nopZr54DfD6uNCYMWOUkZHRL18fy5cv1/bt2/XOO+9E/fqWYDCos2fPqrm5OWr//vp66Ok8dCc/P1+SetXrodcHKCUlRZMmTVJFRUXksa6uLlVUVKigoMBwZfZOnTqlI0eOKDs723opZnJzcxUMBqNeH+FwWHv37r3iXx/Hjh3TyZMn+9Xrwzmn5cuXa8uWLdq1a5dyc3Ojnp80aZKSk5OjXg81NTU6evRov3o9XOw8dOfAgQOS1LteD9afgvhbvPrqq87v97uNGze6P/3pT27JkiVu2LBhrrGx0Xppl9VDDz3kKisrXV1dnXvvvfdcYWGhy8jIcCdOnLBeWkK1tra6Dz/80H344YdOknv22Wfdhx9+6D799FPnnHM/+9nP3LBhw9y2bdvcwYMH3ezZs11ubq77/PPPjVceX990HlpbW93DDz/sqqurXV1dnXv77bfdd7/7XXfNNde4M2fOWC89bpYtW+YCgYCrrKx0DQ0Nke306dORfZYuXepGjRrldu3a5fbt2+cKCgpcQUGB4arj72Lnoba21j311FNu3759rq6uzm3bts2NGTPGTZs2zXjl0fpEgJxz7oUXXnCjRo1yKSkpbsqUKW7Pnj3WS7rs5s+f77Kzs11KSor71re+5ebPn+9qa2utl5Vw77zzjpN0wbZw4ULn3PmPYj/++OMuKyvL+f1+N2PGDFdTU2O76AT4pvNw+vRpN3PmTDdixAiXnJzsRo8e7RYvXtzv/k9ad39/SW7Dhg2RfT7//HP3wAMPuKuvvtoNGTLE3Xnnna6hocFu0QlwsfNw9OhRN23aNJeenu78fr8bN26c+/GPf+xaWlpsF/41/DoGAICJXv8eEACgfyJAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATPw//XGm7nuSJ7oAAAAASUVORK5CYII=",
381 | "text/plain": [
382 | ""
383 | ]
384 | },
385 | "metadata": {},
386 | "output_type": "display_data"
387 | }
388 | ],
389 | "source": [
390 | "PlotImage(X,Y)"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 13,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | " training Data shape : (16000, 784) (16000,)\n",
403 | " testing Data shape : (4000, 784) (4000,)\n"
404 | ]
405 | }
406 | ],
407 | "source": [
408 | "from sklearn.model_selection import train_test_split\n",
409 | "Xtrain,Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=4)\n",
410 | "\n",
411 | "print(\" training Data shape : \", Xtrain.shape, Ytrain.shape)\n",
412 | "print(\" testing Data shape : \", Xtest.shape, Ytest.shape)"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": 14,
418 | "metadata": {},
419 | "outputs": [
420 | {
421 | "name": "stdout",
422 | "output_type": "stream",
423 | "text": [
424 | "CPU times: user 267 ms, sys: 114 ms, total: 381 ms\n",
425 | "Wall time: 641 ms\n"
426 | ]
427 | },
428 | {
429 | "data": {
430 | "text/html": [
431 | "GaussianNB() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
432 | ],
433 | "text/plain": [
434 | "GaussianNB()"
435 | ]
436 | },
437 | "execution_count": 14,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "from sklearn.naive_bayes import GaussianNB\n",
444 | "\n",
445 | "model = GaussianNB()\n",
446 | "%time model.fit(Xtrain, Ytrain)"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 15,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "name": "stdout",
456 | "output_type": "stream",
457 | "text": [
458 | "CPU times: user 284 ms, sys: 86.9 ms, total: 371 ms\n",
459 | "Wall time: 548 ms\n"
460 | ]
461 | }
462 | ],
463 | "source": [
464 | "%time predictions = model.predict(Xtest)"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 16,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "0.55425"
476 | ]
477 | },
478 | "execution_count": 16,
479 | "metadata": {},
480 | "output_type": "execute_result"
481 | }
482 | ],
483 | "source": [
484 | "\n",
485 | "# model.score() -> for regressors: R^2 score\n",
486 | "#               -> for classifiers: mean accuracy (used here)\n",
487 | "\n",
488 | "model.score(Xtest, Ytest)"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": null,
494 | "metadata": {},
495 | "outputs": [],
496 | "source": []
497 | }
498 | ],
499 | "metadata": {
500 | "kernelspec": {
501 | "display_name": "Python 3.10.6 64-bit",
502 | "language": "python",
503 | "name": "python3"
504 | },
505 | "language_info": {
506 | "codemirror_mode": {
507 | "name": "ipython",
508 | "version": 3
509 | },
510 | "file_extension": ".py",
511 | "mimetype": "text/x-python",
512 | "name": "python",
513 | "nbconvert_exporter": "python",
514 | "pygments_lexer": "ipython3",
515 | "version": "3.10.6"
516 | },
517 | "orig_nbformat": 4,
518 | "vscode": {
519 | "interpreter": {
520 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
521 | }
522 | }
523 | },
524 | "nbformat": 4,
525 | "nbformat_minor": 2
526 | }
527 |
--------------------------------------------------------------------------------
/Perceptron/perceptron_jf.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "from sklearn.preprocessing import MinMaxScaler"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 4,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " \n",
42 | " 0 \n",
43 | " 1 \n",
44 | " 2 \n",
45 | " 3 \n",
46 | " 4 \n",
47 | " 5 \n",
48 | " 6 \n",
49 | " 7 \n",
50 | " 8 \n",
51 | " 9 \n",
52 | " ... \n",
53 | " 775 \n",
54 | " 776 \n",
55 | " 777 \n",
56 | " 778 \n",
57 | " 779 \n",
58 | " 780 \n",
59 | " 781 \n",
60 | " 782 \n",
61 | " 783 \n",
62 | " 784 \n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 19995 \n",
68 | " 0 \n",
69 | " 0 \n",
70 | " 0 \n",
71 | " 0 \n",
72 | " 0 \n",
73 | " 0 \n",
74 | " 0 \n",
75 | " 0 \n",
76 | " 0 \n",
77 | " 0 \n",
78 | " ... \n",
79 | " 0 \n",
80 | " 0 \n",
81 | " 0 \n",
82 | " 0 \n",
83 | " 0 \n",
84 | " 0 \n",
85 | " 0 \n",
86 | " 0 \n",
87 | " 0 \n",
88 | " 0 \n",
89 | " \n",
90 | " \n",
91 | " 19996 \n",
92 | " 1 \n",
93 | " 0 \n",
94 | " 0 \n",
95 | " 0 \n",
96 | " 0 \n",
97 | " 0 \n",
98 | " 0 \n",
99 | " 0 \n",
100 | " 0 \n",
101 | " 0 \n",
102 | " ... \n",
103 | " 0 \n",
104 | " 0 \n",
105 | " 0 \n",
106 | " 0 \n",
107 | " 0 \n",
108 | " 0 \n",
109 | " 0 \n",
110 | " 0 \n",
111 | " 0 \n",
112 | " 0 \n",
113 | " \n",
114 | " \n",
115 | " 19997 \n",
116 | " 2 \n",
117 | " 0 \n",
118 | " 0 \n",
119 | " 0 \n",
120 | " 0 \n",
121 | " 0 \n",
122 | " 0 \n",
123 | " 0 \n",
124 | " 0 \n",
125 | " 0 \n",
126 | " ... \n",
127 | " 0 \n",
128 | " 0 \n",
129 | " 0 \n",
130 | " 0 \n",
131 | " 0 \n",
132 | " 0 \n",
133 | " 0 \n",
134 | " 0 \n",
135 | " 0 \n",
136 | " 0 \n",
137 | " \n",
138 | " \n",
139 | " 19998 \n",
140 | " 9 \n",
141 | " 0 \n",
142 | " 0 \n",
143 | " 0 \n",
144 | " 0 \n",
145 | " 0 \n",
146 | " 0 \n",
147 | " 0 \n",
148 | " 0 \n",
149 | " 0 \n",
150 | " ... \n",
151 | " 0 \n",
152 | " 0 \n",
153 | " 0 \n",
154 | " 0 \n",
155 | " 0 \n",
156 | " 0 \n",
157 | " 0 \n",
158 | " 0 \n",
159 | " 0 \n",
160 | " 0 \n",
161 | " \n",
162 | " \n",
163 | " 19999 \n",
164 | " 5 \n",
165 | " 0 \n",
166 | " 0 \n",
167 | " 0 \n",
168 | " 0 \n",
169 | " 0 \n",
170 | " 0 \n",
171 | " 0 \n",
172 | " 0 \n",
173 | " 0 \n",
174 | " ... \n",
175 | " 0 \n",
176 | " 0 \n",
177 | " 0 \n",
178 | " 0 \n",
179 | " 0 \n",
180 | " 0 \n",
181 | " 0 \n",
182 | " 0 \n",
183 | " 0 \n",
184 | " 0 \n",
185 | " \n",
186 | " \n",
187 | "
\n",
188 | "
5 rows × 785 columns
\n",
189 | "
"
190 | ],
191 | "text/plain": [
192 | " 0 1 2 3 4 5 6 7 8 9 ... 775 776 777 \\\n",
193 | "19995 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
194 | "19996 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
195 | "19997 2 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
196 | "19998 9 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
197 | "19999 5 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
198 | "\n",
199 | " 778 779 780 781 782 783 784 \n",
200 | "19995 0 0 0 0 0 0 0 \n",
201 | "19996 0 0 0 0 0 0 0 \n",
202 | "19997 0 0 0 0 0 0 0 \n",
203 | "19998 0 0 0 0 0 0 0 \n",
204 | "19999 0 0 0 0 0 0 0 \n",
205 | "\n",
206 | "[5 rows x 785 columns]"
207 | ]
208 | },
209 | "execution_count": 4,
210 | "metadata": {},
211 | "output_type": "execute_result"
212 | }
213 | ],
214 | "source": [
215 | "data = pd.read_csv(\"mnist_train_small.csv\", header= None)\n",
216 | "data.tail()"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 5,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "# MNIST - handwritten digit recognition dataset\n",
226 | "# Each image is stored as one row of the csv file\n",
227 | "# 28*28 image -> flattened to 784 pixel values -> one csv row\n",
228 | "\n",
229 | "# 785 columns -> 1 label column (output) + 784 pixel columns (image)\n",
230 | "# pixel range - (0-255)\n",
231 | "# 0 - black\n",
232 | "# 255 - white"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 6,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/plain": [
243 | "False"
244 | ]
245 | },
246 | "execution_count": 6,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "data.isnull().sum().any()\n"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 7,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "name": "stdout",
262 | "output_type": "stream",
263 | "text": [
264 | "\n",
265 | "RangeIndex: 20000 entries, 0 to 19999\n",
266 | "Columns: 785 entries, 0 to 784\n",
267 | "dtypes: int64(785)\n",
268 | "memory usage: 119.8 MB\n"
269 | ]
270 | }
271 | ],
272 | "source": [
273 | "data.info()"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 8,
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/plain": [
284 | "((20000, 784), (20000,))"
285 | ]
286 | },
287 | "execution_count": 8,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "# dividing features and targets\n",
294 | "\n",
295 | "X = data.iloc[:, 1:].values\n",
296 | "Y = data.iloc[:, 0].values\n",
297 | "\n",
298 | "X.shape, Y.shape"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": 9,
304 | "metadata": {},
305 | "outputs": [
306 | {
307 | "data": {
308 | "text/plain": [
309 | "255"
310 | ]
311 | },
312 | "execution_count": 9,
313 | "metadata": {},
314 | "output_type": "execute_result"
315 | }
316 | ],
317 | "source": [
318 | "X.max()"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 10,
324 | "metadata": {},
325 | "outputs": [],
326 | "source": [
327 | "scaler = MinMaxScaler()\n",
328 | "X = scaler.fit_transform(X)"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 11,
334 | "metadata": {},
335 | "outputs": [
336 | {
337 | "data": {
338 | "text/plain": [
339 | "(20000, 784)"
340 | ]
341 | },
342 | "execution_count": 11,
343 | "metadata": {},
344 | "output_type": "execute_result"
345 | }
346 | ],
347 | "source": [
348 | "X.shape"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 12,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "def PlotImage(X, Y):\n",
358 | " idx = np.random.choice(X.shape[0])\n",
359 | " label = Y[idx]\n",
360 | " print(\" Target : \", label)\n",
361 | " x = X[idx, :]\n",
362 | " x = x.reshape(28,28)\n",
363 | " plt.imshow(x) "
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 13,
369 | "metadata": {},
370 | "outputs": [
371 | {
372 | "name": "stdout",
373 | "output_type": "stream",
374 | "text": [
375 | " Target : 8\n"
376 | ]
377 | },
378 | {
379 | "data": {
380 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAGdCAYAAABU0qcqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAcZ0lEQVR4nO3df3DV9b3n8VcI5ACSnBhicpISMEEFK5BuEWIuSFEyQJxSUGaLP/YOUC+sNthianXSVZDWmbS4l3rxRph7b0vqHQFlRmBlu3QxmnBtAy4RhtLWlGRjgQsJld2cEwKEQD77B+tpjwTwezgn7yQ8HzPfmZzv9/s+37cfv/Dim+/3fE6Cc84JAIAeNsC6AQDAjYkAAgCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgImB1g18XldXl44fP67k5GQlJCRYtwMA8Mg5p7a2NmVnZ2vAgCtf5/S6ADp+/LhycnKs2wAAXKejR49qxIgRV9ze6wIoOTlZkjRVD2igBhl3AwDw6oI69YF+Gf77/EriFkAVFRV6+eWX1dzcrPz8fL366quaPHnyNes++7XbQA3SwAQCCAD6nP8/w+i1bqPE5SGEN998U6WlpVq5cqU++ugj5efna9asWTp58mQ8DgcA6IPiEkBr1qzRkiVLtHjxYn35y1/W+vXrNXToUP385z+Px+EAAH1QzAPo/PnzqqurU1FR0V8OMmCAioqKVFtbe9n+HR0dCoVCEQsAoP+LeQB9+umnunjxojIzMyPWZ2Zmqrm5+bL9y8vL5ff7wwtPwAHAjcH8g6hlZWUKBoPh5ejRo9YtAQB6QMyfgktPT1diYqJaWloi1re0tCgQCFy2v8/nk8/ni3UbAIBeLuZXQElJSZo4caKqqqrC67q6ulRVVaXCwsJYHw4A0EfF5XNApaWlWrhwoe6++25NnjxZr7zyitrb27V48eJ4HA4A0AfFJYAWLFigP//5z1qxYoWam5v1la98RTt37rzswQQAwI0rwTnnrJv4a6FQSH6/X9M1l5kQAKAPuuA6Va3tCgaDSklJueJ+5k/BAQBuTAQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADAR8wB68cUXlZCQELGMHTs21ocBAPRxA+PxpnfddZfefffdvxxkYFwOAwDow+KSDAMHDlQgEIjHWwMA+om43AM6fPiwsrOzlZeXp8cee0xHjhy54r4dHR0KhUIRCwCg/4t5ABUUFKiyslI7d+7UunXr1NTUpHvvvVdtbW3d7l9eXi6/3x9ecnJyYt0SAKAXSnDOuXgeoLW1VaNGjdKaNWv0+OOPX7a9o6NDHR0d4dehUEg5OTmarrkamDAonq0BAOLggutUtbYrGAwqJSXlivvF/emA1NRU3XHHHWpoaOh2u8/nk8/ni3cbAIBeJu6fAzp9+rQaGxuVlZUV70MBAPqQmAfQM888o5qaGn3yySf6zW9+owcffFCJiYl65JFHYn0oAEAfFvNfwR07dkyPPPKITp06pVtuuUVTp07Vnj17dMstt8T6UACAPizmAbR58+ZYvyWAHpYQxYfHj39nsueaoUUnPdf8W/6bnmui9fzJiZ5rXsqoi0Mnl5u45qmo6rL+/jcx7iR6zAUHADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQ
AMAEAQQAMEEAAQBMEEAAABMEEADARNy/kA6AHVeYH1Vd8IV2zzX/K/8fojqWV109cpRLoplYtCuKDu/ctsx7zRuNnmsk6UJUVfHBFRAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwASzYQPXKXF4muea1qI7PNe0PxL0XLNr4mueayTp5gGDPdf05CzVPeU/Njzguaa+arTnmjt/9onnmgvNLZ5rehuugAAAJgggAIAJAggAYIIAAgCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgAkCCABgggACAJhgMlLgrxwr+xvPNU/97XbPNYv9v/JcE52kHjpO7zZ1/2NR1aV9/Y+ea0bK+yShFzxX9A9cAQEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADDBZKTo9VxhvueahZU7ojrWN4e96rmmS11RHQvRyf/1tzzX5P3nY1Ed62JUVfiiuAICAJgggAAAJjwH0O7duzVnzhxlZ2crISFB27Zti9junNOKFSuUlZWlIUOGqKioSIcPH45VvwCAfsJzALW3tys/P18VFRXdbl+9erXWrl2r9evXa+/evbrppps0a9YsnTt37rqbBQD0H54fQiguLlZxcXG325xzeuWVV/T8889r7ty5kqTXX39dmZmZ2rZtmx5++OHr6xYA0G/E9B5QU1OTmpubVVRUFF7n9/tVUFCg2trabms6OjoUCoUiFgBA/xfTAGpubpYkZWZmRqzPzMwMb/u88vJy+f3+8JKTkxPLlgAAvZT5U3BlZWUKBoPh5ejRo9YtAQB6QEwDKBAISJJaWloi1re0tIS3fZ7P51NKSkrEAgDo/2IaQLm5uQoEAqqqqgqvC4VC2rt3rwoLC2N5KABAH+f5KbjTp0+roaEh/LqpqUkHDhxQWlqaRo4cqeXLl+ull17S7bffrtzcXL3wwgvKzs7WvHnzYtk3AKCP8xxA+/bt03333Rd+XVpaKklauHChKisr9eyzz6q9vV1Lly5Va2urpk6dqp07d2rw4MGx6xoA0OclOOecdRN/LRQKye/3a7rmamDCIOt2cBUJPp/nmpOLv+q5Zu8L/+i5JlrRTCxae877OHzr3xZ7rrmj4rznmhffrPRcI0n3DE70XBPsOuu5ZmXLvZ5rDs/r/n7y1Vw4Gt1kpIjOBdepam1XMBi86n1986fgAAA3JgIIAGCCAAIAmCCAAAAmCCAAgAkCCABgggACAJgggAAAJgggAIAJAggAYIIAAgCYIIAAACYIIACACc9fxwB85n+v8j6z9W//dq3nGu/zU0dv7PYSzzV3fPtDzzXZ873/0fvxW695rrkzKbp/Ywa7vM+8PfkXpZ5rbv0vtZ5rJGa27i+4AgIAmCCAAAAmCCAAgAkCCABgggACAJgggAAAJgggAIAJAggAYIIAAgCYIIAAACYIIACACQIIAGCCyUihxFR/VHXD80/GuBN7WTXe/032x3+523PNmzP+0XNNtBOLRmPyhigmFn0hmolFcSPjCggAYIIAAgCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgAkCCABgggACAJgggAAAJgggAIAJJiOFLrYGo6pL/WaX55rx//R3nmt+O+1fPNdEq2ZNheeaLnkfh54yYcN3oqpjYlH0BK6AAAAmCCAAgAkCCABgggACAJgggAAAJgggAIAJAggAYIIAAgCYIIAAACYIIACACQIIAGCCAAIAmGAyUkStq63Nc03ef73guWZfQaLnmrt9Fz3X9HZr/+9YzzWj//7jqI7V/0YPvRFXQAAAEwQQAMCE5wDavXu35syZo+zsbCUkJGjbtm0R2xctWqSEhISIZfbs2bHqFwDQT3gOoPb2duXn56ui4spf3DV79mydOHEivGzatOm6mgQA9D+eH0IoLi5WcXHxVffx
+XwKBAJRNwUA6P/icg+ourpaGRkZGjNmjJ588kmdOnXqivt2dHQoFApFLACA/i/mATR79my9/vrrqqqq0k9+8hPV1NSouLhYFy92/2BneXm5/H5/eMnJyYl1SwCAXijmnwN6+OGHwz+PHz9eEyZM0OjRo1VdXa0ZM2Zctn9ZWZlKS0vDr0OhECEEADeAuD+GnZeXp/T0dDU0NHS73efzKSUlJWIBAPR/cQ+gY8eO6dSpU8rKyor3oQAAfYjnX8GdPn064mqmqalJBw4cUFpamtLS0rRq1SrNnz9fgUBAjY2NevbZZ3Xbbbdp1qxZMW0cANC3eQ6gffv26b777gu//uz+zcKFC7Vu3TodPHhQv/jFL9Ta2qrs7GzNnDlTP/rRj+Tz+WLXNQCgz/McQNOnT5dz7orbf/WrX11XQ+jfXN3vPNes+Lslnms2VP6D5xpJGjlwmOeaziv/cYipRHk/0Kmve5/AVJJS/7U2qjrAC+aCAwCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgAkCCABgggACAJgggAAAJgggAIAJAggAYIIAAgCYiPlXcgOx9u/TkzzXZCZG9/Uf//PMIM81JR8u8lzz36dUeD/OzfWea/75G1M810hS6r9GVQZ4whUQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAE0xGih51rOxvPNf8t4UvR3Ek7xOYStJLzyz2XJO37UPPNQ9sLPFc87uv/bP34+T9znONJB2KqgrwhisgAIAJAggAYIIAAgCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgAkCCABgggACAJgggAAAJpiMFFEbGMj0XDP+6x97rhk10PvEolP3P+a5RpIy6o57L4piHC4Go5ss1asd/6MgqrpbVRvjToDLcQUEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABJORImpu2FDPNfPSq2PfSDcySs5GVXfh6DHPNYm35XquKb9/i+eaaFy4yfXIcYBocAUEADBBAAEATHgKoPLyck2aNEnJycnKyMjQvHnzVF9fH7HPuXPnVFJSouHDh2vYsGGaP3++WlpaYto0AKDv8xRANTU1Kikp0Z49e7Rr1y51dnZq5syZam9vD+/z9NNP65133tGWLVtUU1Oj48eP66GHHop54wCAvs3TQwg7d+6MeF1ZWamMjAzV1dVp2rRpCgaD+tnPfqaNGzfq/vvvlyRt2LBBd955p/bs2aN77rkndp0DAPq067oHFAwGJUlpaWmSpLq6OnV2dqqoqCi8z9ixYzVy5EjV1nb/Fb8dHR0KhUIRCwCg/4s6gLq6urR8+XJNmTJF48aNkyQ1NzcrKSlJqampEftmZmaqubm52/cpLy+X3+8PLzk5OdG2BADoQ6IOoJKSEh06dEibN2++rgbKysoUDAbDy9GjR6/r/QAAfUNUH0RdtmyZduzYod27d2vEiBHh9YFAQOfPn1dra2vEVVBLS4sCgUC37+Xz+eTz+aJpAwDQh3m6AnLOadmyZdq6davee+895eZGfgJ84sSJGjRokKqqqsLr6uvrdeTIERUWFsamYwBAv+DpCqikpEQbN27U9u3blZycHL6v4/f7NWTIEPn9fj3++OMqLS1VWlqaUlJS9NRTT6mwsJAn4AAAETwF0Lp16yRJ06dPj1i/YcMGLVq0SJL005/+VAMGDND8+fPV0dGhWbNm6bXXXotJswCA/sNTADl37YkNBw8erIqKClVUVETdFHpWwsDo5qT9P2sTPdfMH/ap55r95z2XSF3RTcKZODzNc03Dt7q/v3k1RUO9T3q68JM5nmvGrPqD5xpJuhhVFeANc8EBAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAExE
Nw0y+pWEIUOiqhs//ITnmi51ea75T5u/47km92it5xpJ+uP6yZ5rPp6z1nPN+2e9z7od+pb3moutDZ5rgJ7CFRAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATTEYKdbW1RVVX++93eS8a4b1k3BTvE2o2L7jH+4Ek/eT+TVHVeZU64IznGjeIP67oX7gCAgCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgAkCCABgggACAJgggAAAJgggAIAJAggAYILZDaHEVH9Udd/I/W2MO+neptG/9F60JrpjDYji32T/FMzzXLPjm1M813Qd+thzDdCbcQUEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABJORQhdbg1HV7V88znPN1JfGeq754D+84bkmWicunvVc89azxZ5rBh/60HMN0N9wBQQAMEEAAQBMeAqg8vJyTZo0ScnJycrIyNC8efNUX18fsc/06dOVkJAQsTzxxBMxbRoA0Pd5CqCamhqVlJRoz5492rVrlzo7OzVz5ky1t7dH7LdkyRKdOHEivKxevTqmTQMA+j5PDyHs3Lkz4nVlZaUyMjJUV1enadOmhdcPHTpUgUAgNh0CAPql67oHFAxeenoqLS0tYv0bb7yh9PR0jRs3TmVlZTpz5swV36Ojo0OhUChiAQD0f1E/ht3V1aXly5drypQpGjfuL4/jPvrooxo1apSys7N18OBBPffcc6qvr9fbb7/d7fuUl5dr1apV0bYBAOijog6gkpISHTp0SB988EHE+qVLl4Z/Hj9+vLKysjRjxgw1NjZq9OjRl71PWVmZSktLw69DoZBycnKibQsA0EdEFUDLli3Tjh07tHv3bo0YMeKq+xYUFEiSGhoaug0gn88nn88XTRsAgD7MUwA55/TUU09p69atqq6uVm5u7jVrDhw4IEnKysqKqkEAQP/kKYBKSkq0ceNGbd++XcnJyWpubpYk+f1+DRkyRI2Njdq4caMeeOABDR8+XAcPHtTTTz+tadOmacKECXH5DwAA9E2eAmjdunWSLn3Y9K9t2LBBixYtUlJSkt5991298soram9vV05OjubPn6/nn38+Zg0DAPoHz7+Cu5qcnBzV1NRcV0MAgBsDs2Ejal0Hfu+5Ju3r3o/zDU3yXtSDBouZrYFoMBkpAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwQQAMAEAQQAMEEAAQBMEEAAABMEEADABAEEADBBAAEATBBAAAATBBAAwAQBBAAwQQABAEwQQAAAEwOtG/g855wk6YI6JWfcDADAswvqlPSXv8+vpNcFUFtbmyTpA/3SuBMAwPVoa2uT3++/4vYEd62I6mFdXV06fvy4kpOTlZCQELEtFAopJydHR48eVUpKilGH9hiHSxiHSxiHSxiHS3rDODjn1NbWpuzsbA0YcOU7Pb3uCmjAgAEaMWLEVfdJSUm5oU+wzzAOlzAOlzAOlzAOl1iPw9WufD7DQwgAABMEEADARJ8KIJ/Pp5UrV8rn81m3YopxuIRxuIRxuIRxuKQvjUOvewgBAHBj6FNXQACA/oMAAgCYIIAAACYIIACAiT4TQBUVFbr11ls1ePBgFRQU6MMPP7Ruqce9+OKLSkhIiFjGjh1r3Vbc7d69W3PmzFF2drYSEhK0bdu2iO3OOa1YsUJZWVkaMmSIioqKdPjwYZtm4+ha47Bo0aLLzo/Zs2fbNBsn5eXlmjRpkpKTk5WRkaF58+apvr4+Yp9z586ppKREw4cP17BhwzR//ny1tLQYdRwfX2Qcpk+fftn58MQTTxh13L0+EUBvvvmmSktLtXLlSn300UfKz8/XrFmzdPLkSevWetxdd92lEydOhJcPPvjAuqW4a29vV35+vioq
Krrdvnr1aq1du1br16/X3r17ddNNN2nWrFk6d+5cD3caX9caB0maPXt2xPmxadOmHuww/mpqalRSUqI9e/Zo165d6uzs1MyZM9Xe3h7e5+mnn9Y777yjLVu2qKamRsePH9dDDz1k2HXsfZFxkKQlS5ZEnA+rV6826vgKXB8wefJkV1JSEn598eJFl52d7crLyw276nkrV650+fn51m2YkuS2bt0aft3V1eUCgYB7+eWXw+taW1udz+dzmzZtMuiwZ3x+HJxzbuHChW7u3Lkm/Vg5efKkk+Rqamqcc5f+3w8aNMht2bIlvM8f/vAHJ8nV1tZatRl3nx8H55z72te+5r773e/aNfUF9PoroPPnz6uurk5FRUXhdQMGDFBRUZFqa2sNO7Nx+PBhZWdnKy8vT4899piOHDli3ZKppqYmNTc3R5wffr9fBQUFN+T5UV1drYyMDI0ZM0ZPPvmkTp06Zd1SXAWDQUlSWlqaJKmurk6dnZ0R58PYsWM1cuTIfn0+fH4cPvPGG28oPT1d48aNU1lZmc6cOWPR3hX1uslIP+/TTz/VxYsXlZmZGbE+MzNTH3/8sVFXNgoKClRZWakxY8boxIkTWrVqle69914dOnRIycnJ1u2ZaG5ulqRuz4/Ptt0oZs+erYceeki5ublqbGzUD37wAxUXF6u2tlaJiYnW7cVcV1eXli9frilTpmjcuHGSLp0PSUlJSk1Njdi3P58P3Y2DJD366KMaNWqUsrOzdfDgQT333HOqr6/X22+/bdhtpF4fQPiL4uLi8M8TJkxQQUGBRo0apbfeekuPP/64YWfoDR5++OHwz+PHj9eECRM0evRoVVdXa8aMGYadxUdJSYkOHTp0Q9wHvZorjcPSpUvDP48fP15ZWVmaMWOGGhsbNXr06J5us1u9/ldw6enpSkxMvOwplpaWFgUCAaOueofU1FTdcccdamhosG7FzGfnAOfH5fLy8pSent4vz49ly5Zpx44dev/99yO+viUQCOj8+fNqbW2N2L+/ng9XGofuFBQUSFKvOh96fQAlJSVp4sSJqqqqCq/r6upSVVWVCgsLDTuzd/r0aTU2NiorK8u6FTO5ubkKBAIR50coFNLevXtv+PPj2LFjOnXqVL86P5xzWrZsmbZu3ar33ntPubm5EdsnTpyoQYMGRZwP9fX1OnLkSL86H641Dt05cOCAJPWu88H6KYgvYvPmzc7n87nKykr3+9//3i1dutSlpqa65uZm69Z61Pe+9z1XXV3tmpqa3K9//WtXVFTk0tPT3cmTJ61bi6u2tja3f/9+t3//fifJrVmzxu3fv9/96U9/cs459+Mf/9ilpqa67du3u4MHD7q5c+e63Nxcd/bsWePOY+tq49DW1uaeeeYZV1tb65qamty7777rvvrVr7rbb7/dnTt3zrr1mHnyySed3+931dXV7sSJE+HlzJkz4X2eeOIJN3LkSPfee++5ffv2ucLCQldYWGjYdexdaxwaGhrcD3/4Q7dv3z7X1NTktm/f7vLy8ty0adOMO4/UJwLIOedeffVVN3LkSJeUlOQmT57s9uzZY91Sj1uwYIHLyspySUlJ7ktf+pJbsGCBa2hosG4r7t5//30n6bJl4cKFzrlLj2K/8MILLjMz0/l8PjdjxgxXX19v23QcXG0czpw542bOnOluueUWN2jQIDdq1Ci3ZMmSfvePtO7++yW5DRs2hPc5e/as+/a3v+1uvvlmN3ToUPfggw+6EydO2DUdB9cahyNHjrhp06a5tLQ05/P53G233ea+//3vu2AwaNv45/B1DAAAE73+HhAAoH8igAAAJgggAIAJAggAYIIAAgCYIIAAACYIIACACQIIAGCCAAIAmCCAAAAmCCAAgAkCCABg4v8BpQu15Tuiu0EAAAAASUVORK5CYII=",
381 | "text/plain": [
382 | ""
383 | ]
384 | },
385 | "metadata": {},
386 | "output_type": "display_data"
387 | }
388 | ],
389 | "source": [
390 | "PlotImage(X,Y)"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 14,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | " training Data shape : (16000, 784) (16000,)\n",
403 | " testing Data shape : (4000, 784) (4000,)\n"
404 | ]
405 | }
406 | ],
407 | "source": [
408 | "from sklearn.model_selection import train_test_split\n",
409 | "Xtrain,Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=4)\n",
410 | "\n",
411 | "print(\" training Data shape : \", Xtrain.shape, Ytrain.shape)\n",
412 | "print(\" testing Data shape : \", Xtest.shape, Ytest.shape)"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": 15,
418 | "metadata": {},
419 | "outputs": [
420 | {
421 | "name": "stdout",
422 | "output_type": "stream",
423 | "text": [
424 | "CPU times: user 17.9 ms, sys: 923 µs, total: 18.8 ms\n",
425 | "Wall time: 19.4 ms\n"
426 | ]
427 | },
428 | {
429 | "data": {
430 | "text/html": [
431 | "KNeighborsClassifier() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
432 | ],
433 | "text/plain": [
434 | "KNeighborsClassifier()"
435 | ]
436 | },
437 | "execution_count": 15,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "from sklearn.neighbors import KNeighborsClassifier\n",
444 | "\n",
445 | "model = KNeighborsClassifier()\n",
446 | "%time model.fit(Xtrain, Ytrain)"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 16,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "name": "stdout",
456 | "output_type": "stream",
457 | "text": [
458 | "CPU times: user 6.65 s, sys: 213 ms, total: 6.86 s\n",
459 | "Wall time: 4.52 s\n"
460 | ]
461 | }
462 | ],
463 | "source": [
464 | "%time predictions = model.predict(Xtest)"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 17,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "0.95925"
476 | ]
477 | },
478 | "execution_count": 17,
479 | "metadata": {},
480 | "output_type": "execute_result"
481 | }
482 | ],
483 | "source": [
484 | "\n",
485 | "# model.score() -> regression - R2score\n",
486 | "#----------------> classification - accuracy\n",
487 | "\n",
488 | "model.score(Xtest, Ytest)"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 18,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "name": "stdout",
498 | "output_type": "stream",
499 | "text": [
500 | " K : 3\n",
501 | " Accuracy : 0.9615\n",
502 | " K : 5\n",
503 | " Accuracy : 0.95925\n",
504 | " K : 7\n",
505 | " Accuracy : 0.958\n",
506 | " K : 9\n",
507 | " Accuracy : 0.95575\n"
508 | ]
509 | }
510 | ],
511 | "source": [
512 | "# Selection of K\n",
513 | "\n",
514 | "for k in [3,5,7,9]:\n",
515 | " print(\" K : \", k)\n",
516 | " model = KNeighborsClassifier(n_neighbors=k)\n",
517 | " model.fit(Xtrain, Ytrain)\n",
518 | " print(\" Accuracy : \", model.score(Xtest, Ytest))\n"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": 20,
524 | "metadata": {},
525 | "outputs": [
526 | {
527 | "data": {
528 | "text/html": [
529 | "Perceptron() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
530 | ],
531 | "text/plain": [
532 | "Perceptron()"
533 | ]
534 | },
535 | "execution_count": 20,
536 | "metadata": {},
537 | "output_type": "execute_result"
538 | }
539 | ],
540 | "source": [
541 | "from sklearn.linear_model import Perceptron\n",
542 | "\n",
543 | "model = Perceptron()\n",
544 | "model.fit(Xtrain, Ytrain)"
545 | ]
546 | },
547 | {
548 | "cell_type": "code",
549 | "execution_count": 21,
550 | "metadata": {},
551 | "outputs": [
552 | {
553 | "data": {
554 | "text/plain": [
555 | "0.89725"
556 | ]
557 | },
558 | "execution_count": 21,
559 | "metadata": {},
560 | "output_type": "execute_result"
561 | }
562 | ],
563 | "source": [
564 | "model.score(Xtrain, Ytrain)"
565 | ]
566 | },
567 | {
568 | "cell_type": "code",
569 | "execution_count": 22,
570 | "metadata": {},
571 | "outputs": [
572 | {
573 | "data": {
574 | "text/plain": [
575 | "0.872"
576 | ]
577 | },
578 | "execution_count": 22,
579 | "metadata": {},
580 | "output_type": "execute_result"
581 | }
582 | ],
583 | "source": [
584 | "model.score(Xtest, Ytest)"
585 | ]
586 | },
587 | {
588 | "cell_type": "code",
589 | "execution_count": null,
590 | "metadata": {},
591 | "outputs": [],
592 | "source": []
593 | }
594 | ],
595 | "metadata": {
596 | "kernelspec": {
597 | "display_name": "Python 3.10.6 64-bit",
598 | "language": "python",
599 | "name": "python3"
600 | },
601 | "language_info": {
602 | "codemirror_mode": {
603 | "name": "ipython",
604 | "version": 3
605 | },
606 | "file_extension": ".py",
607 | "mimetype": "text/x-python",
608 | "name": "python",
609 | "nbconvert_exporter": "python",
610 | "pygments_lexer": "ipython3",
611 | "version": "3.10.6"
612 | },
613 | "orig_nbformat": 4,
614 | "vscode": {
615 | "interpreter": {
616 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
617 | }
618 | }
619 | },
620 | "nbformat": 4,
621 | "nbformat_minor": 2
622 | }
623 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # hi all
2 |
--------------------------------------------------------------------------------
/knn/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taureanjoe/Machine-Learning/2d095810454989fafec46017a4a6d08631c5b999/knn/.DS_Store
--------------------------------------------------------------------------------
/knn/knn.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "\n",
13 | "from sklearn.preprocessing import MinMaxScaler"
14 | ]
15 | },
16 | {
17 | "cell_type": "code",
18 | "execution_count": 4,
19 | "metadata": {},
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " \n",
42 | " 0 \n",
43 | " 1 \n",
44 | " 2 \n",
45 | " 3 \n",
46 | " 4 \n",
47 | " 5 \n",
48 | " 6 \n",
49 | " 7 \n",
50 | " 8 \n",
51 | " 9 \n",
52 | " ... \n",
53 | " 775 \n",
54 | " 776 \n",
55 | " 777 \n",
56 | " 778 \n",
57 | " 779 \n",
58 | " 780 \n",
59 | " 781 \n",
60 | " 782 \n",
61 | " 783 \n",
62 | " 784 \n",
63 | " \n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " 19995 \n",
68 | " 0 \n",
69 | " 0 \n",
70 | " 0 \n",
71 | " 0 \n",
72 | " 0 \n",
73 | " 0 \n",
74 | " 0 \n",
75 | " 0 \n",
76 | " 0 \n",
77 | " 0 \n",
78 | " ... \n",
79 | " 0 \n",
80 | " 0 \n",
81 | " 0 \n",
82 | " 0 \n",
83 | " 0 \n",
84 | " 0 \n",
85 | " 0 \n",
86 | " 0 \n",
87 | " 0 \n",
88 | " 0 \n",
89 | " \n",
90 | " \n",
91 | " 19996 \n",
92 | " 1 \n",
93 | " 0 \n",
94 | " 0 \n",
95 | " 0 \n",
96 | " 0 \n",
97 | " 0 \n",
98 | " 0 \n",
99 | " 0 \n",
100 | " 0 \n",
101 | " 0 \n",
102 | " ... \n",
103 | " 0 \n",
104 | " 0 \n",
105 | " 0 \n",
106 | " 0 \n",
107 | " 0 \n",
108 | " 0 \n",
109 | " 0 \n",
110 | " 0 \n",
111 | " 0 \n",
112 | " 0 \n",
113 | " \n",
114 | " \n",
115 | " 19997 \n",
116 | " 2 \n",
117 | " 0 \n",
118 | " 0 \n",
119 | " 0 \n",
120 | " 0 \n",
121 | " 0 \n",
122 | " 0 \n",
123 | " 0 \n",
124 | " 0 \n",
125 | " 0 \n",
126 | " ... \n",
127 | " 0 \n",
128 | " 0 \n",
129 | " 0 \n",
130 | " 0 \n",
131 | " 0 \n",
132 | " 0 \n",
133 | " 0 \n",
134 | " 0 \n",
135 | " 0 \n",
136 | " 0 \n",
137 | " \n",
138 | " \n",
139 | " 19998 \n",
140 | " 9 \n",
141 | " 0 \n",
142 | " 0 \n",
143 | " 0 \n",
144 | " 0 \n",
145 | " 0 \n",
146 | " 0 \n",
147 | " 0 \n",
148 | " 0 \n",
149 | " 0 \n",
150 | " ... \n",
151 | " 0 \n",
152 | " 0 \n",
153 | " 0 \n",
154 | " 0 \n",
155 | " 0 \n",
156 | " 0 \n",
157 | " 0 \n",
158 | " 0 \n",
159 | " 0 \n",
160 | " 0 \n",
161 | " \n",
162 | " \n",
163 | " 19999 \n",
164 | " 5 \n",
165 | " 0 \n",
166 | " 0 \n",
167 | " 0 \n",
168 | " 0 \n",
169 | " 0 \n",
170 | " 0 \n",
171 | " 0 \n",
172 | " 0 \n",
173 | " 0 \n",
174 | " ... \n",
175 | " 0 \n",
176 | " 0 \n",
177 | " 0 \n",
178 | " 0 \n",
179 | " 0 \n",
180 | " 0 \n",
181 | " 0 \n",
182 | " 0 \n",
183 | " 0 \n",
184 | " 0 \n",
185 | " \n",
186 | " \n",
187 | "
\n",
188 | "
5 rows × 785 columns
\n",
189 | "
"
190 | ],
191 | "text/plain": [
192 | " 0 1 2 3 4 5 6 7 8 9 ... 775 776 777 \\\n",
193 | "19995 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
194 | "19996 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
195 | "19997 2 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
196 | "19998 9 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
197 | "19999 5 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n",
198 | "\n",
199 | " 778 779 780 781 782 783 784 \n",
200 | "19995 0 0 0 0 0 0 0 \n",
201 | "19996 0 0 0 0 0 0 0 \n",
202 | "19997 0 0 0 0 0 0 0 \n",
203 | "19998 0 0 0 0 0 0 0 \n",
204 | "19999 0 0 0 0 0 0 0 \n",
205 | "\n",
206 | "[5 rows x 785 columns]"
207 | ]
208 | },
209 | "execution_count": 4,
210 | "metadata": {},
211 | "output_type": "execute_result"
212 | }
213 | ],
214 | "source": [
215 | "data = pd.read_csv(\"mnist_train_small.csv\", header= None)\n",
216 | "data.tail()"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 5,
222 | "metadata": {},
223 | "outputs": [],
224 | "source": [
225 | "# MNIST - digit recognition dataset\n",
226 | "# Each image is embedded inside the csv file\n",
227 | "# 28*28 image -> flattened to 784 values -> stored as one row of the csv file\n",
228 | "\n",
229 | "# 785 -> 1 column (output), 784 (image)\n",
230 | "# pixel range - (0-255)\n",
231 | "# 0 - black\n",
232 | "# 255 - white"
233 | ]
234 | },
235 | {
236 | "cell_type": "code",
237 | "execution_count": 6,
238 | "metadata": {},
239 | "outputs": [
240 | {
241 | "data": {
242 | "text/plain": [
243 | "False"
244 | ]
245 | },
246 | "execution_count": 6,
247 | "metadata": {},
248 | "output_type": "execute_result"
249 | }
250 | ],
251 | "source": [
252 | "data.isnull().sum().any()\n"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 7,
258 | "metadata": {},
259 | "outputs": [
260 | {
261 | "name": "stdout",
262 | "output_type": "stream",
263 | "text": [
264 | "\n",
265 | "RangeIndex: 20000 entries, 0 to 19999\n",
266 | "Columns: 785 entries, 0 to 784\n",
267 | "dtypes: int64(785)\n",
268 | "memory usage: 119.8 MB\n"
269 | ]
270 | }
271 | ],
272 | "source": [
273 | "data.info()"
274 | ]
275 | },
276 | {
277 | "cell_type": "code",
278 | "execution_count": 8,
279 | "metadata": {},
280 | "outputs": [
281 | {
282 | "data": {
283 | "text/plain": [
284 | "((20000, 784), (20000,))"
285 | ]
286 | },
287 | "execution_count": 8,
288 | "metadata": {},
289 | "output_type": "execute_result"
290 | }
291 | ],
292 | "source": [
293 | "# dividing features and targets\n",
294 | "\n",
295 | "X = data.iloc[:, 1:].values\n",
296 | "Y = data.iloc[:, 0].values\n",
297 | "\n",
298 | "X.shape, Y.shape"
299 | ]
300 | },
301 | {
302 | "cell_type": "code",
303 | "execution_count": 9,
304 | "metadata": {},
305 | "outputs": [
306 | {
307 | "data": {
308 | "text/plain": [
309 | "255"
310 | ]
311 | },
312 | "execution_count": 9,
313 | "metadata": {},
314 | "output_type": "execute_result"
315 | }
316 | ],
317 | "source": [
318 | "X.max()"
319 | ]
320 | },
321 | {
322 | "cell_type": "code",
323 | "execution_count": 10,
324 | "metadata": {},
325 | "outputs": [],
326 | "source": [
327 | "scaler = MinMaxScaler()\n",
328 | "X = scaler.fit_transform(X)"
329 | ]
330 | },
331 | {
332 | "cell_type": "code",
333 | "execution_count": 11,
334 | "metadata": {},
335 | "outputs": [
336 | {
337 | "data": {
338 | "text/plain": [
339 | "(20000, 784)"
340 | ]
341 | },
342 | "execution_count": 11,
343 | "metadata": {},
344 | "output_type": "execute_result"
345 | }
346 | ],
347 | "source": [
348 | "X.shape"
349 | ]
350 | },
351 | {
352 | "cell_type": "code",
353 | "execution_count": 12,
354 | "metadata": {},
355 | "outputs": [],
356 | "source": [
357 | "def PlotImage(X, Y):\n",
358 | " idx = np.random.choice(X.shape[0])\n",
359 | " label = Y[idx]\n",
360 | " print(\" Target : \", label)\n",
361 | " x = X[idx, :]\n",
362 | " x = x.reshape(28,28)\n",
363 | " plt.imshow(x) "
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 13,
369 | "metadata": {},
370 | "outputs": [
371 | {
372 | "name": "stdout",
373 | "output_type": "stream",
374 | "text": [
375 | " Target : 7\n"
376 | ]
377 | },
378 | {
379 | "data": {
380 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAaAAAAGdCAYAAABU0qcqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAbJklEQVR4nO3df3BU9f3v8dcGyPLDZDHEZLMlYEAFK5DeoqQZkWLJkMTv9YLwh7/u/YLjwIjBW0ytThwFbTs3LX4HuXpTmHu/LdTviFrvCIxMhw5GE65tQr9EuFymNkNyY4FCgvKdZEOAEMnn/sF125UEepbdvLPL8zFzZsju+eS8Pd369LCbE59zzgkAgCGWZj0AAOD6RIAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAICJkdYDfF1/f79OnDihjIwM+Xw+63EAAB4559Td3a1QKKS0tMGvc4ZdgE6cOKH8/HzrMQAA1+jYsWOaOHHioM8PuwBlZGRIkubqPo3UKONpAABefak+fazfRP59PpiEBaimpkavvPKK2tvbVVhYqNdff11z5sy56rqv/tptpEZppI8AAUDS+f93GL3a2ygJ+RDCO++8o8rKSq1bt06ffPKJCgsLVVpaqlOnTiXicACAJJSQAG3YsEErVqzQY489pm9+85vavHmzxo4dq1/+8peJOBwAIAnFPUAXLlxQU1OTSkpK/nqQtDSVlJSooaHhsv17e3sVDoejNgBA6ot7gL744gtdvHhRubm5UY/n5uaqvb39sv2rq6sVCAQiG5+AA4Drg/kPolZVVamrqyuyHTt2zHokAMAQiPun4LKzszVixAh1dHREPd7R0aFgMHjZ/n6/X36/P95jAACGubhfAaWnp2v27Nmqra2NPNbf36/a2loVFxfH+3AAgCSVkJ8Dqqys1LJly3TnnXdqzpw52rhxo3p6evTYY48l4nAAgCSUkAA9+OCD+vzzz7V27Vq1t7frW9/6lnbv3n3ZBxMAANcvn3POWQ/xt8LhsAKBgOZrEXdCAIAk9KXrU512qqurS5mZmYPuZ/4pOADA9YkAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJiIe4Beeukl+Xy+qG369OnxPgwAIMmNTMQ3veOOO/TBBx/89SAjE3IYAEASS0gZRo4cqWAwmIhvDQBIEQl5D+jIkSMKhUKaMmWKHn30UR09enTQfXt7exUOh6M2AEDqi3uAioqKtHXrVu3evVubNm1SW1ub7rnnHnV3dw+4f3V1tQKBQGTLz8+P90gAgGHI55xziTxAZ2enJk+erA0bNujxxx+/7Pne3l719vZGvg6Hw8rPz9d8LdJI36hEjgYASIAvXZ/qtFNdXV3KzMwcdL+Efzpg/Pjxuu2229TS0jLg836/X36/P9FjAACGmYT/HNCZM2fU2tqqvLy8RB8KAJBE4h6gZ555RvX19frss8/0+9//Xg888IBGjBihhx9+ON6HAgAksbj/Fdzx48f18MMP6/Tp07rppps0d+5cNTY26qabbor3oQAASSzuAXr77bfj/S0BACmIe8EBAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYS/gvpgL+VNm6c5zVu+s3xHySO/m3G4L/xcTDncn2e14w70e95
zeff9rxEkuSy+jyvmZj3b57X7J253fOaO9et8rxmwv9o8LwGiccVEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAExwN+wUM2JCluc1p++bFtOxvvh3zvOa8nsOeF7zaugNz2uQHPq8v4S0oWqT5zU/2/MfvB9I0pefHY1pHf4+XAEBAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACa4GekQ+ct7d3hes3bGLs9rMtM+9bxmwZg9ntekqn71e15ztr/P85rKv5R6XlP3v6d7XjOUXpj3vuc1/5j5F89riv0XPa85PTfkeY0kBbgZaUJxBQQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmOBmpEOkvyngec2fb8tOwCSX+z/nh+QwMdt5vNDzms66YEzHGnPKeV6T9cuGGI7U7XnFbfrXGI4TG5/f73nNb2u933A3lpuRnunv9bwm/Yz3m8wi8bgCAgCYIEAAABOeA7R3717df//9CoVC8vl82rFjR9TzzjmtXbtWeXl5GjNmjEpKSnTkyJF4zQsASBGeA9TT06PCwkLV1NQM+Pz69ev12muvafPmzdq3b5/GjRun0tJSnT8/zN9oAAAMKc8fQigvL1d5efmAzznntHHjRr3wwgtatGiRJOmNN95Qbm6uduzYoYceeujapgUApIy4vgfU1tam9vZ2lZSURB4LBAIqKipSQ8PAnxTq7e1VOByO2gAAqS+uAWpvb5ck5ebmRj2em5sbee7rqqurFQgEIlt+fn48RwIADFPmn4KrqqpSV1dXZDt27Jj1SACAIRDXAAWDl374r6OjI+rxjo6OyHNf5/f7lZmZGbUBAFJfXANUUFCgYDCo2trayGPhcFj79u1TcXFxPA8FAEhynj8Fd+bMGbW0tES+bmtr08GDB5WVlaVJkyZpzZo1+slPfqJbb71VBQUFevHFFxUKhbR48eJ4zg0ASHKeA7R//37de++9ka8rKyslScuWLdPWrVv17LPPqqenRytXrlRnZ6fmzp2r3bt3a/To0fGbGgCQ9HzOOe93X0ygcDisQCCg+Vqkkb5R1uMA16Vzi+Z4XlP7800JmORyD//fUs9ruu/5IgGTYDBfuj7Vaae6urqu+L6++afgAADXJwIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJjw/OsYAKS+Y/8wrG6SH+XUP03xvGaMuBv2cMQVEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABggpuRAils5DdCMa3bV74xhlWjPa84cKHf85qxvznoec3wvbXq9Y0rIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABDcjBVLYp8/nx7TuxjTvNxaNxT/+y3/2vGZyX0MCJoEFroAAACYIEADABAECAJggQAAAEwQIAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABPcjBRIYcvm/q8hO9aTx+d5XnPzj5s8r3GeV2C44goIAGCCAAEATHgO0N69e3X//fcrFArJ5/Npx44dUc8vX75cPp8vaisrK4vXvACAFOE5QD09PSosLFRNTc2g+5SVlenkyZOR7a233rqmIQEAqcfzhxDKy8tVXl5+xX38fr+CwWDMQwEAUl9C3gOqq6tTTk6Opk2bplWrVun06dOD7tvb26twOBy1AQBSX9wDVFZWpjfeeEO1tbX62c9+pvr6epWXl+vixYsD7l9dXa1AIBDZ8vNj+x32AIDkEvefA3rooYcif545c6ZmzZqlqVOnqq6uTgsWLLhs
/6qqKlVWVka+DofDRAgArgMJ/xj2lClTlJ2drZaWlgGf9/v9yszMjNoAAKkv4QE6fvy4Tp8+rby8vEQfCgCQRDz/FdyZM2eirmba2tp08OBBZWVlKSsrSy+//LKWLl2qYDCo1tZWPfvss7rllltUWloa18EBAMnNc4D279+ve++9N/L1V+/fLFu2TJs2bdKhQ4f0q1/9Sp2dnQqFQlq4cKF+/OMfy+/3x29qAEDS8xyg+fPny7nBbwf429/+9poGAjCwc4vmeF6zKPP1GI/m/fNJ+9u9f3gop+9PntcgdXAvOACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJiI+6/kBnB1I/OCnte8uvG/eV5zR3ps/xfvuHjO85qs18bFdCxcv7gCAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMcDNSwIC7MdPzmlnpIxIwycDm//oZz2um1jYmYBKkMq6AAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAAT3IwUMNCydrT1CFd0w1H+2xSJx6sMAGCCAAEATBAgAIAJAgQAMEGAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADDBzUiBa9T1H7/jec3Bua/FcKQRnldsCefHcBwp758Pel7TH9ORcD3jCggAYIIAAQBMeApQdXW17rrrLmVkZCgnJ0eLFy9Wc3Nz1D7nz59XRUWFJkyYoBtuuEFLly5VR0dHXIcGACQ/TwGqr69XRUWFGhsbtWfPHvX19WnhwoXq6emJ7PP000/r/fff17vvvqv6+nqdOHFCS5YsifvgAIDk5ulDCLt37476euvWrcrJyVFTU5PmzZunrq4u/eIXv9C2bdv0ve99T5K0ZcsW3X777WpsbNR3vuP9zVoAQGq6pveAurq6JElZWVmSpKamJvX19amkpCSyz/Tp0zVp0iQ1NDQM+D16e3sVDoejNgBA6os5QP39/VqzZo3uvvtuzZgxQ5LU3t6u9PR0jR8/Pmrf3Nxctbe3D/h9qqurFQgEIlt+fmwfGwUAJJeYA1RRUaHDhw/r7bffvqYBqqqq1NXVFdmOHTt2Td8PAJAcYvpB1NWrV2vXrl3au3evJk6cGHk8GAzqwoUL6uzsjLoK6ujoUDAYHPB7+f1++f3+WMYAACQxT1dAzjmtXr1a27dv14cffqiCgoKo52fPnq1Ro0aptrY28lhzc7OOHj2q4uLi+EwMAEgJnq6AKioqtG3bNu3cuVMZGRmR93UCgYDGjBmjQCCgxx9/XJWVlcrKylJmZqaeeuopFRcX8wk4AEAUTwHatGmTJGn+/PlRj2/ZskXLly+XJL366qtKS0vT0qVL1dvbq9LSUv385z+Py7AAgNThKUDOuavuM3r0aNXU1KimpibmoYBk8nlZr+c1o3zebywai3dWl8e0buTZpjhPAlyOe8EBAEwQIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADAREy/ERVIVWeXFHles33uf43hSKM8r9jcOcXzGv/h2H7F/cWYVgHecAUEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJjgZqTA3+j8T92e19w+yvuNRWOx8cD3PK+5peNAAiYB4oMrIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABDcjRUoacfutMa3bP+dXMazy/t9xR78853nNrRv7PK9xnlcAQ4crIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADA
BDcjRUq6482WmNalDdF/k5U3POl5TcH+QwmYBLDDFRAAwAQBAgCY8BSg6upq3XXXXcrIyFBOTo4WL16s5ubmqH3mz58vn88XtT3xxBNxHRoAkPw8Bai+vl4VFRVqbGzUnj171NfXp4ULF6qnpydqvxUrVujkyZORbf369XEdGgCQ/Dx9CGH37t1RX2/dulU5OTlqamrSvHnzIo+PHTtWwWAwPhMCAFLSNb0H1NXVJUnKysqKevzNN99Udna2ZsyYoaqqKp09e3bQ79Hb26twOBy1AQBSX8wfw+7v79eaNWt09913a8aMGZHHH3nkEU2ePFmhUEiHDh3Sc889p+bmZr333nsDfp/q6mq9/PLLsY4BAEhSMQeooqJChw8f1scffxz1+MqVKyN/njlzpvLy8rRgwQK1trZq6tSpl32fqqoqVVZWRr4Oh8PKz8+PdSwAQJKIKUCrV6/Wrl27tHfvXk2cOPGK+xYVFUmSWlpaBgyQ3++X3++PZQwAQBLzFCDnnJ566ilt375ddXV1KigouOqagwcPSpLy8vJiGhAAkJo8BaiiokLbtm3Tzp07lZGRofb2dklSIBDQmDFj1Nraqm3btum+++7ThAkTdOjQIT399NOaN2+eZs2alZB/AABAcvIUoE2bNkm69MOmf2vLli1avny50tPT9cEHH2jjxo3q6elRfn6+li5dqhdeeCFuAwMAUoPnv4K7kvz8fNXX11/TQACA6wN3w0ZK+p/774xp3X/5h/2e1/z3rps9r7n1ee8/7/al5xXA8MbNSAEAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACYIEADABAECAJggQAAAE9yMFCnptpX/GtO6f6/ZcZ5kMJ8N0XGA4YsrIACACQIEADBBgAAAJggQAMAEAQIAmCBAAAATBAgAYIIAAQBMECAAgAkCBAAwQYAAACaG3b3gnHOSpC/VJznjYQAAnn2pPkl//ff5YIZdgLq7uyVJH+s3xpMAAK5Fd3e3AoHAoM/73NUSNcT6+/t14sQJZWRkyOfzRT0XDoeVn5+vY8eOKTMz02hCe5yHSzgPl3AeLuE8XDIczoNzTt3d3QqFQkpLG/ydnmF3BZSWlqaJEydecZ/MzMzr+gX2Fc7DJZyHSzgPl3AeLrE+D1e68vkKH0IAAJggQAAAE0kVIL/fr3Xr1snv91uPYorzcAnn4RLOwyWch0uS6TwMuw8hAACuD0l1BQQASB0ECABgggABAEwQIACAiaQJUE1NjW6++WaNHj1aRUVF+sMf/mA90pB76aWX5PP5orbp06dbj5Vwe/fu1f33369QKCSfz6cdO3ZEPe+c09q1a5WXl6cxY8aopKRER44csRk2ga52HpYvX37Z66OsrMxm2ASprq7WXXfdpYyMDOXk5Gjx4sVqbm6O2uf8+fOqqKjQhAkTdMMNN2jp0qXq6Ogwmjgx/p7zMH/+/MteD0888YTRxANLigC98847qqys1Lp16/TJJ5+osLBQpaWlOnXqlPVoQ+6OO+7QyZMnI9vHH39sPVLC9fT0qLCwUDU1NQM+v379er322mvavHmz9u3bp3Hjxqm0tFTnz58f4kkT62rnQZLKysqiXh9vvfXWEE6YePX19aqoqFBjY6P27Nmjvr4+LVy4UD09PZF9nn76ab3//vt69913VV9frxMnTmjJkiWGU8ff33MeJGnFihVRr4f169cbTTwIlwTmzJnjKioqIl9fvHjRhUIhV11dbTjV0Fu3bp0rLCy0HsOUJLd9+/bI1/39/S4YDLpXXnkl8lhnZ6fz+/3urbfeMphwaHz9PDjn3LJly9yiRYtM5rFy6tQpJ8nV19c75y79bz9q1Cj37rvvRvb59NNPnSTX0NBgNWbCff08OOfcd7/7Xff973/fbqi/w7C/Arpw4YKamppUUlISeSwtLU0lJSVqaGgwnMzGkSNHFAqFNGXKFD366KM6evSo9Uim2tra1N7eHvX6CAQC
Kioqui5fH3V1dcrJydG0adO0atUqnT592nqkhOrq6pIkZWVlSZKamprU19cX9XqYPn26Jk2alNKvh6+fh6+8+eabys7O1owZM1RVVaWzZ89ajDeoYXcz0q/74osvdPHiReXm5kY9npubqz/96U9GU9koKirS1q1bNW3aNJ08eVIvv/yy7rnnHh0+fFgZGRnW45lob2+XpAFfH189d70oKyvTkiVLVFBQoNbWVj3//PMqLy9XQ0ODRowYYT1e3PX392vNmjW6++67NWPGDEmXXg/p6ekaP3581L6p/HoY6DxI0iOPPKLJkycrFArp0KFDeu6559Tc3Kz33nvPcNpowz5A+Kvy8vLIn2fNmqWioiJNnjxZv/71r/X4448bTobh4KGHHor8eebMmZo1a5amTp2quro6LViwwHCyxKioqNDhw4evi/dBr2Sw87By5crIn2fOnKm8vDwtWLBAra2tmjp16lCPOaBh/1dw2dnZGjFixGWfYuno6FAwGDSaangYP368brvtNrW0tFiPYuar1wCvj8tNmTJF2dnZKfn6WL16tXbt2qWPPvoo6te3BINBXbhwQZ2dnVH7p+rrYbDzMJCioiJJGlavh2EfoPT0dM2ePVu1tbWRx/r7+1VbW6vi4mLDyeydOXNGra2tysvLsx7FTEFBgYLBYNTrIxwOa9++fdf96+P48eM6ffp0Sr0+nHNavXq1tm/frg8//FAFBQVRz8+ePVujRo2Kej00Nzfr6NGjKfV6uNp5GMjBgwclaXi9Hqw/BfH3ePvtt53f73dbt251f/zjH93KlSvd+PHjXXt7u/VoQ+oHP/iBq6urc21tbe53v/udKykpcdnZ2e7UqVPWoyVUd3e3O3DggDtw4ICT5DZs2OAOHDjg/vznPzvnnPvpT3/qxo8f73bu3OkOHTrkFi1a5AoKCty5c+eMJ4+vK52H7u5u98wzz7iGhgbX1tbmPvjgA/ftb3/b3Xrrre78+fPWo8fNqlWrXCAQcHV1de7kyZOR7ezZs5F9nnjiCTdp0iT34Ycfuv3797vi4mJXXFxsOHX8Xe08tLS0uB/96Edu//79rq2tze3cudNNmTLFzZs3z3jyaEkRIOece/31192kSZNcenq6mzNnjmtsbLQeacg9+OCDLi8vz6Wnp7tvfOMb7sEHH3QtLS3WYyXcRx995CRdti1btsw5d+mj2C+++KLLzc11fr/fLViwwDU3N9sOnQBXOg9nz551CxcudDfddJMbNWqUmzx5sluxYkXK/UfaQP/8ktyWLVsi+5w7d849+eST7sYbb3Rjx451DzzwgDt58qTd0AlwtfNw9OhRN2/ePJeVleX8fr+75ZZb3A9/+EPX1dVlO/jX8OsYAAAmhv17QACA1ESAAAAmCBAAwAQBAgCYIEAAABMECABgggABAEwQIACACQIEADBBgAAAJggQAMAEAQIAmPh/cptn99D2pqoAAAAASUVORK5CYII=",
381 | "text/plain": [
382 | ""
383 | ]
384 | },
385 | "metadata": {},
386 | "output_type": "display_data"
387 | }
388 | ],
389 | "source": [
390 | "PlotImage(X,Y)"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 14,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "name": "stdout",
400 | "output_type": "stream",
401 | "text": [
402 | " training Data shape : (16000, 784) (16000,)\n",
403 | " testing Data shape : (4000, 784) (4000,)\n"
404 | ]
405 | }
406 | ],
407 | "source": [
408 | "from sklearn.model_selection import train_test_split\n",
409 | "Xtrain,Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=4)\n",
410 | "\n",
411 | "print(\" training Data shape : \", Xtrain.shape, Ytrain.shape)\n",
412 | "print(\" testing Data shape : \", Xtest.shape, Ytest.shape)"
413 | ]
414 | },
415 | {
416 | "cell_type": "code",
417 | "execution_count": 15,
418 | "metadata": {},
419 | "outputs": [
420 | {
421 | "name": "stdout",
422 | "output_type": "stream",
423 | "text": [
424 | "CPU times: user 29 ms, sys: 1.71 ms, total: 30.7 ms\n",
425 | "Wall time: 69.9 ms\n"
426 | ]
427 | },
428 | {
429 | "data": {
430 | "text/html": [
431 | "KNeighborsClassifier() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
432 | ],
433 | "text/plain": [
434 | "KNeighborsClassifier()"
435 | ]
436 | },
437 | "execution_count": 15,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "from sklearn.neighbors import KNeighborsClassifier\n",
444 | "\n",
445 | "model = KNeighborsClassifier()\n",
446 | "%time model.fit(Xtrain, Ytrain)"
447 | ]
448 | },
449 | {
450 | "cell_type": "code",
451 | "execution_count": 16,
452 | "metadata": {},
453 | "outputs": [
454 | {
455 | "name": "stdout",
456 | "output_type": "stream",
457 | "text": [
458 | "CPU times: user 6.42 s, sys: 170 ms, total: 6.59 s\n",
459 | "Wall time: 4.69 s\n"
460 | ]
461 | }
462 | ],
463 | "source": [
464 | "%time predictions = model.predict(Xtest)"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 17,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "0.95925"
476 | ]
477 | },
478 | "execution_count": 17,
479 | "metadata": {},
480 | "output_type": "execute_result"
481 | }
482 | ],
483 | "source": [
484 | "\n",
485 | "# model.score() -> regression - R2score\n",
486 | "#----------------> classification - accuracy\n",
487 | "\n",
488 | "model.score(Xtest, Ytest)"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 18,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "name": "stdout",
498 | "output_type": "stream",
499 | "text": [
500 | " K : 3\n",
501 | " Accuracy : 0.9615\n",
502 | " K : 5\n",
503 | " Accuracy : 0.95925\n",
504 | " K : 7\n",
505 | " Accuracy : 0.958\n",
506 | " K : 9\n",
507 | " Accuracy : 0.95575\n"
508 | ]
509 | }
510 | ],
511 | "source": [
512 | "# Selection of K\n",
513 | "\n",
514 | "for k in [3,5,7,9]:\n",
515 | " print(\" K : \", k)\n",
516 | " model = KNeighborsClassifier(n_neighbors=k)\n",
517 | " model.fit(Xtrain, Ytrain)\n",
518 | " print(\" Accuracy : \", model.score(Xtest, Ytest))\n"
519 | ]
520 | },
521 | {
522 | "cell_type": "code",
523 | "execution_count": null,
524 | "metadata": {},
525 | "outputs": [],
526 | "source": []
527 | }
528 | ],
529 | "metadata": {
530 | "kernelspec": {
531 | "display_name": "Python 3.10.6 64-bit",
532 | "language": "python",
533 | "name": "python3"
534 | },
535 | "language_info": {
536 | "codemirror_mode": {
537 | "name": "ipython",
538 | "version": 3
539 | },
540 | "file_extension": ".py",
541 | "mimetype": "text/x-python",
542 | "name": "python",
543 | "nbconvert_exporter": "python",
544 | "pygments_lexer": "ipython3",
545 | "version": "3.10.6"
546 | },
547 | "orig_nbformat": 4,
548 | "vscode": {
549 | "interpreter": {
550 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
551 | }
552 | }
553 | },
554 | "nbformat": 4,
555 | "nbformat_minor": 2
556 | }
557 |
--------------------------------------------------------------------------------
/linear_regression/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/taureanjoe/Machine-Learning/2d095810454989fafec46017a4a6d08631c5b999/linear_regression/.DS_Store
--------------------------------------------------------------------------------
/linear_regression/cal_housing_price_prediciton.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {
7 | "id": "-riwMJJihZJs"
8 | },
9 | "outputs": [],
10 | "source": [
11 | "# Linear Regression - Ordinary Least Squares\n",
12 | "# 1. The line with the least Mean Squared Error (there is only one best-fit line)\n",
13 | "\n",
14 | "# Y = mx + c\n",
15 | "# price = m*(sq ft) + intercept\n",
16 | "\n",
17 | "# MSE - mean squared error\n",
18 | "# Mean of Squared Differences between Actual and Estimation (Prediction)\n",
19 | "\n",
20 | "# difficult \n",
21 | "# features: sqft, number of bedrooms, landmarks, children's play area, swimming pool, flooring\n",
22 | "# output - price\n",
23 | "\n",
24 | "# y = mx + c\n",
25 | "# y = m1x1 + m2x2 + m3x3 + m4x4 + m5x5 + m6x6 + c\n",
26 | "\n",
27 | "\n",
28 | "\n",
29 | "\n",
30 | "# -> write detailed formula\n",
31 | "\n",
32 | "\n",
33 | "\n",
34 | "\n",
35 | "# prediction = W.X + b\n",
36 | "\n",
37 | "# forward propagation\n",
38 | "# take data into model and get prediction\n",
39 | "\n",
40 | "\n",
41 | "# back propagation(learning)\n",
42 | "# calculate loss -> MSE\n",
43 | "# adjust the weights such that MSE goes down\n",
44 | "\n",
45 | "# optimization - gradient descent \n",
46 | "# code\n",
47 | "\n",
48 | "# what are the weights set to?\n",
49 | "# 1. weights are initialized randomly \n",
50 | "# as a part of training/learning, these weights need to be adjusted\n",
51 | "\n",
52 | "# how did we get the best line?\n",
53 | "# 1. adjusted weights.X + adjusted bias => best line\n",
54 | "\n",
55 | "# Geoffrey Hinton\n",
56 | "# Yann LeCun\n",
57 | "\n",
58 | "\n"
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": 2,
64 | "metadata": {
65 | "id": "aBWG14fxaD5g"
66 | },
67 | "outputs": [],
68 | "source": [
69 | "import pandas as pd\n",
70 | "import numpy as np"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 3,
76 | "metadata": {
77 | "id": "xBTEYsdzaL7o"
78 | },
79 | "outputs": [],
80 | "source": [
81 | "data = pd.read_csv(\"california_housing_train.csv\")"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": []
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": 4,
94 | "metadata": {
95 | "colab": {
96 | "base_uri": "https://localhost:8080/",
97 | "height": 268
98 | },
99 | "id": "EPCiOuBkanla",
100 | "outputId": "ac87dd62-79eb-4978-8225-9ddeb7dfcd4c"
101 | },
102 | "outputs": [
103 | {
104 | "data": {
105 | "text/html": [
106 | "\n",
107 | "\n",
120 | "
\n",
121 | " \n",
122 | " \n",
123 | " \n",
124 | " longitude \n",
125 | " latitude \n",
126 | " housing_median_age \n",
127 | " total_rooms \n",
128 | " total_bedrooms \n",
129 | " population \n",
130 | " households \n",
131 | " median_income \n",
132 | " median_house_value \n",
133 | " \n",
134 | " \n",
135 | " \n",
136 | " \n",
137 | " 0 \n",
138 | " -114.31 \n",
139 | " 34.19 \n",
140 | " 15.0 \n",
141 | " 5612.0 \n",
142 | " 1283.0 \n",
143 | " 1015.0 \n",
144 | " 472.0 \n",
145 | " 1.4936 \n",
146 | " 66900.0 \n",
147 | " \n",
148 | " \n",
149 | " 1 \n",
150 | " -114.47 \n",
151 | " 34.40 \n",
152 | " 19.0 \n",
153 | " 7650.0 \n",
154 | " 1901.0 \n",
155 | " 1129.0 \n",
156 | " 463.0 \n",
157 | " 1.8200 \n",
158 | " 80100.0 \n",
159 | " \n",
160 | " \n",
161 | " 2 \n",
162 | " -114.56 \n",
163 | " 33.69 \n",
164 | " 17.0 \n",
165 | " 720.0 \n",
166 | " 174.0 \n",
167 | " 333.0 \n",
168 | " 117.0 \n",
169 | " 1.6509 \n",
170 | " 85700.0 \n",
171 | " \n",
172 | " \n",
173 | " 3 \n",
174 | " -114.57 \n",
175 | " 33.64 \n",
176 | " 14.0 \n",
177 | " 1501.0 \n",
178 | " 337.0 \n",
179 | " 515.0 \n",
180 | " 226.0 \n",
181 | " 3.1917 \n",
182 | " 73400.0 \n",
183 | " \n",
184 | " \n",
185 | " 4 \n",
186 | " -114.57 \n",
187 | " 33.57 \n",
188 | " 20.0 \n",
189 | " 1454.0 \n",
190 | " 326.0 \n",
191 | " 624.0 \n",
192 | " 262.0 \n",
193 | " 1.9250 \n",
194 | " 65500.0 \n",
195 | " \n",
196 | " \n",
197 | "
\n",
198 | "
"
199 | ],
200 | "text/plain": [
201 | " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
202 | "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
203 | "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
204 | "2 -114.56 33.69 17.0 720.0 174.0 \n",
205 | "3 -114.57 33.64 14.0 1501.0 337.0 \n",
206 | "4 -114.57 33.57 20.0 1454.0 326.0 \n",
207 | "\n",
208 | " population households median_income median_house_value \n",
209 | "0 1015.0 472.0 1.4936 66900.0 \n",
210 | "1 1129.0 463.0 1.8200 80100.0 \n",
211 | "2 333.0 117.0 1.6509 85700.0 \n",
212 | "3 515.0 226.0 3.1917 73400.0 \n",
213 | "4 624.0 262.0 1.9250 65500.0 "
214 | ]
215 | },
216 | "execution_count": 4,
217 | "metadata": {},
218 | "output_type": "execute_result"
219 | }
220 | ],
221 | "source": [
222 | "data.head()"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 5,
228 | "metadata": {
229 | "colab": {
230 | "base_uri": "https://localhost:8080/"
231 | },
232 | "id": "RqrjPopia6uf",
233 | "outputId": "b8609e6b-0222-451c-e2b1-2c09a5eac153"
234 | },
235 | "outputs": [
236 | {
237 | "data": {
238 | "text/plain": [
239 | "longitude 0\n",
240 | "latitude 0\n",
241 | "housing_median_age 0\n",
242 | "total_rooms 0\n",
243 | "total_bedrooms 0\n",
244 | "population 0\n",
245 | "households 0\n",
246 | "median_income 0\n",
247 | "median_house_value 0\n",
248 | "dtype: int64"
249 | ]
250 | },
251 | "execution_count": 5,
252 | "metadata": {},
253 | "output_type": "execute_result"
254 | }
255 | ],
256 | "source": [
257 | "data.isnull().sum()"
258 | ]
259 | },
260 | {
261 | "cell_type": "code",
262 | "execution_count": 6,
263 | "metadata": {
264 | "colab": {
265 | "base_uri": "https://localhost:8080/"
266 | },
267 | "id": "-kKSmydwa-CP",
268 | "outputId": "8c8b675f-f9c6-440f-b9f3-3020020ee68f"
269 | },
270 | "outputs": [
271 | {
272 | "name": "stdout",
273 | "output_type": "stream",
274 | "text": [
275 | "\n",
276 | "RangeIndex: 17000 entries, 0 to 16999\n",
277 | "Data columns (total 9 columns):\n",
278 | " # Column Non-Null Count Dtype \n",
279 | "--- ------ -------------- ----- \n",
280 | " 0 longitude 17000 non-null float64\n",
281 | " 1 latitude 17000 non-null float64\n",
282 | " 2 housing_median_age 17000 non-null float64\n",
283 | " 3 total_rooms 17000 non-null float64\n",
284 | " 4 total_bedrooms 17000 non-null float64\n",
285 | " 5 population 17000 non-null float64\n",
286 | " 6 households 17000 non-null float64\n",
287 | " 7 median_income 17000 non-null float64\n",
288 | " 8 median_house_value 17000 non-null float64\n",
289 | "dtypes: float64(9)\n",
290 | "memory usage: 1.2 MB\n"
291 | ]
292 | }
293 | ],
294 | "source": [
295 | "data.info()"
296 | ]
297 | },
298 | {
299 | "cell_type": "code",
300 | "execution_count": 7,
301 | "metadata": {
302 | "id": "ftCZURcmao1L"
303 | },
304 | "outputs": [],
305 | "source": [
306 | "# scale the data\n",
307 | "from sklearn.preprocessing import MinMaxScaler\n",
308 | "scaler = MinMaxScaler()\n",
309 | "data.iloc[:,:] = scaler.fit_transform(data)"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 8,
315 | "metadata": {
316 | "colab": {
317 | "base_uri": "https://localhost:8080/",
318 | "height": 268
319 | },
320 | "id": "k4vBLS8rez7D",
321 | "outputId": "9baa0559-c7cd-4591-81f7-ae70289b3bb7"
322 | },
323 | "outputs": [
324 | {
325 | "data": {
326 | "text/html": [
327 | "\n",
328 | "\n",
341 | "
\n",
342 | " \n",
343 | " \n",
344 | " \n",
345 | " longitude \n",
346 | " latitude \n",
347 | " housing_median_age \n",
348 | " total_rooms \n",
349 | " total_bedrooms \n",
350 | " population \n",
351 | " households \n",
352 | " median_income \n",
353 | " median_house_value \n",
354 | " \n",
355 | " \n",
356 | " \n",
357 | " \n",
358 | " 0 \n",
359 | " 1.000000 \n",
360 | " 0.175345 \n",
361 | " 0.274510 \n",
362 | " 0.147885 \n",
363 | " 0.198945 \n",
364 | " 0.028364 \n",
365 | " 0.077454 \n",
366 | " 0.068530 \n",
367 | " 0.107012 \n",
368 | " \n",
369 | " \n",
370 | " 1 \n",
371 | " 0.984064 \n",
372 | " 0.197662 \n",
373 | " 0.352941 \n",
374 | " 0.201608 \n",
375 | " 0.294848 \n",
376 | " 0.031559 \n",
377 | " 0.075974 \n",
378 | " 0.091040 \n",
379 | " 0.134228 \n",
380 | " \n",
381 | " \n",
382 | " 2 \n",
383 | " 0.975100 \n",
384 | " 0.122210 \n",
385 | " 0.313725 \n",
386 | " 0.018927 \n",
387 | " 0.026847 \n",
388 | " 0.009249 \n",
389 | " 0.019076 \n",
390 | " 0.079378 \n",
391 | " 0.145775 \n",
392 | " \n",
393 | " \n",
394 | " 3 \n",
395 | " 0.974104 \n",
396 | " 0.116897 \n",
397 | " 0.254902 \n",
398 | " 0.039515 \n",
399 | " 0.052142 \n",
400 | " 0.014350 \n",
401 | " 0.037000 \n",
402 | " 0.185639 \n",
403 | " 0.120414 \n",
404 | " \n",
405 | " \n",
406 | " 4 \n",
407 | " 0.974104 \n",
408 | " 0.109458 \n",
409 | " 0.372549 \n",
410 | " 0.038276 \n",
411 | " 0.050435 \n",
412 | " 0.017405 \n",
413 | " 0.042921 \n",
414 | " 0.098281 \n",
415 | " 0.104125 \n",
416 | " \n",
417 | " \n",
418 | "
\n",
419 | "
"
420 | ],
421 | "text/plain": [
422 | " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
423 | "0 1.000000 0.175345 0.274510 0.147885 0.198945 \n",
424 | "1 0.984064 0.197662 0.352941 0.201608 0.294848 \n",
425 | "2 0.975100 0.122210 0.313725 0.018927 0.026847 \n",
426 | "3 0.974104 0.116897 0.254902 0.039515 0.052142 \n",
427 | "4 0.974104 0.109458 0.372549 0.038276 0.050435 \n",
428 | "\n",
429 | " population households median_income median_house_value \n",
430 | "0 0.028364 0.077454 0.068530 0.107012 \n",
431 | "1 0.031559 0.075974 0.091040 0.134228 \n",
432 | "2 0.009249 0.019076 0.079378 0.145775 \n",
433 | "3 0.014350 0.037000 0.185639 0.120414 \n",
434 | "4 0.017405 0.042921 0.098281 0.104125 "
435 | ]
436 | },
437 | "execution_count": 8,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "data.head()"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 25,
449 | "metadata": {
450 | "id": "895IpB4Ee1XF"
451 | },
452 | "outputs": [],
453 | "source": [
454 | "X = data.iloc[:, :-1].values # features\n",
455 | "Y = data.iloc[:, -1].values # target/output"
456 | ]
457 | },
458 | {
459 | "cell_type": "code",
460 | "execution_count": 10,
461 | "metadata": {
462 | "colab": {
463 | "base_uri": "https://localhost:8080/"
464 | },
465 | "id": "vNQ_VkxEfTqV",
466 | "outputId": "e80fe680-9043-4750-f215-910d0ab1e7de"
467 | },
468 | "outputs": [
469 | {
470 | "data": {
471 | "text/plain": [
472 | "((17000, 8), (17000,))"
473 | ]
474 | },
475 | "execution_count": 10,
476 | "metadata": {},
477 | "output_type": "execute_result"
478 | }
479 | ],
480 | "source": [
481 | "X.shape, Y.shape"
482 | ]
483 | },
484 | {
485 | "cell_type": "code",
486 | "execution_count": 11,
487 | "metadata": {
488 | "id": "cbuUe8V7fUVz"
489 | },
490 | "outputs": [],
491 | "source": [
492 | "from sklearn.model_selection import train_test_split\n",
493 | "Xtrain, Xtest, Ytrain, Ytest = train_test_split(X,Y, test_size = 0.2, shuffle = True, random_state = 4)"
494 | ]
495 | },
496 | {
497 | "cell_type": "code",
498 | "execution_count": 12,
499 | "metadata": {
500 | "colab": {
501 | "base_uri": "https://localhost:8080/"
502 | },
503 | "id": "fp4XNOPrfp1N",
504 | "outputId": "7a5bb691-d470-4941-b668-e1a7ef356eea"
505 | },
506 | "outputs": [
507 | {
508 | "data": {
509 | "text/html": [
510 | "LinearRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
511 | ],
512 | "text/plain": [
513 | "LinearRegression()"
514 | ]
515 | },
516 | "execution_count": 12,
517 | "metadata": {},
518 | "output_type": "execute_result"
519 | }
520 | ],
521 | "source": [
522 | "from sklearn.linear_model import LinearRegression\n",
523 | "\n",
524 | "model = LinearRegression()\n",
525 | "model.fit(Xtrain, Ytrain)\n"
526 | ]
527 | },
528 | {
529 | "cell_type": "code",
530 | "execution_count": 13,
531 | "metadata": {
532 | "id": "Fj_EkhbfgJC6"
533 | },
534 | "outputs": [],
535 | "source": [
536 | "#MSE\n",
537 | "# mean of squared differences between actual and predictions\n",
538 | "\n",
539 | "def MSE(actual, prediction):\n",
540 | " error = actual - prediction\n",
541 | " squared = np.square(error)\n",
542 | " return np.mean(squared)\n"
543 | ]
544 | },
545 | {
546 | "cell_type": "code",
547 | "execution_count": 14,
548 | "metadata": {
549 | "colab": {
550 | "base_uri": "https://localhost:8080/"
551 | },
552 | "id": "1Dfdtn3Rgnt7",
553 | "outputId": "74e024f2-1b36-473d-8714-d568f24e9714"
554 | },
555 | "outputs": [
556 | {
557 | "data": {
558 | "text/plain": [
559 | "0.02000868218950632"
560 | ]
561 | },
562 | "execution_count": 14,
563 | "metadata": {},
564 | "output_type": "execute_result"
565 | }
566 | ],
567 | "source": [
568 | "prediction_testing = model.predict(Xtest)\n",
569 | "\n",
570 | "MSE(Ytest, prediction_testing)"
571 | ]
572 | },
573 | {
574 | "cell_type": "code",
575 | "execution_count": 15,
576 | "metadata": {
577 | "colab": {
578 | "base_uri": "https://localhost:8080/"
579 | },
580 | "id": "xDSQa_xdg3zl",
581 | "outputId": "08890285-e196-427b-a026-cb9c8fee3c8d"
582 | },
583 | "outputs": [
584 | {
585 | "data": {
586 | "text/plain": [
587 | "array([-0.91447718, -0.84873666, 0.11771612, -0.66327286, 1.57499784,\n",
588 | " -2.68089479, 0.50641965, 1.20543441])"
589 | ]
590 | },
591 | "execution_count": 15,
592 | "metadata": {},
593 | "output_type": "execute_result"
594 | }
595 | ],
596 | "source": [
597 | "model.coef_"
598 | ]
599 | },
600 | {
601 | "cell_type": "code",
602 | "execution_count": 16,
603 | "metadata": {
604 | "id": "2A9HyOaolna2"
605 | },
606 | "outputs": [],
607 | "source": [
608 | "# rule\n",
609 | "# a.b => the number of columns of a must equal the number of rows of b\n",
610 | "\n",
611 | "def prediction(x, weights, b):\n",
612 | " return x.dot(weights) + b"
613 | ]
614 | },
615 | {
616 | "cell_type": "code",
617 | "execution_count": 17,
618 | "metadata": {
619 | "colab": {
620 | "base_uri": "https://localhost:8080/"
621 | },
622 | "id": "aIEoQhYKoKdp",
623 | "outputId": "ce369dbb-e8f3-4a43-bbc1-5bc76afe827a"
624 | },
625 | "outputs": [
626 | {
627 | "data": {
628 | "text/plain": [
629 | "(3400, 8)"
630 | ]
631 | },
632 | "execution_count": 17,
633 | "metadata": {},
634 | "output_type": "execute_result"
635 | }
636 | ],
637 | "source": [
638 | "Xtest.shape"
639 | ]
640 | },
641 | {
642 | "cell_type": "code",
643 | "execution_count": 18,
644 | "metadata": {
645 | "colab": {
646 | "base_uri": "https://localhost:8080/"
647 | },
648 | "id": "8NNVeU0ioL8a",
649 | "outputId": "cfa17670-e62c-46df-9811-e6b223a3ca0e"
650 | },
651 | "outputs": [
652 | {
653 | "data": {
654 | "text/plain": [
655 | "array([0.28914931, 0.44840173, 0.55358487, ..., 0.20538748, 0.52725745,\n",
656 | " 0.75157992])"
657 | ]
658 | },
659 | "execution_count": 18,
660 | "metadata": {},
661 | "output_type": "execute_result"
662 | }
663 | ],
664 | "source": [
665 | "prediction(Xtest, model.coef_, model.intercept_)"
666 | ]
667 | },
668 | {
669 | "cell_type": "code",
670 | "execution_count": 19,
671 | "metadata": {
672 | "colab": {
673 | "base_uri": "https://localhost:8080/"
674 | },
675 | "id": "zucBSZthoQuS",
676 | "outputId": "9e12769a-91e2-4e7b-be46-e9e2750900b9"
677 | },
678 | "outputs": [
679 | {
680 | "data": {
681 | "text/plain": [
682 | "array([0.28914931, 0.44840173, 0.55358487, ..., 0.20538748, 0.52725745,\n",
683 | " 0.75157992])"
684 | ]
685 | },
686 | "execution_count": 19,
687 | "metadata": {},
688 | "output_type": "execute_result"
689 | }
690 | ],
691 | "source": [
692 | "model.predict(Xtest)"
693 | ]
694 | },
695 | {
696 | "cell_type": "code",
697 | "execution_count": 20,
698 | "metadata": {
699 | "id": "USZmnhqboS_g"
700 | },
701 | "outputs": [],
702 | "source": [
703 | "# this confirms that our prediction function gives exactly the same result as sklearn's model.predict"
704 | ]
705 | },
706 | {
707 | "cell_type": "code",
708 | "execution_count": 21,
709 | "metadata": {
710 | "id": "wp3eY83voZoL"
711 | },
712 | "outputs": [],
713 | "source": [
714 | "# Lasso and Ridge Regression\n",
715 | "\n"
716 | ]
717 | },
718 | {
719 | "cell_type": "code",
720 | "execution_count": 22,
721 | "metadata": {},
722 | "outputs": [
723 | {
724 | "data": {
725 | "text/html": [
726 | "Lasso() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
727 | ],
728 | "text/plain": [
729 | "Lasso()"
730 | ]
731 | },
732 | "execution_count": 22,
733 | "metadata": {},
734 | "output_type": "execute_result"
735 | }
736 | ],
737 | "source": [
738 | "from sklearn.linear_model import Lasso, Ridge\n",
739 | "\n",
740 | "model = Lasso()\n",
741 | "model.fit(Xtrain,Ytrain)\n",
742 | "\n",
743 | "Lasso()"
744 | ]
745 | }
746 | ],
747 | "metadata": {
748 | "colab": {
749 | "provenance": []
750 | },
751 | "kernelspec": {
752 | "display_name": "Python 3.10.6 64-bit",
753 | "language": "python",
754 | "name": "python3"
755 | },
756 | "language_info": {
757 | "codemirror_mode": {
758 | "name": "ipython",
759 | "version": 3
760 | },
761 | "file_extension": ".py",
762 | "mimetype": "text/x-python",
763 | "name": "python",
764 | "nbconvert_exporter": "python",
765 | "pygments_lexer": "ipython3",
766 | "version": "3.10.6"
767 | },
768 | "vscode": {
769 | "interpreter": {
770 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
771 | }
772 | }
773 | },
774 | "nbformat": 4,
775 | "nbformat_minor": 0
776 | }
777 |
--------------------------------------------------------------------------------
/linear_regression/price_prediction.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/html": [
21 | "\n",
22 | "\n",
35 | "
\n",
36 | " \n",
37 | " \n",
38 | " \n",
39 | " longitude \n",
40 | " latitude \n",
41 | " housing_median_age \n",
42 | " total_rooms \n",
43 | " total_bedrooms \n",
44 | " population \n",
45 | " households \n",
46 | " median_income \n",
47 | " median_house_value \n",
48 | " \n",
49 | " \n",
50 | " \n",
51 | " \n",
52 | " 0 \n",
53 | " -114.31 \n",
54 | " 34.19 \n",
55 | " 15.0 \n",
56 | " 5612.0 \n",
57 | " 1283.0 \n",
58 | " 1015.0 \n",
59 | " 472.0 \n",
60 | " 1.4936 \n",
61 | " 66900.0 \n",
62 | " \n",
63 | " \n",
64 | " 1 \n",
65 | " -114.47 \n",
66 | " 34.40 \n",
67 | " 19.0 \n",
68 | " 7650.0 \n",
69 | " 1901.0 \n",
70 | " 1129.0 \n",
71 | " 463.0 \n",
72 | " 1.8200 \n",
73 | " 80100.0 \n",
74 | " \n",
75 | " \n",
76 | " 2 \n",
77 | " -114.56 \n",
78 | " 33.69 \n",
79 | " 17.0 \n",
80 | " 720.0 \n",
81 | " 174.0 \n",
82 | " 333.0 \n",
83 | " 117.0 \n",
84 | " 1.6509 \n",
85 | " 85700.0 \n",
86 | " \n",
87 | " \n",
88 | " 3 \n",
89 | " -114.57 \n",
90 | " 33.64 \n",
91 | " 14.0 \n",
92 | " 1501.0 \n",
93 | " 337.0 \n",
94 | " 515.0 \n",
95 | " 226.0 \n",
96 | " 3.1917 \n",
97 | " 73400.0 \n",
98 | " \n",
99 | " \n",
100 | " 4 \n",
101 | " -114.57 \n",
102 | " 33.57 \n",
103 | " 20.0 \n",
104 | " 1454.0 \n",
105 | " 326.0 \n",
106 | " 624.0 \n",
107 | " 262.0 \n",
108 | " 1.9250 \n",
109 | " 65500.0 \n",
110 | " \n",
111 | " \n",
112 | "
\n",
113 | "
"
114 | ],
115 | "text/plain": [
116 | " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
117 | "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
118 | "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
119 | "2 -114.56 33.69 17.0 720.0 174.0 \n",
120 | "3 -114.57 33.64 14.0 1501.0 337.0 \n",
121 | "4 -114.57 33.57 20.0 1454.0 326.0 \n",
122 | "\n",
123 | " population households median_income median_house_value \n",
124 | "0 1015.0 472.0 1.4936 66900.0 \n",
125 | "1 1129.0 463.0 1.8200 80100.0 \n",
126 | "2 333.0 117.0 1.6509 85700.0 \n",
127 | "3 515.0 226.0 3.1917 73400.0 \n",
128 | "4 624.0 262.0 1.9250 65500.0 "
129 | ]
130 | },
131 | "execution_count": 2,
132 | "metadata": {},
133 | "output_type": "execute_result"
134 | }
135 | ],
136 | "source": [
137 | "data = pd.read_csv(\"california_housing_train.csv\")\n",
138 | "data.head()"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 3,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "data": {
148 | "text/plain": [
149 | "longitude 0\n",
150 | "latitude 0\n",
151 | "housing_median_age 0\n",
152 | "total_rooms 0\n",
153 | "total_bedrooms 0\n",
154 | "population 0\n",
155 | "households 0\n",
156 | "median_income 0\n",
157 | "median_house_value 0\n",
158 | "dtype: int64"
159 | ]
160 | },
161 | "execution_count": 3,
162 | "metadata": {},
163 | "output_type": "execute_result"
164 | }
165 | ],
166 | "source": [
167 | "data.isnull().sum()"
168 | ]
169 | },
170 | {
171 | "cell_type": "code",
172 | "execution_count": 4,
173 | "metadata": {},
174 | "outputs": [
175 | {
176 | "name": "stdout",
177 | "output_type": "stream",
178 | "text": [
179 | "\n",
180 | "RangeIndex: 17000 entries, 0 to 16999\n",
181 | "Data columns (total 9 columns):\n",
182 | " # Column Non-Null Count Dtype \n",
183 | "--- ------ -------------- ----- \n",
184 | " 0 longitude 17000 non-null float64\n",
185 | " 1 latitude 17000 non-null float64\n",
186 | " 2 housing_median_age 17000 non-null float64\n",
187 | " 3 total_rooms 17000 non-null float64\n",
188 | " 4 total_bedrooms 17000 non-null float64\n",
189 | " 5 population 17000 non-null float64\n",
190 | " 6 households 17000 non-null float64\n",
191 | " 7 median_income 17000 non-null float64\n",
192 | " 8 median_house_value 17000 non-null float64\n",
193 | "dtypes: float64(9)\n",
194 | "memory usage: 1.2 MB\n"
195 | ]
196 | }
197 | ],
198 | "source": [
199 | "data.info()"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 5,
205 | "metadata": {},
206 | "outputs": [],
207 | "source": [
208 | "# scale the data\n",
209 | "\n",
210 | "from sklearn.preprocessing import MinMaxScaler\n",
211 | "scaler = MinMaxScaler()\n",
212 | "data.iloc[:,:] = scaler.fit_transform(data)"
213 | ]
214 | },
215 | {
216 | "cell_type": "code",
217 | "execution_count": 6,
218 | "metadata": {},
219 | "outputs": [
220 | {
221 | "data": {
222 | "text/html": [
223 | "\n",
224 | "\n",
237 | "
\n",
238 | " \n",
239 | " \n",
240 | " \n",
241 | " longitude \n",
242 | " latitude \n",
243 | " housing_median_age \n",
244 | " total_rooms \n",
245 | " total_bedrooms \n",
246 | " population \n",
247 | " households \n",
248 | " median_income \n",
249 | " median_house_value \n",
250 | " \n",
251 | " \n",
252 | " \n",
253 | " \n",
254 | " 0 \n",
255 | " 1.000000 \n",
256 | " 0.175345 \n",
257 | " 0.274510 \n",
258 | " 0.147885 \n",
259 | " 0.198945 \n",
260 | " 0.028364 \n",
261 | " 0.077454 \n",
262 | " 0.068530 \n",
263 | " 0.107012 \n",
264 | " \n",
265 | " \n",
266 | " 1 \n",
267 | " 0.984064 \n",
268 | " 0.197662 \n",
269 | " 0.352941 \n",
270 | " 0.201608 \n",
271 | " 0.294848 \n",
272 | " 0.031559 \n",
273 | " 0.075974 \n",
274 | " 0.091040 \n",
275 | " 0.134228 \n",
276 | " \n",
277 | " \n",
278 | " 2 \n",
279 | " 0.975100 \n",
280 | " 0.122210 \n",
281 | " 0.313725 \n",
282 | " 0.018927 \n",
283 | " 0.026847 \n",
284 | " 0.009249 \n",
285 | " 0.019076 \n",
286 | " 0.079378 \n",
287 | " 0.145775 \n",
288 | " \n",
289 | " \n",
290 | " 3 \n",
291 | " 0.974104 \n",
292 | " 0.116897 \n",
293 | " 0.254902 \n",
294 | " 0.039515 \n",
295 | " 0.052142 \n",
296 | " 0.014350 \n",
297 | " 0.037000 \n",
298 | " 0.185639 \n",
299 | " 0.120414 \n",
300 | " \n",
301 | " \n",
302 | " 4 \n",
303 | " 0.974104 \n",
304 | " 0.109458 \n",
305 | " 0.372549 \n",
306 | " 0.038276 \n",
307 | " 0.050435 \n",
308 | " 0.017405 \n",
309 | " 0.042921 \n",
310 | " 0.098281 \n",
311 | " 0.104125 \n",
312 | " \n",
313 | " \n",
314 | "
\n",
315 | "
"
316 | ],
317 | "text/plain": [
318 | " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
319 | "0 1.000000 0.175345 0.274510 0.147885 0.198945 \n",
320 | "1 0.984064 0.197662 0.352941 0.201608 0.294848 \n",
321 | "2 0.975100 0.122210 0.313725 0.018927 0.026847 \n",
322 | "3 0.974104 0.116897 0.254902 0.039515 0.052142 \n",
323 | "4 0.974104 0.109458 0.372549 0.038276 0.050435 \n",
324 | "\n",
325 | " population households median_income median_house_value \n",
326 | "0 0.028364 0.077454 0.068530 0.107012 \n",
327 | "1 0.031559 0.075974 0.091040 0.134228 \n",
328 | "2 0.009249 0.019076 0.079378 0.145775 \n",
329 | "3 0.014350 0.037000 0.185639 0.120414 \n",
330 | "4 0.017405 0.042921 0.098281 0.104125 "
331 | ]
332 | },
333 | "execution_count": 6,
334 | "metadata": {},
335 | "output_type": "execute_result"
336 | }
337 | ],
338 | "source": [
339 | "data.head()"
340 | ]
341 | },
342 | {
343 | "cell_type": "code",
344 | "execution_count": 7,
345 | "metadata": {},
346 | "outputs": [],
347 | "source": [
348 | "X = data.iloc[:,:-1].values #features\n",
349 | "Y = data.iloc[:,-1].values #target/output"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 8,
355 | "metadata": {},
356 | "outputs": [
357 | {
358 | "data": {
359 | "text/plain": [
360 | "((17000, 8), (17000,))"
361 | ]
362 | },
363 | "execution_count": 8,
364 | "metadata": {},
365 | "output_type": "execute_result"
366 | }
367 | ],
368 | "source": [
369 | "X.shape, Y.shape"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 10,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "from sklearn.model_selection import train_test_split\n",
379 | "\n",
380 | "Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size = 0.2, shuffle=True, random_state=4)"
381 | ]
382 | },
383 | {
384 | "cell_type": "code",
385 | "execution_count": 11,
386 | "metadata": {},
387 | "outputs": [
388 | {
389 | "data": {
390 | "text/html": [
391 | "LinearRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
392 | ],
393 | "text/plain": [
394 | "LinearRegression()"
395 | ]
396 | },
397 | "execution_count": 11,
398 | "metadata": {},
399 | "output_type": "execute_result"
400 | }
401 | ],
402 | "source": [
403 | "from sklearn.linear_model import LinearRegression\n",
404 | "\n",
405 | "model = LinearRegression()\n",
406 | "model.fit(Xtrain, Ytrain)"
407 | ]
408 | },
409 | {
410 | "cell_type": "code",
411 | "execution_count": 12,
412 | "metadata": {},
413 | "outputs": [],
414 | "source": [
415 | "# MSE\n",
416 | "# mean of squared differences between actual and predictions\n",
417 | "\n",
418 | "def MSE(actual, prediction):\n",
419 | " error = actual - prediction\n",
420 | " squared = np.square(error)\n",
421 | " return np.mean(squared)"
422 | ]
423 | },
424 | {
425 | "cell_type": "code",
426 | "execution_count": 13,
427 | "metadata": {},
428 | "outputs": [
429 | {
430 | "data": {
431 | "text/plain": [
432 | "0.02000868218950632"
433 | ]
434 | },
435 | "execution_count": 13,
436 | "metadata": {},
437 | "output_type": "execute_result"
438 | }
439 | ],
440 | "source": [
441 | "prediction_testing = model.predict(Xtest)\n",
442 | "\n",
443 | "MSE(Ytest, prediction_testing)"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 14,
449 | "metadata": {},
450 | "outputs": [
451 | {
452 | "data": {
453 | "text/plain": [
454 | "array([-0.91447718, -0.84873666, 0.11771612, -0.66327286, 1.57499784,\n",
455 | " -2.68089479, 0.50641965, 1.20543441])"
456 | ]
457 | },
458 | "execution_count": 14,
459 | "metadata": {},
460 | "output_type": "execute_result"
461 | }
462 | ],
463 | "source": [
464 | "model.coef_"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 15,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "0.7461423305403194"
476 | ]
477 | },
478 | "execution_count": 15,
479 | "metadata": {},
480 | "output_type": "execute_result"
481 | }
482 | ],
483 | "source": [
484 | "model.intercept_"
485 | ]
486 | },
487 | {
488 | "cell_type": "code",
489 | "execution_count": 16,
490 | "metadata": {},
491 | "outputs": [],
492 | "source": [
493 | "def prediction(x, weights, b):\n",
494 | " return x.dot(weights) + b"
495 | ]
496 | },
497 | {
498 | "cell_type": "code",
499 | "execution_count": 17,
500 | "metadata": {},
501 | "outputs": [
502 | {
503 | "data": {
504 | "text/plain": [
505 | "array([0.28914931, 0.44840173, 0.55358487, ..., 0.20538748, 0.52725745,\n",
506 | " 0.75157992])"
507 | ]
508 | },
509 | "execution_count": 17,
510 | "metadata": {},
511 | "output_type": "execute_result"
512 | }
513 | ],
514 | "source": [
515 | "prediction(Xtest, model.coef_, model.intercept_)"
516 | ]
517 | },
518 | {
519 | "cell_type": "code",
520 | "execution_count": 18,
521 | "metadata": {},
522 | "outputs": [
523 | {
524 | "data": {
525 | "text/plain": [
526 | "array([0.28914931, 0.44840173, 0.55358487, ..., 0.20538748, 0.52725745,\n",
527 | " 0.75157992])"
528 | ]
529 | },
530 | "execution_count": 18,
531 | "metadata": {},
532 | "output_type": "execute_result"
533 | }
534 | ],
535 | "source": [
536 | "prediction_testing"
537 | ]
538 | },
539 | {
540 | "cell_type": "code",
541 | "execution_count": null,
542 | "metadata": {},
543 | "outputs": [],
544 | "source": []
545 | }
546 | ],
547 | "metadata": {
548 | "kernelspec": {
549 | "display_name": "Python 3.10.6 64-bit",
550 | "language": "python",
551 | "name": "python3"
552 | },
553 | "language_info": {
554 | "codemirror_mode": {
555 | "name": "ipython",
556 | "version": 3
557 | },
558 | "file_extension": ".py",
559 | "mimetype": "text/x-python",
560 | "name": "python",
561 | "nbconvert_exporter": "python",
562 | "pygments_lexer": "ipython3",
563 | "version": "3.10.6"
564 | },
565 | "orig_nbformat": 4,
566 | "vscode": {
567 | "interpreter": {
568 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
569 | }
570 | }
571 | },
572 | "nbformat": 4,
573 | "nbformat_minor": 2
574 | }
575 |
--------------------------------------------------------------------------------
/linear_regression/regression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 26,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 27,
16 | "metadata": {},
17 | "outputs": [
18 | {
19 | "data": {
20 | "text/html": [
21 | "\n",
22 | "\n",
35 | "
\n",
36 | " \n",
37 | " \n",
38 | " \n",
39 | " longitude \n",
40 | " latitude \n",
41 | " housing_median_age \n",
42 | " total_rooms \n",
43 | " total_bedrooms \n",
44 | " population \n",
45 | " households \n",
46 | " median_income \n",
47 | " median_house_value \n",
48 | " \n",
49 | " \n",
50 | " \n",
51 | " \n",
52 | " 0 \n",
53 | " -114.31 \n",
54 | " 34.19 \n",
55 | " 15.0 \n",
56 | " 5612.0 \n",
57 | " 1283.0 \n",
58 | " 1015.0 \n",
59 | " 472.0 \n",
60 | " 1.4936 \n",
61 | " 66900.0 \n",
62 | " \n",
63 | " \n",
64 | " 1 \n",
65 | " -114.47 \n",
66 | " 34.40 \n",
67 | " 19.0 \n",
68 | " 7650.0 \n",
69 | " 1901.0 \n",
70 | " 1129.0 \n",
71 | " 463.0 \n",
72 | " 1.8200 \n",
73 | " 80100.0 \n",
74 | " \n",
75 | " \n",
76 | " 2 \n",
77 | " -114.56 \n",
78 | " 33.69 \n",
79 | " 17.0 \n",
80 | " 720.0 \n",
81 | " 174.0 \n",
82 | " 333.0 \n",
83 | " 117.0 \n",
84 | " 1.6509 \n",
85 | " 85700.0 \n",
86 | " \n",
87 | " \n",
88 | " 3 \n",
89 | " -114.57 \n",
90 | " 33.64 \n",
91 | " 14.0 \n",
92 | " 1501.0 \n",
93 | " 337.0 \n",
94 | " 515.0 \n",
95 | " 226.0 \n",
96 | " 3.1917 \n",
97 | " 73400.0 \n",
98 | " \n",
99 | " \n",
100 | " 4 \n",
101 | " -114.57 \n",
102 | " 33.57 \n",
103 | " 20.0 \n",
104 | " 1454.0 \n",
105 | " 326.0 \n",
106 | " 624.0 \n",
107 | " 262.0 \n",
108 | " 1.9250 \n",
109 | " 65500.0 \n",
110 | " \n",
111 | " \n",
112 | "
\n",
113 | "
"
114 | ],
115 | "text/plain": [
116 | " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
117 | "0 -114.31 34.19 15.0 5612.0 1283.0 \n",
118 | "1 -114.47 34.40 19.0 7650.0 1901.0 \n",
119 | "2 -114.56 33.69 17.0 720.0 174.0 \n",
120 | "3 -114.57 33.64 14.0 1501.0 337.0 \n",
121 | "4 -114.57 33.57 20.0 1454.0 326.0 \n",
122 | "\n",
123 | " population households median_income median_house_value \n",
124 | "0 1015.0 472.0 1.4936 66900.0 \n",
125 | "1 1129.0 463.0 1.8200 80100.0 \n",
126 | "2 333.0 117.0 1.6509 85700.0 \n",
127 | "3 515.0 226.0 3.1917 73400.0 \n",
128 | "4 624.0 262.0 1.9250 65500.0 "
129 | ]
130 | },
131 | "execution_count": 27,
132 | "metadata": {},
133 | "output_type": "execute_result"
134 | }
135 | ],
136 | "source": [
137 | "data = pd.read_csv(\"california_housing_train.csv\")\n",
138 | "data.head()"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 28,
144 | "metadata": {},
145 | "outputs": [
146 | {
147 | "name": "stdout",
148 | "output_type": "stream",
149 | "text": [
150 | "\n",
151 | "RangeIndex: 17000 entries, 0 to 16999\n",
152 | "Data columns (total 9 columns):\n",
153 | " # Column Non-Null Count Dtype \n",
154 | "--- ------ -------------- ----- \n",
155 | " 0 longitude 17000 non-null float64\n",
156 | " 1 latitude 17000 non-null float64\n",
157 | " 2 housing_median_age 17000 non-null float64\n",
158 | " 3 total_rooms 17000 non-null float64\n",
159 | " 4 total_bedrooms 17000 non-null float64\n",
160 | " 5 population 17000 non-null float64\n",
161 | " 6 households 17000 non-null float64\n",
162 | " 7 median_income 17000 non-null float64\n",
163 | " 8 median_house_value 17000 non-null float64\n",
164 | "dtypes: float64(9)\n",
165 | "memory usage: 1.2 MB\n"
166 | ]
167 | }
168 | ],
169 | "source": [
170 | "data.info()"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 29,
176 | "metadata": {},
177 | "outputs": [
178 | {
179 | "data": {
180 | "text/plain": [
181 | "False"
182 | ]
183 | },
184 | "execution_count": 29,
185 | "metadata": {},
186 | "output_type": "execute_result"
187 | }
188 | ],
189 | "source": [
190 | "data.isnull().sum().any()"
191 | ]
192 | },
193 | {
194 | "cell_type": "code",
195 | "execution_count": 30,
196 | "metadata": {},
197 | "outputs": [
198 | {
199 | "data": {
200 | "text/html": [
201 | "\n",
202 | "\n",
215 | "
\n",
216 | " \n",
217 | " \n",
218 | " \n",
219 | " longitude \n",
220 | " latitude \n",
221 | " housing_median_age \n",
222 | " total_rooms \n",
223 | " total_bedrooms \n",
224 | " population \n",
225 | " households \n",
226 | " median_income \n",
227 | " median_house_value \n",
228 | " \n",
229 | " \n",
230 | " \n",
231 | " \n",
232 | " 0 \n",
233 | " 1.000000 \n",
234 | " 0.175345 \n",
235 | " 0.274510 \n",
236 | " 0.147885 \n",
237 | " 0.198945 \n",
238 | " 0.028364 \n",
239 | " 0.077454 \n",
240 | " 0.068530 \n",
241 | " 0.107012 \n",
242 | " \n",
243 | " \n",
244 | " 1 \n",
245 | " 0.984064 \n",
246 | " 0.197662 \n",
247 | " 0.352941 \n",
248 | " 0.201608 \n",
249 | " 0.294848 \n",
250 | " 0.031559 \n",
251 | " 0.075974 \n",
252 | " 0.091040 \n",
253 | " 0.134228 \n",
254 | " \n",
255 | " \n",
256 | " 2 \n",
257 | " 0.975100 \n",
258 | " 0.122210 \n",
259 | " 0.313725 \n",
260 | " 0.018927 \n",
261 | " 0.026847 \n",
262 | " 0.009249 \n",
263 | " 0.019076 \n",
264 | " 0.079378 \n",
265 | " 0.145775 \n",
266 | " \n",
267 | " \n",
268 | " 3 \n",
269 | " 0.974104 \n",
270 | " 0.116897 \n",
271 | " 0.254902 \n",
272 | " 0.039515 \n",
273 | " 0.052142 \n",
274 | " 0.014350 \n",
275 | " 0.037000 \n",
276 | " 0.185639 \n",
277 | " 0.120414 \n",
278 | " \n",
279 | " \n",
280 | " 4 \n",
281 | " 0.974104 \n",
282 | " 0.109458 \n",
283 | " 0.372549 \n",
284 | " 0.038276 \n",
285 | " 0.050435 \n",
286 | " 0.017405 \n",
287 | " 0.042921 \n",
288 | " 0.098281 \n",
289 | " 0.104125 \n",
290 | " \n",
291 | " \n",
292 | "
\n",
293 | "
"
294 | ],
295 | "text/plain": [
296 | " longitude latitude housing_median_age total_rooms total_bedrooms \\\n",
297 | "0 1.000000 0.175345 0.274510 0.147885 0.198945 \n",
298 | "1 0.984064 0.197662 0.352941 0.201608 0.294848 \n",
299 | "2 0.975100 0.122210 0.313725 0.018927 0.026847 \n",
300 | "3 0.974104 0.116897 0.254902 0.039515 0.052142 \n",
301 | "4 0.974104 0.109458 0.372549 0.038276 0.050435 \n",
302 | "\n",
303 | " population households median_income median_house_value \n",
304 | "0 0.028364 0.077454 0.068530 0.107012 \n",
305 | "1 0.031559 0.075974 0.091040 0.134228 \n",
306 | "2 0.009249 0.019076 0.079378 0.145775 \n",
307 | "3 0.014350 0.037000 0.185639 0.120414 \n",
308 | "4 0.017405 0.042921 0.098281 0.104125 "
309 | ]
310 | },
311 | "execution_count": 30,
312 | "metadata": {},
313 | "output_type": "execute_result"
314 | }
315 | ],
316 | "source": [
317 | "from sklearn.preprocessing import MinMaxScaler\n",
318 | "# Rescale every column, including the target in the last column, with one MinMaxScaler fitted on the full table\n",
319 | "scaler = MinMaxScaler()\n",
320 | "data.iloc[:,:] = scaler.fit_transform(data)\n",
321 | "# NOTE(review): the scaler is fitted before train_test_split runs, so held-out rows influence the scaling - consider fitting on the training rows only\n",
322 | "data.head()"
323 | ]
324 | },
325 | {
326 | "cell_type": "code",
327 | "execution_count": 31,
328 | "metadata": {},
329 | "outputs": [],
330 | "source": [
331 | "X = data.iloc[:, :-1].values\n",
332 | "\n",
333 | "Y = data.iloc[:, -1].values"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 32,
339 | "metadata": {},
340 | "outputs": [],
341 | "source": [
342 | "from sklearn.model_selection import train_test_split\n",
343 | "Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.2, shuffle=True, random_state=4)"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 33,
349 | "metadata": {},
350 | "outputs": [
351 | {
352 | "data": {
353 | "text/html": [
354 | "LinearRegression() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
355 | ],
356 | "text/plain": [
357 | "LinearRegression()"
358 | ]
359 | },
360 | "execution_count": 33,
361 | "metadata": {},
362 | "output_type": "execute_result"
363 | }
364 | ],
365 | "source": [
366 | "from sklearn.linear_model import LinearRegression\n",
367 | "\n",
368 | "model = LinearRegression()\n",
369 | "model.fit(Xtrain, Ytrain)"
370 | ]
371 | },
372 | {
373 | "cell_type": "code",
374 | "execution_count": 34,
375 | "metadata": {},
376 | "outputs": [],
377 | "source": [
378 | "prediction_testing = model.predict(Xtest)"
379 | ]
380 | },
381 | {
382 | "cell_type": "code",
383 | "execution_count": 35,
384 | "metadata": {},
385 | "outputs": [],
386 | "source": [
387 | "def MSE(actual, prediction):\n",
388 | "    \"\"\"Return the mean squared difference between `actual` and `prediction` (any numeric array-likes).\"\"\"\n",
389 | "    residual = np.asarray(actual, dtype=float) - np.asarray(prediction, dtype=float)  # float cast: lists are accepted and narrow/unsigned ints cannot wrap or overflow\n",
390 | "    return np.mean(np.square(residual))"
391 | ]
392 | },
393 | {
394 | "cell_type": "code",
395 | "execution_count": 36,
396 | "metadata": {},
397 | "outputs": [
398 | {
399 | "data": {
400 | "text/plain": [
401 | "0.02000868218950632"
402 | ]
403 | },
404 | "execution_count": 36,
405 | "metadata": {},
406 | "output_type": "execute_result"
407 | }
408 | ],
409 | "source": [
410 | "MSE(Ytest, prediction_testing)"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 37,
416 | "metadata": {},
417 | "outputs": [],
418 | "source": [
419 | "def RMSE(y_actual, y_prediction):  # root mean squared error: the square root of MSE for the same pair\n",
420 | "    return np.sqrt(MSE(actual=y_actual, prediction=y_prediction))\n",
421 | "\n",
422 | "def MAE(actual, prediction):\n",
423 | "    \"\"\"Return the mean absolute difference between `actual` and `prediction` (any numeric array-likes).\"\"\"\n",
424 | "    residual = np.asarray(actual, dtype=float) - np.asarray(prediction, dtype=float)  # float cast: lists are accepted and unsigned ints cannot wrap\n",
425 | "    return np.mean(np.abs(residual))\n",
426 | "\n",
427 | "def R2Score(y_actual, y_predicted):\n",
428 | "    \"\"\"Return the R^2 score: 1 - mean((y_actual - y_predicted)^2) / mean((y_actual - mean(y_actual))^2).\"\"\"\n",
429 | "    y_actual, y_predicted = np.asarray(y_actual, dtype=float), np.asarray(y_predicted, dtype=float)  # float cast: lists are accepted and unsigned ints cannot wrap\n",
430 | "    return 1 - np.mean(np.square(y_actual - y_predicted)) / np.mean(np.square(y_actual - np.mean(y_actual)))"
431 | ]
432 | },
433 | {
434 | "cell_type": "code",
435 | "execution_count": 38,
436 | "metadata": {},
437 | "outputs": [
438 | {
439 | "name": "stdout",
440 | "output_type": "stream",
441 | "text": [
442 | " MSE : 0.02000868218950632\n",
443 | " RMSE : 0.14145204908203457\n",
444 | " MAE : 0.10454649510468285\n",
445 | " R2Score : 0.6463260859038824\n"
446 | ]
447 | }
448 | ],
449 | "source": [
450 | "print(\" MSE : \",MSE(Ytest, prediction_testing))\n",
451 | "print(\" RMSE : \",RMSE(Ytest, prediction_testing))\n",
452 | "print(\" MAE : \",MAE(Ytest, prediction_testing))\n",
453 | "print(\" R2Score : \",R2Score(Ytest, prediction_testing))\n",
454 | "\n"
455 | ]
456 | },
457 | {
458 | "cell_type": "code",
459 | "execution_count": 39,
460 | "metadata": {},
461 | "outputs": [
462 | {
463 | "data": {
464 | "text/plain": [
465 | "array([-0.91447718, -0.84873666, 0.11771612, -0.66327286, 1.57499784,\n",
466 | " -2.68089479, 0.50641965, 1.20543441])"
467 | ]
468 | },
469 | "execution_count": 39,
470 | "metadata": {},
471 | "output_type": "execute_result"
472 | }
473 | ],
474 | "source": [
475 | "model.coef_"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": 40,
481 | "metadata": {},
482 | "outputs": [
483 | {
484 | "data": {
485 | "text/plain": [
486 | "0.7461423305403194"
487 | ]
488 | },
489 | "execution_count": 40,
490 | "metadata": {},
491 | "output_type": "execute_result"
492 | }
493 | ],
494 | "source": [
495 | "model.intercept_"
496 | ]
497 | },
498 | {
499 | "cell_type": "code",
500 | "execution_count": 41,
501 | "metadata": {},
502 | "outputs": [],
503 | "source": [
504 | "def prediction(x, weights, b):\n",
505 | "    return b + x.dot(weights)  # linear model output: bias plus the dot product of inputs and weights"
506 | ]
507 | },
508 | {
509 | "cell_type": "code",
510 | "execution_count": 42,
511 | "metadata": {},
512 | "outputs": [
513 | {
514 | "data": {
515 | "text/plain": [
516 | "array([0.28914931, 0.44840173, 0.55358487, ..., 0.20538748, 0.52725745,\n",
517 | " 0.75157992])"
518 | ]
519 | },
520 | "execution_count": 42,
521 | "metadata": {},
522 | "output_type": "execute_result"
523 | }
524 | ],
525 | "source": [
526 | "prediction(Xtest, model.coef_, model.intercept_)"
527 | ]
528 | },
529 | {
530 | "cell_type": "code",
531 | "execution_count": 43,
532 | "metadata": {},
533 | "outputs": [
534 | {
535 | "data": {
536 | "text/plain": [
537 | "array([0.28914931, 0.44840173, 0.55358487, ..., 0.20538748, 0.52725745,\n",
538 | " 0.75157992])"
539 | ]
540 | },
541 | "execution_count": 43,
542 | "metadata": {},
543 | "output_type": "execute_result"
544 | }
545 | ],
546 | "source": [
547 | "prediction_testing"
548 | ]
549 | },
550 | {
551 | "cell_type": "code",
552 | "execution_count": 44,
553 | "metadata": {},
554 | "outputs": [
555 | {
556 | "data": {
557 | "text/html": [
558 | "Lasso() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
559 | ],
560 | "text/plain": [
561 | "Lasso()"
562 | ]
563 | },
564 | "execution_count": 44,
565 | "metadata": {},
566 | "output_type": "execute_result"
567 | }
568 | ],
569 | "source": [
570 | "# Lasso regression on the same train/test split; Ridge is fitted in a later cell\n",
571 | "\n",
572 | "from sklearn.linear_model import Lasso\n",
573 | "# NOTE(review): Lasso() is created with its default alpha, and the recorded R2Score for this model is ~0 - consider trying a smaller alpha\n",
574 | "model = Lasso()\n",
575 | "\n",
576 | "model.fit(Xtrain, Ytrain)\n"
577 | ]
578 | },
579 | {
580 | "cell_type": "code",
581 | "execution_count": 45,
582 | "metadata": {},
583 | "outputs": [
584 | {
585 | "name": "stdout",
586 | "output_type": "stream",
587 | "text": [
588 | "==Lasso Regression==\n",
589 | " MSE : 0.05657465206027041\n",
590 | " RMSE : 0.23785426643276847\n",
591 | " MAE : 0.18771396740146856\n",
592 | " R2Score : -1.4815732125361691e-05\n"
593 | ]
594 | }
595 | ],
596 | "source": [
597 | "prediction_testing = model.predict(Xtest)\n",
598 | "\n",
599 | "print(\"==Lasso Regression==\")\n",
600 | "print(\" MSE : \",MSE(Ytest, prediction_testing))\n",
601 | "print(\" RMSE : \",RMSE(Ytest, prediction_testing))\n",
602 | "print(\" MAE : \",MAE(Ytest, prediction_testing))\n",
603 | "print(\" R2Score : \",R2Score(Ytest, prediction_testing))"
604 | ]
605 | },
606 | {
607 | "cell_type": "code",
608 | "execution_count": 46,
609 | "metadata": {},
610 | "outputs": [
611 | {
612 | "data": {
613 | "text/html": [
614 | "Ridge() In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
615 | ],
616 | "text/plain": [
617 | "Ridge()"
618 | ]
619 | },
620 | "execution_count": 46,
621 | "metadata": {},
622 | "output_type": "execute_result"
623 | }
624 | ],
625 | "source": [
626 | "from sklearn.linear_model import Ridge\n",
627 | "\n",
628 | "model = Ridge()\n",
629 | "\n",
630 | "model.fit(Xtrain, Ytrain)"
631 | ]
632 | },
633 | {
634 | "cell_type": "code",
635 | "execution_count": 47,
636 | "metadata": {},
637 | "outputs": [
638 | {
639 | "name": "stdout",
640 | "output_type": "stream",
641 | "text": [
642 | "==Ridge Regression==\n",
643 | " MSE : 0.020316486450436506\n",
644 | " RMSE : 0.14253591284457579\n",
645 | " MAE : 0.10540829325059124\n",
646 | " R2Score : 0.6408853309002502\n"
647 | ]
648 | }
649 | ],
650 | "source": [
651 | "prediction_testing = model.predict(Xtest)\n",
652 | "\n",
653 | "print(\"==Ridge Regression==\")\n",
654 | "print(\" MSE : \",MSE(Ytest, prediction_testing))\n",
655 | "print(\" RMSE : \",RMSE(Ytest, prediction_testing))\n",
656 | "print(\" MAE : \",MAE(Ytest, prediction_testing))\n",
657 | "print(\" R2Score : \",R2Score(Ytest, prediction_testing))"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": 48,
663 | "metadata": {},
664 | "outputs": [
665 | {
666 | "name": "stdout",
667 | "output_type": "stream",
668 | "text": [
669 | "---- Ridge Regression Lambda = 1.7 ----\n",
670 | " MSE : 0.02052184410992044\n",
671 | " RMSE : 0.14325447326321242\n",
672 | " MAE : 0.10599064844968335\n",
673 | " R2Score : 0.6372554243161278\n",
674 | "---- Ridge Regression Lambda = 2.0 ----\n",
675 | " MSE : 0.02060155172442634\n",
676 | " RMSE : 0.14353240653046384\n",
677 | " MAE : 0.10621516994931038\n",
678 | " R2Score : 0.6358465107385834\n",
679 | "---- Ridge Regression Lambda = 0.5 ----\n",
680 | " MSE : 0.020158152266279827\n",
681 | " RMSE : 0.14197940789522903\n",
682 | " MAE : 0.10495745493793113\n",
683 | " R2Score : 0.6436840494822904\n",
684 | "---- Ridge Regression Lambda = 0.2 ----\n",
685 | " MSE : 0.020065160155331427\n",
686 | " RMSE : 0.14165154483919837\n",
687 | " MAE : 0.10470406142416774\n",
688 | " R2Score : 0.6453277801162064\n",
689 | "---- Ridge Regression Lambda = 0.1 ----\n",
690 | " MSE : 0.0200360696805774\n",
691 | " RMSE : 0.14154882437017058\n",
692 | " MAE : 0.10462429756247424\n",
693 | " R2Score : 0.6458419839988873\n"
694 | ]
695 | }
696 | ],
697 | "source": [
698 | "for i in [1.7, 2.0, 0.5, 0.2, 0.1]:  # candidate values for Ridge's alpha (printed below as Lambda)\n",
699 | " model = Ridge(alpha=i)  # model is rebound on every pass; after the loop it holds the alpha=0.1 fit\n",
700 | " model.fit(Xtrain, Ytrain)\n",
701 | " predictedvalue = model.predict(Xtest)  # score each fit on the held-out split\n",
702 | " print(\"---- Ridge Regression Lambda = \", i,\" ----\")\n",
703 | " print(\" MSE : \",MSE(Ytest, predictedvalue))\n",
704 | " print(\" RMSE : \",RMSE(Ytest, predictedvalue))\n",
705 | " print(\" MAE : \",MAE(Ytest, predictedvalue))\n",
706 | " print(\" R2Score : \",R2Score(Ytest, predictedvalue))"
707 | ]
708 | }
709 | ],
710 | "metadata": {
711 | "kernelspec": {
712 | "display_name": "Python 3.10.6 64-bit",
713 | "language": "python",
714 | "name": "python3"
715 | },
716 | "language_info": {
717 | "codemirror_mode": {
718 | "name": "ipython",
719 | "version": 3
720 | },
721 | "file_extension": ".py",
722 | "mimetype": "text/x-python",
723 | "name": "python",
724 | "nbconvert_exporter": "python",
725 | "pygments_lexer": "ipython3",
726 | "version": "3.10.6"
727 | },
728 | "orig_nbformat": 4,
729 | "vscode": {
730 | "interpreter": {
731 | "hash": "aee8b7b246df8f9039afb4144a1f6fd8d2ca17a180786b69acc140d282b71a49"
732 | }
733 | }
734 | },
735 | "nbformat": 4,
736 | "nbformat_minor": 2
737 | }
738 |
--------------------------------------------------------------------------------