└── Linear regression.ipynb /Linear regression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 5, 6 | "id": "9c4a3e9c", 7 | "metadata": { 8 | "scrolled": true 9 | }, 10 | "outputs": [], 11 | "source": [ 12 | "import pandas as pd\n", 13 | "import numpy as np\n", 14 | "import matplotlib.pyplot as plt" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 8, 20 | "id": "5059a90d", 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stdout", 25 | "output_type": "stream", 26 | "text": [ 27 | " db full_path age gender\n", 28 | "0 UTKface part1/41_0_2_20170105164126307.jpg 41 1\n", 29 | "1 UTKface part1/80_1_0_20170110122217473.jpg 80 0\n", 30 | "2 UTKface part1/56_0_0_20170111202842202.jpg 56 1\n", 31 | "3 UTKface part1/82_1_0_20170110141329969.jpg 82 0\n", 32 | "4 UTKface part1/16_1_4_20170102234841875.jpg 16 0\n", 33 | "... ... ... ... ...\n", 34 | "24099 UTKface part3/31_1_3_20170119200101732.jpg 31 0\n", 35 | "24100 UTKface part3/24_1_1_20170116225255798.jpg 24 0\n", 36 | "24101 UTKface part3/39_0_3_20170119203757237.jpg 39 1\n", 37 | "24102 UTKface part3/44_1_3_20170119192758593.jpg 44 0\n", 38 | "24103 UTKface part3/78_1_0_20170120224727472.jpg 78 0\n", 39 | "\n", 40 | "[24104 rows x 4 columns]\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "data=pd.read_csv('D:\\\\Jeyashri\\\\IBM\\\\Datasets\\\\age.csv') \n", 46 | "print(data)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 9, 52 | "id": "7c97bbf9", 53 | "metadata": {}, 54 | "outputs": [ 55 | { 56 | "data": { 57 | "text/html": [ 58 | "
\n", 59 | "\n", 72 | "\n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | "
dbfull_pathagegender
0UTKfacepart1/41_0_2_20170105164126307.jpg411
1UTKfacepart1/80_1_0_20170110122217473.jpg800
2UTKfacepart1/56_0_0_20170111202842202.jpg561
3UTKfacepart1/82_1_0_20170110141329969.jpg820
4UTKfacepart1/16_1_4_20170102234841875.jpg160
\n", 120 | "
" 121 | ], 122 | "text/plain": [ 123 | " db full_path age gender\n", 124 | "0 UTKface part1/41_0_2_20170105164126307.jpg 41 1\n", 125 | "1 UTKface part1/80_1_0_20170110122217473.jpg 80 0\n", 126 | "2 UTKface part1/56_0_0_20170111202842202.jpg 56 1\n", 127 | "3 UTKface part1/82_1_0_20170110141329969.jpg 82 0\n", 128 | "4 UTKface part1/16_1_4_20170102234841875.jpg 16 0" 129 | ] 130 | }, 131 | "execution_count": 9, 132 | "metadata": {}, 133 | "output_type": "execute_result" 134 | } 135 | ], 136 | "source": [ 137 | "data.head()" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 10, 143 | "id": "16deb7f9", 144 | "metadata": {}, 145 | "outputs": [ 146 | { 147 | "data": { 148 | "image/png": "\n", 149 | "text/plain": [ 150 | "
" 151 | ] 152 | }, 153 | "metadata": { 154 | "needs_background": "light" 155 | }, 156 | "output_type": "display_data" 157 | } 158 | ], 159 | "source": [ 160 | "data.plot.scatter(x='age', y='gender');" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 5, 166 | "id": "eba049a9", 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "(7, 4)" 173 | ] 174 | }, 175 | "execution_count": 5, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "data.shape" 182 | ] 183 | }, 184 | { 185 | "cell_type": "markdown", 186 | "id": "4fd41d26", 187 | "metadata": {}, 188 | "source": [ 189 | "# Data preprocessing: separating the independent (age) and dependent (gender) variables" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 11, 195 | "id": "b508fc71", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "x=data['age'].values.reshape(-1,1)  # independent variable (feature), reshaped to one column\n", 200 | "y=data['gender'].values.reshape(-1,1)  # dependent variable (target), reshaped to one column" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": 12, 206 | "id": "be67843d", 207 | "metadata": {}, 208 | "outputs": [ 209 | { 210 | "data": { 211 | "text/plain": [ 212 | "(24104, 1)" 213 | ] 214 | }, 215 | "execution_count": 12, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "x.shape" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 9, 227 | "id": "9b90ff4a", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "SEED = 30" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 13, 237 | "id": "6bc22e00", 238 | "metadata": {}, 239 | "outputs": [], 240 | "source": [ 241 | "from sklearn.model_selection import train_test_split\n", 242 | "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = SEED)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 14, 248 | 
"id": "e515405a", 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "[[23]\n", 256 | " [ 1]\n", 257 | " [24]\n", 258 | " ...\n", 259 | " [54]\n", 260 | " [29]\n", 261 | " [52]]\n", 262 | "[[0]\n", 263 | " [0]\n", 264 | " [0]\n", 265 | " ...\n", 266 | " [1]\n", 267 | " [0]\n", 268 | " [1]]\n" 269 | ] 270 | } 271 | ], 272 | "source": [ 273 | "print(x_train)\n", 274 | "print(y_train)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 15, 280 | "id": "34119934", 281 | "metadata": {}, 282 | "outputs": [], 283 | "source": [ 284 | "from sklearn.linear_model import LinearRegression\n", 285 | "regressor = LinearRegression()" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 16, 291 | "id": "22911b97", 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/plain": [ 297 | "LinearRegression()" 298 | ] 299 | }, 300 | "execution_count": 16, 301 | "metadata": {}, 302 | "output_type": "execute_result" 303 | } 304 | ], 305 | "source": [ 306 | "regressor.fit(x_train, y_train)" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": 17, 312 | "id": "e0cb27ed", 313 | "metadata": {}, 314 | "outputs": [], 315 | "source": [ 316 | "y_pred = regressor.predict(x_test)" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": 20, 322 | "id": "079abf66", 323 | "metadata": {}, 324 | "outputs": [ 325 | { 326 | "name": "stdout", 327 | "output_type": "stream", 328 | "text": [ 329 | " Actual Predicted\n", 330 | "0 1 0.496964\n", 331 | "1 0 0.493954\n", 332 | "2 1 0.512011\n", 333 | "3 0 0.496964\n", 334 | "4 1 0.617343\n", 335 | "... ... 
...\n", 336 | "4816 0 0.505992\n", 337 | "4817 0 0.475897\n", 338 | "4818 1 0.496964\n", 339 | "4819 1 0.509002\n", 340 | "4820 1 0.484926\n", 341 | "\n", 342 | "[4821 rows x 2 columns]\n" 343 | ] 344 | } 345 | ], 346 | "source": [ 347 | "df_preds = pd.DataFrame({'Actual': y_test.squeeze(), 'Predicted': y_pred.squeeze()})\n", 348 | "print(df_preds)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 21, 354 | "id": "d6df4ee5", 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "from sklearn.metrics import mean_absolute_error, mean_squared_error\n", 359 | "mae = mean_absolute_error(y_test, y_pred)\n", 360 | "mse = mean_squared_error(y_test, y_pred)\n", 361 | "rmse = np.sqrt(mse)" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 20, 367 | "id": "5d631858", 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "name": "stdout", 372 | "output_type": "stream", 373 | "text": [ 374 | "Mean absolute error: 7.59\n", 375 | "Mean squared error: 82.12\n", 376 | "Root mean squared error: 9.06\n" 377 | ] 378 | } 379 | ], 380 | "source": [ 381 | "print(f'Mean absolute error: {mae:.2f}')\n", 382 | "print(f'Mean squared error: {mse:.2f}')\n", 383 | "print(f'Root mean squared error: {rmse:.2f}')" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 24, 389 | "id": "8512e8df", 390 | "metadata": {}, 391 | "outputs": [ 392 | { 393 | "data": { 394 | "image/png": "\n", 395 | "text/plain": [ 396 | "
" 397 | ] 398 | }, 399 | "metadata": { 400 | "needs_background": "light" 401 | }, 402 | "output_type": "display_data" 403 | } 404 | ], 405 | "source": [ 406 | "plt.figure(figsize=(12, 6))\n", 407 | "plt.plot(x,y,'ro') # scatter plot showing actual data\n", 408 | "plt.plot(x_test,y_pred) # regression line\n", 409 | "plt.title('Actual vs Predicted')\n", 410 | "plt.xlabel('X')\n", 411 | "plt.ylabel('y')\n", 412 | "plt.show()" 413 | ] 414 | } 415 | ], 416 | "metadata": { 417 | "kernelspec": { 418 | "display_name": "Python 3 (ipykernel)", 419 | "language": "python", 420 | "name": "python3" 421 | }, 422 | "language_info": { 423 | "codemirror_mode": { 424 | "name": "ipython", 425 | "version": 3 426 | }, 427 | "file_extension": ".py", 428 | "mimetype": "text/x-python", 429 | "name": "python", 430 | "nbconvert_exporter": "python", 431 | "pygments_lexer": "ipython3", 432 | "version": "3.9.12" 433 | } 434 | }, 435 | "nbformat": 4, 436 | "nbformat_minor": 5 437 | } 438 | --------------------------------------------------------------------------------