├── README.md ├── .ipynb_checkpoints ├── README-checkpoint.md ├── MultivariateRegression-checkpoint.ipynb └── homeprices-checkpoint.csv ├── Exercise ├── .ipynb_checkpoints │ ├── Hiring-checkpoint.ipynb │ └── hiring-checkpoint.csv ├── hiring.csv └── Hiring.ipynb ├── homeprices.csv └── MultivariateRegression.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Multivariate Regression 2 | 3 | This is a repository for practicing Multivariate Regression. -------------------------------------------------------------------------------- /.ipynb_checkpoints/README-checkpoint.md: -------------------------------------------------------------------------------- 1 | # Multivariate Regression 2 | 3 | This is a repository for practicing Multivariate Regression. -------------------------------------------------------------------------------- /Exercise/.ipynb_checkpoints/Hiring-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/MultivariateRegression-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [], 3 | "metadata": {}, 4 | "nbformat": 4, 5 | "nbformat_minor": 5 6 | } 7 | -------------------------------------------------------------------------------- /homeprices.csv: -------------------------------------------------------------------------------- 1 | area,bedrooms,age,price 2 | 2600,3,20,550000 3 | 3000,4,15,565000 4 | 3200,,18,610000 5 | 3600,3,30,595000 6 | 4000,5,8,760000 7 | 4100,6,8,810000 8 | -------------------------------------------------------------------------------- /.ipynb_checkpoints/homeprices-checkpoint.csv: -------------------------------------------------------------------------------- 1 | area,bedrooms,age,price 2 | 2600,3,20,550000 3 | 3000,4,15,565000 4 | 3200,,18,610000 5 | 3600,3,30,595000 6 | 4000,5,8,760000 7 | 4100,6,8,810000 8 | -------------------------------------------------------------------------------- /Exercise/hiring.csv: -------------------------------------------------------------------------------- 1 | experience,test_score(out of 10),interview_score(out of 10),salary($) 2 | ,8,9,50000 3 | ,8,6,45000 4 | five,6,7,60000 5 | two,10,10,65000 6 | seven,9,6,70000 7 | three,7,10,62000 8 | ten,,7,72000 9 | eleven,7,8,80000 10 | -------------------------------------------------------------------------------- /Exercise/.ipynb_checkpoints/hiring-checkpoint.csv: -------------------------------------------------------------------------------- 1 | experience,test_score(out of 10),interview_score(out of 10),salary($) 2 | ,8,9,50000 3 | ,8,6,45000 4 | five,6,7,60000 5 | two,10,10,65000 6 | seven,9,6,70000 7 | three,7,10,62000 8 | ten,,7,72000 9 | eleven,7,8,80000 10 | -------------------------------------------------------------------------------- /MultivariateRegression.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "163bb9db-1f10-4f7d-8704-44516220ba9d", 6 | "metadata": {}, 7 | "source": [ 8 | "## Import Modules" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "0162937c-2be6-43a6-abaf-b41d10c1da6d", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from sklearn import linear_model" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "c07135e1-0c4c-48c6-b9ab-c6d6619ee9cc", 27 | "metadata": {}, 28 | "source": [ 29 | "## Loading the data to Dataframe" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "id": "6f78a703-4a18-4e70-a1eb-a3a6c3b7674f", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | "
areabedroomsageprice
026003.020550000
130004.015565000
23200NaN18610000
336003.030595000
440005.08760000
541006.08810000
\n", 110 | "
" 111 | ], 112 | "text/plain": [ 113 | " area bedrooms age price\n", 114 | "0 2600 3.0 20 550000\n", 115 | "1 3000 4.0 15 565000\n", 116 | "2 3200 NaN 18 610000\n", 117 | "3 3600 3.0 30 595000\n", 118 | "4 4000 5.0 8 760000\n", 119 | "5 4100 6.0 8 810000" 120 | ] 121 | }, 122 | "execution_count": 2, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "df = pd.read_csv('homeprices.csv')\n", 129 | "df" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "672f3fd9-40b8-4823-9f22-5e1252b5534b", 135 | "metadata": {}, 136 | "source": [ 137 | "## Data Preprocessing" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "36119832-5df3-480e-8177-0b6e3273e115", 143 | "metadata": {}, 144 | "source": [ 145 | "#### Fill NaN values with the median of the column" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": 3, 151 | "id": "c867e803-f11d-44b5-8a3e-db1d96490d90", 152 | "metadata": {}, 153 | "outputs": [ 154 | { 155 | "data": { 156 | "text/plain": [ 157 | "4.0" 158 | ] 159 | }, 160 | "execution_count": 3, 161 | "metadata": {}, 162 | "output_type": "execute_result" 163 | } 164 | ], 165 | "source": [ 166 | "df.bedrooms.median()" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 4, 172 | "id": "0787912e-d584-4dad-bc5c-c86cb7cd3f80", 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "data": { 177 | "text/html": [ 178 | "
\n", 179 | "\n", 192 | "\n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | "
areabedroomsageprice
026003.020550000
130004.015565000
232004.018610000
336003.030595000
440005.08760000
541006.08810000
\n", 247 | "
" 248 | ], 249 | "text/plain": [ 250 | " area bedrooms age price\n", 251 | "0 2600 3.0 20 550000\n", 252 | "1 3000 4.0 15 565000\n", 253 | "2 3200 4.0 18 610000\n", 254 | "3 3600 3.0 30 595000\n", 255 | "4 4000 5.0 8 760000\n", 256 | "5 4100 6.0 8 810000" 257 | ] 258 | }, 259 | "execution_count": 4, 260 | "metadata": {}, 261 | "output_type": "execute_result" 262 | } 263 | ], 264 | "source": [ 265 | "df.bedrooms = df.bedrooms.fillna(df.bedrooms.median())\n", 266 | "df" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "id": "ef5a5b21-ad7f-4b7a-95b5-111dc8fa6169", 272 | "metadata": {}, 273 | "source": [ 274 | "## Creating Linear Regression Object" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 7, 280 | "id": "7394b82a-d646-4c18-9e6f-67ec32ff216d", 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/html": [ 286 | "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 287 | ], 288 | "text/plain": [ 289 | "LinearRegression()" 290 | ] 291 | }, 292 | "execution_count": 7, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "reg = linear_model.LinearRegression()\n", 299 | "reg.fit(df.drop('price', axis = 'columns'), df.price)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": 9, 305 | "id": "eb5e418d-7a33-4d5f-8756-e6d80da868ab", 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "array([ 112.06244194, 23388.88007794, -3231.71790863])" 312 | ] 313 | }, 314 | "execution_count": 9, 315 | "metadata": {}, 316 | "output_type": "execute_result" 317 | } 318 | ], 319 | "source": [ 320 | "reg.coef_" 321 | ] 322 | }, 323 | { 324 | "cell_type": "code", 325 | "execution_count": 10, 326 | "id": "bc32ab26-ac1b-4bd2-8b66-7f4d6d8c9bb9", 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/plain": [ 332 | "221323.00186540408" 333 | ] 334 | }, 335 | "execution_count": 10, 336 | "metadata": {}, 337 | "output_type": "execute_result" 338 | } 339 | ], 340 | "source": [ 341 | "reg.intercept_" 342 | ] 343 | }, 344 | { 345 | "cell_type": "markdown", 346 | "id": "4c327650-3712-4046-a60c-6c56ed6bbfb3", 347 | "metadata": {}, 348 | "source": [ 349 | "#### Find price of home with 3000 sqr ft area, 3 bedrooms, 40 year old" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 11, 355 | "id": "b70e39de-ffec-4010-b652-dd0d5dae3a36", 356 | "metadata": {}, 357 | "outputs": [ 358 | { 359 | "name": "stderr", 360 | "output_type": "stream", 361 | "text": [ 362 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", 363 | " warnings.warn(\n" 364 | ] 365 | }, 366 | { 367 | "data": { 368 | "text/plain": [ 369 | "array([498408.25158031])" 370 | ] 371 | }, 372 | "execution_count": 11, 373 | "metadata": {}, 374 | "output_type": "execute_result" 375 | } 376 | ], 377 | "source": [ 378 | "reg.predict([[3000,3,40]])" 379 | ] 380 | }, 381 | { 382 | "cell_type": "markdown", 383 | "id": "21510c85-a29d-47ea-ad13-a66714966e0f", 384 | "metadata": {}, 385 | "source": [ 386 | "#### Find price of home with 2500 sqr ft area, 4 bedrooms, 5 year old" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 12, 392 | "id": "e2d7cc86-069d-4105-a41d-c5f33d1af4e4", 393 | "metadata": {}, 394 | "outputs": [ 395 | { 396 | "name": "stderr", 397 | "output_type": "stream", 398 | "text": [ 399 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", 400 | " warnings.warn(\n" 401 | ] 402 | }, 403 | { 404 | "data": { 405 | "text/plain": [ 406 | "array([578876.03748933])" 407 | ] 408 | }, 409 | "execution_count": 12, 410 | "metadata": {}, 411 | "output_type": "execute_result" 412 | } 413 | ], 414 | "source": [ 415 | "reg.predict([[2500,4,5]])" 416 | ] 417 | } 418 | ], 419 | "metadata": { 420 | "kernelspec": { 421 | "display_name": "Python 3 (ipykernel)", 422 | "language": "python", 423 | "name": "python3" 424 | }, 425 | "language_info": { 426 | "codemirror_mode": { 427 | "name": "ipython", 428 | "version": 3 429 | }, 430 | "file_extension": ".py", 431 | "mimetype": "text/x-python", 432 | "name": "python", 433 | "nbconvert_exporter": "python", 434 | "pygments_lexer": "ipython3", 435 | "version": "3.11.4" 436 | } 437 | }, 438 | "nbformat": 4, 439 | "nbformat_minor": 5 440 | } 441 | -------------------------------------------------------------------------------- /Exercise/Hiring.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "30ebf0fa-b490-4686-8637-1070d4cc72c2", 6 | "metadata": {}, 7 | "source": [ 8 | "## Importing Modules" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "6f39c976-e5f2-4922-a12b-f45c19b3577b", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import pandas as pd\n", 20 | "import matplotlib.pyplot as plt\n", 21 | "from sklearn import linear_model" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "1590695d-25c2-42a2-b934-b2dc288db055", 27 | "metadata": {}, 28 | "source": [ 29 | "## Loading the data to Dataframe" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 35, 35 | "id": "3fc93792-4067-4d9c-a2e9-bd1d28400022", 36 | "metadata": {}, 37 | "outputs": [ 38 | { 39 | "data": { 40 | "text/html": [ 41 | "
\n", 42 | "\n", 55 | "\n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | "
experiencetest_score(out of 10)interview_score(out of 10)salary($)
0NaN8.0950000
1NaN8.0645000
2five6.0760000
3two10.01065000
4seven9.0670000
5three7.01062000
6tenNaN772000
7eleven7.0880000
\n", 124 | "
" 125 | ], 126 | "text/plain": [ 127 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n", 128 | "0 NaN 8.0 9 50000\n", 129 | "1 NaN 8.0 6 45000\n", 130 | "2 five 6.0 7 60000\n", 131 | "3 two 10.0 10 65000\n", 132 | "4 seven 9.0 6 70000\n", 133 | "5 three 7.0 10 62000\n", 134 | "6 ten NaN 7 72000\n", 135 | "7 eleven 7.0 8 80000" 136 | ] 137 | }, 138 | "execution_count": 35, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "df = pd.read_csv(\"hiring.csv\")\n", 145 | "df" 146 | ] 147 | }, 148 | { 149 | "cell_type": "markdown", 150 | "id": "366c3127-265b-4640-bff9-80dc3bee7c36", 151 | "metadata": {}, 152 | "source": [ 153 | "## Data preprocessing" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 3, 159 | "id": "c6cc1407-42f7-47d7-9854-8a3241be8a0e", 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "data": { 164 | "text/plain": [ 165 | "Index(['experience', 'test_score(out of 10)', 'interview_score(out of 10)',\n", 166 | " 'salary($)'],\n", 167 | " dtype='object')" 168 | ] 169 | }, 170 | "execution_count": 3, 171 | "metadata": {}, 172 | "output_type": "execute_result" 173 | } 174 | ], 175 | "source": [ 176 | "df.columns" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 36, 182 | "id": "0d012d4b-e183-44fb-bb54-4e6b225adaad", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "7" 189 | ] 190 | }, 191 | "execution_count": 36, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "import math\n", 198 | "median_test_score = math.floor(df['test_score(out of 10)'].mean())\n", 199 | "median_test_score" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 37, 205 | "id": "f82adb09-5c25-4227-9804-6c0761c8d05f", 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "data": { 210 | "text/html": [ 211 | "
\n", 212 | "\n", 225 | "\n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | "
experiencetest_score(out of 10)interview_score(out of 10)salary($)
0NaN8.0950000
1NaN8.0645000
2five6.0760000
3two10.01065000
4seven9.0670000
5three7.01062000
6ten7.0772000
7eleven7.0880000
\n", 294 | "
" 295 | ], 296 | "text/plain": [ 297 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n", 298 | "0 NaN 8.0 9 50000\n", 299 | "1 NaN 8.0 6 45000\n", 300 | "2 five 6.0 7 60000\n", 301 | "3 two 10.0 10 65000\n", 302 | "4 seven 9.0 6 70000\n", 303 | "5 three 7.0 10 62000\n", 304 | "6 ten 7.0 7 72000\n", 305 | "7 eleven 7.0 8 80000" 306 | ] 307 | }, 308 | "execution_count": 37, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "df['test_score(out of 10)'] = df['test_score(out of 10)'].fillna(median_test_score)\n", 315 | "df" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "id": "212400f7-1493-470f-93b8-67a059b33e77", 321 | "metadata": {}, 322 | "source": [ 323 | "#### importing word2number module" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 7, 329 | "id": "f7f74731-45ee-463e-9833-13e566869c44", 330 | "metadata": {}, 331 | "outputs": [], 332 | "source": [ 333 | "from word2number import w2n" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": 38, 339 | "id": "5f29bebb-597f-4b43-a2d2-77a90827e617", 340 | "metadata": {}, 341 | "outputs": [ 342 | { 343 | "data": { 344 | "text/html": [ 345 | "
\n", 346 | "\n", 359 | "\n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | "
experiencetest_score(out of 10)interview_score(out of 10)salary($)
0zero8.0950000
1zero8.0645000
2five6.0760000
3two10.01065000
4seven9.0670000
5three7.01062000
6ten7.0772000
7eleven7.0880000
\n", 428 | "
" 429 | ], 430 | "text/plain": [ 431 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n", 432 | "0 zero 8.0 9 50000\n", 433 | "1 zero 8.0 6 45000\n", 434 | "2 five 6.0 7 60000\n", 435 | "3 two 10.0 10 65000\n", 436 | "4 seven 9.0 6 70000\n", 437 | "5 three 7.0 10 62000\n", 438 | "6 ten 7.0 7 72000\n", 439 | "7 eleven 7.0 8 80000" 440 | ] 441 | }, 442 | "execution_count": 38, 443 | "metadata": {}, 444 | "output_type": "execute_result" 445 | } 446 | ], 447 | "source": [ 448 | "df.experience = df.experience.fillna('zero')\n", 449 | "df" 450 | ] 451 | }, 452 | { 453 | "cell_type": "code", 454 | "execution_count": 39, 455 | "id": "22a6e7a3-61cf-45ec-a066-dfb5625d3f6a", 456 | "metadata": {}, 457 | "outputs": [ 458 | { 459 | "data": { 460 | "text/html": [ 461 | "
\n", 462 | "\n", 475 | "\n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | "
experiencetest_score(out of 10)interview_score(out of 10)salary($)
008.0950000
108.0645000
256.0760000
3210.01065000
479.0670000
537.01062000
6107.0772000
7117.0880000
\n", 544 | "
" 545 | ], 546 | "text/plain": [ 547 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n", 548 | "0 0 8.0 9 50000\n", 549 | "1 0 8.0 6 45000\n", 550 | "2 5 6.0 7 60000\n", 551 | "3 2 10.0 10 65000\n", 552 | "4 7 9.0 6 70000\n", 553 | "5 3 7.0 10 62000\n", 554 | "6 10 7.0 7 72000\n", 555 | "7 11 7.0 8 80000" 556 | ] 557 | }, 558 | "execution_count": 39, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "df.experience = df.experience.apply(w2n.word_to_num)\n", 565 | "df" 566 | ] 567 | }, 568 | { 569 | "cell_type": "markdown", 570 | "id": "da085474-684c-460b-9786-72b77802706e", 571 | "metadata": {}, 572 | "source": [ 573 | "#### define a mapping of number words to numbers. This can be done using word2number module." 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": 12, 579 | "id": "b7a45514-9abf-4a71-8d28-7e1b5c3cd7ca", 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "word_to_number = {\n", 584 | " 'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,\n", 585 | " 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,\n", 586 | " 'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14, 'fifteen': 15\n", 587 | "}" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 15, 593 | "id": "f0890063-dfd3-4abe-9772-f15a64d443b1", 594 | "metadata": { 595 | "collapsed": true, 596 | "jupyter": { 597 | "outputs_hidden": true 598 | } 599 | }, 600 | "outputs": [ 601 | { 602 | "data": { 603 | "text/html": [ 604 | "
\n", 605 | "\n", 618 | "\n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | "
experiencetest_score(out of 10)interview_score(out of 10)salary($)
0NaN8.0950000
1NaN8.0645000
25.06.0760000
32.010.01065000
47.09.0670000
53.07.01062000
610.08.0772000
711.07.0880000
\n", 687 | "
" 688 | ], 689 | "text/plain": [ 690 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n", 691 | "0 NaN 8.0 9 50000\n", 692 | "1 NaN 8.0 6 45000\n", 693 | "2 5.0 6.0 7 60000\n", 694 | "3 2.0 10.0 10 65000\n", 695 | "4 7.0 9.0 6 70000\n", 696 | "5 3.0 7.0 10 62000\n", 697 | "6 10.0 8.0 7 72000\n", 698 | "7 11.0 7.0 8 80000" 699 | ] 700 | }, 701 | "execution_count": 15, 702 | "metadata": {}, 703 | "output_type": "execute_result" 704 | } 705 | ], 706 | "source": [ 707 | "df.experience = df['experience'].map(word_to_number)\n", 708 | "df" 709 | ] 710 | }, 711 | { 712 | "cell_type": "markdown", 713 | "id": "2ddbf835-5a78-47e0-9fd6-6372530d428d", 714 | "metadata": {}, 715 | "source": [ 716 | "## creating linear regression object / model" 717 | ] 718 | }, 719 | { 720 | "cell_type": "code", 721 | "execution_count": 40, 722 | "id": "c9b8e084-a491-47ba-bc70-192f0c573ae5", 723 | "metadata": {}, 724 | "outputs": [ 725 | { 726 | "data": { 727 | "text/html": [ 728 | "
LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" 729 | ], 730 | "text/plain": [ 731 | "LinearRegression()" 732 | ] 733 | }, 734 | "execution_count": 40, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "model = linear_model.LinearRegression()\n", 741 | "model.fit(df[['experience','test_score(out of 10)','interview_score(out of 10)']], df['salary($)'])" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": 41, 747 | "id": "2e2fcd0e-76f3-4671-8b4d-515e7964dc3f", 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "name": "stderr", 752 | "output_type": "stream", 753 | "text": [ 754 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", 755 | " warnings.warn(\n" 756 | ] 757 | }, 758 | { 759 | "data": { 760 | "text/plain": [ 761 | "array([53713.86677124])" 762 | ] 763 | }, 764 | "execution_count": 41, 765 | "metadata": {}, 766 | "output_type": "execute_result" 767 | } 768 | ], 769 | "source": [ 770 | "model.predict([[2,9,6]])" 771 | ] 772 | }, 773 | { 774 | "cell_type": "code", 775 | "execution_count": 42, 776 | "id": "70eeccb6-a445-4a62-a918-481733e86b80", 777 | "metadata": {}, 778 | "outputs": [ 779 | { 780 | "name": "stderr", 781 | "output_type": "stream", 782 | "text": [ 783 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n", 784 | " warnings.warn(\n" 785 | ] 786 | }, 787 | { 788 | "data": { 789 | "text/plain": [ 790 | "array([93747.79628651])" 791 | ] 792 | }, 793 | "execution_count": 42, 794 | "metadata": {}, 795 | "output_type": "execute_result" 796 | } 797 | ], 798 | "source": [ 799 | "model.predict([[12,10,10]])" 800 | ] 801 | }, 802 | { 803 | "cell_type": "code", 804 | "execution_count": 31, 805 | "id": "088bb049-859c-4618-93cd-8a72da2eb482", 806 | "metadata": {}, 807 | "outputs": [ 808 | { 809 | "data": { 810 | "text/html": [ 811 | "
\n", 812 | "\n", 825 | "\n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | "
experiencetest_score(out of 10)interview_score(out of 10)salary($)
06.08.0950000
16.08.0645000
25.06.0760000
32.010.01065000
47.09.0670000
53.07.01062000
610.08.0772000
711.07.0880000
\n", 894 | "
" 895 | ], 896 | "text/plain": [ 897 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n", 898 | "0 6.0 8.0 9 50000\n", 899 | "1 6.0 8.0 6 45000\n", 900 | "2 5.0 6.0 7 60000\n", 901 | "3 2.0 10.0 10 65000\n", 902 | "4 7.0 9.0 6 70000\n", 903 | "5 3.0 7.0 10 62000\n", 904 | "6 10.0 8.0 7 72000\n", 905 | "7 11.0 7.0 8 80000" 906 | ] 907 | }, 908 | "execution_count": 31, 909 | "metadata": {}, 910 | "output_type": "execute_result" 911 | } 912 | ], 913 | "source": [ 914 | "df.experience = df.experience.fillna(df.experience.median())\n", 915 | "df" 916 | ] 917 | } 918 | ], 919 | "metadata": { 920 | "kernelspec": { 921 | "display_name": "Python 3 (ipykernel)", 922 | "language": "python", 923 | "name": "python3" 924 | }, 925 | "language_info": { 926 | "codemirror_mode": { 927 | "name": "ipython", 928 | "version": 3 929 | }, 930 | "file_extension": ".py", 931 | "mimetype": "text/x-python", 932 | "name": "python", 933 | "nbconvert_exporter": "python", 934 | "pygments_lexer": "ipython3", 935 | "version": "3.11.4" 936 | } 937 | }, 938 | "nbformat": 4, 939 | "nbformat_minor": 5 940 | } 941 | --------------------------------------------------------------------------------