├── README.md
├── .ipynb_checkpoints
│   ├── README-checkpoint.md
│   ├── MultivariateRegression-checkpoint.ipynb
│   └── homeprices-checkpoint.csv
├── Exercise
│   ├── .ipynb_checkpoints
│   │   ├── Hiring-checkpoint.ipynb
│   │   └── hiring-checkpoint.csv
│   ├── hiring.csv
│   └── Hiring.ipynb
├── homeprices.csv
└── MultivariateRegression.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Multivariate Regression
2 |
3 | This is a repository for practicing Multivariate Regression.
--------------------------------------------------------------------------------
/.ipynb_checkpoints/README-checkpoint.md:
--------------------------------------------------------------------------------
1 | # Multivariate Regression
2 |
3 | This is a repository for practicing Multivariate Regression.
--------------------------------------------------------------------------------
/Exercise/.ipynb_checkpoints/Hiring-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 5
6 | }
7 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/MultivariateRegression-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [],
3 | "metadata": {},
4 | "nbformat": 4,
5 | "nbformat_minor": 5
6 | }
7 |
--------------------------------------------------------------------------------
/homeprices.csv:
--------------------------------------------------------------------------------
1 | area,bedrooms,age,price
2 | 2600,3,20,550000
3 | 3000,4,15,565000
4 | 3200,,18,610000
5 | 3600,3,30,595000
6 | 4000,5,8,760000
7 | 4100,6,8,810000
8 |
--------------------------------------------------------------------------------
/.ipynb_checkpoints/homeprices-checkpoint.csv:
--------------------------------------------------------------------------------
1 | area,bedrooms,age,price
2 | 2600,3,20,550000
3 | 3000,4,15,565000
4 | 3200,,18,610000
5 | 3600,3,30,595000
6 | 4000,5,8,760000
7 | 4100,6,8,810000
8 |
--------------------------------------------------------------------------------
/Exercise/hiring.csv:
--------------------------------------------------------------------------------
1 | experience,test_score(out of 10),interview_score(out of 10),salary($)
2 | ,8,9,50000
3 | ,8,6,45000
4 | five,6,7,60000
5 | two,10,10,65000
6 | seven,9,6,70000
7 | three,7,10,62000
8 | ten,,7,72000
9 | eleven,7,8,80000
10 |
--------------------------------------------------------------------------------
/Exercise/.ipynb_checkpoints/hiring-checkpoint.csv:
--------------------------------------------------------------------------------
1 | experience,test_score(out of 10),interview_score(out of 10),salary($)
2 | ,8,9,50000
3 | ,8,6,45000
4 | five,6,7,60000
5 | two,10,10,65000
6 | seven,9,6,70000
7 | three,7,10,62000
8 | ten,,7,72000
9 | eleven,7,8,80000
10 |
--------------------------------------------------------------------------------
/MultivariateRegression.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "163bb9db-1f10-4f7d-8704-44516220ba9d",
6 | "metadata": {},
7 | "source": [
8 | "## Import Modules"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "0162937c-2be6-43a6-abaf-b41d10c1da6d",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import pandas as pd\n",
20 | "import matplotlib.pyplot as plt\n",
21 | "from sklearn import linear_model"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "id": "c07135e1-0c4c-48c6-b9ab-c6d6619ee9cc",
27 | "metadata": {},
28 | "source": [
29 | "## Loading the data into a DataFrame"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 2,
35 | "id": "6f78a703-4a18-4e70-a1eb-a3a6c3b7674f",
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "
\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " area | \n",
60 | " bedrooms | \n",
61 | " age | \n",
62 | " price | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " 2600 | \n",
69 | " 3.0 | \n",
70 | " 20 | \n",
71 | " 550000 | \n",
72 | "
\n",
73 | " \n",
74 | " | 1 | \n",
75 | " 3000 | \n",
76 | " 4.0 | \n",
77 | " 15 | \n",
78 | " 565000 | \n",
79 | "
\n",
80 | " \n",
81 | " | 2 | \n",
82 | " 3200 | \n",
83 | " NaN | \n",
84 | " 18 | \n",
85 | " 610000 | \n",
86 | "
\n",
87 | " \n",
88 | " | 3 | \n",
89 | " 3600 | \n",
90 | " 3.0 | \n",
91 | " 30 | \n",
92 | " 595000 | \n",
93 | "
\n",
94 | " \n",
95 | " | 4 | \n",
96 | " 4000 | \n",
97 | " 5.0 | \n",
98 | " 8 | \n",
99 | " 760000 | \n",
100 | "
\n",
101 | " \n",
102 | " | 5 | \n",
103 | " 4100 | \n",
104 | " 6.0 | \n",
105 | " 8 | \n",
106 | " 810000 | \n",
107 | "
\n",
108 | " \n",
109 | "
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " area bedrooms age price\n",
114 | "0 2600 3.0 20 550000\n",
115 | "1 3000 4.0 15 565000\n",
116 | "2 3200 NaN 18 610000\n",
117 | "3 3600 3.0 30 595000\n",
118 | "4 4000 5.0 8 760000\n",
119 | "5 4100 6.0 8 810000"
120 | ]
121 | },
122 | "execution_count": 2,
123 | "metadata": {},
124 | "output_type": "execute_result"
125 | }
126 | ],
127 | "source": [
128 | "df = pd.read_csv('homeprices.csv')\n",
129 | "df"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "id": "672f3fd9-40b8-4823-9f22-5e1252b5534b",
135 | "metadata": {},
136 | "source": [
137 | "## Data Preprocessing"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "id": "36119832-5df3-480e-8177-0b6e3273e115",
143 | "metadata": {},
144 | "source": [
145 | "#### Fill NaN values with the median of the column"
146 | ]
147 | },
148 | {
149 | "cell_type": "code",
150 | "execution_count": 3,
151 | "id": "c867e803-f11d-44b5-8a3e-db1d96490d90",
152 | "metadata": {},
153 | "outputs": [
154 | {
155 | "data": {
156 | "text/plain": [
157 | "4.0"
158 | ]
159 | },
160 | "execution_count": 3,
161 | "metadata": {},
162 | "output_type": "execute_result"
163 | }
164 | ],
165 | "source": [
166 | "df.bedrooms.median()"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": 4,
172 | "id": "0787912e-d584-4dad-bc5c-c86cb7cd3f80",
173 | "metadata": {},
174 | "outputs": [
175 | {
176 | "data": {
177 | "text/html": [
178 | "\n",
179 | "\n",
192 | "
\n",
193 | " \n",
194 | " \n",
195 | " | \n",
196 | " area | \n",
197 | " bedrooms | \n",
198 | " age | \n",
199 | " price | \n",
200 | "
\n",
201 | " \n",
202 | " \n",
203 | " \n",
204 | " | 0 | \n",
205 | " 2600 | \n",
206 | " 3.0 | \n",
207 | " 20 | \n",
208 | " 550000 | \n",
209 | "
\n",
210 | " \n",
211 | " | 1 | \n",
212 | " 3000 | \n",
213 | " 4.0 | \n",
214 | " 15 | \n",
215 | " 565000 | \n",
216 | "
\n",
217 | " \n",
218 | " | 2 | \n",
219 | " 3200 | \n",
220 | " 4.0 | \n",
221 | " 18 | \n",
222 | " 610000 | \n",
223 | "
\n",
224 | " \n",
225 | " | 3 | \n",
226 | " 3600 | \n",
227 | " 3.0 | \n",
228 | " 30 | \n",
229 | " 595000 | \n",
230 | "
\n",
231 | " \n",
232 | " | 4 | \n",
233 | " 4000 | \n",
234 | " 5.0 | \n",
235 | " 8 | \n",
236 | " 760000 | \n",
237 | "
\n",
238 | " \n",
239 | " | 5 | \n",
240 | " 4100 | \n",
241 | " 6.0 | \n",
242 | " 8 | \n",
243 | " 810000 | \n",
244 | "
\n",
245 | " \n",
246 | "
\n",
247 | "
"
248 | ],
249 | "text/plain": [
250 | " area bedrooms age price\n",
251 | "0 2600 3.0 20 550000\n",
252 | "1 3000 4.0 15 565000\n",
253 | "2 3200 4.0 18 610000\n",
254 | "3 3600 3.0 30 595000\n",
255 | "4 4000 5.0 8 760000\n",
256 | "5 4100 6.0 8 810000"
257 | ]
258 | },
259 | "execution_count": 4,
260 | "metadata": {},
261 | "output_type": "execute_result"
262 | }
263 | ],
264 | "source": [
265 | "df.bedrooms = df.bedrooms.fillna(df.bedrooms.median())\n",
266 | "df"
267 | ]
268 | },
269 | {
270 | "cell_type": "markdown",
271 | "id": "ef5a5b21-ad7f-4b7a-95b5-111dc8fa6169",
272 | "metadata": {},
273 | "source": [
274 | "## Creating Linear Regression Object"
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 7,
280 | "id": "7394b82a-d646-4c18-9e6f-67ec32ff216d",
281 | "metadata": {},
282 | "outputs": [
283 | {
284 | "data": {
285 | "text/html": [
286 | "LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
287 | ],
288 | "text/plain": [
289 | "LinearRegression()"
290 | ]
291 | },
292 | "execution_count": 7,
293 | "metadata": {},
294 | "output_type": "execute_result"
295 | }
296 | ],
297 | "source": [
298 | "reg = linear_model.LinearRegression()\n",
299 | "reg.fit(df.drop('price', axis = 'columns'), df.price)"
300 | ]
301 | },
302 | {
303 | "cell_type": "code",
304 | "execution_count": 9,
305 | "id": "eb5e418d-7a33-4d5f-8756-e6d80da868ab",
306 | "metadata": {},
307 | "outputs": [
308 | {
309 | "data": {
310 | "text/plain": [
311 | "array([ 112.06244194, 23388.88007794, -3231.71790863])"
312 | ]
313 | },
314 | "execution_count": 9,
315 | "metadata": {},
316 | "output_type": "execute_result"
317 | }
318 | ],
319 | "source": [
320 | "reg.coef_"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 10,
326 | "id": "bc32ab26-ac1b-4bd2-8b66-7f4d6d8c9bb9",
327 | "metadata": {},
328 | "outputs": [
329 | {
330 | "data": {
331 | "text/plain": [
332 | "221323.00186540408"
333 | ]
334 | },
335 | "execution_count": 10,
336 | "metadata": {},
337 | "output_type": "execute_result"
338 | }
339 | ],
340 | "source": [
341 | "reg.intercept_"
342 | ]
343 | },
344 | {
345 | "cell_type": "markdown",
346 | "id": "4c327650-3712-4046-a60c-6c56ed6bbfb3",
347 | "metadata": {},
348 | "source": [
349 | "#### Find the price of a home with 3000 sq ft area, 3 bedrooms, 40 years old"
350 | ]
351 | },
352 | {
353 | "cell_type": "code",
354 | "execution_count": 11,
355 | "id": "b70e39de-ffec-4010-b652-dd0d5dae3a36",
356 | "metadata": {},
357 | "outputs": [
358 | {
359 | "name": "stderr",
360 | "output_type": "stream",
361 | "text": [
362 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
363 | " warnings.warn(\n"
364 | ]
365 | },
366 | {
367 | "data": {
368 | "text/plain": [
369 | "array([498408.25158031])"
370 | ]
371 | },
372 | "execution_count": 11,
373 | "metadata": {},
374 | "output_type": "execute_result"
375 | }
376 | ],
377 | "source": [
378 | "reg.predict([[3000,3,40]])"
379 | ]
380 | },
381 | {
382 | "cell_type": "markdown",
383 | "id": "21510c85-a29d-47ea-ad13-a66714966e0f",
384 | "metadata": {},
385 | "source": [
386 | "#### Find the price of a home with 2500 sq ft area, 4 bedrooms, 5 years old"
387 | ]
388 | },
389 | {
390 | "cell_type": "code",
391 | "execution_count": 12,
392 | "id": "e2d7cc86-069d-4105-a41d-c5f33d1af4e4",
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "name": "stderr",
397 | "output_type": "stream",
398 | "text": [
399 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
400 | " warnings.warn(\n"
401 | ]
402 | },
403 | {
404 | "data": {
405 | "text/plain": [
406 | "array([578876.03748933])"
407 | ]
408 | },
409 | "execution_count": 12,
410 | "metadata": {},
411 | "output_type": "execute_result"
412 | }
413 | ],
414 | "source": [
415 | "reg.predict([[2500,4,5]])"
416 | ]
417 | }
418 | ],
419 | "metadata": {
420 | "kernelspec": {
421 | "display_name": "Python 3 (ipykernel)",
422 | "language": "python",
423 | "name": "python3"
424 | },
425 | "language_info": {
426 | "codemirror_mode": {
427 | "name": "ipython",
428 | "version": 3
429 | },
430 | "file_extension": ".py",
431 | "mimetype": "text/x-python",
432 | "name": "python",
433 | "nbconvert_exporter": "python",
434 | "pygments_lexer": "ipython3",
435 | "version": "3.11.4"
436 | }
437 | },
438 | "nbformat": 4,
439 | "nbformat_minor": 5
440 | }
441 |
--------------------------------------------------------------------------------
/Exercise/Hiring.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "30ebf0fa-b490-4686-8637-1070d4cc72c2",
6 | "metadata": {},
7 | "source": [
8 | "## Importing Modules"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "6f39c976-e5f2-4922-a12b-f45c19b3577b",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import numpy as np\n",
19 | "import pandas as pd\n",
20 | "import matplotlib.pyplot as plt\n",
21 | "from sklearn import linear_model"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "id": "1590695d-25c2-42a2-b934-b2dc288db055",
27 | "metadata": {},
28 | "source": [
29 | "## Loading the data into a DataFrame"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 35,
35 | "id": "3fc93792-4067-4d9c-a2e9-bd1d28400022",
36 | "metadata": {},
37 | "outputs": [
38 | {
39 | "data": {
40 | "text/html": [
41 | "\n",
42 | "\n",
55 | "
\n",
56 | " \n",
57 | " \n",
58 | " | \n",
59 | " experience | \n",
60 | " test_score(out of 10) | \n",
61 | " interview_score(out of 10) | \n",
62 | " salary($) | \n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " \n",
67 | " | 0 | \n",
68 | " NaN | \n",
69 | " 8.0 | \n",
70 | " 9 | \n",
71 | " 50000 | \n",
72 | "
\n",
73 | " \n",
74 | " | 1 | \n",
75 | " NaN | \n",
76 | " 8.0 | \n",
77 | " 6 | \n",
78 | " 45000 | \n",
79 | "
\n",
80 | " \n",
81 | " | 2 | \n",
82 | " five | \n",
83 | " 6.0 | \n",
84 | " 7 | \n",
85 | " 60000 | \n",
86 | "
\n",
87 | " \n",
88 | " | 3 | \n",
89 | " two | \n",
90 | " 10.0 | \n",
91 | " 10 | \n",
92 | " 65000 | \n",
93 | "
\n",
94 | " \n",
95 | " | 4 | \n",
96 | " seven | \n",
97 | " 9.0 | \n",
98 | " 6 | \n",
99 | " 70000 | \n",
100 | "
\n",
101 | " \n",
102 | " | 5 | \n",
103 | " three | \n",
104 | " 7.0 | \n",
105 | " 10 | \n",
106 | " 62000 | \n",
107 | "
\n",
108 | " \n",
109 | " | 6 | \n",
110 | " ten | \n",
111 | " NaN | \n",
112 | " 7 | \n",
113 | " 72000 | \n",
114 | "
\n",
115 | " \n",
116 | " | 7 | \n",
117 | " eleven | \n",
118 | " 7.0 | \n",
119 | " 8 | \n",
120 | " 80000 | \n",
121 | "
\n",
122 | " \n",
123 | "
\n",
124 | "
"
125 | ],
126 | "text/plain": [
127 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n",
128 | "0 NaN 8.0 9 50000\n",
129 | "1 NaN 8.0 6 45000\n",
130 | "2 five 6.0 7 60000\n",
131 | "3 two 10.0 10 65000\n",
132 | "4 seven 9.0 6 70000\n",
133 | "5 three 7.0 10 62000\n",
134 | "6 ten NaN 7 72000\n",
135 | "7 eleven 7.0 8 80000"
136 | ]
137 | },
138 | "execution_count": 35,
139 | "metadata": {},
140 | "output_type": "execute_result"
141 | }
142 | ],
143 | "source": [
144 | "df = pd.read_csv(\"hiring.csv\")\n",
145 | "df"
146 | ]
147 | },
148 | {
149 | "cell_type": "markdown",
150 | "id": "366c3127-265b-4640-bff9-80dc3bee7c36",
151 | "metadata": {},
152 | "source": [
153 | "## Data preprocessing"
154 | ]
155 | },
156 | {
157 | "cell_type": "code",
158 | "execution_count": 3,
159 | "id": "c6cc1407-42f7-47d7-9854-8a3241be8a0e",
160 | "metadata": {},
161 | "outputs": [
162 | {
163 | "data": {
164 | "text/plain": [
165 | "Index(['experience', 'test_score(out of 10)', 'interview_score(out of 10)',\n",
166 | " 'salary($)'],\n",
167 | " dtype='object')"
168 | ]
169 | },
170 | "execution_count": 3,
171 | "metadata": {},
172 | "output_type": "execute_result"
173 | }
174 | ],
175 | "source": [
176 | "df.columns"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 36,
182 | "id": "0d012d4b-e183-44fb-bb54-4e6b225adaad",
183 | "metadata": {},
184 | "outputs": [
185 | {
186 | "data": {
187 | "text/plain": [
188 | "7"
189 | ]
190 | },
191 | "execution_count": 36,
192 | "metadata": {},
193 | "output_type": "execute_result"
194 | }
195 | ],
196 | "source": [
197 | "import math\n",
198 | "median_test_score = math.floor(df['test_score(out of 10)'].mean())\n",
199 | "median_test_score"
200 | ]
201 | },
202 | {
203 | "cell_type": "code",
204 | "execution_count": 37,
205 | "id": "f82adb09-5c25-4227-9804-6c0761c8d05f",
206 | "metadata": {},
207 | "outputs": [
208 | {
209 | "data": {
210 | "text/html": [
211 | "\n",
212 | "\n",
225 | "
\n",
226 | " \n",
227 | " \n",
228 | " | \n",
229 | " experience | \n",
230 | " test_score(out of 10) | \n",
231 | " interview_score(out of 10) | \n",
232 | " salary($) | \n",
233 | "
\n",
234 | " \n",
235 | " \n",
236 | " \n",
237 | " | 0 | \n",
238 | " NaN | \n",
239 | " 8.0 | \n",
240 | " 9 | \n",
241 | " 50000 | \n",
242 | "
\n",
243 | " \n",
244 | " | 1 | \n",
245 | " NaN | \n",
246 | " 8.0 | \n",
247 | " 6 | \n",
248 | " 45000 | \n",
249 | "
\n",
250 | " \n",
251 | " | 2 | \n",
252 | " five | \n",
253 | " 6.0 | \n",
254 | " 7 | \n",
255 | " 60000 | \n",
256 | "
\n",
257 | " \n",
258 | " | 3 | \n",
259 | " two | \n",
260 | " 10.0 | \n",
261 | " 10 | \n",
262 | " 65000 | \n",
263 | "
\n",
264 | " \n",
265 | " | 4 | \n",
266 | " seven | \n",
267 | " 9.0 | \n",
268 | " 6 | \n",
269 | " 70000 | \n",
270 | "
\n",
271 | " \n",
272 | " | 5 | \n",
273 | " three | \n",
274 | " 7.0 | \n",
275 | " 10 | \n",
276 | " 62000 | \n",
277 | "
\n",
278 | " \n",
279 | " | 6 | \n",
280 | " ten | \n",
281 | " 7.0 | \n",
282 | " 7 | \n",
283 | " 72000 | \n",
284 | "
\n",
285 | " \n",
286 | " | 7 | \n",
287 | " eleven | \n",
288 | " 7.0 | \n",
289 | " 8 | \n",
290 | " 80000 | \n",
291 | "
\n",
292 | " \n",
293 | "
\n",
294 | "
"
295 | ],
296 | "text/plain": [
297 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n",
298 | "0 NaN 8.0 9 50000\n",
299 | "1 NaN 8.0 6 45000\n",
300 | "2 five 6.0 7 60000\n",
301 | "3 two 10.0 10 65000\n",
302 | "4 seven 9.0 6 70000\n",
303 | "5 three 7.0 10 62000\n",
304 | "6 ten 7.0 7 72000\n",
305 | "7 eleven 7.0 8 80000"
306 | ]
307 | },
308 | "execution_count": 37,
309 | "metadata": {},
310 | "output_type": "execute_result"
311 | }
312 | ],
313 | "source": [
314 | "df['test_score(out of 10)'] = df['test_score(out of 10)'].fillna(median_test_score)\n",
315 | "df"
316 | ]
317 | },
318 | {
319 | "cell_type": "markdown",
320 | "id": "212400f7-1493-470f-93b8-67a059b33e77",
321 | "metadata": {},
322 | "source": [
323 | "#### Importing the word2number module"
324 | ]
325 | },
326 | {
327 | "cell_type": "code",
328 | "execution_count": 7,
329 | "id": "f7f74731-45ee-463e-9833-13e566869c44",
330 | "metadata": {},
331 | "outputs": [],
332 | "source": [
333 | "from word2number import w2n"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 38,
339 | "id": "5f29bebb-597f-4b43-a2d2-77a90827e617",
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "data": {
344 | "text/html": [
345 | "\n",
346 | "\n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " | \n",
363 | " experience | \n",
364 | " test_score(out of 10) | \n",
365 | " interview_score(out of 10) | \n",
366 | " salary($) | \n",
367 | "
\n",
368 | " \n",
369 | " \n",
370 | " \n",
371 | " | 0 | \n",
372 | " zero | \n",
373 | " 8.0 | \n",
374 | " 9 | \n",
375 | " 50000 | \n",
376 | "
\n",
377 | " \n",
378 | " | 1 | \n",
379 | " zero | \n",
380 | " 8.0 | \n",
381 | " 6 | \n",
382 | " 45000 | \n",
383 | "
\n",
384 | " \n",
385 | " | 2 | \n",
386 | " five | \n",
387 | " 6.0 | \n",
388 | " 7 | \n",
389 | " 60000 | \n",
390 | "
\n",
391 | " \n",
392 | " | 3 | \n",
393 | " two | \n",
394 | " 10.0 | \n",
395 | " 10 | \n",
396 | " 65000 | \n",
397 | "
\n",
398 | " \n",
399 | " | 4 | \n",
400 | " seven | \n",
401 | " 9.0 | \n",
402 | " 6 | \n",
403 | " 70000 | \n",
404 | "
\n",
405 | " \n",
406 | " | 5 | \n",
407 | " three | \n",
408 | " 7.0 | \n",
409 | " 10 | \n",
410 | " 62000 | \n",
411 | "
\n",
412 | " \n",
413 | " | 6 | \n",
414 | " ten | \n",
415 | " 7.0 | \n",
416 | " 7 | \n",
417 | " 72000 | \n",
418 | "
\n",
419 | " \n",
420 | " | 7 | \n",
421 | " eleven | \n",
422 | " 7.0 | \n",
423 | " 8 | \n",
424 | " 80000 | \n",
425 | "
\n",
426 | " \n",
427 | "
\n",
428 | "
"
429 | ],
430 | "text/plain": [
431 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n",
432 | "0 zero 8.0 9 50000\n",
433 | "1 zero 8.0 6 45000\n",
434 | "2 five 6.0 7 60000\n",
435 | "3 two 10.0 10 65000\n",
436 | "4 seven 9.0 6 70000\n",
437 | "5 three 7.0 10 62000\n",
438 | "6 ten 7.0 7 72000\n",
439 | "7 eleven 7.0 8 80000"
440 | ]
441 | },
442 | "execution_count": 38,
443 | "metadata": {},
444 | "output_type": "execute_result"
445 | }
446 | ],
447 | "source": [
448 | "df.experience = df.experience.fillna('zero')\n",
449 | "df"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 39,
455 | "id": "22a6e7a3-61cf-45ec-a066-dfb5625d3f6a",
456 | "metadata": {},
457 | "outputs": [
458 | {
459 | "data": {
460 | "text/html": [
461 | "\n",
462 | "\n",
475 | "
\n",
476 | " \n",
477 | " \n",
478 | " | \n",
479 | " experience | \n",
480 | " test_score(out of 10) | \n",
481 | " interview_score(out of 10) | \n",
482 | " salary($) | \n",
483 | "
\n",
484 | " \n",
485 | " \n",
486 | " \n",
487 | " | 0 | \n",
488 | " 0 | \n",
489 | " 8.0 | \n",
490 | " 9 | \n",
491 | " 50000 | \n",
492 | "
\n",
493 | " \n",
494 | " | 1 | \n",
495 | " 0 | \n",
496 | " 8.0 | \n",
497 | " 6 | \n",
498 | " 45000 | \n",
499 | "
\n",
500 | " \n",
501 | " | 2 | \n",
502 | " 5 | \n",
503 | " 6.0 | \n",
504 | " 7 | \n",
505 | " 60000 | \n",
506 | "
\n",
507 | " \n",
508 | " | 3 | \n",
509 | " 2 | \n",
510 | " 10.0 | \n",
511 | " 10 | \n",
512 | " 65000 | \n",
513 | "
\n",
514 | " \n",
515 | " | 4 | \n",
516 | " 7 | \n",
517 | " 9.0 | \n",
518 | " 6 | \n",
519 | " 70000 | \n",
520 | "
\n",
521 | " \n",
522 | " | 5 | \n",
523 | " 3 | \n",
524 | " 7.0 | \n",
525 | " 10 | \n",
526 | " 62000 | \n",
527 | "
\n",
528 | " \n",
529 | " | 6 | \n",
530 | " 10 | \n",
531 | " 7.0 | \n",
532 | " 7 | \n",
533 | " 72000 | \n",
534 | "
\n",
535 | " \n",
536 | " | 7 | \n",
537 | " 11 | \n",
538 | " 7.0 | \n",
539 | " 8 | \n",
540 | " 80000 | \n",
541 | "
\n",
542 | " \n",
543 | "
\n",
544 | "
"
545 | ],
546 | "text/plain": [
547 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n",
548 | "0 0 8.0 9 50000\n",
549 | "1 0 8.0 6 45000\n",
550 | "2 5 6.0 7 60000\n",
551 | "3 2 10.0 10 65000\n",
552 | "4 7 9.0 6 70000\n",
553 | "5 3 7.0 10 62000\n",
554 | "6 10 7.0 7 72000\n",
555 | "7 11 7.0 8 80000"
556 | ]
557 | },
558 | "execution_count": 39,
559 | "metadata": {},
560 | "output_type": "execute_result"
561 | }
562 | ],
563 | "source": [
564 | "df.experience = df.experience.apply(w2n.word_to_num)\n",
565 | "df"
566 | ]
567 | },
568 | {
569 | "cell_type": "markdown",
570 | "id": "da085474-684c-460b-9786-72b77802706e",
571 | "metadata": {},
572 | "source": [
573 | "#### Define a mapping of number words to numbers. The same conversion can also be done using the word2number module."
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": 12,
579 | "id": "b7a45514-9abf-4a71-8d28-7e1b5c3cd7ca",
580 | "metadata": {},
581 | "outputs": [],
582 | "source": [
583 | "word_to_number = {\n",
584 | " 'zero': 0, 'one': 1, 'two': 2, 'three': 3, 'four': 4,\n",
585 | " 'five': 5, 'six': 6, 'seven': 7, 'eight': 8, 'nine': 9, 'ten': 10,\n",
586 | " 'eleven': 11, 'twelve': 12, 'thirteen': 13, 'fourteen': 14, 'fifteen': 15\n",
587 | "}"
588 | ]
589 | },
590 | {
591 | "cell_type": "code",
592 | "execution_count": 15,
593 | "id": "f0890063-dfd3-4abe-9772-f15a64d443b1",
594 | "metadata": {
595 | "collapsed": true,
596 | "jupyter": {
597 | "outputs_hidden": true
598 | }
599 | },
600 | "outputs": [
601 | {
602 | "data": {
603 | "text/html": [
604 | "\n",
605 | "\n",
618 | "
\n",
619 | " \n",
620 | " \n",
621 | " | \n",
622 | " experience | \n",
623 | " test_score(out of 10) | \n",
624 | " interview_score(out of 10) | \n",
625 | " salary($) | \n",
626 | "
\n",
627 | " \n",
628 | " \n",
629 | " \n",
630 | " | 0 | \n",
631 | " NaN | \n",
632 | " 8.0 | \n",
633 | " 9 | \n",
634 | " 50000 | \n",
635 | "
\n",
636 | " \n",
637 | " | 1 | \n",
638 | " NaN | \n",
639 | " 8.0 | \n",
640 | " 6 | \n",
641 | " 45000 | \n",
642 | "
\n",
643 | " \n",
644 | " | 2 | \n",
645 | " 5.0 | \n",
646 | " 6.0 | \n",
647 | " 7 | \n",
648 | " 60000 | \n",
649 | "
\n",
650 | " \n",
651 | " | 3 | \n",
652 | " 2.0 | \n",
653 | " 10.0 | \n",
654 | " 10 | \n",
655 | " 65000 | \n",
656 | "
\n",
657 | " \n",
658 | " | 4 | \n",
659 | " 7.0 | \n",
660 | " 9.0 | \n",
661 | " 6 | \n",
662 | " 70000 | \n",
663 | "
\n",
664 | " \n",
665 | " | 5 | \n",
666 | " 3.0 | \n",
667 | " 7.0 | \n",
668 | " 10 | \n",
669 | " 62000 | \n",
670 | "
\n",
671 | " \n",
672 | " | 6 | \n",
673 | " 10.0 | \n",
674 | " 8.0 | \n",
675 | " 7 | \n",
676 | " 72000 | \n",
677 | "
\n",
678 | " \n",
679 | " | 7 | \n",
680 | " 11.0 | \n",
681 | " 7.0 | \n",
682 | " 8 | \n",
683 | " 80000 | \n",
684 | "
\n",
685 | " \n",
686 | "
\n",
687 | "
"
688 | ],
689 | "text/plain": [
690 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n",
691 | "0 NaN 8.0 9 50000\n",
692 | "1 NaN 8.0 6 45000\n",
693 | "2 5.0 6.0 7 60000\n",
694 | "3 2.0 10.0 10 65000\n",
695 | "4 7.0 9.0 6 70000\n",
696 | "5 3.0 7.0 10 62000\n",
697 | "6 10.0 8.0 7 72000\n",
698 | "7 11.0 7.0 8 80000"
699 | ]
700 | },
701 | "execution_count": 15,
702 | "metadata": {},
703 | "output_type": "execute_result"
704 | }
705 | ],
706 | "source": [
707 | "df.experience = df['experience'].map(word_to_number)\n",
708 | "df"
709 | ]
710 | },
711 | {
712 | "cell_type": "markdown",
713 | "id": "2ddbf835-5a78-47e0-9fd6-6372530d428d",
714 | "metadata": {},
715 | "source": [
716 | "## Creating the Linear Regression Object / Model"
717 | ]
718 | },
719 | {
720 | "cell_type": "code",
721 | "execution_count": 40,
722 | "id": "c9b8e084-a491-47ba-bc70-192f0c573ae5",
723 | "metadata": {},
724 | "outputs": [
725 | {
726 | "data": {
727 | "text/html": [
728 | "LinearRegression()
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org. "
729 | ],
730 | "text/plain": [
731 | "LinearRegression()"
732 | ]
733 | },
734 | "execution_count": 40,
735 | "metadata": {},
736 | "output_type": "execute_result"
737 | }
738 | ],
739 | "source": [
740 | "model = linear_model.LinearRegression()\n",
741 | "model.fit(df[['experience','test_score(out of 10)','interview_score(out of 10)']], df['salary($)'])"
742 | ]
743 | },
744 | {
745 | "cell_type": "code",
746 | "execution_count": 41,
747 | "id": "2e2fcd0e-76f3-4671-8b4d-515e7964dc3f",
748 | "metadata": {},
749 | "outputs": [
750 | {
751 | "name": "stderr",
752 | "output_type": "stream",
753 | "text": [
754 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
755 | " warnings.warn(\n"
756 | ]
757 | },
758 | {
759 | "data": {
760 | "text/plain": [
761 | "array([53713.86677124])"
762 | ]
763 | },
764 | "execution_count": 41,
765 | "metadata": {},
766 | "output_type": "execute_result"
767 | }
768 | ],
769 | "source": [
770 | "model.predict([[2,9,6]])"
771 | ]
772 | },
773 | {
774 | "cell_type": "code",
775 | "execution_count": 42,
776 | "id": "70eeccb6-a445-4a62-a918-481733e86b80",
777 | "metadata": {},
778 | "outputs": [
779 | {
780 | "name": "stderr",
781 | "output_type": "stream",
782 | "text": [
783 | "/opt/homebrew/lib/python3.11/site-packages/sklearn/base.py:464: UserWarning: X does not have valid feature names, but LinearRegression was fitted with feature names\n",
784 | " warnings.warn(\n"
785 | ]
786 | },
787 | {
788 | "data": {
789 | "text/plain": [
790 | "array([93747.79628651])"
791 | ]
792 | },
793 | "execution_count": 42,
794 | "metadata": {},
795 | "output_type": "execute_result"
796 | }
797 | ],
798 | "source": [
799 | "model.predict([[12,10,10]])"
800 | ]
801 | },
802 | {
803 | "cell_type": "code",
804 | "execution_count": 31,
805 | "id": "088bb049-859c-4618-93cd-8a72da2eb482",
806 | "metadata": {},
807 | "outputs": [
808 | {
809 | "data": {
810 | "text/html": [
811 | "\n",
812 | "\n",
825 | "
\n",
826 | " \n",
827 | " \n",
828 | " | \n",
829 | " experience | \n",
830 | " test_score(out of 10) | \n",
831 | " interview_score(out of 10) | \n",
832 | " salary($) | \n",
833 | "
\n",
834 | " \n",
835 | " \n",
836 | " \n",
837 | " | 0 | \n",
838 | " 6.0 | \n",
839 | " 8.0 | \n",
840 | " 9 | \n",
841 | " 50000 | \n",
842 | "
\n",
843 | " \n",
844 | " | 1 | \n",
845 | " 6.0 | \n",
846 | " 8.0 | \n",
847 | " 6 | \n",
848 | " 45000 | \n",
849 | "
\n",
850 | " \n",
851 | " | 2 | \n",
852 | " 5.0 | \n",
853 | " 6.0 | \n",
854 | " 7 | \n",
855 | " 60000 | \n",
856 | "
\n",
857 | " \n",
858 | " | 3 | \n",
859 | " 2.0 | \n",
860 | " 10.0 | \n",
861 | " 10 | \n",
862 | " 65000 | \n",
863 | "
\n",
864 | " \n",
865 | " | 4 | \n",
866 | " 7.0 | \n",
867 | " 9.0 | \n",
868 | " 6 | \n",
869 | " 70000 | \n",
870 | "
\n",
871 | " \n",
872 | " | 5 | \n",
873 | " 3.0 | \n",
874 | " 7.0 | \n",
875 | " 10 | \n",
876 | " 62000 | \n",
877 | "
\n",
878 | " \n",
879 | " | 6 | \n",
880 | " 10.0 | \n",
881 | " 8.0 | \n",
882 | " 7 | \n",
883 | " 72000 | \n",
884 | "
\n",
885 | " \n",
886 | " | 7 | \n",
887 | " 11.0 | \n",
888 | " 7.0 | \n",
889 | " 8 | \n",
890 | " 80000 | \n",
891 | "
\n",
892 | " \n",
893 | "
\n",
894 | "
"
895 | ],
896 | "text/plain": [
897 | " experience test_score(out of 10) interview_score(out of 10) salary($)\n",
898 | "0 6.0 8.0 9 50000\n",
899 | "1 6.0 8.0 6 45000\n",
900 | "2 5.0 6.0 7 60000\n",
901 | "3 2.0 10.0 10 65000\n",
902 | "4 7.0 9.0 6 70000\n",
903 | "5 3.0 7.0 10 62000\n",
904 | "6 10.0 8.0 7 72000\n",
905 | "7 11.0 7.0 8 80000"
906 | ]
907 | },
908 | "execution_count": 31,
909 | "metadata": {},
910 | "output_type": "execute_result"
911 | }
912 | ],
913 | "source": [
914 | "df.experience = df.experience.fillna(df.experience.median())\n",
915 | "df"
916 | ]
917 | }
918 | ],
919 | "metadata": {
920 | "kernelspec": {
921 | "display_name": "Python 3 (ipykernel)",
922 | "language": "python",
923 | "name": "python3"
924 | },
925 | "language_info": {
926 | "codemirror_mode": {
927 | "name": "ipython",
928 | "version": 3
929 | },
930 | "file_extension": ".py",
931 | "mimetype": "text/x-python",
932 | "name": "python",
933 | "nbconvert_exporter": "python",
934 | "pygments_lexer": "ipython3",
935 | "version": "3.11.4"
936 | }
937 | },
938 | "nbformat": 4,
939 | "nbformat_minor": 5
940 | }
941 |
--------------------------------------------------------------------------------