├── Multiple Linear Regression in Python
├── init
└── Housing+Case+Study+using+RFE (2).ipynb
├── Industry Relevance of Linear Regression
└── init
├── Simple Linear Regression in Python
└── init
└── README.md
/Multiple Linear Regression in Python/init:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/Industry Relevance of Linear Regression/init:
--------------------------------------------------------------------------------
1 | Initialise
2 |
--------------------------------------------------------------------------------
/Simple Linear Regression in Python/init:
--------------------------------------------------------------------------------
1 | initialising folder
2 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Welcome to Linear Regression Module
2 |
3 | ## TOC:
4 | - How to download files?
5 | - What is where?
6 |
7 | ### How to download files?
8 | 
9 |
10 | Click on the Code button and then click on Download ZIP
11 | OR
12 | Use the `git clone https://github.com/ContentUpgrad/Linear-Regression.git` command in your terminal if git is installed on your machine.
13 |
14 |
15 | ### What is where?
16 | The folder structure is given below:
17 |
18 | 
19 |
20 |
21 | As you can see there are three main folders when you log in:
22 |
23 | 1. **Industry Relevance of Linear Regression**: This is where all the code files for the Industry Relevance of Linear Regression sessions are kept.
24 | 2. **Multiple Linear Regression in Python**: This is where all the code files for the Multiple Linear Regression in Python session are kept.
25 | 3. **Simple Linear Regression in Python**: This is where all the code files for the Simple Linear Regression in Python session are kept.
26 |
27 | When you click on any folder you will find the code and data folders as shown below:
28 | 
29 |
30 | You will find all the code files of the session in the code folder; the data folder will be empty. Please note that you need to follow the instructions given in the segment to download the data files and place them in the data folder manually.
31 |
32 | #### Industry Relevance of Linear Regression
33 | You will find the following files in the code folder of Industry Relevance of Linear Regression
34 | 
35 |
36 |
37 | #### Multiple Linear Regression in Python
38 | You will find the following files in the code folder of Multiple Linear Regression in Python
39 | 
40 |
41 |
42 | #### Simple Linear Regression in Python
43 | You will find the following files in the code folder of Simple Linear Regression in Python
44 | 
45 |
46 |
--------------------------------------------------------------------------------
/Multiple Linear Regression in Python/Housing+Case+Study+using+RFE (2).ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "## Model Selection using RFE (Housing Case Study)"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "### Importing and Understanding Data"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 1,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "# Suppress Warnings\n",
24 | "\n",
25 | "import warnings\n",
26 | "warnings.filterwarnings('ignore')"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": null,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "\n",
36 | "# # Recommended versions\n",
37 | "# numpy \t1.26.4\n",
38 | "# pandas\t2.2.2\n",
39 | "# matplotlib\t3.7.1\n",
40 | "# seaborn\t0.10.0\n",
41 | "# statsmodels\t0.14.4\n",
42 | "# sklearn\t1.5.2"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 2,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "import pandas as pd\n",
52 | "import numpy as np"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 3,
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "# Importing Housing.csv\n",
62 | "housing = pd.read_csv('Housing.csv')"
63 | ]
64 | },
65 | {
66 | "cell_type": "code",
67 | "execution_count": 4,
68 | "metadata": {},
69 | "outputs": [
70 | {
71 | "data": {
72 | "text/html": [
73 | "
\n",
74 | "\n",
87 | "
\n",
88 | " \n",
89 | " \n",
90 | " \n",
91 | " price \n",
92 | " area \n",
93 | " bedrooms \n",
94 | " bathrooms \n",
95 | " stories \n",
96 | " mainroad \n",
97 | " guestroom \n",
98 | " basement \n",
99 | " hotwaterheating \n",
100 | " airconditioning \n",
101 | " parking \n",
102 | " prefarea \n",
103 | " furnishingstatus \n",
104 | " \n",
105 | " \n",
106 | " \n",
107 | " \n",
108 | " 0 \n",
109 | " 13300000 \n",
110 | " 7420 \n",
111 | " 4 \n",
112 | " 2 \n",
113 | " 3 \n",
114 | " yes \n",
115 | " no \n",
116 | " no \n",
117 | " no \n",
118 | " yes \n",
119 | " 2 \n",
120 | " yes \n",
121 | " furnished \n",
122 | " \n",
123 | " \n",
124 | " 1 \n",
125 | " 12250000 \n",
126 | " 8960 \n",
127 | " 4 \n",
128 | " 4 \n",
129 | " 4 \n",
130 | " yes \n",
131 | " no \n",
132 | " no \n",
133 | " no \n",
134 | " yes \n",
135 | " 3 \n",
136 | " no \n",
137 | " furnished \n",
138 | " \n",
139 | " \n",
140 | " 2 \n",
141 | " 12250000 \n",
142 | " 9960 \n",
143 | " 3 \n",
144 | " 2 \n",
145 | " 2 \n",
146 | " yes \n",
147 | " no \n",
148 | " yes \n",
149 | " no \n",
150 | " no \n",
151 | " 2 \n",
152 | " yes \n",
153 | " semi-furnished \n",
154 | " \n",
155 | " \n",
156 | " 3 \n",
157 | " 12215000 \n",
158 | " 7500 \n",
159 | " 4 \n",
160 | " 2 \n",
161 | " 2 \n",
162 | " yes \n",
163 | " no \n",
164 | " yes \n",
165 | " no \n",
166 | " yes \n",
167 | " 3 \n",
168 | " yes \n",
169 | " furnished \n",
170 | " \n",
171 | " \n",
172 | " 4 \n",
173 | " 11410000 \n",
174 | " 7420 \n",
175 | " 4 \n",
176 | " 1 \n",
177 | " 2 \n",
178 | " yes \n",
179 | " yes \n",
180 | " yes \n",
181 | " no \n",
182 | " yes \n",
183 | " 2 \n",
184 | " no \n",
185 | " furnished \n",
186 | " \n",
187 | " \n",
188 | "
\n",
189 | "
"
190 | ],
191 | "text/plain": [
192 | " price area bedrooms bathrooms stories mainroad guestroom basement \\\n",
193 | "0 13300000 7420 4 2 3 yes no no \n",
194 | "1 12250000 8960 4 4 4 yes no no \n",
195 | "2 12250000 9960 3 2 2 yes no yes \n",
196 | "3 12215000 7500 4 2 2 yes no yes \n",
197 | "4 11410000 7420 4 1 2 yes yes yes \n",
198 | "\n",
199 | " hotwaterheating airconditioning parking prefarea furnishingstatus \n",
200 | "0 no yes 2 yes furnished \n",
201 | "1 no yes 3 no furnished \n",
202 | "2 no no 2 yes semi-furnished \n",
203 | "3 no yes 3 yes furnished \n",
204 | "4 no yes 2 no furnished "
205 | ]
206 | },
207 | "execution_count": 4,
208 | "metadata": {},
209 | "output_type": "execute_result"
210 | }
211 | ],
212 | "source": [
213 | "# Looking at the first five rows\n",
214 | "housing.head()"
215 | ]
216 | },
217 | {
218 | "cell_type": "markdown",
219 | "metadata": {},
220 | "source": [
221 | "### Data Preparation"
222 | ]
223 | },
224 | {
225 | "cell_type": "code",
226 | "execution_count": 5,
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "# List of variables to map\n",
231 | "\n",
232 | "varlist = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea']\n",
233 | "\n",
234 | "# Defining the map function\n",
235 | "def binary_map(x):\n",
236 | " return x.map({'yes': 1, \"no\": 0})\n",
237 | "\n",
238 | "# Applying the function to the yes/no columns of the housing dataframe\n",
239 | "housing[varlist] = housing[varlist].apply(binary_map)"
240 | ]
241 | },
242 | {
243 | "cell_type": "code",
244 | "execution_count": 6,
245 | "metadata": {
246 | "scrolled": false
247 | },
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/html": [
252 | "\n",
253 | "\n",
266 | "
\n",
267 | " \n",
268 | " \n",
269 | " \n",
270 | " price \n",
271 | " area \n",
272 | " bedrooms \n",
273 | " bathrooms \n",
274 | " stories \n",
275 | " mainroad \n",
276 | " guestroom \n",
277 | " basement \n",
278 | " hotwaterheating \n",
279 | " airconditioning \n",
280 | " parking \n",
281 | " prefarea \n",
282 | " furnishingstatus \n",
283 | " \n",
284 | " \n",
285 | " \n",
286 | " \n",
287 | " 0 \n",
288 | " 13300000 \n",
289 | " 7420 \n",
290 | " 4 \n",
291 | " 2 \n",
292 | " 3 \n",
293 | " 1 \n",
294 | " 0 \n",
295 | " 0 \n",
296 | " 0 \n",
297 | " 1 \n",
298 | " 2 \n",
299 | " 1 \n",
300 | " furnished \n",
301 | " \n",
302 | " \n",
303 | " 1 \n",
304 | " 12250000 \n",
305 | " 8960 \n",
306 | " 4 \n",
307 | " 4 \n",
308 | " 4 \n",
309 | " 1 \n",
310 | " 0 \n",
311 | " 0 \n",
312 | " 0 \n",
313 | " 1 \n",
314 | " 3 \n",
315 | " 0 \n",
316 | " furnished \n",
317 | " \n",
318 | " \n",
319 | " 2 \n",
320 | " 12250000 \n",
321 | " 9960 \n",
322 | " 3 \n",
323 | " 2 \n",
324 | " 2 \n",
325 | " 1 \n",
326 | " 0 \n",
327 | " 1 \n",
328 | " 0 \n",
329 | " 0 \n",
330 | " 2 \n",
331 | " 1 \n",
332 | " semi-furnished \n",
333 | " \n",
334 | " \n",
335 | " 3 \n",
336 | " 12215000 \n",
337 | " 7500 \n",
338 | " 4 \n",
339 | " 2 \n",
340 | " 2 \n",
341 | " 1 \n",
342 | " 0 \n",
343 | " 1 \n",
344 | " 0 \n",
345 | " 1 \n",
346 | " 3 \n",
347 | " 1 \n",
348 | " furnished \n",
349 | " \n",
350 | " \n",
351 | " 4 \n",
352 | " 11410000 \n",
353 | " 7420 \n",
354 | " 4 \n",
355 | " 1 \n",
356 | " 2 \n",
357 | " 1 \n",
358 | " 1 \n",
359 | " 1 \n",
360 | " 0 \n",
361 | " 1 \n",
362 | " 2 \n",
363 | " 0 \n",
364 | " furnished \n",
365 | " \n",
366 | " \n",
367 | "
\n",
368 | "
"
369 | ],
370 | "text/plain": [
371 | " price area bedrooms bathrooms stories mainroad guestroom \\\n",
372 | "0 13300000 7420 4 2 3 1 0 \n",
373 | "1 12250000 8960 4 4 4 1 0 \n",
374 | "2 12250000 9960 3 2 2 1 0 \n",
375 | "3 12215000 7500 4 2 2 1 0 \n",
376 | "4 11410000 7420 4 1 2 1 1 \n",
377 | "\n",
378 | " basement hotwaterheating airconditioning parking prefarea \\\n",
379 | "0 0 0 1 2 1 \n",
380 | "1 0 0 1 3 0 \n",
381 | "2 1 0 0 2 1 \n",
382 | "3 1 0 1 3 1 \n",
383 | "4 1 0 1 2 0 \n",
384 | "\n",
385 | " furnishingstatus \n",
386 | "0 furnished \n",
387 | "1 furnished \n",
388 | "2 semi-furnished \n",
389 | "3 furnished \n",
390 | "4 furnished "
391 | ]
392 | },
393 | "execution_count": 6,
394 | "metadata": {},
395 | "output_type": "execute_result"
396 | }
397 | ],
398 | "source": [
399 | "# Check the housing dataframe now\n",
400 | "\n",
401 | "housing.head()"
402 | ]
403 | },
404 | {
405 | "cell_type": "markdown",
406 | "metadata": {},
407 | "source": [
408 | "### Dummy Variables"
409 | ]
410 | },
411 | {
412 | "cell_type": "markdown",
413 | "metadata": {},
414 | "source": [
415 | "The variable `furnishingstatus` has three levels. We need to convert these levels into integers as well. For this, we will use something called `dummy variables`."
416 | ]
417 | },
418 | {
419 | "cell_type": "code",
420 | "execution_count": 7,
421 | "metadata": {},
422 | "outputs": [
423 | {
424 | "data": {
425 | "text/html": [
426 | "\n",
427 | "\n",
440 | "
\n",
441 | " \n",
442 | " \n",
443 | " \n",
444 | " furnished \n",
445 | " semi-furnished \n",
446 | " unfurnished \n",
447 | " \n",
448 | " \n",
449 | " \n",
450 | " \n",
451 | " 0 \n",
452 | " 1 \n",
453 | " 0 \n",
454 | " 0 \n",
455 | " \n",
456 | " \n",
457 | " 1 \n",
458 | " 1 \n",
459 | " 0 \n",
460 | " 0 \n",
461 | " \n",
462 | " \n",
463 | " 2 \n",
464 | " 0 \n",
465 | " 1 \n",
466 | " 0 \n",
467 | " \n",
468 | " \n",
469 | " 3 \n",
470 | " 1 \n",
471 | " 0 \n",
472 | " 0 \n",
473 | " \n",
474 | " \n",
475 | " 4 \n",
476 | " 1 \n",
477 | " 0 \n",
478 | " 0 \n",
479 | " \n",
480 | " \n",
481 | "
\n",
482 | "
"
483 | ],
484 | "text/plain": [
485 | " furnished semi-furnished unfurnished\n",
486 | "0 1 0 0\n",
487 | "1 1 0 0\n",
488 | "2 0 1 0\n",
489 | "3 1 0 0\n",
490 | "4 1 0 0"
491 | ]
492 | },
493 | "execution_count": 7,
494 | "metadata": {},
495 | "output_type": "execute_result"
496 | }
497 | ],
498 | "source": [
499 | "# Get the dummy variables for the feature 'furnishingstatus' and store it in a new variable - 'status' ('dtype = int' keeps 0/1 integers; pandas 2.x returns booleans by default)\n",
500 | "\n",
501 | "status = pd.get_dummies(housing['furnishingstatus'], dtype = int)\n",
502 | "\n",
503 | "# Check what the dataset 'status' looks like\n",
504 | "status.head()"
505 | ]
506 | },
507 | {
508 | "cell_type": "markdown",
509 | "metadata": {},
510 | "source": [
511 | "Now, you don't need three columns. You can drop the `furnished` column, as the type of furnishing can be identified with just the last two columns (`semi-furnished`, `unfurnished`) where — \n",
512 | "- `00` will correspond to `furnished`\n",
513 | "- `01` will correspond to `unfurnished`\n",
514 | "- `10` will correspond to `semi-furnished`"
515 | ]
516 | },
517 | {
518 | "cell_type": "code",
519 | "execution_count": 8,
520 | "metadata": {},
521 | "outputs": [
522 | {
523 | "data": {
524 | "text/html": [
525 | "\n",
526 | "\n",
539 | "
\n",
540 | " \n",
541 | " \n",
542 | " \n",
543 | " price \n",
544 | " area \n",
545 | " bedrooms \n",
546 | " bathrooms \n",
547 | " stories \n",
548 | " mainroad \n",
549 | " guestroom \n",
550 | " basement \n",
551 | " hotwaterheating \n",
552 | " airconditioning \n",
553 | " parking \n",
554 | " prefarea \n",
555 | " furnishingstatus \n",
556 | " semi-furnished \n",
557 | " unfurnished \n",
558 | " \n",
559 | " \n",
560 | " \n",
561 | " \n",
562 | " 0 \n",
563 | " 13300000 \n",
564 | " 7420 \n",
565 | " 4 \n",
566 | " 2 \n",
567 | " 3 \n",
568 | " 1 \n",
569 | " 0 \n",
570 | " 0 \n",
571 | " 0 \n",
572 | " 1 \n",
573 | " 2 \n",
574 | " 1 \n",
575 | " furnished \n",
576 | " 0 \n",
577 | " 0 \n",
578 | " \n",
579 | " \n",
580 | " 1 \n",
581 | " 12250000 \n",
582 | " 8960 \n",
583 | " 4 \n",
584 | " 4 \n",
585 | " 4 \n",
586 | " 1 \n",
587 | " 0 \n",
588 | " 0 \n",
589 | " 0 \n",
590 | " 1 \n",
591 | " 3 \n",
592 | " 0 \n",
593 | " furnished \n",
594 | " 0 \n",
595 | " 0 \n",
596 | " \n",
597 | " \n",
598 | " 2 \n",
599 | " 12250000 \n",
600 | " 9960 \n",
601 | " 3 \n",
602 | " 2 \n",
603 | " 2 \n",
604 | " 1 \n",
605 | " 0 \n",
606 | " 1 \n",
607 | " 0 \n",
608 | " 0 \n",
609 | " 2 \n",
610 | " 1 \n",
611 | " semi-furnished \n",
612 | " 1 \n",
613 | " 0 \n",
614 | " \n",
615 | " \n",
616 | " 3 \n",
617 | " 12215000 \n",
618 | " 7500 \n",
619 | " 4 \n",
620 | " 2 \n",
621 | " 2 \n",
622 | " 1 \n",
623 | " 0 \n",
624 | " 1 \n",
625 | " 0 \n",
626 | " 1 \n",
627 | " 3 \n",
628 | " 1 \n",
629 | " furnished \n",
630 | " 0 \n",
631 | " 0 \n",
632 | " \n",
633 | " \n",
634 | " 4 \n",
635 | " 11410000 \n",
636 | " 7420 \n",
637 | " 4 \n",
638 | " 1 \n",
639 | " 2 \n",
640 | " 1 \n",
641 | " 1 \n",
642 | " 1 \n",
643 | " 0 \n",
644 | " 1 \n",
645 | " 2 \n",
646 | " 0 \n",
647 | " furnished \n",
648 | " 0 \n",
649 | " 0 \n",
650 | " \n",
651 | " \n",
652 | "
\n",
653 | "
"
654 | ],
655 | "text/plain": [
656 | " price area bedrooms bathrooms stories mainroad guestroom \\\n",
657 | "0 13300000 7420 4 2 3 1 0 \n",
658 | "1 12250000 8960 4 4 4 1 0 \n",
659 | "2 12250000 9960 3 2 2 1 0 \n",
660 | "3 12215000 7500 4 2 2 1 0 \n",
661 | "4 11410000 7420 4 1 2 1 1 \n",
662 | "\n",
663 | " basement hotwaterheating airconditioning parking prefarea \\\n",
664 | "0 0 0 1 2 1 \n",
665 | "1 0 0 1 3 0 \n",
666 | "2 1 0 0 2 1 \n",
667 | "3 1 0 1 3 1 \n",
668 | "4 1 0 1 2 0 \n",
669 | "\n",
670 | " furnishingstatus semi-furnished unfurnished \n",
671 | "0 furnished 0 0 \n",
672 | "1 furnished 0 0 \n",
673 | "2 semi-furnished 1 0 \n",
674 | "3 furnished 0 0 \n",
675 | "4 furnished 0 0 "
676 | ]
677 | },
678 | "execution_count": 8,
679 | "metadata": {},
680 | "output_type": "execute_result"
681 | }
682 | ],
683 | "source": [
684 | "# Let's drop the first column from status df using 'drop_first = True'; 'dtype = int' keeps the dummies as 0/1 integers (pandas 2.x returns booleans by default)\n",
685 | "status = pd.get_dummies(housing['furnishingstatus'], drop_first = True, dtype = int)\n",
686 | "\n",
687 | "# Add the results to the original housing dataframe\n",
688 | "housing = pd.concat([housing, status], axis = 1)\n",
689 | "\n",
690 | "# Now let's see the head of our dataframe.\n",
691 | "housing.head()"
692 | ]
693 | },
694 | {
695 | "cell_type": "code",
696 | "execution_count": 9,
697 | "metadata": {},
698 | "outputs": [
699 | {
700 | "data": {
701 | "text/html": [
702 | "\n",
703 | "\n",
716 | "
\n",
717 | " \n",
718 | " \n",
719 | " \n",
720 | " price \n",
721 | " area \n",
722 | " bedrooms \n",
723 | " bathrooms \n",
724 | " stories \n",
725 | " mainroad \n",
726 | " guestroom \n",
727 | " basement \n",
728 | " hotwaterheating \n",
729 | " airconditioning \n",
730 | " parking \n",
731 | " prefarea \n",
732 | " semi-furnished \n",
733 | " unfurnished \n",
734 | " \n",
735 | " \n",
736 | " \n",
737 | " \n",
738 | " 0 \n",
739 | " 13300000 \n",
740 | " 7420 \n",
741 | " 4 \n",
742 | " 2 \n",
743 | " 3 \n",
744 | " 1 \n",
745 | " 0 \n",
746 | " 0 \n",
747 | " 0 \n",
748 | " 1 \n",
749 | " 2 \n",
750 | " 1 \n",
751 | " 0 \n",
752 | " 0 \n",
753 | " \n",
754 | " \n",
755 | " 1 \n",
756 | " 12250000 \n",
757 | " 8960 \n",
758 | " 4 \n",
759 | " 4 \n",
760 | " 4 \n",
761 | " 1 \n",
762 | " 0 \n",
763 | " 0 \n",
764 | " 0 \n",
765 | " 1 \n",
766 | " 3 \n",
767 | " 0 \n",
768 | " 0 \n",
769 | " 0 \n",
770 | " \n",
771 | " \n",
772 | " 2 \n",
773 | " 12250000 \n",
774 | " 9960 \n",
775 | " 3 \n",
776 | " 2 \n",
777 | " 2 \n",
778 | " 1 \n",
779 | " 0 \n",
780 | " 1 \n",
781 | " 0 \n",
782 | " 0 \n",
783 | " 2 \n",
784 | " 1 \n",
785 | " 1 \n",
786 | " 0 \n",
787 | " \n",
788 | " \n",
789 | " 3 \n",
790 | " 12215000 \n",
791 | " 7500 \n",
792 | " 4 \n",
793 | " 2 \n",
794 | " 2 \n",
795 | " 1 \n",
796 | " 0 \n",
797 | " 1 \n",
798 | " 0 \n",
799 | " 1 \n",
800 | " 3 \n",
801 | " 1 \n",
802 | " 0 \n",
803 | " 0 \n",
804 | " \n",
805 | " \n",
806 | " 4 \n",
807 | " 11410000 \n",
808 | " 7420 \n",
809 | " 4 \n",
810 | " 1 \n",
811 | " 2 \n",
812 | " 1 \n",
813 | " 1 \n",
814 | " 1 \n",
815 | " 0 \n",
816 | " 1 \n",
817 | " 2 \n",
818 | " 0 \n",
819 | " 0 \n",
820 | " 0 \n",
821 | " \n",
822 | " \n",
823 | "
\n",
824 | "
"
825 | ],
826 | "text/plain": [
827 | " price area bedrooms bathrooms stories mainroad guestroom \\\n",
828 | "0 13300000 7420 4 2 3 1 0 \n",
829 | "1 12250000 8960 4 4 4 1 0 \n",
830 | "2 12250000 9960 3 2 2 1 0 \n",
831 | "3 12215000 7500 4 2 2 1 0 \n",
832 | "4 11410000 7420 4 1 2 1 1 \n",
833 | "\n",
834 | " basement hotwaterheating airconditioning parking prefarea \\\n",
835 | "0 0 0 1 2 1 \n",
836 | "1 0 0 1 3 0 \n",
837 | "2 1 0 0 2 1 \n",
838 | "3 1 0 1 3 1 \n",
839 | "4 1 0 1 2 0 \n",
840 | "\n",
841 | " semi-furnished unfurnished \n",
842 | "0 0 0 \n",
843 | "1 0 0 \n",
844 | "2 1 0 \n",
845 | "3 0 0 \n",
846 | "4 0 0 "
847 | ]
848 | },
849 | "execution_count": 9,
850 | "metadata": {},
851 | "output_type": "execute_result"
852 | }
853 | ],
854 | "source": [
855 | "# Drop 'furnishingstatus' as we have created the dummies for it\n",
856 | "housing.drop(['furnishingstatus'], axis = 1, inplace = True)\n",
857 | "\n",
858 | "housing.head()"
859 | ]
860 | },
861 | {
862 | "cell_type": "markdown",
863 | "metadata": {},
864 | "source": [
865 | "## Splitting the Data into Training and Testing Sets"
866 | ]
867 | },
868 | {
869 | "cell_type": "code",
870 | "execution_count": 10,
871 | "metadata": {},
872 | "outputs": [],
873 | "source": [
874 | "from sklearn.model_selection import train_test_split\n",
875 | "\n",
876 | "# We specify random_state so that the train and test data sets always contain the same rows across runs\n",
877 | "\n",
878 | "df_train, df_test = train_test_split(housing, train_size = 0.7, test_size = 0.3, random_state = 100)"
879 | ]
880 | },
881 | {
882 | "cell_type": "markdown",
883 | "metadata": {},
884 | "source": [
885 | "### Rescaling the Features \n",
886 | "\n",
887 | "We will use MinMax scaling."
888 | ]
889 | },
890 | {
891 | "cell_type": "code",
892 | "execution_count": 11,
893 | "metadata": {},
894 | "outputs": [],
895 | "source": [
896 | "from sklearn.preprocessing import MinMaxScaler\n",
897 | "scaler = MinMaxScaler()"
898 | ]
899 | },
900 | {
901 | "cell_type": "code",
902 | "execution_count": 12,
903 | "metadata": {},
904 | "outputs": [
905 | {
906 | "data": {
907 | "text/html": [
908 | "\n",
909 | "\n",
922 | "
\n",
923 | " \n",
924 | " \n",
925 | " \n",
926 | " price \n",
927 | " area \n",
928 | " bedrooms \n",
929 | " bathrooms \n",
930 | " stories \n",
931 | " mainroad \n",
932 | " guestroom \n",
933 | " basement \n",
934 | " hotwaterheating \n",
935 | " airconditioning \n",
936 | " parking \n",
937 | " prefarea \n",
938 | " semi-furnished \n",
939 | " unfurnished \n",
940 | " \n",
941 | " \n",
942 | " \n",
943 | " \n",
944 | " 359 \n",
945 | " 0.169697 \n",
946 | " 0.155227 \n",
947 | " 0.4 \n",
948 | " 0.0 \n",
949 | " 0.000000 \n",
950 | " 1 \n",
951 | " 0 \n",
952 | " 0 \n",
953 | " 0 \n",
954 | " 0 \n",
955 | " 0.333333 \n",
956 | " 0 \n",
957 | " 0 \n",
958 | " 1 \n",
959 | " \n",
960 | " \n",
961 | " 19 \n",
962 | " 0.615152 \n",
963 | " 0.403379 \n",
964 | " 0.4 \n",
965 | " 0.5 \n",
966 | " 0.333333 \n",
967 | " 1 \n",
968 | " 0 \n",
969 | " 0 \n",
970 | " 0 \n",
971 | " 1 \n",
972 | " 0.333333 \n",
973 | " 1 \n",
974 | " 1 \n",
975 | " 0 \n",
976 | " \n",
977 | " \n",
978 | " 159 \n",
979 | " 0.321212 \n",
980 | " 0.115628 \n",
981 | " 0.4 \n",
982 | " 0.5 \n",
983 | " 0.000000 \n",
984 | " 1 \n",
985 | " 1 \n",
986 | " 1 \n",
987 | " 0 \n",
988 | " 1 \n",
989 | " 0.000000 \n",
990 | " 0 \n",
991 | " 0 \n",
992 | " 0 \n",
993 | " \n",
994 | " \n",
995 | " 35 \n",
996 | " 0.548133 \n",
997 | " 0.454417 \n",
998 | " 0.4 \n",
999 | " 0.5 \n",
1000 | " 1.000000 \n",
1001 | " 1 \n",
1002 | " 0 \n",
1003 | " 0 \n",
1004 | " 0 \n",
1005 | " 1 \n",
1006 | " 0.666667 \n",
1007 | " 0 \n",
1008 | " 0 \n",
1009 | " 0 \n",
1010 | " \n",
1011 | " \n",
1012 | " 28 \n",
1013 | " 0.575758 \n",
1014 | " 0.538015 \n",
1015 | " 0.8 \n",
1016 | " 0.5 \n",
1017 | " 0.333333 \n",
1018 | " 1 \n",
1019 | " 0 \n",
1020 | " 1 \n",
1021 | " 1 \n",
1022 | " 0 \n",
1023 | " 0.666667 \n",
1024 | " 0 \n",
1025 | " 0 \n",
1026 | " 1 \n",
1027 | " \n",
1028 | " \n",
1029 | "
\n",
1030 | "
"
1031 | ],
1032 | "text/plain": [
1033 | " price area bedrooms bathrooms stories mainroad guestroom \\\n",
1034 | "359 0.169697 0.155227 0.4 0.0 0.000000 1 0 \n",
1035 | "19 0.615152 0.403379 0.4 0.5 0.333333 1 0 \n",
1036 | "159 0.321212 0.115628 0.4 0.5 0.000000 1 1 \n",
1037 | "35 0.548133 0.454417 0.4 0.5 1.000000 1 0 \n",
1038 | "28 0.575758 0.538015 0.8 0.5 0.333333 1 0 \n",
1039 | "\n",
1040 | " basement hotwaterheating airconditioning parking prefarea \\\n",
1041 | "359 0 0 0 0.333333 0 \n",
1042 | "19 0 0 1 0.333333 1 \n",
1043 | "159 1 0 1 0.000000 0 \n",
1044 | "35 0 0 1 0.666667 0 \n",
1045 | "28 1 1 0 0.666667 0 \n",
1046 | "\n",
1047 | " semi-furnished unfurnished \n",
1048 | "359 0 1 \n",
1049 | "19 1 0 \n",
1050 | "159 0 0 \n",
1051 | "35 0 0 \n",
1052 | "28 0 1 "
1053 | ]
1054 | },
1055 | "execution_count": 12,
1056 | "metadata": {},
1057 | "output_type": "execute_result"
1058 | }
1059 | ],
1060 | "source": [
1061 | "# Apply scaler() to all the columns except the 'yes-no' and 'dummy' variables\n",
1062 | "num_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking','price']\n",
1063 | "\n",
1064 | "df_train[num_vars] = scaler.fit_transform(df_train[num_vars])\n",
1065 | "\n",
1066 | "df_train.head()"
1067 | ]
1068 | },
1069 | {
1070 | "cell_type": "markdown",
1071 | "metadata": {},
1072 | "source": [
1073 | "### Dividing into X and Y sets for the model building"
1074 | ]
1075 | },
1076 | {
1077 | "cell_type": "code",
1078 | "execution_count": 13,
1079 | "metadata": {},
1080 | "outputs": [],
1081 | "source": [
1082 | "y_train = df_train.pop('price')\n",
1083 | "X_train = df_train"
1084 | ]
1085 | },
1086 | {
1087 | "cell_type": "markdown",
1088 | "metadata": {},
1089 | "source": [
1090 | "## Building our model\n",
1091 | "\n",
1092 | "This time, we will be using the **LinearRegression class from scikit-learn** for its compatibility with RFE (which is a utility from sklearn)"
1093 | ]
1094 | },
1095 | {
1096 | "cell_type": "markdown",
1097 | "metadata": {},
1098 | "source": [
1099 | "### RFE\n",
1100 | "Recursive feature elimination"
1101 | ]
1102 | },
1103 | {
1104 | "cell_type": "code",
1105 | "execution_count": 14,
1106 | "metadata": {},
1107 | "outputs": [],
1108 | "source": [
1109 | "# Importing RFE and LinearRegression\n",
1110 | "from sklearn.feature_selection import RFE\n",
1111 | "from sklearn.linear_model import LinearRegression"
1112 | ]
1113 | },
1114 | {
1115 | "cell_type": "code",
1116 | "execution_count": 15,
1117 | "metadata": {},
1118 | "outputs": [],
1119 | "source": [
1120 | "# Running RFE to select 10 features (n_features_to_select must be passed by keyword in current scikit-learn, including the recommended 1.5.2)\n",
1121 | "lm = LinearRegression()\n",
1122 | "lm.fit(X_train, y_train)\n",
1123 | "\n",
1124 | "rfe = RFE(lm, n_features_to_select=10) # running RFE\n",
1125 | "rfe = rfe.fit(X_train, y_train)"
1126 | ]
1127 | },
1128 | {
1129 | "cell_type": "code",
1130 | "execution_count": 16,
1131 | "metadata": {},
1132 | "outputs": [
1133 | {
1134 | "data": {
1135 | "text/plain": [
1136 | "[('area', True, 1),\n",
1137 | " ('bedrooms', True, 1),\n",
1138 | " ('bathrooms', True, 1),\n",
1139 | " ('stories', True, 1),\n",
1140 | " ('mainroad', True, 1),\n",
1141 | " ('guestroom', True, 1),\n",
1142 | " ('basement', False, 3),\n",
1143 | " ('hotwaterheating', True, 1),\n",
1144 | " ('airconditioning', True, 1),\n",
1145 | " ('parking', True, 1),\n",
1146 | " ('prefarea', True, 1),\n",
1147 | " ('semi-furnished', False, 4),\n",
1148 | " ('unfurnished', False, 2)]"
1149 | ]
1150 | },
1151 | "execution_count": 16,
1152 | "metadata": {},
1153 | "output_type": "execute_result"
1154 | }
1155 | ],
1156 | "source": [
1157 | "list(zip(X_train.columns,rfe.support_,rfe.ranking_))"
1158 | ]
1159 | },
1160 | {
1161 | "cell_type": "code",
1162 | "execution_count": 17,
1163 | "metadata": {
1164 | "scrolled": false
1165 | },
1166 | "outputs": [
1167 | {
1168 | "data": {
1169 | "text/plain": [
1170 | "Index(['area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom',\n",
1171 | " 'hotwaterheating', 'airconditioning', 'parking', 'prefarea'],\n",
1172 | " dtype='object')"
1173 | ]
1174 | },
1175 | "execution_count": 17,
1176 | "metadata": {},
1177 | "output_type": "execute_result"
1178 | }
1179 | ],
1180 | "source": [
1181 | "col = X_train.columns[rfe.support_]\n",
1182 | "col"
1183 | ]
1184 | },
1185 | {
1186 | "cell_type": "code",
1187 | "execution_count": 18,
1188 | "metadata": {},
1189 | "outputs": [
1190 | {
1191 | "data": {
1192 | "text/plain": [
1193 | "Index(['basement', 'semi-furnished', 'unfurnished'], dtype='object')"
1194 | ]
1195 | },
1196 | "execution_count": 18,
1197 | "metadata": {},
1198 | "output_type": "execute_result"
1199 | }
1200 | ],
1201 | "source": [
1202 | "X_train.columns[~rfe.support_]"
1203 | ]
1204 | },
1205 | {
1206 | "cell_type": "markdown",
1207 | "metadata": {},
1208 | "source": [
1209 | "### Building the model using statsmodels, for the detailed statistics"
1210 | ]
1211 | },
1212 | {
1213 | "cell_type": "code",
1214 | "execution_count": 19,
1215 | "metadata": {},
1216 | "outputs": [],
1217 | "source": [
1218 | "# Creating X_train_rfe dataframe with RFE selected variables\n",
1219 | "X_train_rfe = X_train[col]"
1220 | ]
1221 | },
1222 | {
1223 | "cell_type": "code",
1224 | "execution_count": 20,
1225 | "metadata": {},
1226 | "outputs": [],
1227 | "source": [
1228 | "# Adding a constant variable \n",
1229 | "import statsmodels.api as sm \n",
1230 | "X_train_rfe = sm.add_constant(X_train_rfe)"
1231 | ]
1232 | },
1233 | {
1234 | "cell_type": "code",
1235 | "execution_count": 21,
1236 | "metadata": {},
1237 | "outputs": [],
1238 | "source": [
1239 | "lm = sm.OLS(y_train,X_train_rfe).fit() # Running the linear model"
1240 | ]
1241 | },
1242 | {
1243 | "cell_type": "code",
1244 | "execution_count": 22,
1245 | "metadata": {},
1246 | "outputs": [
1247 | {
1248 | "name": "stdout",
1249 | "output_type": "stream",
1250 | "text": [
1251 | " OLS Regression Results \n",
1252 | "==============================================================================\n",
1253 | "Dep. Variable: price R-squared: 0.669\n",
1254 | "Model: OLS Adj. R-squared: 0.660\n",
1255 | "Method: Least Squares F-statistic: 74.89\n",
1256 | "Date: Tue, 09 Oct 2018 Prob (F-statistic): 1.28e-82\n",
1257 | "Time: 13:15:31 Log-Likelihood: 374.65\n",
1258 | "No. Observations: 381 AIC: -727.3\n",
1259 | "Df Residuals: 370 BIC: -683.9\n",
1260 | "Df Model: 10 \n",
1261 | "Covariance Type: nonrobust \n",
1262 | "===================================================================================\n",
1263 | " coef std err t P>|t| [0.025 0.975]\n",
1264 | "-----------------------------------------------------------------------------------\n",
1265 | "const 0.0027 0.018 0.151 0.880 -0.033 0.038\n",
1266 | "area 0.2363 0.030 7.787 0.000 0.177 0.296\n",
1267 | "bedrooms 0.0661 0.037 1.794 0.074 -0.006 0.139\n",
1268 | "bathrooms 0.1982 0.022 8.927 0.000 0.155 0.242\n",
1269 | "stories 0.0977 0.019 5.251 0.000 0.061 0.134\n",
1270 | "mainroad 0.0556 0.014 3.848 0.000 0.027 0.084\n",
1271 | "guestroom 0.0381 0.013 2.934 0.004 0.013 0.064\n",
1272 | "hotwaterheating 0.0897 0.022 4.104 0.000 0.047 0.133\n",
1273 | "airconditioning 0.0711 0.011 6.235 0.000 0.049 0.093\n",
1274 | "parking 0.0637 0.018 3.488 0.001 0.028 0.100\n",
1275 | "prefarea 0.0643 0.012 5.445 0.000 0.041 0.088\n",
1276 | "==============================================================================\n",
1277 | "Omnibus: 86.105 Durbin-Watson: 2.098\n",
1278 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 286.069\n",
1279 | "Skew: 0.992 Prob(JB): 7.60e-63\n",
1280 | "Kurtosis: 6.753 Cond. No. 13.2\n",
1281 | "==============================================================================\n",
1282 | "\n",
1283 | "Warnings:\n",
1284 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
1285 | ]
1286 | }
1287 | ],
1288 | "source": [
1289 | "#Let's see the summary of our linear model\n",
1290 | "print(lm.summary())"
1291 | ]
1292 | },
1293 | {
1294 | "cell_type": "markdown",
1295 | "metadata": {},
1296 | "source": [
1297 | "`bedrooms` is insignificant in the presence of the other variables (p-value 0.074 > 0.05), so it can be dropped."
1298 | ]
1299 | },
1300 | {
1301 | "cell_type": "code",
1302 | "execution_count": 23,
1303 | "metadata": {},
1304 | "outputs": [],
1305 | "source": [
1306 | "X_train_new = X_train_rfe.drop([\"bedrooms\"], axis = 1)"
1307 | ]
1308 | },
1309 | {
1310 | "cell_type": "markdown",
1311 | "metadata": {},
1312 | "source": [
1313 | "Rebuilding the model without `bedrooms`"
1314 | ]
1315 | },
1316 | {
1317 | "cell_type": "code",
1318 | "execution_count": 24,
1319 | "metadata": {},
1320 | "outputs": [],
1321 | "source": [
1322 | "# Adding a constant variable \n",
1323 | "import statsmodels.api as sm \n",
1324 | "X_train_lm = sm.add_constant(X_train_new)"
1325 | ]
1326 | },
1327 | {
1328 | "cell_type": "code",
1329 | "execution_count": 25,
1330 | "metadata": {},
1331 | "outputs": [],
1332 | "source": [
1333 | "lm = sm.OLS(y_train,X_train_lm).fit() # Running the linear model"
1334 | ]
1335 | },
1336 | {
1337 | "cell_type": "code",
1338 | "execution_count": 26,
1339 | "metadata": {},
1340 | "outputs": [
1341 | {
1342 | "name": "stdout",
1343 | "output_type": "stream",
1344 | "text": [
1345 | " OLS Regression Results \n",
1346 | "==============================================================================\n",
1347 | "Dep. Variable: price R-squared: 0.666\n",
1348 | "Model: OLS Adj. R-squared: 0.658\n",
1349 | "Method: Least Squares F-statistic: 82.37\n",
1350 | "Date: Tue, 09 Oct 2018 Prob (F-statistic): 6.67e-83\n",
1351 | "Time: 13:15:31 Log-Likelihood: 373.00\n",
1352 | "No. Observations: 381 AIC: -726.0\n",
1353 | "Df Residuals: 371 BIC: -686.6\n",
1354 | "Df Model: 9 \n",
1355 | "Covariance Type: nonrobust \n",
1356 | "===================================================================================\n",
1357 | " coef std err t P>|t| [0.025 0.975]\n",
1358 | "-----------------------------------------------------------------------------------\n",
1359 | "const 0.0242 0.013 1.794 0.074 -0.002 0.051\n",
1360 | "area 0.2367 0.030 7.779 0.000 0.177 0.297\n",
1361 | "bathrooms 0.2070 0.022 9.537 0.000 0.164 0.250\n",
1362 | "stories 0.1096 0.017 6.280 0.000 0.075 0.144\n",
1363 | "mainroad 0.0536 0.014 3.710 0.000 0.025 0.082\n",
1364 | "guestroom 0.0390 0.013 2.991 0.003 0.013 0.065\n",
1365 | "hotwaterheating 0.0921 0.022 4.213 0.000 0.049 0.135\n",
1366 | "airconditioning 0.0710 0.011 6.212 0.000 0.049 0.094\n",
1367 | "parking 0.0669 0.018 3.665 0.000 0.031 0.103\n",
1368 | "prefarea 0.0653 0.012 5.513 0.000 0.042 0.089\n",
1369 | "==============================================================================\n",
1370 | "Omnibus: 91.542 Durbin-Watson: 2.107\n",
1371 | "Prob(Omnibus): 0.000 Jarque-Bera (JB): 315.402\n",
1372 | "Skew: 1.044 Prob(JB): 3.25e-69\n",
1373 | "Kurtosis: 6.938 Cond. No. 10.0\n",
1374 | "==============================================================================\n",
1375 | "\n",
1376 | "Warnings:\n",
1377 | "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n"
1378 | ]
1379 | }
1380 | ],
1381 | "source": [
1382 | "# Fit statistics and coefficient table of the model refitted without `bedrooms`\n",
1383 | "print(lm.summary())"
1384 | ]
1385 | },
1386 | {
1387 | "cell_type": "code",
1388 | "execution_count": 27,
1389 | "metadata": {},
1390 | "outputs": [
1391 | {
1392 | "data": {
1393 | "text/plain": [
1394 | "Index(['const', 'area', 'bathrooms', 'stories', 'mainroad', 'guestroom',\n",
1395 | " 'hotwaterheating', 'airconditioning', 'parking', 'prefarea'],\n",
1396 | " dtype='object')"
1397 | ]
1398 | },
1399 | "execution_count": 27,
1400 | "metadata": {},
1401 | "output_type": "execute_result"
1402 | }
1403 | ],
1404 | "source": [
1405 | "X_train_new.columns  # note: `const` is still among the columns at this point"
1406 | ]
1407 | },
1408 | {
1409 | "cell_type": "code",
1410 | "execution_count": 28,
1411 | "metadata": {},
1412 | "outputs": [],
1413 | "source": [
1414 | "X_train_new = X_train_new.drop(columns=['const'], errors='ignore')  # VIF is computed on predictors only; errors='ignore' keeps the cell re-runnable"
1415 | ]
1416 | },
1417 | {
1418 | "cell_type": "code",
1419 | "execution_count": 29,
1420 | "metadata": {},
1421 | "outputs": [
1422 | {
1423 | "data": {
1424 | "text/html": [
1425 | "\n",
1426 | "\n",
1439 | "
\n",
1440 | " \n",
1441 | " \n",
1442 | " \n",
1443 | " Features \n",
1444 | " VIF \n",
1445 | " \n",
1446 | " \n",
1447 | " \n",
1448 | " \n",
1449 | " 0 \n",
1450 | " area \n",
1451 | " 4.52 \n",
1452 | " \n",
1453 | " \n",
1454 | " 3 \n",
1455 | " mainroad \n",
1456 | " 4.26 \n",
1457 | " \n",
1458 | " \n",
1459 | " 2 \n",
1460 | " stories \n",
1461 | " 2.12 \n",
1462 | " \n",
1463 | " \n",
1464 | " 7 \n",
1465 | " parking \n",
1466 | " 2.10 \n",
1467 | " \n",
1468 | " \n",
1469 | " 6 \n",
1470 | " airconditioning \n",
1471 | " 1.75 \n",
1472 | " \n",
1473 | " \n",
1474 | " 1 \n",
1475 | " bathrooms \n",
1476 | " 1.58 \n",
1477 | " \n",
1478 | " \n",
1479 | " 8 \n",
1480 | " prefarea \n",
1481 | " 1.47 \n",
1482 | " \n",
1483 | " \n",
1484 | " 4 \n",
1485 | " guestroom \n",
1486 | " 1.30 \n",
1487 | " \n",
1488 | " \n",
1489 | " 5 \n",
1490 | " hotwaterheating \n",
1491 | " 1.12 \n",
1492 | " \n",
1493 | " \n",
1494 | "
\n",
1495 | "
"
1496 | ],
1497 | "text/plain": [
1498 | " Features VIF\n",
1499 | "0 area 4.52\n",
1500 | "3 mainroad 4.26\n",
1501 | "2 stories 2.12\n",
1502 | "7 parking 2.10\n",
1503 | "6 airconditioning 1.75\n",
1504 | "1 bathrooms 1.58\n",
1505 | "8 prefarea 1.47\n",
1506 | "4 guestroom 1.30\n",
1507 | "5 hotwaterheating 1.12"
1508 | ]
1509 | },
1510 | "execution_count": 29,
1511 | "metadata": {},
1512 | "output_type": "execute_result"
1513 | }
1514 | ],
1515 | "source": [
1516 | "# Variance inflation factor for each predictor kept in the model, largest first\n",
1517 | "from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
1518 | "\n",
1519 | "# Computed on the predictors only (`const` was dropped from X_train_new in the previous cell)\n",
1520 | "X = X_train_new\n",
1521 | "vif_scores = [variance_inflation_factor(X.values, col) for col in range(X.shape[1])]\n",
1522 | "vif = pd.DataFrame({'Features': X.columns, 'VIF': vif_scores})\n",
1523 | "vif['VIF'] = vif['VIF'].round(2)\n",
1524 | "vif = vif.sort_values(by='VIF', ascending=False)\n",
1525 | "vif"
1526 | ]
1527 | },
1528 | {
1529 | "cell_type": "markdown",
1530 | "metadata": {},
1531 | "source": [
1532 | "## Residual Analysis of the train data\n",
1533 | "\n",
1534 | "To check whether the error terms are normally distributed (which is, in fact, one of the major assumptions of linear regression), let us plot a histogram of the error terms and see what it looks like."
1535 | ]
1536 | },
1537 | {
1538 | "cell_type": "code",
1539 | "execution_count": 30,
1540 | "metadata": {},
1541 | "outputs": [],
1542 | "source": [
1543 | "y_train_price = lm.predict(exog=X_train_lm)  # fitted values for the training rows"
1544 | ]
1545 | },
1546 | {
1547 | "cell_type": "code",
1548 | "execution_count": 31,
1549 | "metadata": {},
1550 | "outputs": [],
1551 | "source": [
1552 | "# Importing the required libraries for plots.\n",
1553 | "import matplotlib.pyplot as plt\n",
1554 | "import seaborn as sns\n",
1555 | "%matplotlib inline"
1556 | ]
1557 | },
1558 | {
1559 | "cell_type": "code",
1560 | "execution_count": 32,
1561 | "metadata": {},
1562 | "outputs": [
1563 | {
1564 | "name": "stderr",
1565 | "output_type": "stream",
1566 | "text": [
1567 | "C:\\Users\\admin\\Anaconda3\\lib\\site-packages\\matplotlib\\axes\\_axes.py:6462: UserWarning: The 'normed' kwarg is deprecated, and has been replaced by the 'density' kwarg.\n",
1568 | " warnings.warn(\"The 'normed' kwarg is deprecated, and has been \"\n"
1569 | ]
1570 | },
1571 | {
1572 | "data": {
1573 | "text/plain": [
1574 | "Text(0.5,0,'Errors')"
1575 | ]
1576 | },
1577 | "execution_count": 32,
1578 | "metadata": {},
1579 | "output_type": "execute_result"
1580 | },
1581 | {
1582 | "data": {
1583 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAErCAYAAAD+N2lQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvhp/UCwAAIABJREFUeJzt3Xl8XFed5/3Pr6q076tly5bkLY4TO3FsJSYrYQkkTROgYQYI0ITOvDIz0A/D0z0PSw/THbqnG2aafpqenl5Iszch7B0CCZAQsod43+14t2R5kyyV9rVUZ/6oklEcLSVbVbdu1ff9etVLUt2re3+6lr8+Pvfcc8w5h4iI+EfA6wJERGRuFNwiIj6j4BYR8RkFt4iIzyi4RUR8RsEtIuIzCm4REZ9RcGchM3MJvG73us65MrNvJPizTbye8bpmkUsR8roA8dTnZth2IlVFzKNHeG3dtwOvB54Fnrlo28X7iviC6cnJ7GNmDsA5Z17Xkmxm9gDwZ8DnnHMPeFuNyPxQV4nMyswemOg+MbN7zGyTmfWb2Yn49qb49m+Y2RVm9j0zazez6OQuFzNbaWbfMrNTZjZqZqfjX6+c6zmT8DOamd1rZs+ZWbeZDZvZXjP7lJnlXLRvcby2n5nZEjP7ppmdMbNxM3tPfJ8fxvepNbM/NrNX4sc8amZ/POlYHzSz7WY2aGZnzexvLj5ffL83mdnP49duJH6+F83sU8m4HpLe1FUic/HHwB3AT4GngbKLti8HNgGHgIeAAqAXwMyuB34FlACPAvuBK4EPAO8wszc557Zewjkvm5kZ8DDwXmLdJz8A+oBbgC8At5nZ251z0Yu+tY7Yz9se/54A0HnRPv8E3Ar8DHgCeCfwxfg5c4HPAD8h1o1zF/BHQBT4/ybV927gh/FjPwqcBaqBq4D/CPzPy7sC4jcK7iwW70aYyrBz7gtTvP9G4Ebn3I5pvu8W4PPOuT+56DwGfAsoBT7onHto0rb3At8Fvm1mV00RjrOdcz58jFhoPwTc55wbmVT3/wL+K/AR4KsXfd8G4MvAR6eoe8LVwBrnXHv8mP8DOAL8KTAIrHPOHY1v+yywD/iomf2Fc643foz74x9f55w7MvngZlZ9aT+y+JpzTq8sewFullf3Rfs/EH//b6c5XlN8+1kgb4rtN8e3vzTN9z8f335bouecw886cZwHZtjnMDAAFE6xLTe+7deT3iuOH7MfKJvmmD+M7/PeKbZ9P77tk1Ns++v4tg2T3vslsVb4Yq9/d/RKj5da3FnMzf3m5OZZtu9y8dbqRdbHP/56mu/7NbHW+nXAc3M852WJt1hXAKeAT8Ya2a8xCKye4v2DzrmeWU4xVffP6fjHbVNsOxX/uHjS9oeAtwA7zex7xLqMXnTOnZnl3JKhFNwyF2cvcftEv/R0QTPxfvklnPNyVcU/1hMbfTKd/ineS6S2qYI9ksC2CzconXPfMrN+4BPE+rQ/CmBmLwOfds49m0AdkkE0qkTmYraxo9Ntnwioumm2L7xov7mc83JNnPN555zN8CrxoLbfnsi5HzvnbgMqiLW+/w+xPvbHzWxZquqQ9KDgllSYuLF4+zTbJ97fnvRKLuKcO0tsJMl1Zlac6vPPlXOuzzn3pHPu/wH+FigkNupGsoiCW1LhReAgcMvEOOcJ8a9vIzaE8AUPaoNYABYD/2Jmr2lZm1m1mV2b+rIunP8OM8ubYtOC+MfBVNYj3lMfdxabYTggwCPOuZ3zcR7nnDOzDwNPAt8zs58ArwCriI1r7gN+300/pC7Z/p7YDdQPA28ysyeBVmJjpZcTu3H6v4FdHtX3T0CFmT1L7H8H48BGYuPDDwH/5lFd4hEFd3ab6WbcCWBeghvAObcp/hDOZ4E3A28HzhN78OUvnHMH5+tcl1CbA+41s0eJjZl+K7Ex551AC/B54F+9qo/YnDJvJ/aPy1uIBXcrsaGOf++
cm+rGqWQwzVUiIuIz6uMWEfEZBbeIiM8ouEVEfEbBLSLiMwpuERGfUXCLiPiMgltExGcU3CIiPqPgFhHxGQW3iIjPKLhFRHxGwS0i4jNJmR2wurraNTU1JePQIiIZadu2beedczWJ7JuU4G5qamLr1qnWSBURkamYWUui+6qrRETEZxTcIiI+o+AWEfEZBbeIiM8ouEVEfEbBLSLiMwpuERGfUXCLiPiMgltExGeS8uSkZJ7vbGqd0/73bGxIUiUioha3iIjPKLhFRHxGwS0i4jMKbhERn1Fwi4j4jIJbRMRnEhoOaGYngD5gHIg455qTWZSIiExvLuO43+CcO5+0SkREJCHqKhER8ZlEg9sBT5jZNjO7P5kFiYjIzBLtKrnZOXfazGqBJ83sFefcc5N3iAf6/QANDXrcOduMRx07T3az/0wvZQUhxqNR7riqjrqyfK9LE8k45pyb2zeYPQD0O+e+ON0+zc3NTqu8Z5aZ5irZd7qHx3afoXtojPKCHIbGxhmJRCnOC/Hpu67knhsaCAQshdWK+I+ZbUt04MesLW4zKwICzrm++OdvAf78MmuUDNHaOcB3N5+ktjSPd6xbxBULSgDYuKySP3t0H599ZC8/33uGL3+omeI8zWkmMh8S6eNeALxgZruAzcBjzrlfJLcs8YO+4TG+s7mVssIc7rtlKavqSjEzzIwVtSV8+76N/NW71vLysS4+8vXNDI5GvC5ZJCPM2gRyzh0Drk1BLeIjUed4eHMrQ2Pj/KebmijMfe2vkplxz8YGSgtCfPzhHdz3ja187d7rKcgNelCxSObQcEC5JHtP9XCic5C7r13EwrKCGff93WsW8bfvXcfLxzv57CN7U1ShSOZScMucOed47lAH1cV5XNdQkdD3vGNdPR9/40p+tL2NH2w9meQKRTKbglvm7Eh7P6d7hrltZTUBS3y0yMfftJKbllfx33+yl0Pn+pJYoUhmU3DLnD17uIPS/BDrlpTP6fuCAeNL71tHcV4OH394B5HxaJIqFMlsGp8lc9IWHuRYxwB3rakjFJz+3/2Zxn3fefUCvr2plY8/vINbVtZceF/rVIokRi1umZNNx7vICwW4oanyko+xemEpqxaU8KtX2ukZGpvH6kSyg4JbEjYedew/3cvqhaXk5Vz6kD4z4+3XLiIadfxs9+l5rFAkOyi4JWFHO/oZGhtnzaKyyz5WZVEub7iyln2nezl2vn8eqhPJHgpuSdjeUz3khQKsXFA8L8e7ZUU1pfkhfrX/HHOdM0ckmym4JSHjUce+071cWVdCzgw3JeciJxjg9atqOdE5yJEOtbpFEqXgloQci3eTrK2//G6Sya5vrKCsIEetbpE5UHBLQvac6iE3FGBlfPa/+RIKBnjDqlpOhod45mDHvB5bJFMpuGVW0ajjwJn57SaZbENjBeWFOfzzs0fn/dgimUjBLbM6cLaXgdHxC3Ntz7dgwLhxWRWbjnex73RPUs4hkkkU3DKrl450ArC8Zn5Gk0ylubGSgpwg33zpRNLOIZIpFNwyqxeOnKemOI+ygpyknaMgN8jvra/nkZ2n6ewfSdp5RDKBgltmNBqJsvl4F8tri5J+ro/c3MRoJMp3t2jaV5GZKLhlRjtawwyNjSe1m2TCitoSbl1Zzb/+poXxqIYGikxHwS0zevFoJwGDZdXJD26AD2xs4GzvMC8cOZ+S84n4kYJbZvTSkfOsrS9L2TqRb7iylvLCHH60rS0l5xPxIwW3TKt/JMLOk93cvKI6ZefMCwV5+zWL+OW+s/QOa8pXkakouGVaW453EYm6lAY3wLs3LGYkEuXx3WdSel4Rv1Bwy7S2tnQRDBjrE1wQeL5cu7iM5TVF/Gi7uktEpqLglmltawlz9aLSlPVvTzAzfm/9YracCNPSOZDSc4v4gYJbpjQ2HmXXyZ6Ut7YnvOu6egB+pu4SkddQcMuUXjnTx9DYOBsavQnuReUFrFtSzi/2nvXk/CLpTMEtU9rW0gXgWXA
D3LWmjj2nejjZNehZDSLpSMEtU9rW2s3CsnwWlRd4VsOda+oA+OU+tbpFJlNwy5S2t4RZ72FrG6CxqojVC0vVXSJyEQW3vMaZniFOdQ+xwaMbk5PdeXUd21rDtPcOe12KSNoIJbqjmQWBrcAp59zvJq8k8dr2lm4g9f3b39nU+pr3xp3DOfgfjx3gdcuqXrXtno0NqSpNJK3MpcX9X4ADySpE0se2ljD5OQGuWlTqdSksKMmjujiX/Wd6vS5FJG0kFNxmthh4G/CV5JYj6WDHyTDX1JcnZX3JuTIzrqwr5fj5AUYjUa/LEUkLif7N/BLwSUB/czLc2HiU/ad7uWZxmdelXHDFghLGo45jHf1elyKSFmYNbjP7XaDdObdtlv3uN7OtZra1o6Nj3gqU1Dp0ro+RSJRrlpR7XcoFTVWF5ASNQ+19XpcikhYSaXHfDNxtZieA7wJvNLNvX7yTc+5B51yzc665pqZmnsuUVNndFltl/Zr69Glxh4IBltcUc+hcP85pZRyRWYPbOfcZ59xi51wT8D7g1865Dya9MvHE7rYeSvNDNFYVel3Kq1yxoISugVE6+0e9LkXEc97ffZK0srutm2sWl2NmXpfyKlcsKAFQd4kIcwxu59wzGsOduYbHxjl4ti+tbkxOqCzKpbo4l0PnFNwianHLBQfO9BKJOq5ZnD43Jie7YkEJxzoGGBvX4CbJbgpuueDCjck0bHEDrKwtIRJ1tHRqtkDJbgpuuWB3Ww/VxXksLMv3upQpNVUVEjA0nluynoJbLojdmCxLuxuTE/JygtSXF3DsvJYzk+ym4BYA+kciHOnoT9tukgnLaoppCw8yEhn3uhQRzyQ8O6Bkjqlm4Tt+fgDnIDwwOuX2dLGspohnD3Won1uymlrcAkBbOBaE9RXp9eDNxRoriwiaqZ9bspqCWwA41T1EeUEOxXnp/Z+w3FCAxZXq55bspuAWANrCQ9RXeLe+5Fwsqy7mVHiI3uExr0sR8YSCWxgcjdA1MMpiDxcGnotlNUU4YMvxLq9LEfGEgls41T0EpH//9oSGykJCAePlY51elyLiCQW3cCocD26ftLhzggHqywvY2hL2uhQRTyi4hbbwEFVFuRTkBr0uJWGNVUXsPdXD8JjGc0v2UXALp7qHWOyTG5MTGqsKGRt37DrZ7XUpIimn4M5yfcNj9AyN+aZ/e0JjZaxedZdINlJwZ7mJ/m2/jCiZUJgXYnlNEdsU3JKFFNxZrq17CAMWlqfnjIAzaW6sZFtLmGhU61BKdlFwZ7m28CC1pXnkhfxzY3JCc1MFPUNjHNXj75JlFNxZzDlHW3iIxeX+6t+e0NxUCcCWE+oukeyi4M5i3YNjDI6O++ZR94s1VRVSVZTL1hY9QSnZRcGdxdriT0z6bSjgBDNjQ2OFblBK1lFwZ7G28CDBgFGXpkuVJWJDYwUtnYN09o94XYpIyii4s1hbeIiFZfmEAv79Nbh2SWxF+l1tehBHsod//8bKZYk6x+nuId/MTzKdtfVlBAx2tiq4JXsouLPU+b4RRiJRFvvsicmLFeWFuGJBCTvberwuRSRlFNxZyu83Jie7rqGcXSe7cU4P4kh2UHBnqbbwELnBADUleV6XctnWLSmnZ2iM41rOTLKEgjtLnQoPsqi8gICZ16VctokblDs1U6BkCQV3FopEo5zpGc6IbhKAlbUlFOUGFdySNRTcWehc7wiRqMuY4A4GjLWLyzQ3t2QNBXcWagsPAvh+RMlk65ZUsP9Mr1bEkawwa3CbWb6ZbTazXWa2z8w+l4rCJHlOhYcozA1SUZjjdSnzZt2ScsbGHfvP9HpdikjSJdLiHgHe6Jy7FlgH3Glmr0tuWZJMbeHYgzeWATcmJ6ybuEGpB3EkC8wa3C5mYsLjnPhLA2Z9amh0nPa+zLkxOaGuLJ+60nw9+i5ZIaE+bjMLmtlOoB140jm3aYp97jezrWa2taOjY77rlHmy73QPUZdZ/dsT1i0p18gSyQoJBbd
zbtw5tw5YDNxgZmum2OdB51yzc665pqZmvuuUebIr/mi4X+fgnsm6hnJaOgfpGhj1uhSRpJrTqBLnXDfwDHBnUqqRpNvd1k1pfojS/My5MTlhop9bwwIl0yUyqqTGzMrjnxcAbwZeSXZhkhy723oyspsEfjtT4A4Ft2S4RFrcC4GnzWw3sIVYH/fPkluWJMPEfB6ZdmNywsRMgWpxS6YLzbaDc243cF0KapEk25PB/dsT1i0p5xf7zuKcy6jhjiKT6cnJLLKjNYwZLMnQrhKIBXf34BgnOge9LkUkaRTcWWTHyW5W1BSTnxP0upSkWdcwMVOgFhCWzKXgzhLOOXa0hlnfUOF1KUm1sraEwtygnqCUjKbgzhInOgcJD45xXbxFmqmCAWNtfZkexJGMpuDOEttbYl0H6xszu8UNcF2DZgqUzDbrqBLJDDtOhinJC7GippitJzKj//c7m1qnfL9/eIyxcceXnjxEQ1XRq7bds7EhFaWJJJVa3Flie0s36xrKCQQyf4jc4srYqJnW8JDHlYgkh4I7CwyORnjlbC/XLcns/u0Jpfk5lBfmcLJLQwIlMym4s8Cuk7EZAa/Lgv7tCUsqCjkZVnBLZlJwZ4HtrbE+7WxpcQMsqSyke3CM3uExr0sRmXcK7iywozXMsuoiygtzvS4lZRrij/W3qbtEMpCCO8NFo45tLWGam7KnmwRgYXkBQTNau3SDUjKPgjvDHTvfT3hwjObGSq9LSamcYICF5fnq55aMpODOcBNjtrOtxQ2xfu628CDjUS2RKplFwZ3htpwIU1WUy9Lqotl3zjBLKgoZG3e09w17XYrIvFJwZ7htLV1saKzIyrmpGyYexNENSskwCu4M1tE3wonOQa5vyq7+7QkVhTkU5QY5qRuUkmEU3BlsW0sXABuysH8bwMxYUlmoJygl4yi4M9iWE2HyQgHWLCrzuhTPNFQW0tE/wtCoZgqUzKHgzmBbT3Rx7ZJyckPZ+8e8JN7P3aZhgZJBsvdvdIYbHI2w73Qv12dpN8mE+vICDGhVcEsGUXBnqO0t3USiLmtvTE7IzwlSW5qnfm7JKAruDLXpeCfBgNGc5cEN8ZkCu4ZwTg/iSGbQCjg+N90qMD/ddZqFZfk8uvN0iitKPw2VhWxtCdPZP+p1KSLzQi3uDDQ2HuVkeIilVdn3tORUllxYEUfdJZIZFNwZ6GRXbH6ObHzMfSo1JXnkhQK0diq4JTMouDPQ8fMDGNCoFjcAATMaqwo50TngdSki80LBnYGOnx+griyfgtyg16WkjaVVRbT3jdDZP+J1KSKXTcGdYSLRKK1dg+omuUhT/HpsiU9zK+JnCu4Mcyo8RET9269RX1FAKGBsPt7ldSkil23W4DazJWb2tJkdMLN9ZvZfUlGYXJpj52P9uE3q336VUCBAQ2Uhm090el2KyGVLpMUdAf7YObcaeB3wMTO7KrllyaU62t7PwrJ8ivI0RP9iTdVF7D/dq5XfxfdmDW7n3Bnn3Pb4533AAaA+2YXJ3I2Nx/q3l9cUe11KWmqqKiLqYFuL+rnF3+bUx21mTcB1wKYptt1vZlvNbGtHR8f8VCdz0tI5SCTqWF6jbpKpNFQWEgoYW9TPLT6XcHCbWTHwI+ATzrnei7c75x50zjU755pramrms0ZJ0NGOfgL22xEU8mq5oQBrF5fpBqX4XkLBbWY5xEL7Iefcj5Nbklyqox39LKkoJC+k8dvT2bi0ip0nuxkYiXhdisglS2RUiQFfBQ445/7/5Jckl2JodJxT4SGWqX97RresqCYSdWp1i68l0uK+GfgQ8EYz2xl//U6S65I5On5+AAcsr1U3yUyamyrICwV4/vB5r0sRuWSzjhlzzr0AWApqkctwtKOfnKDRUFHodSlpLT8nyPVNlbxwRDfQxb/05GSGONLRT1NVEaGg/khnc8vKag6d6+dc77DXpYhcEv0tzwDdg6N09I2wolb924m4ZUU1AC8eUXeJ+JOCOwMcPtcPwBU
LSjyuxB+uWlhKZVEuL6ifW3xKwZ0BDrX3UVaQQ21Jntel+EIgYNy0vIoXjpzXOpTiSwpunxuPOo6093PFgmJiIzclEbeurKa9b4RD8f+tiPiJgtvnWrsGGYlEWVmrbpK5uGVl7OneZw62e1yJyNwpuH3u8Lk+AoZuTM5RfXkBqxeW8tQBBbf4j+b+9LlD7X00VBaSn6PH3BPxnU2tFz6vK83jmYMdfOW5YxTOMA3uPRsbUlGaSMLU4vaxjr4RTncPazTJJbqyrhQHHDzX53UpInOi4Paxif5ZBfelqa8ooCQvxCtnFdziLwpuH3ti/znKCnJYWJbvdSm+FDBjVV0Jh871EYlGvS5HJGEKbp8aGh3n+cMdrF5YqmGAl2H1wlJGIlFOnB/0uhSRhCm4feqFI+cZHoty1cJSr0vxteU1xYQCxoGzr1kbRCRtKbh96ol9ZynJD7FUq91cltxQgJW1xew/3UtUT1GKTyi4fWg86vj1K+28YVUtwYC6SS7X2sVl9AyNcbJL3SXiDwpuH9reGqZzYJS3XL3A61Iywuq6UkIBY3dbj9eliCREwe1DT+w7S07QeP0VWpR5PuTlBFlVV8LeUz3qLhFfUHD7TDTqeHzPWW5ZUU1Jfo7X5WSMtfVl9I1EOH5+wOtSRGal4PaZba1hTnUPcfe6RV6XklGurCslJ2jsUXeJ+ICC22ce3XmavFCAO66q87qUjJIbCnBlXSl7T/cwHlV3iaQ3BbePRMajPL7nDG9evYDiGSZFkktz7eJyBkfHOay5SyTNKbh95MWjnXQOjPL2a9VNkgyr6kooyguxtSXsdSkiM1Jw+8ijO09Tkhfi9lUaTZIMwYCxfkk5r5ztpX8k4nU5ItNScPvE8Ng4T+w7y51r6jT3dhJtaKwg6mBnq1rdkr4U3D7x+J4z9I1EeNf6eq9LyWi1pfk0VBaytSWshYQlbSm4feLhza0srS7ixmVVXpeS8TY0VtDeN0JbeMjrUkSmpOD2gcPn+thyIsz7rl+iKVxTYG19GbnBAJuPd3ldisiUFNw+8PDmk+QEjXdvWOx1KVkhPyfIdQ3l7GrrZkA3KSUNKbjT3PDYOD/e0cZbrq6jujjP63KyxuuWVRGJOracUKtb0o+CO839fO8ZugfHuOcGrTSeSgtK81lRW8ym412MjWtZM0kvswa3mX3NzNrNbG8qCpLfcs7xleePs6xGNyW9cOOyKnqGxnhi3zmvSxF5lURa3N8A7kxyHTKFl452su90L/ffuoyAFkxIuVV1JVQW5fK1F497XYrIq8wa3M655wB19Hngy88do7o4j3dep7HbXgiYcdPyKra1hDXCRNKK+rjT1IEzvTx3qIOP3NykJyU91NxYSVVRLv/n6SNelyJywbwFt5ndb2ZbzWxrR0fHfB02a/3L88cozA3ywY2NXpeS1XJDAe67dSnPHepg18lur8sRAWDe5gZ1zj0IPAjQ3NysZ4Uv0Xc2tdLZP8IjO05x47IqHttzxuuSst6HXtfIPz9zlH94+ggP/n6z1+WIqKskHT31SjvBgHGb1pRMCyX5Odx7UxNP7D/HK2d7vS5HJKHhgA8DvwFWmVmbmd2X/LKy17neYXad7ObGZVVaUzKN/MEtSynJC/HXvzjodSkiCY0qeb9zbqFzLsc5t9g599VUFJatnnqlnZxQgFtXqrWdTsoLc/lPty/nqVfa2XSs0+tyJMupqySN7D3Vw95TPdy8vJoiLU2Wdv7g5qXUlebzhV+8oilfxVMK7jThnOOvHj9AYW6QW1ZUe12OTKEgN8j/e8dKdrR280s9TSkeUnCniV+/0s5LRzt505W1FORq3Ha6evf6xaysLeavHj/A8Ni41+VIllJwp4Gx8Sh/+fgBltUUccNSzUmSzkLBAJ+7+2pauwb5Rz2UIx5RcKeBhze3cqxjgD+5azVBzUmS9m5aUc071y3in589xrGOfq/LkSyk4PZY18Aof/PEIW5aXsWbVtd6XY4k6E/etpq
8nAB/+pN9ulEpKafg9tgXnzhI/0iEB+6+WsuS+UhtST6ffOsqXjhynu9uOel1OZJlFNwe2tPWw8ObW/nwjU1csaDE63Jkjj6wsZFbVlTz5z/dz/HzA16XI1lEwe2RaNTxZ4/upaool0/csdLrcuQSBALGF//dteSGAnziezu1Uo6kjILbI9/dcpLtrd18+q7VlOrRdt+qK8vn87+3ll0nu/niL/U4vKSGgtsD7b3DfP7nB7hxWRXvXq9FEvzud9Yu5AMbG/jyc8d4ZMcpr8uRLKDnqj3wuZ/tZyQS5S/ftUY3JH3gO5taZ91nVV0JS6uL+K8/2MXS6iKuXVKegsokW6nFnWJPHTjHY7vP8PE3rmBZTbHX5cg8CQUCvP+GBkryQ9z3za0a3y1JpeBOoYGRCH/6k31csaCY+29b7nU5Ms+K80J8+MYmnHPc8y+baOnUSBNJDkvGwwPNzc1u69at835cv7n4v9iP7T7Ni0c7+Y+3LaOxqsijqiTZ1jeW8/4HX6YwN8RD/2EjTdX6s5bZmdk251xCSyypxZ0ip8JDvHS0kxuWViq0M9yVdaX8630bGRyN8M5/fFErxMu8U3CnQCQa5cc72ijOD/HWq+q8LkdSYE19GY987GYqi3L5wFde5ntbWvVovMwbBXcKPHWgnTM9w7xzXb2mbM0ijVVF/Nt/vpkbllbyqR/t4Q+/s4OewTGvy5IMoOBOsuPnB3juUAfNjRWsXljqdTmSYmWFOXzrDzbyyTtX8ct9Z3nrl57j8T1n1PqWy6LgTqLhsXF+uO0kFUW5vG3tQq/LEY8EA8ZHb1/Bjz96ExVFuXz0oe186KubOXBGK8bLpVFwJ0k06vjhtjZ6hsb4dxsWk5ejLpJsd83icn76hzfz5++4mt1t3dz1d8/zsYe2c/Bsn9elic/oyckk+adnj7L/TC9vW7tQo0jkglAwwO/f2MQ7rq3nKy8c42svHOexPWe4dWU1997UxOuvqCEUVHtKZqZx3Enw3KEO7v36ZtbUl/He5iV6rF2mNTgSYfOJLna39XC2d5iakjzefs0i3nZNHeuWVEy7IlIij+Ff7J6NDZdbriTRXMZxq8U9z3ae7OY/f3sbVywo4feuW6zQlhkV5oW4fVUt//CB9Twt374CAAAMLUlEQVR1oJ1Hdpzi2y+38LUXj1NRmMMtK2u4YWkl1zdVcEVtCQEtbScouOfVwbN93Pv1zVQV5/HNP7iBpw60e12S+EROMMCda+q4c00dPUNjPHeog6cPtvP84fP8dNdpAErzQ2xorKC5qZLuwTEWVxSQo26VrKTgnie727q575tbyQ0G+PZ9G1lQmu91SeIjU3V9NDdWsqGhgvDgGCc6B2jpHGDv6V6ePtgBxEar1JcX0FRVyIra2OyEWmw6Oyi458Hje87wR9/fSVVRHt/4yPU0VBV6XZJkCDOjsiiXyqJc1jdUALHJylq7BuNhPsiLRzp57vB5inKDrKkv4/qmShaVF3hcuSSTgvsy9AyO8TdPHuRbv2lhQ2MFX/7QBqqL87wuSzJcUV6I1QtLLzzQNRqJcuhcH3tO9bCtJcym4100VBaycWkla+rL1J2SgbI2uC/nrnzP0Bg/2XmKv/vVYcKDo9x7UxOfvutK8jVWWzyQGwqwpr6MNfVlDI5G2N7azebjnfxgWxuP7TnDhoYKblha6XWZ80YjarIwuJ1zhAfHONs7zOjYOKPjjkAAcgIBQkEjdOFj7PNINMrYuKNnaIyvv3icrS1hfrX/HCORKBsaK/jWO27g6kVlXv9YIgAU5oa4ZUU1Ny+v4mjHAJuOd/Li0fM8f+Q8W1rCvGfDYu5YvcD3c+aMjI3T3jdCR98IXYOjdA+O0j8SYWBknJFIlGh8mHN+KEB+TpCtLV00VRWxvKaY6xrKfd+VlNA4bjO7E/g7IAh8xTn3hZn2T5dx3IOjEQ6c6WXvqV72nOph3+leWjoHGBwdv+RjVhfn8Ttr63jPhsWsrS+bcbjfpbQ
MROZb79AYW1q62H+6lzM9wxTlBrn9ylpuW1nNTcurWVxRkLbDVofHxjnWMcChc30cPNfHobN9bG8NE540WZcBpQU5FOeFKMoLkhsKEoz/OCORKIOj44yNRznTM3zhe+pK81nfWM76hgrWN1awZlEZuSFvu5TmMo571uA2syBwCLgDaAO2AO93zu2f7nu8CO7B0Qj7T8cCes+pHva09XC0o59o/MerLs7l6kVlLK8ppr6igMPn+sjPCZITDBB1jsi4IxKNEhl3jI1HiUQdkagjFDByQwGK80IsLMunRCuyi0+97/olbDrexSM7TvH0wXba+0YAKMkPccWCEhaW5VNVlEthXgjnwOHYf6oXB0SdYzwae0WdI+ogFDByQgFygwHyQgFyQwHuuGoBFYWxm6kVRblUFubO2Lp3zvH1F0/QOzxG/3CE7sEx2vuGf9uaHhhlIqECBjUleSwozY+9SvKpLcmjvDBn1qdN79nYwPDYOIfO9bG9Jcz21m62t4ZpCw8BkBcKcO2Scq5vqqC5sZKrF5VSU5KX0n/Q5ju4bwQecM69Nf71ZwCcc5+f7nsuJ7hd/BckEv8liYw7+kcj9MX/YPuGI4QHR2kLD9EWHuRk1xAnw4Oc7h66ENI1JXlcU1/G1fVlrI2/FpS++g9BrWHJNpP7eZ1zHDrXz+YTXRw828uhc/109I3Q2T/C4Og4gYBhxALbMAIBCJgRDBhBMwIBIzIeZXQ8ymgkeuHv3lTycwJUFOZSEG8omcHYeJThsSjn+0cYiURftX/QjKriXGpL8qgtzaemJI+60nyqinMJBS6tVTxdH3d77zDbW8NsORFm64ku9p3uJRL/YUryQiyrLWZ5TayLpbo4l7KCXCoKc6goyqU0P4fcUICcoJETDJATDFzWcMz5fnKyHjg56es2YOOlFDabq//0FwzMoRujtiSPxRUFbGis4N3rY10XaxeXaQy1yCzMjFV1JayqK5lxv0QaOBONrZFIlDeuriU8MErXwCjhwVG6BsbiH0cZHot1WURd7IZqXjBAdUkebV2DFOfnUJIfoiw/FoqpGo9eW5rPnWsWcuea2Oydg6MRdp3s4XB7H0fa+zna0c9LRzr58fZTCR2vpiSPLf/tzcksGUgsuKe6gq/599XM7gfuj3/Zb2YHL6ewRLQQ67dJQ9XAea+LSEO6LtNL+rX5QDIPnjzzcl1S9bO3APbZS/72xkR3TCS424Alk75eDJy+eCfn3IPAg4meOJOZ2dZE/8uTTXRdpqdrMzVdl6kl0mG0BVhpZkvNLBd4H/BocssSEZHpzNrids5FzOwPgV8SGw74NefcvqRXJiIiU0roARzn3OPA40muJZOoy2hqui7T07WZmq7LFJKykIKIiCSPZp8REfEZBfc8MLNKM3vSzA7HP1ZMsc86M/uNme0zs91m9l4vak0FM7vTzA6a2REz+/QU2/PM7Hvx7ZvMrCn1VaZeAtflj8xsf/z34ykzS3h4mN/Ndm0m7fceM3NmltUjTRTc8+PTwFPOuZXAU/GvLzYI/L5z7mrgTuBLZlaewhpTIj5Fwj8AdwFXAe83s6su2u0+IOycWwH8LfA/U1tl6iV4XXYAzc65a4AfAv8rtVV6I8Frg5mVAB8HNqW2wvSj4J4f7wC+Gf/8m8A7L97BOXfIOXc4/vlpoB2oSVmFqXMDcMQ5d8w5Nwp8l9j1mWzy9foh8CZL11mO5s+s18U597RzbjD+5cvEnpnIBon8zgD8BbF/zIan2JZVFNzzY4Fz7gxA/GPtTDub2Q1ALnA0BbWl2lRTJNRPt49zLgL0AFUpqc47iVyXye4Dfp7UitLHrNfGzK4DljjnfpbKwtJV1s3HfanM7FdA3RSb/tscj7MQ+Ffgw8656Gz7+1AiUyQkNI1Chkn4ZzazDwLNwOuTWlH6mPHamFmAWJfavakqKN0puBPknJt25hgzO2dmC51zZ+LBPOXy7mZWCjwGfNY593KSSvVaIlMkTOzTZmYhoAz
oSk15nklo6ggzezOxxsDrnXMjKarNa7NdmxJgDfBMvEetDnjUzO52znk/8b8H1FUyPx4FPhz//MPATy7eIT5dwL8B33LO/SCFtaVaIlMkTL5e7wF+7TL/gYJZr0u8O+DLwN3OuSn/8c9QM14b51yPc67aOdfknGsi1v+ftaENCu758gXgDjM7TGzBiS8AmFmzmX0lvs+/B24D7jWznfHXOm/KTZ54n/XEFAkHgO875/aZ2Z+b2d3x3b4KVJnZEeCPmHoUTkZJ8Lr8NVAM/CD++5EVcwIleG1kEj05KSLiM2pxi4j4jIJbRMRnFNwiIj6j4BYR8RkFt4iIzyi4RUR8RsEtacPMbo9P2TndK+J1jSLpQI+8Szp6mKmXysvEuV1E5kzBLelou3Pu23P9JjMrcc71TbOtABiLP6V3WWY6j0gqqKtEfMfMmuJdJw+Y2XvNbJuZDQF/H9/+jfj2GjP7mpmdAwaIz29tZiEz+1R8tZlhM+s0s38zs7VzPM+S+PFbzGzEzNrN7CUz+zAiSaQWt6SjQjOrnuL9Uedc76Sv30lsRZR/Av4Z6L1o/yeBs8Qm4C8C+uPvP0Rs7pgn499bB3wM+I2Z3eqc23HRcV5znvishk8Smzf6H4FDxGY5vAa4ld8uFCEy7xTcko4+F39d7DHgdyd9fTVwjXPuwDTH2euc++DkN8zsDmKh/X3gfROzEprZ94DtwP8mFryTveY8ZnYNsAr4lHMuK5YYk/Sh4JZ09CAw1dS3HRd9/dgMoQ3wxSnee1f8419OnkrWObfbzH4GvMPMapxzk8811Xl64h/fYGbfyLJpWMVjCm5JR4edc79KYL9Dl7B9KbHRKVMF/l5iax0u5dX/SLzmOM65FjP7S+AzwBkz20lsoegfOOe2JFC7yCXTzUnxs8GZNk5aeHeyS1mUeMrzOOc+C6wEPkFs/dD/AGw2s4xftV68peCWbHOU2O/96im2XRX/eDzRg8VXJv9759y/BxYBzwGfNLMZF4wWuRwKbsk2j8Q/fsbiCxgCmNka4G7ghYv6t6dkZmVmljP5PefcML/tgqmYp3pFXkN93JKO1sdXOp/KI9O8nxDn3JNm9n1i6xpWxG9ITgwHHCY27C8RbwAeNLMfAQeJDTXcQKy7ZJNz7uDl1CkyEwW3pKP3x19TWQlc7tOPHyA29O9e4G+IPZzzLPDfnXN7EjzGLuDHwO3x4wWBVuCv4scUSRqtOSki4jPq4xYR8RkFt4iIzyi4RUR8RsEtIuIzCm4REZ9RcIuI+IyCW0TEZxTcIiI+o+AWEfEZBbeIiM/8X8V+UeDMVDiFAAAAAElFTkSuQmCC\n",
1584 | "text/plain": [
1585 | ""
1586 | ]
1587 | },
1588 | "metadata": {},
1589 | "output_type": "display_data"
1590 | }
1591 | ],
1592 | "source": [
1593 | "# Distribution of the training residuals (actual minus fitted values)\n",
1594 | "fig = plt.figure()\n",
1595 | "sns.distplot(y_train - y_train_price, bins=20)\n",
1596 | "fig.suptitle('Error Terms', fontsize=20)  # figure-level heading\n",
1597 | "plt.xlabel('Errors', fontsize=18)  # x-axis label"
1598 | ]
1599 | },
1600 | {
1601 | "cell_type": "markdown",
1602 | "metadata": {},
1603 | "source": [
1604 | "## Making Predictions"
1605 | ]
1606 | },
1607 | {
1608 | "cell_type": "markdown",
1609 | "metadata": {},
1610 | "source": [
1611 | "#### Applying the scaling on the test sets"
1612 | ]
1613 | },
1614 | {
1615 | "cell_type": "code",
1616 | "execution_count": 33,
1617 | "metadata": {},
1618 | "outputs": [],
1619 | "source": [
1620 | "num_vars = ['area', 'bedrooms', 'bathrooms', 'stories', 'parking', 'price']\n",
1621 | "# transform() only, no fit: reuses the parameters already stored in `scaler` (fitted earlier in the notebook, presumably on the training split)\n",
1622 | "df_test[num_vars] = scaler.transform(df_test[num_vars])"
1623 | ]
1624 | },
1625 | {
1626 | "cell_type": "markdown",
1627 | "metadata": {},
1628 | "source": [
1629 | "#### Dividing into X_test and y_test"
1630 | ]
1631 | },
1632 | {
1633 | "cell_type": "code",
1634 | "execution_count": 34,
1635 | "metadata": {},
1636 | "outputs": [],
1637 | "source": [
1638 | "X_test = df_test  # same object as df_test, not a copy\n",
1639 | "y_test = X_test.pop('price')  # pop() removes the target column from the frame in place"
1640 | ]
1641 | },
1642 | {
1643 | "cell_type": "code",
1644 | "execution_count": 35,
1645 | "metadata": {},
1646 | "outputs": [],
1647 | "source": [
1648 | "# Score the held-out data with the final model.\n",
1649 | "\n",
1650 | "# Keep only the predictors the final model was trained on, in the same column order\n",
1651 | "X_test_new = X_test[X_train_new.columns]\n",
1652 | "\n",
1653 | "# has_constant='add' always inserts `const`; the default 'skip' adds nothing if a selected column is already constant in the test split\n",
1654 | "X_test_new = sm.add_constant(X_test_new, has_constant='add')"
1655 | ]
1656 | },
1657 | {
1658 | "cell_type": "code",
1659 | "execution_count": 36,
1660 | "metadata": {},
1661 | "outputs": [],
1662 | "source": [
1663 | "# Predicted (scaled) prices for the test split\n",
1664 | "y_pred = lm.predict(exog=X_test_new)"
1665 | ]
1666 | },
1667 | {
1668 | "cell_type": "markdown",
1669 | "metadata": {},
1670 | "source": [
1671 | "## Model Evaluation"
1672 | ]
1673 | },
1674 | {
1675 | "cell_type": "code",
1676 | "execution_count": 37,
1677 | "metadata": {},
1678 | "outputs": [
1679 | {
1680 | "data": {
1681 | "text/plain": [
1682 | "Text(0,0.5,'y_pred')"
1683 | ]
1684 | },
1685 | "execution_count": 37,
1686 | "metadata": {},
1687 | "output_type": "execute_result"
1688 | },
1689 | {
1690 | "data": {
1691 | "image/png": "\n",
1692 | "text/plain": [
1693 | ""
1694 | ]
1695 | },
1696 | "metadata": {},
1697 | "output_type": "display_data"
1698 | }
1699 | ],
1700 | "source": [
1701 | "# Actual vs. predicted target on the test split, to see how widely the predictions scatter\n",
1702 | "fig = plt.figure()\n",
1703 | "plt.scatter(x=y_test, y=y_pred)\n",
1704 | "fig.suptitle('y_test vs y_pred', fontsize=20)  # figure-level heading\n",
1705 | "plt.xlabel('y_test', fontsize=18)  # x-axis label\n",
1706 | "plt.ylabel('y_pred', fontsize=16)  # y-axis label"
1707 | ]
1708 | }
1709 | ],
1710 | "metadata": {
1711 | "kernelspec": {
1712 | "display_name": "Python 3",
1713 | "language": "python",
1714 | "name": "python3"
1715 | },
1716 | "language_info": {
1717 | "codemirror_mode": {
1718 | "name": "ipython",
1719 | "version": 3
1720 | },
1721 | "file_extension": ".py",
1722 | "mimetype": "text/x-python",
1723 | "name": "python",
1724 | "nbconvert_exporter": "python",
1725 | "pygments_lexer": "ipython3",
1726 | "version": "3.6.5"
1727 | }
1728 | },
1729 | "nbformat": 4,
1730 | "nbformat_minor": 2
1731 | }
1732 |
--------------------------------------------------------------------------------