├── Data Preprocessing
├── Untitled.ipynb
└── sf.pdf
├── Decision Tree
├── Untitled.ipynb
└── utf-8''iris(1).csv
├── K-NN
├── Untitled.ipynb
└── utf-8''iris(1).csv
├── K-means
├── CC GENERAL.csv
├── K0means.ipynb
├── creditcard.ipynb
└── utf-8''iris.csv
├── LICENSE
├── Linear Regression
├── LinearRegressionMultipleVariables.ipynb
├── LinearRegressionSingle Variables.ipynb
├── ex1data1.txt
└── ex1data2.txt
├── Logistic Regression
└── Logistic
│ ├── Untitled.ipynb
│ ├── ex2data1.txt
│ └── ex2data2.txt
├── README.md
├── RandomForest
├── RandomForest.ipynb
├── Social_Network_Ads.csv
└── Untitled.ipynb
├── SVM
├── Social_Network_Ads.csv
└── Untitled.ipynb
├── Sentiment Analysis
├── Restaurant_Reviews.tsv
├── moviereview.ipynb
└── restaurentreview.ipynb
├── TextAnalytics
└── textAnalytics.ipynb
├── TextClassification
├── 20news-bydate_py3.pkz
└── Textclassification.ipynb
└── _config.yml
/Data Preprocessing/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "DATA PREPROCESSING"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": null,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import numpy as np\n",
18 | "import matplotlib.pyplot as plt\n",
19 | "import seaborn as sb"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 6,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 | "
\n",
31 | "\n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " | \n",
48 | " Name | \n",
49 | " Gender | \n",
50 | " Size | \n",
51 | " Color | \n",
52 | "
\n",
53 | " \n",
54 | " \n",
55 | " \n",
56 | " 0 | \n",
57 | " Alex | \n",
58 | " F | \n",
59 | " Small | \n",
60 | " Blue | \n",
61 | "
\n",
62 | " \n",
63 | " 1 | \n",
64 | " Ben | \n",
65 | " M | \n",
66 | " Large | \n",
67 | " Yellow | \n",
68 | "
\n",
69 | " \n",
70 | " 2 | \n",
71 | " Cam | \n",
72 | " M | \n",
73 | " Medium | \n",
74 | " Red | \n",
75 | "
\n",
76 | " \n",
77 | " 3 | \n",
78 | " Dave | \n",
79 | " M | \n",
80 | " Small | \n",
81 | " Red | \n",
82 | "
\n",
83 | " \n",
84 | " 4 | \n",
85 | " Eli | \n",
86 | " F | \n",
87 | " Medium | \n",
88 | " Yellow | \n",
89 | "
\n",
90 | " \n",
91 | " 5 | \n",
92 | " Frank | \n",
93 | " M | \n",
94 | " Large | \n",
95 | " Red | \n",
96 | "
\n",
97 | " \n",
98 | " 6 | \n",
99 | " Grace | \n",
100 | " F | \n",
101 | " Large | \n",
102 | " Blue | \n",
103 | "
\n",
104 | " \n",
105 | " 7 | \n",
106 | " Henry | \n",
107 | " M | \n",
108 | " Large | \n",
109 | " Yellow | \n",
110 | "
\n",
111 | " \n",
112 | " 8 | \n",
113 | " Iris | \n",
114 | " F | \n",
115 | " Small | \n",
116 | " Yellow | \n",
117 | "
\n",
118 | " \n",
119 | " 9 | \n",
120 | " Jack | \n",
121 | " M | \n",
122 | " Small | \n",
123 | " Blue | \n",
124 | "
\n",
125 | " \n",
126 | "
\n",
127 | "
"
128 | ],
129 | "text/plain": [
130 | " Name Gender Size Color\n",
131 | "0 Alex F Small Blue\n",
132 | "1 Ben M Large Yellow\n",
133 | "2 Cam M Medium Red\n",
134 | "3 Dave M Small Red\n",
135 | "4 Eli F Medium Yellow\n",
136 | "5 Frank M Large Red\n",
137 | "6 Grace F Large Blue\n",
138 | "7 Henry M Large Yellow\n",
139 | "8 Iris F Small Yellow\n",
140 | "9 Jack M Small Blue"
141 | ]
142 | },
143 | "execution_count": 6,
144 | "metadata": {},
145 | "output_type": "execute_result"
146 | }
147 | ],
148 | "source": [
149 | "#Our dataset\n",
150 | "shirt_order = pd.DataFrame({'Name':['Alex', 'Ben', 'Cam', 'Dave', 'Eli', 'Frank', 'Grace', 'Henry', 'Iris', 'Jack'],\n",
151 | " 'Gender':['F', 'M', 'M', 'M', 'F', 'M', 'F', 'M', 'F', 'M'],\n",
152 | " 'Size':['Small', 'Large', 'Medium', 'Small', 'Medium', 'Large', 'Large', 'Large', 'Small', 'Small'],\n",
153 | " 'Color':['Blue', 'Yellow', 'Red', 'Red', 'Yellow', 'Red', 'Blue', 'Yellow', 'Yellow', 'Blue']\n",
154 | " })\n",
155 | "#Visualize dataset\n",
156 | "shirt_order"
157 | ]
158 | },
159 | {
160 | "cell_type": "code",
161 | "execution_count": 8,
162 | "metadata": {},
163 | "outputs": [
164 | {
165 | "data": {
166 | "text/html": [
167 | "\n",
168 | "\n",
181 | "
\n",
182 | " \n",
183 | " \n",
184 | " | \n",
185 | " Name | \n",
186 | " Gender | \n",
187 | " Size | \n",
188 | " Color | \n",
189 | " Gender Category | \n",
190 | "
\n",
191 | " \n",
192 | " \n",
193 | " \n",
194 | " 0 | \n",
195 | " Alex | \n",
196 | " F | \n",
197 | " Small | \n",
198 | " Blue | \n",
199 | " 0 | \n",
200 | "
\n",
201 | " \n",
202 | " 1 | \n",
203 | " Ben | \n",
204 | " M | \n",
205 | " Large | \n",
206 | " Yellow | \n",
207 | " 1 | \n",
208 | "
\n",
209 | " \n",
210 | " 2 | \n",
211 | " Cam | \n",
212 | " M | \n",
213 | " Medium | \n",
214 | " Red | \n",
215 | " 1 | \n",
216 | "
\n",
217 | " \n",
218 | " 3 | \n",
219 | " Dave | \n",
220 | " M | \n",
221 | " Small | \n",
222 | " Red | \n",
223 | " 1 | \n",
224 | "
\n",
225 | " \n",
226 | " 4 | \n",
227 | " Eli | \n",
228 | " F | \n",
229 | " Medium | \n",
230 | " Yellow | \n",
231 | " 0 | \n",
232 | "
\n",
233 | " \n",
234 | " 5 | \n",
235 | " Frank | \n",
236 | " M | \n",
237 | " Large | \n",
238 | " Red | \n",
239 | " 1 | \n",
240 | "
\n",
241 | " \n",
242 | " 6 | \n",
243 | " Grace | \n",
244 | " F | \n",
245 | " Large | \n",
246 | " Blue | \n",
247 | " 0 | \n",
248 | "
\n",
249 | " \n",
250 | " 7 | \n",
251 | " Henry | \n",
252 | " M | \n",
253 | " Large | \n",
254 | " Yellow | \n",
255 | " 1 | \n",
256 | "
\n",
257 | " \n",
258 | " 8 | \n",
259 | " Iris | \n",
260 | " F | \n",
261 | " Small | \n",
262 | " Yellow | \n",
263 | " 0 | \n",
264 | "
\n",
265 | " \n",
266 | " 9 | \n",
267 | " Jack | \n",
268 | " M | \n",
269 | " Small | \n",
270 | " Blue | \n",
271 | " 1 | \n",
272 | "
\n",
273 | " \n",
274 | "
\n",
275 | "
"
276 | ],
277 | "text/plain": [
278 | " Name Gender Size Color Gender Category\n",
279 | "0 Alex F Small Blue 0\n",
280 | "1 Ben M Large Yellow 1\n",
281 | "2 Cam M Medium Red 1\n",
282 | "3 Dave M Small Red 1\n",
283 | "4 Eli F Medium Yellow 0\n",
284 | "5 Frank M Large Red 1\n",
285 | "6 Grace F Large Blue 0\n",
286 | "7 Henry M Large Yellow 1\n",
287 | "8 Iris F Small Yellow 0\n",
288 | "9 Jack M Small Blue 1"
289 | ]
290 | },
291 | "execution_count": 8,
292 | "metadata": {},
293 | "output_type": "execute_result"
294 | }
295 | ],
296 | "source": [
297 | "#LABEL ENCODING\n",
298 | "from sklearn.preprocessing import LabelEncoder\n",
299 | "#Create an object for Label Encoder\n",
300 | "LE=LabelEncoder()\n",
301 | "#Create a new column that will show you the encoded require column\n",
302 | "shirt_order['Gender Category']=LE.fit_transform(shirt_order.Gender)\n",
303 | "#Check\n",
304 | "shirt_order"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 10,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "data": {
314 | "text/plain": [
315 | "array(['Small', 'Large', 'Medium'], dtype=object)"
316 | ]
317 | },
318 | "execution_count": 10,
319 | "metadata": {},
320 | "output_type": "execute_result"
321 | }
322 | ],
323 | "source": [
324 | "shirt_order.Size.unique()\n"
325 | ]
326 | },
327 | {
328 | "cell_type": "code",
329 | "execution_count": 13,
330 | "metadata": {},
331 | "outputs": [
332 | {
333 | "data": {
334 | "text/html": [
335 | "\n",
336 | "\n",
349 | "
\n",
350 | " \n",
351 | " \n",
352 | " | \n",
353 | " Name | \n",
354 | " Gender | \n",
355 | " Size | \n",
356 | " Color | \n",
357 | " Gender Category | \n",
358 | " Size Category | \n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " \n",
363 | " 0 | \n",
364 | " Alex | \n",
365 | " F | \n",
366 | " Small | \n",
367 | " Blue | \n",
368 | " 0 | \n",
369 | " 0 | \n",
370 | "
\n",
371 | " \n",
372 | " 1 | \n",
373 | " Ben | \n",
374 | " M | \n",
375 | " Large | \n",
376 | " Yellow | \n",
377 | " 1 | \n",
378 | " 2 | \n",
379 | "
\n",
380 | " \n",
381 | " 2 | \n",
382 | " Cam | \n",
383 | " M | \n",
384 | " Medium | \n",
385 | " Red | \n",
386 | " 1 | \n",
387 | " 1 | \n",
388 | "
\n",
389 | " \n",
390 | " 3 | \n",
391 | " Dave | \n",
392 | " M | \n",
393 | " Small | \n",
394 | " Red | \n",
395 | " 1 | \n",
396 | " 0 | \n",
397 | "
\n",
398 | " \n",
399 | " 4 | \n",
400 | " Eli | \n",
401 | " F | \n",
402 | " Medium | \n",
403 | " Yellow | \n",
404 | " 0 | \n",
405 | " 1 | \n",
406 | "
\n",
407 | " \n",
408 | " 5 | \n",
409 | " Frank | \n",
410 | " M | \n",
411 | " Large | \n",
412 | " Red | \n",
413 | " 1 | \n",
414 | " 2 | \n",
415 | "
\n",
416 | " \n",
417 | " 6 | \n",
418 | " Grace | \n",
419 | " F | \n",
420 | " Large | \n",
421 | " Blue | \n",
422 | " 0 | \n",
423 | " 2 | \n",
424 | "
\n",
425 | " \n",
426 | " 7 | \n",
427 | " Henry | \n",
428 | " M | \n",
429 | " Large | \n",
430 | " Yellow | \n",
431 | " 1 | \n",
432 | " 2 | \n",
433 | "
\n",
434 | " \n",
435 | " 8 | \n",
436 | " Iris | \n",
437 | " F | \n",
438 | " Small | \n",
439 | " Yellow | \n",
440 | " 0 | \n",
441 | " 0 | \n",
442 | "
\n",
443 | " \n",
444 | " 9 | \n",
445 | " Jack | \n",
446 | " M | \n",
447 | " Small | \n",
448 | " Blue | \n",
449 | " 1 | \n",
450 | " 0 | \n",
451 | "
\n",
452 | " \n",
453 | "
\n",
454 | "
"
455 | ],
456 | "text/plain": [
457 | " Name Gender Size Color Gender Category Size Category\n",
458 | "0 Alex F Small Blue 0 0\n",
459 | "1 Ben M Large Yellow 1 2\n",
460 | "2 Cam M Medium Red 1 1\n",
461 | "3 Dave M Small Red 1 0\n",
462 | "4 Eli F Medium Yellow 0 1\n",
463 | "5 Frank M Large Red 1 2\n",
464 | "6 Grace F Large Blue 0 2\n",
465 | "7 Henry M Large Yellow 1 2\n",
466 | "8 Iris F Small Yellow 0 0\n",
467 | "9 Jack M Small Blue 1 0"
468 | ]
469 | },
470 | "execution_count": 13,
471 | "metadata": {},
472 | "output_type": "execute_result"
473 | }
474 | ],
475 | "source": [
476 | "#ORDINAL ENCODING\n",
477 | "mapping_dict={'Small':0,'Medium':1,'Large':2}\n",
478 | "shirt_order['Size Category']=shirt_order.Size.map(mapping_dict)\n",
479 | "#check\n",
480 | "shirt_order\n"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": 18,
486 | "metadata": {},
487 | "outputs": [
488 | {
489 | "data": {
490 | "text/html": [
491 | "\n",
492 | "\n",
505 | "
\n",
506 | " \n",
507 | " \n",
508 | " | \n",
509 | " Name | \n",
510 | " Gender | \n",
511 | " Size | \n",
512 | " Color | \n",
513 | " Gender Category | \n",
514 | " Size Category | \n",
515 | " Color_Blue | \n",
516 | " Color_Red | \n",
517 | " Color_Yellow | \n",
518 | "
\n",
519 | " \n",
520 | " \n",
521 | " \n",
522 | " 0 | \n",
523 | " Alex | \n",
524 | " F | \n",
525 | " Small | \n",
526 | " Blue | \n",
527 | " 0 | \n",
528 | " 0 | \n",
529 | " 1 | \n",
530 | " 0 | \n",
531 | " 0 | \n",
532 | "
\n",
533 | " \n",
534 | " 1 | \n",
535 | " Ben | \n",
536 | " M | \n",
537 | " Large | \n",
538 | " Yellow | \n",
539 | " 1 | \n",
540 | " 2 | \n",
541 | " 0 | \n",
542 | " 0 | \n",
543 | " 1 | \n",
544 | "
\n",
545 | " \n",
546 | " 2 | \n",
547 | " Cam | \n",
548 | " M | \n",
549 | " Medium | \n",
550 | " Red | \n",
551 | " 1 | \n",
552 | " 1 | \n",
553 | " 0 | \n",
554 | " 1 | \n",
555 | " 0 | \n",
556 | "
\n",
557 | " \n",
558 | " 3 | \n",
559 | " Dave | \n",
560 | " M | \n",
561 | " Small | \n",
562 | " Red | \n",
563 | " 1 | \n",
564 | " 0 | \n",
565 | " 0 | \n",
566 | " 1 | \n",
567 | " 0 | \n",
568 | "
\n",
569 | " \n",
570 | " 4 | \n",
571 | " Eli | \n",
572 | " F | \n",
573 | " Medium | \n",
574 | " Yellow | \n",
575 | " 0 | \n",
576 | " 1 | \n",
577 | " 0 | \n",
578 | " 0 | \n",
579 | " 1 | \n",
580 | "
\n",
581 | " \n",
582 | " 5 | \n",
583 | " Frank | \n",
584 | " M | \n",
585 | " Large | \n",
586 | " Red | \n",
587 | " 1 | \n",
588 | " 2 | \n",
589 | " 0 | \n",
590 | " 1 | \n",
591 | " 0 | \n",
592 | "
\n",
593 | " \n",
594 | " 6 | \n",
595 | " Grace | \n",
596 | " F | \n",
597 | " Large | \n",
598 | " Blue | \n",
599 | " 0 | \n",
600 | " 2 | \n",
601 | " 1 | \n",
602 | " 0 | \n",
603 | " 0 | \n",
604 | "
\n",
605 | " \n",
606 | " 7 | \n",
607 | " Henry | \n",
608 | " M | \n",
609 | " Large | \n",
610 | " Yellow | \n",
611 | " 1 | \n",
612 | " 2 | \n",
613 | " 0 | \n",
614 | " 0 | \n",
615 | " 1 | \n",
616 | "
\n",
617 | " \n",
618 | " 8 | \n",
619 | " Iris | \n",
620 | " F | \n",
621 | " Small | \n",
622 | " Yellow | \n",
623 | " 0 | \n",
624 | " 0 | \n",
625 | " 0 | \n",
626 | " 0 | \n",
627 | " 1 | \n",
628 | "
\n",
629 | " \n",
630 | " 9 | \n",
631 | " Jack | \n",
632 | " M | \n",
633 | " Small | \n",
634 | " Blue | \n",
635 | " 1 | \n",
636 | " 0 | \n",
637 | " 1 | \n",
638 | " 0 | \n",
639 | " 0 | \n",
640 | "
\n",
641 | " \n",
642 | "
\n",
643 | "
"
644 | ],
645 | "text/plain": [
646 | " Name Gender Size Color Gender Category Size Category Color_Blue \\\n",
647 | "0 Alex F Small Blue 0 0 1 \n",
648 | "1 Ben M Large Yellow 1 2 0 \n",
649 | "2 Cam M Medium Red 1 1 0 \n",
650 | "3 Dave M Small Red 1 0 0 \n",
651 | "4 Eli F Medium Yellow 0 1 0 \n",
652 | "5 Frank M Large Red 1 2 0 \n",
653 | "6 Grace F Large Blue 0 2 1 \n",
654 | "7 Henry M Large Yellow 1 2 0 \n",
655 | "8 Iris F Small Yellow 0 0 0 \n",
656 | "9 Jack M Small Blue 1 0 1 \n",
657 | "\n",
658 | " Color_Red Color_Yellow \n",
659 | "0 0 0 \n",
660 | "1 0 1 \n",
661 | "2 1 0 \n",
662 | "3 1 0 \n",
663 | "4 0 1 \n",
664 | "5 1 0 \n",
665 | "6 0 0 \n",
666 | "7 0 1 \n",
667 | "8 0 1 \n",
668 | "9 0 0 "
669 | ]
670 | },
671 | "execution_count": 18,
672 | "metadata": {},
673 | "output_type": "execute_result"
674 | }
675 | ],
676 | "source": [
677 | "#ONE HOT ENCODING\n",
678 | "#duplicate color column for keeping the orginal values\n",
679 | "shirt_order['color_category']=shirt_order.Color\n",
680 | "shirt_order_onehotencoding=pd.get_dummies(shirt_order,columns=[\"color_category\"],prefix=[\"Color\"])\n",
681 | "#check\n",
682 | "shirt_order_onehotencoding\n"
683 | ]
684 | },
685 | {
686 | "cell_type": "markdown",
687 | "metadata": {},
688 | "source": []
689 | }
690 | ],
691 | "metadata": {
692 | "kernelspec": {
693 | "display_name": "Python 3",
694 | "language": "python",
695 | "name": "python3"
696 | },
697 | "language_info": {
698 | "codemirror_mode": {
699 | "name": "ipython",
700 | "version": 3
701 | },
702 | "file_extension": ".py",
703 | "mimetype": "text/x-python",
704 | "name": "python",
705 | "nbconvert_exporter": "python",
706 | "pygments_lexer": "ipython3",
707 | "version": "3.7.1"
708 | }
709 | },
710 | "nbformat": 4,
711 | "nbformat_minor": 2
712 | }
713 |
--------------------------------------------------------------------------------
/Data Preprocessing/sf.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suubh/Machine-Learning-in-Python/154596a9509dc7a066ae3caf5526b6f663a359cc/Data Preprocessing/sf.pdf
--------------------------------------------------------------------------------
/Decision Tree/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 38,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import numpy as np\n",
10 | "import pandas as pd\n",
11 | "import matplotlib.pyplot as plt\n",
12 | "import seaborn as sb"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 3,
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "data": {
22 | "text/html": [
23 | "\n",
24 | "\n",
37 | "
\n",
38 | " \n",
39 | " \n",
40 | " | \n",
41 | " sepal_length | \n",
42 | " sepal_width | \n",
43 | " petal_length | \n",
44 | " petal_width | \n",
45 | " species | \n",
46 | "
\n",
47 | " \n",
48 | " \n",
49 | " \n",
50 | " 0 | \n",
51 | " 5.1 | \n",
52 | " 3.5 | \n",
53 | " 1.4 | \n",
54 | " 0.2 | \n",
55 | " setosa | \n",
56 | "
\n",
57 | " \n",
58 | " 1 | \n",
59 | " 4.9 | \n",
60 | " 3.0 | \n",
61 | " 1.4 | \n",
62 | " 0.2 | \n",
63 | " setosa | \n",
64 | "
\n",
65 | " \n",
66 | " 2 | \n",
67 | " 4.7 | \n",
68 | " 3.2 | \n",
69 | " 1.3 | \n",
70 | " 0.2 | \n",
71 | " setosa | \n",
72 | "
\n",
73 | " \n",
74 | " 3 | \n",
75 | " 4.6 | \n",
76 | " 3.1 | \n",
77 | " 1.5 | \n",
78 | " 0.2 | \n",
79 | " setosa | \n",
80 | "
\n",
81 | " \n",
82 | " 4 | \n",
83 | " 5.0 | \n",
84 | " 3.6 | \n",
85 | " 1.4 | \n",
86 | " 0.2 | \n",
87 | " setosa | \n",
88 | "
\n",
89 | " \n",
90 | "
\n",
91 | "
"
92 | ],
93 | "text/plain": [
94 | " sepal_length sepal_width petal_length petal_width species\n",
95 | "0 5.1 3.5 1.4 0.2 setosa\n",
96 | "1 4.9 3.0 1.4 0.2 setosa\n",
97 | "2 4.7 3.2 1.3 0.2 setosa\n",
98 | "3 4.6 3.1 1.5 0.2 setosa\n",
99 | "4 5.0 3.6 1.4 0.2 setosa"
100 | ]
101 | },
102 | "execution_count": 3,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "df=pd.read_csv(\"utf-8''iris(1).csv\")\n",
109 | "df.head()"
110 | ]
111 | },
112 | {
113 | "cell_type": "code",
114 | "execution_count": 4,
115 | "metadata": {},
116 | "outputs": [
117 | {
118 | "data": {
119 | "text/plain": [
120 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)"
121 | ]
122 | },
123 | "execution_count": 4,
124 | "metadata": {},
125 | "output_type": "execute_result"
126 | }
127 | ],
128 | "source": [
129 | "df.species.unique()"
130 | ]
131 | },
132 | {
133 | "cell_type": "code",
134 | "execution_count": 6,
135 | "metadata": {},
136 | "outputs": [
137 | {
138 | "data": {
139 | "text/html": [
140 | "\n",
141 | "\n",
154 | "
\n",
155 | " \n",
156 | " \n",
157 | " | \n",
158 | " sepal_length | \n",
159 | " sepal_width | \n",
160 | " petal_length | \n",
161 | " petal_width | \n",
162 | " species | \n",
163 | " species_cat | \n",
164 | "
\n",
165 | " \n",
166 | " \n",
167 | " \n",
168 | " 14 | \n",
169 | " 5.8 | \n",
170 | " 4.0 | \n",
171 | " 1.2 | \n",
172 | " 0.2 | \n",
173 | " setosa | \n",
174 | " 0 | \n",
175 | "
\n",
176 | " \n",
177 | " 98 | \n",
178 | " 5.1 | \n",
179 | " 2.5 | \n",
180 | " 3.0 | \n",
181 | " 1.1 | \n",
182 | " versicolor | \n",
183 | " 1 | \n",
184 | "
\n",
185 | " \n",
186 | " 75 | \n",
187 | " 6.6 | \n",
188 | " 3.0 | \n",
189 | " 4.4 | \n",
190 | " 1.4 | \n",
191 | " versicolor | \n",
192 | " 1 | \n",
193 | "
\n",
194 | " \n",
195 | " 16 | \n",
196 | " 5.4 | \n",
197 | " 3.9 | \n",
198 | " 1.3 | \n",
199 | " 0.4 | \n",
200 | " setosa | \n",
201 | " 0 | \n",
202 | "
\n",
203 | " \n",
204 | " 131 | \n",
205 | " 7.9 | \n",
206 | " 3.8 | \n",
207 | " 6.4 | \n",
208 | " 2.0 | \n",
209 | " virginica | \n",
210 | " 2 | \n",
211 | "
\n",
212 | " \n",
213 | "
\n",
214 | "
"
215 | ],
216 | "text/plain": [
217 | " sepal_length sepal_width petal_length petal_width species \\\n",
218 | "14 5.8 4.0 1.2 0.2 setosa \n",
219 | "98 5.1 2.5 3.0 1.1 versicolor \n",
220 | "75 6.6 3.0 4.4 1.4 versicolor \n",
221 | "16 5.4 3.9 1.3 0.4 setosa \n",
222 | "131 7.9 3.8 6.4 2.0 virginica \n",
223 | "\n",
224 | " species_cat \n",
225 | "14 0 \n",
226 | "98 1 \n",
227 | "75 1 \n",
228 | "16 0 \n",
229 | "131 2 "
230 | ]
231 | },
232 | "execution_count": 6,
233 | "metadata": {},
234 | "output_type": "execute_result"
235 | }
236 | ],
237 | "source": [
238 | "from sklearn.preprocessing import LabelEncoder\n",
239 | "lr=LabelEncoder()\n",
240 | "df['species_cat']=lr.fit_transform(df.species)\n",
241 | "df.sample(5,random_state=1)"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 16,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/plain": [
252 | "14 0\n",
253 | "98 1\n",
254 | "75 1\n",
255 | "16 0\n",
256 | "131 2\n",
257 | "Name: species_cat, dtype: int32"
258 | ]
259 | },
260 | "execution_count": 16,
261 | "metadata": {},
262 | "output_type": "execute_result"
263 | }
264 | ],
265 | "source": [
266 | "X=df[['sepal_length','sepal_width','petal_length','petal_width']]\n",
267 | "y=df['species_cat']\n",
268 | "#X.sample(5,random_state=1)\n",
269 | "y.sample(5,random_state=1)\n"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 14,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "name": "stdout",
279 | "output_type": "stream",
280 | "text": [
281 | "(150, 4)\n",
282 | "(150,)\n"
283 | ]
284 | }
285 | ],
286 | "source": [
287 | "print(X.shape)\n",
288 | "print(y.shape)"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 23,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "data": {
298 | "text/plain": [
299 | "2 50\n",
300 | "1 50\n",
301 | "0 50\n",
302 | "Name: species_cat, dtype: int64"
303 | ]
304 | },
305 | "execution_count": 23,
306 | "metadata": {},
307 | "output_type": "execute_result"
308 | }
309 | ],
310 | "source": [
311 | "df['species_cat'].value_counts()"
312 | ]
313 | },
314 | {
315 | "cell_type": "code",
316 | "execution_count": 27,
317 | "metadata": {},
318 | "outputs": [
319 | {
320 | "data": {
321 | "text/plain": [
322 | "( sepal_length sepal_width petal_length petal_width\n",
323 | " 77 6.7 3.0 5.0 1.7\n",
324 | " 29 4.7 3.2 1.6 0.2\n",
325 | " 92 5.8 2.6 4.0 1.2\n",
326 | " 23 5.1 3.3 1.7 0.5\n",
327 | " 128 6.4 2.8 5.6 2.1, 77 1\n",
328 | " 29 0\n",
329 | " 92 1\n",
330 | " 23 0\n",
331 | " 128 2\n",
332 | " Name: species_cat, dtype: int32)"
333 | ]
334 | },
335 | "execution_count": 27,
336 | "metadata": {},
337 | "output_type": "execute_result"
338 | }
339 | ],
340 | "source": [
341 | "from sklearn.model_selection import train_test_split\n",
342 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=23)\n",
343 | "X_train.shape,X_test.shape\n",
344 | "X_train.head(),y_train.head()"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 28,
350 | "metadata": {},
351 | "outputs": [
352 | {
353 | "data": {
354 | "text/plain": [
355 | "1 32\n",
356 | "2 29\n",
357 | "0 29\n",
358 | "Name: species_cat, dtype: int64"
359 | ]
360 | },
361 | "execution_count": 28,
362 | "metadata": {},
363 | "output_type": "execute_result"
364 | }
365 | ],
366 | "source": [
367 | "y_train.value_counts()"
368 | ]
369 | },
370 | {
371 | "cell_type": "code",
372 | "execution_count": 22,
373 | "metadata": {},
374 | "outputs": [
375 | {
376 | "name": "stdout",
377 | "output_type": "stream",
378 | "text": [
379 | "Decision Tree prediction accuracy = 96.7%\n"
380 | ]
381 | }
382 | ],
383 | "source": [
384 | "from sklearn.tree import DecisionTreeClassifier\n",
385 | "model = DecisionTreeClassifier(random_state=23)\n",
386 | "model = model.fit(X_train, y_train)\n",
387 | "score = 100.0 * model.score(X_test, y_test)\n",
388 | "print(f\"Decision Tree prediction accuracy = {score:4.1f}%\")"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 30,
394 | "metadata": {},
395 | "outputs": [
396 | {
397 | "data": {
398 | "text/plain": [
399 | "array([2, 2, 1, 0, 2, 1, 0, 2, 0, 1, 1, 0, 2, 0, 0, 1, 1, 1, 2, 0, 2, 0,\n",
400 | " 0, 0, 2, 0, 0, 2, 1, 1, 0, 1, 0, 2, 0, 0, 1, 1, 1, 2, 2, 0, 1, 0,\n",
401 | " 1, 2, 1, 2, 0, 1, 2, 2, 0, 2, 1, 1, 2, 1, 0, 2])"
402 | ]
403 | },
404 | "execution_count": 30,
405 | "metadata": {},
406 | "output_type": "execute_result"
407 | }
408 | ],
409 | "source": [
410 | "result=model.predict(X_test)\n",
411 | "result"
412 | ]
413 | },
414 | {
415 | "cell_type": "markdown",
416 | "metadata": {},
417 | "source": [
418 | "CLASSIFICATION REPORT AND CONFUSION MATRIX"
419 | ]
420 | },
421 | {
422 | "cell_type": "code",
423 | "execution_count": 34,
424 | "metadata": {},
425 | "outputs": [
426 | {
427 | "name": "stdout",
428 | "output_type": "stream",
429 | "text": [
430 | " precision recall f1-score support\n",
431 | "\n",
432 | " Setosa 1.00 1.00 1.00 21\n",
433 | " Versicolor 0.90 1.00 0.95 18\n",
434 | " Virginica 1.00 0.90 0.95 21\n",
435 | "\n",
436 | " micro avg 0.97 0.97 0.97 60\n",
437 | " macro avg 0.97 0.97 0.97 60\n",
438 | "weighted avg 0.97 0.97 0.97 60\n",
439 | "\n",
440 | "[[21 0 0]\n",
441 | " [ 0 18 0]\n",
442 | " [ 0 2 19]]\n"
443 | ]
444 | }
445 | ],
446 | "source": [
447 | "from sklearn.metrics import classification_report,confusion_matrix\n",
448 | "result=model.predict(X_test)\n",
449 | "Labels=['Setosa','Versicolor','Virginica']\n",
450 | "print(classification_report(y_test,result,target_names=Labels))\n",
451 | "print(confusion_matrix(y_test,result))"
452 | ]
453 | },
454 | {
455 | "cell_type": "markdown",
456 | "metadata": {},
457 | "source": [
458 | "MAKING PLOT"
459 | ]
460 | },
461 | {
462 | "cell_type": "code",
463 | "execution_count": 39,
464 | "metadata": {},
465 | "outputs": [
466 | {
467 | "ename": "ModuleNotFoundError",
468 | "evalue": "No module named 'helper_code'",
469 | "output_type": "error",
470 | "traceback": [
471 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
472 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
473 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mhelper_code\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmlplots\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfusion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my_test\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mresult\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mLabels\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Decsion Tree Classifiaction'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
474 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'helper_code'"
475 | ]
476 | }
477 | ],
478 | "source": [
479 | "from helper_code import mlplots as ml\n",
480 | "ml.confusion(y_test,result,Labels,'Decsion Tree Classifiaction')"
481 | ]
482 | },
483 | {
484 | "cell_type": "code",
485 | "execution_count": null,
486 | "metadata": {},
487 | "outputs": [],
488 | "source": []
489 | }
490 | ],
491 | "metadata": {
492 | "kernelspec": {
493 | "display_name": "Python 3",
494 | "language": "python",
495 | "name": "python3"
496 | },
497 | "language_info": {
498 | "codemirror_mode": {
499 | "name": "ipython",
500 | "version": 3
501 | },
502 | "file_extension": ".py",
503 | "mimetype": "text/x-python",
504 | "name": "python",
505 | "nbconvert_exporter": "python",
506 | "pygments_lexer": "ipython3",
507 | "version": "3.7.1"
508 | }
509 | },
510 | "nbformat": 4,
511 | "nbformat_minor": 2
512 | }
513 |
--------------------------------------------------------------------------------
/Decision Tree/utf-8''iris(1).csv:
--------------------------------------------------------------------------------
1 | sepal_length,sepal_width,petal_length,petal_width,species
2 | 5.1,3.5,1.4,0.2,setosa
3 | 4.9,3.0,1.4,0.2,setosa
4 | 4.7,3.2,1.3,0.2,setosa
5 | 4.6,3.1,1.5,0.2,setosa
6 | 5.0,3.6,1.4,0.2,setosa
7 | 5.4,3.9,1.7,0.4,setosa
8 | 4.6,3.4,1.4,0.3,setosa
9 | 5.0,3.4,1.5,0.2,setosa
10 | 4.4,2.9,1.4,0.2,setosa
11 | 4.9,3.1,1.5,0.1,setosa
12 | 5.4,3.7,1.5,0.2,setosa
13 | 4.8,3.4,1.6,0.2,setosa
14 | 4.8,3.0,1.4,0.1,setosa
15 | 4.3,3.0,1.1,0.1,setosa
16 | 5.8,4.0,1.2,0.2,setosa
17 | 5.7,4.4,1.5,0.4,setosa
18 | 5.4,3.9,1.3,0.4,setosa
19 | 5.1,3.5,1.4,0.3,setosa
20 | 5.7,3.8,1.7,0.3,setosa
21 | 5.1,3.8,1.5,0.3,setosa
22 | 5.4,3.4,1.7,0.2,setosa
23 | 5.1,3.7,1.5,0.4,setosa
24 | 4.6,3.6,1.0,0.2,setosa
25 | 5.1,3.3,1.7,0.5,setosa
26 | 4.8,3.4,1.9,0.2,setosa
27 | 5.0,3.0,1.6,0.2,setosa
28 | 5.0,3.4,1.6,0.4,setosa
29 | 5.2,3.5,1.5,0.2,setosa
30 | 5.2,3.4,1.4,0.2,setosa
31 | 4.7,3.2,1.6,0.2,setosa
32 | 4.8,3.1,1.6,0.2,setosa
33 | 5.4,3.4,1.5,0.4,setosa
34 | 5.2,4.1,1.5,0.1,setosa
35 | 5.5,4.2,1.4,0.2,setosa
36 | 4.9,3.1,1.5,0.2,setosa
37 | 5.0,3.2,1.2,0.2,setosa
38 | 5.5,3.5,1.3,0.2,setosa
39 | 4.9,3.6,1.4,0.1,setosa
40 | 4.4,3.0,1.3,0.2,setosa
41 | 5.1,3.4,1.5,0.2,setosa
42 | 5.0,3.5,1.3,0.3,setosa
43 | 4.5,2.3,1.3,0.3,setosa
44 | 4.4,3.2,1.3,0.2,setosa
45 | 5.0,3.5,1.6,0.6,setosa
46 | 5.1,3.8,1.9,0.4,setosa
47 | 4.8,3.0,1.4,0.3,setosa
48 | 5.1,3.8,1.6,0.2,setosa
49 | 4.6,3.2,1.4,0.2,setosa
50 | 5.3,3.7,1.5,0.2,setosa
51 | 5.0,3.3,1.4,0.2,setosa
52 | 7.0,3.2,4.7,1.4,versicolor
53 | 6.4,3.2,4.5,1.5,versicolor
54 | 6.9,3.1,4.9,1.5,versicolor
55 | 5.5,2.3,4.0,1.3,versicolor
56 | 6.5,2.8,4.6,1.5,versicolor
57 | 5.7,2.8,4.5,1.3,versicolor
58 | 6.3,3.3,4.7,1.6,versicolor
59 | 4.9,2.4,3.3,1.0,versicolor
60 | 6.6,2.9,4.6,1.3,versicolor
61 | 5.2,2.7,3.9,1.4,versicolor
62 | 5.0,2.0,3.5,1.0,versicolor
63 | 5.9,3.0,4.2,1.5,versicolor
64 | 6.0,2.2,4.0,1.0,versicolor
65 | 6.1,2.9,4.7,1.4,versicolor
66 | 5.6,2.9,3.6,1.3,versicolor
67 | 6.7,3.1,4.4,1.4,versicolor
68 | 5.6,3.0,4.5,1.5,versicolor
69 | 5.8,2.7,4.1,1.0,versicolor
70 | 6.2,2.2,4.5,1.5,versicolor
71 | 5.6,2.5,3.9,1.1,versicolor
72 | 5.9,3.2,4.8,1.8,versicolor
73 | 6.1,2.8,4.0,1.3,versicolor
74 | 6.3,2.5,4.9,1.5,versicolor
75 | 6.1,2.8,4.7,1.2,versicolor
76 | 6.4,2.9,4.3,1.3,versicolor
77 | 6.6,3.0,4.4,1.4,versicolor
78 | 6.8,2.8,4.8,1.4,versicolor
79 | 6.7,3.0,5.0,1.7,versicolor
80 | 6.0,2.9,4.5,1.5,versicolor
81 | 5.7,2.6,3.5,1.0,versicolor
82 | 5.5,2.4,3.8,1.1,versicolor
83 | 5.5,2.4,3.7,1.0,versicolor
84 | 5.8,2.7,3.9,1.2,versicolor
85 | 6.0,2.7,5.1,1.6,versicolor
86 | 5.4,3.0,4.5,1.5,versicolor
87 | 6.0,3.4,4.5,1.6,versicolor
88 | 6.7,3.1,4.7,1.5,versicolor
89 | 6.3,2.3,4.4,1.3,versicolor
90 | 5.6,3.0,4.1,1.3,versicolor
91 | 5.5,2.5,4.0,1.3,versicolor
92 | 5.5,2.6,4.4,1.2,versicolor
93 | 6.1,3.0,4.6,1.4,versicolor
94 | 5.8,2.6,4.0,1.2,versicolor
95 | 5.0,2.3,3.3,1.0,versicolor
96 | 5.6,2.7,4.2,1.3,versicolor
97 | 5.7,3.0,4.2,1.2,versicolor
98 | 5.7,2.9,4.2,1.3,versicolor
99 | 6.2,2.9,4.3,1.3,versicolor
100 | 5.1,2.5,3.0,1.1,versicolor
101 | 5.7,2.8,4.1,1.3,versicolor
102 | 6.3,3.3,6.0,2.5,virginica
103 | 5.8,2.7,5.1,1.9,virginica
104 | 7.1,3.0,5.9,2.1,virginica
105 | 6.3,2.9,5.6,1.8,virginica
106 | 6.5,3.0,5.8,2.2,virginica
107 | 7.6,3.0,6.6,2.1,virginica
108 | 4.9,2.5,4.5,1.7,virginica
109 | 7.3,2.9,6.3,1.8,virginica
110 | 6.7,2.5,5.8,1.8,virginica
111 | 7.2,3.6,6.1,2.5,virginica
112 | 6.5,3.2,5.1,2.0,virginica
113 | 6.4,2.7,5.3,1.9,virginica
114 | 6.8,3.0,5.5,2.1,virginica
115 | 5.7,2.5,5.0,2.0,virginica
116 | 5.8,2.8,5.1,2.4,virginica
117 | 6.4,3.2,5.3,2.3,virginica
118 | 6.5,3.0,5.5,1.8,virginica
119 | 7.7,3.8,6.7,2.2,virginica
120 | 7.7,2.6,6.9,2.3,virginica
121 | 6.0,2.2,5.0,1.5,virginica
122 | 6.9,3.2,5.7,2.3,virginica
123 | 5.6,2.8,4.9,2.0,virginica
124 | 7.7,2.8,6.7,2.0,virginica
125 | 6.3,2.7,4.9,1.8,virginica
126 | 6.7,3.3,5.7,2.1,virginica
127 | 7.2,3.2,6.0,1.8,virginica
128 | 6.2,2.8,4.8,1.8,virginica
129 | 6.1,3.0,4.9,1.8,virginica
130 | 6.4,2.8,5.6,2.1,virginica
131 | 7.2,3.0,5.8,1.6,virginica
132 | 7.4,2.8,6.1,1.9,virginica
133 | 7.9,3.8,6.4,2.0,virginica
134 | 6.4,2.8,5.6,2.2,virginica
135 | 6.3,2.8,5.1,1.5,virginica
136 | 6.1,2.6,5.6,1.4,virginica
137 | 7.7,3.0,6.1,2.3,virginica
138 | 6.3,3.4,5.6,2.4,virginica
139 | 6.4,3.1,5.5,1.8,virginica
140 | 6.0,3.0,4.8,1.8,virginica
141 | 6.9,3.1,5.4,2.1,virginica
142 | 6.7,3.1,5.6,2.4,virginica
143 | 6.9,3.1,5.1,2.3,virginica
144 | 5.8,2.7,5.1,1.9,virginica
145 | 6.8,3.2,5.9,2.3,virginica
146 | 6.7,3.3,5.7,2.5,virginica
147 | 6.7,3.0,5.2,2.3,virginica
148 | 6.3,2.5,5.0,1.9,virginica
149 | 6.5,3.0,5.2,2.0,virginica
150 | 6.2,3.4,5.4,2.3,virginica
151 | 5.9,3.0,5.1,1.8,virginica
152 |
--------------------------------------------------------------------------------
/K-NN/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "KNN ALGORITHM ON IRIS DATASET"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import numpy as np\n",
18 | "import matplotlib.pyplot as plt\n",
19 | "import seaborn as sb"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 2,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 | "\n",
31 | "\n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " | \n",
48 | " sepal_length | \n",
49 | " sepal_width | \n",
50 | " petal_length | \n",
51 | " petal_width | \n",
52 | " species | \n",
53 | "
\n",
54 | " \n",
55 | " \n",
56 | " \n",
57 | " 0 | \n",
58 | " 5.1 | \n",
59 | " 3.5 | \n",
60 | " 1.4 | \n",
61 | " 0.2 | \n",
62 | " setosa | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " 4.9 | \n",
67 | " 3.0 | \n",
68 | " 1.4 | \n",
69 | " 0.2 | \n",
70 | " setosa | \n",
71 | "
\n",
72 | " \n",
73 | " 2 | \n",
74 | " 4.7 | \n",
75 | " 3.2 | \n",
76 | " 1.3 | \n",
77 | " 0.2 | \n",
78 | " setosa | \n",
79 | "
\n",
80 | " \n",
81 | " 3 | \n",
82 | " 4.6 | \n",
83 | " 3.1 | \n",
84 | " 1.5 | \n",
85 | " 0.2 | \n",
86 | " setosa | \n",
87 | "
\n",
88 | " \n",
89 | " 4 | \n",
90 | " 5.0 | \n",
91 | " 3.6 | \n",
92 | " 1.4 | \n",
93 | " 0.2 | \n",
94 | " setosa | \n",
95 | "
\n",
96 | " \n",
97 | "
\n",
98 | "
"
99 | ],
100 | "text/plain": [
101 | " sepal_length sepal_width petal_length petal_width species\n",
102 | "0 5.1 3.5 1.4 0.2 setosa\n",
103 | "1 4.9 3.0 1.4 0.2 setosa\n",
104 | "2 4.7 3.2 1.3 0.2 setosa\n",
105 | "3 4.6 3.1 1.5 0.2 setosa\n",
106 | "4 5.0 3.6 1.4 0.2 setosa"
107 | ]
108 | },
109 | "execution_count": 2,
110 | "metadata": {},
111 | "output_type": "execute_result"
112 | }
113 | ],
114 | "source": [
115 | "df=pd.read_csv(\"utf-8''iris(1).csv\")\n",
116 | "df.head()"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": 3,
122 | "metadata": {},
123 | "outputs": [
124 | {
125 | "data": {
126 | "text/plain": [
127 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)"
128 | ]
129 | },
130 | "execution_count": 3,
131 | "metadata": {},
132 | "output_type": "execute_result"
133 | }
134 | ],
135 | "source": [
136 | "df.species.unique()"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 4,
142 | "metadata": {},
143 | "outputs": [
144 | {
145 | "data": {
146 | "text/html": [
147 | "\n",
148 | "\n",
161 | "
\n",
162 | " \n",
163 | " \n",
164 | " | \n",
165 | " sepal_length | \n",
166 | " sepal_width | \n",
167 | " petal_length | \n",
168 | " petal_width | \n",
169 | " species | \n",
170 | " species_cat | \n",
171 | "
\n",
172 | " \n",
173 | " \n",
174 | " \n",
175 | " 0 | \n",
176 | " 5.1 | \n",
177 | " 3.5 | \n",
178 | " 1.4 | \n",
179 | " 0.2 | \n",
180 | " setosa | \n",
181 | " 0 | \n",
182 | "
\n",
183 | " \n",
184 | " 1 | \n",
185 | " 4.9 | \n",
186 | " 3.0 | \n",
187 | " 1.4 | \n",
188 | " 0.2 | \n",
189 | " setosa | \n",
190 | " 0 | \n",
191 | "
\n",
192 | " \n",
193 | " 2 | \n",
194 | " 4.7 | \n",
195 | " 3.2 | \n",
196 | " 1.3 | \n",
197 | " 0.2 | \n",
198 | " setosa | \n",
199 | " 0 | \n",
200 | "
\n",
201 | " \n",
202 | " 3 | \n",
203 | " 4.6 | \n",
204 | " 3.1 | \n",
205 | " 1.5 | \n",
206 | " 0.2 | \n",
207 | " setosa | \n",
208 | " 0 | \n",
209 | "
\n",
210 | " \n",
211 | " 4 | \n",
212 | " 5.0 | \n",
213 | " 3.6 | \n",
214 | " 1.4 | \n",
215 | " 0.2 | \n",
216 | " setosa | \n",
217 | " 0 | \n",
218 | "
\n",
219 | " \n",
220 | "
\n",
221 | "
"
222 | ],
223 | "text/plain": [
224 | " sepal_length sepal_width petal_length petal_width species species_cat\n",
225 | "0 5.1 3.5 1.4 0.2 setosa 0\n",
226 | "1 4.9 3.0 1.4 0.2 setosa 0\n",
227 | "2 4.7 3.2 1.3 0.2 setosa 0\n",
228 | "3 4.6 3.1 1.5 0.2 setosa 0\n",
229 | "4 5.0 3.6 1.4 0.2 setosa 0"
230 | ]
231 | },
232 | "execution_count": 4,
233 | "metadata": {},
234 | "output_type": "execute_result"
235 | }
236 | ],
237 | "source": [
238 | "from sklearn.preprocessing import LabelEncoder\n",
239 | "df['species_cat']=LabelEncoder().fit_transform(df.species)\n",
240 | "df.head()"
241 | ]
242 | },
243 | {
244 | "cell_type": "code",
245 | "execution_count": 5,
246 | "metadata": {},
247 | "outputs": [
248 | {
249 | "data": {
250 | "text/plain": [
251 | "versicolor 50\n",
252 | "setosa 50\n",
253 | "virginica 50\n",
254 | "Name: species, dtype: int64"
255 | ]
256 | },
257 | "execution_count": 5,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "df.species.value_counts()"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 7,
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "name": "stdout",
273 | "output_type": "stream",
274 | "text": [
275 | " sepal_length sepal_width petal_length petal_width\n",
276 | "0 5.1 3.5 1.4 0.2\n",
277 | "1 4.9 3.0 1.4 0.2\n",
278 | "2 4.7 3.2 1.3 0.2\n",
279 | "3 4.6 3.1 1.5 0.2\n",
280 | "4 5.0 3.6 1.4 0.2\n",
281 | "0 0\n",
282 | "1 0\n",
283 | "2 0\n",
284 | "3 0\n",
285 | "4 0\n",
286 | "Name: species_cat, dtype: int32\n"
287 | ]
288 | }
289 | ],
290 | "source": [
291 | "X=df[['sepal_length','sepal_width','petal_length','petal_width']]\n",
292 | "y=df['species_cat']\n",
293 | "print(X.head())\n",
294 | "print(y.head())"
295 | ]
296 | },
297 | {
298 | "cell_type": "code",
299 | "execution_count": 8,
300 | "metadata": {},
301 | "outputs": [],
302 | "source": [
303 | "from sklearn.model_selection import train_test_split\n",
304 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.4,random_state=23)"
305 | ]
306 | },
307 | {
308 | "cell_type": "code",
309 | "execution_count": 12,
310 | "metadata": {},
311 | "outputs": [
312 | {
313 | "name": "stdout",
314 | "output_type": "stream",
315 | "text": [
316 | "The accuracy of the algorithm = 98.33333333333333\n"
317 | ]
318 | }
319 | ],
320 | "source": [
321 | "from sklearn.neighbors import KNeighborsClassifier\n",
322 | "model=KNeighborsClassifier()\n",
323 | "model.fit(X_train,y_train)\n",
324 | "score=100*model.score(X_test,y_test)\n",
325 | "print(\"The accuracy of the algorithm = {}\".format(score))"
326 | ]
327 | },
328 | {
329 | "cell_type": "markdown",
330 | "metadata": {},
331 | "source": [
332 | "CLASSIFICATION REPORT "
333 | ]
334 | },
335 | {
336 | "cell_type": "code",
337 | "execution_count": 14,
338 | "metadata": {},
339 | "outputs": [
340 | {
341 | "name": "stdout",
342 | "output_type": "stream",
343 | "text": [
344 | " precision recall f1-score support\n",
345 | "\n",
346 | " Setosa 1.00 1.00 1.00 21\n",
347 | " Versicolor 0.95 1.00 0.97 18\n",
348 | " Virginica 1.00 0.95 0.98 21\n",
349 | "\n",
350 | " micro avg 0.98 0.98 0.98 60\n",
351 | " macro avg 0.98 0.98 0.98 60\n",
352 | "weighted avg 0.98 0.98 0.98 60\n",
353 | "\n"
354 | ]
355 | },
356 | {
357 | "data": {
358 | "text/plain": [
359 | "array([[21, 0, 0],\n",
360 | " [ 0, 18, 0],\n",
361 | " [ 0, 1, 20]], dtype=int64)"
362 | ]
363 | },
364 | "execution_count": 14,
365 | "metadata": {},
366 | "output_type": "execute_result"
367 | }
368 | ],
369 | "source": [
370 | "from sklearn.metrics import classification_report,confusion_matrix\n",
371 | "labels=['Setosa','Versicolor','Virginica']\n",
372 | "result=model.predict(X_test)\n",
373 | "print(classification_report(y_test,result,target_names=labels))\n",
374 | "confusion_matrix(y_test,result)"
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": 15,
380 | "metadata": {},
381 | "outputs": [
382 | {
383 | "ename": "ModuleNotFoundError",
384 | "evalue": "No module named 'helper_code'",
385 | "output_type": "error",
386 | "traceback": [
387 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
388 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
389 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mfrom\u001b[0m \u001b[0mhelper_code\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mmlplots\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[1;31m# Call confusion matrix plotting routine\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfusion\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0ml_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpredicted\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlabels\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m'KNN Classification'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
390 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'helper_code'"
391 | ]
392 | }
393 | ],
394 | "source": [
395 | "from helper_code import mlplots as ml\n",
396 | "\n",
397 | "# Call confusion matrix plotting routine\n",
398 | "ml.confusion(l_test, predicted, labels, 'KNN Classification')"
399 | ]
400 | },
401 | {
402 | "cell_type": "code",
403 | "execution_count": null,
404 | "metadata": {},
405 | "outputs": [],
406 | "source": []
407 | }
408 | ],
409 | "metadata": {
410 | "kernelspec": {
411 | "display_name": "Python 3",
412 | "language": "python",
413 | "name": "python3"
414 | },
415 | "language_info": {
416 | "codemirror_mode": {
417 | "name": "ipython",
418 | "version": 3
419 | },
420 | "file_extension": ".py",
421 | "mimetype": "text/x-python",
422 | "name": "python",
423 | "nbconvert_exporter": "python",
424 | "pygments_lexer": "ipython3",
425 | "version": "3.7.1"
426 | }
427 | },
428 | "nbformat": 4,
429 | "nbformat_minor": 2
430 | }
431 |
--------------------------------------------------------------------------------
/K-NN/utf-8''iris(1).csv:
--------------------------------------------------------------------------------
1 | sepal_length,sepal_width,petal_length,petal_width,species
2 | 5.1,3.5,1.4,0.2,setosa
3 | 4.9,3.0,1.4,0.2,setosa
4 | 4.7,3.2,1.3,0.2,setosa
5 | 4.6,3.1,1.5,0.2,setosa
6 | 5.0,3.6,1.4,0.2,setosa
7 | 5.4,3.9,1.7,0.4,setosa
8 | 4.6,3.4,1.4,0.3,setosa
9 | 5.0,3.4,1.5,0.2,setosa
10 | 4.4,2.9,1.4,0.2,setosa
11 | 4.9,3.1,1.5,0.1,setosa
12 | 5.4,3.7,1.5,0.2,setosa
13 | 4.8,3.4,1.6,0.2,setosa
14 | 4.8,3.0,1.4,0.1,setosa
15 | 4.3,3.0,1.1,0.1,setosa
16 | 5.8,4.0,1.2,0.2,setosa
17 | 5.7,4.4,1.5,0.4,setosa
18 | 5.4,3.9,1.3,0.4,setosa
19 | 5.1,3.5,1.4,0.3,setosa
20 | 5.7,3.8,1.7,0.3,setosa
21 | 5.1,3.8,1.5,0.3,setosa
22 | 5.4,3.4,1.7,0.2,setosa
23 | 5.1,3.7,1.5,0.4,setosa
24 | 4.6,3.6,1.0,0.2,setosa
25 | 5.1,3.3,1.7,0.5,setosa
26 | 4.8,3.4,1.9,0.2,setosa
27 | 5.0,3.0,1.6,0.2,setosa
28 | 5.0,3.4,1.6,0.4,setosa
29 | 5.2,3.5,1.5,0.2,setosa
30 | 5.2,3.4,1.4,0.2,setosa
31 | 4.7,3.2,1.6,0.2,setosa
32 | 4.8,3.1,1.6,0.2,setosa
33 | 5.4,3.4,1.5,0.4,setosa
34 | 5.2,4.1,1.5,0.1,setosa
35 | 5.5,4.2,1.4,0.2,setosa
36 | 4.9,3.1,1.5,0.2,setosa
37 | 5.0,3.2,1.2,0.2,setosa
38 | 5.5,3.5,1.3,0.2,setosa
39 | 4.9,3.6,1.4,0.1,setosa
40 | 4.4,3.0,1.3,0.2,setosa
41 | 5.1,3.4,1.5,0.2,setosa
42 | 5.0,3.5,1.3,0.3,setosa
43 | 4.5,2.3,1.3,0.3,setosa
44 | 4.4,3.2,1.3,0.2,setosa
45 | 5.0,3.5,1.6,0.6,setosa
46 | 5.1,3.8,1.9,0.4,setosa
47 | 4.8,3.0,1.4,0.3,setosa
48 | 5.1,3.8,1.6,0.2,setosa
49 | 4.6,3.2,1.4,0.2,setosa
50 | 5.3,3.7,1.5,0.2,setosa
51 | 5.0,3.3,1.4,0.2,setosa
52 | 7.0,3.2,4.7,1.4,versicolor
53 | 6.4,3.2,4.5,1.5,versicolor
54 | 6.9,3.1,4.9,1.5,versicolor
55 | 5.5,2.3,4.0,1.3,versicolor
56 | 6.5,2.8,4.6,1.5,versicolor
57 | 5.7,2.8,4.5,1.3,versicolor
58 | 6.3,3.3,4.7,1.6,versicolor
59 | 4.9,2.4,3.3,1.0,versicolor
60 | 6.6,2.9,4.6,1.3,versicolor
61 | 5.2,2.7,3.9,1.4,versicolor
62 | 5.0,2.0,3.5,1.0,versicolor
63 | 5.9,3.0,4.2,1.5,versicolor
64 | 6.0,2.2,4.0,1.0,versicolor
65 | 6.1,2.9,4.7,1.4,versicolor
66 | 5.6,2.9,3.6,1.3,versicolor
67 | 6.7,3.1,4.4,1.4,versicolor
68 | 5.6,3.0,4.5,1.5,versicolor
69 | 5.8,2.7,4.1,1.0,versicolor
70 | 6.2,2.2,4.5,1.5,versicolor
71 | 5.6,2.5,3.9,1.1,versicolor
72 | 5.9,3.2,4.8,1.8,versicolor
73 | 6.1,2.8,4.0,1.3,versicolor
74 | 6.3,2.5,4.9,1.5,versicolor
75 | 6.1,2.8,4.7,1.2,versicolor
76 | 6.4,2.9,4.3,1.3,versicolor
77 | 6.6,3.0,4.4,1.4,versicolor
78 | 6.8,2.8,4.8,1.4,versicolor
79 | 6.7,3.0,5.0,1.7,versicolor
80 | 6.0,2.9,4.5,1.5,versicolor
81 | 5.7,2.6,3.5,1.0,versicolor
82 | 5.5,2.4,3.8,1.1,versicolor
83 | 5.5,2.4,3.7,1.0,versicolor
84 | 5.8,2.7,3.9,1.2,versicolor
85 | 6.0,2.7,5.1,1.6,versicolor
86 | 5.4,3.0,4.5,1.5,versicolor
87 | 6.0,3.4,4.5,1.6,versicolor
88 | 6.7,3.1,4.7,1.5,versicolor
89 | 6.3,2.3,4.4,1.3,versicolor
90 | 5.6,3.0,4.1,1.3,versicolor
91 | 5.5,2.5,4.0,1.3,versicolor
92 | 5.5,2.6,4.4,1.2,versicolor
93 | 6.1,3.0,4.6,1.4,versicolor
94 | 5.8,2.6,4.0,1.2,versicolor
95 | 5.0,2.3,3.3,1.0,versicolor
96 | 5.6,2.7,4.2,1.3,versicolor
97 | 5.7,3.0,4.2,1.2,versicolor
98 | 5.7,2.9,4.2,1.3,versicolor
99 | 6.2,2.9,4.3,1.3,versicolor
100 | 5.1,2.5,3.0,1.1,versicolor
101 | 5.7,2.8,4.1,1.3,versicolor
102 | 6.3,3.3,6.0,2.5,virginica
103 | 5.8,2.7,5.1,1.9,virginica
104 | 7.1,3.0,5.9,2.1,virginica
105 | 6.3,2.9,5.6,1.8,virginica
106 | 6.5,3.0,5.8,2.2,virginica
107 | 7.6,3.0,6.6,2.1,virginica
108 | 4.9,2.5,4.5,1.7,virginica
109 | 7.3,2.9,6.3,1.8,virginica
110 | 6.7,2.5,5.8,1.8,virginica
111 | 7.2,3.6,6.1,2.5,virginica
112 | 6.5,3.2,5.1,2.0,virginica
113 | 6.4,2.7,5.3,1.9,virginica
114 | 6.8,3.0,5.5,2.1,virginica
115 | 5.7,2.5,5.0,2.0,virginica
116 | 5.8,2.8,5.1,2.4,virginica
117 | 6.4,3.2,5.3,2.3,virginica
118 | 6.5,3.0,5.5,1.8,virginica
119 | 7.7,3.8,6.7,2.2,virginica
120 | 7.7,2.6,6.9,2.3,virginica
121 | 6.0,2.2,5.0,1.5,virginica
122 | 6.9,3.2,5.7,2.3,virginica
123 | 5.6,2.8,4.9,2.0,virginica
124 | 7.7,2.8,6.7,2.0,virginica
125 | 6.3,2.7,4.9,1.8,virginica
126 | 6.7,3.3,5.7,2.1,virginica
127 | 7.2,3.2,6.0,1.8,virginica
128 | 6.2,2.8,4.8,1.8,virginica
129 | 6.1,3.0,4.9,1.8,virginica
130 | 6.4,2.8,5.6,2.1,virginica
131 | 7.2,3.0,5.8,1.6,virginica
132 | 7.4,2.8,6.1,1.9,virginica
133 | 7.9,3.8,6.4,2.0,virginica
134 | 6.4,2.8,5.6,2.2,virginica
135 | 6.3,2.8,5.1,1.5,virginica
136 | 6.1,2.6,5.6,1.4,virginica
137 | 7.7,3.0,6.1,2.3,virginica
138 | 6.3,3.4,5.6,2.4,virginica
139 | 6.4,3.1,5.5,1.8,virginica
140 | 6.0,3.0,4.8,1.8,virginica
141 | 6.9,3.1,5.4,2.1,virginica
142 | 6.7,3.1,5.6,2.4,virginica
143 | 6.9,3.1,5.1,2.3,virginica
144 | 5.8,2.7,5.1,1.9,virginica
145 | 6.8,3.2,5.9,2.3,virginica
146 | 6.7,3.3,5.7,2.5,virginica
147 | 6.7,3.0,5.2,2.3,virginica
148 | 6.3,2.5,5.0,1.9,virginica
149 | 6.5,3.0,5.2,2.0,virginica
150 | 6.2,3.4,5.4,2.3,virginica
151 | 5.9,3.0,5.1,1.8,virginica
152 |
--------------------------------------------------------------------------------
/K-means/utf-8''iris.csv:
--------------------------------------------------------------------------------
1 | sepal_length,sepal_width,petal_length,petal_width,species
2 | 5.1,3.5,1.4,0.2,setosa
3 | 4.9,3.0,1.4,0.2,setosa
4 | 4.7,3.2,1.3,0.2,setosa
5 | 4.6,3.1,1.5,0.2,setosa
6 | 5.0,3.6,1.4,0.2,setosa
7 | 5.4,3.9,1.7,0.4,setosa
8 | 4.6,3.4,1.4,0.3,setosa
9 | 5.0,3.4,1.5,0.2,setosa
10 | 4.4,2.9,1.4,0.2,setosa
11 | 4.9,3.1,1.5,0.1,setosa
12 | 5.4,3.7,1.5,0.2,setosa
13 | 4.8,3.4,1.6,0.2,setosa
14 | 4.8,3.0,1.4,0.1,setosa
15 | 4.3,3.0,1.1,0.1,setosa
16 | 5.8,4.0,1.2,0.2,setosa
17 | 5.7,4.4,1.5,0.4,setosa
18 | 5.4,3.9,1.3,0.4,setosa
19 | 5.1,3.5,1.4,0.3,setosa
20 | 5.7,3.8,1.7,0.3,setosa
21 | 5.1,3.8,1.5,0.3,setosa
22 | 5.4,3.4,1.7,0.2,setosa
23 | 5.1,3.7,1.5,0.4,setosa
24 | 4.6,3.6,1.0,0.2,setosa
25 | 5.1,3.3,1.7,0.5,setosa
26 | 4.8,3.4,1.9,0.2,setosa
27 | 5.0,3.0,1.6,0.2,setosa
28 | 5.0,3.4,1.6,0.4,setosa
29 | 5.2,3.5,1.5,0.2,setosa
30 | 5.2,3.4,1.4,0.2,setosa
31 | 4.7,3.2,1.6,0.2,setosa
32 | 4.8,3.1,1.6,0.2,setosa
33 | 5.4,3.4,1.5,0.4,setosa
34 | 5.2,4.1,1.5,0.1,setosa
35 | 5.5,4.2,1.4,0.2,setosa
36 | 4.9,3.1,1.5,0.2,setosa
37 | 5.0,3.2,1.2,0.2,setosa
38 | 5.5,3.5,1.3,0.2,setosa
39 | 4.9,3.6,1.4,0.1,setosa
40 | 4.4,3.0,1.3,0.2,setosa
41 | 5.1,3.4,1.5,0.2,setosa
42 | 5.0,3.5,1.3,0.3,setosa
43 | 4.5,2.3,1.3,0.3,setosa
44 | 4.4,3.2,1.3,0.2,setosa
45 | 5.0,3.5,1.6,0.6,setosa
46 | 5.1,3.8,1.9,0.4,setosa
47 | 4.8,3.0,1.4,0.3,setosa
48 | 5.1,3.8,1.6,0.2,setosa
49 | 4.6,3.2,1.4,0.2,setosa
50 | 5.3,3.7,1.5,0.2,setosa
51 | 5.0,3.3,1.4,0.2,setosa
52 | 7.0,3.2,4.7,1.4,versicolor
53 | 6.4,3.2,4.5,1.5,versicolor
54 | 6.9,3.1,4.9,1.5,versicolor
55 | 5.5,2.3,4.0,1.3,versicolor
56 | 6.5,2.8,4.6,1.5,versicolor
57 | 5.7,2.8,4.5,1.3,versicolor
58 | 6.3,3.3,4.7,1.6,versicolor
59 | 4.9,2.4,3.3,1.0,versicolor
60 | 6.6,2.9,4.6,1.3,versicolor
61 | 5.2,2.7,3.9,1.4,versicolor
62 | 5.0,2.0,3.5,1.0,versicolor
63 | 5.9,3.0,4.2,1.5,versicolor
64 | 6.0,2.2,4.0,1.0,versicolor
65 | 6.1,2.9,4.7,1.4,versicolor
66 | 5.6,2.9,3.6,1.3,versicolor
67 | 6.7,3.1,4.4,1.4,versicolor
68 | 5.6,3.0,4.5,1.5,versicolor
69 | 5.8,2.7,4.1,1.0,versicolor
70 | 6.2,2.2,4.5,1.5,versicolor
71 | 5.6,2.5,3.9,1.1,versicolor
72 | 5.9,3.2,4.8,1.8,versicolor
73 | 6.1,2.8,4.0,1.3,versicolor
74 | 6.3,2.5,4.9,1.5,versicolor
75 | 6.1,2.8,4.7,1.2,versicolor
76 | 6.4,2.9,4.3,1.3,versicolor
77 | 6.6,3.0,4.4,1.4,versicolor
78 | 6.8,2.8,4.8,1.4,versicolor
79 | 6.7,3.0,5.0,1.7,versicolor
80 | 6.0,2.9,4.5,1.5,versicolor
81 | 5.7,2.6,3.5,1.0,versicolor
82 | 5.5,2.4,3.8,1.1,versicolor
83 | 5.5,2.4,3.7,1.0,versicolor
84 | 5.8,2.7,3.9,1.2,versicolor
85 | 6.0,2.7,5.1,1.6,versicolor
86 | 5.4,3.0,4.5,1.5,versicolor
87 | 6.0,3.4,4.5,1.6,versicolor
88 | 6.7,3.1,4.7,1.5,versicolor
89 | 6.3,2.3,4.4,1.3,versicolor
90 | 5.6,3.0,4.1,1.3,versicolor
91 | 5.5,2.5,4.0,1.3,versicolor
92 | 5.5,2.6,4.4,1.2,versicolor
93 | 6.1,3.0,4.6,1.4,versicolor
94 | 5.8,2.6,4.0,1.2,versicolor
95 | 5.0,2.3,3.3,1.0,versicolor
96 | 5.6,2.7,4.2,1.3,versicolor
97 | 5.7,3.0,4.2,1.2,versicolor
98 | 5.7,2.9,4.2,1.3,versicolor
99 | 6.2,2.9,4.3,1.3,versicolor
100 | 5.1,2.5,3.0,1.1,versicolor
101 | 5.7,2.8,4.1,1.3,versicolor
102 | 6.3,3.3,6.0,2.5,virginica
103 | 5.8,2.7,5.1,1.9,virginica
104 | 7.1,3.0,5.9,2.1,virginica
105 | 6.3,2.9,5.6,1.8,virginica
106 | 6.5,3.0,5.8,2.2,virginica
107 | 7.6,3.0,6.6,2.1,virginica
108 | 4.9,2.5,4.5,1.7,virginica
109 | 7.3,2.9,6.3,1.8,virginica
110 | 6.7,2.5,5.8,1.8,virginica
111 | 7.2,3.6,6.1,2.5,virginica
112 | 6.5,3.2,5.1,2.0,virginica
113 | 6.4,2.7,5.3,1.9,virginica
114 | 6.8,3.0,5.5,2.1,virginica
115 | 5.7,2.5,5.0,2.0,virginica
116 | 5.8,2.8,5.1,2.4,virginica
117 | 6.4,3.2,5.3,2.3,virginica
118 | 6.5,3.0,5.5,1.8,virginica
119 | 7.7,3.8,6.7,2.2,virginica
120 | 7.7,2.6,6.9,2.3,virginica
121 | 6.0,2.2,5.0,1.5,virginica
122 | 6.9,3.2,5.7,2.3,virginica
123 | 5.6,2.8,4.9,2.0,virginica
124 | 7.7,2.8,6.7,2.0,virginica
125 | 6.3,2.7,4.9,1.8,virginica
126 | 6.7,3.3,5.7,2.1,virginica
127 | 7.2,3.2,6.0,1.8,virginica
128 | 6.2,2.8,4.8,1.8,virginica
129 | 6.1,3.0,4.9,1.8,virginica
130 | 6.4,2.8,5.6,2.1,virginica
131 | 7.2,3.0,5.8,1.6,virginica
132 | 7.4,2.8,6.1,1.9,virginica
133 | 7.9,3.8,6.4,2.0,virginica
134 | 6.4,2.8,5.6,2.2,virginica
135 | 6.3,2.8,5.1,1.5,virginica
136 | 6.1,2.6,5.6,1.4,virginica
137 | 7.7,3.0,6.1,2.3,virginica
138 | 6.3,3.4,5.6,2.4,virginica
139 | 6.4,3.1,5.5,1.8,virginica
140 | 6.0,3.0,4.8,1.8,virginica
141 | 6.9,3.1,5.4,2.1,virginica
142 | 6.7,3.1,5.6,2.4,virginica
143 | 6.9,3.1,5.1,2.3,virginica
144 | 5.8,2.7,5.1,1.9,virginica
145 | 6.8,3.2,5.9,2.3,virginica
146 | 6.7,3.3,5.7,2.5,virginica
147 | 6.7,3.0,5.2,2.3,virginica
148 | 6.3,2.5,5.0,1.9,virginica
149 | 6.5,3.0,5.2,2.0,virginica
150 | 6.2,3.4,5.4,2.3,virginica
151 | 5.9,3.0,5.1,1.8,virginica
152 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 suubh
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Linear Regression/LinearRegressionMultipleVariables.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Linear Regression with Multiple Variables"
8 | ]
9 | },
10 | {
11 | "cell_type": "markdown",
12 | "metadata": {},
13 | "source": [
14 | "Import the Libraries"
15 | ]
16 | },
17 | {
18 | "cell_type": "code",
19 | "execution_count": 17,
20 | "metadata": {},
21 | "outputs": [],
22 | "source": [
23 | "import numpy as np\n",
24 | "import pandas as pd\n",
25 | "import matplotlib.pyplot as plt\n",
26 | "import seaborn as sb"
27 | ]
28 | },
29 | {
30 | "cell_type": "markdown",
31 | "metadata": {},
32 | "source": [
33 | "Load the Dataset"
34 | ]
35 | },
36 | {
37 | "cell_type": "code",
38 | "execution_count": 18,
39 | "metadata": {},
40 | "outputs": [],
41 | "source": [
42 | "dataset=pd.read_csv('ex1data2.txt')\n",
43 | "X=dataset.iloc[:,:-1].values\n",
44 | "y=dataset.iloc[:,-1].values"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 19,
50 | "metadata": {},
51 | "outputs": [
52 | {
53 | "name": "stdout",
54 | "output_type": "stream",
55 | "text": [
56 | "[[1600 3]\n",
57 | " [2400 3]\n",
58 | " [1416 2]\n",
59 | " [3000 4]\n",
60 | " [1985 4]\n",
61 | " [1534 3]\n",
62 | " [1427 3]\n",
63 | " [1380 3]\n",
64 | " [1494 3]\n",
65 | " [1940 4]\n",
66 | " [2000 3]\n",
67 | " [1890 3]\n",
68 | " [4478 5]\n",
69 | " [1268 3]\n",
70 | " [2300 4]\n",
71 | " [1320 2]\n",
72 | " [1236 3]\n",
73 | " [2609 4]\n",
74 | " [3031 4]\n",
75 | " [1767 3]\n",
76 | " [1888 2]\n",
77 | " [1604 3]\n",
78 | " [1962 4]\n",
79 | " [3890 3]\n",
80 | " [1100 3]\n",
81 | " [1458 3]\n",
82 | " [2526 3]\n",
83 | " [2200 3]\n",
84 | " [2637 3]\n",
85 | " [1839 2]\n",
86 | " [1000 1]\n",
87 | " [2040 4]\n",
88 | " [3137 3]\n",
89 | " [1811 4]\n",
90 | " [1437 3]\n",
91 | " [1239 3]\n",
92 | " [2132 4]\n",
93 | " [4215 4]\n",
94 | " [2162 4]\n",
95 | " [1664 2]\n",
96 | " [2238 3]\n",
97 | " [2567 4]\n",
98 | " [1200 3]\n",
99 | " [ 852 2]\n",
100 | " [1852 4]\n",
101 | " [1203 3]]\n"
102 | ]
103 | }
104 | ],
105 | "source": [
106 | "print(X)"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 20,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "name": "stdout",
116 | "output_type": "stream",
117 | "text": [
118 | "[329900 369000 232000 539900 299900 314900 198999 212000 242500 239999\n",
119 | " 347000 329999 699900 259900 449900 299900 199900 499998 599000 252900\n",
120 | " 255000 242900 259900 573900 249900 464500 469000 475000 299900 349900\n",
121 | " 169900 314900 579900 285900 249900 229900 345000 549000 287000 368500\n",
122 | " 329900 314000 299000 179900 299900 239500]\n"
123 | ]
124 | }
125 | ],
126 | "source": [
127 | "print(y)"
128 | ]
129 | },
130 | {
131 | "cell_type": "markdown",
132 | "metadata": {},
133 | "source": [
134 | "Splitting the Dataset"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": 21,
140 | "metadata": {},
141 | "outputs": [],
142 | "source": [
143 | "from sklearn.model_selection import train_test_split\n",
144 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 22,
150 | "metadata": {},
151 | "outputs": [],
152 | "source": [
153 | "from sklearn.linear_model import LinearRegression\n",
154 | "model=LinearRegression(normalize=True)\n",
155 | "model.fit(X_train,y_train)\n",
156 | "result=model.predict(X_test)\n"
157 | ]
158 | },
159 | {
160 | "cell_type": "markdown",
161 | "metadata": {},
162 | "source": []
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 23,
167 | "metadata": {},
168 | "outputs": [
169 | {
170 | "name": "stdout",
171 | "output_type": "stream",
172 | "text": [
173 | "[266611.31683851 324085.31639041 438749.980843 327443.40737608\n",
174 | " 345745.46093557 335473.6249505 338350.81825691 329685.02578672\n",
175 | " 345505.01209594 511751.95879232 231965.49347897 353286.10757014\n",
176 | " 228942.29621667 276591.14491179]\n"
177 | ]
178 | }
179 | ],
180 | "source": [
181 | "print(result)"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": null,
187 | "metadata": {},
188 | "outputs": [],
189 | "source": []
190 | }
191 | ],
192 | "metadata": {
193 | "kernelspec": {
194 | "display_name": "Python 3",
195 | "language": "python",
196 | "name": "python3"
197 | },
198 | "language_info": {
199 | "codemirror_mode": {
200 | "name": "ipython",
201 | "version": 3
202 | },
203 | "file_extension": ".py",
204 | "mimetype": "text/x-python",
205 | "name": "python",
206 | "nbconvert_exporter": "python",
207 | "pygments_lexer": "ipython3",
208 | "version": "3.7.1"
209 | }
210 | },
211 | "nbformat": 4,
212 | "nbformat_minor": 2
213 | }
214 |
--------------------------------------------------------------------------------
/Linear Regression/LinearRegressionSingle Variables.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "Import the Libraries"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import numpy as np\n",
18 | "import matplotlib.pyplot as plt\n",
19 | "\n"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "Load the Dataset"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "metadata": {},
33 | "outputs": [],
34 | "source": [
35 | "dataset=pd.read_csv('ex1data1.txt')\n",
36 | "X=dataset.iloc[:,:-1].values\n",
37 | "y=dataset.iloc[:,-1].values"
38 | ]
39 | },
40 | {
41 | "cell_type": "markdown",
42 | "metadata": {},
43 | "source": [
44 | "Split the dataset"
45 | ]
46 | },
47 | {
48 | "cell_type": "code",
49 | "execution_count": 3,
50 | "metadata": {},
51 | "outputs": [],
52 | "source": [
53 | "from sklearn.model_selection import train_test_split\n",
54 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)\n"
55 | ]
56 | },
57 | {
58 | "cell_type": "markdown",
59 | "metadata": {},
60 | "source": [
61 | "Train Linear Regression using the scikit-learn Library"
62 | ]
63 | },
64 | {
65 | "cell_type": "code",
66 | "execution_count": 4,
67 | "metadata": {},
68 | "outputs": [
69 | {
70 | "data": {
71 | "text/plain": [
72 | "LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,\n",
73 | " normalize=False)"
74 | ]
75 | },
76 | "execution_count": 4,
77 | "metadata": {},
78 | "output_type": "execute_result"
79 | }
80 | ],
81 | "source": [
82 | "from sklearn.linear_model import LinearRegression\n",
83 | "model=LinearRegression()\n",
84 | "model.fit(X_train,y_train)\n"
85 | ]
86 | },
87 | {
88 | "cell_type": "markdown",
89 | "metadata": {},
90 | "source": [
91 | "Predict "
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 9,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | "array([ 8.88953046, 6.00803956, 4.21995151, 3.35897649, 19.97236945,\n",
103 | " 3.4552309 , 2.00775355, 1.98129835, 3.45901021, 8.03800372,\n",
104 | " 3.42818518, 1.92425432, 3.04245892, 2.61079038, 3.60935606,\n",
105 | " 3.73123894, 3.42027224, 18.32364361, 5.64652453, 1.91858535,\n",
106 | " 5.19017234, 13.5558025 , 2.8695553 , 3.03749857, 1.93700951,\n",
107 | " 2.49374974, 6.07996464, 2.3700953 , 2.07660793])"
108 | ]
109 | },
110 | "execution_count": 9,
111 | "metadata": {},
112 | "output_type": "execute_result"
113 | }
114 | ],
115 | "source": [
116 | "result=model.predict(X_test)\n",
117 | "result"
118 | ]
119 | },
120 | {
121 | "cell_type": "markdown",
122 | "metadata": {},
123 | "source": [
124 | "Plot the Training set"
125 | ]
126 | },
127 | {
128 | "cell_type": "code",
129 | "execution_count": 10,
130 | "metadata": {},
131 | "outputs": [
132 | {
133 | "data": {
134 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3X2cHFWd7/HPLw8o4TkhYHjIDCrrinu5AeYiT+4irCzhxYO4KuAgUVgi6+WuLK7Impe7qDeroguyuopRWCIZJYgogUVW5AICCjgJzxsQxEwIxJBATAJBQjK/+8epJj09Vd3V013d1V3f9+vVr+k+Xd39m0mnfnWqzvkdc3dERKS4xrU7ABERaS8lAhGRglMiEBEpOCUCEZGCUyIQESk4JQIRkYJTIpDCMLPxZvaSmU1v5rYinU6JQHIr2hGXbsNm9krZ4/5638/dt7j79u6+vJnb1svM/q+ZvWZmG6LbE2b2b2b2pjre424z+0izY5NiUiKQ3Ip2xNu7+/bAcuCEsraByu3NbELroxyzAXffAZgC/DWwNzBoZru3NywpIiUC6VjRkfVCM/uBmW0ATjezQ83sXjP7g5mtjI60J0bbTzAzN7Pe6PGC6PmfRkfmvzKzferdNnp+ppn9xszWmdnXzeyeNEfs7r7J3R8FPgD8Afj76P2mmNnNZrbazNaa2Y1mtmf03JeBQ4HLo97R16L2b5jZCjNbb2a/NrPDmvKHlq6nRCCd7mTg+8BOwEJgM/AJYFfgcOBY4GNVXv8h4LPAZEKv4wv1bmtmuwHXAp+KPvd3wMH1/BLuvhlYBLwrahoHfAeYDvQArwGXRdt+GvgVcE7UOzoves19wP5RfNcBPzSzN9QThxSTEoF0urvd/UZ3H3b3V9z91+5+n7tvdvengXnAX1R5/XXuPujurwEDwIwxbHs88KC73xA9dymwZgy/y3OEnTjuvtrdfxz9TuuBf6nxe+DuV7v7i1FSuRjYEXjrGOKQgumkc6oicZ4pf2Bmfwr8K3AQMInwHb+vyut/X3Z/I7D9GLbdozwOd3czW1Ez8tH2BF4EMLPtCD2AY4Cdo+d3qPZiM7sAOBOYBjiwHaGHIlKVegTS6SrL534beBR4q7vvCPwTYBnHsBLYq/TAzIywU0/NzMYDJwB3RU0XAPsAB0e/x1EVLxnxe5vZu4HzCReedwZ2AV4i+99duoASgXSbHYB1wMtm9naqXx9olpuAA83shGjk0ieAqWleaGYTzWw/4BrCaaGvRU/tQOh1rDWzKYSEVm4V8OayxzsQro+sASYCFxF6BCI1KRFIt/kkMAvYQOgdLMz6A919FXAKcAnwAvAW4AHg1Sov649GOq0FbiDs2PvcvXT66RLCBfAXgF8CP614/deA06LRUZcANwM/B54ElgHrCT0VkZpMC9OINFd0muc54P3uflet7UXaTT0CkSYws2PNbKdouOZnCadp7m9zWCKpKBGINMcRwNOEc/THAu9192qnhkRyQ6eGREQKTj0CEZGC64gJZbvuuqv39va2OwwRkY6yePHiNe5ecyhzZonAzPYGvge8CRgG5rn7ZWZ2EXA2sDra9DPufnO19+rt7WVwcDCrUEVEupKZDaXZLssewWbgk+6+xMx2ABab2a3Rc5e6+1cz/GwREUkps0Tg7iuJJrS4+wYzW0qd0+5FRCR7LblYHNV0P4Ctxb/ONbOHzexKM9sl4TWzzWzQzAZXr14dt4mIiDRB5onAzLYHfgScF5XT/RZhCv4MQo/hX+Ne5+7z3L3P3fumTk1VtkVERMYg00QQrQz1I8KyfNdDqMsSrQc7TFh4o64FPEREpLkySwRRKd4rgKXufklZ+7SyzU4mlAwWEZGSgQHo7YVx48LPgVFLdDdVlqOGDgc+DDxiZg9GbZ8hVEycQainvozWlAkWEekMAwMwezZs3BgeDw2FxwD9/Zl8ZEeUmOjr63PN
IxCRQujtDTv/Sj09sGxZXW9lZovdva/WdioxISKSJ8uX19feBEoEIiJ5Mn16fe1NoEQgIpInc+fCpEkj2yZNCu0ZUSIQEcmT/n6YNy9cEzALP+fNy+xCMXRI9VERkULp7890x19JPQIRkYJTIhARKTglAhGRglMiEBEpOCUCEZGCUyIQESk4JQIRkYJTIhARKTglAhGRglMiEBHJmccfD9Ul9tgDNm3K/vOUCERESlq8MlilRx4JCeDtbw+PV66ECS0oBKREICICW1cGGxoC960rg7UgGSxeHBLA/vtvbbv++hDGuBbspZUIREQA5szZujxkycaNoT0jv/pVSAB9ZWuI/ed/hgRw8smZfewoqj4qIgItXRnszjvhyCNHtv3853D00U3/qFTUIxARgZasDHbrraEHUJ4EfvGL0ANoVxIAJQIRkSDDlcFuvDEkgGOO2dp2330hAbzrXQ2/fcOUCEREIJOVwa67LrzViSdubVuyJCSAgw9uQsxNomsEIiIlTVoZ7Ljj4Kc/Hdn26KPwjnc0/NaZUCIQEWmSQw+Fe+8d2fbEE/Anf9KeeNLSqSERkQbtt184BVSeBH7+83AKKO9JANQjEBEZsze9CVatGtn2y1+GnkEnUSIQEanTNtvAa6+NbFu8GA48sD3xNEqJQEQkhaRyD489Fk4NdbLMrhGY2d5mdruZLTWzx8zsE1H7ZDO71cyejH7uklUMIiKNGh4O5/8rk8BTT4Xk0OlJALK9WLwZ+KS7vx04BPjfZrYfcCFwm7vvC9wWPRYRyZUtW0ICGD9+ZPszz4QE8Ja3tCeuLGSWCNx9pbsvie5vAJYCewInAfOjzeYD780qBhGRem3aFBJAZfnnVatCAthrr/bElaWWDB81s17gAOA+YHd3XwkhWQC7JbxmtpkNmtng6tWrWxGmiBTYhg0hAbzhDSPbX3ghJIDdYvdU3SHzRGBm2wM/As5z9/VpX+fu89y9z937pk6dml2AIlJoL74YEsCOO45sX78+JIDJk9sTVytlmgjMbCIhCQy4+/VR8yozmxY9Pw14PssYRETi/P73IQFMmTKyfd26kAB22KE9cbVDlqOGDLgCWOrul5Q9tQiYFd2fBdyQVQwiIpWWLQsJYNq0ke0bN4YEUNkzKIIs5xEcDnwYeMTMHozaPgN8CbjWzM4ClgMfyDAGEREAli6NH+q5aRNMnNj6ePIks0Tg7ncDlvB0G5dgEJEieeCB+Bm/mzePHhpaVCo6JyJd6e67wymgyiQwPBxOASkJbKVEICJd5ZZbQgKoXPmrlAAs6TxFgSkRiEhXKK0GNnPmyHZ3JYBalAhEpKP9x3+EnfwHKoadlBKA1KZEICId6bLLQgI488yR7UoA9VMZahHpKB/8IPzwh6PbtfMfO/UIRKQjHHVU6AFUJoFc9gAGBqC3N9Su7u0Nj3NMPQIRybX99guTwSrlbudfMjAAs2eHqcoAQ0PhMUB/f/viqkI9AhHJpcmTQw+gMgnksgdQbs6crUmgZOPG0J5T6hGISK4kDfPM9c6/3PLl9bXngHoEIpILZqOTwG67dUAPoNL06fW154ASgYi0VVwC2H//sPNftao9MTVk7lyYNGlk26RJoT2nlAhEpC3iEsB73hMSwEMPtSempujvh3nzoKcn/II9PeFxTi8UgxKBiLRYXAL40IdCAvjZz1K8QScMzezvDwsfDA+HnzlOAqCLxSLSInEXgT/xCfja1+p4kw4cmtkJ1CMQkUzF9QA+//nQA6grCUBHDs3sBOoRiEgm4noAX/86nHtuA2/agUMzO4F6BCLSNKVyz5VJ4Oqrw3MNJQHoyKGZnUCJQEQaNjwcdv7jKvYoixaFBHD66U36oA4cmtkJlAhEZMw2bw4JoHLZx9tvDwnghBOa/IEdODSzE+gagYjU7Y9/hG23Hd0+OAgHHZTxh/f3a8ffZEoEIpLahg2w446j25cuhT/909bHI81RMxGYmQEHA3sCDjwH3O/eUdU/RKQB
a9bA1Kmj25ctC2dnpLNVTQRmdgzwTeBJ4NmoeS/grWb2cXdPMw9QRDrUs8/CXnuNbl+1KhSEk+5Q62LxZcBfuvtMd/+b6HYs8J7oORGpphPKIcR48slwLbYyCaxdGy4CKwl0l1qnhiYAK2LanwUmNj8ckS7SgeUQHnoIZswY3f7yy6NHbUr3qJUIrgR+bWbXAM9EbXsDpwJXZBmYSMerVg4hZ4ngnnvgiCNGt7/6KmyzTevjkdaqmgjc/YtmdgNwInAoYIQeQr+7/3cL4hPpXB1QDuGWW2DmzNHtmzePnhsg3avmqKFoh//fZjY5PPS1ad7YzK4Ejgeed/c/i9ouAs4GVkebfcbdbx5L4CK5N316OB0U195mCxfCqaeObi/NEJZiqXqx2Mymm9k1ZvY8cB9wv5k9H7X11njvq4BjY9ovdfcZ0U1JQLpXDsshfPvbYUdfmQRKy0EqCRRTrVFDC4EfA9PcfV933xeYBvwEuKbaC939F8CLTYlSpB0aHfGTo3IIF18cQjjnnJHtHbcesGTCqs0LM7Mno51/Xc+VbdML3FRxaugjwHpgEPhk0qkmM5sNzAaYPn36QUNxXWyRrFSO+IFwNN9hdW0uuAC+8pXR7dr5F4OZLXb3vprb1UgE1xCO6uczctTQLGBXd/9gjSB6GZkIdgfWEGYof4HQ0zizVpB9fX0+ODhYazOR5untjT+/39MTptPm3FlnwZVXjm5XAiiWtImg1sXiM4CzgM8RSkyURg0tYgzDR919VVmA3wFuqvc9RFqiA0b8xDnxRLjxxtHtSgBSTa3ho5uAb0W3hpnZNHdfGT08GXi0Ge8r0nQ5HvET553vhPvvH92uBCBp1Ko1NIHQI3gvI4vO3QBc4e6vVXntD4AjgV3NbAXwz8CRZjYjep9lwMca/xVEMjB3bvw1gpwtgJJ0BksJQOpR69TQ1cAfCKeGSqUm9iJcI1gAnJL0Qnc/LaZZs5GlM5QuCM+ZE04HTZ8ekkBOLhS/4Q2wadPodiUAGYtaieBAd39bRdsK4F4z+01GMYnkQw4XQIkb5//GN8Irr7Q+FuketeYRrDWzD5jZ69uZ2TgzOwVINcNYWqxDq11KdXELwvf2hh6AkoA0qlYiOBV4P7DKzH4T9QJ+D7wvek7ypDT2fWgo7CFK1S6VDJLlPHHGJYBDDgn/vL/7XXtiku5TdR7BiA3NpkTbr8k2pNE0jyClDh/73nI5njQWdwropJPgJz9pfSzSudLOI6jVI3idu79QSgJm1mdmezYSoGSgQ8e+t021MtFtEtcDOOus0ANQEpCspE4EFf4PcJOZLWxmMNKgpDHuOR373nY5SpxxCeDTnw4J4LvfbXk4UjBjSgTuPsvdDwD+psnxSCNyWO0y13KQOOMSwMUXhwTwpS+1LAwpuJqJwMx2MrNTzOx8M/v76P7OAO6+IfsQJbUcVbvsCG1MnHEJYN68kAA+9anMP15khFrrEZwBLCHMEJ4EbAe8G1gcPSd5098fLgwPD4efSgLJWpw4S/X+KxPAtdeG584+O5OPFampVvXRJ4B3uvsfKtp3Ae5z9z/JOD5Ao4aks23ZAhNipm7ecgv81V+1Ph4pjmZVHzVCXaBKw9FzIpLg1VfDrN9K99wDhx3W+nhEktRKBHOBJWb2M7auRzAdeA9hPQERqbBhA+y44+j2hx6C/fdvfTwitVS9RuDu84E+4E7gVWATcAfQ5+5XZR2ctEjOZ9d2ijVrwvn/yiTw8MPhGoCSgORVrR4B0VKSVdcnlg5WObu2VJYCdKE5pWeeiR9x+vTTsM8+rY9HpF5jnVCGmT3SzECkQWM9qs/h7NpO8cQToQdQmQRWrgw9ACUB6RS1FqZ5X9JTwJuaH46MSSNH9TmaXdspFi+GvphxGGvXws47tz4ekUbVOjW0EBggfuRQzHgIaYtqR/W1EkGHLcnYTjfeGNYErvTyy6PnpYl0klqJ4GHgq+4+am1hM/vLbEKSujVyVN8hSzK201VX
wUc/Orp90yaYOLHl4Yg0Xa1rBOcB6xOeO7nJschYNVIzR2UpEl14YfiTVCaBLVvCNQAlAekWVXsE7n5Xlec01TcvGj2qz+GSjO10xhlw9dWj24eH49cJEOl0aYrO7WZm20X3tzWzOWb2JTObln14koqO6pviqKPCn68yCbhvrRMk0o1qziMgzCH4CPAy8DlgKvA48H1CATrJAx3Vj9mECeF0T6WUi/eJdLxaw0dnAW8BjjQzA04BLgZeAnqiCqQPuvvDmUcq0mRJR/hKAFI0tXoEdwCvAEuBnYBVwI2EeQTnRs+vyy48keZTAhAZqdbF4iEzuwy4CZgInOHuy81sOrDG3TXrSDqGEoBIvDS1hr5lZlcDw+5eGpbyAnBappGJNIkSgEh1aS4W4+4vVTx+OZtwRJpHCUAknTEXnavFzK40s+fN7NGytslmdquZPRn93CWrz5fiilsOErYOAx1FZbil4DJLBMBVwLEVbRcCt7n7vsBt0WORpqg7AcDWgn1DQ2GjUsE+JQMpkMwSgbv/AnixovkkYH50fz7w3qw+X4pjTAmgRGW4RdIlAjN7X3Q6Z52ZrTezDWaWVIOomt3dfSVA9HO3Kp8528wGzWxw9erVY/go6XYNJYASleEWSd0juBg40d13cvcd3X0Hd49ZlbV53H2eu/e5e9/UqVOz/CjpME1JACWNFOwT6RJpE8Eqd1/ahM9bVapRFP18vgnvKQXR1ARQMnfu6MUEVIZbCiZtIhg0s4Vmdlp0muh9VVYvq2YRMCu6Pwu4YQzvIQWTSQIoUcE+kdSJYEdgI3AMcEJ0O77aC8zsB8CvgLeZ2QozOwv4EvAeM3sSeE/0uH00bDDXMk0A5fr7YdmyUGd62bL4JKDvinSxtBPKYtZnqvmapJnHR9f7XploZJ1fyYx72NdW2m03WLWq9fEA+q5I1zOvcmhlZhe4+8Vm9nVi1i1297/LMriSvr4+Hxxs8jo4vb3xa/X29ISjQmmpLVtCOehKhx8Od9/d+nhG0HdFOpSZLXb3vlrb1eoRlC4Qd99qZBo2mAuvvBK/8PsZZ8D8+aPb20LfFelytaqP3hj9zMt/yeaZPj3+KE/DBlvixRdhypTR7Z/9LHz+862Ppyp9V6TLZVliIt80bLAtli8PF4Ark8Dll4frA7lLAqDvinS94iYCDRtsqYcf3vpnLrdoUUgAH/tYe+JKRd8V6XJVLxa/vpHZ4e5+T622rGRysVha4vbbw6Lwle65Bw47rPXxiBRJ2ovFaXsEX0/ZJgLAtdeGg+fKJLB0aegBjDkJaDy/SNPVWrz+UOAwYKqZnV/21I7A+CwDk8506aVw/vmj2597DqZNa/DNNZ5fJBO1egTbANsTEsYOZbf1wPuzDU06yfnnhx5AZRJYty70ABpOAqCS0SIZqTV89E7gTjO7yt1jxs9J0Z11Flx55ej2V1+FbbZp8odpPL9IJqr2CMzsa9Hdb5jZospbC+KTvKg4N3/0O36P2egksGVL6AE0PQmASkaLZKTWzOLvRT+/mnUgkmNl5+Z7+R1DQ72jNmnJgvBz5468RgAazy/SBLWuEXwl+nmcu99Zecs6uLbpxJEpWcY8Zw628WUMZ4jeEU81vRJoNRrPL5KJWj2CaWb2F8CJZnYNMKIosLsvySyydunEkSkZxhzKQC8b1e6UakQPN/T+devvz++/g0iHqlV99P3AWcARjC485+4eM1Wo+Vo6oawTK01mEHPcOgAQJYAmvL+IZK8pE8rc/Tp3nwlc7O7vrri1JAm0XDtHpoz19E4jMVd8ZuJiMJO2G5kE4s7Nd+IpNREBd091A04kXDT+KnB82tc143bQQQd5y/T0lE57j7z19GT7uQsWuE+aNPIzJ00K7VnFXPaZcS8Pnb6ybXt63M3Cz/K4FixwnzJl9IvTxi8imQAGPc3+PdVG8EXgNuDM6HYr8MU0r23GraWJoJEdciMaSUBjjbmnp3YCGMtntzKBikiitIkgbdG5h4EZ
7j4cPR4PPODu+ze9ixKj5UXnBgbCbNXly8MY9blzs79AOW5c/PAbs7CWbi11xlz1GkDaz4Tk6xPlH5T2vUSkqZq1Qlm5nYEXo/s7jSmqTtGOkSmNLn6SMuZUF4HrmaBV6zqEJnuJ5F7a6qNfBB4ws6vMbD6wGPiX7MIqoIwXP2noInA11Xb0muwl0hFqJgIzM+Bu4BDg+uh2qLtfk3FsxZLRZKnEBFCaCNboZ8YlMAhLkGmyl0hHSHuNYLG7H9SCeGJpYZr6JZ4CymIWcDuuqYhITc1emOZeM/tfDcbUWkUZ0552HkCWpSD6+8PEsuHh8FNJQKSjpL1Y/G7gHDNbBrxMKDXhrRo1VLdOLBMxFmW/p+EQc605s52/iHSNtD2CmcCbgaOAE4Djo5/51IoFTMbS42h2L6WsGFylhnoAee9N5T0+kU5TbZIB8EbgPOAbwMeACWkmJzT7VveEMrP4yU1m9b1PkgUL3LfZJnkCVdxEriZPVEucCNbo79muCXVp5T0+kRyhGTOLgYXAgigJ/AS4LM2bNvtWdyLIukxEXDmFWjumJsS0ZUuNBFDtPauViGhynJnKe3wiOdKsRPBI2f0JwJI0b1rzQ0Nd40eAB9MEWnciyPqosVoSSNoxpe2lxOyw//jHKgkgze9Zz98j695Uo/Ien0iONCsRLKn2eKy3KBHsmnb7MdUaSnsEPBZpEkHljinpSHb8+K2xVeyw1zA59iVvfnOdv2c9R9F5P+LOe3wiOdKsRLAFWB/dNgCby+6vT/MBCe+bfSLIUq1TQ3E7pmrF2UpH59FO7rfsE7vZiSeOMd56jqLzfg4+7/GJ5EhTEkFWN+B3wBJCqYrZCdvMJiyGMzh9+vSM/kyRensPCxa4T5yYnASSdkwLFoQeQELi+CWHxj51AV9u7Per9yg6y95UM+Q9PpGcyHsi2CP6uRvwEPDn1bbPtEcw1iPM8p3RlCnhlmbHFHN0/n1Ojd1Pf5uzt+6wG9n56ShapJBynQhGBAAXAf9QbZtME0GrzzmXfd5c/jH2oxdx/NYHZu5HH934jlxH0SKFkzYRpKo11Exmth0wzt03RPdvBT7v7rckvSazWkMDA3D66UmBZlNHf2CAv/3IRi7ffPaop+7lnbyT++Njift30prBIlJFFusRNMvuwI9DUVMmAN+vlgQyUyrPkCSDOvrvfjfcccfoEhdPsw/7sCz5hUnJemgozKxVkTcRaUDaEhNN4+5Pu/v/jG7vcPf2FKyPK0NR0qw6+lEphKm2GjO4446RT6/ddg8cq54EainVUWq0zILKNogUVssTQW5UW1mrGXX0Bwaw0/uxoWWsYeqIp159Fbynl51fWZn+/ZLqSkPjdZRKvaOhodD7aFZyEZGOUNxEkHTqp6cnfRJIOIo2Azt99HsMY3hPL9tsQ+0lHstNmgTnnBNiS1LP+1VqRZE+EcmtYiaCgQF46aXR7fWcEoo5irbT++PXAsBwLCwIWdph13MNYt48+OY3w4XhpGTQyDWNpCTSSHIRkY5RvERQ2oG/8MLI9nHjth4FpzklUnYUbdGuvlIpAYwwfXpyIopT2UPJYm3jpCSihedFCqF4iSDpInFpqOjQEHz4w/Dxj1d/n+XLkxOAgy8YiN9hH3dcfCLafnuYOHH09pU7+CzWNs4iuYhI50gz2aDdt6ZOKEuquxN3+9u/HfnaaFJW0uajJqLFTeKqNoGtnZO+NOFMpOuQ1wllY9HUCWW9veGoPw0zuPrqcLQdjQKK8/rpn0mTah+djxsXPy8gqwlsIlJYzV68vnvEnQZJ4h6Wg0wYBeQYPn5CfadodD5eRHKmHTOL26u0o54zp2bPIHFB+PILwMPD9R3Jz537+oLzr9P5eBFpo+L1CCAkgypDMeseBVTvZzf7Yq+ISAOKmQhKKk4TjWkU0FiO5EuJaHg4/FQSEJE2KnYiiI7Oq/YAJm0Xxv3rSF5EulTxRg2VSSrfM+r0j8o9i0gH0qihOFFtILP4JOA2
bnQSAJVaEJGuVpxEUFYNtFJpVlfihd9x41SJU0S6ViESwb77jp4HcAR3hWsAPb2hoVr9ny1bVJZZRLpWVyeCFSvCKaCnntradi0fwDHu4s9Dw9BQciG6cirLLCJdqqsnlC1evPX+knF9HDC8ePRG48dXX62snK4ViEgX6upEcNJJZWV9LCYJQDjtk3YHrzIQItKFuvrU0AhJC7r09KTbwasMhIh0qeIkgmo192sVopsypWnrGGuBeBHJm64+NTRCebG55ctDL+C447Y+njwZtt02XDAePz6cMurpCUmiGbOHSxekS9ciSgvEl8cmItIGxZ1ZXLljhnTrCYxV0joImrUsIhnRzOJa4kYKZTlEVAvEi0hOFTMRDAwkr0WQ1Y5ZC9KISE4VLxGUTgklGTcum4u5WiBeRHKqeImg1uSxLVvC5IPSxdw0ySDNaCCVsRaRnCpeIqjn1E+aawalHsbQUO0EksWCNBqSKiINaksiMLNjzewJM3vKzC5s6YfXe06+VuJo9UXncvUkIRGRBC1PBGY2Hvh3YCawH3Came3XsgBqTR6rVCtxtHM0UDuTkIh0jXb0CA4GnnL3p919E3ANcFLLPr10rj6NNBdz2zkaSENSRaQJ2pEI9gSeKXu8Imobwcxmm9mgmQ2uXr26uRH09yfXHho/vr6Lue0cDaQhqSLSBO1IBHErBY+a3uzu89y9z937pk6d2vwoknbg8+fXdzG3naOBNCRVRJqgHYlgBbB32eO9gOdaHkUzd+BZjAZK+7kakioiDWp5rSEzmwD8BjgaeBb4NfAhd38s6TUN1xoaGBhZbK5ZheRERHIsba2hllcfdffNZnYu8F/AeODKakmgYar6KSJSVfdXH1XVTxEpKFUfLUkaSplUdE5EpGC6PxEkDaU00wxcERGKkAjmzg07/UrumoErIkIREkF/f9jpx9EMXBGRAiQCSJ5FrBm4IiIFSQSagSsikqgYiUAzcEVEErV8Qlnb9Pdrxy8iEqMYPQIREUmkRCAiUnBKBCIiBde9iUCLuouIpNKdF4tVcVREJLXu7BFoUXcRkdS6MxFoUXcRkdS6MxFoUXcRkdS6MxGopISISGrdmQhUUkJEJLXuHDUEKikhIpJSd/YIREQkNSUCEZEDeIvkAAAIqUlEQVSCUyIQESk4JQIRkYJTIhARKTjzpIXdc8TMVgNDY3z5rsCaJoaTNcWbvU6LWfFmq9PihfQx97j71FobdUQiaISZDbp7X7vjSEvxZq/TYla82eq0eKH5MevUkIhIwSkRiIgUXBESwbx2B1AnxZu9TotZ8War0+KFJsfc9dcIRESkuiL0CEREpAolAhGRguuaRGBmy8zsETN70MwGY543M/s3M3vKzB42swPbEWcUy9uiOEu39WZ2XsU2R5rZurJt/qnFMV5pZs+b2aNlbZPN7FYzezL6uUvCa2dF2zxpZrPaHPNXzOzx6N/8x2a2c8Jrq35/WhjvRWb2bNm/+3EJrz3WzJ6Ivs8XtjHehWWxLjOzBxNe246/795mdruZLTWzx8zsE1F7Lr/HVeLN/jvs7l1xA5YBu1Z5/jjgp4ABhwD3tTvmKK7xwO8JEz/K248EbmpjXH8OHAg8WtZ2MXBhdP9C4Msxr5sMPB393CW6v0sbYz4GmBDd/3JczGm+Py2M9yLgH1J8Z34LvBnYBngI2K8d8VY8/6/AP+Xo7zsNODC6vwPwG2C/vH6Pq8Sb+Xe4a3oEKZwEfM+De4GdzWxau4MCjgZ+6+5jnTmdCXf/BfBiRfNJwPzo/nzgvTEv/SvgVnd/0d3XArcCx2YWaJm4mN39Z+6+OXp4L7BXK2JJI+FvnMbBwFPu/rS7bwKuIfzbZKpavGZmwAeBH2QdR1ruvtLdl0T3NwBLgT3J6fc4Kd5WfIe7KRE48DMzW2xms2Oe3xN4puzxiqit3U4l+T/PoWb2kJn91Mze0cqgEuzu7ishfGmB3WK2yevfGeBMQq8wTq3vTyudG50GuDLh
tEUe/8bvAla5+5MJz7f172tmvcABwH10wPe4It5ymXyHu2mFssPd/Tkz2w241cwej45gSizmNW0dO2tm2wAnAv8Y8/QSwumil6LzxD8B9m1lfGOUu78zgJnNATYDAwmb1Pr+tMq3gC8Q/mZfIJxuObNimzz+jU+jem+gbX9fM9se+BFwnruvD52X2i+LaWvJ37gy3rL2zL7DXdMjcPfnop/PAz8mdJ/LrQD2Lnu8F/Bca6JLNBNY4u6rKp9w9/Xu/lJ0/2Zgopnt2uoAK6wqnU6Lfj4fs03u/s7Rhb7jgX6PTqZWSvH9aQl3X+XuW9x9GPhOQhy5+hub2QTgfcDCpG3a9fc1s4mEneqAu18fNef2e5wQb+bf4a5IBGa2nZntULpPuLjyaMVmi4AzLDgEWFfqHrZR4lGUmb0pOu+KmR1M+Ld6oYWxxVkElEZPzAJuiNnmv4BjzGyX6LTGMVFbW5jZscCngRPdfWPCNmm+Py1Rcd3q5IQ4fg3sa2b7RL3KUwn/Nu3yl8Dj7r4i7sl2/X2j/z9XAEvd/ZKyp3L5PU6KtyXf4SyvgrfqRhg98VB0ewyYE7WfA5wT3Tfg3wmjLR4B+toc8yTCjn2nsrbyeM+NfpeHCBeIDmtxfD8AVgKvEY6OzgKmALcBT0Y/J0fb9gHfLXvtmcBT0e2jbY75KcK53gej2+XRtnsAN1f7/rQp3quj7+fDhB3WtMp4o8fHEUaV/Lad8UbtV5W+t2Xb5uHvewThdM7DZf/+x+X1e1wl3sy/wyoxISJScF1xakhERMZOiUBEpOCUCERECk6JQESk4JQIREQKTolA2sLMtkRVEh81sx+a2aQmv/9HzOwbNbY50swOK3t8jpmd0YTP3sPMrqvzNedaqCTq5RMHo3kvNavmWkI10miuwX1RBc2F0bwDzOwN0eOnoud7x/bbSjdQIpB2ecXdZ7j7nwGbCHMoWu1I4PVE4O6Xu/v3Gn1Td3/O3d9f58vuIUzMqiw+OJNQWmRfYDahBMUIZjaeMEdmJqFa5Wlmtl/09JeBS919X2AtYa4C0c+17v5W4NJoOykoJQLJg7uAtwKY2flRL+FRi9ZoMLNeC/XY50dHxdeVehAWarDvGt3vM7M7Kt/czE6IjnofMLOfm9nu0RHwOcDfRz2Td1lYC+AfotfMMLN7bWsN+F2i9jvM7Mtmdr+Z/cbM3hXzeb0W1eyPeibXm9kt0VH5xXF/AHd/wN2XxTyVpmpubDXSaKbqUUCpd1JeabO8Aud1wNGlmexSPEoE0lYW6tTMBB4xs4OAjwLvJKwZcbaZHRBt+jZgnrvvD6wHPl7Hx9wNHOLuBxB2khdEO93LCUfLM9z9rorXfA/4dPR5jwD/XPbcBHc/GDivoj3JDOAU4H8Ap5jZ3jW2L5emCmbSNlOAP/jWEsblr339NdHz66LtpYCUCKRdtrWwmtUgsJxQY+UI4Mfu/rKHgnvXE8obAzzj7vdE9xdE26a1F/BfZvYI8CmgaklvM9sJ2Nnd74ya5hMWZSkpFQNbDPSm+Pzb3H2du/8R+G+gp47Y01TBTNqm2mvzWMFU2qSbylBLZ3nF3WeUN9Q4NVG5kyo93szWA5o3Jrz268Al7r7IzI4krALWiFejn1tI93/o1bL7aV9TkqYKZtI2awinkiZER/3lry29ZkXUK9uJsS2SI11APQLJk18A7zWzSVEFxZMJ1w8AppvZodH90wineyAsz3dQdP+vE953J+DZ6H752rMbCEsCjuDu64C1Zef/PwzcWbldiyRWzTWzx6NtYquReigkdjtQunBdXmmzvALn+4H/5yo8VlhKBJIbHpbpuwq4n7Ay03fd/YHo6aXALDN7mLCObGn0zOeAy8zsLsLRdpyLgB9G26wpa78ROLl0sbjiNbOAr0SfNwP4fCO/Wy1m9ndmtoJw1P6wmX03eupmwnq5TxHWJ/h4tP2uRKd3oqP9cwllkpcC17r7Y9HrPw2cb2ZPEa4BXBG1XwFMidrP
J6zdKwWl6qOSe9EIn5uioaYCmNnxwJvd/d/aHYt0Pl0jEOlA7n5Tu2OQ7qEegYhIwekagYhIwSkRiIgUnBKBiEjBKRGIiBScEoGISMH9f8PpdrCFcwwVAAAAAElFTkSuQmCC\n",
135 | "text/plain": [
136 | ""
137 | ]
138 | },
139 | "metadata": {
140 | "needs_background": "light"
141 | },
142 | "output_type": "display_data"
143 | }
144 | ],
145 | "source": [
146 | "plt.scatter(X_train,y_train,color='red')\n",
147 | "plt.plot(X_train,model.predict(X_train),color='blue')\n",
148 | "plt.xlabel('Population in 10,000')\n",
149 | "plt.ylabel('Profit in $10,000')\n",
150 | "plt.title('Training Data')\n",
151 | "plt.show()"
152 | ]
153 | },
154 | {
155 | "cell_type": "code",
156 | "execution_count": 12,
157 | "metadata": {},
158 | "outputs": [
159 | {
160 | "data": {
161 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAEWCAYAAABrDZDcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xu0XHV99/H3h4T7HRIhQkJEaJWnIMgpDxfBeKNAC0LVgk8eibeH0ooFsQrKkqos2gooorVSFBagUQIUSsrCC6ImSgU9XAwB1ERMIFxy4ZIQMJDkfJ8/fnvInDN7zsyZM3uun9das2b2b/ae/c2cyf7u32X/tiICMzPrX5u1OwAzM2svJwIzsz7nRGBm1uecCMzM+pwTgZlZn3MiMDPrc04EZmZ9zonAeoaktWWPIUl/LFueOY7PvUvS/x3l/ddJirJ9PSVprqS3jGEfp0v6UaMxmo2HE4H1jIjYrvQAHgWOLyubXfDuN5bt+yBgPnCrpFMK3q/ZuDkRWN+QNEHSZyQ9ImmVpNmSdsre21bSdZKekfScpLsl7Szpi8CfA9/Mzva/WGs/EfFkRFwC/Atwcdn+z5f0B0nPS1oo6S+z8oOALwMzSjWKrPwkSb+WtEbSUkmfbv63YuZEYP3lE8DRwJuAPYH1wKXZex8GJgJ7AJOAM4CXI+LjwK+AD2dn/B8fw/5uAvaU9Jps+bfA4cCOwBeA6yRNioj7gLOAn2b72D1bfw3wf4CdgJOAf5R0TAP/brNRORFYP/lb4NyIeCIi1gGfA06WJFJSmAy8NiI2RMSvIuKFce7viex5F4CImJPVFoYi4lvA48DB1TaOiDsi4sFs/XuB64E3jzMmswpOBNYXsoP9VOC2rOnnOeA+0v+BXYErgXnAjZKWSfpnSRPGuds9sudnshg+JGlB2f73IdU+qsV8hKR5klZKWg28f7T1zRrlRGB9IdI0u48Db42IncoeW0XEqoh4KSLOj4jXAUcB7wFKHb2NTtF7ErAsIv4g6U+ArwKnAbtExE7AYkCj7ON6YA4wNSJ2BK4uW9+saZwIrJ9cDvyrpKkAkl4l6fjs9dsl7SdpM1Lb/AZgY7bdcmDvenciaXdJHwM+BZybFW8HDAErgc0knU6qEZQsB6ZK2jz7DGXbPB0R6yQdTkpOZk3nRGD95CLgR8CPJT0P/A/wxuy9PYBbgOeBhcBtpDNySB3Kp0p6VtJFVT57Qjbi5wXg18DbgHeWhq1mbfyXA4PAk8Brstcl3weWACskLctqMKcDl2SxfhK4YZz/frNc8o1pzMz6m2sEZmZ9zonAzKzPORGYmfU5JwIzsz43sd0B1GPSpEkxffr0dodhZtZV7rnnnlURMbnWel2RCKZPn87g4GDtFc3M7BWSltaznpuGzMz6nBOBmVmfcyIwM+tzTgRmZn3OicDMrM85EZhZl5kNTCcdvqZnyzYeXTF81MwsmU26pcOL2fLSbBlgZlsi6gWuEZhZFzmPTUmg5MWs3BrlRGBmXeTRMZZbPZwIzKyLTBtjudXDicDMusiFwDYjyrbJyq1RTgRm1kVmAlcAewHKnq+gFzuKb78dpkyBNWuK35dHDZlZl5lJLx74S377W3jd6zYtP/ggHHZYsft0IjAz6wDPPAN77w2rV28qGxyEgw8uft9uGjIza6P162HGDNh1101J4IYbIKI1SQCcCMzM2iICzjoLttgC5s1LZZ//fCp/97tbG4ubhszMWuzKK+HDH960/K53wfXXw2ZtOjV3IjAza5H58+HNb960/NrXwv33w3bbtS8mcCIwMyvcI4+kg365pUthWodcB+c+AjOzCs2Z4XT1anj1q4cngV/8IvUDdEoSANcIzMxGGP8Mpy+9BFttNbzs29+GmR16+YNrBGZmw4xvhlNpeBL49KdTDaBTkwC4RmBmNkJjM5wedxx873vDy9atgy23bE5URXKNwMxsmLHN
cHrppakWUJ4EHn001QK6IQmAE4GZ2Qj1zXD685+nBHD22ZvKbr89JYCpU4uOsbmcCMzMhhl9htOnnkoJ4MgjN21xwQUpAbz97S0PtincR2BmVqFyhtMNG2DzzYevdfjhcOedrYuqKIUlAklTgWuB3YEh4IqIuEzSLsAc0uDcJcDfRMSzRcVhZjZeEyfCxo3Dy4aGUs2gFxTZNLQB+HhEvB44FPiIpP2Ac4E7ImJf4I5s2cys45x8cjrYlyeBF15IzUC9kgSgwEQQEU9GxL3Z6+eBh4E9gHcC12SrXQOcWFQMZmaN+I//SAf666/fVLZ4cUoA24zsR+4BLeksljQdOAi4G9gtIp6ElCyAV1XZ5jRJg5IGV65c2YowzazP3XJLSgCnn76pbO7clABGzhXUSwpPBJK2A/4TOCsi6r77ZkRcEREDETEwefLk4gI0s7732GMpAZxY1j5xzjkpARx/fPviapVCRw1J2pyUBGZHxE1Z8XJJUyLiSUlTgBVFxmBmVk3eSCBICaCfFFYjkCTgSuDhiPhS2VtzgVnZ61nALUXFYGZWjVSZBIaG+i8JQLFNQ0cA7wPeKun+7HEc8K/AOyQtAt6RLZuZtcRuu1WO+Fm1qvdGAo1FYU1DEfFz0mV5ed5W1H7NzPJ88pNw8cXDy+bNg6OOak88ncRTTJhZT/vRj9KZfnkSOP/8VANwEkg8xYSZ9aTly2H33YeX7b03/P737YmnkzkRmFlPGRqCCRMqy/uxE7heTgRm1jPyOns3boTN3Ag+Kn89Ztb1Xv/6yiTwxBOpFuAkUJu/IjPrWhdckBLAb36zqez7308JYMqU9sXVbdw0ZGZdZ948mDFjeNmZZ8KXv9yWcLqeE4GZdY28kUA77ACrV7cnnl7hRGBmHa9aW79HAjWH+wjMrKNJlUngpZecBJrJicDMOpJUORLooYdSAthii/bE1KucCMyso3zkI5UJ4CtfSQng9a9vT0y9zn0EZtYRfvxjeNuI6Sj33x8WLGhPPP3EicDM2urZZ2GXXSrL3QfQOk4EZtY2eVNCOAG0nvsIzKzl8jqCX3jBSaBdnAjMrGV23LEyAQwOpgSwzTbticmcCMysBT7zmZQA1qzZVPb5z6cEcPDB7YvLEvcRmFlh7r4bDj10eNluu8FTT7UnHsvnRGBmTbd2LWy/fWW5+wA6kxOBmTWVRwJ1H/cRmFlT5I0EWr3aSaAbOBGY2bjsu29lApg/PyWAHXZoT0w2Nk4EZtaQL34xJYDFizeVfexjKQEceWT74rKxcx+BmY3JAw/AAQdUlrsJqHs5EZhZXV56CbbaqrLcCaD7ORGYWU15I4GGhvLLrfu4j8DMqsobCbRyZaoFOAn0DicCM6tw+OGVB/rbbksJYNKk9sRkxXEiMLNXfPObKQH84hebyt7//pQAjj22bWFZwdxHYGYsXpyuBxjJHcH9oWYikCTgEGAPIIAngF9G+Cdi1u02bIDNN68s9//u/jJq05Cko4FFwGeB44C/BD4HLMreG23bqyStkLSwrOyzkh6XdH/2OG7c/wIza4hUmQQ2bnQS6Ee1agSXAW+PiCXlhZJeA9wGvH6Uba8G/g24dkT5pRFxydjCNLNmyRvts2wZ7LFH62OxzlCrs3gisCyn/HEgp0K5SUTMB55pMC4za7ITTqhMAnPmpBqAk0B/q1UjuAr4laTrgMeysqnAKcCVDe7zDEmnAoPAxyPi2byVJJ0GnAYwbdq0BndlZtdfDyefPLzs+ONh7tz2xGOdR7X6fCXtB5xA6iwWqYYwNyIeqvnh0nTg1oj4s2x5N2AVqdP5AmBKRHyw1ucMDAzE4OBgrdXMrMzjj8Oee1aWuw+gf0i6JyIGaq1Xc9RQdsB/SNIuaTH/DL4eEbG8LMBvALc2+llmlm9oCCZMqCx3ArBqao0amibpOkkrgLuBX2Yjga7LzvbHRNKUssWTgIXV1jWzsZMqk8D69U4CNrpaNYI5wJeBmRGxEUDSBOA9wHXA
odU2lPRdYAYwSdIy4J+AGZIOJDUNLQH+dpzxmxn5I4EWLYJ99ml9LNZ9ao0amhQRc0pJACAiNkbEdcCuo20YEe+NiCkRsXlE7BkRV0bE+yJi/4g4ICJOiIgnm/GPMOtXH/hAZRK44opUA3ASsHrVqhHcI+nfgWsYPmpoFnBfkYGZWXXf+x4cN+JyzEMPHT5HkFm9aiWCU4EPka4mHjZqiMaHj5pZg1atgsmTK8vdB2DjMWoiiIiXga9nDzNrkwjYLKch1wnAmmHURCBpIqlGcCLDJ527BbgyItYXHqFZn8vrCF63DrbcsvWxWG+q1TT0LeA5UtNQaaqJPUl9BN8GTq6ynZmNU14CWLAA9t+/9bFYb6s1auiNEfF3EXFXRCzLHndFxN8BB7UiQLN+c/bZlUng4otTM5CTgBWhVo3gWUnvAf4zIoYAJG1Guo6g4SuMzazS/Pnw5jcPL3vta9NNY8yKVCsRnAJ8Afh3SaUD/07AT7L3zGycVq+GnXaqLHdHsLVKrVFDS8j6ASTtSpqkblUL4jLrC3n9AE4A1mp137w+Ip4uJQFJA5I8g7lZg6TKJLB2rZOAtUfdiWCEjwK3SprTzGDMet3uu1cmgLvvTglg223bE5NZQ4kgImZFxEHAh5scj1lPuuCClACWL99U9pnPpARwyCHti8sM6rgfgaQdgWMYfkHZDyLiuYh4vuD4zLravHkwY8bwsh13hOeea0s4Zrlq3Y/gVOBe0nTS2wDbAm8hTUZ3auHRmXWptWtTDWBkEohwErDOU6tGcB5wcEQM++lK2pl0o5priwrMrFt5JJB1m1p9BCI1B400lL1nZpm8kUArVjSSBGYD00n/Padny2bFqZUILgTulfR1SZ/OHpeTmosuLD48a4wPJK2UlwBuvDElgLwpo0c3GzgNWEo6B1uaLftvaMUZNRFExDXAADAPeAl4GfgpMBARVxcdXO9o5YHZB5JWOffcygRw9NEpAbzrXY1+6nnAiyPKXszKzYpRc9RQRDxLuj+xNaR0YC795y4dmAFmFrC/0Q4kReyv/9xzDwwMVJY3px/g0TGWm41foxeUIemBZgbSu1p9hucDSVHWrUs1gJFJIKKZncHTxlhuNn61bkzz19XeAnZvfji9qNUH5mmkWkdeuTUqbyTQ0FB++fhcyPAaJKSR2+6Ss+LUahqaQ2rbyDvf2ar54fSiVh+YfSBpprwD/aOPwtSpRe2x1Hx3HulkYRrpb+dmPStOrUSwALgkIhaOfEPS24sJqde0+sDsA0kz5CWAiy6CT3yiFXufif9e1kq1EsFZwJoq753U5Fh6VDsOzD6QNGrmTPjOd4aXbbYZbNzYnnjMWqHW/Qh+Nsp7g80Pp1f5wNzp7roLDjusstxXBFs/qGfSuVcBL0TEC5K2Bs4Gtgcui4gniw7QrEjr18MWW1SWOwFYP6ln+Oh1wK7Z688B+5DuV/ydqluYdQGpMgmsX+8kYP2n1uyjs4DXAjOy1ycDg8BTwF6STpV0QPFhmjVP3pQQP/tZSgATa9aRzXpPrRrBT4E/Ag8DjwPLgf/Oyp/OnvPGRpp1nLwEcOyxKQG86U3ticmsE9TqLF4q6TLgVmBz4NSIeFTSNGBVRPhyVet4p54K3/pWZbmbgMySeuYa+rqkbwFDEVEaDP808N5CIzMbp/vvh4MOqix3AjAbrq4W0YhYO2L5hWLCMRu/oSGYMKGy3AnALF/Dk87VIukqSSskLSwr20XS7ZIWZc87F7V/609SZRJ44QUnAbPRFJYIgKtJN70vdy5wR0TsC9yRLZuNW15H8Ny5KQFss017YjLrFoUlgoiYDzwzovidwDXZ62uAE4vav/WHvASw//4pARx/fHtiMus2dSUCSX+dNeeslrRG0vOSqs1BNJrdSlcjZ8+vGmWfp0kalDS4cuXKBnZlveycc6rfJH7BgtbHY9bN6r185iLg+Ih4uMhgykXEFcAVAAMDA27hNQAeeAAOyLmE0X0AZo2rNxEs
b1ISWC5pSkQ8KWkKsKIJn2l9ICLNAppXbmbjU28iGJQ0B/gv0k3sAYiIm8a4v7nALOBfs+dbxri99aG8JqAVK2Dy5NbHYtaL6u0s3oF0Z5WjgeOzx1+NtoGk7wK/AP5U0jJJHyIlgHdIWgS8I1s2y5XXEfzP/5xqAU4CZs1T7wVlHxjrB0dEtSuP3zbWz7L+Uu0+wG4GMitGrZvXfzIiLpL0VXLuWxwR/1BYZNZ3zj4bLr20stwJwKxYtWoEpQ5i343MCrN0KUyfXlnuBGDWGrVmH/3v7Pma0dYza1S1awHMrHWKnGLCrKq8juA//MFJwKwdnAispfISwEc/mhJAXvOQmRWvrlFDko6IiDtrlZlV45FAZp2r3hrBV+ssMxvmkkuq9wM4CZh1hlrDRw8DDgcmSzq77K0dgJxbf5glTz8NkyZVlvvgb9Z5ajUNbQFsl623fVn5GuDdRQVl3S2vBjA0VL15yMzaq9bw0XnAPElXR8TSFsVkXWk20syK0vvvhze8oQ3hmFndajUNfTkizgL+TVLelcUnFBaZdY10pj88CZx44lxuvvn5inIz6zy1moauzZ4vKToQ6z677ZZmAR0potQGtBdOBGadr1YiuJg0SdxxEXFOC+KxLnDjjfCe91SWb0oAJY+2JB4zG59aiWCKpDcDJ0i6Dhj2Pz0i7i0sMus4L74I225bWR4xHcjrQppWcERm1gy1EsH5wLnAnsCXRrwXwFuLCMo6T96Inw0bYMIEgAuB00i3rCjZJis3s0436gVlEXFjRBwLXBQRbxnxcBLoA3lTQtx5Z7oeYMIrV5LMJN1eei9SpXGvbLlW/8BsYDrpZzg9WzazVqv3xjQXSDoBOCor+mlE3FpcWNZueTWAN7whDQfNN5OxdQzPZngtYmm2XPosM2uVuqaYkPQvwJnAQ9njzKysR/Xvmeq221afEqJ6EmjEeQxvSiJbPq+ZOzGzOtQ719BfAu+IiKsi4irgmKysB5XOVJeSukFKZ6rtSgatSUpz5qQE8OKIY3NxcwJVG1HkkUZmrTaWaah3Knu9Y7MD6RxjPVMt8kBdKymNf98vv5wSwCmnDC8vflK4aiOKPNLIrNXq6iMA/gW4T9JPSL2BRwGfKiyqthrLmWrR7dy1ktL49p3XBPTii7D11mMMsyEeaWTWKWrWCCQJ+DlwKHBT9jgsIq4rOLY2GcuZatHt3KMlpcb3nTcS6GtfSzWA1iQBaHykkZk1W80aQUSEpP+KiIOBuS2Iqc3GcqZadDv3NKpfqDX2fXfezWHGOtLIzIpQbx/BXZL+vNBIOsZYzlSLbue+kJSEypWSUv37zqsBgG8OY2ZJvYngLaRk8HtJCyQ9IGlBkYG110xgCTCUPVc7a807UAOspTmdxqMlpdGSRHLrrU4AZlZbvZ3FxxYaRdcqJYgzgafLyp+meZ3G1ZpPSmXnkZqDppGSwEyGhsqv+t3EB38zy6MY5eggaSvgdGAf4AHgyojY0KLYXjEwMBCDg4Ot3u0YTCe/LX8vUo2idfJqACtWwOTJLQ3DzDqApHsiYqDWerWahq4BBkhJ4Fjgi02IrQe1/+KovH6AM85ItQAnATMbTa2mof0iYn8ASVcCvyw+pG402uieYnXeSCAz6za1agTrSy/a0STUPWp33DbbtGnuCDaz5qiVCN4gaU32eB44oPRa0ppWBNgdWndx1P/8T0oAjz02vNwJwMwaVet+BBMiYofssX1ETCx7vUOrgmxcUfMA5X1uvUNOGyfBEUcML3MCMLPxqnf4aFNJWgI8D2wENtTTqz12Rc0D1Pp59POagH73O9h330J2Z2Z9ZiyzjzbbWyLiwGKSABQ3D1Dr5tHPGwl05JGpBuAkYGbN0pYaQWsUNaSz+KGiHglkZq3UrhpBAD+UdI+k0/JWkHSapEFJgytXrmxgF0XNA1Tc/EIHHeSRQGbWeu1KBEdExBtJF6l9RNJRI1eIiCsiYiAiBiY3dEVUUUM6m/+5CxemBDDy
VpBOAGbWCm1JBBHxRPa8ArgZOKT5eylqSGdzP1eC/fcfXuYEYGat1PJEIGlbSduXXgNHAwuL2VtRQzrH/7l5HcHz5zcjAbTmHsdm1jva0Vm8G3BzuvEZE4HvRMT32xBHWxTbEdz6oa1m1v1anggi4hHgDa3eb7u1ZiTQaENbnQjMLF87ryPoC7NmtXIkUPtnQTWz7tPjiaB97eVPPpkSwLXXDi8vtiO46Ftnmlkv6uFEUGovX0q6bKHUXl58MpDg1a8eXjY01IqRQK2fBdXMul8PJ4LWTQVRkjcS6IYbUgKo1kfQXK2bBdXMeoenmGiCzpoSoto9js3M8vVwjaD49vK8GgD4gjAz6y49nAiKay//1KecAMysd/Rw01CpeeQ8UnPQNFISaLzZZO1a2H77ynIf/M2sm/VwIoBmtpfn1QDWrYMtt2zKx5uZtU0PNw1BM64jyOsH+NKXUi3AScDMekEP1wjGN+9OZ40EMjMrTg/XCBq7jsAjgcys3/RwIhjbdQQ33FBvAvA0z2bWW3q4aWgaqTkor3yTjRthYs63kH/272mezaz39HCNoPZ1BFJlElizZrQmoNZPW2FmVrQeTgSleXd2LSvbGsjvB/jKV1ICyLtOYBNP82xmvaeHE0HJH1959bGPnYc0vAnn4INTAvjoR+v5LE/zbGa9p4f7CKDUlLNo0T78yZ8sqnh37KOALmR4HwF4mmcz63Y9nQheeukpttqq8mgfsRnpxvNj1fxpK8zM2q2nE8EJJ/zwlddbbrmOdeu2zpb2GseneppnM+stPd1H8I1vPM373vcd1q+fWJYEmtGU42sJzKx39HQimDbtJK69Npg4cU+ad8eu9t0C08ysCD2dCJKZwBJSn8ASxp4ERp79n4mvJTCzXtLTfQTjl3clcTW+lsDMulMf1AhGU6utP+9K4mp8LYGZdac+rhHUM2/QaDWAcr6WwMy6Vx8ngmrzBs0qWxapQ3ikXYHt8LUEZtYL+jQRzKb62f5GUs1ga/KTgIDLstelC8tKHcVOBmbWffqgj2BkP8Dfs6kJqJoXgaervFdKDh5Cama9occTQd6Y/8upvwM4z154Omoz6yU9ngjyDtj1zjS3K9XvZ+DpqM2sd/R4Imj0wLwNqR/gClINYORVyZ6O2sx6R1sSgaRjJP1W0mJJ5xa3p3oPzFuQagAjD/jVrkquffczM7Nu0fJEIGkC8DXgWGA/4L2S9itmb7UOzKUD/1XAKuqfhqJ097O82oKZWXdpx/DRQ4DFEfEIgKTrgHcCDzV/VzNJcwPljQDai3TQH89n+8BvZt2vHU1DewCPlS0vy8qGkXSapEFJgytXrhzH7i7DzThmZtW1IxEop6xiKE9EXBERAxExMHny5HHszs04ZmajaUfT0DJgatnynsATxe7SzThmZtW0o0bwK2BfSa+RtAVwCjC3DXGYmRltqBFExAZJZwA/ACYAV0XEg62Ow8zMkrZMOhcRtwG3tWPfZmY2XI9fWWxmZrU4EZiZ9TknAjOzPudE0LBa9zs2M+sOfXqHsvGq537HZmbdwTWChvjGNGbWO5wIGuIb05hZ73AiaIhvTGNmvcOJoCG+MY2Z9Q4ngoZ4RlMz6x0eNdQwz2hqZr3BNQIzsz7nRGBm1uecCMzM+pwTgZlZn3MiMDPrc4qouG98x5G0kjShTyMmAauaGE7RHG/xui1mx1usbosX6o95r4iYXGulrkgE4yFpMCIG2h1HvRxv8botZsdbrG6LF5ofs5uGzMz6nBOBmVmf64dEcEW7Axgjx1u8bovZ8Rar2+KFJsfc830EZmY2un6oEZiZ2SicCMzM+lzPJAJJSyQ9IOl+SYM570vSVyQtlrRA0hvbEWcWy59mcZYeaySdNWKdGZJWl61zfotjvErSCkkLy8p2kXS7pEXZ885Vtp2VrbNI0qw2x3yxpN9kf/ObJe1UZdtRfz8tjPezkh4v+7sfV2XbYyT9Nvs9n9vGeOeUxbpE0v1Vtm3H9ztV
0k8kPSzpQUlnZuUd+TseJd7if8MR0RMPYAkwaZT3jwO+R7qBwKHA3e2OOYtrAvAU6cKP8vIZwK1tjOso4I3AwrKyi4Bzs9fnAl/I2W4X4JHseefs9c5tjPloYGL2+gt5Mdfz+2lhvJ8F/rGO38zvgb2BLYBfA/u1I94R738ROL+Dvt8pwBuz19sDvwP269Tf8SjxFv4b7pkaQR3eCVwbyV3ATpKmtDso4G3A7yOi0SunCxER84FnRhS/E7gme30NcGLOpn8B3B4Rz0TEs8DtwDGFBVomL+aI+GFEbMgW7wL2bEUs9ajyHdfjEGBxRDwSES8D15H+NoUaLV5JAv4G+G7RcdQrIp6MiHuz188DDwN70KG/42rxtuI33EuJIIAfSrpH0mk57+8BPFa2vCwra7dTqP6f5zBJv5b0PUn/q5VBVbFbRDwJ6UcLvCpnnU79ngE+SKoV5qn1+2mlM7JmgKuqNFt04nd8JLA8IhZVeb+t36+k6cBBwN10we94RLzlCvkN99Idyo6IiCckvQq4XdJvsjOYEuVs09axs5K2AE4APpXz9r2k5qK1WTvxfwH7tjK+BnXc9wwg6TxgAzC7yiq1fj+t8nXgAtJ3dgGpueWDI9bpxO/4vYxeG2jb9ytpO+A/gbMiYk2qvNTeLKesJd/xyHjLygv7DfdMjSAinsieVwA3k6rP5ZYBU8uW9wSeaE10VR0L3BsRy0e+ERFrImJt9vo2YHNJk1od4AjLS81p2fOKnHU67nvOOvr+CpgZWWPqSHX8floiIpZHxMaIGAK+USWOjvqOJU0E/hqYU22ddn2/kjYnHVRnR8RNWXHH/o6rxFv4b7gnEoGkbSVtX3pN6lxZOGK1ucCpSg4FVpeqh21U9SxK0u5ZuyuSDiH9rZ5uYWx55gKl0ROzgFty1vkBcLSknbNmjaOzsraQdAxwDnBCRLxYZZ16fj8tMaLf6qQqcfwK2FfSa7Ja5Smkv027vB34TUQsy3uzXd9v9v/nSuDhiPhS2Vsd+TuuFm9LfsNF9oK36kEaPfHr7PEgcF5WfjpwevZawNdIoy0eAAbaHPM2pAP7jmVl5fGekf1bfk3qIDq8xfF9F3gSWE86O/oQsCtwB7Aoe94lW3cA+GbZth8EFme8EHkOAAAENElEQVSPD7Q55sWktt77s8fl2bqvBm4b7ffTpni/lf0+F5AOWFNGxpstH0caVfL7dsablV9d+t2WrdsJ3++bSM05C8r+/sd16u94lHgL/w17igkzsz7XE01DZmbWOCcCM7M+50RgZtbnnAjMzPqcE4GZWZ9zIrC2kLQxmyVxoaQbJG3T5M9/v6R/q7HODEmHly2fLunUJuz71ZJuHOM2ZyjNJBrlFw5m173UnDVXVWYjza41uDubQXNOdt0BkrbMlhdn709v7F9rvcCJwNrljxFxYET8GfAy6RqKVpsBvJIIIuLyiLh2vB8aEU9ExLvHuNmdpAuzRk4+eCxpapF9gdNIU1AMI2kC6RqZY0mzVb5X0n7Z218ALo2IfYFnSdcqkD0/GxH7AJdm61mfciKwTvAzYB8ASWdntYSFyu7RIGm60nzs12RnxTeWahBKc7BPyl4PSPrpyA+XdHx21nufpB9J2i07Az4d+FhWMzlS6V4A/5htc6Cku7RpDvids/KfSvqCpF9K+p2kI3P2N13ZnP1ZzeQmSd/PzsovyvsCIuK+iFiS81Y9s+bmzkaaXan6VqBUOymfabN8Bs4bgbeVrmS3/uNEYG2lNE/NscADkg4GPgD8b9I9I/6fpIOyVf8UuCIiDgDWAH8/ht38HDg0Ig4iHSQ/mR10LyedLR8YET8bsc21wDnZ/h4A/qnsvYkRcQhw1ojyag4ETgb2B06WNLXG+uXqmQWz2jq7As/FpimMy7d9ZZvs/dXZ+taHnAisXbZWupvVIPAoaY6VNwE3R8QLkSbcu4k0vTHAYxFxZ/b629m69doT+IGkB4BPAKNO6S1pR2CniJiXFV1DuilL
SWkysHuA6XXs/46IWB0R64CHgL3GEHs9s2BWW2e0bTtxBlNrk16ahtq6yx8j4sDyghpNEyMPUqXlDWw6odmqyrZfBb4UEXMlzSDdBWw8XsqeN1Lf/6GXyl7Xu01JPbNgVltnFakpaWJ21l++bWmbZVmtbEcau0mO9QDXCKyTzAdOlLRNNoPiSaT+A4Bpkg7LXr+X1NwD6fZ8B2ev31Xlc3cEHs9el9979nnSLQGHiYjVwLNl7f/vA+aNXK9Fqs6aK+k32Tq5s5FGmkjsJ0Cp47p8ps3yGTjfDfw4PPFY33IisI4R6TZ9VwO/JN2Z6ZsRcV/29sPALEkLSPeRLY2e+RxwmaSfkc6283wWuCFbZ1VZ+X8DJ5U6i0dsMwu4ONvfgcDnx/Nvq0XSP0haRjprXyDpm9lbt5Hul7uYdH+Cv8/Wn0TWvJOd7Z9Bmib5YeD6iHgw2/4c4GxJi0l9AFdm5VcCu2blZ5Pu3Wt9yrOPWsfLRvjcmg01NUDSXwF7R8RX2h2LdT/3EZh1oYi4td0xWO9wjcDMrM+5j8DMrM85EZiZ9TknAjOzPudEYGbW55wIzMz63P8Ht7TcPey9OBYAAAAASUVORK5CYII=\n",
162 | "text/plain": [
163 | ""
164 | ]
165 | },
166 | "metadata": {
167 | "needs_background": "light"
168 | },
169 | "output_type": "display_data"
170 | }
171 | ],
172 | "source": [
173 | "plt.scatter(X_test,y_test,color='yellow')\n",
174 | "plt.plot(X_train,model.predict(X_train),color='blue')\n",
175 | "plt.xlabel('Population in 10,000')\n",
176 | "plt.ylabel('Profit in $10,000')\n",
177 | "plt.title('Test Data')\n",
178 | "plt.show()"
179 | ]
180 | },
181 | {
182 | "cell_type": "code",
183 | "execution_count": null,
184 | "metadata": {},
185 | "outputs": [],
186 | "source": []
187 | }
188 | ],
189 | "metadata": {
190 | "kernelspec": {
191 | "display_name": "Python 3",
192 | "language": "python",
193 | "name": "python3"
194 | },
195 | "language_info": {
196 | "codemirror_mode": {
197 | "name": "ipython",
198 | "version": 3
199 | },
200 | "file_extension": ".py",
201 | "mimetype": "text/x-python",
202 | "name": "python",
203 | "nbconvert_exporter": "python",
204 | "pygments_lexer": "ipython3",
205 | "version": "3.7.1"
206 | }
207 | },
208 | "nbformat": 4,
209 | "nbformat_minor": 2
210 | }
211 |
--------------------------------------------------------------------------------
/Linear Regression/ex1data1.txt:
--------------------------------------------------------------------------------
1 | 6.1101,17.592
2 | 5.5277,9.1302
3 | 8.5186,13.662
4 | 7.0032,11.854
5 | 5.8598,6.8233
6 | 8.3829,11.886
7 | 7.4764,4.3483
8 | 8.5781,12
9 | 6.4862,6.5987
10 | 5.0546,3.8166
11 | 5.7107,3.2522
12 | 14.164,15.505
13 | 5.734,3.1551
14 | 8.4084,7.2258
15 | 5.6407,0.71618
16 | 5.3794,3.5129
17 | 6.3654,5.3048
18 | 5.1301,0.56077
19 | 6.4296,3.6518
20 | 7.0708,5.3893
21 | 6.1891,3.1386
22 | 20.27,21.767
23 | 5.4901,4.263
24 | 6.3261,5.1875
25 | 5.5649,3.0825
26 | 18.945,22.638
27 | 12.828,13.501
28 | 10.957,7.0467
29 | 13.176,14.692
30 | 22.203,24.147
31 | 5.2524,-1.22
32 | 6.5894,5.9966
33 | 9.2482,12.134
34 | 5.8918,1.8495
35 | 8.2111,6.5426
36 | 7.9334,4.5623
37 | 8.0959,4.1164
38 | 5.6063,3.3928
39 | 12.836,10.117
40 | 6.3534,5.4974
41 | 5.4069,0.55657
42 | 6.8825,3.9115
43 | 11.708,5.3854
44 | 5.7737,2.4406
45 | 7.8247,6.7318
46 | 7.0931,1.0463
47 | 5.0702,5.1337
48 | 5.8014,1.844
49 | 11.7,8.0043
50 | 5.5416,1.0179
51 | 7.5402,6.7504
52 | 5.3077,1.8396
53 | 7.4239,4.2885
54 | 7.6031,4.9981
55 | 6.3328,1.4233
56 | 6.3589,-1.4211
57 | 6.2742,2.4756
58 | 5.6397,4.6042
59 | 9.3102,3.9624
60 | 9.4536,5.4141
61 | 8.8254,5.1694
62 | 5.1793,-0.74279
63 | 21.279,17.929
64 | 14.908,12.054
65 | 18.959,17.054
66 | 7.2182,4.8852
67 | 8.2951,5.7442
68 | 10.236,7.7754
69 | 5.4994,1.0173
70 | 20.341,20.992
71 | 10.136,6.6799
72 | 7.3345,4.0259
73 | 6.0062,1.2784
74 | 7.2259,3.3411
75 | 5.0269,-2.6807
76 | 6.5479,0.29678
77 | 7.5386,3.8845
78 | 5.0365,5.7014
79 | 10.274,6.7526
80 | 5.1077,2.0576
81 | 5.7292,0.47953
82 | 5.1884,0.20421
83 | 6.3557,0.67861
84 | 9.7687,7.5435
85 | 6.5159,5.3436
86 | 8.5172,4.2415
87 | 9.1802,6.7981
88 | 6.002,0.92695
89 | 5.5204,0.152
90 | 5.0594,2.8214
91 | 5.7077,1.8451
92 | 7.6366,4.2959
93 | 5.8707,7.2029
94 | 5.3054,1.9869
95 | 8.2934,0.14454
96 | 13.394,9.0551
97 | 5.4369,0.61705
98 |
--------------------------------------------------------------------------------
/Linear Regression/ex1data2.txt:
--------------------------------------------------------------------------------
1 | 2104,3,399900
2 | 1600,3,329900
3 | 2400,3,369000
4 | 1416,2,232000
5 | 3000,4,539900
6 | 1985,4,299900
7 | 1534,3,314900
8 | 1427,3,198999
9 | 1380,3,212000
10 | 1494,3,242500
11 | 1940,4,239999
12 | 2000,3,347000
13 | 1890,3,329999
14 | 4478,5,699900
15 | 1268,3,259900
16 | 2300,4,449900
17 | 1320,2,299900
18 | 1236,3,199900
19 | 2609,4,499998
20 | 3031,4,599000
21 | 1767,3,252900
22 | 1888,2,255000
23 | 1604,3,242900
24 | 1962,4,259900
25 | 3890,3,573900
26 | 1100,3,249900
27 | 1458,3,464500
28 | 2526,3,469000
29 | 2200,3,475000
30 | 2637,3,299900
31 | 1839,2,349900
32 | 1000,1,169900
33 | 2040,4,314900
34 | 3137,3,579900
35 | 1811,4,285900
36 | 1437,3,249900
37 | 1239,3,229900
38 | 2132,4,345000
39 | 4215,4,549000
40 | 2162,4,287000
41 | 1664,2,368500
42 | 2238,3,329900
43 | 2567,4,314000
44 | 1200,3,299000
45 | 852,2,179900
46 | 1852,4,299900
47 | 1203,3,239500
48 |
--------------------------------------------------------------------------------
/Logistic Regression/Logistic/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "IMPORTING THE LIBRARIES "
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd\n",
17 | "import numpy as np\n",
18 | "import matplotlib.pyplot as plt\n",
19 | "import seaborn as sb"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 |     "TRAINING A LOGISTIC REGRESSION MODEL TO PREDICT WHETHER A STUDENT GETS SELECTED ON THE BASIS OF TWO EXAMS"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "metadata": {},
33 | "outputs": [
34 | {
35 | "data": {
36 | "text/html": [
37 | "\n",
38 | "\n",
51 | "
\n",
52 | " \n",
53 | " \n",
54 | " | \n",
55 | " 34.62365962451697 | \n",
56 | " 78.0246928153624 | \n",
57 | " 0 | \n",
58 | "
\n",
59 | " \n",
60 | " \n",
61 | " \n",
62 | " 75 | \n",
63 | " 47.264269 | \n",
64 | " 88.475865 | \n",
65 | " 1 | \n",
66 | "
\n",
67 | " \n",
68 | "
\n",
69 | "
"
70 | ],
71 | "text/plain": [
72 | " 34.62365962451697 78.0246928153624 0\n",
73 | "75 47.264269 88.475865 1"
74 | ]
75 | },
76 | "execution_count": 2,
77 | "metadata": {},
78 | "output_type": "execute_result"
79 | }
80 | ],
81 | "source": [
82 | "dataset=pd.read_csv('ex2data1.txt')\n",
83 | "dataset.sample()"
84 | ]
85 | },
86 | {
87 | "cell_type": "code",
88 | "execution_count": 3,
89 | "metadata": {},
90 | "outputs": [
91 | {
92 | "name": "stdout",
93 | "output_type": "stream",
94 | "text": [
95 | "\n",
96 | "RangeIndex: 99 entries, 0 to 98\n",
97 | "Data columns (total 3 columns):\n",
98 | "34.62365962451697 99 non-null float64\n",
99 | "78.0246928153624 99 non-null float64\n",
100 | "0 99 non-null int64\n",
101 | "dtypes: float64(2), int64(1)\n",
102 | "memory usage: 2.4 KB\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "dataset.info()"
108 | ]
109 | },
110 | {
111 | "cell_type": "code",
112 | "execution_count": 4,
113 | "metadata": {},
114 | "outputs": [
115 | {
116 | "data": {
117 | "text/html": [
118 | "\n",
119 | "\n",
132 | "
\n",
133 | " \n",
134 | " \n",
135 | " | \n",
136 | " 34.62365962451697 | \n",
137 | " 78.0246928153624 | \n",
138 | " 0 | \n",
139 | "
\n",
140 | " \n",
141 | " \n",
142 | " \n",
143 | " count | \n",
144 | " 99.000000 | \n",
145 | " 99.000000 | \n",
146 | " 99.000000 | \n",
147 | "
\n",
148 | " \n",
149 | " mean | \n",
150 | " 65.957614 | \n",
151 | " 66.102779 | \n",
152 | " 0.606061 | \n",
153 | "
\n",
154 | " \n",
155 | " std | \n",
156 | " 19.302009 | \n",
157 | " 18.638875 | \n",
158 | " 0.491108 | \n",
159 | "
\n",
160 | " \n",
161 | " min | \n",
162 | " 30.058822 | \n",
163 | " 30.603263 | \n",
164 | " 0.000000 | \n",
165 | "
\n",
166 | " \n",
167 | " 25% | \n",
168 | " 51.297736 | \n",
169 | " 47.978125 | \n",
170 | " 0.000000 | \n",
171 | "
\n",
172 | " \n",
173 | " 50% | \n",
174 | " 67.319257 | \n",
175 | " 66.589353 | \n",
176 | " 1.000000 | \n",
177 | "
\n",
178 | " \n",
179 | " 75% | \n",
180 | " 80.234877 | \n",
181 | " 79.876423 | \n",
182 | " 1.000000 | \n",
183 | "
\n",
184 | " \n",
185 | " max | \n",
186 | " 99.827858 | \n",
187 | " 98.869436 | \n",
188 | " 1.000000 | \n",
189 | "
\n",
190 | " \n",
191 | "
\n",
192 | "
"
193 | ],
194 | "text/plain": [
195 | " 34.62365962451697 78.0246928153624 0\n",
196 | "count 99.000000 99.000000 99.000000\n",
197 | "mean 65.957614 66.102779 0.606061\n",
198 | "std 19.302009 18.638875 0.491108\n",
199 | "min 30.058822 30.603263 0.000000\n",
200 | "25% 51.297736 47.978125 0.000000\n",
201 | "50% 67.319257 66.589353 1.000000\n",
202 | "75% 80.234877 79.876423 1.000000\n",
203 | "max 99.827858 98.869436 1.000000"
204 | ]
205 | },
206 | "execution_count": 4,
207 | "metadata": {},
208 | "output_type": "execute_result"
209 | }
210 | ],
211 | "source": [
212 | "dataset.describe()"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "metadata": {},
218 | "source": [
219 | "CREATE FEATURE MATRIX AND DEPENDENT MATRIX"
220 | ]
221 | },
222 | {
223 | "cell_type": "code",
224 | "execution_count": 7,
225 | "metadata": {},
226 | "outputs": [],
227 | "source": [
228 | "X=dataset.iloc[:,:-1].values\n",
229 | "y=dataset.iloc[:,-1].values"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 12,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "name": "stdout",
239 | "output_type": "stream",
240 | "text": [
241 | "[[30.28671077 43.89499752]\n",
242 | " [35.84740877 72.90219803]\n",
243 | " [60.18259939 86.3085521 ]\n",
244 | " [79.03273605 75.34437644]\n",
245 | " [45.08327748 56.31637178]\n",
246 | " [61.10666454 96.51142588]\n",
247 | " [75.02474557 46.55401354]\n",
248 | " [76.0987867 87.42056972]\n",
249 | " [84.43281996 43.53339331]\n",
250 | " [95.86155507 38.22527806]\n",
251 | " [75.01365839 30.60326323]\n",
252 | " [82.30705337 76.4819633 ]\n",
253 | " [69.36458876 97.71869196]\n",
254 | " [39.53833914 76.03681085]\n",
255 | " [53.97105215 89.20735014]\n",
256 | " [69.07014406 52.74046973]\n",
257 | " [67.94685548 46.67857411]\n",
258 | " [70.66150955 92.92713789]\n",
259 | " [76.97878373 47.57596365]\n",
260 | " [67.37202755 42.83843832]\n",
261 | " [89.67677575 65.79936593]\n",
262 | " [50.53478829 48.85581153]\n",
263 | " [34.21206098 44.2095286 ]\n",
264 | " [77.92409145 68.97235999]\n",
265 | " [62.27101367 69.95445795]\n",
266 | " [80.19018075 44.82162893]\n",
267 | " [93.1143888 38.80067034]\n",
268 | " [61.83020602 50.25610789]\n",
269 | " [38.7858038 64.99568096]\n",
270 | " [61.37928945 72.80788731]\n",
271 | " [85.40451939 57.05198398]\n",
272 | " [52.10797973 63.12762377]\n",
273 | " [52.04540477 69.43286012]\n",
274 | " [40.23689374 71.16774802]\n",
275 | " [54.63510555 52.21388588]\n",
276 | " [33.91550011 98.86943574]\n",
277 | " [64.17698887 80.90806059]\n",
278 | " [74.78925296 41.57341523]\n",
279 | " [34.18364003 75.23772034]\n",
280 | " [83.90239366 56.30804622]\n",
281 | " [51.54772027 46.85629026]\n",
282 | " [94.44336777 65.56892161]\n",
283 | " [82.36875376 40.61825516]\n",
284 | " [51.04775177 45.82270146]\n",
285 | " [62.22267576 52.06099195]\n",
286 | " [77.19303493 70.4582 ]\n",
287 | " [97.77159928 86.72782233]\n",
288 | " [62.0730638 96.76882412]\n",
289 | " [91.5649745 88.69629255]\n",
290 | " [79.94481794 74.16311935]\n",
291 | " [99.27252693 60.999031 ]\n",
292 | " [90.54671411 43.39060181]\n",
293 | " [34.52451385 60.39634246]\n",
294 | " [50.28649612 49.80453881]\n",
295 | " [49.58667722 59.80895099]\n",
296 | " [97.64563396 68.86157272]\n",
297 | " [32.57720017 95.59854761]\n",
298 | " [74.24869137 69.82457123]\n",
299 | " [71.79646206 78.45356225]\n",
300 | " [75.39561147 85.75993667]\n",
301 | " [35.28611282 47.02051395]\n",
302 | " [56.2538175 39.26147251]\n",
303 | " [30.05882245 49.59297387]\n",
304 | " [44.66826172 66.45008615]\n",
305 | " [66.56089447 41.09209808]\n",
306 | " [40.45755098 97.53518549]\n",
307 | " [49.07256322 51.88321182]\n",
308 | " [80.27957401 92.11606081]\n",
309 | " [66.74671857 60.99139403]\n",
310 | " [32.72283304 43.30717306]\n",
311 | " [64.03932042 78.03168802]\n",
312 | " [72.34649423 96.22759297]\n",
313 | " [60.45788574 73.0949981 ]\n",
314 | " [58.84095622 75.85844831]\n",
315 | " [99.8278578 72.36925193]\n",
316 | " [47.26426911 88.475865 ]\n",
317 | " [50.4581598 75.80985953]\n",
318 | " [60.45555629 42.50840944]\n",
319 | " [82.22666158 42.71987854]\n",
320 | " [88.91389642 69.8037889 ]\n",
321 | " [94.83450672 45.6943068 ]\n",
322 | " [67.31925747 66.58935318]\n",
323 | " [57.23870632 59.51428198]\n",
324 | " [80.366756 90.9601479 ]\n",
325 | " [68.46852179 85.5943071 ]\n",
326 | " [42.07545454 78.844786 ]\n",
327 | " [75.47770201 90.424539 ]\n",
328 | " [78.63542435 96.64742717]\n",
329 | " [52.34800399 60.76950526]\n",
330 | " [94.09433113 77.15910509]\n",
331 | " [90.44855097 87.50879176]\n",
332 | " [55.48216114 35.57070347]\n",
333 | " [74.49269242 84.84513685]\n",
334 | " [89.84580671 45.35828361]\n",
335 | " [83.48916274 48.3802858 ]\n",
336 | " [42.26170081 87.10385094]\n",
337 | " [99.31500881 68.77540947]\n",
338 | " [55.34001756 64.93193801]\n",
339 | " [74.775893 89.5298129 ]]\n"
340 | ]
341 | }
342 | ],
343 | "source": [
344 | "print(X)"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 13,
350 | "metadata": {},
351 | "outputs": [
352 | {
353 | "name": "stdout",
354 | "output_type": "stream",
355 | "text": [
356 | "[0 0 1 1 0 1 1 1 1 0 0 1 1 0 1 1 0 1 1 0 1 0 0 1 1 1 0 0 0 1 1 0 1 0 0 0 1\n",
357 | " 0 0 1 0 1 0 0 0 1 1 1 1 1 1 1 0 0 0 1 0 1 1 1 0 0 0 0 0 1 0 1 1 0 1 1 1 1\n",
358 | " 1 1 1 0 0 1 1 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 1 1 1]\n"
359 | ]
360 | }
361 | ],
362 | "source": [
363 | "print(y)"
364 | ]
365 | },
366 | {
367 | "cell_type": "markdown",
368 | "metadata": {},
369 | "source": [
370 | "SPLITTING THE DATASET INTO TRAINING SET AND TEST SET"
371 | ]
372 | },
373 | {
374 | "cell_type": "code",
375 | "execution_count": 14,
376 | "metadata": {},
377 | "outputs": [],
378 | "source": [
379 | "from sklearn.model_selection import train_test_split\n",
380 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=0)\n"
381 | ]
382 | },
383 | {
384 | "cell_type": "markdown",
385 | "metadata": {},
386 | "source": [
387 | "TRAIN USING LOGISTIC REGRESSION SCIKIT LIBRARY"
388 | ]
389 | },
390 | {
391 | "cell_type": "code",
392 | "execution_count": 15,
393 | "metadata": {},
394 | "outputs": [
395 | {
396 | "name": "stderr",
397 | "output_type": "stream",
398 | "text": [
399 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
400 | " FutureWarning)\n"
401 | ]
402 | },
403 | {
404 | "data": {
405 | "text/plain": [
406 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
407 | " intercept_scaling=1, max_iter=100, multi_class='warn',\n",
408 | " n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
409 | " tol=0.0001, verbose=0, warm_start=False)"
410 | ]
411 | },
412 | "execution_count": 15,
413 | "metadata": {},
414 | "output_type": "execute_result"
415 | }
416 | ],
417 | "source": [
418 | "from sklearn.linear_model import LogisticRegression\n",
419 | "model=LogisticRegression()\n",
420 | "model.fit(X_train,y_train)"
421 | ]
422 | },
423 | {
424 | "cell_type": "code",
425 | "execution_count": 17,
426 | "metadata": {},
427 | "outputs": [
428 | {
429 | "data": {
430 | "text/plain": [
431 | "array([1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0,\n",
432 | " 1, 1, 1, 1, 1, 1, 1, 1], dtype=int64)"
433 | ]
434 | },
435 | "execution_count": 17,
436 | "metadata": {},
437 | "output_type": "execute_result"
438 | }
439 | ],
440 | "source": [
441 | "predicted=model.predict(X_test)\n",
442 | "predicted"
443 | ]
444 | },
445 | {
446 | "cell_type": "markdown",
447 | "metadata": {},
448 | "source": [
449 | "PREDICTING BY PROVIDING INPUT"
450 | ]
451 | },
452 | {
453 | "cell_type": "code",
454 | "execution_count": 19,
455 | "metadata": {},
456 | "outputs": [
457 | {
458 | "name": "stdout",
459 | "output_type": "stream",
460 | "text": [
461 | "[0]\n"
462 | ]
463 | }
464 | ],
465 | "source": [
466 | "print(model.predict([[25,85]]))"
467 | ]
468 | },
469 | {
470 | "cell_type": "markdown",
471 | "metadata": {},
472 | "source": [
473 | "FINAL REPORT "
474 | ]
475 | },
476 | {
477 | "cell_type": "code",
478 | "execution_count": 21,
479 | "metadata": {},
480 | "outputs": [
481 | {
482 | "name": "stdout",
483 | "output_type": "stream",
484 | "text": [
485 | "90.0\n"
486 | ]
487 | },
488 | {
489 | "data": {
490 | "text/plain": [
491 | "' precision recall f1-score support\\n\\n 0 1.00 0.73 0.84 11\\n 1 0.86 1.00 0.93 19\\n\\n micro avg 0.90 0.90 0.90 30\\n macro avg 0.93 0.86 0.88 30\\nweighted avg 0.91 0.90 0.90 30\\n'"
492 | ]
493 | },
494 | "execution_count": 21,
495 | "metadata": {},
496 | "output_type": "execute_result"
497 | }
498 | ],
499 | "source": [
500 | "from sklearn import metrics\n",
501 | "score=100*metrics.accuracy_score(y_test,predicted)\n",
502 | "print(score)\n",
503 | "report=metrics.classification_report(y_test,predicted)\n",
504 | "report"
505 | ]
506 | },
507 | {
508 | "cell_type": "markdown",
509 | "metadata": {},
510 | "source": []
511 | },
512 | {
513 | "cell_type": "code",
514 | "execution_count": null,
515 | "metadata": {},
516 | "outputs": [],
517 | "source": []
518 | },
519 | {
520 | "cell_type": "code",
521 | "execution_count": null,
522 | "metadata": {},
523 | "outputs": [],
524 | "source": []
525 | }
526 | ],
527 | "metadata": {
528 | "kernelspec": {
529 | "display_name": "Python 3",
530 | "language": "python",
531 | "name": "python3"
532 | },
533 | "language_info": {
534 | "codemirror_mode": {
535 | "name": "ipython",
536 | "version": 3
537 | },
538 | "file_extension": ".py",
539 | "mimetype": "text/x-python",
540 | "name": "python",
541 | "nbconvert_exporter": "python",
542 | "pygments_lexer": "ipython3",
543 | "version": "3.7.1"
544 | }
545 | },
546 | "nbformat": 4,
547 | "nbformat_minor": 2
548 | }
549 |
--------------------------------------------------------------------------------
/Logistic Regression/Logistic/ex2data1.txt:
--------------------------------------------------------------------------------
1 | 34.62365962451697,78.0246928153624,0
2 | 30.28671076822607,43.89499752400101,0
3 | 35.84740876993872,72.90219802708364,0
4 | 60.18259938620976,86.30855209546826,1
5 | 79.0327360507101,75.3443764369103,1
6 | 45.08327747668339,56.3163717815305,0
7 | 61.10666453684766,96.51142588489624,1
8 | 75.02474556738889,46.55401354116538,1
9 | 76.09878670226257,87.42056971926803,1
10 | 84.43281996120035,43.53339331072109,1
11 | 95.86155507093572,38.22527805795094,0
12 | 75.01365838958247,30.60326323428011,0
13 | 82.30705337399482,76.48196330235604,1
14 | 69.36458875970939,97.71869196188608,1
15 | 39.53833914367223,76.03681085115882,0
16 | 53.9710521485623,89.20735013750205,1
17 | 69.07014406283025,52.74046973016765,1
18 | 67.94685547711617,46.67857410673128,0
19 | 70.66150955499435,92.92713789364831,1
20 | 76.97878372747498,47.57596364975532,1
21 | 67.37202754570876,42.83843832029179,0
22 | 89.67677575072079,65.79936592745237,1
23 | 50.534788289883,48.85581152764205,0
24 | 34.21206097786789,44.20952859866288,0
25 | 77.9240914545704,68.9723599933059,1
26 | 62.27101367004632,69.95445795447587,1
27 | 80.1901807509566,44.82162893218353,1
28 | 93.114388797442,38.80067033713209,0
29 | 61.83020602312595,50.25610789244621,0
30 | 38.78580379679423,64.99568095539578,0
31 | 61.379289447425,72.80788731317097,1
32 | 85.40451939411645,57.05198397627122,1
33 | 52.10797973193984,63.12762376881715,0
34 | 52.04540476831827,69.43286012045222,1
35 | 40.23689373545111,71.16774802184875,0
36 | 54.63510555424817,52.21388588061123,0
37 | 33.91550010906887,98.86943574220611,0
38 | 64.17698887494485,80.90806058670817,1
39 | 74.78925295941542,41.57341522824434,0
40 | 34.1836400264419,75.2377203360134,0
41 | 83.90239366249155,56.30804621605327,1
42 | 51.54772026906181,46.85629026349976,0
43 | 94.44336776917852,65.56892160559052,1
44 | 82.36875375713919,40.61825515970618,0
45 | 51.04775177128865,45.82270145776001,0
46 | 62.22267576120188,52.06099194836679,0
47 | 77.19303492601364,70.45820000180959,1
48 | 97.77159928000232,86.7278223300282,1
49 | 62.07306379667647,96.76882412413983,1
50 | 91.56497449807442,88.69629254546599,1
51 | 79.94481794066932,74.16311935043758,1
52 | 99.2725269292572,60.99903099844988,1
53 | 90.54671411399852,43.39060180650027,1
54 | 34.52451385320009,60.39634245837173,0
55 | 50.2864961189907,49.80453881323059,0
56 | 49.58667721632031,59.80895099453265,0
57 | 97.64563396007767,68.86157272420604,1
58 | 32.57720016809309,95.59854761387875,0
59 | 74.24869136721598,69.82457122657193,1
60 | 71.79646205863379,78.45356224515052,1
61 | 75.3956114656803,85.75993667331619,1
62 | 35.28611281526193,47.02051394723416,0
63 | 56.25381749711624,39.26147251058019,0
64 | 30.05882244669796,49.59297386723685,0
65 | 44.66826172480893,66.45008614558913,0
66 | 66.56089447242954,41.09209807936973,0
67 | 40.45755098375164,97.53518548909936,1
68 | 49.07256321908844,51.88321182073966,0
69 | 80.27957401466998,92.11606081344084,1
70 | 66.74671856944039,60.99139402740988,1
71 | 32.72283304060323,43.30717306430063,0
72 | 64.0393204150601,78.03168802018232,1
73 | 72.34649422579923,96.22759296761404,1
74 | 60.45788573918959,73.09499809758037,1
75 | 58.84095621726802,75.85844831279042,1
76 | 99.82785779692128,72.36925193383885,1
77 | 47.26426910848174,88.47586499559782,1
78 | 50.45815980285988,75.80985952982456,1
79 | 60.45555629271532,42.50840943572217,0
80 | 82.22666157785568,42.71987853716458,0
81 | 88.9138964166533,69.80378889835472,1
82 | 94.83450672430196,45.69430680250754,1
83 | 67.31925746917527,66.58935317747915,1
84 | 57.23870631569862,59.51428198012956,1
85 | 80.36675600171273,90.96014789746954,1
86 | 68.46852178591112,85.59430710452014,1
87 | 42.0754545384731,78.84478600148043,0
88 | 75.47770200533905,90.42453899753964,1
89 | 78.63542434898018,96.64742716885644,1
90 | 52.34800398794107,60.76950525602592,0
91 | 94.09433112516793,77.15910509073893,1
92 | 90.44855097096364,87.50879176484702,1
93 | 55.48216114069585,35.57070347228866,0
94 | 74.49269241843041,84.84513684930135,1
95 | 89.84580670720979,45.35828361091658,1
96 | 83.48916274498238,48.38028579728175,1
97 | 42.2617008099817,87.10385094025457,1
98 | 99.31500880510394,68.77540947206617,1
99 | 55.34001756003703,64.9319380069486,1
100 | 74.77589300092767,89.52981289513276,1
101 |
--------------------------------------------------------------------------------
/Logistic Regression/Logistic/ex2data2.txt:
--------------------------------------------------------------------------------
1 | 0.051267,0.69956,1
2 | -0.092742,0.68494,1
3 | -0.21371,0.69225,1
4 | -0.375,0.50219,1
5 | -0.51325,0.46564,1
6 | -0.52477,0.2098,1
7 | -0.39804,0.034357,1
8 | -0.30588,-0.19225,1
9 | 0.016705,-0.40424,1
10 | 0.13191,-0.51389,1
11 | 0.38537,-0.56506,1
12 | 0.52938,-0.5212,1
13 | 0.63882,-0.24342,1
14 | 0.73675,-0.18494,1
15 | 0.54666,0.48757,1
16 | 0.322,0.5826,1
17 | 0.16647,0.53874,1
18 | -0.046659,0.81652,1
19 | -0.17339,0.69956,1
20 | -0.47869,0.63377,1
21 | -0.60541,0.59722,1
22 | -0.62846,0.33406,1
23 | -0.59389,0.005117,1
24 | -0.42108,-0.27266,1
25 | -0.11578,-0.39693,1
26 | 0.20104,-0.60161,1
27 | 0.46601,-0.53582,1
28 | 0.67339,-0.53582,1
29 | -0.13882,0.54605,1
30 | -0.29435,0.77997,1
31 | -0.26555,0.96272,1
32 | -0.16187,0.8019,1
33 | -0.17339,0.64839,1
34 | -0.28283,0.47295,1
35 | -0.36348,0.31213,1
36 | -0.30012,0.027047,1
37 | -0.23675,-0.21418,1
38 | -0.06394,-0.18494,1
39 | 0.062788,-0.16301,1
40 | 0.22984,-0.41155,1
41 | 0.2932,-0.2288,1
42 | 0.48329,-0.18494,1
43 | 0.64459,-0.14108,1
44 | 0.46025,0.012427,1
45 | 0.6273,0.15863,1
46 | 0.57546,0.26827,1
47 | 0.72523,0.44371,1
48 | 0.22408,0.52412,1
49 | 0.44297,0.67032,1
50 | 0.322,0.69225,1
51 | 0.13767,0.57529,1
52 | -0.0063364,0.39985,1
53 | -0.092742,0.55336,1
54 | -0.20795,0.35599,1
55 | -0.20795,0.17325,1
56 | -0.43836,0.21711,1
57 | -0.21947,-0.016813,1
58 | -0.13882,-0.27266,1
59 | 0.18376,0.93348,0
60 | 0.22408,0.77997,0
61 | 0.29896,0.61915,0
62 | 0.50634,0.75804,0
63 | 0.61578,0.7288,0
64 | 0.60426,0.59722,0
65 | 0.76555,0.50219,0
66 | 0.92684,0.3633,0
67 | 0.82316,0.27558,0
68 | 0.96141,0.085526,0
69 | 0.93836,0.012427,0
70 | 0.86348,-0.082602,0
71 | 0.89804,-0.20687,0
72 | 0.85196,-0.36769,0
73 | 0.82892,-0.5212,0
74 | 0.79435,-0.55775,0
75 | 0.59274,-0.7405,0
76 | 0.51786,-0.5943,0
77 | 0.46601,-0.41886,0
78 | 0.35081,-0.57968,0
79 | 0.28744,-0.76974,0
80 | 0.085829,-0.75512,0
81 | 0.14919,-0.57968,0
82 | -0.13306,-0.4481,0
83 | -0.40956,-0.41155,0
84 | -0.39228,-0.25804,0
85 | -0.74366,-0.25804,0
86 | -0.69758,0.041667,0
87 | -0.75518,0.2902,0
88 | -0.69758,0.68494,0
89 | -0.4038,0.70687,0
90 | -0.38076,0.91886,0
91 | -0.50749,0.90424,0
92 | -0.54781,0.70687,0
93 | 0.10311,0.77997,0
94 | 0.057028,0.91886,0
95 | -0.10426,0.99196,0
96 | -0.081221,1.1089,0
97 | 0.28744,1.087,0
98 | 0.39689,0.82383,0
99 | 0.63882,0.88962,0
100 | 0.82316,0.66301,0
101 | 0.67339,0.64108,0
102 | 1.0709,0.10015,0
103 | -0.046659,-0.57968,0
104 | -0.23675,-0.63816,0
105 | -0.15035,-0.36769,0
106 | -0.49021,-0.3019,0
107 | -0.46717,-0.13377,0
108 | -0.28859,-0.060673,0
109 | -0.61118,-0.067982,0
110 | -0.66302,-0.21418,0
111 | -0.59965,-0.41886,0
112 | -0.72638,-0.082602,0
113 | -0.83007,0.31213,0
114 | -0.72062,0.53874,0
115 | -0.59389,0.49488,0
116 | -0.48445,0.99927,0
117 | -0.0063364,0.99927,0
118 | 0.63265,-0.030612,0
119 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Machine Learning in Python
2 | This repository contains Machine Learning Projects in Python programming language.
3 | All the projects are done on Jupyter Notebooks.
4 |
5 | ## Libraries Required
6 | The following libraries are required to successfully implement the projects.
7 | - Python 3.6+
8 | - NumPy (for Linear Algebra)
9 | - Pandas (for Data Preprocessing)
10 | - Scikit-learn (for ML models)
11 | - Matplotlib (for Data Visualization)
12 | - Seaborn (for statistical data visualization)
13 |
14 | The projects are divided into various categories listed below -
15 |
16 | ## Supervised Learning
17 | - [**Linear Regression**]()
18 | - [Linear Regression Single Variables.](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Linear%20Regression/LinearRegressionSingle%20Variables.ipynb) : A Simple Linear Regression Model to model the linear relationship between Population and Profit for plot sales.
19 | - [Linear Regression Multiple Variables.](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Linear%20Regression/LinearRegressionMultipleVariables.ipynb) : In this project, I build a Linear Regression Model for multiple variables for predicting the House price based on acres and number of rooms.
20 |
21 | - [**Logistic Regression**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Logistic%20Regression/Logistic/Untitled.ipynb) : In this project, I train a binary Logistic Regression classifier to predict whether a student will get selected on the basis of mid semester and end semester marks.
22 |
23 | - [**Support Vector Machine**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/SVM/Untitled.ipynb) : In this project, I build a Support Vector Machines classifier for predicting Social Network Ads. It predicts whether a user with a given age and estimated salary will buy the product after watching the ads or not. It uses the Radial Basis Function (RBF) kernel of SVM.
24 |
25 | - [**K Nearest Neighbours**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/K-NN/Untitled.ipynb) : K Nearest Neighbours or KNN is the simplest of all machine learning algorithms. In this project, I build a kNN classifier on the Iris Species Dataset which predict the three species of Iris with four features *sepal_length, sepal_width, petal_length* and *petal_width*.
26 |
27 | - [**Naive Bayes**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/TextClassification/Textclassification.ipynb) : In this project, I build a Naïve Bayes Classifier to classify the different class of a message from sklearn dataset called [*fetch_20newsgroups*](https://scikit-learn.org/stable/modules/generated/sklearn.datasets.fetch_20newsgroups.html).
28 |
29 | - [**Decision Tree Classification**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Decision%20Tree/Untitled.ipynb) : In this project, I used the Iris Dataset and tried a Decision Tree Classifier, which gives an accuracy of 96.7% — slightly less than KNN.
30 |
31 | - [**Random Forest Classification**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/RandomForest/RandomForest.ipynb) : In this project I used Random Forest Classifier and Random Forest Regressor on the Social Network Ads dataset.
32 |
33 | ## Unsupervised Learning
34 | - [**K Means Clustering**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/K-means/creditcard.ipynb) : K-Means clustering is used to find intrinsic groups within an unlabelled dataset and draw inferences. It is one of the most detailed projects. In this project, I implement K-Means Clustering on a Credit Card Dataset to cluster different credit card users based on their features. I scaled the data using *StandardScaler*, because normalizing (scaling into the range 0 to 1) improves convergence. I also implemented the [*Elbow Method*](https://en.wikipedia.org/wiki/Elbow_method_(clustering)) to search for the best number of clusters. For visualizing the dataset I used [*PCA (Principal Component Analysis)*](https://en.wikipedia.org/wiki/Principal_component_analysis) for dimensionality reduction, as the dataset had a large number of features. In the end I used the [*Silhouette Score*](https://en.wikipedia.org/wiki/Silhouette_(clustering)), which measures clustering performance. It ranges from -1 to 1, and I got a score of 0.203.
35 |
36 | ## NLP( Natural Language Processing )
37 | - [**Text Analytics**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/TextAnalytics/textAnalytics.ipynb) : It is a project for Introduction to Text Analytics in NLP. I performed the important steps -
38 | - ***Tokenization***
39 | - ***Removal of Special Characters***
40 | - ***Lower Case***
41 | - ***Removing StopWords***
42 | - ***Stemming***
43 | - ***Count Vectorizer*** ( which generally performs all the steps mentioned above except Stemming)
44 | - ***DTM (Document Term Matrix)***
45 | - ***TF-IDF (Text Frequency Inverse Document Frequency)***
46 |
47 | - [**Sentiment Analysis**](https://github.com/suubh/Machine-Learning-in-Python/tree/master/Sentiment%20Analysis) : I applied Sentiment Analysis to the MovieReview (dataset from the nltk library) and RestaurantReview datasets to predict positive and negative reviews. I used a Naive Bayes Classifier (78.8%) and Logistic Regression (84.3%) to build the models and for prediction.
48 |
49 | ## Data Cleaning and Preprocessing
50 | - [**Data Preprocessing**](https://github.com/suubh/Machine-Learning-in-Python/blob/master/Data%20Preprocessing/Untitled.ipynb) : I perform various data preprocessing and cleaning methods, which are mentioned below -
51 |   - ***Label Encoding*** : It converts each category into a unique numeric value from 0 to n-1, where n is the number of distinct categories in the column.
52 | - ***Ordinal Encoding*** : Categories to ordered numerical values.
53 |   - ***One Hot Encoding*** : It creates a dummy (0/1) column for each of the n unique category values in the column, so extra columns are created.
54 |
55 | ## Some Comparisons on Datasets
56 |
57 | | **Social Network Ads** | **Accuracy**|
58 | | ----------- | ----------- |
59 | | Support Vector Machine | 90.83% |
60 | | Random Forest Classifier | 90.0% |
61 | | Random Forest Regressor | 61.8% |
62 |
63 | | **Iris Dataset** | **Accuracy** |
64 | | ----------- | ----------- |
65 | | KNN | 98.3% |
66 | | Decision Tree | 96.7% |
67 |
68 | ## Kaggle
69 |
70 |
71 | 
72 |
73 |
74 |
75 |
76 |
77 |
--------------------------------------------------------------------------------
/RandomForest/RandomForest.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "RANDOM FOREST"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 13,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "import matplotlib.pyplot as plt"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 14,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " Age | \n",
48 | " EstimatedSalary | \n",
49 | " Purchased | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " 19 | \n",
56 | " 19000 | \n",
57 | " 0 | \n",
58 | "
\n",
59 | " \n",
60 | " 1 | \n",
61 | " 35 | \n",
62 | " 20000 | \n",
63 | " 0 | \n",
64 | "
\n",
65 | " \n",
66 | " 2 | \n",
67 | " 26 | \n",
68 | " 43000 | \n",
69 | " 0 | \n",
70 | "
\n",
71 | " \n",
72 | " 3 | \n",
73 | " 27 | \n",
74 | " 57000 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " 4 | \n",
79 | " 19 | \n",
80 | " 76000 | \n",
81 | " 0 | \n",
82 | "
\n",
83 | " \n",
84 | "
\n",
85 | "
"
86 | ],
87 | "text/plain": [
88 | " Age EstimatedSalary Purchased\n",
89 | "0 19 19000 0\n",
90 | "1 35 20000 0\n",
91 | "2 26 43000 0\n",
92 | "3 27 57000 0\n",
93 | "4 19 76000 0"
94 | ]
95 | },
96 | "execution_count": 14,
97 | "metadata": {},
98 | "output_type": "execute_result"
99 | }
100 | ],
101 | "source": [
102 | "df=pd.read_csv('Social_Network_Ads.csv')\n",
103 | "df.head()"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 16,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/plain": [
114 | "0 257\n",
115 | "1 143\n",
116 | "Name: Purchased, dtype: int64"
117 | ]
118 | },
119 | "execution_count": 16,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "df.Purchased.value_counts()"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "NOW MAKE THE DEPENDENT MATRIX AND INDEPENDENT MATRIX"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 21,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "name": "stdout",
142 | "output_type": "stream",
143 | "text": [
144 | " Age EstimatedSalary\n",
145 | "0 19 19000\n",
146 | "1 35 20000\n",
147 | "2 26 43000\n",
148 | "3 27 57000\n",
149 | "4 19 76000\n"
150 | ]
151 | },
152 | {
153 | "data": {
154 | "text/plain": [
155 | "0 0\n",
156 | "1 0\n",
157 | "2 0\n",
158 | "3 0\n",
159 | "4 0\n",
160 | "Name: Purchased, dtype: int64"
161 | ]
162 | },
163 | "execution_count": 21,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "X=df[['Age','EstimatedSalary']]\n",
170 | "y=df['Purchased']\n",
171 | "print(X.head())\n",
172 | "y.head()\n"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "NOW SPLIT THE DATASET INTO TRAINING SET AND TEST SET"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 22,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "from sklearn.model_selection import train_test_split\n",
189 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)"
190 | ]
191 | },
192 | {
193 | "cell_type": "code",
194 | "execution_count": null,
195 | "metadata": {},
196 | "outputs": [],
197 | "source": [
198 | "TRAIN THE MODEL"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 23,
204 | "metadata": {},
205 | "outputs": [
206 | {
207 | "name": "stderr",
208 | "output_type": "stream",
209 | "text": [
210 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
211 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
212 | ]
213 | },
214 | {
215 | "data": {
216 | "text/plain": [
217 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
218 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
219 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
220 | " min_samples_leaf=1, min_samples_split=2,\n",
221 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n",
222 | " oob_score=False, random_state=None, verbose=0,\n",
223 | " warm_start=False)"
224 | ]
225 | },
226 | "execution_count": 23,
227 | "metadata": {},
228 | "output_type": "execute_result"
229 | }
230 | ],
231 | "source": [
232 | "from sklearn.ensemble import RandomForestClassifier\n",
233 | "model=RandomForestClassifier()\n",
234 | "model.fit(X_train,y_train)"
235 | ]
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {},
240 | "source": [
241 | "NOW PREDICT "
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 30,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "name": "stdout",
251 | "output_type": "stream",
252 | "text": [
253 | "[0 1 0 0 1 0 1 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0\n",
254 | " 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 0 0 0 1 0 0 0\n",
255 | " 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0\n",
256 | " 0 0 0 0 0 1 0 1 0]\n"
257 | ]
258 | }
259 | ],
260 | "source": [
261 | "result=model.predict(X_test)\n",
262 | "print(result)"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "NOW FIND THE SCORE AND THE CLASSIFICATION REPORT"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 31,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "name": "stdout",
279 | "output_type": "stream",
280 | "text": [
281 | "90.0\n"
282 | ]
283 | }
284 | ],
285 | "source": [
286 | "score=100*model.score(X_test,y_test)\n",
287 | "print(score)"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 32,
293 | "metadata": {},
294 | "outputs": [
295 | {
296 | "name": "stdout",
297 | "output_type": "stream",
298 | "text": [
299 | " precision recall f1-score support\n",
300 | "\n",
301 | " 0 0.92 0.92 0.92 79\n",
302 | " 1 0.85 0.85 0.85 41\n",
303 | "\n",
304 | " micro avg 0.90 0.90 0.90 120\n",
305 | " macro avg 0.89 0.89 0.89 120\n",
306 | "weighted avg 0.90 0.90 0.90 120\n",
307 | "\n"
308 | ]
309 | }
310 | ],
311 | "source": [
312 | "from sklearn.metrics import classification_report\n",
313 | "print(classification_report(y_test,result))"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {},
319 | "source": [
320 | "WE CAN FIND WHICH FEATURE IMPORTANT OR IMPORTANCE OF EACH OF THE FEATURE"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 52,
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "ename": "AttributeError",
330 | "evalue": "module 'pandas' has no attribute 'df'",
331 | "output_type": "error",
332 | "traceback": [
333 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
334 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
335 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfeature_importance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfeature_importance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Feature'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mfeature_importance\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
336 | "\u001b[1;31mAttributeError\u001b[0m: module 'pandas' has no attribute 'df'"
337 | ]
338 | }
339 | ],
340 | "source": [
341 | "feature_importance=pd.dataframe(list(zip(X_train.columns,df.feature_importance)),columns=['Feature','Importance'])\n",
342 | "feature_importance.sort_values(by='Importance',ascending=False)\n",
343 | "#Note: the constructor is pd.DataFrame (capital D and F), and feature importances come from the fitted estimator via model.feature_importances_, not from the DataFrame."
344 | ]
345 | },
346 | {
347 | "cell_type": "markdown",
348 | "metadata": {},
349 | "source": [
350 | "NOW WE WILL USE RANDOM FOREST REGRESSION"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 54,
356 | "metadata": {},
357 | "outputs": [
358 | {
359 | "name": "stderr",
360 | "output_type": "stream",
361 | "text": [
362 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
363 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
364 | ]
365 | },
366 | {
367 | "data": {
368 | "text/plain": [
369 | "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
370 | " max_features='auto', max_leaf_nodes=None,\n",
371 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
372 | " min_samples_leaf=1, min_samples_split=2,\n",
373 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n",
374 | " oob_score=False, random_state=None, verbose=0, warm_start=False)"
375 | ]
376 | },
377 | "execution_count": 54,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "from sklearn.ensemble import RandomForestRegressor\n",
384 | "reg_model=RandomForestRegressor()\n",
385 | "reg_model.fit(X_train,y_train)"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 59,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/plain": [
396 | "61.886003910672"
397 | ]
398 | },
399 | "execution_count": 59,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "result=reg_model.predict(X_test)\n",
406 | "score=100*reg_model.score(X_test,y_test)\n",
407 | "score"
408 | ]
409 | }
410 | ],
411 | "metadata": {
412 | "kernelspec": {
413 | "display_name": "Python 3",
414 | "language": "python",
415 | "name": "python3"
416 | },
417 | "language_info": {
418 | "codemirror_mode": {
419 | "name": "ipython",
420 | "version": 3
421 | },
422 | "file_extension": ".py",
423 | "mimetype": "text/x-python",
424 | "name": "python",
425 | "nbconvert_exporter": "python",
426 | "pygments_lexer": "ipython3",
427 | "version": "3.7.1"
428 | }
429 | },
430 | "nbformat": 4,
431 | "nbformat_minor": 2
432 | }
433 |
--------------------------------------------------------------------------------
/RandomForest/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
1 | Age,EstimatedSalary,Purchased
2 | 19,19000,0
3 | 35,20000,0
4 | 26,43000,0
5 | 27,57000,0
6 | 19,76000,0
7 | 27,58000,0
8 | 27,84000,0
9 | 32,150000,1
10 | 25,33000,0
11 | 35,65000,0
12 | 26,80000,0
13 | 26,52000,0
14 | 20,86000,0
15 | 32,18000,0
16 | 18,82000,0
17 | 29,80000,0
18 | 47,25000,1
19 | 45,26000,1
20 | 46,28000,1
21 | 48,29000,1
22 | 45,22000,1
23 | 47,49000,1
24 | 48,41000,1
25 | 45,22000,1
26 | 46,23000,1
27 | 47,20000,1
28 | 49,28000,1
29 | 47,30000,1
30 | 29,43000,0
31 | 31,18000,0
32 | 31,74000,0
33 | 27,137000,1
34 | 21,16000,0
35 | 28,44000,0
36 | 27,90000,0
37 | 35,27000,0
38 | 33,28000,0
39 | 30,49000,0
40 | 26,72000,0
41 | 27,31000,0
42 | 27,17000,0
43 | 33,51000,0
44 | 35,108000,0
45 | 30,15000,0
46 | 28,84000,0
47 | 23,20000,0
48 | 25,79000,0
49 | 27,54000,0
50 | 30,135000,1
51 | 31,89000,0
52 | 24,32000,0
53 | 18,44000,0
54 | 29,83000,0
55 | 35,23000,0
56 | 27,58000,0
57 | 24,55000,0
58 | 23,48000,0
59 | 28,79000,0
60 | 22,18000,0
61 | 32,117000,0
62 | 27,20000,0
63 | 25,87000,0
64 | 23,66000,0
65 | 32,120000,1
66 | 59,83000,0
67 | 24,58000,0
68 | 24,19000,0
69 | 23,82000,0
70 | 22,63000,0
71 | 31,68000,0
72 | 25,80000,0
73 | 24,27000,0
74 | 20,23000,0
75 | 33,113000,0
76 | 32,18000,0
77 | 34,112000,1
78 | 18,52000,0
79 | 22,27000,0
80 | 28,87000,0
81 | 26,17000,0
82 | 30,80000,0
83 | 39,42000,0
84 | 20,49000,0
85 | 35,88000,0
86 | 30,62000,0
87 | 31,118000,1
88 | 24,55000,0
89 | 28,85000,0
90 | 26,81000,0
91 | 35,50000,0
92 | 22,81000,0
93 | 30,116000,0
94 | 26,15000,0
95 | 29,28000,0
96 | 29,83000,0
97 | 35,44000,0
98 | 35,25000,0
99 | 28,123000,1
100 | 35,73000,0
101 | 28,37000,0
102 | 27,88000,0
103 | 28,59000,0
104 | 32,86000,0
105 | 33,149000,1
106 | 19,21000,0
107 | 21,72000,0
108 | 26,35000,0
109 | 27,89000,0
110 | 26,86000,0
111 | 38,80000,0
112 | 39,71000,0
113 | 37,71000,0
114 | 38,61000,0
115 | 37,55000,0
116 | 42,80000,0
117 | 40,57000,0
118 | 35,75000,0
119 | 36,52000,0
120 | 40,59000,0
121 | 41,59000,0
122 | 36,75000,0
123 | 37,72000,0
124 | 40,75000,0
125 | 35,53000,0
126 | 41,51000,0
127 | 39,61000,0
128 | 42,65000,0
129 | 26,32000,0
130 | 30,17000,0
131 | 26,84000,0
132 | 31,58000,0
133 | 33,31000,0
134 | 30,87000,0
135 | 21,68000,0
136 | 28,55000,0
137 | 23,63000,0
138 | 20,82000,0
139 | 30,107000,1
140 | 28,59000,0
141 | 19,25000,0
142 | 19,85000,0
143 | 18,68000,0
144 | 35,59000,0
145 | 30,89000,0
146 | 34,25000,0
147 | 24,89000,0
148 | 27,96000,1
149 | 41,30000,0
150 | 29,61000,0
151 | 20,74000,0
152 | 26,15000,0
153 | 41,45000,0
154 | 31,76000,0
155 | 36,50000,0
156 | 40,47000,0
157 | 31,15000,0
158 | 46,59000,0
159 | 29,75000,0
160 | 26,30000,0
161 | 32,135000,1
162 | 32,100000,1
163 | 25,90000,0
164 | 37,33000,0
165 | 35,38000,0
166 | 33,69000,0
167 | 18,86000,0
168 | 22,55000,0
169 | 35,71000,0
170 | 29,148000,1
171 | 29,47000,0
172 | 21,88000,0
173 | 34,115000,0
174 | 26,118000,0
175 | 34,43000,0
176 | 34,72000,0
177 | 23,28000,0
178 | 35,47000,0
179 | 25,22000,0
180 | 24,23000,0
181 | 31,34000,0
182 | 26,16000,0
183 | 31,71000,0
184 | 32,117000,1
185 | 33,43000,0
186 | 33,60000,0
187 | 31,66000,0
188 | 20,82000,0
189 | 33,41000,0
190 | 35,72000,0
191 | 28,32000,0
192 | 24,84000,0
193 | 19,26000,0
194 | 29,43000,0
195 | 19,70000,0
196 | 28,89000,0
197 | 34,43000,0
198 | 30,79000,0
199 | 20,36000,0
200 | 26,80000,0
201 | 35,22000,0
202 | 35,39000,0
203 | 49,74000,0
204 | 39,134000,1
205 | 41,71000,0
206 | 58,101000,1
207 | 47,47000,0
208 | 55,130000,1
209 | 52,114000,0
210 | 40,142000,1
211 | 46,22000,0
212 | 48,96000,1
213 | 52,150000,1
214 | 59,42000,0
215 | 35,58000,0
216 | 47,43000,0
217 | 60,108000,1
218 | 49,65000,0
219 | 40,78000,0
220 | 46,96000,0
221 | 59,143000,1
222 | 41,80000,0
223 | 35,91000,1
224 | 37,144000,1
225 | 60,102000,1
226 | 35,60000,0
227 | 37,53000,0
228 | 36,126000,1
229 | 56,133000,1
230 | 40,72000,0
231 | 42,80000,1
232 | 35,147000,1
233 | 39,42000,0
234 | 40,107000,1
235 | 49,86000,1
236 | 38,112000,0
237 | 46,79000,1
238 | 40,57000,0
239 | 37,80000,0
240 | 46,82000,0
241 | 53,143000,1
242 | 42,149000,1
243 | 38,59000,0
244 | 50,88000,1
245 | 56,104000,1
246 | 41,72000,0
247 | 51,146000,1
248 | 35,50000,0
249 | 57,122000,1
250 | 41,52000,0
251 | 35,97000,1
252 | 44,39000,0
253 | 37,52000,0
254 | 48,134000,1
255 | 37,146000,1
256 | 50,44000,0
257 | 52,90000,1
258 | 41,72000,0
259 | 40,57000,0
260 | 58,95000,1
261 | 45,131000,1
262 | 35,77000,0
263 | 36,144000,1
264 | 55,125000,1
265 | 35,72000,0
266 | 48,90000,1
267 | 42,108000,1
268 | 40,75000,0
269 | 37,74000,0
270 | 47,144000,1
271 | 40,61000,0
272 | 43,133000,0
273 | 59,76000,1
274 | 60,42000,1
275 | 39,106000,1
276 | 57,26000,1
277 | 57,74000,1
278 | 38,71000,0
279 | 49,88000,1
280 | 52,38000,1
281 | 50,36000,1
282 | 59,88000,1
283 | 35,61000,0
284 | 37,70000,1
285 | 52,21000,1
286 | 48,141000,0
287 | 37,93000,1
288 | 37,62000,0
289 | 48,138000,1
290 | 41,79000,0
291 | 37,78000,1
292 | 39,134000,1
293 | 49,89000,1
294 | 55,39000,1
295 | 37,77000,0
296 | 35,57000,0
297 | 36,63000,0
298 | 42,73000,1
299 | 43,112000,1
300 | 45,79000,0
301 | 46,117000,1
302 | 58,38000,1
303 | 48,74000,1
304 | 37,137000,1
305 | 37,79000,1
306 | 40,60000,0
307 | 42,54000,0
308 | 51,134000,0
309 | 47,113000,1
310 | 36,125000,1
311 | 38,50000,0
312 | 42,70000,0
313 | 39,96000,1
314 | 38,50000,0
315 | 49,141000,1
316 | 39,79000,0
317 | 39,75000,1
318 | 54,104000,1
319 | 35,55000,0
320 | 45,32000,1
321 | 36,60000,0
322 | 52,138000,1
323 | 53,82000,1
324 | 41,52000,0
325 | 48,30000,1
326 | 48,131000,1
327 | 41,60000,0
328 | 41,72000,0
329 | 42,75000,0
330 | 36,118000,1
331 | 47,107000,1
332 | 38,51000,0
333 | 48,119000,1
334 | 42,65000,0
335 | 40,65000,0
336 | 57,60000,1
337 | 36,54000,0
338 | 58,144000,1
339 | 35,79000,0
340 | 38,55000,0
341 | 39,122000,1
342 | 53,104000,1
343 | 35,75000,0
344 | 38,65000,0
345 | 47,51000,1
346 | 47,105000,1
347 | 41,63000,0
348 | 53,72000,1
349 | 54,108000,1
350 | 39,77000,0
351 | 38,61000,0
352 | 38,113000,1
353 | 37,75000,0
354 | 42,90000,1
355 | 37,57000,0
356 | 36,99000,1
357 | 60,34000,1
358 | 54,70000,1
359 | 41,72000,0
360 | 40,71000,1
361 | 42,54000,0
362 | 43,129000,1
363 | 53,34000,1
364 | 47,50000,1
365 | 42,79000,0
366 | 42,104000,1
367 | 59,29000,1
368 | 58,47000,1
369 | 46,88000,1
370 | 38,71000,0
371 | 54,26000,1
372 | 60,46000,1
373 | 60,83000,1
374 | 39,73000,0
375 | 59,130000,1
376 | 37,80000,0
377 | 46,32000,1
378 | 46,74000,0
379 | 42,53000,0
380 | 41,87000,1
381 | 58,23000,1
382 | 42,64000,0
383 | 48,33000,1
384 | 44,139000,1
385 | 49,28000,1
386 | 57,33000,1
387 | 56,60000,1
388 | 49,39000,1
389 | 39,71000,0
390 | 47,34000,1
391 | 48,35000,1
392 | 48,33000,1
393 | 47,23000,1
394 | 45,45000,1
395 | 60,42000,1
396 | 39,59000,0
397 | 46,41000,1
398 | 51,23000,1
399 | 50,20000,1
400 | 36,33000,0
401 | 49,36000,1
--------------------------------------------------------------------------------
/RandomForest/Untitled.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "RANDOM FOREST"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 13,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np\n",
17 | "import pandas as pd\n",
18 | "import matplotlib.pyplot as plt"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 14,
24 | "metadata": {},
25 | "outputs": [
26 | {
27 | "data": {
28 | "text/html": [
29 | "\n",
30 | "\n",
43 | "
\n",
44 | " \n",
45 | " \n",
46 | " | \n",
47 | " Age | \n",
48 | " EstimatedSalary | \n",
49 | " Purchased | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " 19 | \n",
56 | " 19000 | \n",
57 | " 0 | \n",
58 | "
\n",
59 | " \n",
60 | " 1 | \n",
61 | " 35 | \n",
62 | " 20000 | \n",
63 | " 0 | \n",
64 | "
\n",
65 | " \n",
66 | " 2 | \n",
67 | " 26 | \n",
68 | " 43000 | \n",
69 | " 0 | \n",
70 | "
\n",
71 | " \n",
72 | " 3 | \n",
73 | " 27 | \n",
74 | " 57000 | \n",
75 | " 0 | \n",
76 | "
\n",
77 | " \n",
78 | " 4 | \n",
79 | " 19 | \n",
80 | " 76000 | \n",
81 | " 0 | \n",
82 | "
\n",
83 | " \n",
84 | "
\n",
85 | "
"
86 | ],
87 | "text/plain": [
88 | " Age EstimatedSalary Purchased\n",
89 | "0 19 19000 0\n",
90 | "1 35 20000 0\n",
91 | "2 26 43000 0\n",
92 | "3 27 57000 0\n",
93 | "4 19 76000 0"
94 | ]
95 | },
96 | "execution_count": 14,
97 | "metadata": {},
98 | "output_type": "execute_result"
99 | }
100 | ],
101 | "source": [
102 | "df=pd.read_csv('Social_Network_Ads.csv')\n",
103 | "df.head()"
104 | ]
105 | },
106 | {
107 | "cell_type": "code",
108 | "execution_count": 16,
109 | "metadata": {},
110 | "outputs": [
111 | {
112 | "data": {
113 | "text/plain": [
114 | "0 257\n",
115 | "1 143\n",
116 | "Name: Purchased, dtype: int64"
117 | ]
118 | },
119 | "execution_count": 16,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "df.Purchased.value_counts()"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "metadata": {},
131 | "source": [
132 | "NOW MAKE THE DEPENDENT MATRIX AND INDEPENDENT MATRIX"
133 | ]
134 | },
135 | {
136 | "cell_type": "code",
137 | "execution_count": 21,
138 | "metadata": {},
139 | "outputs": [
140 | {
141 | "name": "stdout",
142 | "output_type": "stream",
143 | "text": [
144 | " Age EstimatedSalary\n",
145 | "0 19 19000\n",
146 | "1 35 20000\n",
147 | "2 26 43000\n",
148 | "3 27 57000\n",
149 | "4 19 76000\n"
150 | ]
151 | },
152 | {
153 | "data": {
154 | "text/plain": [
155 | "0 0\n",
156 | "1 0\n",
157 | "2 0\n",
158 | "3 0\n",
159 | "4 0\n",
160 | "Name: Purchased, dtype: int64"
161 | ]
162 | },
163 | "execution_count": 21,
164 | "metadata": {},
165 | "output_type": "execute_result"
166 | }
167 | ],
168 | "source": [
169 | "X=df[['Age','EstimatedSalary']]\n",
170 | "y=df['Purchased']\n",
171 | "print(X.head())\n",
172 | "y.head()\n"
173 | ]
174 | },
175 | {
176 | "cell_type": "markdown",
177 | "metadata": {},
178 | "source": [
179 | "NOW SPLIT THE DATASET INTO TRAINING SET AND TEST SET"
180 | ]
181 | },
182 | {
183 | "cell_type": "code",
184 | "execution_count": 22,
185 | "metadata": {},
186 | "outputs": [],
187 | "source": [
188 | "from sklearn.model_selection import train_test_split\n",
189 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
197 | "source": [
198 | "TRAIN THE MODEL"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 23,
204 | "metadata": {},
205 | "outputs": [
206 | {
207 | "name": "stderr",
208 | "output_type": "stream",
209 | "text": [
210 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
211 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
212 | ]
213 | },
214 | {
215 | "data": {
216 | "text/plain": [
217 | "RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',\n",
218 | " max_depth=None, max_features='auto', max_leaf_nodes=None,\n",
219 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
220 | " min_samples_leaf=1, min_samples_split=2,\n",
221 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n",
222 | " oob_score=False, random_state=None, verbose=0,\n",
223 | " warm_start=False)"
224 | ]
225 | },
226 | "execution_count": 23,
227 | "metadata": {},
228 | "output_type": "execute_result"
229 | }
230 | ],
231 | "source": [
232 | "from sklearn.ensemble import RandomForestClassifier\n",
233 | "model=RandomForestClassifier()\n",
234 | "model.fit(X_train,y_train)"
235 | ]
236 | },
237 | {
238 | "cell_type": "markdown",
239 | "metadata": {},
240 | "source": [
241 | "NOW PREDICT "
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 30,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "name": "stdout",
251 | "output_type": "stream",
252 | "text": [
253 | "[0 1 0 0 1 0 1 1 1 1 0 1 0 0 0 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 0 0\n",
254 | " 0 1 0 1 0 1 0 1 1 1 0 1 1 0 0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 0 0 0 1 0 0 0\n",
255 | " 1 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0\n",
256 | " 0 0 0 0 0 1 0 1 0]\n"
257 | ]
258 | }
259 | ],
260 | "source": [
261 | "result=model.predict(X_test)\n",
262 | "print(result)"
263 | ]
264 | },
265 | {
266 | "cell_type": "markdown",
267 | "metadata": {},
268 | "source": [
269 | "NOW FIND THE SCORE AND THE CLASSIFICATION REPORT"
270 | ]
271 | },
272 | {
273 | "cell_type": "code",
274 | "execution_count": 31,
275 | "metadata": {},
276 | "outputs": [
277 | {
278 | "name": "stdout",
279 | "output_type": "stream",
280 | "text": [
281 | "90.0\n"
282 | ]
283 | }
284 | ],
285 | "source": [
286 | "score=100*model.score(X_test,y_test)\n",
287 | "print(score)"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 32,
293 | "metadata": {},
294 | "outputs": [
295 | {
296 | "name": "stdout",
297 | "output_type": "stream",
298 | "text": [
299 | " precision recall f1-score support\n",
300 | "\n",
301 | " 0 0.92 0.92 0.92 79\n",
302 | " 1 0.85 0.85 0.85 41\n",
303 | "\n",
304 | " micro avg 0.90 0.90 0.90 120\n",
305 | " macro avg 0.89 0.89 0.89 120\n",
306 | "weighted avg 0.90 0.90 0.90 120\n",
307 | "\n"
308 | ]
309 | }
310 | ],
311 | "source": [
312 | "from sklearn.metrics import classification_report\n",
313 | "print(classification_report(y_test,result))"
314 | ]
315 | },
316 | {
317 | "cell_type": "markdown",
318 | "metadata": {},
319 | "source": [
320 | "WE CAN FIND THE IMPORTANCE OF EACH FEATURE"
321 | ]
322 | },
323 | {
324 | "cell_type": "code",
325 | "execution_count": 52,
326 | "metadata": {},
327 | "outputs": [
328 | {
329 | "ename": "AttributeError",
330 | "evalue": "module 'pandas' has no attribute 'df'",
331 | "output_type": "error",
332 | "traceback": [
333 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
334 | "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)",
335 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mfeature_importance\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mzip\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfeature_importance\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'Feature'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mfeature_importance\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort_values\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mby\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'Importance'\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mascending\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
336 | "\u001b[1;31mAttributeError\u001b[0m: module 'pandas' has no attribute 'df'"
337 | ]
338 | }
339 | ],
340 | "source": [
341 | "feature_importance=pd.DataFrame(list(zip(X_train.columns,model.feature_importances_)),columns=['Feature','Importance'])\n",
342 | "feature_importance.sort_values(by='Importance',ascending=False)\n",
343 | "#Note: the constructor is pd.DataFrame (capital D and F), and feature importances come from the fitted estimator via model.feature_importances_, not from the DataFrame."
344 | ]
345 | },
346 | {
347 | "cell_type": "markdown",
348 | "metadata": {},
349 | "source": [
350 | "NOW WE WILL USE RANDOM FOREST REGRESSION"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 54,
356 | "metadata": {},
357 | "outputs": [
358 | {
359 | "name": "stderr",
360 | "output_type": "stream",
361 | "text": [
362 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\ensemble\\forest.py:246: FutureWarning: The default value of n_estimators will change from 10 in version 0.20 to 100 in 0.22.\n",
363 | " \"10 in version 0.20 to 100 in 0.22.\", FutureWarning)\n"
364 | ]
365 | },
366 | {
367 | "data": {
368 | "text/plain": [
369 | "RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,\n",
370 | " max_features='auto', max_leaf_nodes=None,\n",
371 | " min_impurity_decrease=0.0, min_impurity_split=None,\n",
372 | " min_samples_leaf=1, min_samples_split=2,\n",
373 | " min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=None,\n",
374 | " oob_score=False, random_state=None, verbose=0, warm_start=False)"
375 | ]
376 | },
377 | "execution_count": 54,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "from sklearn.ensemble import RandomForestRegressor\n",
384 | "reg_model=RandomForestRegressor()\n",
385 | "reg_model.fit(X_train,y_train)"
386 | ]
387 | },
388 | {
389 | "cell_type": "code",
390 | "execution_count": 59,
391 | "metadata": {},
392 | "outputs": [
393 | {
394 | "data": {
395 | "text/plain": [
396 | "61.886003910672"
397 | ]
398 | },
399 | "execution_count": 59,
400 | "metadata": {},
401 | "output_type": "execute_result"
402 | }
403 | ],
404 | "source": [
405 | "result=reg_model.predict(X_test)\n",
406 | "score=100*reg_model.score(X_test,y_test)\n",
407 | "score"
408 | ]
409 | }
410 | ],
411 | "metadata": {
412 | "kernelspec": {
413 | "display_name": "Python 3",
414 | "language": "python",
415 | "name": "python3"
416 | },
417 | "language_info": {
418 | "codemirror_mode": {
419 | "name": "ipython",
420 | "version": 3
421 | },
422 | "file_extension": ".py",
423 | "mimetype": "text/x-python",
424 | "name": "python",
425 | "nbconvert_exporter": "python",
426 | "pygments_lexer": "ipython3",
427 | "version": "3.7.1"
428 | }
429 | },
430 | "nbformat": 4,
431 | "nbformat_minor": 2
432 | }
433 |
--------------------------------------------------------------------------------
/SVM/Social_Network_Ads.csv:
--------------------------------------------------------------------------------
1 | Age,EstimatedSalary,Purchased
2 | 19,19000,0
3 | 35,20000,0
4 | 26,43000,0
5 | 27,57000,0
6 | 19,76000,0
7 | 27,58000,0
8 | 27,84000,0
9 | 32,150000,1
10 | 25,33000,0
11 | 35,65000,0
12 | 26,80000,0
13 | 26,52000,0
14 | 20,86000,0
15 | 32,18000,0
16 | 18,82000,0
17 | 29,80000,0
18 | 47,25000,1
19 | 45,26000,1
20 | 46,28000,1
21 | 48,29000,1
22 | 45,22000,1
23 | 47,49000,1
24 | 48,41000,1
25 | 45,22000,1
26 | 46,23000,1
27 | 47,20000,1
28 | 49,28000,1
29 | 47,30000,1
30 | 29,43000,0
31 | 31,18000,0
32 | 31,74000,0
33 | 27,137000,1
34 | 21,16000,0
35 | 28,44000,0
36 | 27,90000,0
37 | 35,27000,0
38 | 33,28000,0
39 | 30,49000,0
40 | 26,72000,0
41 | 27,31000,0
42 | 27,17000,0
43 | 33,51000,0
44 | 35,108000,0
45 | 30,15000,0
46 | 28,84000,0
47 | 23,20000,0
48 | 25,79000,0
49 | 27,54000,0
50 | 30,135000,1
51 | 31,89000,0
52 | 24,32000,0
53 | 18,44000,0
54 | 29,83000,0
55 | 35,23000,0
56 | 27,58000,0
57 | 24,55000,0
58 | 23,48000,0
59 | 28,79000,0
60 | 22,18000,0
61 | 32,117000,0
62 | 27,20000,0
63 | 25,87000,0
64 | 23,66000,0
65 | 32,120000,1
66 | 59,83000,0
67 | 24,58000,0
68 | 24,19000,0
69 | 23,82000,0
70 | 22,63000,0
71 | 31,68000,0
72 | 25,80000,0
73 | 24,27000,0
74 | 20,23000,0
75 | 33,113000,0
76 | 32,18000,0
77 | 34,112000,1
78 | 18,52000,0
79 | 22,27000,0
80 | 28,87000,0
81 | 26,17000,0
82 | 30,80000,0
83 | 39,42000,0
84 | 20,49000,0
85 | 35,88000,0
86 | 30,62000,0
87 | 31,118000,1
88 | 24,55000,0
89 | 28,85000,0
90 | 26,81000,0
91 | 35,50000,0
92 | 22,81000,0
93 | 30,116000,0
94 | 26,15000,0
95 | 29,28000,0
96 | 29,83000,0
97 | 35,44000,0
98 | 35,25000,0
99 | 28,123000,1
100 | 35,73000,0
101 | 28,37000,0
102 | 27,88000,0
103 | 28,59000,0
104 | 32,86000,0
105 | 33,149000,1
106 | 19,21000,0
107 | 21,72000,0
108 | 26,35000,0
109 | 27,89000,0
110 | 26,86000,0
111 | 38,80000,0
112 | 39,71000,0
113 | 37,71000,0
114 | 38,61000,0
115 | 37,55000,0
116 | 42,80000,0
117 | 40,57000,0
118 | 35,75000,0
119 | 36,52000,0
120 | 40,59000,0
121 | 41,59000,0
122 | 36,75000,0
123 | 37,72000,0
124 | 40,75000,0
125 | 35,53000,0
126 | 41,51000,0
127 | 39,61000,0
128 | 42,65000,0
129 | 26,32000,0
130 | 30,17000,0
131 | 26,84000,0
132 | 31,58000,0
133 | 33,31000,0
134 | 30,87000,0
135 | 21,68000,0
136 | 28,55000,0
137 | 23,63000,0
138 | 20,82000,0
139 | 30,107000,1
140 | 28,59000,0
141 | 19,25000,0
142 | 19,85000,0
143 | 18,68000,0
144 | 35,59000,0
145 | 30,89000,0
146 | 34,25000,0
147 | 24,89000,0
148 | 27,96000,1
149 | 41,30000,0
150 | 29,61000,0
151 | 20,74000,0
152 | 26,15000,0
153 | 41,45000,0
154 | 31,76000,0
155 | 36,50000,0
156 | 40,47000,0
157 | 31,15000,0
158 | 46,59000,0
159 | 29,75000,0
160 | 26,30000,0
161 | 32,135000,1
162 | 32,100000,1
163 | 25,90000,0
164 | 37,33000,0
165 | 35,38000,0
166 | 33,69000,0
167 | 18,86000,0
168 | 22,55000,0
169 | 35,71000,0
170 | 29,148000,1
171 | 29,47000,0
172 | 21,88000,0
173 | 34,115000,0
174 | 26,118000,0
175 | 34,43000,0
176 | 34,72000,0
177 | 23,28000,0
178 | 35,47000,0
179 | 25,22000,0
180 | 24,23000,0
181 | 31,34000,0
182 | 26,16000,0
183 | 31,71000,0
184 | 32,117000,1
185 | 33,43000,0
186 | 33,60000,0
187 | 31,66000,0
188 | 20,82000,0
189 | 33,41000,0
190 | 35,72000,0
191 | 28,32000,0
192 | 24,84000,0
193 | 19,26000,0
194 | 29,43000,0
195 | 19,70000,0
196 | 28,89000,0
197 | 34,43000,0
198 | 30,79000,0
199 | 20,36000,0
200 | 26,80000,0
201 | 35,22000,0
202 | 35,39000,0
203 | 49,74000,0
204 | 39,134000,1
205 | 41,71000,0
206 | 58,101000,1
207 | 47,47000,0
208 | 55,130000,1
209 | 52,114000,0
210 | 40,142000,1
211 | 46,22000,0
212 | 48,96000,1
213 | 52,150000,1
214 | 59,42000,0
215 | 35,58000,0
216 | 47,43000,0
217 | 60,108000,1
218 | 49,65000,0
219 | 40,78000,0
220 | 46,96000,0
221 | 59,143000,1
222 | 41,80000,0
223 | 35,91000,1
224 | 37,144000,1
225 | 60,102000,1
226 | 35,60000,0
227 | 37,53000,0
228 | 36,126000,1
229 | 56,133000,1
230 | 40,72000,0
231 | 42,80000,1
232 | 35,147000,1
233 | 39,42000,0
234 | 40,107000,1
235 | 49,86000,1
236 | 38,112000,0
237 | 46,79000,1
238 | 40,57000,0
239 | 37,80000,0
240 | 46,82000,0
241 | 53,143000,1
242 | 42,149000,1
243 | 38,59000,0
244 | 50,88000,1
245 | 56,104000,1
246 | 41,72000,0
247 | 51,146000,1
248 | 35,50000,0
249 | 57,122000,1
250 | 41,52000,0
251 | 35,97000,1
252 | 44,39000,0
253 | 37,52000,0
254 | 48,134000,1
255 | 37,146000,1
256 | 50,44000,0
257 | 52,90000,1
258 | 41,72000,0
259 | 40,57000,0
260 | 58,95000,1
261 | 45,131000,1
262 | 35,77000,0
263 | 36,144000,1
264 | 55,125000,1
265 | 35,72000,0
266 | 48,90000,1
267 | 42,108000,1
268 | 40,75000,0
269 | 37,74000,0
270 | 47,144000,1
271 | 40,61000,0
272 | 43,133000,0
273 | 59,76000,1
274 | 60,42000,1
275 | 39,106000,1
276 | 57,26000,1
277 | 57,74000,1
278 | 38,71000,0
279 | 49,88000,1
280 | 52,38000,1
281 | 50,36000,1
282 | 59,88000,1
283 | 35,61000,0
284 | 37,70000,1
285 | 52,21000,1
286 | 48,141000,0
287 | 37,93000,1
288 | 37,62000,0
289 | 48,138000,1
290 | 41,79000,0
291 | 37,78000,1
292 | 39,134000,1
293 | 49,89000,1
294 | 55,39000,1
295 | 37,77000,0
296 | 35,57000,0
297 | 36,63000,0
298 | 42,73000,1
299 | 43,112000,1
300 | 45,79000,0
301 | 46,117000,1
302 | 58,38000,1
303 | 48,74000,1
304 | 37,137000,1
305 | 37,79000,1
306 | 40,60000,0
307 | 42,54000,0
308 | 51,134000,0
309 | 47,113000,1
310 | 36,125000,1
311 | 38,50000,0
312 | 42,70000,0
313 | 39,96000,1
314 | 38,50000,0
315 | 49,141000,1
316 | 39,79000,0
317 | 39,75000,1
318 | 54,104000,1
319 | 35,55000,0
320 | 45,32000,1
321 | 36,60000,0
322 | 52,138000,1
323 | 53,82000,1
324 | 41,52000,0
325 | 48,30000,1
326 | 48,131000,1
327 | 41,60000,0
328 | 41,72000,0
329 | 42,75000,0
330 | 36,118000,1
331 | 47,107000,1
332 | 38,51000,0
333 | 48,119000,1
334 | 42,65000,0
335 | 40,65000,0
336 | 57,60000,1
337 | 36,54000,0
338 | 58,144000,1
339 | 35,79000,0
340 | 38,55000,0
341 | 39,122000,1
342 | 53,104000,1
343 | 35,75000,0
344 | 38,65000,0
345 | 47,51000,1
346 | 47,105000,1
347 | 41,63000,0
348 | 53,72000,1
349 | 54,108000,1
350 | 39,77000,0
351 | 38,61000,0
352 | 38,113000,1
353 | 37,75000,0
354 | 42,90000,1
355 | 37,57000,0
356 | 36,99000,1
357 | 60,34000,1
358 | 54,70000,1
359 | 41,72000,0
360 | 40,71000,1
361 | 42,54000,0
362 | 43,129000,1
363 | 53,34000,1
364 | 47,50000,1
365 | 42,79000,0
366 | 42,104000,1
367 | 59,29000,1
368 | 58,47000,1
369 | 46,88000,1
370 | 38,71000,0
371 | 54,26000,1
372 | 60,46000,1
373 | 60,83000,1
374 | 39,73000,0
375 | 59,130000,1
376 | 37,80000,0
377 | 46,32000,1
378 | 46,74000,0
379 | 42,53000,0
380 | 41,87000,1
381 | 58,23000,1
382 | 42,64000,0
383 | 48,33000,1
384 | 44,139000,1
385 | 49,28000,1
386 | 57,33000,1
387 | 56,60000,1
388 | 49,39000,1
389 | 39,71000,0
390 | 47,34000,1
391 | 48,35000,1
392 | 48,33000,1
393 | 47,23000,1
394 | 45,45000,1
395 | 60,42000,1
396 | 39,59000,0
397 | 46,41000,1
398 | 51,23000,1
399 | 50,20000,1
400 | 36,33000,0
401 | 49,36000,1
--------------------------------------------------------------------------------
/Sentiment Analysis/moviereview.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import seaborn as sb\n",
12 | "import matplotlib.pyplot as plt"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "# Using Movie Reviews dataset of nltk library"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "metadata": {},
25 | "source": [
26 | "## Importing the movie_reviews corpus from nltk"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 3,
32 | "metadata": {},
33 | "outputs": [
34 | {
35 | "name": "stdout",
36 | "output_type": "stream",
37 | "text": [
38 | "Help on LazyCorpusLoader in module nltk.corpus.util object:\n",
39 | "\n",
40 | "movie_reviews = class LazyCorpusLoader(builtins.object)\n",
41 | " | movie_reviews(name, reader_cls, *args, **kwargs)\n",
42 | " | \n",
43 | " | To see the API documentation for this lazily loaded corpus, first\n",
44 | " | run corpus.ensure_loaded(), and then run help(this_corpus).\n",
45 | " | \n",
46 | " | LazyCorpusLoader is a proxy object which is used to stand in for a\n",
47 | " | corpus object before the corpus is loaded. This allows NLTK to\n",
48 | " | create an object for each corpus, but defer the costs associated\n",
49 | " | with loading those corpora until the first time that they're\n",
50 | " | actually accessed.\n",
51 | " | \n",
52 | " | The first time this object is accessed in any way, it will load\n",
53 | " | the corresponding corpus, and transform itself into that corpus\n",
54 | " | (by modifying its own ``__class__`` and ``__dict__`` attributes).\n",
55 | " | \n",
56 | " | If the corpus can not be found, then accessing this object will\n",
57 | " | raise an exception, displaying installation instructions for the\n",
58 | " | NLTK data package. Once they've properly installed the data\n",
59 | " | package (or modified ``nltk.data.path`` to point to its location),\n",
60 | " | they can then use the corpus object without restarting python.\n",
61 | " | \n",
62 | " | :param name: The name of the corpus\n",
63 | " | :type name: str\n",
64 | " | :param reader_cls: The specific CorpusReader class, e.g. PlaintextCorpusReader, WordListCorpusReader\n",
65 | " | :type reader: nltk.corpus.reader.api.CorpusReader\n",
66 | " | :param nltk_data_subdir: The subdirectory where the corpus is stored.\n",
67 | " | :type nltk_data_subdir: str\n",
68 | " | :param *args: Any other non-keywords arguments that `reader_cls` might need.\n",
69 | " | :param *kargs: Any other keywords arguments that `reader_cls` might need.\n",
70 | " | \n",
71 | " | Methods defined here:\n",
72 | " | \n",
73 | " | __getattr__(self, attr)\n",
74 | " | \n",
75 | " | __init__(self, name, reader_cls, *args, **kwargs)\n",
76 | " | Initialize self. See help(type(self)) for accurate signature.\n",
77 | " | \n",
78 | " | __repr__(self)\n",
79 | " | Return repr(self).\n",
80 | " | \n",
81 | " | __unicode__ = __str__(self, /)\n",
82 | " | Return str(self).\n",
83 | " | \n",
84 | " | unicode_repr = __repr__(self)\n",
85 | " | \n",
86 | " | ----------------------------------------------------------------------\n",
87 | " | Data descriptors defined here:\n",
88 | " | \n",
89 | " | __dict__\n",
90 | " | dictionary for instance variables (if defined)\n",
91 | " | \n",
92 | " | __weakref__\n",
93 | " | list of weak references to the object (if defined)\n",
94 | "\n"
95 | ]
96 | }
97 | ],
98 | "source": [
99 | "import random\n",
100 | "from nltk.corpus import movie_reviews\n",
101 | "help(movie_reviews)"
102 | ]
103 | },
104 | {
105 | "cell_type": "markdown",
106 | "metadata": {},
107 | "source": [
108 | "## Preparing the document "
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 16,
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "name": "stdout",
118 | "output_type": "stream",
119 | "text": [
120 | "Number of Reviews: 2000\n",
121 | "Number of Positive review: 1000\n",
122 | "Number of Negative review: 1000\n"
123 | ]
124 | }
125 | ],
126 | "source": [
127 | "#loading movie_reviews\n",
128 | "#import nltk\n",
129 | "#nltk.download('movie_reviews')\n",
130 | "documents = [(list(movie_reviews.words(fileid)), category)\n",
131 | " for category in movie_reviews.categories()\n",
132 | " for fileid in movie_reviews.fileids(category)]\n",
133 | "print(\"Number of Reviews:\",len(documents))\n",
134 | "\n",
135 | "#Each loaded document is a list of word tokens, e.g. ['the', 'movie', 'was', 'good']\n",
136 | "\n",
137 | "#Shuffle the documents (seeding first makes the shuffle reproducible)\n",
138 | "random.seed(23)\n",
139 | "random.shuffle(documents)\n",
140 | "\n",
141 | "#list to store all review text and label\n",
142 | "text_data=[]\n",
143 | "label=[]\n",
144 | "for i in range(len(documents)):\n",
145 | " text_data.append(' '.join(documents[i][0]))\n",
146 | " label.append(0 if documents[i][1]=='neg' else 1)\n",
147 | " \n",
148 | "print(\"Number of Positive review:\",label.count(1))\n",
149 | "print(\"Number of Negative review:\",label.count(0))\n",
150 | "\n"
151 | ]
152 | },
153 | {
154 | "cell_type": "markdown",
155 | "metadata": {},
156 | "source": [
157 | "## Splitting the Dataset"
158 | ]
159 | },
160 | {
161 | "cell_type": "code",
162 | "execution_count": 29,
163 | "metadata": {},
164 | "outputs": [
165 | {
166 | "name": "stdout",
167 | "output_type": "stream",
168 | "text": [
169 | "eddie murphy has a lot riding on harlem nights . as the movie ' s writer , director , executive producer , and star , murphy will shoulder all of the blame if harlem nights fails . but at the same time , he ' ll receive all of the credit if it succeeds . should you sacrifice your hard - earned cash to support murphy ' s risky gamble ? well , that depends on whom you trust more : me or eddie murphy . here ' s what murphy thinks : \" i think the audience is expecting a good time . they gonna get sexy . they gonna get funny . they gonna get drama . they gonna get all of that . i think it ' s the best movie i ' ve done \" ( paramount radio network ) . here ' s what i think : harlem nights is charmless , unoriginal , disappointing , and almost without question , the worst film of the actor ' s career ( i haven ' t seen best defense ) . and guess who ' s to blame ? ! the movie ' s problem is not murphy ' s direction : harlem nights is a fairly good looking film . no , the project was probably doomed even before the cameras rolled . murphy ' s awful script is the culprit . let ' s count the mistakes he makes in his first attempt at screenwriting : ( 1 ) murphy shatters the record for the most profanity in a motion picture . yes , he even outdoes his own work in raw . practically every line of dialogue in harlem nights contains at least one four letter word . and after 15 minutes , it gets irritating . ( 2 ) murphy wastes the talents of his fine cast . richard pryor , redd foxx , michael lerner , and della reese face the impossible task of carving out credible characters from a script riddled with stereotypes . each of them shines occasionally , but basically what we have are good performers stuck in a bad vehicle . ( 3 ) the movie demeans women by depicting them solely as sexual objects and as pawns in power struggles between men . murphy has admitted in interviews that he is weary of women in his private life , which is really neither here nor there . 
but when murphy puts his bitter feelings on 3 , 000 movie screens across the country , it ' s another matter altogether . you ' re forced to swallow some pretty gruesome stuff . for instance , murphy punches della reese in the stomach . and he shoots jasmine guy in the head . this is a mean - spirited movie , folks ! lovely newcomer lela rochon gets off easy in her role as a common whore , but only because she doesn ' t have any scenes with murphy . thank god : he might have run her over with a bulldozer . ( 4 ) murphy has written for himself perhaps his blandest role to date . the loveable eddie murphy charisma emerges only once or twice during the film . murphy would rather give his character a spiffy wardrobe than a spiffy personality . sometimes it seems as if murphy made harlem nights just so he could wear fancy suits and look debonair . ( 5 ) the plot is a shameless rip - off of the sting . if you ' re going to make another sting movie , you ' ve got to do something original . murphy ' s tale of warring nightclub owners in harlem ( circa 1938 ) fails to add anything new to the formula . ( 6 ) to get laughs , murphy makes fun of stuttering . you know a comedy is digging deep when it resorts to ridiculing the handicapped . ( 7 ) murphy ' s idea of drama is a scene in which his character apologizes for the first time in his life . for what ? for shooting reese ' s little toe off ! needless to say , murphy shows little , if any , promise or imagination as a screenwriter . in all fairness , however , a few rays of sunshine do manage to break through the gloomy cloud surrounding the movie . danny aiello is fun to watch as a dirty cop on the take . aiello stands out in the large , ensemble cast : he obviously relishes the opportunity to play such a nasty character ( a racist detective with mob ties ) . aiello ' s zesty performance gives harlem nights some much needed spice . 
another bright spot is arsenio hall , who has a hilarious , show - stopping cameo as a cry - baby gangster ; hall virtually steals the spotlight from murphy . in fact , hall ' s ten minutes on screen are the funniest ten minutes in the movie . unfortunately , his character is completely irrelevant to the plot ; murphy should have given hall a much bigger role . of course , i ' ve already mentioned that i didn ' t care for murphy ' s character , but i have to admit that i did love his neckties . they are simply spectacular -- almost worth the price of admission .\n",
170 | "\n",
171 | "0\n"
172 | ]
173 | }
174 | ],
175 | "source": [
176 | "from sklearn.model_selection import train_test_split\n",
177 | "X_train,X_test,y_train,y_test=train_test_split(text_data,label,test_size=0.25,random_state=23)\n",
178 | "print(X_train[3])\n",
179 | "print()\n",
180 | "print(y_train[34])"
181 | ]
182 | },
183 | {
184 | "cell_type": "markdown",
185 | "metadata": {},
186 | "source": [
187 | "## Preparing the Bag of Words (DTM), Fitting the Model, Calculating the Score"
188 | ]
189 | },
190 | {
191 | "cell_type": "code",
192 | "execution_count": 54,
193 | "metadata": {},
194 | "outputs": [
195 | {
196 | "name": "stdout",
197 | "output_type": "stream",
198 | "text": [
199 | "The accuracy of the Naive bayes: 78.8\n",
200 | "Classification Report:\n",
201 | " precision recall f1-score support\n",
202 | "\n",
203 | " Negative 0.72 0.89 0.80 238\n",
204 | " Positive 0.88 0.69 0.77 262\n",
205 | "\n",
206 | " micro avg 0.79 0.79 0.79 500\n",
207 | " macro avg 0.80 0.79 0.79 500\n",
208 | "weighted avg 0.81 0.79 0.79 500\n",
209 | "\n"
210 | ]
211 | }
212 | ],
213 | "source": [
214 | "from sklearn import metrics\n",
215 | "from sklearn.naive_bayes import MultinomialNB\n",
216 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
217 | "\n",
218 | "tf_cv=TfidfVectorizer(stop_words='english')\n",
219 | "train_dtm_tf=tf_cv.fit_transform(X_train)\n",
220 | "test_dtm_tf=tf_cv.transform(X_test)\n",
221 | "\n",
222 | "nb=MultinomialNB()\n",
223 | "nb=nb.fit(train_dtm_tf,y_train)\n",
224 | "predicted=nb.predict(test_dtm_tf)\n",
225 | "score=100.0* nb.score(test_dtm_tf,y_test)\n",
226 | "print(\"The accuracy of the Naive bayes:\",score)\n",
227 | "print(\"Classification Report:\")\n",
228 | "report=metrics.classification_report(y_test,predicted, target_names = ['Negative', 'Positive'])\n",
229 | "print(report)"
230 | ]
231 | },
232 | {
233 | "cell_type": "code",
234 | "execution_count": 46,
235 | "metadata": {},
236 | "outputs": [
237 | {
238 | "data": {
239 | "text/plain": [
240 | "array([[213, 25],\n",
241 | " [ 81, 181]], dtype=int64)"
242 | ]
243 | },
244 | "execution_count": 46,
245 | "metadata": {},
246 | "output_type": "execute_result"
247 | }
248 | ],
249 | "source": [
250 | "metrics.confusion_matrix(y_test,predicted)\n",
251 | "#confusion(y_test, y_pred, ['Negative', 'Positive'], 'Naive Bayes Model')"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "## Top 20 positive words "
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 47,
264 | "metadata": {},
265 | "outputs": [
266 | {
267 | "name": "stdout",
268 | "output_type": "stream",
269 | "text": [
270 | "['film', 'movie', 'like', 'life', 'story', 'good', 'just', 'time', 'character', 'characters', 'films', 'great', 'way', 'people', 'best', 'really', 'does', 'love', 'man', 'world']\n"
271 | ]
272 | }
273 | ],
274 | "source": [
275 | "all_words=np.array(tf_cv.get_feature_names())\n",
276 | "top_word_index=np.argsort(nb.coef_[0])[-20:]\n",
277 | "tn_lst=[word for word in all_words[top_word_index]]\n",
278 | "tn_lst.reverse()\n",
279 | "print(tn_lst)"
280 | ]
281 | },
282 | {
283 | "cell_type": "markdown",
284 | "metadata": {},
285 | "source": [
286 | "## Using Logistic Regression"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 53,
292 | "metadata": {},
293 | "outputs": [
294 | {
295 | "name": "stdout",
296 | "output_type": "stream",
297 | "text": [
298 | "Accuracy of Logistic Regression: 84.39999999999999\n"
299 | ]
300 | }
301 | ],
302 | "source": [
303 | "from sklearn.linear_model import LogisticRegression\n",
304 | "lr=LogisticRegression(C=1000)\n",
305 | "\n",
306 | "lr=lr.fit(train_dtm_tf,y_train)\n",
307 | "predicted=lr.predict(test_dtm_tf)\n",
308 | "scr = 100.0 * lr.score(test_dtm_tf, y_test)\n",
309 | "print(\"Accuracy of Logistic Regression:\",scr)"
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "metadata": {},
315 | "source": [
316 | "### Top 20 Positive Words"
317 | ]
318 | },
319 | {
320 | "cell_type": "code",
321 | "execution_count": 55,
322 | "metadata": {},
323 | "outputs": [
324 | {
325 | "name": "stdout",
326 | "output_type": "stream",
327 | "text": [
328 | "['great', 'fun', 'overall', 'life', 'memorable', 'definitely', 'quite', 'frank', 'performance', 'seen', 'excellent', 'hilarious', 'titanic', 'terrific', 'enjoyed', 'job', 'rob', 'family', 'different', 'performances']\n"
329 | ]
330 | }
331 | ],
332 | "source": [
333 | "top_word_index=np.argsort(lr.coef_[0])[-20:]\n",
334 | "tn_lst=[word for word in all_words[top_word_index]]\n",
335 | "tn_lst.reverse()\n",
336 | "print(tn_lst)"
337 | ]
338 | },
339 | {
340 | "cell_type": "markdown",
341 | "metadata": {},
342 | "source": [
343 | "### Top 20 Negative Words"
344 | ]
345 | },
346 | {
347 | "cell_type": "code",
348 | "execution_count": 56,
349 | "metadata": {},
350 | "outputs": [
351 | {
352 | "name": "stderr",
353 | "output_type": "stream",
354 | "text": [
355 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
356 | " FutureWarning)\n"
357 | ]
358 | },
359 | {
360 | "name": "stdout",
361 | "output_type": "stream",
362 | "text": [
363 | "['bad', 'plot', 'unfortunately', 'boring', 'worst', 'reason', 'supposed', 'awful', 'poor', 'waste', 'stupid', 'script', 'ridiculous', 'fails', 'harry', 'dull', 'carpenter', 'terrible', 'mess', 'poorly']\n"
364 | ]
365 | }
366 | ],
367 | "source": [
368 | "y_train_reverse = [0 if y==1 else 1 for y in y_train]\n",
369 | "lr = lr.fit(train_dtm_tf, y_train_reverse)\n",
370 | "\n",
371 | "top_word_index = np.argsort(lr.coef_[0])[-20:]\n",
372 | "tn_lst = [word for word in all_words[top_word_index]]\n",
373 | "tn_lst.reverse()\n",
374 | "print(tn_lst)"
375 | ]
376 | },
377 | {
378 | "cell_type": "markdown",
379 | "metadata": {},
380 | "source": [
381 | "# Now using Stemming "
382 | ]
383 | },
384 | {
385 | "cell_type": "markdown",
386 | "metadata": {},
387 | "source": [
388 | "## As CountVectorizer and TfidfVectorizer don't do stemming"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 5,
394 | "metadata": {},
395 | "outputs": [
396 | {
397 | "ename": "NameError",
398 | "evalue": "name 'X_train' is not defined",
399 | "output_type": "error",
400 | "traceback": [
401 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
402 | "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
403 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 12\u001b[0m \u001b[0mtf_cv\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mTfidfVectorizer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtokenizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtokenize\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 13\u001b[1;33m \u001b[0mtrain_dtm_tf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtf_cv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_transform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_train\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 14\u001b[0m \u001b[0mtest_dtm_tf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtf_cv\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
404 | "\u001b[1;31mNameError\u001b[0m: name 'X_train' is not defined"
405 | ]
406 | }
407 | ],
408 | "source": [
409 | "import string,nltk\n",
410 | "from nltk.stem import PorterStemmer\n",
411 | "from sklearn.feature_extraction.text import TfidfVectorizer\n",
412 | "from sklearn.linear_model import LogisticRegression\n",
413 | "\n",
414 | "def tokenize(text):\n",
415 | " tokens=nltk.word_tokenize(text)\n",
416 | " tokens=[token for token in tokens if token not in string.punctuation]\n",
417 | " ps=PorterStemmer()\n",
418 | "    stems=map(ps.stem,tokens)\n",
419 | " return stems\n",
420 | "\n",
421 | "tf_cv=TfidfVectorizer(tokenizer=tokenize)\n",
422 | "train_dtm_tf=tf_cv.fit_transform(X_train)\n",
423 | "test_dtm_tf=tf_cv.transform(X_test)\n",
424 | "\n",
425 | "lr=LogisticRegression(C=1000)\n",
426 | "lr=lr.fit(train_dtm_tf,y_train)\n",
427 | "predicted=lr.predict(test_dtm_tf)\n",
428 | "\n",
429 | "scr=100.0 * lr.score(test_dtm_tf,y_test)\n",
430 | "print(\"Accuracy after applying stemming:\",scr)"
431 | ]
432 | }
433 | ],
434 | "metadata": {
435 | "kernelspec": {
436 | "display_name": "Python 3",
437 | "language": "python",
438 | "name": "python3"
439 | },
440 | "language_info": {
441 | "codemirror_mode": {
442 | "name": "ipython",
443 | "version": 3
444 | },
445 | "file_extension": ".py",
446 | "mimetype": "text/x-python",
447 | "name": "python",
448 | "nbconvert_exporter": "python",
449 | "pygments_lexer": "ipython3",
450 | "version": "3.7.1"
451 | }
452 | },
453 | "nbformat": 4,
454 | "nbformat_minor": 2
455 | }
456 |
--------------------------------------------------------------------------------
/Sentiment Analysis/restaurentreview.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 3,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import pandas as pd\n",
10 | "import numpy as np\n",
11 | "import seaborn as sb\n",
12 | "import matplotlib.pyplot as plt"
13 | ]
14 | },
15 | {
16 | "cell_type": "markdown",
17 | "metadata": {},
18 | "source": [
19 | "# Restaurant Reviews using Logistic Regression"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 4,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/html": [
30 | "\n",
31 | "\n",
44 | "
\n",
45 | " \n",
46 | " \n",
47 | " | \n",
48 | " Review | \n",
49 | " Liked | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " Wow... Loved this place. | \n",
56 | " 1 | \n",
57 | "
\n",
58 | " \n",
59 | " 1 | \n",
60 | " Crust is not good. | \n",
61 | " 0 | \n",
62 | "
\n",
63 | " \n",
64 | " 2 | \n",
65 | " Not tasty and the texture was just nasty. | \n",
66 | " 0 | \n",
67 | "
\n",
68 | " \n",
69 | " 3 | \n",
70 | " Stopped by during the late May bank holiday of... | \n",
71 | " 1 | \n",
72 | "
\n",
73 | " \n",
74 | " 4 | \n",
75 | " The selection on the menu was great and so wer... | \n",
76 | " 1 | \n",
77 | "
\n",
78 | " \n",
79 | "
\n",
80 | "
"
81 | ],
82 | "text/plain": [
83 | " Review Liked\n",
84 | "0 Wow... Loved this place. 1\n",
85 | "1 Crust is not good. 0\n",
86 | "2 Not tasty and the texture was just nasty. 0\n",
87 | "3 Stopped by during the late May bank holiday of... 1\n",
88 | "4 The selection on the menu was great and so wer... 1"
89 | ]
90 | },
91 | "execution_count": 4,
92 | "metadata": {},
93 | "output_type": "execute_result"
94 | }
95 | ],
96 | "source": [
97 | "document= pd.read_csv('Restaurant_Reviews.tsv', delimiter = '\\t', quoting = 3)\n",
98 | "document.head()"
99 | ]
100 | },
101 | {
102 | "cell_type": "code",
103 | "execution_count": 5,
104 | "metadata": {},
105 | "outputs": [
106 | {
107 | "name": "stdout",
108 | "output_type": "stream",
109 | "text": [
110 | "\n",
111 | "RangeIndex: 1000 entries, 0 to 999\n",
112 | "Data columns (total 2 columns):\n",
113 | "Review 1000 non-null object\n",
114 | "Liked 1000 non-null int64\n",
115 | "dtypes: int64(1), object(1)\n",
116 | "memory usage: 15.7+ KB\n"
117 | ]
118 | }
119 | ],
120 | "source": [
121 | "document.info()"
122 | ]
123 | },
124 | {
125 | "cell_type": "code",
126 | "execution_count": 6,
127 | "metadata": {},
128 | "outputs": [
129 | {
130 | "data": {
131 | "text/plain": [
132 | "1 500\n",
133 | "0 500\n",
134 | "Name: Liked, dtype: int64"
135 | ]
136 | },
137 | "execution_count": 6,
138 | "metadata": {},
139 | "output_type": "execute_result"
140 | }
141 | ],
142 | "source": [
143 | "document['Liked'].value_counts()"
144 | ]
145 | },
146 | {
147 | "cell_type": "code",
148 | "execution_count": 10,
149 | "metadata": {},
150 | "outputs": [
151 | {
152 | "data": {
153 | "text/plain": [
154 | "(0 Wow... Loved this place.\n",
155 | " 1 Crust is not good.\n",
156 | " 2 Not tasty and the texture was just nasty.\n",
157 | " 3 Stopped by during the late May bank holiday of...\n",
158 | " 4 The selection on the menu was great and so wer...\n",
159 | " Name: Review, dtype: object, 0 1\n",
160 | " 1 0\n",
161 | " 2 0\n",
162 | " 3 1\n",
163 | " 4 1\n",
164 | " Name: Liked, dtype: int64)"
165 | ]
166 | },
167 | "execution_count": 10,
168 | "metadata": {},
169 | "output_type": "execute_result"
170 | }
171 | ],
172 | "source": [
173 | "X=document['Review']\n",
174 | "y=document['Liked']\n",
175 | "X.head(),y.head()"
176 | ]
177 | },
178 | {
179 | "cell_type": "code",
180 | "execution_count": 11,
181 | "metadata": {},
182 | "outputs": [],
183 | "source": [
184 | "from sklearn.model_selection import train_test_split\n",
185 | "X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=23)"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 15,
191 | "metadata": {},
192 | "outputs": [
193 | {
194 | "name": "stdout",
195 | "output_type": "stream",
196 | "text": [
197 | "The accuracy using Logistic Regression 73.33333333333333\n"
198 | ]
199 | },
200 | {
201 | "name": "stderr",
202 | "output_type": "stream",
203 | "text": [
204 | "C:\\Users\\HP\\Documents\\New folder\\lib\\site-packages\\sklearn\\linear_model\\logistic.py:433: FutureWarning: Default solver will be changed to 'lbfgs' in 0.22. Specify a solver to silence this warning.\n",
205 | " FutureWarning)\n"
206 | ]
207 | }
208 | ],
209 | "source": [
210 | "from sklearn.linear_model import LogisticRegression\n",
211 | "from sklearn.feature_extraction.text import CountVectorizer\n",
212 | "\n",
213 | "cv=CountVectorizer(stop_words='english')\n",
214 | "train_dtm=cv.fit_transform(X_train)\n",
215 | "test_dtm=cv.transform(X_test)\n",
216 | "\n",
217 | "lr=LogisticRegression(C=1000)\n",
218 | "lr=lr.fit(train_dtm,y_train)\n",
219 | "predicted=lr.predict(test_dtm)\n",
220 | "\n",
221 | "scr=lr.score(test_dtm,y_test)\n",
222 | "print(\"The accuracy using Logistic Regression\",scr*100.0)"
223 | ]
224 | },
225 | {
226 | "cell_type": "code",
227 | "execution_count": 16,
228 | "metadata": {},
229 | "outputs": [
230 | {
231 | "name": "stdout",
232 | "output_type": "stream",
233 | "text": [
234 | "['great', 'eclectic', 'loved', 'glad', 'amazing', 'heart', 'gyros', 'awesome', 'inside', 'delicious', 'fantastic', 'prompt', 'enjoyed', 'check', 'excellent', 'beat', 'complaints', 'nachos', 'friendly', 'complain']\n"
235 | ]
236 | }
237 | ],
238 | "source": [
239 | "#top 20 positive words\n",
240 | "all_words=np.array(cv.get_feature_names())\n",
241 | "top_word_index=np.argsort(lr.coef_[0])[-20:]\n",
242 | "tn_lst=[word for word in all_words[top_word_index]]\n",
243 | "tn_lst.reverse()\n",
244 | "print(tn_lst)"
245 | ]
246 | }
247 | ],
248 | "metadata": {
249 | "kernelspec": {
250 | "display_name": "Python 3",
251 | "language": "python",
252 | "name": "python3"
253 | },
254 | "language_info": {
255 | "codemirror_mode": {
256 | "name": "ipython",
257 | "version": 3
258 | },
259 | "file_extension": ".py",
260 | "mimetype": "text/x-python",
261 | "name": "python",
262 | "nbconvert_exporter": "python",
263 | "pygments_lexer": "ipython3",
264 | "version": "3.7.1"
265 | }
266 | },
267 | "nbformat": 4,
268 | "nbformat_minor": 2
269 | }
270 |
--------------------------------------------------------------------------------
/TextClassification/20news-bydate_py3.pkz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/suubh/Machine-Learning-in-Python/154596a9509dc7a066ae3caf5526b6f663a359cc/TextClassification/20news-bydate_py3.pkz
--------------------------------------------------------------------------------
/TextClassification/Textclassification.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Text Classification and Naive Bayes Classifier"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 2,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as ps\n",
17 | "import numpy as np\n",
18 | "import seaborn as sb\n",
19 | "import matplotlib.pyplot as plt\n",
20 | "import warnings\n",
21 | "warnings.filterwarnings(\"ignore\")"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "from sklearn.datasets import fetch_20newsgroups"
31 | ]
32 | },
33 | {
34 | "cell_type": "code",
35 | "execution_count": 6,
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "#help(fetch_20newsgroups)"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": 4,
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "train=fetch_20newsgroups(data_home='.', subset='train')"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 5,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "test=fetch_20newsgroups(data_home='.', subset='test')"
58 | ]
59 | },
60 | {
61 | "cell_type": "code",
62 | "execution_count": 12,
63 | "metadata": {},
64 | "outputs": [
65 | {
66 | "data": {
67 | "text/plain": [
68 | "(dict_keys(['data', 'filenames', 'target_names', 'target', 'DESCR']),\n",
69 | " dict_keys(['data', 'filenames', 'target_names', 'target', 'DESCR']))"
70 | ]
71 | },
72 | "execution_count": 12,
73 | "metadata": {},
74 | "output_type": "execute_result"
75 | }
76 | ],
77 | "source": [
78 | "train.keys(),test.keys()"
79 | ]
80 | },
81 | {
82 | "cell_type": "markdown",
83 | "metadata": {},
84 | "source": [
85 | "There are 5 keys in the train object:\n",
86 | "\n",
87 | "data: List of text messages\n",
88 | "filenames: List of file names, for each message in data, there's a corresponding file name. This is normally not needed in classification.\n",
89 | "target: Numeric code for the 20 news groups, from 0 to 19. Each message has a corresponding target, that is used as label or class in classification.\n",
90 | "target_names: String, name of the 20 targets.\n",
91 | "DESCR: Description of the dataset.\n"
92 | ]
93 | },
94 | {
95 | "cell_type": "code",
96 | "execution_count": 19,
97 | "metadata": {},
98 | "outputs": [
99 | {
100 | "data": {
101 | "text/plain": [
102 | "(11314, 7532)"
103 | ]
104 | },
105 | "execution_count": 19,
106 | "metadata": {},
107 | "output_type": "execute_result"
108 | }
109 | ],
110 | "source": [
111 | "len(train['data']),len(test['data'])"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": 20,
117 | "metadata": {},
118 | "outputs": [
119 | {
120 | "name": "stdout",
121 | "output_type": "stream",
122 | "text": [
123 | "Class 0=alt.atheism\n",
124 | "Class 1=comp.graphics\n",
125 | "Class 2=comp.os.ms-windows.misc\n",
126 | "Class 3=comp.sys.ibm.pc.hardware\n",
127 | "Class 4=comp.sys.mac.hardware\n",
128 | "Class 5=comp.windows.x\n",
129 | "Class 6=misc.forsale\n",
130 | "Class 7=rec.autos\n",
131 | "Class 8=rec.motorcycles\n",
132 | "Class 9=rec.sport.baseball\n",
133 | "Class10=rec.sport.hockey\n",
134 | "Class11=sci.crypt\n",
135 | "Class12=sci.electronics\n",
136 | "Class13=sci.med\n",
137 | "Class14=sci.space\n",
138 | "Class15=soc.religion.christian\n",
139 | "Class16=talk.politics.guns\n",
140 | "Class17=talk.politics.mideast\n",
141 | "Class18=talk.politics.misc\n",
142 | "Class19=talk.religion.misc\n"
143 | ]
144 | }
145 | ],
146 | "source": [
147 | "for i ,label in enumerate(train['target_names']):\n",
148 | " print(f'Class{i:2d}={label}')"
149 | ]
150 | },
151 | {
152 | "cell_type": "markdown",
153 | "metadata": {},
154 | "source": [
155 | "# Printing a random message"
156 | ]
157 | },
158 | {
159 | "cell_type": "code",
160 | "execution_count": 28,
161 | "metadata": {},
162 | "outputs": [
163 | {
164 | "name": "stdout",
165 | "output_type": "stream",
166 | "text": [
167 | "Class number=0\n",
168 | "Class name=alt.atheism\n",
169 | "\n",
170 | "From: keith@cco.caltech.edu (Keith Allan Schneider)\n",
171 | "Subject: Re: <>The \"`little' things\" above were in reference to Germany, clearly. People\n",
180 | ">>said that there were similar things in Germany, but no one could name any.\n",
181 | ">That's not true. I gave you two examples. One was the rather\n",
182 | ">pevasive anti-semitism in German Christianity well before Hitler\n",
183 | ">arrived. The other was the system of social ranks that were used\n",
184 | ">in Imperail Germany and Austria to distinguish Jews from the rest \n",
185 | ">of the population.\n",
186 | "\n",
187 | "These don't seem like \"little things\" to me. At least, they are orders\n",
188 | "worse than the motto. Do you think that the motto is a \"little thing\"\n",
189 | "that will lead to worse things?\n",
190 | "\n",
191 | "keith\n",
192 | "\n"
193 | ]
194 | }
195 | ],
196 | "source": [
197 | "#From training set\n",
198 | "item_num=20\n",
199 | "class_num=train['target'][item_num]\n",
200 | "print(f'Class number={class_num}')\n",
201 | "print(f'Class name={train[\"target_names\"][class_num]}')\n",
202 | "print()\n",
203 | "print(train['data'][item_num])"
204 | ]
205 | },
206 | {
207 | "cell_type": "code",
208 | "execution_count": 32,
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "name": "stdout",
213 | "output_type": "stream",
214 | "text": [
215 | "Class Number = 0\n",
216 | "Class Name = alt.atheism\n",
217 | "From: mathew \n",
218 | "Subject: Re: STRONG & weak Atheism\n",
219 | "Organization: Mantis Consultants, Cambridge. UK.\n",
220 | "X-Newsreader: rusnews v1.02\n",
221 | "Lines: 9\n",
222 | "\n",
223 | "acooper@mac.cc.macalstr.edu (Turin Turambar, ME Department of Utter Misery) writes:\n",
224 | "> Did that FAQ ever got modified to re-define strong atheists as not those who\n",
225 | "> assert the nonexistence of God, but as those who assert that they BELIEVE in \n",
226 | "> the nonexistence of God?\n",
227 | "\n",
228 | "In a word, yes.\n",
229 | "\n",
230 | "\n",
231 | "mathew\n",
232 | "\n"
233 | ]
234 | }
235 | ],
236 | "source": [
237 | "#From Testing set\n",
238 | "test_message = 2\n",
239 | "class_num = test['target'][test_message]\n",
240 | "print(f'Class Number = {class_num}')\n",
241 | "print(f'Class Name = {test[\"target_names\"][class_num]}')\n",
242 | "print(test['data'][test_message])"
243 | ]
244 | },
245 | {
246 | "cell_type": "markdown",
247 | "metadata": {},
248 | "source": [
249 | "# Naive Bayes Classifier"
250 | ]
251 | },
252 | {
253 | "cell_type": "markdown",
254 | "metadata": {},
255 | "source": [
256 | "## 1)Using Count vectorizer for making DTM"
257 | ]
258 | },
259 | {
260 | "cell_type": "code",
261 | "execution_count": 6,
262 | "metadata": {},
263 | "outputs": [
264 | {
265 | "name": "stdout",
266 | "output_type": "stream",
267 | "text": [
268 | "Accuracy of the model is : 80.23101433882103\n"
269 | ]
270 | }
271 | ],
272 | "source": [
273 | "from sklearn.feature_extraction.text import CountVectorizer\n",
274 | "from sklearn.naive_bayes import MultinomialNB\n",
275 | "\n",
276 | "#make the DTM\n",
277 | "cv=CountVectorizer(stop_words='english')\n",
278 | "train_dtm=cv.fit_transform(train['data'])\n",
279 | "test_dtm=cv.transform(test['data'])\n",
280 | "\n",
281 | "#Fit the model\n",
282 | "nb=MultinomialNB()\n",
283 | "nb=nb.fit(train_dtm,train['target'])\n",
284 | "\n",
285 | "\n",
286 | "predicted = nb.predict(test_dtm)\n",
287 | "score=100.0 * nb.score(test_dtm,test['target'])\n",
288 | "print('Accuracy of the model is :',score)\n"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 52,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "name": "stdout",
298 | "output_type": "stream",
299 | "text": [
300 | " precision recall f1-score support\n",
301 | "\n",
302 | " alt.atheism 0.80 0.81 0.80 319\n",
303 | " comp.graphics 0.65 0.80 0.72 389\n",
304 | " comp.os.ms-windows.misc 0.80 0.04 0.08 394\n",
305 | "comp.sys.ibm.pc.hardware 0.55 0.80 0.65 392\n",
306 | " comp.sys.mac.hardware 0.85 0.79 0.82 385\n",
307 | " comp.windows.x 0.69 0.84 0.76 395\n",
308 | " misc.forsale 0.89 0.74 0.81 390\n",
309 | " rec.autos 0.89 0.92 0.91 396\n",
310 | " rec.motorcycles 0.95 0.94 0.95 398\n",
311 | " rec.sport.baseball 0.95 0.92 0.93 397\n",
312 | " rec.sport.hockey 0.92 0.97 0.94 399\n",
313 | " sci.crypt 0.80 0.96 0.87 396\n",
314 | " sci.electronics 0.79 0.70 0.74 393\n",
315 | " sci.med 0.88 0.87 0.87 396\n",
316 | " sci.space 0.84 0.92 0.88 394\n",
317 | " soc.religion.christian 0.81 0.95 0.87 398\n",
318 | " talk.politics.guns 0.72 0.93 0.81 364\n",
319 | " talk.politics.mideast 0.93 0.94 0.94 376\n",
320 | " talk.politics.misc 0.68 0.62 0.65 310\n",
321 | " talk.religion.misc 0.88 0.44 0.59 251\n",
322 | "\n",
323 | " micro avg 0.80 0.80 0.80 7532\n",
324 | " macro avg 0.81 0.79 0.78 7532\n",
325 | " weighted avg 0.81 0.80 0.78 7532\n",
326 | "\n"
327 | ]
328 | }
329 | ],
330 | "source": [
331 | "#Classification Report\n",
332 | "from sklearn import metrics\n",
333 | "print(metrics.classification_report(test['target'],predicted,target_names=test['target_names']))"
334 | ]
335 | },
336 | {
337 | "cell_type": "code",
338 | "execution_count": 7,
339 | "metadata": {},
340 | "outputs": [
341 | {
342 | "ename": "ModuleNotFoundError",
343 | "evalue": "No module named 'mlplots'",
344 | "output_type": "error",
345 | "traceback": [
346 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
347 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
348 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;31m#Confusion Matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mmlplots\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mml\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[0mfig\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0max\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mplt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msubplots\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfigsize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m13\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m10\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m# Call confusion matrix plotting routine\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
349 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'mlplots'"
350 | ]
351 | }
352 | ],
353 | "source": [
354 | "#Confusion Matrix\n",
355 | "# mlplots is not a published package; use sklearn + matplotlib instead\n",
356 | "cm = metrics.confusion_matrix(test['target'], predicted)\n",
357 | "fig, ax = plt.subplots(figsize=(13, 10))\n",
358 | "ax.imshow(cm, cmap='Blues')\n",
359 | "ax.set_title('Naive Bayes Model')"
360 | ]
361 | },
362 | {
363 | "cell_type": "code",
364 | "execution_count": 8,
365 | "metadata": {},
366 | "outputs": [
367 | {
368 | "ename": "InvalidArgument",
369 | "evalue": "y_true must contain only values of 0 or 1",
370 | "output_type": "error",
371 | "traceback": [
372 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
373 | "\u001b[1;31mInvalidArgument\u001b[0m Traceback (most recent call last)",
374 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mmlplot\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mevaluation\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mClassificationEvaluation\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[0meval\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mClassificationEvaluation\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'target'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mpredicted\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mtest\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'target_names'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m'Naive Bayes'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[0meval\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconfusion_matrix\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mthreshold\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0.5\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;31m#confusion matrix\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
375 | "\u001b[1;32m~\\Documents\\New folder\\lib\\site-packages\\mlplot\\evaluation\\classification.py\u001b[0m in \u001b[0;36m__init__\u001b[1;34m(self, y_true, y_pred, class_names, model_name)\u001b[0m\n\u001b[0;32m 30\u001b[0m \u001b[0mtrue_values\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msort\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0munique\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0my_true\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 31\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrue_values\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[1;36m2\u001b[0m \u001b[1;32mor\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mequal\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mtrue_values\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 32\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mInvalidArgument\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'y_true must contain only values of 0 or 1'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 33\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 34\u001b[0m \u001b[1;31m# Check y_pred values\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
376 | "\u001b[1;31mInvalidArgument\u001b[0m: y_true must contain only values of 0 or 1"
377 | ]
378 | }
379 | ],
380 | "source": [
381 | "# NOTE: mlplot's ClassificationEvaluation accepts only binary (0/1) targets,\n",
382 | "# so it raises InvalidArgument on this 20-class problem; use sklearn instead.\n",
383 | "print(metrics.confusion_matrix(test['target'], predicted))\n",
384 | "\n",
385 | "#confusion matrix"
386 | ]
387 | }
388 | ],
389 | "metadata": {
390 | "kernelspec": {
391 | "display_name": "Python 3",
392 | "language": "python",
393 | "name": "python3"
394 | },
395 | "language_info": {
396 | "codemirror_mode": {
397 | "name": "ipython",
398 | "version": 3
399 | },
400 | "file_extension": ".py",
401 | "mimetype": "text/x-python",
402 | "name": "python",
403 | "nbconvert_exporter": "python",
404 | "pygments_lexer": "ipython3",
405 | "version": "3.7.1"
406 | }
407 | },
408 | "nbformat": 4,
409 | "nbformat_minor": 2
410 | }
411 |
--------------------------------------------------------------------------------
/_config.yml:
--------------------------------------------------------------------------------
1 | theme: jekyll-theme-cayman
--------------------------------------------------------------------------------