├── README.md
├── Python for Data Science - Part 2.ipynb
├── Grid Search - Breast Cancer.ipynb
├── Python for data science - Part 4.ipynb
└── Python for Data Science - Part 1.ipynb
/README.md:
--------------------------------------------------------------------------------
1 | # Python-for-data-science
2 | Learn data science with Python
3 |
--------------------------------------------------------------------------------
/Python for Data Science - Part 2.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# The Numpy library"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 16,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import numpy as np #import numpy under its conventional alias np"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 17,
22 | "metadata": {},
23 | "outputs": [
24 | {
25 | "data": {
26 | "text/plain": [
27 | "array([1, 2, 3])"
28 | ]
29 | },
30 | "execution_count": 17,
31 | "metadata": {},
32 | "output_type": "execute_result"
33 | }
34 | ],
35 | "source": [
36 | "#create numpy array\n",
37 | "np.array([1,2,3])"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 18,
43 | "metadata": {},
44 | "outputs": [
45 | {
46 | "data": {
47 | "text/plain": [
48 | "array([[1, 2, 3],\n",
49 | " [4, 5, 6]])"
50 | ]
51 | },
52 | "execution_count": 18,
53 | "metadata": {},
54 | "output_type": "execute_result"
55 | }
56 | ],
57 | "source": [
58 | "a = np.array([[1,2,3],[4,5,6]]) #create multi dimensional array\n",
59 | "a"
60 | ]
61 | },
62 | {
63 | "cell_type": "code",
64 | "execution_count": 19,
65 | "metadata": {},
66 | "outputs": [
67 | {
68 | "data": {
69 | "text/plain": [
70 | "(2, 3)"
71 | ]
72 | },
73 | "execution_count": 19,
74 | "metadata": {},
75 | "output_type": "execute_result"
76 | }
77 | ],
78 | "source": [
79 | "#shape of the array\n",
80 | "a.shape"
81 | ]
82 | },
83 | {
84 | "cell_type": "code",
85 | "execution_count": 20,
86 | "metadata": {},
87 | "outputs": [
88 | {
89 | "data": {
90 | "text/plain": [
91 | "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29])"
92 | ]
93 | },
94 | "execution_count": 20,
95 | "metadata": {},
96 | "output_type": "execute_result"
97 | }
98 | ],
99 | "source": [
100 | "#evenly spaced values from 1 up to (but not including) 30, in steps of 2\n",
101 | "b = np.arange(1,30,2)\n",
102 | "b"
103 | ]
104 | },
105 | {
106 | "cell_type": "code",
107 | "execution_count": 21,
108 | "metadata": {},
109 | "outputs": [
110 | {
111 | "data": {
112 | "text/plain": [
113 | "array([[ 1, 3, 5],\n",
114 | " [ 7, 9, 11],\n",
115 | " [13, 15, 17],\n",
116 | " [19, 21, 23],\n",
117 | " [25, 27, 29]])"
118 | ]
119 | },
120 | "execution_count": 21,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "#return the same data in a new 5x3 shape (b itself is left unchanged)\n",
127 | "b.reshape(5,3)"
128 | ]
129 | },
130 | {
131 | "cell_type": "code",
132 | "execution_count": 174,
133 | "metadata": {},
134 | "outputs": [
135 | {
136 | "data": {
137 | "text/plain": [
138 | "array([ 1. , 1.21052632, 1.42105263, 1.63157895, 1.84210526,\n",
139 | " 2.05263158, 2.26315789, 2.47368421, 2.68421053, 2.89473684,\n",
140 | " 3.10526316, 3.31578947, 3.52631579, 3.73684211, 3.94736842,\n",
141 | " 4.15789474, 4.36842105, 4.57894737, 4.78947368, 5. ])"
142 | ]
143 | },
144 | "execution_count": 174,
145 | "metadata": {},
146 | "output_type": "execute_result"
147 | }
148 | ],
149 | "source": [
150 | "#get 20 evenly spaced numbers from 1 to 5 (both endpoints included)\n",
151 | "c = np.linspace(1,5,20)\n",
152 | "c"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 175,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/plain": [
163 | "(20,)"
164 | ]
165 | },
166 | "execution_count": 175,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "c.shape"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 176,
178 | "metadata": {},
179 | "outputs": [
180 | {
181 | "data": {
182 | "text/plain": [
183 | "array([[ 1. , 1.21052632, 1.42105263, 1.63157895],\n",
184 | " [ 1.84210526, 2.05263158, 2.26315789, 2.47368421],\n",
185 | " [ 2.68421053, 2.89473684, 3.10526316, 3.31578947],\n",
186 | " [ 3.52631579, 3.73684211, 3.94736842, 4.15789474],\n",
187 | " [ 4.36842105, 4.57894737, 4.78947368, 5. ]])"
188 | ]
189 | },
190 | "execution_count": 176,
191 | "metadata": {},
192 | "output_type": "execute_result"
193 | }
194 | ],
195 | "source": [
196 | "#change shape of array in-place\n",
197 | "c.resize(5,4)\n",
198 | "c"
199 | ]
200 | },
201 | {
202 | "cell_type": "code",
203 | "execution_count": 177,
204 | "metadata": {},
205 | "outputs": [
206 | {
207 | "data": {
208 | "text/plain": [
209 | "(5, 4)"
210 | ]
211 | },
212 | "execution_count": 177,
213 | "metadata": {},
214 | "output_type": "execute_result"
215 | }
216 | ],
217 | "source": [
218 | "c.shape"
219 | ]
220 | },
221 | {
222 | "cell_type": "code",
223 | "execution_count": 181,
224 | "metadata": {},
225 | "outputs": [
226 | {
227 | "data": {
228 | "text/plain": [
229 | "array([[ 1., 1.],\n",
230 | " [ 1., 1.]])"
231 | ]
232 | },
233 | "execution_count": 181,
234 | "metadata": {},
235 | "output_type": "execute_result"
236 | }
237 | ],
238 | "source": [
239 | "#create array filled with ones\n",
240 | "d = np.ones((2,2))\n",
241 | "d"
242 | ]
243 | },
244 | {
245 | "cell_type": "code",
246 | "execution_count": 185,
247 | "metadata": {},
248 | "outputs": [
249 | {
250 | "data": {
251 | "text/plain": [
252 | "array([[ 0., 0., 0.],\n",
253 | " [ 0., 0., 0.],\n",
254 | " [ 0., 0., 0.]])"
255 | ]
256 | },
257 | "execution_count": 185,
258 | "metadata": {},
259 | "output_type": "execute_result"
260 | }
261 | ],
262 | "source": [
263 | "#create array filled with zeros\n",
264 | "e = np.zeros((3,3))\n",
265 | "e"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 189,
271 | "metadata": {},
272 | "outputs": [
273 | {
274 | "data": {
275 | "text/plain": [
276 | "array([[ 1., 0.],\n",
277 | " [ 0., 1.]])"
278 | ]
279 | },
280 | "execution_count": 189,
281 | "metadata": {},
282 | "output_type": "execute_result"
283 | }
284 | ],
285 | "source": [
286 | "#create a 2x2 identity matrix (ones on the diagonal, zeros elsewhere)\n",
287 | "f = np.eye(2)\n",
288 | "f"
289 | ]
290 | },
291 | {
292 | "cell_type": "code",
293 | "execution_count": 191,
294 | "metadata": {},
295 | "outputs": [
296 | {
297 | "data": {
298 | "text/plain": [
299 | "array([ 1., 1.])"
300 | ]
301 | },
302 | "execution_count": 191,
303 | "metadata": {},
304 | "output_type": "execute_result"
305 | }
306 | ],
307 | "source": [
308 | "#extract only the diagonal values from array\n",
309 | "np.diag(f)"
310 | ]
311 | },
312 | {
313 | "cell_type": "code",
314 | "execution_count": 194,
315 | "metadata": {},
316 | "outputs": [
317 | {
318 | "data": {
319 | "text/plain": [
320 | "array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])"
321 | ]
322 | },
323 | "execution_count": 194,
324 | "metadata": {},
325 | "output_type": "execute_result"
326 | }
327 | ],
328 | "source": [
329 | "#create array from a list repeated 5 times\n",
330 | "g = np.array([1,2,3]*5)\n",
331 | "g"
332 | ]
333 | },
334 | {
335 | "cell_type": "code",
336 | "execution_count": 196,
337 | "metadata": {},
338 | "outputs": [
339 | {
340 | "data": {
341 | "text/plain": [
342 | "array([1, 1, 1, 2, 2, 2, 3, 3, 3])"
343 | ]
344 | },
345 | "execution_count": 196,
346 | "metadata": {},
347 | "output_type": "execute_result"
348 | }
349 | ],
350 | "source": [
351 | "#repeat each element 3 times using repeat\n",
352 | "np.repeat([1,2,3],3)"
353 | ]
354 | },
355 | {
356 | "cell_type": "code",
357 | "execution_count": 23,
358 | "metadata": {},
359 | "outputs": [
360 | {
361 | "data": {
362 | "text/plain": [
363 | "array([[ 0.61243315, 0.22618119, 0.67464992],\n",
364 | " [ 0.0344245 , 0.2948191 , 0.52865199]])"
365 | ]
366 | },
367 | "execution_count": 23,
368 | "metadata": {},
369 | "output_type": "execute_result"
370 | }
371 | ],
372 | "source": [
373 | "g = np.random.rand(2,3) #this generates a random array\n",
374 | "g"
375 | ]
376 | },
377 | {
378 | "cell_type": "code",
379 | "execution_count": 221,
380 | "metadata": {},
381 | "outputs": [
382 | {
383 | "data": {
384 | "text/plain": [
385 | "array([[ 1. , 1. , 1. ],\n",
386 | " [ 1. , 1. , 1. ],\n",
387 | " [ 0.50725359, 0.28610842, 0.98268379],\n",
388 | " [ 0.52425524, 0.23682556, 0.21598467]])"
389 | ]
390 | },
391 | "execution_count": 221,
392 | "metadata": {},
393 | "output_type": "execute_result"
394 | }
395 | ],
396 | "source": [
397 | "h = np.ones((2,3)) #array of ones; stack h and g vertically\n",
398 | "i = np.vstack([h,g])\n",
399 | "i"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 222,
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "data": {
409 | "text/plain": [
410 | "(4, 3)"
411 | ]
412 | },
413 | "execution_count": 222,
414 | "metadata": {},
415 | "output_type": "execute_result"
416 | }
417 | ],
418 | "source": [
419 | "i.shape"
420 | ]
421 | },
422 | {
423 | "cell_type": "code",
424 | "execution_count": 224,
425 | "metadata": {},
426 | "outputs": [
427 | {
428 | "data": {
429 | "text/plain": [
430 | "array([[ 1. , 1. , 1. , 0.50725359, 0.28610842,\n",
431 | " 0.98268379],\n",
432 | " [ 1. , 1. , 1. , 0.52425524, 0.23682556,\n",
433 | " 0.21598467]])"
434 | ]
435 | },
436 | "execution_count": 224,
437 | "metadata": {},
438 | "output_type": "execute_result"
439 | }
440 | ],
441 | "source": [
442 | "#Now, stack them horizontally\n",
443 | "j = np.hstack([h,g])\n",
444 | "j"
445 | ]
446 | },
447 | {
448 | "cell_type": "code",
449 | "execution_count": 225,
450 | "metadata": {},
451 | "outputs": [
452 | {
453 | "data": {
454 | "text/plain": [
455 | "(2, 6)"
456 | ]
457 | },
458 | "execution_count": 225,
459 | "metadata": {},
460 | "output_type": "execute_result"
461 | }
462 | ],
463 | "source": [
464 | "j.shape"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 24,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "name": "stdout",
474 | "output_type": "stream",
475 | "text": [
476 | "[[ 0.02430146 0.14448542]\n",
477 | " [ 0.54428337 0.40332494]]\n",
478 | "[[ 0.77574886 0.08747577]\n",
479 | " [ 0.51484157 0.92319888]]\n"
480 | ]
481 | }
482 | ],
483 | "source": [
484 | "#Array operations\n",
485 | "k = np.random.rand(2,2)\n",
486 | "l = np.random.rand(2,2)\n",
487 | "print(k)\n",
488 | "print(l)"
489 | ]
490 | },
491 | {
492 | "cell_type": "code",
493 | "execution_count": 25,
494 | "metadata": {},
495 | "outputs": [
496 | {
497 | "data": {
498 | "text/plain": [
499 | "array([[ 0.80005032, 0.23196118],\n",
500 | " [ 1.05912494, 1.32652381]])"
501 | ]
502 | },
503 | "execution_count": 25,
504 | "metadata": {},
505 | "output_type": "execute_result"
506 | }
507 | ],
508 | "source": [
509 | "#element wise addition\n",
510 | "m = k + l\n",
511 | "m"
512 | ]
513 | },
514 | {
515 | "cell_type": "code",
516 | "execution_count": 26,
517 | "metadata": {},
518 | "outputs": [
519 | {
520 | "data": {
521 | "text/plain": [
522 | "array([[-0.75144739, 0.05700965],\n",
523 | " [ 0.02944179, -0.51987394]])"
524 | ]
525 | },
526 | "execution_count": 26,
527 | "metadata": {},
528 | "output_type": "execute_result"
529 | }
530 | ],
531 | "source": [
532 | "#element wise subtraction\n",
533 | "n = k-l\n",
534 | "n"
535 | ]
536 | },
537 | {
538 | "cell_type": "code",
539 | "execution_count": 234,
540 | "metadata": {},
541 | "outputs": [
542 | {
543 | "data": {
544 | "text/plain": [
545 | "array([[ 0.38359018, 0.09226875],\n",
546 | " [ 0.39123512, 0.07776932]])"
547 | ]
548 | },
549 | "execution_count": 234,
550 | "metadata": {},
551 | "output_type": "execute_result"
552 | }
553 | ],
554 | "source": [
555 | "#element wise multiplication\n",
556 | "o = k*l\n",
557 | "o"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": 27,
563 | "metadata": {},
564 | "outputs": [
565 | {
566 | "data": {
567 | "text/plain": [
568 | "array([[ 0.00059056, 0.02087604],\n",
569 | " [ 0.29624438, 0.162671 ]])"
570 | ]
571 | },
572 | "execution_count": 27,
573 | "metadata": {},
574 | "output_type": "execute_result"
575 | }
576 | ],
577 | "source": [
578 | "#element wise power\n",
579 | "p = k**2\n",
580 | "p"
581 | ]
582 | },
583 | {
584 | "cell_type": "code",
585 | "execution_count": 28,
586 | "metadata": {},
587 | "outputs": [
588 | {
589 | "data": {
590 | "text/plain": [
591 | "array([[ 0.09323893, 0.13551456],\n",
592 | " [ 0.62987564, 0.41996073]])"
593 | ]
594 | },
595 | "execution_count": 28,
596 | "metadata": {},
597 | "output_type": "execute_result"
598 | }
599 | ],
600 | "source": [
601 | "#dot product (matrix multiplication for 2-D arrays)\n",
602 | "q = k.dot(l)\n",
603 | "q"
604 | ]
605 | },
606 | {
607 | "cell_type": "code",
608 | "execution_count": 241,
609 | "metadata": {},
610 | "outputs": [
611 | {
612 | "data": {
613 | "text/plain": [
614 | "array([[1, 2],\n",
615 | " [3, 4]])"
616 | ]
617 | },
618 | "execution_count": 241,
619 | "metadata": {},
620 | "output_type": "execute_result"
621 | }
622 | ],
623 | "source": [
624 | "#create a 2x2 array to demonstrate transposing\n",
625 | "a = np.array([[1,2],[3,4]])\n",
626 | "a"
627 | ]
628 | },
629 | {
630 | "cell_type": "code",
631 | "execution_count": 242,
632 | "metadata": {},
633 | "outputs": [
634 | {
635 | "data": {
636 | "text/plain": [
637 | "array([[1, 3],\n",
638 | " [2, 4]])"
639 | ]
640 | },
641 | "execution_count": 242,
642 | "metadata": {},
643 | "output_type": "execute_result"
644 | }
645 | ],
646 | "source": [
647 | "a.T #transpose"
648 | ]
649 | },
650 | {
651 | "cell_type": "code",
652 | "execution_count": 243,
653 | "metadata": {},
654 | "outputs": [
655 | {
656 | "data": {
657 | "text/plain": [
658 | "dtype('int32')"
659 | ]
660 | },
661 | "execution_count": 243,
662 | "metadata": {},
663 | "output_type": "execute_result"
664 | }
665 | ],
666 | "source": [
667 | "#check datatype of elements in array\n",
668 | "a.dtype"
669 | ]
670 | },
671 | {
672 | "cell_type": "code",
673 | "execution_count": 245,
674 | "metadata": {},
675 | "outputs": [
676 | {
677 | "data": {
678 | "text/plain": [
679 | "dtype('float32')"
680 | ]
681 | },
682 | "execution_count": 245,
683 | "metadata": {},
684 | "output_type": "execute_result"
685 | }
686 | ],
687 | "source": [
688 | "#change type using astype ('f' is the type code for float32)\n",
689 | "b = a.astype('f')\n",
690 | "b.dtype"
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "execution_count": 246,
696 | "metadata": {},
697 | "outputs": [],
698 | "source": [
699 | "#Math functions in numpy\n",
700 | "c = np.array([1,2,3,4,5])"
701 | ]
702 | },
703 | {
704 | "cell_type": "code",
705 | "execution_count": 248,
706 | "metadata": {},
707 | "outputs": [
708 | {
709 | "data": {
710 | "text/plain": [
711 | "15"
712 | ]
713 | },
714 | "execution_count": 248,
715 | "metadata": {},
716 | "output_type": "execute_result"
717 | }
718 | ],
719 | "source": [
720 | "c.sum()"
721 | ]
722 | },
723 | {
724 | "cell_type": "code",
725 | "execution_count": 249,
726 | "metadata": {},
727 | "outputs": [
728 | {
729 | "data": {
730 | "text/plain": [
731 | "5"
732 | ]
733 | },
734 | "execution_count": 249,
735 | "metadata": {},
736 | "output_type": "execute_result"
737 | }
738 | ],
739 | "source": [
740 | "c.max()"
741 | ]
742 | },
743 | {
744 | "cell_type": "code",
745 | "execution_count": 250,
746 | "metadata": {},
747 | "outputs": [
748 | {
749 | "data": {
750 | "text/plain": [
751 | "3.0"
752 | ]
753 | },
754 | "execution_count": 250,
755 | "metadata": {},
756 | "output_type": "execute_result"
757 | }
758 | ],
759 | "source": [
760 | "c.mean()"
761 | ]
762 | },
763 | {
764 | "cell_type": "code",
765 | "execution_count": 252,
766 | "metadata": {},
767 | "outputs": [
768 | {
769 | "data": {
770 | "text/plain": [
771 | "4"
772 | ]
773 | },
774 | "execution_count": 252,
775 | "metadata": {},
776 | "output_type": "execute_result"
777 | }
778 | ],
779 | "source": [
780 | "#return index of maximum value\n",
781 | "c.argmax()"
782 | ]
783 | },
784 | {
785 | "cell_type": "code",
786 | "execution_count": 253,
787 | "metadata": {},
788 | "outputs": [
789 | {
790 | "data": {
791 | "text/plain": [
792 | "0"
793 | ]
794 | },
795 | "execution_count": 253,
796 | "metadata": {},
797 | "output_type": "execute_result"
798 | }
799 | ],
800 | "source": [
801 | "#return index of minimum value\n",
802 | "c.argmin()"
803 | ]
804 | },
805 | {
806 | "cell_type": "code",
807 | "execution_count": 30,
808 | "metadata": {},
809 | "outputs": [
810 | {
811 | "data": {
812 | "text/plain": [
813 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81], dtype=int32)"
814 | ]
815 | },
816 | "execution_count": 30,
817 | "metadata": {},
818 | "output_type": "execute_result"
819 | }
820 | ],
821 | "source": [
822 | "d = np.arange(10)**2\n",
823 | "d"
824 | ]
825 | },
826 | {
827 | "cell_type": "code",
828 | "execution_count": 31,
829 | "metadata": {},
830 | "outputs": [
831 | {
832 | "data": {
833 | "text/plain": [
834 | "4"
835 | ]
836 | },
837 | "execution_count": 31,
838 | "metadata": {},
839 | "output_type": "execute_result"
840 | }
841 | ],
842 | "source": [
843 | "#use index for accessing values\n",
844 | "d[2]"
845 | ]
846 | },
847 | {
848 | "cell_type": "code",
849 | "execution_count": 32,
850 | "metadata": {},
851 | "outputs": [
852 | {
853 | "data": {
854 | "text/plain": [
855 | "0"
856 | ]
857 | },
858 | "execution_count": 32,
859 | "metadata": {},
860 | "output_type": "execute_result"
861 | }
862 | ],
863 | "source": [
864 | "d[0]"
865 | ]
866 | },
867 | {
868 | "cell_type": "code",
869 | "execution_count": 33,
870 | "metadata": {},
871 | "outputs": [
872 | {
873 | "data": {
874 | "text/plain": [
875 | "array([ 1, 4, 9, 16], dtype=int32)"
876 | ]
877 | },
878 | "execution_count": 33,
879 | "metadata": {},
880 | "output_type": "execute_result"
881 | }
882 | ],
883 | "source": [
884 | "d[1:5]"
885 | ]
886 | },
887 | {
888 | "cell_type": "code",
889 | "execution_count": 34,
890 | "metadata": {},
891 | "outputs": [
892 | {
893 | "data": {
894 | "text/plain": [
895 | "array([81], dtype=int32)"
896 | ]
897 | },
898 | "execution_count": 34,
899 | "metadata": {},
900 | "output_type": "execute_result"
901 | }
902 | ],
903 | "source": [
904 | "#use a negative index to count from the back (d[-1:] is a slice holding the last element)\n",
905 | "d[-1:]"
906 | ]
907 | },
908 | {
909 | "cell_type": "code",
910 | "execution_count": 35,
911 | "metadata": {},
912 | "outputs": [
913 | {
914 | "data": {
915 | "text/plain": [
916 | "array([ 1, 9, 25, 49, 81], dtype=int32)"
917 | ]
918 | },
919 | "execution_count": 35,
920 | "metadata": {},
921 | "output_type": "execute_result"
922 | }
923 | ],
924 | "source": [
925 | "#use a second colon to include a step size\n",
926 | "d[1:10:2] #d[start:stop:stepsize]"
927 | ]
928 | },
929 | {
930 | "cell_type": "code",
931 | "execution_count": 46,
932 | "metadata": {},
933 | "outputs": [
934 | {
935 | "data": {
936 | "text/plain": [
937 | "array([[ 0, 1, 2, 3, 4, 5],\n",
938 | " [ 6, 7, 8, 9, 10, 11],\n",
939 | " [12, 13, 14, 15, 16, 17],\n",
940 | " [18, 19, 20, 21, 22, 23],\n",
941 | " [24, 25, 26, 27, 28, 29],\n",
942 | " [30, 31, 32, 33, 34, 35]])"
943 | ]
944 | },
945 | "execution_count": 46,
946 | "metadata": {},
947 | "output_type": "execute_result"
948 | }
949 | ],
950 | "source": [
951 | "#multidimensional arrays\n",
952 | "e = np.arange(36)\n",
953 | "e.resize(6,6)\n",
954 | "e"
955 | ]
956 | },
957 | {
958 | "cell_type": "code",
959 | "execution_count": 47,
960 | "metadata": {},
961 | "outputs": [
962 | {
963 | "data": {
964 | "text/plain": [
965 | "8"
966 | ]
967 | },
968 | "execution_count": 47,
969 | "metadata": {},
970 | "output_type": "execute_result"
971 | }
972 | ],
973 | "source": [
974 | "#Access 2nd row and 3rd column\n",
975 | "e[1,2]"
976 | ]
977 | },
978 | {
979 | "cell_type": "code",
980 | "execution_count": 48,
981 | "metadata": {},
982 | "outputs": [
983 | {
984 | "data": {
985 | "text/plain": [
986 | "array([ 8, 9, 10, 11])"
987 | ]
988 | },
989 | "execution_count": 48,
990 | "metadata": {},
991 | "output_type": "execute_result"
992 | }
993 | ],
994 | "source": [
995 | "#use : to select range of columns\n",
996 | "e[1, 2:6]"
997 | ]
998 | },
999 | {
1000 | "cell_type": "code",
1001 | "execution_count": 49,
1002 | "metadata": {},
1003 | "outputs": [
1004 | {
1005 | "data": {
1006 | "text/plain": [
1007 | "array([[ 0, 1, 2, 3, 4],\n",
1008 | " [ 6, 7, 8, 9, 10]])"
1009 | ]
1010 | },
1011 | "execution_count": 49,
1012 | "metadata": {},
1013 | "output_type": "execute_result"
1014 | }
1015 | ],
1016 | "source": [
1017 | "#Select the first two rows and all columns except the last column\n",
1018 | "e[:2,:-1]"
1019 | ]
1020 | },
1021 | {
1022 | "cell_type": "code",
1023 | "execution_count": 50,
1024 | "metadata": {},
1025 | "outputs": [
1026 | {
1027 | "data": {
1028 | "text/plain": [
1029 | "array([30, 32, 34])"
1030 | ]
1031 | },
1032 | "execution_count": 50,
1033 | "metadata": {},
1034 | "output_type": "execute_result"
1035 | }
1036 | ],
1037 | "source": [
1038 | "#select last row and alternate columns\n",
1039 | "e[-1,::2]"
1040 | ]
1041 | },
1042 | {
1043 | "cell_type": "code",
1044 | "execution_count": 51,
1045 | "metadata": {},
1046 | "outputs": [
1047 | {
1048 | "data": {
1049 | "text/plain": [
1050 | "array([32, 34])"
1051 | ]
1052 | },
1053 | "execution_count": 51,
1054 | "metadata": {},
1055 | "output_type": "execute_result"
1056 | }
1057 | ],
1058 | "source": [
1059 | "#select last row and alternate columns from 3rd column\n",
1060 | "e[-1,2::2]"
1061 | ]
1062 | },
1063 | {
1064 | "cell_type": "code",
1065 | "execution_count": 52,
1066 | "metadata": {},
1067 | "outputs": [
1068 | {
1069 | "data": {
1070 | "text/plain": [
1071 | "array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])"
1072 | ]
1073 | },
1074 | "execution_count": 52,
1075 | "metadata": {},
1076 | "output_type": "execute_result"
1077 | }
1078 | ],
1079 | "source": [
1080 | "#select values from array greater than 20\n",
1081 | "e[e>20]"
1082 | ]
1083 | },
1084 | {
1085 | "cell_type": "code",
1086 | "execution_count": 53,
1087 | "metadata": {},
1088 | "outputs": [
1089 | {
1090 | "data": {
1091 | "text/plain": [
1092 | "array([[ 0, 1, 2, 3, 4, 5],\n",
1093 | " [ 6, 7, 8, 9, 10, 11],\n",
1094 | " [12, 13, 14, 15, 16, 17],\n",
1095 | " [18, 19, 20, 20, 20, 20],\n",
1096 | " [20, 20, 20, 20, 20, 20],\n",
1097 | " [20, 20, 20, 20, 20, 20]])"
1098 | ]
1099 | },
1100 | "execution_count": 53,
1101 | "metadata": {},
1102 | "output_type": "execute_result"
1103 | }
1104 | ],
1105 | "source": [
1106 | "#Assign element value as 20 if value is greater than 20\n",
1107 | "e[e>20] = 20\n",
1108 | "e"
1109 | ]
1110 | },
1111 | {
1112 | "cell_type": "code",
1113 | "execution_count": 45,
1114 | "metadata": {},
1115 | "outputs": [
1116 | {
1117 | "data": {
1118 | "text/plain": [
1119 | "array([[ 0, 1, 2],\n",
1120 | " [ 6, 7, 8],\n",
1121 | " [12, 13, 14]])"
1122 | ]
1123 | },
1124 | "execution_count": 45,
1125 | "metadata": {},
1126 | "output_type": "execute_result"
1127 | }
1128 | ],
1129 | "source": [
1130 | "#BE CAREFUL WHILE COPYING ARRAYS: a slice is a view of e, not an independent copy\n",
1131 | "f = e[:3,:3]\n",
1132 | "f"
1133 | ]
1134 | },
1135 | {
1136 | "cell_type": "code",
1137 | "execution_count": 309,
1138 | "metadata": {},
1139 | "outputs": [
1140 | {
1141 | "data": {
1142 | "text/plain": [
1143 | "array([[0, 0, 0],\n",
1144 | " [0, 0, 0],\n",
1145 | " [0, 0, 0]])"
1146 | ]
1147 | },
1148 | "execution_count": 309,
1149 | "metadata": {},
1150 | "output_type": "execute_result"
1151 | }
1152 | ],
1153 | "source": [
1154 | "f[:] = 0\n",
1155 | "f"
1156 | ]
1157 | },
1158 | {
1159 | "cell_type": "code",
1160 | "execution_count": 310,
1161 | "metadata": {},
1162 | "outputs": [
1163 | {
1164 | "data": {
1165 | "text/plain": [
1166 | "array([[ 0, 0, 0, 3, 4, 5],\n",
1167 | " [ 0, 0, 0, 9, 10, 11],\n",
1168 | " [ 0, 0, 0, 15, 16, 17],\n",
1169 | " [18, 19, 20, 20, 20, 20],\n",
1170 | " [20, 20, 20, 20, 20, 20],\n",
1171 | " [20, 20, 20, 20, 20, 20]])"
1172 | ]
1173 | },
1174 | "execution_count": 310,
1175 | "metadata": {},
1176 | "output_type": "execute_result"
1177 | }
1178 | ],
1179 | "source": [
1180 | "e #e also got changed"
1181 | ]
1182 | },
1183 | {
1184 | "cell_type": "code",
1185 | "execution_count": 54,
1186 | "metadata": {},
1187 | "outputs": [
1188 | {
1189 | "data": {
1190 | "text/plain": [
1191 | "array([[ 0, 1, 2, 3, 4, 5],\n",
1192 | " [ 6, 7, 8, 9, 10, 11],\n",
1193 | " [12, 13, 14, 15, 16, 17],\n",
1194 | " [18, 19, 20, 20, 20, 20],\n",
1195 | " [20, 20, 20, 20, 20, 20],\n",
1196 | " [20, 20, 20, 20, 20, 20]])"
1197 | ]
1198 | },
1199 | "execution_count": 54,
1200 | "metadata": {},
1201 | "output_type": "execute_result"
1202 | }
1203 | ],
1204 | "source": [
1205 | "#make an independent copy using the copy function\n",
1206 | "f = e.copy()\n",
1207 | "f"
1208 | ]
1209 | },
1210 | {
1211 | "cell_type": "code",
1212 | "execution_count": 315,
1213 | "metadata": {},
1214 | "outputs": [
1215 | {
1216 | "data": {
1217 | "text/plain": [
1218 | "array([[ 0, 0, 0, 3, 4, 5],\n",
1219 | " [ 0, 0, 0, 9, 10, 11],\n",
1220 | " [ 0, 0, 0, 15, 16, 17],\n",
1221 | " [18, 19, 20, 0, 0, 0],\n",
1222 | " [20, 20, 20, 0, 0, 0],\n",
1223 | " [20, 20, 20, 0, 0, 0]])"
1224 | ]
1225 | },
1226 | "execution_count": 315,
1227 | "metadata": {},
1228 | "output_type": "execute_result"
1229 | }
1230 | ],
1231 | "source": [
1232 | "f[3:,3:] = 0\n",
1233 | "f"
1234 | ]
1235 | },
1236 | {
1237 | "cell_type": "code",
1238 | "execution_count": 316,
1239 | "metadata": {},
1240 | "outputs": [
1241 | {
1242 | "data": {
1243 | "text/plain": [
1244 | "array([[ 0, 0, 0, 3, 4, 5],\n",
1245 | " [ 0, 0, 0, 9, 10, 11],\n",
1246 | " [ 0, 0, 0, 15, 16, 17],\n",
1247 | " [18, 19, 20, 20, 20, 20],\n",
1248 | " [20, 20, 20, 20, 20, 20],\n",
1249 | " [20, 20, 20, 20, 20, 20]])"
1250 | ]
1251 | },
1252 | "execution_count": 316,
1253 | "metadata": {},
1254 | "output_type": "execute_result"
1255 | }
1256 | ],
1257 | "source": [
1258 | "e"
1259 | ]
1260 | },
1261 | {
1262 | "cell_type": "code",
1263 | "execution_count": 329,
1264 | "metadata": {},
1265 | "outputs": [
1266 | {
1267 | "data": {
1268 | "text/plain": [
1269 | "array([[9, 7, 1, 4],\n",
1270 | " [1, 4, 3, 6],\n",
1271 | " [2, 5, 5, 1],\n",
1272 | " [2, 2, 9, 9]])"
1273 | ]
1274 | },
1275 | "execution_count": 329,
1276 | "metadata": {},
1277 | "output_type": "execute_result"
1278 | }
1279 | ],
1280 | "source": [
1281 | "#iterating over arrays\n",
1282 | "g = np.random.randint(1,10,(4,4))\n",
1283 | "g"
1284 | ]
1285 | },
1286 | {
1287 | "cell_type": "code",
1288 | "execution_count": 330,
1289 | "metadata": {},
1290 | "outputs": [
1291 | {
1292 | "name": "stdout",
1293 | "output_type": "stream",
1294 | "text": [
1295 | "[9 7 1 4]\n",
1296 | "[1 4 3 6]\n",
1297 | "[2 5 5 1]\n",
1298 | "[2 2 9 9]\n"
1299 | ]
1300 | }
1301 | ],
1302 | "source": [
1303 | "#iterate over rows\n",
1304 | "for row in g:\n",
1305 | " print(row)"
1306 | ]
1307 | },
1308 | {
1309 | "cell_type": "code",
1310 | "execution_count": 332,
1311 | "metadata": {},
1312 | "outputs": [
1313 | {
1314 | "name": "stdout",
1315 | "output_type": "stream",
1316 | "text": [
1317 | "[9 7 1 4]\n",
1318 | "[1 4 3 6]\n",
1319 | "[2 5 5 1]\n",
1320 | "[2 2 9 9]\n"
1321 | ]
1322 | }
1323 | ],
1324 | "source": [
1325 | "#iterate by index\n",
1326 | "for i in range(len(g)):\n",
1327 | " print(g[i])"
1328 | ]
1329 | }
1330 | ],
1331 | "metadata": {
1332 | "kernelspec": {
1333 | "display_name": "Python 3",
1334 | "language": "python",
1335 | "name": "python3"
1336 | },
1337 | "language_info": {
1338 | "codemirror_mode": {
1339 | "name": "ipython",
1340 | "version": 3
1341 | },
1342 | "file_extension": ".py",
1343 | "mimetype": "text/x-python",
1344 | "name": "python",
1345 | "nbconvert_exporter": "python",
1346 | "pygments_lexer": "ipython3",
1347 | "version": "3.6.5"
1348 | }
1349 | },
1350 | "nbformat": 4,
1351 | "nbformat_minor": 2
1352 | }
1353 |
--------------------------------------------------------------------------------
/Grid Search - Breast Cancer.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Data import and pre-processing"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "#import libraries\n",
17 | "import warnings\n",
18 | "warnings.filterwarnings('ignore')\n",
19 | "import pandas as pd\n",
20 | "import numpy as np\n",
21 | "import matplotlib.pyplot as plt\n",
22 | "import os\n",
23 | "os.chdir('C:\\\\Users\\\\rohan\\\\Documents\\\\Analytics\\\\Data')"
24 | ]
25 | },
26 | {
27 | "cell_type": "code",
28 | "execution_count": 2,
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "#import data\n",
33 | "data = pd.read_csv('breast-cancer-wisconsin.csv',header=None)\n",
34 | "\n",
35 | "#set column names\n",
36 | "data.columns = ['Sample Code Number','Clump Thickness','Uniformity of Cell Size',\n",
37 | " 'Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial Cell Size',\n",
38 | " 'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class']"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 4,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/html": [
49 | "<div>\n",
50 | "<style scoped>\n",
51 | "    .dataframe tbody tr th:only-of-type {\n",
52 | "        vertical-align: middle;\n",
53 | "    }\n",
54 | "\n",
55 | "    .dataframe tbody tr th {\n",
56 | "        vertical-align: top;\n",
57 | "    }\n",
58 | "\n",
59 | "    .dataframe thead th {\n",
60 | "        text-align: right;\n",
61 | "    }\n",
62 | "</style>\n",
63 | "<table border=\"1\" class=\"dataframe\">\n",
64 | "  <thead>\n",
65 | "    <tr style=\"text-align: right;\">\n",
66 | "      <th></th>\n",
67 | "      <th>Sample Code Number</th>\n",
68 | "      <th>Clump Thickness</th>\n",
69 | "      <th>Uniformity of Cell Size</th>\n",
70 | "      <th>Uniformity of Cell Shape</th>\n",
71 | "      <th>Marginal Adhesion</th>\n",
72 | "      <th>Single Epithelial Cell Size</th>\n",
73 | "      <th>Bare Nuclei</th>\n",
74 | "      <th>Bland Chromatin</th>\n",
75 | "      <th>Normal Nucleoli</th>\n",
76 | "      <th>Mitoses</th>\n",
77 | "      <th>Class</th>\n",
78 | "    </tr>\n",
79 | "  </thead>\n",
80 | "  <tbody>\n",
81 | "    <tr>\n",
82 | "      <th>0</th>\n",
83 | "      <td>1000025</td>\n",
84 | "      <td>5</td>\n",
85 | "      <td>1</td>\n",
86 | "      <td>1</td>\n",
87 | "      <td>1</td>\n",
88 | "      <td>2</td>\n",
89 | "      <td>1</td>\n",
90 | "      <td>3</td>\n",
91 | "      <td>1</td>\n",
92 | "      <td>1</td>\n",
93 | "      <td>2</td>\n",
94 | "    </tr>\n",
95 | "    <tr>\n",
96 | "      <th>1</th>\n",
97 | "      <td>1002945</td>\n",
98 | "      <td>5</td>\n",
99 | "      <td>4</td>\n",
100 | "      <td>4</td>\n",
101 | "      <td>5</td>\n",
102 | "      <td>7</td>\n",
103 | "      <td>10</td>\n",
104 | "      <td>3</td>\n",
105 | "      <td>2</td>\n",
106 | "      <td>1</td>\n",
107 | "      <td>2</td>\n",
108 | "    </tr>\n",
109 | "    <tr>\n",
110 | "      <th>2</th>\n",
111 | "      <td>1015425</td>\n",
112 | "      <td>3</td>\n",
113 | "      <td>1</td>\n",
114 | "      <td>1</td>\n",
115 | "      <td>1</td>\n",
116 | "      <td>2</td>\n",
117 | "      <td>2</td>\n",
118 | "      <td>3</td>\n",
119 | "      <td>1</td>\n",
120 | "      <td>1</td>\n",
121 | "      <td>2</td>\n",
122 | "    </tr>\n",
123 | "    <tr>\n",
124 | "      <th>3</th>\n",
125 | "      <td>1016277</td>\n",
126 | "      <td>6</td>\n",
127 | "      <td>8</td>\n",
128 | "      <td>8</td>\n",
129 | "      <td>1</td>\n",
130 | "      <td>3</td>\n",
131 | "      <td>4</td>\n",
132 | "      <td>3</td>\n",
133 | "      <td>7</td>\n",
134 | "      <td>1</td>\n",
135 | "      <td>2</td>\n",
136 | "    </tr>\n",
137 | "    <tr>\n",
138 | "      <th>4</th>\n",
139 | "      <td>1017023</td>\n",
140 | "      <td>4</td>\n",
141 | "      <td>1</td>\n",
142 | "      <td>1</td>\n",
143 | "      <td>3</td>\n",
144 | "      <td>2</td>\n",
145 | "      <td>1</td>\n",
146 | "      <td>3</td>\n",
147 | "      <td>1</td>\n",
148 | "      <td>1</td>\n",
149 | "      <td>2</td>\n",
150 | "    </tr>\n",
151 | "    <tr>\n",
152 | "      <th>5</th>\n",
153 | "      <td>1017122</td>\n",
154 | "      <td>8</td>\n",
155 | "      <td>10</td>\n",
156 | "      <td>10</td>\n",
157 | "      <td>8</td>\n",
158 | "      <td>7</td>\n",
159 | "      <td>10</td>\n",
160 | "      <td>9</td>\n",
161 | "      <td>7</td>\n",
162 | "      <td>1</td>\n",
163 | "      <td>4</td>\n",
164 | "    </tr>\n",
165 | "    <tr>\n",
166 | "      <th>6</th>\n",
167 | "      <td>1018099</td>\n",
168 | "      <td>1</td>\n",
169 | "      <td>1</td>\n",
170 | "      <td>1</td>\n",
171 | "      <td>1</td>\n",
172 | "      <td>2</td>\n",
173 | "      <td>10</td>\n",
174 | "      <td>3</td>\n",
175 | "      <td>1</td>\n",
176 | "      <td>1</td>\n",
177 | "      <td>2</td>\n",
178 | "    </tr>\n",
179 | "    <tr>\n",
180 | "      <th>7</th>\n",
181 | "      <td>1018561</td>\n",
182 | "      <td>2</td>\n",
183 | "      <td>1</td>\n",
184 | "      <td>2</td>\n",
185 | "      <td>1</td>\n",
186 | "      <td>2</td>\n",
187 | "      <td>1</td>\n",
188 | "      <td>3</td>\n",
189 | "      <td>1</td>\n",
190 | "      <td>1</td>\n",
191 | "      <td>2</td>\n",
192 | "    </tr>\n",
193 | "    <tr>\n",
194 | "      <th>8</th>\n",
195 | "      <td>1033078</td>\n",
196 | "      <td>2</td>\n",
197 | "      <td>1</td>\n",
198 | "      <td>1</td>\n",
199 | "      <td>1</td>\n",
200 | "      <td>2</td>\n",
201 | "      <td>1</td>\n",
202 | "      <td>1</td>\n",
203 | "      <td>1</td>\n",
204 | "      <td>5</td>\n",
205 | "      <td>2</td>\n",
206 | "    </tr>\n",
207 | "    <tr>\n",
208 | "      <th>9</th>\n",
209 | "      <td>1033078</td>\n",
210 | "      <td>4</td>\n",
211 | "      <td>2</td>\n",
212 | "      <td>1</td>\n",
213 | "      <td>1</td>\n",
214 | "      <td>2</td>\n",
215 | "      <td>1</td>\n",
216 | "      <td>2</td>\n",
217 | "      <td>1</td>\n",
218 | "      <td>1</td>\n",
219 | "      <td>2</td>\n",
220 | "    </tr>\n",
221 | "  </tbody>\n",
222 | "</table>\n",
223 | "</div>"
224 | ],
225 | "text/plain": [
226 | " Sample Code Number Clump Thickness Uniformity of Cell Size \\\n",
227 | "0 1000025 5 1 \n",
228 | "1 1002945 5 4 \n",
229 | "2 1015425 3 1 \n",
230 | "3 1016277 6 8 \n",
231 | "4 1017023 4 1 \n",
232 | "5 1017122 8 10 \n",
233 | "6 1018099 1 1 \n",
234 | "7 1018561 2 1 \n",
235 | "8 1033078 2 1 \n",
236 | "9 1033078 4 2 \n",
237 | "\n",
238 | " Uniformity of Cell Shape Marginal Adhesion Single Epithelial Cell Size \\\n",
239 | "0 1 1 2 \n",
240 | "1 4 5 7 \n",
241 | "2 1 1 2 \n",
242 | "3 8 1 3 \n",
243 | "4 1 3 2 \n",
244 | "5 10 8 7 \n",
245 | "6 1 1 2 \n",
246 | "7 2 1 2 \n",
247 | "8 1 1 2 \n",
248 | "9 1 1 2 \n",
249 | "\n",
250 | " Bare Nuclei Bland Chromatin Normal Nucleoli Mitoses Class \n",
251 | "0 1 3 1 1 2 \n",
252 | "1 10 3 2 1 2 \n",
253 | "2 2 3 1 1 2 \n",
254 | "3 4 3 7 1 2 \n",
255 | "4 1 3 1 1 2 \n",
256 | "5 10 9 7 1 4 \n",
257 | "6 10 3 1 1 2 \n",
258 | "7 1 3 1 1 2 \n",
259 | "8 1 1 1 5 2 \n",
260 | "9 1 2 1 1 2 "
261 | ]
262 | },
263 | "execution_count": 4,
264 | "metadata": {},
265 | "output_type": "execute_result"
266 | }
267 | ],
268 | "source": [
269 | "#view top rows\n",
270 | "data.head(10)"
271 | ]
272 | },
273 | {
274 | "cell_type": "code",
275 | "execution_count": 5,
276 | "metadata": {},
277 | "outputs": [
278 | {
279 | "data": {
280 | "text/html": [
281 | "\n",
282 | "\n",
295 | "
\n",
296 | " \n",
297 | " \n",
298 | " | \n",
299 | " Clump Thickness | \n",
300 | " Uniformity of Cell Size | \n",
301 | " Uniformity of Cell Shape | \n",
302 | " Marginal Adhesion | \n",
303 | " Single Epithelial Cell Size | \n",
304 | " Bare Nuclei | \n",
305 | " Bland Chromatin | \n",
306 | " Normal Nucleoli | \n",
307 | " Mitoses | \n",
308 | " Class | \n",
309 | "
\n",
310 | " \n",
311 | " \n",
312 | " \n",
313 | " | 0 | \n",
314 | " 5 | \n",
315 | " 1 | \n",
316 | " 1 | \n",
317 | " 1 | \n",
318 | " 2 | \n",
319 | " 1 | \n",
320 | " 3 | \n",
321 | " 1 | \n",
322 | " 1 | \n",
323 | " 0 | \n",
324 | "
\n",
325 | " \n",
326 | " | 1 | \n",
327 | " 5 | \n",
328 | " 4 | \n",
329 | " 4 | \n",
330 | " 5 | \n",
331 | " 7 | \n",
332 | " 10 | \n",
333 | " 3 | \n",
334 | " 2 | \n",
335 | " 1 | \n",
336 | " 0 | \n",
337 | "
\n",
338 | " \n",
339 | " | 2 | \n",
340 | " 3 | \n",
341 | " 1 | \n",
342 | " 1 | \n",
343 | " 1 | \n",
344 | " 2 | \n",
345 | " 2 | \n",
346 | " 3 | \n",
347 | " 1 | \n",
348 | " 1 | \n",
349 | " 0 | \n",
350 | "
\n",
351 | " \n",
352 | " | 3 | \n",
353 | " 6 | \n",
354 | " 8 | \n",
355 | " 8 | \n",
356 | " 1 | \n",
357 | " 3 | \n",
358 | " 4 | \n",
359 | " 3 | \n",
360 | " 7 | \n",
361 | " 1 | \n",
362 | " 0 | \n",
363 | "
\n",
364 | " \n",
365 | " | 4 | \n",
366 | " 4 | \n",
367 | " 1 | \n",
368 | " 1 | \n",
369 | " 3 | \n",
370 | " 2 | \n",
371 | " 1 | \n",
372 | " 3 | \n",
373 | " 1 | \n",
374 | " 1 | \n",
375 | " 0 | \n",
376 | "
\n",
377 | " \n",
378 | "
\n",
379 | "
"
380 | ],
381 | "text/plain": [
382 | " Clump Thickness Uniformity of Cell Size Uniformity of Cell Shape \\\n",
383 | "0 5 1 1 \n",
384 | "1 5 4 4 \n",
385 | "2 3 1 1 \n",
386 | "3 6 8 8 \n",
387 | "4 4 1 1 \n",
388 | "\n",
389 | " Marginal Adhesion Single Epithelial Cell Size Bare Nuclei \\\n",
390 | "0 1 2 1 \n",
391 | "1 5 7 10 \n",
392 | "2 1 2 2 \n",
393 | "3 1 3 4 \n",
394 | "4 3 2 1 \n",
395 | "\n",
396 | " Bland Chromatin Normal Nucleoli Mitoses Class \n",
397 | "0 3 1 1 0 \n",
398 | "1 3 2 1 0 \n",
399 | "2 3 1 1 0 \n",
400 | "3 3 7 1 0 \n",
401 | "4 3 1 1 0 "
402 | ]
403 | },
404 | "execution_count": 5,
405 | "metadata": {},
406 | "output_type": "execute_result"
407 | }
408 | ],
409 | "source": [
410 | "#Data cleaning\n",
411 | "data = data.drop(['Sample Code Number'],axis=1) #Drop 1st column\n",
412 | "data = data[data['Bare Nuclei'] != '?'] #Remove rows with missing data\n",
413 | "data['Class'] = np.where(data['Class'] ==2,0,1) #Recode Class: 2 -> 0, any other value -> 1\n",
414 | "data.head() #View top 5 rows"
415 | ]
416 | },
417 | {
418 | "cell_type": "code",
419 | "execution_count": 6,
420 | "metadata": {},
421 | "outputs": [
422 | {
423 | "data": {
424 | "text/plain": [
425 | "0 444\n",
426 | "1 239\n",
427 | "Name: Class, dtype: int64"
428 | ]
429 | },
430 | "execution_count": 6,
431 | "metadata": {},
432 | "output_type": "execute_result"
433 | }
434 | ],
435 | "source": [
436 | "#Distribution of Class\n",
437 | "data['Class'].value_counts()"
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": 7,
443 | "metadata": {},
444 | "outputs": [],
445 | "source": [
446 | "#Split data into attributes and class\n",
447 | "X = data.drop(['Class'],axis=1)\n",
448 | "y = data['Class']"
449 | ]
450 | },
451 | {
452 | "cell_type": "code",
453 | "execution_count": 8,
454 | "metadata": {},
455 | "outputs": [],
456 | "source": [
457 | "#perform training and test split\n",
458 | "from sklearn.model_selection import train_test_split\n",
459 | "X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)"
460 | ]
461 | },
462 | {
463 | "cell_type": "markdown",
464 | "metadata": {},
465 | "source": [
466 | "# 1. Dummy Classifier"
467 | ]
468 | },
469 | {
470 | "cell_type": "code",
471 | "execution_count": 158,
472 | "metadata": {},
473 | "outputs": [
474 | {
475 | "name": "stdout",
476 | "output_type": "stream",
477 | "text": [
478 | "y actual : \n",
479 | "0 103\n",
480 | "1 68\n",
481 | "Name: Class, dtype: int64\n",
482 | "y predicted : \n",
483 | "0 171\n",
484 | "dtype: int64\n",
485 | "Accuracy Score : 0.6023391812865497\n",
486 | "Precision Score : 0.0\n",
487 | "Recall Score : 0.0\n",
488 | "F1 Score : 0.0\n",
489 | "Confusion Matrix : \n",
490 | "[[103 0]\n",
491 | " [ 68 0]]\n"
492 | ]
493 | }
494 | ],
495 | "source": [
496 | "#Dummy Classifier\n",
497 | "from sklearn.dummy import DummyClassifier\n",
498 | "clf = DummyClassifier(strategy= 'most_frequent').fit(X_train,y_train)\n",
499 | "y_pred = clf.predict(X_test)\n",
500 | "\n",
501 | "#Distribution of y test\n",
502 | "print('y actual : \\n' + str(y_test.value_counts()))\n",
503 | "\n",
504 | "#Distribution of y predicted\n",
505 | "print('y predicted : \\n' + str(pd.Series(y_pred).value_counts()))\n",
506 | "\n",
507 | "# Model Evaluation metrics \n",
508 | "from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n",
509 | "print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))\n",
510 | "print('Precision Score : ' + str(precision_score(y_test,y_pred)))\n",
511 | "print('Recall Score : ' + str(recall_score(y_test,y_pred)))\n",
512 | "print('F1 Score : ' + str(f1_score(y_test,y_pred)))\n",
513 | "\n",
514 | "#Dummy Classifier Confusion matrix\n",
515 | "from sklearn.metrics import confusion_matrix\n",
516 | "print('Confusion Matrix : \\n' + str(confusion_matrix(y_test,y_pred)))"
517 | ]
518 | },
519 | {
520 | "cell_type": "markdown",
521 | "metadata": {},
522 | "source": [
523 | "# 2. Logistic Regression"
524 | ]
525 | },
526 | {
527 | "cell_type": "code",
528 | "execution_count": 159,
529 | "metadata": {},
530 | "outputs": [
531 | {
532 | "name": "stdout",
533 | "output_type": "stream",
534 | "text": [
535 | "Accuracy Score : 0.9473684210526315\n",
536 | "Precision Score : 0.9836065573770492\n",
537 | "Recall Score : 0.8823529411764706\n",
538 | "F1 Score : 0.9302325581395349\n",
539 | "Confusion Matrix : \n",
540 | "[[102 1]\n",
541 | " [ 8 60]]\n"
542 | ]
543 | }
544 | ],
545 | "source": [
546 | "#Logistic regression\n",
547 | "from sklearn.linear_model import LogisticRegression\n",
548 | "\n",
549 | "clf = LogisticRegression().fit(X_train,y_train)\n",
550 | "y_pred = clf.predict(X_test)\n",
551 | "\n",
552 | "# Model Evaluation metrics \n",
553 | "from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n",
554 | "print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))\n",
555 | "print('Precision Score : ' + str(precision_score(y_test,y_pred)))\n",
556 | "print('Recall Score : ' + str(recall_score(y_test,y_pred)))\n",
557 | "print('F1 Score : ' + str(f1_score(y_test,y_pred)))\n",
558 | "\n",
559 | "#Logistic Regression Confusion matrix\n",
560 | "from sklearn.metrics import confusion_matrix\n",
561 | "print('Confusion Matrix : \\n' + str(confusion_matrix(y_test,y_pred)))"
562 | ]
563 | },
564 | {
565 | "cell_type": "markdown",
566 | "metadata": {},
567 | "source": [
568 | "# 3. Logistic Regression + Grid Search"
569 | ]
570 | },
571 | {
572 | "cell_type": "code",
573 | "execution_count": 161,
574 | "metadata": {},
575 | "outputs": [
576 | {
577 | "name": "stdout",
578 | "output_type": "stream",
579 | "text": [
580 | "LogisticRegression(C=0.009, class_weight=None, dual=False, fit_intercept=True,\n",
581 | " intercept_scaling=1, max_iter=100, multi_class='warn',\n",
582 | " n_jobs=None, penalty='l1', random_state=None, solver='warn',\n",
583 | " tol=0.0001, verbose=0, warm_start=False)\n",
584 | "Accuracy Score : 0.9122807017543859\n",
585 | "Precision Score : 0.8732394366197183\n",
586 | "Recall Score : 0.9117647058823529\n",
587 | "F1 Score : 0.8920863309352517\n",
588 | "Confusion Matrix : \n",
589 | "[[94 9]\n",
590 | " [ 6 62]]\n"
591 | ]
592 | }
593 | ],
594 | "source": [
595 | "from sklearn.model_selection import GridSearchCV\n",
596 | "clf = LogisticRegression()\n",
597 | "grid_values = {'penalty': ['l1', 'l2'],'C':[0.001,.009,0.01,.09,1,5,10,25]}\n",
598 | "grid_clf_acc = GridSearchCV(clf, param_grid = grid_values,scoring = 'recall')\n",
599 | "grid_clf_acc.fit(X_train, y_train)\n",
600 | "\n",
601 | "print(grid_clf_acc.best_estimator_)\n",
602 | "\n",
603 | "y_pred_acc = grid_clf_acc.predict(X_test)\n",
604 | "\n",
605 | "# Model Evaluation metrics \n",
606 | "from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n",
607 | "print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))\n",
608 | "print('Precision Score : ' + str(precision_score(y_test,y_pred_acc)))\n",
609 | "print('Recall Score : ' + str(recall_score(y_test,y_pred_acc)))\n",
610 | "print('F1 Score : ' + str(f1_score(y_test,y_pred_acc)))\n",
611 | "\n",
612 | "#Logistic Regression + Grid Search Confusion matrix\n",
613 | "from sklearn.metrics import confusion_matrix\n",
614 | "print('Confusion Matrix : \\n' + str(confusion_matrix(y_test,y_pred_acc)))"
615 | ]
616 | },
617 | {
618 | "cell_type": "markdown",
619 | "metadata": {},
620 | "source": [
621 | "## Function for confusion matrix plot"
622 | ]
623 | },
624 | {
625 | "cell_type": "code",
626 | "execution_count": 162,
627 | "metadata": {},
628 | "outputs": [],
629 | "source": [
630 | "#Function to plot intuitive confusion matrix\n",
631 | "import itertools\n",
632 | "def plot_confusion_matrix(cm, classes,\n",
633 | " normalize=False,\n",
634 | " title='Confusion matrix',\n",
635 | " cmap=plt.cm.Blues):\n",
636 | " \"\"\"\n",
637 | " This function prints and plots the confusion matrix.\n",
638 | " Normalization can be applied by setting `normalize=True`.\n",
639 | " \"\"\"\n",
640 | " if normalize:\n",
641 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n",
642 | " print(\"Normalized confusion matrix\")\n",
643 | " else:\n",
644 | " print('Confusion matrix, without normalization')\n",
645 | "\n",
646 | " print(cm)\n",
647 | "\n",
648 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n",
649 | " plt.title(title)\n",
650 | " plt.colorbar()\n",
651 | " tick_marks = np.arange(len(classes))\n",
652 | " plt.xticks(tick_marks, classes, rotation=45)\n",
653 | " plt.yticks(tick_marks, classes)\n",
654 | "\n",
655 | " fmt = '.2f' if normalize else 'd'\n",
656 | " thresh = cm.max() / 2.\n",
657 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n",
658 | " plt.text(j, i, format(cm[i, j], fmt),\n",
659 | " horizontalalignment=\"center\",\n",
660 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n",
661 | "\n",
662 | " plt.ylabel('True label')\n",
663 | " plt.xlabel('Predicted label')\n",
664 | " plt.tight_layout()"
665 | ]
666 | },
667 | {
668 | "cell_type": "code",
669 | "execution_count": 163,
670 | "metadata": {},
671 | "outputs": [],
672 | "source": [
673 | "# Compute confusion matrix\n",
674 | "cnf_matrix = confusion_matrix(y_test, y_pred_acc)\n",
675 | "np.set_printoptions(precision=2)"
676 | ]
677 | },
678 | {
679 | "cell_type": "code",
680 | "execution_count": 164,
681 | "metadata": {},
682 | "outputs": [
683 | {
684 | "name": "stdout",
685 | "output_type": "stream",
686 | "text": [
687 | "Confusion matrix, without normalization\n",
688 | "[[94 9]\n",
689 | " [ 6 62]]\n"
690 | ]
691 | },
692 | {
693 | "data": {
694 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVUAAAEYCAYAAADsymWcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xm8HFWZxvHfc28SQhZIIKwJO2Gf\nCUvYEZFNVBREGQXUoFEUV8SFRWbEGRdQFFAQDKLEURBcEAQRYwQlTATDTlgS9iUhKwFCwpLwzh/n\nXGgu997uTiq3usLzzac+t2vpU293Vb85darqlCICMzMrRlvZAZiZrUycVM3MCuSkamZWICdVM7MC\nOamamRXISdXMrEClJlVJq0r6o6RnJP1mOco5StJfioytLJLeIun+suNolKQNJS2U1L4M7z1f0n+u\niLhamaSTJf207DiWV719VdJFkr7ZmzE1S9Kpkn5ZZJkNJVVJR0qakn88MyVdI2mvAtb/fmAdYM2I\nOHxZC4mIX0XEgQXEs0JJCkmb97RMRNwQEVuuoPUXvpNHxGMRMSgiltZZ99GSJnV676ci4n+aXaek\nRyQtzvvjU/lzDWq2nLJExLcj4uNlx1FL0khJv5Y0R9KzkqZL+pGkEd29Z3n2VUn9JH1f0hN5Oz4s\n6cxl/wSto25SlXQ8cBbwbVIC3BD4MXBIAevfCJgWEUsKKKvyJPUpO4YKeXdEDAK2B3YATloRK1mW\nGnirkNTQnT35P/qbgBnADhGxGrAn8CDQZeWpgH31JGA0sAswGHgbcNtylvkGpfymIqLbAVgdWAgc\n3sMyq5CS7ow8nAWskuftAzwBfAmYDcwEPprnfQN4CXg5r2MscCrwy5qyNwYC6JPHjwYeAp4DHgaO\nqpk+qeZ9ewD/Ap7Jf/eomXc98D/AjbmcvwDDuvlsHfF/tSb+Q4F3AtOA+cDJNcvvAkwGFuRlzwH6\n5Xn/yJ/l+fx5P1BT/gnAU8D/dkzL79ksr2PHPL4+MBfYp6ft1sO2ugj4ZjfzevrONsnxPwf8FTi3\nYzs1so2ArYEXgKX5sy/oKh7Sf9S3A8+SftAHdRPrI8D+NePfBa7utE+eATwGzALOB1atmf/VvH1m\nAB/P8W9eE9N5wJ/yttq/p/KAYcBVeZvPB24A2vK8E4An83dxP7Bfnn4qr9/P3wNMzWVcD2zd6bN+\nGbgzb5tLgf4Nbu9ocLlfAn+ss8w+9LCv5mV2AG7Nn/dS4Nc97G9XAcf1sL71gd8Bc/J+9PlGfmcd\nnxv4DDAdeDhP2xaYkLfRLPLvNm+Ly4Bf5LinAqOX5ff16vrrfJEHAUvIP5hulvlv4J/A2sBawP8B\n/1OzIZbkZfqSktEiYGg3O1fn8Y3zF9QHGEj6sW2Z560HbFvzQ56UX68BPA18OL/viDy+Zp5/PekH\nuwWwah4/rYcdaQnwXzn+T+SNfDHpf9dtScli07z8TsBueb0bA/fW7jjU/Hg7lX866Ye7Km/cUT+R\nyxkAXAucscwbu5uk2sB3NpmUVPqRai7P0kVSbXQbdRUP6YfyDHAA6QhqOLBVN5/jEXJSBUYAdwFn\n18w/C7gyf67BwB+B79Ts00/lbTeAlBw6J9VnSDW1NqB/nfK+Q0qyffPwFkDAlsDjwPo139Nmnfdz\n0n74fP7cfUkJ/wFe+8/4EeBmUpJZI+8Ln2pwe0eDyz0FHF1nmX3oYV/N+8ajwBfz53g/qcLUXVI9\nhfSf1KeBfwNUM68NuIX0u+sHbEr6j/rtTfzOJuTva9W8zWaSKnf98/iuNdviBVJuas/b85/L+huL\nBpLqUcBTdZZ5EHhnzfjbgUdqNsRiapIyqca3W+edq5vxjXn9D3YB8D5qah2df7CkxHBzp/mTO3Ya\nUhI9pWbep4E/97AjLQba8/jgHM+uNc
vcAhzazfuPAy7vtLE7J9WXqKl50Cmp5mlXkhLHneSjgGXa\n2N0n1W6/M1JzzxJgQM28X9J9Uq27jbqKB/gJcGaDn+MRUo33ubzuicCQPE+kJLVZzfK781qN5Wfk\nhJjHN+eNSfUXNfPrlfffwBW127Wm3Nmkmm7fTvNOrfn+/hO4rGZeG6l2u0/NZ/1QzfzvAuc3+D1F\ng8stoeaoAPhs3o4LgQsa2VeBvUk1/9rk+H9d7W95XjupNnkj8GJ+75g8b1fgsU7LnwT8vInf2b41\n40cAt3Xz3lOBv9aMbwMsXtbfWETUbVOdBwyr0y6xPul/qA6P5mmvlhGvbzNdBDR9UiEinicdMn8K\nmCnpaklbNRBPR0zDa8afaiKeefHaSZjF+e+smvmLO94vaQtJV+WTJ8+S2qGH9VA2wJyIeKHOMhcA\n2wE/iogXu1ogXwGxMA/X1Cmvs56+s/WB+RGxqGbe410V0sQ26soGpP+gG3VoRAwm/bC34rXveS1S\nDfQWSQskLQD+nKeTP09t/F19ltpp9cr7Hqlm+RdJD0k6ESAiHiD92E8FZueTQLW/iw6v++4j4pW8\n/qb3V0l7dcSY46R2vIeTy/NIRxUdMZwTEUNINfS+Ncv1tK+uDzwZOTNlnfepV0XE0og4NyL2BIYA\n3wJ+Jmlr0rmW9Tt9lpNJ53Qa/Z3VbsN6+1bn77f/8rTF1kuqk0lV40N7WGYG6UvosGGetiyeJ+3A\nHdatnRkR10bEAaQd4D5SsqkXT0dMTy5jTM04jxTXyEiN/SeTajo9iZ5m5rPaZwEXAqdKWqPLQtIV\nEIPy8I4m4+7pO5sJrCGpdrts0F1BPWyjHj8n6UewWTNB5/X9nVS7PCNPmkv6j27biBiSh9UjndSC\n9Hlqz2h39VlqY+2xvIh4LiK+FBGbAu8Gjpe0X553cUTsRfpug3To3NnrvntJyjE1vb9GxKSaGIfk\naUNqhkndvHUicFgjq+hh3kxgeI6/w4YNxr04Is4lNTltQ9oXHu4U++CIeGd+SyO/s9pYl2nfWlY9\nJtWIeIbUrnGupEMlDZDUV9I7JH03L3YJcIqktSQNy8sv63VftwN752sfV6fmjK6kdSS9R9JA0uHC\nQtKJj87+BGyRLwPrI+kDpA111TLG1IzBpDbFhbmGdmyn+bNI7UPNOBu4JdIlOFeT2u+WR7uk/jVD\nP3r4ziLiUWAKKaH3k7Q7KXm8QZ1tNAsYkdfXlQuBj0raT1KbpOFN1HLPAg6QtH2u6V0AnClp7RzX\ncElvz8teltezdf6P4r96KrheeZIOlrR5TibP5s+7VNKWkvaVtAqpYrKYrvfXy4B35c/dl9Tu9yLp\n0Lm3nAq8RdIPJA0HyL/lrZsoYzKpGeHzeR86jNRO3iVJx0naR+la9T6SxpB+P7eR2pCflXRCnt8u\naTtJO+e31/uddXYVsG5e5yqSBkvatYnP1pS6l1RFxA+A40kNy3NIWf+zwB/yIt8k/ejuJLX73Zqn\nNS0iJpDOGt5JaqusTYRtpB1uBukM3ltJ7aGdy5gHHJyXnUdq+D84IuYuS0xN+jJwJKmt7wLSZ6l1\nKjA+H9L8R73CJB1COrHyqTzpeGBHSUctR4wnkn7gHcPfGvjOjiK1I84jbdtLST/8znraRn8jnVl9\nStIbtkVE3Ax8FDiTdKLo77yx9tyliJhDOnvbcSPBCaRD8n/mw8O/kk4cERHXAD8ErsvLTM7v6bJZ\npV55wMg8vjCX9eOIuJ50Muc0Uk33KdKJ3JO7iP1+4EPAj/Ky7yZdLvZSI5+9CBExjXTiZwRwh6Tn\nSG2dM3jtO61Xxkuk2u7RpBrnB4Df9/CWxcD3Sd/NXFL76vsi4qHc3PZu0uVyD+f5PyVdjQT1f2ed\nY3uOdCLw3Xl900mXcK0Qen0TiFl9ki4F7ouIr5cdy/LKbXh3k04A+nppW26+99/qkrSzpM3yYflB\npO
tJ/1Dvfa1K0ntzU8ZQUjvnH51QrShOqtaIdUmXoi0kHTofGxGF3/3Siz5Jasp6kNTOWa9Nzqxh\nPvw3MyuQa6pmZgV603TgoT6rhvoNLjsMa8KorRq6zNFayO233TI3Itaqv+Tya19to4gli+svCMTi\nOddGxEErOCTgzZRU+w1mlS3rXsVkLeT6G88uOwRr0pABfbq9i6posWRxw7/pF24/t96djYV50yRV\nM1vZCNR6LZhOqmZWTQLaWq+7WydVM6su1etao/c5qZpZRfnw38ysWC1YU229NG9m1giRaqqNDI0U\nJ31B0t2Spko6Lk9bQ9IEpQchTsi3NvfISdXMKkqpptrIUK8kaTvSo4t2AUYBB0saSerVbWJEjCT1\nO3tivbKcVM2sutraGxvq25r0bKpFuXOdvwPvJXUeND4vM56eO+xPIS3jRzEzK5maOfwfJmlKzXBM\np8LuJnWQv2buvPydpCcwrBMRMwHy37XrReUTVWZWTaKZE1VzI2J0dzMj4l5Jp5OewroQuIP0JIOm\nuaZqZtVV4ImqiLgwInaMiL1JT66YDsyStB5A/ju7XjlOqmZWUU0d/tcv7bVnkG1IejTMJaTHw4/J\ni4whPY68Rz78N7Pqaiv0OtXfSVoTeBn4TEQ8Lek04DJJY4HHgMPrFeKkambVVPC9/xHxli6mzQP2\na6YcJ1UzqyjfpmpmVqwWvE3VSdXMqss1VTOzgjR4C2pvc1I1s+pyJ9VmZkXxiSozs2L58N/MrCAd\n/am2GCdVM6soH/6bmRXLh/9mZgXy2X8zs4LIh/9mZsXy4b+ZWXHkpGpmVoz0NBUnVTOzYigPLab1\nWnnNzBoi2traGhoaKk36oqSpku6WdImk/pI2kXSTpOmSLpXUr145TqpmVlmSGhoaKGc48HlgdERs\nB7QDHwROB86MiJHA08DYemU5qZpZZRWVVLM+wKqS+gADgJnAvsBv8/zxwKH1CnFSNbNqUhMDDJM0\npWY4praoiHgSOIP0cL+ZwDPALcCCiFiSF3sCGF4vLJ+oMrNKEk3VQudGxOhuy5KGAocAmwALgN8A\n7+hi0ai3IidVM6usRk9CNWB/4OGImAMg6ffAHsAQSX1ybXUEMKNuTEVFZGbW2wpsU30M2E3SAKU3\n7AfcA1wHvD8vMwa4ol5BTqpmVk3Ntan2KCJuIp2QuhW4i5QbxwEnAMdLegBYE7iwXlk+/Dezyiry\njqqI+Drw9U6THwJ2aaYcJ1Uzq6QmT1T1GidVM6ssJ1Uzs6II1OakamZWGNdUzcwK5KRqZlYQn6gy\nMyta6+VUX/xfNZ85Yh+m/OZkbvnt1/jskfu8bt5xH96Pxbedw5pDBpYTnNV13rk/ZPfRo9htp3/n\nx+ecXXY41abCe6kqhJNqhWyz2Xp89LA9eMuHv8cuH/gO79h7OzbbcC0ARqwzhH1324rHZs4vOUrr\nzj1T7+YXP7+Qif+YzKSbbuXaa67mwQemlx1WpRXZSXVhMfXq2my5bLXJutx81yMsfuFlli59hRtu\neYBD3jYKgO9++X187ew/EFG3Ex0rybT772P0zrsyYMAA+vTpw5577c1VV/6h7LCqraDbVIvkpFoh\nUx+cwV47bs4aqw9k1f59OWivbRmx7lDe9dZ/Y8bsBdw17cmyQ7QebL3NtvzfjTcwf948Fi1axIRr\nr+GJJ54oO6xKa8XD/0qfqJJ0EHA26dEHP42I00oOaYW6/+FZfP+iCVx13md5fvGL3DntSZYsWcoJ\nY9/OwZ8+p+zwrI4tt9qaLxz/FQ49+CAGDhrIdv82ij592ssOq7LKSJiNqGxNVVI7cC6pI9ltgCMk\nbVNuVCve+D9MZo8jT+eAsWfx9DPP8+iM+Ww0fE1uvvQk7rv6GwxfewiTLz6BddYcXHao1oWPHP0x\n/jH5X1wz4XqGDh3KZpuNLDukSnNNtVi7AA9ExEMAkn5N6rn7nlKj
WsHWGjqIOU8vZIN1h3LIvqPY\nZ8z3OfeS61+df9/V32DPo77LvAXPlxekdWvO7NmstfbaPP74Y/zxyj8w4bpJZYdUaa1YU61yUh0O\nPF4z/gSwa+0C+Tk06Vk0fQf1WmAr0iVnfJw1hgzk5SVLOe60y1jw3OKyQ7ImfOTIw5k/fz59+vbl\njDN/yJChQ8sOqdJ873+xuvo2X3fqOyLGkTqapW3A2ivFafH9x57V4/yt3tW5O0hrJdf89e9lh7Dy\nUGvWVCvbpkqqmW5QM97Q82PMbOUgQGpsqFuWtKWk22uGZyUdJ2kNSRMkTc9/6x5aVDmp/gsYKWkT\nSf2ADwJXlhyTmfWaxk5SNVKbjYj7I2L7iNge2AlYBFwOnAhMjIiRwMQ83qPKJtX8dMPPAtcC9wKX\nRcTUcqMys95UVE21k/2AByPiUdLJ7/F5+njg0HpvrnKbKhHxJ+BPZcdhZiUQtDV+omqYpCk14+Py\nOZeufBC4JL9eJyJmAkTETElr11tRpZOqmb15iaaS6tyIGF23zNSU+B7gpGWNq7KH/2ZmK+Dw/x3A\nrRExK4/PkrReWpfWA2bXK8BJ1cwqawXcUXUErx36Qzr5PSa/HgNcUa8AJ1Uzq6YGa6mN5lRJA4AD\ngN/XTD4NOEDS9Dyvbv8iblM1s0pK16kWd/F/RCwC1uw0bR7paoCGOamaWUWpmRNVvcZJ1cwqqxVv\nU3VSNbNqWrYL+1c4J1Uzq6Si21SL4qRqZpXVgjnVSdXMqss1VTOzojR373+vcVI1s0rq6E+11Tip\nmllFtebTVJ1UzayyWjCnOqmaWXW5pmpmVhD5RJWZWbFcUzUzK1AL5lQnVTOrrlasqbqTajOrpuI7\nqR4i6beS7pN0r6TdJa0haYKk6fnv0HrlOKmaWSWJxh6l0kRt9mzgzxGxFTAKuBc4EZgYESOBiXm8\nR06qZlZZ7W1qaKhH0mrA3sCFABHxUkQsAA4BxufFxgOH1ivLSdXMKqvAw/9NgTnAzyXdJumnkgYC\n60TETID8d+16BTmpmlklpYTZ8OH/MElTaoZjOhXXB9gROC8idgCep4FD/a6UdvY/V7e7FRHP9lYs\nZlZNTVz7PzciRvcw/wngiYi4KY//lpRUZ0laLyJmSloPmF1vRWVeUjUVCFJnMx06xgPYsIygzKw6\nirqkKiKekvS4pC0j4n7SE1TvycMY0qOpxwBX1CurtKQaERuUtW4zWzkUfJnq54BfSeoHPAR8lNRE\nepmkscBjwOH1CmmJi/8lfRDYNCK+LWkEqXH4lrLjMrPWJaC9wKwaEbcDXTUR7NdMOaWfqJJ0DvA2\n4MN50iLg/PIiMrNKaPAkVW/fddUKNdU9ImJHSbcBRMT8XP02M+tRC96l2hJJ9WVJbaSTU0haE3il\n3JDMrNUJaGvBrFr64T9wLvA7YC1J3wAmAaeXG5KZVUGR9/4XpfSaakT8QtItwP550uERcXeZMZlZ\n63Mn1T1rB14mNQG0Qu3ZzCrAh/9dkPQ14BJgfWAEcLGkk8qNysyqQA0OvakVaqofAnaKiEUAkr4F\n3AJ8p9SozKzltWIn1a2QVB/l9XH0Id3NYGbWrXT2v+wo3qjMDlXOJLWhLgKmSro2jx9IugLAzKx7\nJVzY34gya6odZ/inAlfXTP9nCbGYWQX57H+NiLiwrHWbWfX58L8bkjYDvgVsA/TvmB4RW5QWlJlV\nQise/pd+SRVwEfBz0n887wAuA35dZkBmVg2teElVKyTVARFxLUBEPBgRp5B6rTIz65aULv5vZOhN\npR/+Ay8q1eEflPQp4EkaeLiWmVkLHv23RFL9IjAI+DypbXV14GOlRmRmlVDk2X9JjwDPAUuBJREx\nWtIawKXAxsAjwH9ExNM9lVN6Uq150NZzvNZRtZlZj8QKObR/W0TMrRk/EZgYEadJOjGPn9BTAWVe\n/H85uQ/VrkTEYb0YjplVTe90
63cIsE9+PR64nlZNqsA5vbmyHbbekBtv6tVV2nI66Jwbyw7BWlwT\nl1QNkzSlZnxcRIzrtEwAf5EUwE/y/HUiYiZAfkx13fM9ZV78P7GsdZvZyqGJy5fmRkRXD/WrtWdE\nzMiJc4Kk+1ZwTGZmrUNQ6IP/ImJG/jsbuBzYBZglaT3SutYDZtcrx0nVzCqrT1tjQz2SBkoa3PGa\n1LHT3cCVwJi82BjgiroxLeuHKZqkVSLixbLjMLNqSM+fKuxM1TrA5bm8PsDFEfFnSf8CLpM0FngM\nOLxeQaUnVUm7ABeSrk/dUNIo4OMR8blyIzOzVlfUZaoR8RAwqovp84D9moqpmJCWyw+Bg4F5ABFx\nB75N1cwa4Kepdq0tIh7tVI1fWlYwZlYNqeu/1rtPtRWS6uO5CSAktQOfA6aVHJOZVUB76+XUlkiq\nx5KaADYEZgF/zdPMzLqlEnqgakTpSTVfE/bBsuMws+ppwZxaflKVdAFd9AEQEceUEI6ZVYgfp9K1\nv9a87g+8F3i8pFjMrCJ8oqobEXFp7bik/wUmlBSOmVVIC+bU8pNqFzYBNio7CDNrcYL2FsyqpSdV\nSU/zWptqGzCf1BGsmVm3/IjqLuRnU40iPZcK4JWI6LbjajOzWq2YVEu9TTUn0MsjYmkenFDNrGFF\ndv1XlFa49/9mSTuWHYSZVUvH4X8jQ28q8xlVfSJiCbAX8AlJDwLPk76riAgnWjPrXgmdpTSizDbV\nm4EdgUNLjMHMKkpAnxZsVC0zqQogIh4sMQYzqzDXVF9vLUnHdzczIn7Qm8GYWdWINorNqrmnvCnA\nkxFxsKRNgF8DawC3Ah+OiJd6KqPME1XtwCBgcDeDmVm30oP/Cu+k+gvAvTXjpwNnRsRI4GlgbL0C\nyqypzoyI/y5x/WZWZQWf2Zc0AngX8C3g+Hwd/b7AkXmR8cCpwHk9lVN6m6qZ2bIQ0N54Vh0maUrN\n+LiIGNdpmbOAr/LakfKawIJ8lRLAE8DweisqM6k29TAtM7POmuilam5EjO5upqSDgdkRcYukfTom\nd7Fo3RuUSkuqETG/rHWb2cqhwLP/ewLvkfROUhekq5FqrkNqrqkfAcyoV1Ar3FFlZtY0kRJYI0M9\nEXFSRIyIiI1JTyL5W0QcBVwHvD8vNga4ol5ZTqpmVk3qlXv/TyCdtHqA1MZ6Yb03lN71n5nZsloR\nZ7sj4nrg+vz6IWCXZt7vpGpmlSTcSbWZWaFaMKc6qZpZVfV+X6mNcFI1s0rqOPvfapxUzayyXFM1\nMytQ66VUJ1Uzqyj5EdVmZsXy4b+ZWYFaL6U6qZpZhbVgRdVJ1cyqKV1S1XpZ1UnVzCrLNVUzs8Ko\nmU6qe42TqplVkg//zcyK1PyTUnuFk6qZVVYrJtVW7I/AzKwhavBf3XKk/pJulnSHpKmSvpGnbyLp\nJknTJV0qqV+9slxTrbAFCxZw7Cc/zj1T70YS54/7GbvtvnvZYVkng1Zp5yv7b84maw4ggNMnPMDe\nm63JHpsO5eWlwYxnXuD0CdNZ+OLSskOtlII7qX4R2DciFkrqC0ySdA1wPHBmRPxa0vnAWOC8ngpy\nUq2wL3/xCxx44EFcculveemll1i0aFHZIVkXPvvWTbn5kQV8/er76dMm+vdtY0rfdi648RGWBhyz\n10YcufMIxk16tOxQK6eonBoRASzMo33zEMC+wJF5+njgVOokVR/+V9Szzz7LpEn/4OiPjQWgX79+\nDBkypOSorLMB/doZNXw1rp46C4AlrwQLX1zKlMcWsDQ/Qf6emc+x1qBVSoyyupo4/B8maUrNcMwb\nypLaJd0OzAYmAA8CC/LjqQGeAIbXi8k11Yp6+KGHGDZsLY4Z+1HuuvMOdthxJ84482wGDhxYdmhW\nY/3V+7Ng8cuceODmbDZsINNmP8+Prn+IF5a88uoy79x2Ha6bNrfEKKtJQFvjNdW5ETG6pwUiYi
mw\nvaQhwOXA1l0tVm9Fla2pSvqZpNmS7i47ljIsWbKE22+7lU988lj+OeU2BgwcyBnfPa3ssKyTdokt\n1h7EFXc+xScuvoPFLy/lyJ1HvDr/QzuPYOkrwYT75pQYZVU1Wk9tro0gIhaQnqa6GzBEUkflcwQw\no977K5tUgYuAg8oOoizDR4xg+IgR7LLrrgC8933v5/bbbi05KutszsIXmbPwRe59KjXX/X36PEau\nnY4m3r71Wuy+6VC++edpZYZYXfk61UaGukVJa+UaKpJWBfYH7gWuA96fFxsDXFGvrMom1Yj4BzC/\n7DjKsu666zJixAZMu/9+AK7/20S22nqbkqOyzuYvepnZz73IBkNXBWCnDVfn0XmL2WWjIRwxegQn\nX3kvL9Y0BVjjOs7+NzI0YD3gOkl3Av8CJkTEVcAJwPGSHgDWBC6sV9BK3aaaG6OPAdhgww1LjqZ4\nPzjrR3z0I0fx0ksvsfGmmzLupz8vOyTrwg+vf5hTDtqCPm1i5rMvcNpfpvOTI0bRt72N7x+2LQD3\nzFzID/72YMmRVk9RF1RFxJ3ADl1MfwjYpZmyVuqkGhHjgHEAO+00um4Dc9WM2n57brxpStlhWB0P\nzHmeT15yx+umHXWRm2oK0YJ3VK3USdXMVm7NnoTqDU6qZlZZvve/QJIuASYDW0p6QtLYsmMys96l\nBofeVNmaakQcUXYMZlYe4aepmpkVx/2pmpkVqwVzqpOqmVVYC2ZVJ1Uzq6jm7+vvDU6qZlZJTfZS\n1WucVM2supxUzcyK48N/M7MC+ZIqM7MCtWBOdVI1s4oq4x7UBlT23n8ze3NLZ//V0FC3LGkDSddJ\nulfSVElfyNPXkDRB0vT8d2i9spxUzayyCuxQZQnwpYjYmvRsqs9I2gY4EZgYESOBiXm8R06qZlZd\nBWXViJgZEbfm18+Rnk81HDgEGJ8XGw8cWq8st6maWWWtiEuqJG1MerTKTcA6ETETUuKVtHa99zup\nmlllNXFJ1TBJtc8eGpcft9SpPA0CfgccFxHPLkvXgk6qZlZZTaS8uRExuseypL6khPqriPh9njxL\n0nq5lroeMLveitymamaV1NFJdSND3bLSQhcC90bED2pmXQmMya/HAFfUK8s1VTOrpmI7qd4T+DBw\nl6Tb87STgdOAy/Ljmh4DDq9XkJOqmVVWUTk1Iib1UNx+zZTlpGpm1dWCd1Q5qZpZRbmTajOzwriT\najOzojmpmpkVx4f/ZmYFcifVZmYFasGc6qRqZhVV7MX/hXFSNbNK6rhNtdU4qZpZZbVeSnVSNbMK\na8GKqpOqmVWXL6kyMytS6+VUJ1Uzq64WzKlOqmZWTRINPX66tzmpmll1tV5O9eNUzKy6CnpCNZJ+\nJmm2pLtrpq0haYKk6fnv0EaoYO6/AAAF4ElEQVRiclI1s8qSGhsacBFwUKdpJwITI2IkMDGP1+Wk\namYVpYb/1RMR/wDmd5p8CDA+vx4PHNpIVG5TNbNKSrepNrz4MElTasbHRcS4Ou9ZJyJmAuRHVK/d\nyIqcVM2ssppIqnMjYvQKDOVVPvw3s8oq6vC/G7MkrQeQ/85u5E1OqmZWTQ2epFqOS1mvBMbk12OA\nKxp5k5OqmVVSo5dTNXhJ1SXAZGBLSU9IGgucBhwgaTpwQB6vy22qZlZdBV38HxFHdDNrv2bLclI1\ns8rybapmZgVqvZTqpGpmVdaCWdVJ1cwqqxU7qVZElB1Dr5A0B3i07DhWkGHA3LKDsKasrNtso4hY\nqzdWJOnPpO+xEXMjovO9/SvEmyaprswkTemtu0WsGN5mKy9fp2pmViAnVTOzAjmprhzq9bZjrcfb\nbCXlNlUzswK5pmpmViAnVTOzAjmpmpkVyEm1giRtKWl3SX0ltZcdjzXO22vl5xNVFSPpMODbwJN5\nmAJcFBHPlhqY9UjSFhExLb9uj4ilZcdkK4ZrqhUiqS/wAW
BsROxH6ol8A+CrklYrNTjrlqSDgdsl\nXQwQEUtdY115OalWz2rAyPz6cuAqoB9wpNSCnUu+yUkaCHwWOA54SdIvwYl1ZeakWiER8TLwA+Aw\nSW+JiFeAScDtwF6lBmddiojngY8BFwNfBvrXJtYyY7MVw0m1em4A/gJ8WNLeEbE0Ii4G1gdGlRua\ndSUiZkTEwoiYC3wSWLUjsUraUdJW5UZoRXJ/qhUTES9I+hUQwEn5B/kisA4ws9TgrK6ImCfpk8D3\nJN0HtANvKzksK5CTagVFxNOSLgDuIdV8XgA+FBGzyo3MGhERcyXdCbwDOCAinig7JiuOL6mquHyy\nI3L7qlWApKHAZcCXIuLOsuOxYjmpmpVAUv+IeKHsOKx4TqpmZgXy2X8zswI5qZqZFchJ1cysQE6q\nZmYFclK1V0laKul2SXdL+o2kActR1j6Srsqv3yPpxB6WHSLp08uwjlMlfbnR6Z2WuUjS+5tY18aS\n7m42RnvzcVK1WosjYvuI2A54CfhU7UwlTe8zEXFlRJzWwyJDgKaTqlkrclK17twAbJ5raPdK+jFw\nK7CBpAMlTZZ0a67RDgKQdJCk+yRNAg7rKEjS0ZLOya/XkXS5pDvysAdwGrBZriV/Ly/3FUn/knSn\npG/UlPU1SfdL+iuwZb0PIekTuZw7JP2uU+17f0k3SJqWu+dDUruk79Ws+5PL+0Xam4uTqr2BpD6k\nWyjvypO2BH4RETsAzwOnAPtHxI6kTrKPl9QfuAB4N/AWYN1uiv8h8PeIGAXsCEwFTgQezLXkr0g6\nkNS94S7A9sBOkvaWtBPwQWAHUtLeuYGP8/uI2Dmv715gbM28jYG3Au8Czs+fYSzwTETsnMv/hKRN\nGliPGeB7/+31VpV0e359A3AhqferRyPin3n6bsA2wI25+9Z+wGRgK+DhiJgOkHthOqaLdewLfARe\n7frumXzbZq0D83BbHh9ESrKDgcsjYlFex5UNfKbtJH2T1MQwCLi2Zt5l+fbe6ZIeyp/hQODfa9pb\nV8/rntbAusycVO11FkfE9rUTcuJ8vnYSMCEijui03PaknrOKIOA7EfGTTus4bhnWcRFwaETcIelo\nYJ+aeZ3Lirzuz0VEbfJF0sZNrtfepHz4b836J7CnpM0BJA2QtAVwH7CJpM3yckd08/6JwLH5ve35\nMTDPkWqhHa4FPlbTVjtc0trAP4D3SlpV0mBSU0M9g4GZ+VE0R3Wad7ikthzzpsD9ed3H5uWRtEXu\nvd+sIa6pWlMiYk6u8V0iaZU8+ZSImCbpGOBqSXNJTyTYrosivgCMkzQWWAocGxGTJd2YL1m6Jrer\nbg1MzjXlhaSuDW+VdCnpSQePkpoo6vlP4Ka8/F28PnnfD/yd1Bftp3JftT8ltbXeqrTyOcChjX07\nZu5QxcysUD78NzMrkJOqmVmBnFTNzArkpGpmViAnVTOzAjmpmpkVyEnVzKxA/w+eaOKzMirJMgAA\nAABJRU5ErkJggg==\n",
695 | "text/plain": [
696 | ""
697 | ]
698 | },
699 | "metadata": {},
700 | "output_type": "display_data"
701 | }
702 | ],
703 | "source": [
704 | "# Plot non-normalized confusion matrix\n",
705 | "plt.figure()\n",
706 | "class_names = [0,1]\n",
707 | "plot_confusion_matrix(cnf_matrix, classes=class_names,\n",
708 | " title='Confusion matrix - Logistic Regression + Grid Search')\n",
709 | "a = plt.gcf()\n",
710 | "a.set_size_inches(8,4)\n",
711 | "plt.show()"
712 | ]
713 | }
714 | ],
715 | "metadata": {
716 | "kernelspec": {
717 | "display_name": "Python 3",
718 | "language": "python",
719 | "name": "python3"
720 | },
721 | "language_info": {
722 | "codemirror_mode": {
723 | "name": "ipython",
724 | "version": 3
725 | },
726 | "file_extension": ".py",
727 | "mimetype": "text/x-python",
728 | "name": "python",
729 | "nbconvert_exporter": "python",
730 | "pygments_lexer": "ipython3",
731 | "version": "3.6.5"
732 | }
733 | },
734 | "nbformat": 4,
735 | "nbformat_minor": 2
736 | }
737 |
--------------------------------------------------------------------------------
/Python for data science - Part 4.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 69,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#import libraries\n",
10 | "import pandas as pd\n",
11 | "import os\n",
12 | "import numpy as np"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 70,
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "#Set working directory\n",
22 | "os.chdir('C:\\\\Users\\\\rohan\\\\Documents\\\\Analytics\\\\Data')"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 71,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "#import iris dataset\n",
32 | "a = pd.read_csv('iris.csv')"
33 | ]
34 | },
35 | {
36 | "cell_type": "code",
37 | "execution_count": 72,
38 | "metadata": {},
39 | "outputs": [
40 | {
41 | "data": {
42 | "text/html": [
43 | "\n",
44 | "\n",
57 | "
\n",
58 | " \n",
59 | " \n",
60 | " | \n",
61 | " SepalLength | \n",
62 | " SepalWidth | \n",
63 | " PetalLength | \n",
64 | " PetalWidth | \n",
65 | " Name | \n",
66 | "
\n",
67 | " \n",
68 | " \n",
69 | " \n",
70 | " | 0 | \n",
71 | " 5.1 | \n",
72 | " 3.5 | \n",
73 | " 1.4 | \n",
74 | " 0.2 | \n",
75 | " setosa | \n",
76 | "
\n",
77 | " \n",
78 | " | 1 | \n",
79 | " 4.9 | \n",
80 | " 3.0 | \n",
81 | " 1.4 | \n",
82 | " 0.2 | \n",
83 | " setosa | \n",
84 | "
\n",
85 | " \n",
86 | " | 2 | \n",
87 | " 4.7 | \n",
88 | " 3.2 | \n",
89 | " 1.3 | \n",
90 | " 0.2 | \n",
91 | " setosa | \n",
92 | "
\n",
93 | " \n",
94 | " | 3 | \n",
95 | " 4.6 | \n",
96 | " 3.1 | \n",
97 | " 1.5 | \n",
98 | " 0.2 | \n",
99 | " setosa | \n",
100 | "
\n",
101 | " \n",
102 | " | 4 | \n",
103 | " 5.0 | \n",
104 | " 3.6 | \n",
105 | " 1.4 | \n",
106 | " 0.2 | \n",
107 | " setosa | \n",
108 | "
\n",
109 | " \n",
110 | "
\n",
111 | "
"
112 | ],
113 | "text/plain": [
114 | " SepalLength SepalWidth PetalLength PetalWidth Name\n",
115 | "0 5.1 3.5 1.4 0.2 setosa\n",
116 | "1 4.9 3.0 1.4 0.2 setosa\n",
117 | "2 4.7 3.2 1.3 0.2 setosa\n",
118 | "3 4.6 3.1 1.5 0.2 setosa\n",
119 | "4 5.0 3.6 1.4 0.2 setosa"
120 | ]
121 | },
122 | "execution_count": 72,
123 | "metadata": {},
124 | "output_type": "execute_result"
125 | }
126 | ],
127 | "source": [
128 | "#View top 5 rows of dataset\n",
129 | "a.head()"
130 | ]
131 | },
132 | {
133 | "cell_type": "markdown",
134 | "metadata": {},
135 | "source": [
136 | "# 1. Descriptive statistics of numerical columns"
137 | ]
138 | },
139 | {
140 | "cell_type": "code",
141 | "execution_count": 73,
142 | "metadata": {},
143 | "outputs": [
144 | {
145 | "data": {
146 | "text/html": [
147 | "\n",
148 | "\n",
161 | "
\n",
162 | " \n",
163 | " \n",
164 | " | \n",
165 | " SepalLength | \n",
166 | " SepalWidth | \n",
167 | " PetalLength | \n",
168 | " PetalWidth | \n",
169 | "
\n",
170 | " \n",
171 | " \n",
172 | " \n",
173 | " | count | \n",
174 | " 150.000000 | \n",
175 | " 150.000000 | \n",
176 | " 150.000000 | \n",
177 | " 150.000000 | \n",
178 | "
\n",
179 | " \n",
180 | " | mean | \n",
181 | " 5.843333 | \n",
182 | " 3.054000 | \n",
183 | " 3.758667 | \n",
184 | " 1.198667 | \n",
185 | "
\n",
186 | " \n",
187 | " | std | \n",
188 | " 0.828066 | \n",
189 | " 0.433594 | \n",
190 | " 1.764420 | \n",
191 | " 0.763161 | \n",
192 | "
\n",
193 | " \n",
194 | " | min | \n",
195 | " 4.300000 | \n",
196 | " 2.000000 | \n",
197 | " 1.000000 | \n",
198 | " 0.100000 | \n",
199 | "
\n",
200 | " \n",
201 | " | 25% | \n",
202 | " 5.100000 | \n",
203 | " 2.800000 | \n",
204 | " 1.600000 | \n",
205 | " 0.300000 | \n",
206 | "
\n",
207 | " \n",
208 | " | 50% | \n",
209 | " 5.800000 | \n",
210 | " 3.000000 | \n",
211 | " 4.350000 | \n",
212 | " 1.300000 | \n",
213 | "
\n",
214 | " \n",
215 | " | 75% | \n",
216 | " 6.400000 | \n",
217 | " 3.300000 | \n",
218 | " 5.100000 | \n",
219 | " 1.800000 | \n",
220 | "
\n",
221 | " \n",
222 | " | max | \n",
223 | " 7.900000 | \n",
224 | " 4.400000 | \n",
225 | " 6.900000 | \n",
226 | " 2.500000 | \n",
227 | "
\n",
228 | " \n",
229 | "
\n",
230 | "
"
231 | ],
232 | "text/plain": [
233 | " SepalLength SepalWidth PetalLength PetalWidth\n",
234 | "count 150.000000 150.000000 150.000000 150.000000\n",
235 | "mean 5.843333 3.054000 3.758667 1.198667\n",
236 | "std 0.828066 0.433594 1.764420 0.763161\n",
237 | "min 4.300000 2.000000 1.000000 0.100000\n",
238 | "25% 5.100000 2.800000 1.600000 0.300000\n",
239 | "50% 5.800000 3.000000 4.350000 1.300000\n",
240 | "75% 6.400000 3.300000 5.100000 1.800000\n",
241 | "max 7.900000 4.400000 6.900000 2.500000"
242 | ]
243 | },
244 | "execution_count": 73,
245 | "metadata": {},
246 | "output_type": "execute_result"
247 | }
248 | ],
249 | "source": [
250 | "#Summarize the numerical columns of dataset\n",
251 | "a.describe()"
252 | ]
253 | },
254 | {
255 | "cell_type": "markdown",
256 | "metadata": {},
257 | "source": [
258 | "# 2. Conditional column "
259 | ]
260 | },
261 | {
262 | "cell_type": "code",
263 | "execution_count": 74,
264 | "metadata": {},
265 | "outputs": [
266 | {
267 | "data": {
268 | "text/html": [
269 | "\n",
270 | "\n",
283 | "
\n",
284 | " \n",
285 | " \n",
286 | " | \n",
287 | " SepalLength | \n",
288 | " SepalWidth | \n",
289 | " PetalLength | \n",
290 | " PetalWidth | \n",
291 | " Name | \n",
292 | " SepalLength_Size | \n",
293 | "
\n",
294 | " \n",
295 | " \n",
296 | " \n",
297 | " | 0 | \n",
298 | " 5.1 | \n",
299 | " 3.5 | \n",
300 | " 1.4 | \n",
301 | " 0.2 | \n",
302 | " setosa | \n",
303 | " High | \n",
304 | "
\n",
305 | " \n",
306 | " | 1 | \n",
307 | " 4.9 | \n",
308 | " 3.0 | \n",
309 | " 1.4 | \n",
310 | " 0.2 | \n",
311 | " setosa | \n",
312 | " Low | \n",
313 | "
\n",
314 | " \n",
315 | " | 2 | \n",
316 | " 4.7 | \n",
317 | " 3.2 | \n",
318 | " 1.3 | \n",
319 | " 0.2 | \n",
320 | " setosa | \n",
321 | " Low | \n",
322 | "
\n",
323 | " \n",
324 | " | 3 | \n",
325 | " 4.6 | \n",
326 | " 3.1 | \n",
327 | " 1.5 | \n",
328 | " 0.2 | \n",
329 | " setosa | \n",
330 | " Low | \n",
331 | "
\n",
332 | " \n",
333 | " | 4 | \n",
334 | " 5.0 | \n",
335 | " 3.6 | \n",
336 | " 1.4 | \n",
337 | " 0.2 | \n",
338 | " setosa | \n",
339 | " High | \n",
340 | "
\n",
341 | " \n",
342 | "
\n",
343 | "
"
344 | ],
345 | "text/plain": [
346 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n",
347 | "0 5.1 3.5 1.4 0.2 setosa High\n",
348 | "1 4.9 3.0 1.4 0.2 setosa Low\n",
349 | "2 4.7 3.2 1.3 0.2 setosa Low\n",
350 | "3 4.6 3.1 1.5 0.2 setosa Low\n",
351 | "4 5.0 3.6 1.4 0.2 setosa High"
352 | ]
353 | },
354 | "execution_count": 74,
355 | "metadata": {},
356 | "output_type": "execute_result"
357 | }
358 | ],
359 | "source": [
360 | "#Create new column for bucketing the Sepal Length as High or Low\n",
361 | "a['SepalLength_Size'] = np.where(a['SepalLength']>=5,'High','Low')\n",
362 | "a.head()"
363 | ]
364 | },
365 | {
366 | "cell_type": "code",
367 | "execution_count": 75,
368 | "metadata": {},
369 | "outputs": [
370 | {
371 | "data": {
372 | "text/html": [
373 | "\n",
374 | "\n",
387 | "
\n",
388 | " \n",
389 | " \n",
390 | " | \n",
391 | " SepalLength | \n",
392 | " SepalWidth | \n",
393 | " PetalLength | \n",
394 | " PetalWidth | \n",
395 | " Name | \n",
396 | " SepalLength_Size | \n",
397 | "
\n",
398 | " \n",
399 | " \n",
400 | " \n",
401 | " | 0 | \n",
402 | " 5.1 | \n",
403 | " 3.5 | \n",
404 | " 1.4 | \n",
405 | " 0.2 | \n",
406 | " setosa | \n",
407 | " High | \n",
408 | "
\n",
409 | " \n",
410 | " | 1 | \n",
411 | " 4.9 | \n",
412 | " 3.0 | \n",
413 | " 1.4 | \n",
414 | " 0.2 | \n",
415 | " setosa | \n",
416 | " Low | \n",
417 | "
\n",
418 | " \n",
419 | " | 2 | \n",
420 | " 4.7 | \n",
421 | " 3.2 | \n",
422 | " 1.3 | \n",
423 | " 0.2 | \n",
424 | " setosa | \n",
425 | " Low | \n",
426 | "
\n",
427 | " \n",
428 | " | 3 | \n",
429 | " 4.6 | \n",
430 | " 3.1 | \n",
431 | " 1.5 | \n",
432 | " 0.2 | \n",
433 | " setosa | \n",
434 | " Low | \n",
435 | "
\n",
436 | " \n",
437 | " | 4 | \n",
438 | " 5.0 | \n",
439 | " 3.6 | \n",
440 | " 1.4 | \n",
441 | " 0.2 | \n",
442 | " setosa | \n",
443 | " High | \n",
444 | "
\n",
445 | " \n",
446 | "
\n",
447 | "
"
448 | ],
449 | "text/plain": [
450 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n",
451 | "0 5.1 3.5 1.4 0.2 setosa High\n",
452 | "1 4.9 3.0 1.4 0.2 setosa Low\n",
453 | "2 4.7 3.2 1.3 0.2 setosa Low\n",
454 | "3 4.6 3.1 1.5 0.2 setosa Low\n",
455 | "4 5.0 3.6 1.4 0.2 setosa High"
456 | ]
457 | },
458 | "execution_count": 75,
459 | "metadata": {},
460 | "output_type": "execute_result"
461 | }
462 | ],
463 | "source": [
464 | "#Using list comprehension\n",
465 | "a['SepalLength_Size'] = ['High' if x >= 5 else 'Low' for x in a['SepalLength'] ]\n",
466 | "a.head()"
467 | ]
468 | },
469 | {
470 | "cell_type": "code",
471 | "execution_count": 76,
472 | "metadata": {
473 | "scrolled": true
474 | },
475 | "outputs": [
476 | {
477 | "data": {
478 | "text/html": [
479 | "\n",
480 | "\n",
493 | "
\n",
494 | " \n",
495 | " \n",
496 | " | \n",
497 | " SepalLength | \n",
498 | " SepalWidth | \n",
499 | " PetalLength | \n",
500 | " PetalWidth | \n",
501 | " Name | \n",
502 | " SepalLength_Size | \n",
503 | "
\n",
504 | " \n",
505 | " \n",
506 | " \n",
507 | " | 0 | \n",
508 | " 5.1 | \n",
509 | " 3.5 | \n",
510 | " 1.4 | \n",
511 | " 0.2 | \n",
512 | " setosa | \n",
513 | " High | \n",
514 | "
\n",
515 | " \n",
516 | " | 1 | \n",
517 | " 4.9 | \n",
518 | " 3.0 | \n",
519 | " 1.4 | \n",
520 | " 0.2 | \n",
521 | " setosa | \n",
522 | " Low | \n",
523 | "
\n",
524 | " \n",
525 | " | 2 | \n",
526 | " 4.7 | \n",
527 | " 3.2 | \n",
528 | " 1.3 | \n",
529 | " 0.2 | \n",
530 | " setosa | \n",
531 | " Low | \n",
532 | "
\n",
533 | " \n",
534 | " | 3 | \n",
535 | " 4.6 | \n",
536 | " 3.1 | \n",
537 | " 1.5 | \n",
538 | " 0.2 | \n",
539 | " setosa | \n",
540 | " Low | \n",
541 | "
\n",
542 | " \n",
543 | " | 4 | \n",
544 | " 5.0 | \n",
545 | " 3.6 | \n",
546 | " 1.4 | \n",
547 | " 0.2 | \n",
548 | " setosa | \n",
549 | " High | \n",
550 | "
\n",
551 | " \n",
552 | "
\n",
553 | "
"
554 | ],
555 | "text/plain": [
556 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n",
557 | "0 5.1 3.5 1.4 0.2 setosa High\n",
558 | "1 4.9 3.0 1.4 0.2 setosa Low\n",
559 | "2 4.7 3.2 1.3 0.2 setosa Low\n",
560 | "3 4.6 3.1 1.5 0.2 setosa Low\n",
561 | "4 5.0 3.6 1.4 0.2 setosa High"
562 | ]
563 | },
564 | "execution_count": 76,
565 | "metadata": {},
566 | "output_type": "execute_result"
567 | }
568 | ],
569 | "source": [
570 | "#Using assign function\n",
571 | "def size(row_number):\n",
572 | " if row_number[\"SepalLength\"] >=5:\n",
573 | " return 'High'\n",
574 | " else:\n",
575 | " return 'Low'\n",
576 | "\n",
577 | "a = a.assign(SepalLength_Size=a.apply(size, axis=1))\n",
578 | "\n",
579 | "a.head()"
580 | ]
581 | },
582 | {
583 | "cell_type": "code",
584 | "execution_count": 77,
585 | "metadata": {},
586 | "outputs": [
587 | {
588 | "data": {
589 | "text/html": [
590 | "\n",
591 | "\n",
604 | "
\n",
605 | " \n",
606 | " \n",
607 | " | \n",
608 | " SepalLength | \n",
609 | " SepalWidth | \n",
610 | " PetalLength | \n",
611 | " PetalWidth | \n",
612 | " Name | \n",
613 | " SepalLength_Size | \n",
614 | "
\n",
615 | " \n",
616 | " \n",
617 | " \n",
618 | " | 0 | \n",
619 | " 5.1 | \n",
620 | " 3.5 | \n",
621 | " 1.4 | \n",
622 | " 0.2 | \n",
623 | " setosa | \n",
624 | " High | \n",
625 | "
\n",
626 | " \n",
627 | " | 1 | \n",
628 | " 4.9 | \n",
629 | " 3.0 | \n",
630 | " 1.4 | \n",
631 | " 0.2 | \n",
632 | " setosa | \n",
633 | " Low | \n",
634 | "
\n",
635 | " \n",
636 | " | 2 | \n",
637 | " 4.7 | \n",
638 | " 3.2 | \n",
639 | " 1.3 | \n",
640 | " 0.2 | \n",
641 | " setosa | \n",
642 | " Low | \n",
643 | "
\n",
644 | " \n",
645 | " | 3 | \n",
646 | " 4.6 | \n",
647 | " 3.1 | \n",
648 | " 1.5 | \n",
649 | " 0.2 | \n",
650 | " setosa | \n",
651 | " Low | \n",
652 | "
\n",
653 | " \n",
654 | " | 4 | \n",
655 | " 5.0 | \n",
656 | " 3.6 | \n",
657 | " 1.4 | \n",
658 | " 0.2 | \n",
659 | " setosa | \n",
660 | " High | \n",
661 | "
\n",
662 | " \n",
663 | "
\n",
664 | "
"
665 | ],
666 | "text/plain": [
667 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n",
668 | "0 5.1 3.5 1.4 0.2 setosa High\n",
669 | "1 4.9 3.0 1.4 0.2 setosa Low\n",
670 | "2 4.7 3.2 1.3 0.2 setosa Low\n",
671 | "3 4.6 3.1 1.5 0.2 setosa Low\n",
672 | "4 5.0 3.6 1.4 0.2 setosa High"
673 | ]
674 | },
675 | "execution_count": 77,
676 | "metadata": {},
677 | "output_type": "execute_result"
678 | }
679 | ],
680 | "source": [
681 | "#Using lambda function\n",
682 | "a['SepalLength_Size'] = a['SepalLength'].map( lambda x: 'High' if x >= 5 else 'Low')\n",
683 | "a.head()"
684 | ]
685 | },
686 | {
687 | "cell_type": "markdown",
688 | "metadata": {},
689 | "source": [
690 | "# 3. Unique values of a column"
691 | ]
692 | },
693 | {
694 | "cell_type": "code",
695 | "execution_count": 78,
696 | "metadata": {},
697 | "outputs": [
698 | {
699 | "data": {
700 | "text/plain": [
701 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)"
702 | ]
703 | },
704 | "execution_count": 78,
705 | "metadata": {},
706 | "output_type": "execute_result"
707 | }
708 | ],
709 | "source": [
710 | "a['Name'].unique()"
711 | ]
712 | },
713 | {
714 | "cell_type": "code",
715 | "execution_count": 79,
716 | "metadata": {},
717 | "outputs": [
718 | {
719 | "data": {
720 | "text/plain": [
721 | "3"
722 | ]
723 | },
724 | "execution_count": 79,
725 | "metadata": {},
726 | "output_type": "execute_result"
727 | }
728 | ],
729 | "source": [
730 | "a['Name'].nunique()"
731 | ]
732 | },
733 | {
734 | "cell_type": "markdown",
735 | "metadata": {},
736 | "source": [
737 | "# 4. Cross tab function"
738 | ]
739 | },
740 | {
741 | "cell_type": "code",
742 | "execution_count": 80,
743 | "metadata": {},
744 | "outputs": [
745 | {
746 | "data": {
747 | "text/html": [
748 | "\n",
749 | "\n",
762 | "
\n",
763 | " \n",
764 | " \n",
765 | " | SepalLength_Size | \n",
766 | " High | \n",
767 | " Low | \n",
768 | "
\n",
769 | " \n",
770 | " | Name | \n",
771 | " | \n",
772 | " | \n",
773 | "
\n",
774 | " \n",
775 | " \n",
776 | " \n",
777 | " | setosa | \n",
778 | " 30 | \n",
779 | " 20 | \n",
780 | "
\n",
781 | " \n",
782 | " | versicolor | \n",
783 | " 49 | \n",
784 | " 1 | \n",
785 | "
\n",
786 | " \n",
787 | " | virginica | \n",
788 | " 49 | \n",
789 | " 1 | \n",
790 | "
\n",
791 | " \n",
792 | "
\n",
793 | "
"
794 | ],
795 | "text/plain": [
796 | "SepalLength_Size High Low\n",
797 | "Name \n",
798 | "setosa 30 20\n",
799 | "versicolor 49 1\n",
800 | "virginica 49 1"
801 | ]
802 | },
803 | "execution_count": 80,
804 | "metadata": {},
805 | "output_type": "execute_result"
806 | }
807 | ],
808 | "source": [
809 | "pd.crosstab(a['Name'],a['SepalLength_Size'])"
810 | ]
811 | },
812 | {
813 | "cell_type": "code",
814 | "execution_count": 81,
815 | "metadata": {},
816 | "outputs": [
817 | {
818 | "data": {
819 | "text/html": [
820 | "\n",
821 | "\n",
834 | "
\n",
835 | " \n",
836 | " \n",
837 | " | SepalLength_Size | \n",
838 | " High | \n",
839 | " Low | \n",
840 | "
\n",
841 | " \n",
842 | " | Name | \n",
843 | " | \n",
844 | " | \n",
845 | "
\n",
846 | " \n",
847 | " \n",
848 | " \n",
849 | " | setosa | \n",
850 | " 5.230000 | \n",
851 | " 4.67 | \n",
852 | "
\n",
853 | " \n",
854 | " | versicolor | \n",
855 | " 5.957143 | \n",
856 | " 4.90 | \n",
857 | "
\n",
858 | " \n",
859 | " | virginica | \n",
860 | " 6.622449 | \n",
861 | " 4.90 | \n",
862 | "
\n",
863 | " \n",
864 | "
\n",
865 | "
"
866 | ],
867 | "text/plain": [
868 | "SepalLength_Size High Low\n",
869 | "Name \n",
870 | "setosa 5.230000 4.67\n",
871 | "versicolor 5.957143 4.90\n",
872 | "virginica 6.622449 4.90"
873 | ]
874 | },
875 | "execution_count": 81,
876 | "metadata": {},
877 | "output_type": "execute_result"
878 | }
879 | ],
880 | "source": [
881 | "pd.crosstab(a['Name'],a['SepalLength_Size'],values=a['SepalLength'],aggfunc=np.mean)"
882 | ]
883 | },
884 | {
885 | "cell_type": "markdown",
886 | "metadata": {},
887 | "source": [
888 | "# 5. Sorting a column"
889 | ]
890 | },
891 | {
892 | "cell_type": "code",
893 | "execution_count": 82,
894 | "metadata": {},
895 | "outputs": [
896 | {
897 | "data": {
898 | "text/html": [
899 | "\n",
900 | "\n",
913 | "
\n",
914 | " \n",
915 | " \n",
916 | " | \n",
917 | " SepalLength | \n",
918 | " SepalWidth | \n",
919 | " PetalLength | \n",
920 | " PetalWidth | \n",
921 | " Name | \n",
922 | " SepalLength_Size | \n",
923 | "
\n",
924 | " \n",
925 | " \n",
926 | " \n",
927 | " | 13 | \n",
928 | " 4.3 | \n",
929 | " 3.0 | \n",
930 | " 1.1 | \n",
931 | " 0.1 | \n",
932 | " setosa | \n",
933 | " Low | \n",
934 | "
\n",
935 | " \n",
936 | " | 42 | \n",
937 | " 4.4 | \n",
938 | " 3.2 | \n",
939 | " 1.3 | \n",
940 | " 0.2 | \n",
941 | " setosa | \n",
942 | " Low | \n",
943 | "
\n",
944 | " \n",
945 | " | 38 | \n",
946 | " 4.4 | \n",
947 | " 3.0 | \n",
948 | " 1.3 | \n",
949 | " 0.2 | \n",
950 | " setosa | \n",
951 | " Low | \n",
952 | "
\n",
953 | " \n",
954 | " | 8 | \n",
955 | " 4.4 | \n",
956 | " 2.9 | \n",
957 | " 1.4 | \n",
958 | " 0.2 | \n",
959 | " setosa | \n",
960 | " Low | \n",
961 | "
\n",
962 | " \n",
963 | " | 41 | \n",
964 | " 4.5 | \n",
965 | " 2.3 | \n",
966 | " 1.3 | \n",
967 | " 0.3 | \n",
968 | " setosa | \n",
969 | " Low | \n",
970 | "
\n",
971 | " \n",
972 | "
\n",
973 | "
"
974 | ],
975 | "text/plain": [
976 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n",
977 | "13 4.3 3.0 1.1 0.1 setosa Low\n",
978 | "42 4.4 3.2 1.3 0.2 setosa Low\n",
979 | "38 4.4 3.0 1.3 0.2 setosa Low\n",
980 | "8 4.4 2.9 1.4 0.2 setosa Low\n",
981 | "41 4.5 2.3 1.3 0.3 setosa Low"
982 | ]
983 | },
984 | "execution_count": 82,
985 | "metadata": {},
986 | "output_type": "execute_result"
987 | }
988 | ],
989 | "source": [
990 | "b = a.sort_values('SepalLength')\n",
991 | "b.head()"
992 | ]
993 | },
994 | {
995 | "cell_type": "code",
996 | "execution_count": 83,
997 | "metadata": {},
998 | "outputs": [
999 | {
1000 | "data": {
1001 | "text/html": [
1002 | "\n",
1003 | "\n",
1016 | "
\n",
1017 | " \n",
1018 | " \n",
1019 | " | \n",
1020 | " SepalLength | \n",
1021 | " SepalWidth | \n",
1022 | " PetalLength | \n",
1023 | " PetalWidth | \n",
1024 | " Name | \n",
1025 | " SepalLength_Size | \n",
1026 | "
\n",
1027 | " \n",
1028 | " \n",
1029 | " \n",
1030 | " | 14 | \n",
1031 | " 5.8 | \n",
1032 | " 4.0 | \n",
1033 | " 1.2 | \n",
1034 | " 0.2 | \n",
1035 | " setosa | \n",
1036 | " High | \n",
1037 | "
\n",
1038 | " \n",
1039 | " | 15 | \n",
1040 | " 5.7 | \n",
1041 | " 4.4 | \n",
1042 | " 1.5 | \n",
1043 | " 0.4 | \n",
1044 | " setosa | \n",
1045 | " High | \n",
1046 | "
\n",
1047 | " \n",
1048 | " | 18 | \n",
1049 | " 5.7 | \n",
1050 | " 3.8 | \n",
1051 | " 1.7 | \n",
1052 | " 0.3 | \n",
1053 | " setosa | \n",
1054 | " High | \n",
1055 | "
\n",
1056 | " \n",
1057 | " | 33 | \n",
1058 | " 5.5 | \n",
1059 | " 4.2 | \n",
1060 | " 1.4 | \n",
1061 | " 0.2 | \n",
1062 | " setosa | \n",
1063 | " High | \n",
1064 | "
\n",
1065 | " \n",
1066 | " | 36 | \n",
1067 | " 5.5 | \n",
1068 | " 3.5 | \n",
1069 | " 1.3 | \n",
1070 | " 0.2 | \n",
1071 | " setosa | \n",
1072 | " High | \n",
1073 | "
\n",
1074 | " \n",
1075 | "
\n",
1076 | "
"
1077 | ],
1078 | "text/plain": [
1079 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n",
1080 | "14 5.8 4.0 1.2 0.2 setosa High\n",
1081 | "15 5.7 4.4 1.5 0.4 setosa High\n",
1082 | "18 5.7 3.8 1.7 0.3 setosa High\n",
1083 | "33 5.5 4.2 1.4 0.2 setosa High\n",
1084 | "36 5.5 3.5 1.3 0.2 setosa High"
1085 | ]
1086 | },
1087 | "execution_count": 83,
1088 | "metadata": {},
1089 | "output_type": "execute_result"
1090 | }
1091 | ],
1092 | "source": [
1093 | "c = a.sort_values(['Name','SepalLength'],ascending=[True,False])\n",
1094 | "c.head()"
1095 | ]
1096 | },
1097 | {
1098 | "cell_type": "markdown",
1099 | "metadata": {},
1100 | "source": [
1101 | "# 6. Column datatypes"
1102 | ]
1103 | },
1104 | {
1105 | "cell_type": "code",
1106 | "execution_count": 84,
1107 | "metadata": {},
1108 | "outputs": [
1109 | {
1110 | "data": {
1111 | "text/plain": [
1112 | "SepalLength float64\n",
1113 | "SepalWidth float64\n",
1114 | "PetalLength float64\n",
1115 | "PetalWidth float64\n",
1116 | "Name object\n",
1117 | "SepalLength_Size object\n",
1118 | "dtype: object"
1119 | ]
1120 | },
1121 | "execution_count": 84,
1122 | "metadata": {},
1123 | "output_type": "execute_result"
1124 | }
1125 | ],
1126 | "source": [
1127 | "#check datatype of columns\n",
1128 | "a.dtypes"
1129 | ]
1130 | },
1131 | {
1132 | "cell_type": "markdown",
1133 | "metadata": {},
1134 | "source": [
1135 | "# 7. Binning a column"
1136 | ]
1137 | },
1138 | {
1139 | "cell_type": "code",
1140 | "execution_count": 92,
1141 | "metadata": {},
1142 | "outputs": [
1143 | {
1144 | "data": {
1145 | "text/html": [
1146 | "\n",
1147 | "\n",
1160 | "
\n",
1161 | " \n",
1162 | " \n",
1163 | " | \n",
1164 | " SepalLength | \n",
1165 | " SepalWidth | \n",
1166 | " PetalLength | \n",
1167 | " PetalWidth | \n",
1168 | " Name | \n",
1169 | " SepalLength_Size | \n",
1170 | " bins | \n",
1171 | "
\n",
1172 | " \n",
1173 | " \n",
1174 | " \n",
1175 | " | 0 | \n",
1176 | " 5.1 | \n",
1177 | " 3.5 | \n",
1178 | " 1.4 | \n",
1179 | " 0.2 | \n",
1180 | " setosa | \n",
1181 | " High | \n",
1182 | " (5, 6] | \n",
1183 | "
\n",
1184 | " \n",
1185 | " | 1 | \n",
1186 | " 4.9 | \n",
1187 | " 3.0 | \n",
1188 | " 1.4 | \n",
1189 | " 0.2 | \n",
1190 | " setosa | \n",
1191 | " Low | \n",
1192 | " (4, 5] | \n",
1193 | "
\n",
1194 | " \n",
1195 | " | 2 | \n",
1196 | " 4.7 | \n",
1197 | " 3.2 | \n",
1198 | " 1.3 | \n",
1199 | " 0.2 | \n",
1200 | " setosa | \n",
1201 | " Low | \n",
1202 | " (4, 5] | \n",
1203 | "
\n",
1204 | " \n",
1205 | " | 3 | \n",
1206 | " 4.6 | \n",
1207 | " 3.1 | \n",
1208 | " 1.5 | \n",
1209 | " 0.2 | \n",
1210 | " setosa | \n",
1211 | " Low | \n",
1212 | " (4, 5] | \n",
1213 | "
\n",
1214 | " \n",
1215 | " | 4 | \n",
1216 | " 5.0 | \n",
1217 | " 3.6 | \n",
1218 | " 1.4 | \n",
1219 | " 0.2 | \n",
1220 | " setosa | \n",
1221 | " High | \n",
1222 | " (4, 5] | \n",
1223 | "
\n",
1224 | " \n",
1225 | "
\n",
1226 | "
"
1227 | ],
1228 | "text/plain": [
1229 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size \\\n",
1230 | "0 5.1 3.5 1.4 0.2 setosa High \n",
1231 | "1 4.9 3.0 1.4 0.2 setosa Low \n",
1232 | "2 4.7 3.2 1.3 0.2 setosa Low \n",
1233 | "3 4.6 3.1 1.5 0.2 setosa Low \n",
1234 | "4 5.0 3.6 1.4 0.2 setosa High \n",
1235 | "\n",
1236 | " bins \n",
1237 | "0 (5, 6] \n",
1238 | "1 (4, 5] \n",
1239 | "2 (4, 5] \n",
1240 | "3 (4, 5] \n",
1241 | "4 (4, 5] "
1242 | ]
1243 | },
1244 | "execution_count": 92,
1245 | "metadata": {},
1246 | "output_type": "execute_result"
1247 | }
1248 | ],
1249 | "source": [
1250 | "bins = [0, 1, 2, 3, 4, 5,6,7]\n",
1251 | "a['bins'] = pd.cut(a['SepalLength'], bins)\n",
1252 | "a.head()"
1253 | ]
1254 | },
1255 | {
1256 | "cell_type": "code",
1257 | "execution_count": 93,
1258 | "metadata": {},
1259 | "outputs": [
1260 | {
1261 | "data": {
1262 | "text/plain": [
1263 | "(5, 6] 57\n",
1264 | "(6, 7] 49\n",
1265 | "(4, 5] 32\n",
1266 | "(3, 4] 0\n",
1267 | "(2, 3] 0\n",
1268 | "(1, 2] 0\n",
1269 | "(0, 1] 0\n",
1270 | "Name: bins, dtype: int64"
1271 | ]
1272 | },
1273 | "execution_count": 93,
1274 | "metadata": {},
1275 | "output_type": "execute_result"
1276 | }
1277 | ],
1278 | "source": [
1279 | "a['bins'].value_counts()"
1280 | ]
1281 | }
1282 | ],
1283 | "metadata": {
1284 | "kernelspec": {
1285 | "display_name": "Python 3",
1286 | "language": "python",
1287 | "name": "python3"
1288 | },
1289 | "language_info": {
1290 | "codemirror_mode": {
1291 | "name": "ipython",
1292 | "version": 3
1293 | },
1294 | "file_extension": ".py",
1295 | "mimetype": "text/x-python",
1296 | "name": "python",
1297 | "nbconvert_exporter": "python",
1298 | "pygments_lexer": "ipython3",
1299 | "version": "3.6.5"
1300 | }
1301 | },
1302 | "nbformat": 4,
1303 | "nbformat_minor": 2
1304 | }
1305 |
--------------------------------------------------------------------------------
/Python for Data Science - Part 1.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# 1. Python function"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 3,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "#multiply two numbers using a python function\n",
17 | "def multiply(x,y):\n",
18 | " z = x*y\n",
19 | " return z"
20 | ]
21 | },
22 | {
23 | "cell_type": "code",
24 | "execution_count": 4,
25 | "metadata": {},
26 | "outputs": [
27 | {
28 | "data": {
29 | "text/plain": [
30 | "6"
31 | ]
32 | },
33 | "execution_count": 4,
34 | "metadata": {},
35 | "output_type": "execute_result"
36 | }
37 | ],
38 | "source": [
39 | "multiply(2,3)"
40 | ]
41 | },
42 | {
43 | "cell_type": "markdown",
44 | "metadata": {},
45 | "source": [
46 | "# 2. Python types"
47 | ]
48 | },
49 | {
50 | "cell_type": "code",
51 | "execution_count": 6,
52 | "metadata": {},
53 | "outputs": [
54 | {
55 | "data": {
56 | "text/plain": [
57 | "str"
58 | ]
59 | },
60 | "execution_count": 6,
61 | "metadata": {},
62 | "output_type": "execute_result"
63 | }
64 | ],
65 | "source": [
66 | "type(' My name is Rohan')"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 7,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "data": {
76 | "text/plain": [
77 | "int"
78 | ]
79 | },
80 | "execution_count": 7,
81 | "metadata": {},
82 | "output_type": "execute_result"
83 | }
84 | ],
85 | "source": [
86 | "type(1)"
87 | ]
88 | },
89 | {
90 | "cell_type": "code",
91 | "execution_count": 8,
92 | "metadata": {},
93 | "outputs": [
94 | {
95 | "data": {
96 | "text/plain": [
97 | "float"
98 | ]
99 | },
100 | "execution_count": 8,
101 | "metadata": {},
102 | "output_type": "execute_result"
103 | }
104 | ],
105 | "source": [
106 | "type(1.0)"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": 10,
112 | "metadata": {},
113 | "outputs": [
114 | {
115 | "data": {
116 | "text/plain": [
117 | "NoneType"
118 | ]
119 | },
120 | "execution_count": 10,
121 | "metadata": {},
122 | "output_type": "execute_result"
123 | }
124 | ],
125 | "source": [
126 | "type(None)"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 15,
132 | "metadata": {},
133 | "outputs": [
134 | {
135 | "data": {
136 | "text/plain": [
137 | "function"
138 | ]
139 | },
140 | "execution_count": 15,
141 | "metadata": {},
142 | "output_type": "execute_result"
143 | }
144 | ],
145 | "source": [
146 | "type(multiply)"
147 | ]
148 | },
149 | {
150 | "cell_type": "markdown",
151 | "metadata": {},
152 | "source": [
153 | "# 3. Python sequences"
154 | ]
155 | },
156 | {
157 | "cell_type": "markdown",
158 | "metadata": {},
159 | "source": [
160 | "#### Tuples are immutable data structures that cannot be altered (unlike lists)"
161 | ]
162 | },
163 | {
164 | "cell_type": "code",
165 | "execution_count": 23,
166 | "metadata": {},
167 | "outputs": [],
168 | "source": [
169 | "a = (1,2,3,4)"
170 | ]
171 | },
172 | {
173 | "cell_type": "code",
174 | "execution_count": 24,
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/plain": [
180 | "tuple"
181 | ]
182 | },
183 | "execution_count": 24,
184 | "metadata": {},
185 | "output_type": "execute_result"
186 | }
187 | ],
188 | "source": [
189 | "type(a)"
190 | ]
191 | },
192 | {
193 | "cell_type": "markdown",
194 | "metadata": {},
195 | "source": [
196 | "#### Lists are mutable objects"
197 | ]
198 | },
199 | {
200 | "cell_type": "code",
201 | "execution_count": 28,
202 | "metadata": {},
203 | "outputs": [],
204 | "source": [
205 | "b = [1,2,3,4]"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 29,
211 | "metadata": {},
212 | "outputs": [
213 | {
214 | "data": {
215 | "text/plain": [
216 | "list"
217 | ]
218 | },
219 | "execution_count": 29,
220 | "metadata": {},
221 | "output_type": "execute_result"
222 | }
223 | ],
224 | "source": [
225 | "type(b)"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 30,
231 | "metadata": {},
232 | "outputs": [
233 | {
234 | "name": "stdout",
235 | "output_type": "stream",
236 | "text": [
237 | "[1, 2, 3, 4, 2.2]\n"
238 | ]
239 | }
240 | ],
241 | "source": [
242 | "b.append(2.2) #append to list using this function\n",
243 | "print(b)"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 32,
249 | "metadata": {},
250 | "outputs": [
251 | {
252 | "name": "stdout",
253 | "output_type": "stream",
254 | "text": [
255 | "1\n",
256 | "2\n",
257 | "3\n",
258 | "4\n",
259 | "2.2\n"
260 | ]
261 | }
262 | ],
263 | "source": [
264 | "for number in b: #looping through list\n",
265 | " print(number)"
266 | ]
267 | },
268 | {
269 | "cell_type": "code",
270 | "execution_count": 34,
271 | "metadata": {},
272 | "outputs": [
273 | {
274 | "name": "stdout",
275 | "output_type": "stream",
276 | "text": [
277 | "1\n",
278 | "2\n",
279 | "3\n",
280 | "4\n",
281 | "2.2\n"
282 | ]
283 | }
284 | ],
285 | "source": [
286 | "for i in range(len(b)): #looping using index of a list\n",
287 | " print(b[i])"
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 38,
293 | "metadata": {},
294 | "outputs": [
295 | {
296 | "data": {
297 | "text/plain": [
298 | "[1, 2, 3, 1, 'abc', 'de']"
299 | ]
300 | },
301 | "execution_count": 38,
302 | "metadata": {},
303 | "output_type": "execute_result"
304 | }
305 | ],
306 | "source": [
307 | "[1,2,3] + [1,'abc','de'] #concatenate lists"
308 | ]
309 | },
310 | {
311 | "cell_type": "code",
312 | "execution_count": 39,
313 | "metadata": {},
314 | "outputs": [
315 | {
316 | "data": {
317 | "text/plain": [
318 | "[1, 2, 1, 2, 1, 2]"
319 | ]
320 | },
321 | "execution_count": 39,
322 | "metadata": {},
323 | "output_type": "execute_result"
324 | }
325 | ],
326 | "source": [
327 | "[1,2]*3 #repeat lists"
328 | ]
329 | },
330 | {
331 | "cell_type": "code",
332 | "execution_count": 40,
333 | "metadata": {},
334 | "outputs": [
335 | {
336 | "data": {
337 | "text/plain": [
338 | "True"
339 | ]
340 | },
341 | "execution_count": 40,
342 | "metadata": {},
343 | "output_type": "execute_result"
344 | }
345 | ],
346 | "source": [
347 | "3 in b #in operator to check if required object is in list"
348 | ]
349 | },
350 | {
351 | "cell_type": "markdown",
352 | "metadata": {},
353 | "source": [
354 | "#### Strings are sequences of characters"
355 | ]
356 | },
357 | {
358 | "cell_type": "code",
359 | "execution_count": 42,
360 | "metadata": {},
361 | "outputs": [],
362 | "source": [
363 | "x = 'My name is Rohan'"
364 | ]
365 | },
366 | {
367 | "cell_type": "code",
368 | "execution_count": 43,
369 | "metadata": {},
370 | "outputs": [
371 | {
372 | "data": {
373 | "text/plain": [
374 | "'M'"
375 | ]
376 | },
377 | "execution_count": 43,
378 | "metadata": {},
379 | "output_type": "execute_result"
380 | }
381 | ],
382 | "source": [
383 | "x[0] #Access first letter"
384 | ]
385 | },
386 | {
387 | "cell_type": "code",
388 | "execution_count": 45,
389 | "metadata": {},
390 | "outputs": [
391 | {
392 | "data": {
393 | "text/plain": [
394 | "'M'"
395 | ]
396 | },
397 | "execution_count": 45,
398 | "metadata": {},
399 | "output_type": "execute_result"
400 | }
401 | ],
402 | "source": [
403 | "x[0:1] #Still accesses only first letter"
404 | ]
405 | },
406 | {
407 | "cell_type": "code",
408 | "execution_count": 47,
409 | "metadata": {},
410 | "outputs": [
411 | {
412 | "data": {
413 | "text/plain": [
414 | "'My'"
415 | ]
416 | },
417 | "execution_count": 47,
418 | "metadata": {},
419 | "output_type": "execute_result"
420 | }
421 | ],
422 | "source": [
423 | "x[0:2] #Accesses two letters"
424 | ]
425 | },
426 | {
427 | "cell_type": "code",
428 | "execution_count": 49,
429 | "metadata": {},
430 | "outputs": [
431 | {
432 | "data": {
433 | "text/plain": [
434 | "'My name is Roha'"
435 | ]
436 | },
437 | "execution_count": 49,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "x[:-1] #Accesses everything except last letter"
444 | ]
445 | },
446 | {
447 | "cell_type": "code",
448 | "execution_count": 56,
449 | "metadata": {},
450 | "outputs": [
451 | {
452 | "data": {
453 | "text/plain": [
454 | "' Rohan'"
455 | ]
456 | },
457 | "execution_count": 56,
458 | "metadata": {},
459 | "output_type": "execute_result"
460 | }
461 | ],
462 | "source": [
463 | "x[10:] #returns all the characters from index 10 (the 11th character) to the end"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": 64,
469 | "metadata": {},
470 | "outputs": [],
471 | "source": [
472 | "first = 'Rohan'\n",
473 | "last = 'Joseph'\n",
474 | "\n",
475 | "Name = first + ' ' + last #string concatenation"
476 | ]
477 | },
478 | {
479 | "cell_type": "code",
480 | "execution_count": 65,
481 | "metadata": {},
482 | "outputs": [
483 | {
484 | "name": "stdout",
485 | "output_type": "stream",
486 | "text": [
487 | "Rohan Joseph\n"
488 | ]
489 | }
490 | ],
491 | "source": [
492 | "print(Name)"
493 | ]
494 | },
495 | {
496 | "cell_type": "code",
497 | "execution_count": 69,
498 | "metadata": {},
499 | "outputs": [
500 | {
501 | "data": {
502 | "text/plain": [
503 | "['Rohan', 'Joseph']"
504 | ]
505 | },
506 | "execution_count": 69,
507 | "metadata": {},
508 | "output_type": "execute_result"
509 | }
510 | ],
511 | "source": [
512 | "Name.split(' ') #split the words in a string using split function"
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 74,
518 | "metadata": {},
519 | "outputs": [
520 | {
521 | "data": {
522 | "text/plain": [
523 | "'Rohan'"
524 | ]
525 | },
526 | "execution_count": 74,
527 | "metadata": {},
528 | "output_type": "execute_result"
529 | }
530 | ],
531 | "source": [
532 | "Name.split(' ')[0] #Show the first word"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": 75,
538 | "metadata": {},
539 | "outputs": [
540 | {
541 | "data": {
542 | "text/plain": [
543 | "'Joseph'"
544 | ]
545 | },
546 | "execution_count": 75,
547 | "metadata": {},
548 | "output_type": "execute_result"
549 | }
550 | ],
551 | "source": [
552 | "Name.split(' ')[1] #Show the second word"
553 | ]
554 | },
555 | {
556 | "cell_type": "code",
557 | "execution_count": 76,
558 | "metadata": {},
559 | "outputs": [
560 | {
561 | "data": {
562 | "text/plain": [
563 | "'Joseph'"
564 | ]
565 | },
566 | "execution_count": 76,
567 | "metadata": {},
568 | "output_type": "execute_result"
569 | }
570 | ],
571 | "source": [
572 | "Name.split(' ')[-1] #Last word (negative index counts from the end)"
573 | ]
574 | },
575 | {
576 | "cell_type": "code",
577 | "execution_count": 79,
578 | "metadata": {},
579 | "outputs": [
580 | {
581 | "data": {
582 | "text/plain": [
583 | "'Rohan2'"
584 | ]
585 | },
586 | "execution_count": 79,
587 | "metadata": {},
588 | "output_type": "execute_result"
589 | }
590 | ],
591 | "source": [
592 | "#for concatenation convert objects to strings\n",
593 | "'Rohan' + str(2)"
594 | ]
595 | },
596 | {
597 | "cell_type": "markdown",
598 | "metadata": {},
599 | "source": [
600 | "#### Dictionaries link keys and values"
601 | ]
602 | },
603 | {
604 | "cell_type": "code",
605 | "execution_count": 16,
606 | "metadata": {},
607 | "outputs": [],
608 | "source": [
609 | "c = {\"Name\" : \"Rohan\", \"Height\" : 176}"
610 | ]
611 | },
612 | {
613 | "cell_type": "code",
614 | "execution_count": 82,
615 | "metadata": {},
616 | "outputs": [
617 | {
618 | "data": {
619 | "text/plain": [
620 | "dict"
621 | ]
622 | },
623 | "execution_count": 82,
624 | "metadata": {},
625 | "output_type": "execute_result"
626 | }
627 | ],
628 | "source": [
629 | "type(c)"
630 | ]
631 | },
632 | {
633 | "cell_type": "code",
634 | "execution_count": 85,
635 | "metadata": {},
636 | "outputs": [
637 | {
638 | "name": "stdout",
639 | "output_type": "stream",
640 | "text": [
641 | "{'Name': 'Rohan', 'Height': 176}\n"
642 | ]
643 | }
644 | ],
645 | "source": [
646 | "print(c)"
647 | ]
648 | },
649 | {
650 | "cell_type": "code",
651 | "execution_count": 84,
652 | "metadata": {},
653 | "outputs": [
654 | {
655 | "data": {
656 | "text/plain": [
657 | "'Rohan'"
658 | ]
659 | },
660 | "execution_count": 84,
661 | "metadata": {},
662 | "output_type": "execute_result"
663 | }
664 | ],
665 | "source": [
666 | "c['Name'] #Access Name"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": 86,
672 | "metadata": {},
673 | "outputs": [
674 | {
675 | "data": {
676 | "text/plain": [
677 | "176"
678 | ]
679 | },
680 | "execution_count": 86,
681 | "metadata": {},
682 | "output_type": "execute_result"
683 | }
684 | ],
685 | "source": [
686 | "c['Height']"
687 | ]
688 | },
689 | {
690 | "cell_type": "code",
691 | "execution_count": 87,
692 | "metadata": {},
693 | "outputs": [
694 | {
695 | "name": "stdout",
696 | "output_type": "stream",
697 | "text": [
698 | "Name\n",
699 | "Height\n"
700 | ]
701 | }
702 | ],
703 | "source": [
704 | "#print all the keys\n",
705 | "for i in c:\n",
706 | " print(i)"
707 | ]
708 | },
709 | {
710 | "cell_type": "code",
711 | "execution_count": 92,
712 | "metadata": {},
713 | "outputs": [
714 | {
715 | "name": "stdout",
716 | "output_type": "stream",
717 | "text": [
718 | "Rohan\n",
719 | "176\n"
720 | ]
721 | }
722 | ],
723 | "source": [
724 | "#print all the values\n",
725 | "for i in c.values():\n",
726 | " print(i)"
727 | ]
728 | },
729 | {
730 | "cell_type": "code",
731 | "execution_count": 93,
732 | "metadata": {},
733 | "outputs": [
734 | {
735 | "name": "stdout",
736 | "output_type": "stream",
737 | "text": [
738 | "Name\n",
739 | "Rohan\n",
740 | "Height\n",
741 | "176\n"
742 | ]
743 | }
744 | ],
745 | "source": [
746 | "#iterate over all the items\n",
747 | "for name, height in c.items():\n",
748 | " print(name)\n",
749 | " print(height)"
750 | ]
751 | },
752 | {
753 | "cell_type": "code",
754 | "execution_count": 99,
755 | "metadata": {},
756 | "outputs": [
757 | {
758 | "name": "stdout",
759 | "output_type": "stream",
760 | "text": [
761 | "abc\n",
762 | "def\n"
763 | ]
764 | }
765 | ],
766 | "source": [
767 | "#Unpacking a tuple into different variables\n",
768 | "a,b = ('abc','def')\n",
769 | "print(a)\n",
770 | "print(b)"
771 | ]
772 | },
773 | {
774 | "cell_type": "markdown",
775 | "metadata": {},
776 | "source": [
777 | "# 4. Python dates and times"
778 | ]
779 | },
780 | {
781 | "cell_type": "code",
782 | "execution_count": 100,
783 | "metadata": {},
784 | "outputs": [],
785 | "source": [
786 | "import datetime as dt\n",
787 | "import time as tm"
788 | ]
789 | },
790 | {
791 | "cell_type": "code",
792 | "execution_count": 103,
793 | "metadata": {},
794 | "outputs": [
795 | {
796 | "data": {
797 | "text/plain": [
798 | "1532483980.5827992"
799 | ]
800 | },
801 | "execution_count": 103,
802 | "metadata": {},
803 | "output_type": "execute_result"
804 | }
805 | ],
806 | "source": [
807 | "tm.time() #current time in seconds since January 1, 1970 (the Unix epoch)"
808 | ]
809 | },
810 | {
811 | "cell_type": "code",
812 | "execution_count": 109,
813 | "metadata": {},
814 | "outputs": [],
815 | "source": [
816 | "dtnow = dt.datetime.fromtimestamp(tm.time()) #convert timestamp to datetime"
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": 111,
822 | "metadata": {},
823 | "outputs": [
824 | {
825 | "data": {
826 | "text/plain": [
827 | "2018"
828 | ]
829 | },
830 | "execution_count": 111,
831 | "metadata": {},
832 | "output_type": "execute_result"
833 | }
834 | ],
835 | "source": [
836 | "dtnow.year"
837 | ]
838 | },
839 | {
840 | "cell_type": "code",
841 | "execution_count": 112,
842 | "metadata": {},
843 | "outputs": [
844 | {
845 | "data": {
846 | "text/plain": [
847 | "7"
848 | ]
849 | },
850 | "execution_count": 112,
851 | "metadata": {},
852 | "output_type": "execute_result"
853 | }
854 | ],
855 | "source": [
856 | "dtnow.month"
857 | ]
858 | },
859 | {
860 | "cell_type": "code",
861 | "execution_count": 119,
862 | "metadata": {},
863 | "outputs": [],
864 | "source": [
865 | "delta = dt.timedelta(days=100)"
866 | ]
867 | },
868 | {
869 | "cell_type": "code",
870 | "execution_count": 115,
871 | "metadata": {},
872 | "outputs": [],
873 | "source": [
874 | "today = dt.date.today()"
875 | ]
876 | },
877 | {
878 | "cell_type": "code",
879 | "execution_count": 116,
880 | "metadata": {},
881 | "outputs": [
882 | {
883 | "data": {
884 | "text/plain": [
885 | "datetime.date(2018, 7, 25)"
886 | ]
887 | },
888 | "execution_count": 116,
889 | "metadata": {},
890 | "output_type": "execute_result"
891 | }
892 | ],
893 | "source": [
894 | "today"
895 | ]
896 | },
897 | {
898 | "cell_type": "code",
899 | "execution_count": 120,
900 | "metadata": {},
901 | "outputs": [
902 | {
903 | "data": {
904 | "text/plain": [
905 | "datetime.date(2018, 4, 16)"
906 | ]
907 | },
908 | "execution_count": 120,
909 | "metadata": {},
910 | "output_type": "execute_result"
911 | }
912 | ],
913 | "source": [
914 | "today - delta #subtract 100 days from today's date"
915 | ]
916 | },
917 | {
918 | "cell_type": "markdown",
919 | "metadata": {},
920 | "source": [
921 | "# 5. Map function"
922 | ]
923 | },
924 | {
925 | "cell_type": "code",
926 | "execution_count": 135,
927 | "metadata": {},
928 | "outputs": [],
929 | "source": [
930 | "a = [1,2,3,10]\n",
931 | "b = [5,6,2,9]\n",
932 | "\n",
933 | "c = map(min,a,b) #Find the element-wise minimum of the two lists\n"
934 | ]
935 | },
936 | {
937 | "cell_type": "code",
938 | "execution_count": 136,
939 | "metadata": {},
940 | "outputs": [
941 | {
942 | "name": "stdout",
943 | "output_type": "stream",
944 | "text": [
945 | "1\n",
946 | "2\n",
947 | "2\n",
948 | "9\n"
949 | ]
950 | }
951 | ],
952 | "source": [
953 | "for item in c:\n",
954 | " print(item) #print the minimum of the pairs"
955 | ]
956 | },
957 | {
958 | "cell_type": "markdown",
959 | "metadata": {},
960 | "source": [
961 | "# 6. Lambda function"
962 | ]
963 | },
964 | {
965 | "cell_type": "code",
966 | "execution_count": 2,
967 | "metadata": {},
968 | "outputs": [],
969 | "source": [
970 | "function = lambda a,b,c : a+b+c #function to add three numbers"
971 | ]
972 | },
973 | {
974 | "cell_type": "code",
975 | "execution_count": 3,
976 | "metadata": {},
977 | "outputs": [
978 | {
979 | "data": {
980 | "text/plain": [
981 | "7"
982 | ]
983 | },
984 | "execution_count": 3,
985 | "metadata": {},
986 | "output_type": "execute_result"
987 | }
988 | ],
989 | "source": [
990 | "function(2,2,3)"
991 | ]
992 | },
993 | {
994 | "cell_type": "markdown",
995 | "metadata": {},
996 | "source": [
997 | "# 7. Filter function"
998 | ]
999 | },
1000 | {
1001 | "cell_type": "code",
1002 | "execution_count": 5,
1003 | "metadata": {},
1004 | "outputs": [],
1005 | "source": [
1006 | "x = [1,2,3,4,5,6,7,8,9] #create a list\n",
1007 | "x2 = filter(lambda a : a>5, x) #filter using filter function"
1008 | ]
1009 | },
1010 | {
1011 | "cell_type": "code",
1012 | "execution_count": 8,
1013 | "metadata": {},
1014 | "outputs": [
1015 | {
1016 | "name": "stdout",
1017 | "output_type": "stream",
1018 | "text": [
1019 | "[6, 7, 8, 9]\n"
1020 | ]
1021 | }
1022 | ],
1023 | "source": [
1024 | "print(list(x2))"
1025 | ]
1026 | },
1027 | {
1028 | "cell_type": "markdown",
1029 | "metadata": {},
1030 | "source": [
1031 | "# 8. Reduce function"
1032 | ]
1033 | },
1034 | {
1035 | "cell_type": "code",
1036 | "execution_count": 10,
1037 | "metadata": {},
1038 | "outputs": [
1039 | {
1040 | "data": {
1041 | "text/plain": [
1042 | "120"
1043 | ]
1044 | },
1045 | "execution_count": 10,
1046 | "metadata": {},
1047 | "output_type": "execute_result"
1048 | }
1049 | ],
1050 | "source": [
1051 | "from functools import reduce #import reduce function\n",
1052 | "y = [1,2,3,4,5] #create list\n",
1053 | "reduce(lambda a,b : a*b,y) #use reduce"
1054 | ]
1055 | },
1056 | {
1057 | "cell_type": "markdown",
1058 | "metadata": {},
1059 | "source": [
1060 | "# 9. Zip function"
1061 | ]
1062 | },
1063 | {
1064 | "cell_type": "code",
1065 | "execution_count": 14,
1066 | "metadata": {},
1067 | "outputs": [
1068 | {
1069 | "name": "stdout",
1070 | "output_type": "stream",
1071 | "text": [
1072 | "[(1, 5), (2, 6), (3, 7), (4, 8)]\n"
1073 | ]
1074 | }
1075 | ],
1076 | "source": [
1077 | "a = [1,2,3,4] #create two lists\n",
1078 | "b = [5,6,7,8]\n",
1079 | "c = zip(a,b) #Use the zip function\n",
1080 | "print(list(c))"
1081 | ]
1082 | },
1083 | {
1084 | "cell_type": "markdown",
1085 | "metadata": {},
1086 | "source": [
1087 | "# 10. List comprehension"
1088 | ]
1089 | },
1090 | {
1091 | "cell_type": "code",
1092 | "execution_count": 148,
1093 | "metadata": {},
1094 | "outputs": [],
1095 | "source": [
1096 | "#collect the even numbers from 0 to 99\n",
1097 | "\n",
1098 | "even=[]\n",
1099 | "for i in range(100):\n",
1100 | " if i%2 ==0:\n",
1101 | " even.append(i)\n",
1102 | " else:\n",
1103 | " None\n",
1104 | " "
1105 | ]
1106 | },
1107 | {
1108 | "cell_type": "code",
1109 | "execution_count": 149,
1110 | "metadata": {},
1111 | "outputs": [
1112 | {
1113 | "name": "stdout",
1114 | "output_type": "stream",
1115 | "text": [
1116 | "[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]\n"
1117 | ]
1118 | }
1119 | ],
1120 | "source": [
1121 | "print(even)"
1122 | ]
1123 | },
1124 | {
1125 | "cell_type": "code",
1126 | "execution_count": 154,
1127 | "metadata": {},
1128 | "outputs": [
1129 | {
1130 | "name": "stdout",
1131 | "output_type": "stream",
1132 | "text": [
1133 | "[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]\n"
1134 | ]
1135 | }
1136 | ],
1137 | "source": [
1138 | "#Let's do the same using list comprehension\n",
1139 | "\n",
1140 | "even = [i for i in range(100) if i%2==0]\n",
1141 | "print(even)"
1142 | ]
1143 | },
1144 | {
1145 | "cell_type": "markdown",
1146 | "metadata": {},
1147 | "source": [
1148 | "# 11. Numpy"
1149 | ]
1150 | },
1151 | {
1152 | "cell_type": "code",
1153 | "execution_count": 155,
1154 | "metadata": {},
1155 | "outputs": [],
1156 | "source": [
1157 | "import numpy as np #import"
1158 | ]
1159 | },
1160 | {
1161 | "cell_type": "code",
1162 | "execution_count": 157,
1163 | "metadata": {},
1164 | "outputs": [
1165 | {
1166 | "data": {
1167 | "text/plain": [
1168 | "array([1, 2, 3])"
1169 | ]
1170 | },
1171 | "execution_count": 157,
1172 | "metadata": {},
1173 | "output_type": "execute_result"
1174 | }
1175 | ],
1176 | "source": [
1177 | "#create numpy array\n",
1178 | "np.array([1,2,3])"
1179 | ]
1180 | },
1181 | {
1182 | "cell_type": "code",
1183 | "execution_count": 160,
1184 | "metadata": {},
1185 | "outputs": [
1186 | {
1187 | "data": {
1188 | "text/plain": [
1189 | "array([[1, 2, 3],\n",
1190 | " [4, 5, 6]])"
1191 | ]
1192 | },
1193 | "execution_count": 160,
1194 | "metadata": {},
1195 | "output_type": "execute_result"
1196 | }
1197 | ],
1198 | "source": [
1199 | "a = np.array([[1,2,3],[4,5,6]]) #create multi dimensional array\n",
1200 | "a"
1201 | ]
1202 | },
1203 | {
1204 | "cell_type": "code",
1205 | "execution_count": 161,
1206 | "metadata": {},
1207 | "outputs": [
1208 | {
1209 | "data": {
1210 | "text/plain": [
1211 | "(2, 3)"
1212 | ]
1213 | },
1214 | "execution_count": 161,
1215 | "metadata": {},
1216 | "output_type": "execute_result"
1217 | }
1218 | ],
1219 | "source": [
1220 | "#shape of the array\n",
1221 | "a.shape"
1222 | ]
1223 | },
1224 | {
1225 | "cell_type": "code",
1226 | "execution_count": 163,
1227 | "metadata": {},
1228 | "outputs": [
1229 | {
1230 | "data": {
1231 | "text/plain": [
1232 | "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29])"
1233 | ]
1234 | },
1235 | "execution_count": 163,
1236 | "metadata": {},
1237 | "output_type": "execute_result"
1238 | }
1239 | ],
1240 | "source": [
1241 | "#evenly spaced values within the half-open interval [1, 30) with a step of 2\n",
1242 | "b = np.arange(1,30,2)\n",
1243 | "b"
1244 | ]
1245 | },
1246 | {
1247 | "cell_type": "code",
1248 | "execution_count": 170,
1249 | "metadata": {},
1250 | "outputs": [
1251 | {
1252 | "data": {
1253 | "text/plain": [
1254 | "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29])"
1255 | ]
1256 | },
1257 | "execution_count": 170,
1258 | "metadata": {},
1259 | "output_type": "execute_result"
1260 | }
1261 | ],
1262 | "source": [
1263 | "#create new shape of data\n",
1264 | "b.reshape(5,3)"
1265 | ]
1266 | },
1267 | {
1268 | "cell_type": "code",
1269 | "execution_count": 174,
1270 | "metadata": {},
1271 | "outputs": [
1272 | {
1273 | "data": {
1274 | "text/plain": [
1275 | "array([ 1. , 1.21052632, 1.42105263, 1.63157895, 1.84210526,\n",
1276 | " 2.05263158, 2.26315789, 2.47368421, 2.68421053, 2.89473684,\n",
1277 | " 3.10526316, 3.31578947, 3.52631579, 3.73684211, 3.94736842,\n",
1278 | " 4.15789474, 4.36842105, 4.57894737, 4.78947368, 5. ])"
1279 | ]
1280 | },
1281 | "execution_count": 174,
1282 | "metadata": {},
1283 | "output_type": "execute_result"
1284 | }
1285 | ],
1286 | "source": [
1287 | "#get evenly spaced numbers between specified interval\n",
1288 | "c = np.linspace(1,5,20)\n",
1289 | "c"
1290 | ]
1291 | },
1292 | {
1293 | "cell_type": "code",
1294 | "execution_count": 175,
1295 | "metadata": {},
1296 | "outputs": [
1297 | {
1298 | "data": {
1299 | "text/plain": [
1300 | "(20,)"
1301 | ]
1302 | },
1303 | "execution_count": 175,
1304 | "metadata": {},
1305 | "output_type": "execute_result"
1306 | }
1307 | ],
1308 | "source": [
1309 | "c.shape"
1310 | ]
1311 | },
1312 | {
1313 | "cell_type": "code",
1314 | "execution_count": 176,
1315 | "metadata": {},
1316 | "outputs": [
1317 | {
1318 | "data": {
1319 | "text/plain": [
1320 | "array([[ 1. , 1.21052632, 1.42105263, 1.63157895],\n",
1321 | " [ 1.84210526, 2.05263158, 2.26315789, 2.47368421],\n",
1322 | " [ 2.68421053, 2.89473684, 3.10526316, 3.31578947],\n",
1323 | " [ 3.52631579, 3.73684211, 3.94736842, 4.15789474],\n",
1324 | " [ 4.36842105, 4.57894737, 4.78947368, 5. ]])"
1325 | ]
1326 | },
1327 | "execution_count": 176,
1328 | "metadata": {},
1329 | "output_type": "execute_result"
1330 | }
1331 | ],
1332 | "source": [
1333 | "#change shape of array in-place\n",
1334 | "c.resize(5,4)\n",
1335 | "c"
1336 | ]
1337 | },
1338 | {
1339 | "cell_type": "code",
1340 | "execution_count": 177,
1341 | "metadata": {},
1342 | "outputs": [
1343 | {
1344 | "data": {
1345 | "text/plain": [
1346 | "(5, 4)"
1347 | ]
1348 | },
1349 | "execution_count": 177,
1350 | "metadata": {},
1351 | "output_type": "execute_result"
1352 | }
1353 | ],
1354 | "source": [
1355 | "c.shape"
1356 | ]
1357 | },
1358 | {
1359 | "cell_type": "code",
1360 | "execution_count": 181,
1361 | "metadata": {},
1362 | "outputs": [
1363 | {
1364 | "data": {
1365 | "text/plain": [
1366 | "array([[ 1., 1.],\n",
1367 | " [ 1., 1.]])"
1368 | ]
1369 | },
1370 | "execution_count": 181,
1371 | "metadata": {},
1372 | "output_type": "execute_result"
1373 | }
1374 | ],
1375 | "source": [
1376 | "#create array filled with ones\n",
1377 | "d = np.ones((2,2))\n",
1378 | "d"
1379 | ]
1380 | },
1381 | {
1382 | "cell_type": "code",
1383 | "execution_count": 185,
1384 | "metadata": {},
1385 | "outputs": [
1386 | {
1387 | "data": {
1388 | "text/plain": [
1389 | "array([[ 0., 0., 0.],\n",
1390 | " [ 0., 0., 0.],\n",
1391 | " [ 0., 0., 0.]])"
1392 | ]
1393 | },
1394 | "execution_count": 185,
1395 | "metadata": {},
1396 | "output_type": "execute_result"
1397 | }
1398 | ],
1399 | "source": [
1400 | "#create array filled with zeros\n",
1401 | "e = np.zeros((3,3))\n",
1402 | "e"
1403 | ]
1404 | },
1405 | {
1406 | "cell_type": "code",
1407 | "execution_count": 189,
1408 | "metadata": {},
1409 | "outputs": [
1410 | {
1411 | "data": {
1412 | "text/plain": [
1413 | "array([[ 1., 0.],\n",
1414 | " [ 0., 1.]])"
1415 | ]
1416 | },
1417 | "execution_count": 189,
1418 | "metadata": {},
1419 | "output_type": "execute_result"
1420 | }
1421 | ],
1422 | "source": [
1423 | "#create diagonal matrix with diagonal values =1\n",
1424 | "f = np.eye(2)\n",
1425 | "f"
1426 | ]
1427 | },
1428 | {
1429 | "cell_type": "code",
1430 | "execution_count": 191,
1431 | "metadata": {},
1432 | "outputs": [
1433 | {
1434 | "data": {
1435 | "text/plain": [
1436 | "array([ 1., 1.])"
1437 | ]
1438 | },
1439 | "execution_count": 191,
1440 | "metadata": {},
1441 | "output_type": "execute_result"
1442 | }
1443 | ],
1444 | "source": [
1445 | "#extract only the diagonal values from array\n",
1446 | "np.diag(f)"
1447 | ]
1448 | },
1449 | {
1450 | "cell_type": "code",
1451 | "execution_count": 194,
1452 | "metadata": {},
1453 | "outputs": [
1454 | {
1455 | "data": {
1456 | "text/plain": [
1457 | "array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])"
1458 | ]
1459 | },
1460 | "execution_count": 194,
1461 | "metadata": {},
1462 | "output_type": "execute_result"
1463 | }
1464 | ],
1465 | "source": [
1466 | "#create array using repeating list\n",
1467 | "g = np.array([1,2,3]*5)\n",
1468 | "g"
1469 | ]
1470 | },
1471 | {
1472 | "cell_type": "code",
1473 | "execution_count": 196,
1474 | "metadata": {},
1475 | "outputs": [
1476 | {
1477 | "data": {
1478 | "text/plain": [
1479 | "array([1, 1, 1, 2, 2, 2, 3, 3, 3])"
1480 | ]
1481 | },
1482 | "execution_count": 196,
1483 | "metadata": {},
1484 | "output_type": "execute_result"
1485 | }
1486 | ],
1487 | "source": [
1488 | "#repeat elements using repeat\n",
1489 | "np.repeat([1,2,3],3)"
1490 | ]
1491 | },
1492 | {
1493 | "cell_type": "code",
1494 | "execution_count": 215,
1495 | "metadata": {},
1496 | "outputs": [],
1497 | "source": [
1498 | "h = np.ones((2,3))\n",
1499 | "g = np.random.rand(2,3) #this generates a random array"
1500 | ]
1501 | },
1502 | {
1503 | "cell_type": "code",
1504 | "execution_count": 221,
1505 | "metadata": {},
1506 | "outputs": [
1507 | {
1508 | "data": {
1509 | "text/plain": [
1510 | "array([[ 1. , 1. , 1. ],\n",
1511 | " [ 1. , 1. , 1. ],\n",
1512 | " [ 0.50725359, 0.28610842, 0.98268379],\n",
1513 | " [ 0.52425524, 0.23682556, 0.21598467]])"
1514 | ]
1515 | },
1516 | "execution_count": 221,
1517 | "metadata": {},
1518 | "output_type": "execute_result"
1519 | }
1520 | ],
1521 | "source": [
1522 | "#stack the above two arrays vertically\n",
1523 | "i = np.vstack([h,g])\n",
1524 | "i"
1525 | ]
1526 | },
1527 | {
1528 | "cell_type": "code",
1529 | "execution_count": 222,
1530 | "metadata": {},
1531 | "outputs": [
1532 | {
1533 | "data": {
1534 | "text/plain": [
1535 | "(4, 3)"
1536 | ]
1537 | },
1538 | "execution_count": 222,
1539 | "metadata": {},
1540 | "output_type": "execute_result"
1541 | }
1542 | ],
1543 | "source": [
1544 | "i.shape"
1545 | ]
1546 | },
1547 | {
1548 | "cell_type": "code",
1549 | "execution_count": 224,
1550 | "metadata": {},
1551 | "outputs": [
1552 | {
1553 | "data": {
1554 | "text/plain": [
1555 | "array([[ 1. , 1. , 1. , 0.50725359, 0.28610842,\n",
1556 | " 0.98268379],\n",
1557 | " [ 1. , 1. , 1. , 0.52425524, 0.23682556,\n",
1558 | " 0.21598467]])"
1559 | ]
1560 | },
1561 | "execution_count": 224,
1562 | "metadata": {},
1563 | "output_type": "execute_result"
1564 | }
1565 | ],
1566 | "source": [
1567 | "#Now, stack them horizontally\n",
1568 | "j = np.hstack([h,g])\n",
1569 | "j"
1570 | ]
1571 | },
1572 | {
1573 | "cell_type": "code",
1574 | "execution_count": 225,
1575 | "metadata": {},
1576 | "outputs": [
1577 | {
1578 | "data": {
1579 | "text/plain": [
1580 | "(2, 6)"
1581 | ]
1582 | },
1583 | "execution_count": 225,
1584 | "metadata": {},
1585 | "output_type": "execute_result"
1586 | }
1587 | ],
1588 | "source": [
1589 | "j.shape"
1590 | ]
1591 | },
1592 | {
1593 | "cell_type": "code",
1594 | "execution_count": 228,
1595 | "metadata": {},
1596 | "outputs": [
1597 | {
1598 | "name": "stdout",
1599 | "output_type": "stream",
1600 | "text": [
1601 | "[[ 0.42443005 0.28611242]\n",
1602 | " [ 0.65396499 0.57360354]]\n",
1603 | "[[ 0.90377716 0.32249125]\n",
1604 | " [ 0.59825087 0.13558027]]\n"
1605 | ]
1606 | }
1607 | ],
1608 | "source": [
1609 | "#Array operations\n",
1610 | "k = np.random.rand(2,2)\n",
1611 | "l = np.random.rand(2,2)\n",
1612 | "print(k)\n",
1613 | "#print(l)"
1614 | ]
1615 | },
1616 | {
1617 | "cell_type": "code",
1618 | "execution_count": 230,
1619 | "metadata": {},
1620 | "outputs": [
1621 | {
1622 | "data": {
1623 | "text/plain": [
1624 | "array([[ 1.32820721, 0.60860367],\n",
1625 | " [ 1.25221586, 0.70918381]])"
1626 | ]
1627 | },
1628 | "execution_count": 230,
1629 | "metadata": {},
1630 | "output_type": "execute_result"
1631 | }
1632 | ],
1633 | "source": [
1634 | "#element wise addition\n",
1635 | "m = k + l\n",
1636 | "m"
1637 | ]
1638 | },
1639 | {
1640 | "cell_type": "code",
1641 | "execution_count": 232,
1642 | "metadata": {},
1643 | "outputs": [
1644 | {
1645 | "data": {
1646 | "text/plain": [
1647 | "array([[-0.47934711, -0.03637883],\n",
1648 | " [ 0.05571412, 0.43802327]])"
1649 | ]
1650 | },
1651 | "execution_count": 232,
1652 | "metadata": {},
1653 | "output_type": "execute_result"
1654 | }
1655 | ],
1656 | "source": [
1657 | "#element wise subtraction\n",
1658 | "n = k-l\n",
1659 | "n"
1660 | ]
1661 | },
1662 | {
1663 | "cell_type": "code",
1664 | "execution_count": 234,
1665 | "metadata": {},
1666 | "outputs": [
1667 | {
1668 | "data": {
1669 | "text/plain": [
1670 | "array([[ 0.38359018, 0.09226875],\n",
1671 | " [ 0.39123512, 0.07776932]])"
1672 | ]
1673 | },
1674 | "execution_count": 234,
1675 | "metadata": {},
1676 | "output_type": "execute_result"
1677 | }
1678 | ],
1679 | "source": [
1680 | "#element wise multiplication\n",
1681 | "o = k*l\n",
1682 | "o"
1683 | ]
1684 | },
1685 | {
1686 | "cell_type": "code",
1687 | "execution_count": 236,
1688 | "metadata": {},
1689 | "outputs": [
1690 | {
1691 | "data": {
1692 | "text/plain": [
1693 | "array([[ 0.18014086, 0.08186032],\n",
1694 | " [ 0.42767021, 0.32902102]])"
1695 | ]
1696 | },
1697 | "execution_count": 236,
1698 | "metadata": {},
1699 | "output_type": "execute_result"
1700 | }
1701 | ],
1702 | "source": [
1703 | "#element wise power\n",
1704 | "p = k**2\n",
1705 | "p"
1706 | ]
1707 | },
1708 | {
1709 | "cell_type": "code",
1710 | "execution_count": 238,
1711 | "metadata": {},
1712 | "outputs": [
1713 | {
1714 | "data": {
1715 | "text/plain": [
1716 | "array([[ 0.55475719, 0.17566617],\n",
1717 | " [ 0.93419744, 0.28866731]])"
1718 | ]
1719 | },
1720 | "execution_count": 238,
1721 | "metadata": {},
1722 | "output_type": "execute_result"
1723 | }
1724 | ],
1725 | "source": [
1726 | "#dot product\n",
1727 | "q = k.dot(l)\n",
1728 | "q"
1729 | ]
1730 | },
1731 | {
1732 | "cell_type": "code",
1733 | "execution_count": 241,
1734 | "metadata": {},
1735 | "outputs": [
1736 | {
1737 | "data": {
1738 | "text/plain": [
1739 | "array([[1, 2],\n",
1740 | " [3, 4]])"
1741 | ]
1742 | },
1743 | "execution_count": 241,
1744 | "metadata": {},
1745 | "output_type": "execute_result"
1746 | }
1747 | ],
1748 | "source": [
1749 | "#transpose arrays\n",
1750 | "a = np.array([[1,2],[3,4]])\n",
1751 | "a"
1752 | ]
1753 | },
1754 | {
1755 | "cell_type": "code",
1756 | "execution_count": 242,
1757 | "metadata": {},
1758 | "outputs": [
1759 | {
1760 | "data": {
1761 | "text/plain": [
1762 | "array([[1, 3],\n",
1763 | " [2, 4]])"
1764 | ]
1765 | },
1766 | "execution_count": 242,
1767 | "metadata": {},
1768 | "output_type": "execute_result"
1769 | }
1770 | ],
1771 | "source": [
1772 | "a.T #transpose"
1773 | ]
1774 | },
1775 | {
1776 | "cell_type": "code",
1777 | "execution_count": 243,
1778 | "metadata": {},
1779 | "outputs": [
1780 | {
1781 | "data": {
1782 | "text/plain": [
1783 | "dtype('int32')"
1784 | ]
1785 | },
1786 | "execution_count": 243,
1787 | "metadata": {},
1788 | "output_type": "execute_result"
1789 | }
1790 | ],
1791 | "source": [
1792 | "#check datatype of elements in array\n",
1793 | "a.dtype"
1794 | ]
1795 | },
1796 | {
1797 | "cell_type": "code",
1798 | "execution_count": 245,
1799 | "metadata": {},
1800 | "outputs": [
1801 | {
1802 | "data": {
1803 | "text/plain": [
1804 | "dtype('float32')"
1805 | ]
1806 | },
1807 | "execution_count": 245,
1808 | "metadata": {},
1809 | "output_type": "execute_result"
1810 | }
1811 | ],
1812 | "source": [
1813 | "#change type using astype\n",
1814 | "b = a.astype('f')\n",
1815 | "b.dtype"
1816 | ]
1817 | },
1818 | {
1819 | "cell_type": "code",
1820 | "execution_count": 246,
1821 | "metadata": {},
1822 | "outputs": [],
1823 | "source": [
1824 | "#Math functions in numpy\n",
1825 | "c = np.array([1,2,3,4,5])"
1826 | ]
1827 | },
1828 | {
1829 | "cell_type": "code",
1830 | "execution_count": 248,
1831 | "metadata": {},
1832 | "outputs": [
1833 | {
1834 | "data": {
1835 | "text/plain": [
1836 | "15"
1837 | ]
1838 | },
1839 | "execution_count": 248,
1840 | "metadata": {},
1841 | "output_type": "execute_result"
1842 | }
1843 | ],
1844 | "source": [
1845 | "c.sum()"
1846 | ]
1847 | },
1848 | {
1849 | "cell_type": "code",
1850 | "execution_count": 249,
1851 | "metadata": {},
1852 | "outputs": [
1853 | {
1854 | "data": {
1855 | "text/plain": [
1856 | "5"
1857 | ]
1858 | },
1859 | "execution_count": 249,
1860 | "metadata": {},
1861 | "output_type": "execute_result"
1862 | }
1863 | ],
1864 | "source": [
1865 | "c.max()"
1866 | ]
1867 | },
1868 | {
1869 | "cell_type": "code",
1870 | "execution_count": 250,
1871 | "metadata": {},
1872 | "outputs": [
1873 | {
1874 | "data": {
1875 | "text/plain": [
1876 | "3.0"
1877 | ]
1878 | },
1879 | "execution_count": 250,
1880 | "metadata": {},
1881 | "output_type": "execute_result"
1882 | }
1883 | ],
1884 | "source": [
1885 | "c.mean()"
1886 | ]
1887 | },
1888 | {
1889 | "cell_type": "code",
1890 | "execution_count": 252,
1891 | "metadata": {},
1892 | "outputs": [
1893 | {
1894 | "data": {
1895 | "text/plain": [
1896 | "4"
1897 | ]
1898 | },
1899 | "execution_count": 252,
1900 | "metadata": {},
1901 | "output_type": "execute_result"
1902 | }
1903 | ],
1904 | "source": [
1905 | "#return index of maximum value\n",
1906 | "c.argmax()"
1907 | ]
1908 | },
1909 | {
1910 | "cell_type": "code",
1911 | "execution_count": 253,
1912 | "metadata": {},
1913 | "outputs": [
1914 | {
1915 | "data": {
1916 | "text/plain": [
1917 | "0"
1918 | ]
1919 | },
1920 | "execution_count": 253,
1921 | "metadata": {},
1922 | "output_type": "execute_result"
1923 | }
1924 | ],
1925 | "source": [
1926 | "#return index of minimum value\n",
1927 | "c.argmin()"
1928 | ]
1929 | },
1930 | {
1931 | "cell_type": "code",
1932 | "execution_count": 262,
1933 | "metadata": {},
1934 | "outputs": [
1935 | {
1936 | "data": {
1937 | "text/plain": [
1938 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81], dtype=int32)"
1939 | ]
1940 | },
1941 | "execution_count": 262,
1942 | "metadata": {},
1943 | "output_type": "execute_result"
1944 | }
1945 | ],
1946 | "source": [
1947 | "d = np.arange(10)**2\n",
1948 | "d"
1949 | ]
1950 | },
1951 | {
1952 | "cell_type": "code",
1953 | "execution_count": 263,
1954 | "metadata": {},
1955 | "outputs": [
1956 | {
1957 | "data": {
1958 | "text/plain": [
1959 | "4"
1960 | ]
1961 | },
1962 | "execution_count": 263,
1963 | "metadata": {},
1964 | "output_type": "execute_result"
1965 | }
1966 | ],
1967 | "source": [
1968 | "#use index for accessing values\n",
1969 | "d[2]"
1970 | ]
1971 | },
1972 | {
1973 | "cell_type": "code",
1974 | "execution_count": 264,
1975 | "metadata": {},
1976 | "outputs": [
1977 | {
1978 | "data": {
1979 | "text/plain": [
1980 | "0"
1981 | ]
1982 | },
1983 | "execution_count": 264,
1984 | "metadata": {},
1985 | "output_type": "execute_result"
1986 | }
1987 | ],
1988 | "source": [
1989 | "d[0]"
1990 | ]
1991 | },
1992 | {
1993 | "cell_type": "code",
1994 | "execution_count": 266,
1995 | "metadata": {},
1996 | "outputs": [
1997 | {
1998 | "data": {
1999 | "text/plain": [
2000 | "array([ 1, 4, 9, 16], dtype=int32)"
2001 | ]
2002 | },
2003 | "execution_count": 266,
2004 | "metadata": {},
2005 | "output_type": "execute_result"
2006 | }
2007 | ],
2008 | "source": [
2009 | "d[1:5]"
2010 | ]
2011 | },
2012 | {
2013 | "cell_type": "code",
2014 | "execution_count": 276,
2015 | "metadata": {},
2016 | "outputs": [
2017 | {
2018 | "data": {
2019 | "text/plain": [
2020 | "array([81], dtype=int32)"
2021 | ]
2022 | },
2023 | "execution_count": 276,
2024 | "metadata": {},
2025 | "output_type": "execute_result"
2026 | }
2027 | ],
2028 | "source": [
2029 | "#use negatives to count from back\n",
2030 | "d[-1:]"
2031 | ]
2032 | },
2033 | {
2034 | "cell_type": "code",
2035 | "execution_count": 280,
2036 | "metadata": {},
2037 | "outputs": [
2038 | {
2039 | "data": {
2040 | "text/plain": [
2041 | "array([ 1, 9, 25, 49, 81], dtype=int32)"
2042 | ]
2043 | },
2044 | "execution_count": 280,
2045 | "metadata": {},
2046 | "output_type": "execute_result"
2047 | }
2048 | ],
2049 | "source": [
2050 | "#use two :: to include step size\n",
2051 | "d[1:10:2] #d[start:stop:stepsize]"
2052 | ]
2053 | },
2054 | {
2055 | "cell_type": "code",
2056 | "execution_count": 282,
2057 | "metadata": {},
2058 | "outputs": [
2059 | {
2060 | "data": {
2061 | "text/plain": [
2062 | "array([[ 0, 1, 2, 3, 4, 5],\n",
2063 | " [ 6, 7, 8, 9, 10, 11],\n",
2064 | " [12, 13, 14, 15, 16, 17],\n",
2065 | " [18, 19, 20, 21, 22, 23],\n",
2066 | " [24, 25, 26, 27, 28, 29],\n",
2067 | " [30, 31, 32, 33, 34, 35]])"
2068 | ]
2069 | },
2070 | "execution_count": 282,
2071 | "metadata": {},
2072 | "output_type": "execute_result"
2073 | }
2074 | ],
2075 | "source": [
2076 | "#multidimensional arrays\n",
2077 | "e = np.arange(36)\n",
2078 | "e.resize(6,6)\n",
2079 | "e"
2080 | ]
2081 | },
2082 | {
2083 | "cell_type": "code",
2084 | "execution_count": 284,
2085 | "metadata": {},
2086 | "outputs": [
2087 | {
2088 | "data": {
2089 | "text/plain": [
2090 | "8"
2091 | ]
2092 | },
2093 | "execution_count": 284,
2094 | "metadata": {},
2095 | "output_type": "execute_result"
2096 | }
2097 | ],
2098 | "source": [
2099 | "#Access 2nd row and 3rd column\n",
2100 | "e[1,2]"
2101 | ]
2102 | },
2103 | {
2104 | "cell_type": "code",
2105 | "execution_count": 286,
2106 | "metadata": {},
2107 | "outputs": [
2108 | {
2109 | "data": {
2110 | "text/plain": [
2111 | "array([ 8, 9, 10, 11])"
2112 | ]
2113 | },
2114 | "execution_count": 286,
2115 | "metadata": {},
2116 | "output_type": "execute_result"
2117 | }
2118 | ],
2119 | "source": [
2120 | "#use : to select range of columns\n",
2121 | "e[1, 2:6]"
2122 | ]
2123 | },
2124 | {
2125 | "cell_type": "code",
2126 | "execution_count": 287,
2127 | "metadata": {},
2128 | "outputs": [
2129 | {
2130 | "data": {
2131 | "text/plain": [
2132 | "array([[ 0, 1, 2, 3, 4],\n",
2133 | " [ 6, 7, 8, 9, 10]])"
2134 | ]
2135 | },
2136 | "execution_count": 287,
2137 | "metadata": {},
2138 | "output_type": "execute_result"
2139 | }
2140 | ],
2141 | "source": [
2142 | "#Select all rows till 2nd row and all columns except last column\n",
2143 | "e[:2,:-1]"
2144 | ]
2145 | },
2146 | {
2147 | "cell_type": "code",
2148 | "execution_count": 290,
2149 | "metadata": {},
2150 | "outputs": [
2151 | {
2152 | "data": {
2153 | "text/plain": [
2154 | "array([30, 32, 34])"
2155 | ]
2156 | },
2157 | "execution_count": 290,
2158 | "metadata": {},
2159 | "output_type": "execute_result"
2160 | }
2161 | ],
2162 | "source": [
2163 | "#select last row and alternate columns\n",
2164 | "e[-1,::2]"
2165 | ]
2166 | },
2167 | {
2168 | "cell_type": "code",
2169 | "execution_count": 291,
2170 | "metadata": {},
2171 | "outputs": [
2172 | {
2173 | "data": {
2174 | "text/plain": [
2175 | "array([32, 34])"
2176 | ]
2177 | },
2178 | "execution_count": 291,
2179 | "metadata": {},
2180 | "output_type": "execute_result"
2181 | }
2182 | ],
2183 | "source": [
2184 | "#select last row and alternate columns from 3rd column\n",
2185 | "e[-1,2::2]"
2186 | ]
2187 | },
2188 | {
2189 | "cell_type": "code",
2190 | "execution_count": 292,
2191 | "metadata": {},
2192 | "outputs": [
2193 | {
2194 | "data": {
2195 | "text/plain": [
2196 | "array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])"
2197 | ]
2198 | },
2199 | "execution_count": 292,
2200 | "metadata": {},
2201 | "output_type": "execute_result"
2202 | }
2203 | ],
2204 | "source": [
2205 | "#select values from array greater than 20\n",
2206 | "e[e>20]"
2207 | ]
2208 | },
2209 | {
2210 | "cell_type": "code",
2211 | "execution_count": 294,
2212 | "metadata": {},
2213 | "outputs": [
2214 | {
2215 | "data": {
2216 | "text/plain": [
2217 | "array([[ 0, 1, 2, 3, 4, 5],\n",
2218 | " [ 6, 7, 8, 9, 10, 11],\n",
2219 | " [12, 13, 14, 15, 16, 17],\n",
2220 | " [18, 19, 20, 20, 20, 20],\n",
2221 | " [20, 20, 20, 20, 20, 20],\n",
2222 | " [20, 20, 20, 20, 20, 20]])"
2223 | ]
2224 | },
2225 | "execution_count": 294,
2226 | "metadata": {},
2227 | "output_type": "execute_result"
2228 | }
2229 | ],
2230 | "source": [
2231 | "#Cap values at 20: assign 20 to every element that is greater than 20\n",
2232 | "e[e>20] = 20\n",
2233 | "e"
2234 | ]
2235 | },
2236 | {
2237 | "cell_type": "code",
2238 | "execution_count": 308,
2239 | "metadata": {},
2240 | "outputs": [
2241 | {
2242 | "data": {
2243 | "text/plain": [
2244 | "array([[0, 0, 0],\n",
2245 | " [0, 0, 0],\n",
2246 | " [0, 0, 0]])"
2247 | ]
2248 | },
2249 | "execution_count": 308,
2250 | "metadata": {},
2251 | "output_type": "execute_result"
2252 | }
2253 | ],
2254 | "source": [
2255 | "#BE CAREFUL WHILE COPYING ARRAYS: a slice is a view of the original array, not a copy\n",
2256 | "f = e[:3,:3]\n",
2257 | "f"
2258 | ]
2259 | },
2260 | {
2261 | "cell_type": "code",
2262 | "execution_count": 309,
2263 | "metadata": {},
2264 | "outputs": [
2265 | {
2266 | "data": {
2267 | "text/plain": [
2268 | "array([[0, 0, 0],\n",
2269 | " [0, 0, 0],\n",
2270 | " [0, 0, 0]])"
2271 | ]
2272 | },
2273 | "execution_count": 309,
2274 | "metadata": {},
2275 | "output_type": "execute_result"
2276 | }
2277 | ],
2278 | "source": [
2279 | "f[:] = 0\n",
2280 | "f"
2281 | ]
2282 | },
2283 | {
2284 | "cell_type": "code",
2285 | "execution_count": 310,
2286 | "metadata": {},
2287 | "outputs": [
2288 | {
2289 | "data": {
2290 | "text/plain": [
2291 | "array([[ 0, 0, 0, 3, 4, 5],\n",
2292 | " [ 0, 0, 0, 9, 10, 11],\n",
2293 | " [ 0, 0, 0, 15, 16, 17],\n",
2294 | " [18, 19, 20, 20, 20, 20],\n",
2295 | " [20, 20, 20, 20, 20, 20],\n",
2296 | " [20, 20, 20, 20, 20, 20]])"
2297 | ]
2298 | },
2299 | "execution_count": 310,
2300 | "metadata": {},
2301 | "output_type": "execute_result"
2302 | }
2303 | ],
2304 | "source": [
2305 | "e #e also got changed because f is a view into e, not a copy"
2306 | ]
2307 | },
2308 | {
2309 | "cell_type": "code",
2310 | "execution_count": 313,
2311 | "metadata": {},
2312 | "outputs": [
2313 | {
2314 | "data": {
2315 | "text/plain": [
2316 | "array([[ 0, 0, 0, 3, 4, 5],\n",
2317 | " [ 0, 0, 0, 9, 10, 11],\n",
2318 | " [ 0, 0, 0, 15, 16, 17],\n",
2319 | " [18, 19, 20, 20, 20, 20],\n",
2320 | " [20, 20, 20, 20, 20, 20],\n",
2321 | " [20, 20, 20, 20, 20, 20]])"
2322 | ]
2323 | },
2324 | "execution_count": 313,
2325 | "metadata": {},
2326 | "output_type": "execute_result"
2327 | }
2328 | ],
2329 | "source": [
2330 | "#copy using the copy() method so that changes to f do not affect e\n",
2331 | "f = e.copy()\n",
2332 | "f"
2333 | ]
2334 | },
2335 | {
2336 | "cell_type": "code",
2337 | "execution_count": 315,
2338 | "metadata": {},
2339 | "outputs": [
2340 | {
2341 | "data": {
2342 | "text/plain": [
2343 | "array([[ 0, 0, 0, 3, 4, 5],\n",
2344 | " [ 0, 0, 0, 9, 10, 11],\n",
2345 | " [ 0, 0, 0, 15, 16, 17],\n",
2346 | " [18, 19, 20, 0, 0, 0],\n",
2347 | " [20, 20, 20, 0, 0, 0],\n",
2348 | " [20, 20, 20, 0, 0, 0]])"
2349 | ]
2350 | },
2351 | "execution_count": 315,
2352 | "metadata": {},
2353 | "output_type": "execute_result"
2354 | }
2355 | ],
2356 | "source": [
2357 | "f[3:,3:] = 0\n",
2358 | "f"
2359 | ]
2360 | },
2361 | {
2362 | "cell_type": "code",
2363 | "execution_count": 316,
2364 | "metadata": {},
2365 | "outputs": [
2366 | {
2367 | "data": {
2368 | "text/plain": [
2369 | "array([[ 0, 0, 0, 3, 4, 5],\n",
2370 | " [ 0, 0, 0, 9, 10, 11],\n",
2371 | " [ 0, 0, 0, 15, 16, 17],\n",
2372 | " [18, 19, 20, 20, 20, 20],\n",
2373 | " [20, 20, 20, 20, 20, 20],\n",
2374 | " [20, 20, 20, 20, 20, 20]])"
2375 | ]
2376 | },
2377 | "execution_count": 316,
2378 | "metadata": {},
2379 | "output_type": "execute_result"
2380 | }
2381 | ],
2382 | "source": [
2383 | "e #e is unchanged because f is an independent copy"
2384 | ]
2385 | },
2386 | {
2387 | "cell_type": "code",
2388 | "execution_count": 329,
2389 | "metadata": {},
2390 | "outputs": [
2391 | {
2392 | "data": {
2393 | "text/plain": [
2394 | "array([[9, 7, 1, 4],\n",
2395 | " [1, 4, 3, 6],\n",
2396 | " [2, 5, 5, 1],\n",
2397 | " [2, 2, 9, 9]])"
2398 | ]
2399 | },
2400 | "execution_count": 329,
2401 | "metadata": {},
2402 | "output_type": "execute_result"
2403 | }
2404 | ],
2405 | "source": [
2406 | "#iterating over arrays: first create a random 4x4 array of integers from 1 to 9\n",
2407 | "g = np.random.randint(1,10,(4,4))\n",
2408 | "g"
2409 | ]
2410 | },
2411 | {
2412 | "cell_type": "code",
2413 | "execution_count": 330,
2414 | "metadata": {},
2415 | "outputs": [
2416 | {
2417 | "name": "stdout",
2418 | "output_type": "stream",
2419 | "text": [
2420 | "[9 7 1 4]\n",
2421 | "[1 4 3 6]\n",
2422 | "[2 5 5 1]\n",
2423 | "[2 2 9 9]\n"
2424 | ]
2425 | }
2426 | ],
2427 | "source": [
2428 | "#iterate over rows\n",
2429 | "for row in g:\n",
2430 | " print(row)"
2431 | ]
2432 | },
2433 | {
2434 | "cell_type": "code",
2435 | "execution_count": 332,
2436 | "metadata": {},
2437 | "outputs": [
2438 | {
2439 | "name": "stdout",
2440 | "output_type": "stream",
2441 | "text": [
2442 | "[9 7 1 4]\n",
2443 | "[1 4 3 6]\n",
2444 | "[2 5 5 1]\n",
2445 | "[2 2 9 9]\n"
2446 | ]
2447 | }
2448 | ],
2449 | "source": [
2450 | "#iterate over rows by index\n",
2451 | "for i in range(len(g)):\n",
2452 | " print(g[i])"
2453 | ]
2454 | }
2455 | ],
2456 | "metadata": {
2457 | "kernelspec": {
2458 | "display_name": "Python 3",
2459 | "language": "python",
2460 | "name": "python3"
2461 | },
2462 | "language_info": {
2463 | "codemirror_mode": {
2464 | "name": "ipython",
2465 | "version": 3
2466 | },
2467 | "file_extension": ".py",
2468 | "mimetype": "text/x-python",
2469 | "name": "python",
2470 | "nbconvert_exporter": "python",
2471 | "pygments_lexer": "ipython3",
2472 | "version": "3.6.5"
2473 | }
2474 | },
2475 | "nbformat": 4,
2476 | "nbformat_minor": 2
2477 | }
2478 |
--------------------------------------------------------------------------------