├── README.md ├── Python for Data Science - Part 2.ipynb ├── Grid Search - Breast Cancer.ipynb ├── Python for data science - Part 4.ipynb └── Python for Data Science - Part 1.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Python-for-data-science 2 | Learn data science with Python 3 | -------------------------------------------------------------------------------- /Python for Data Science - Part 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# The Numpy library" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 16, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np #import" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 17, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "data": { 26 | "text/plain": [ 27 | "array([1, 2, 3])" 28 | ] 29 | }, 30 | "execution_count": 17, 31 | "metadata": {}, 32 | "output_type": "execute_result" 33 | } 34 | ], 35 | "source": [ 36 | "#create numpy array\n", 37 | "np.array([1,2,3])" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 18, 43 | "metadata": {}, 44 | "outputs": [ 45 | { 46 | "data": { 47 | "text/plain": [ 48 | "array([[1, 2, 3],\n", 49 | " [4, 5, 6]])" 50 | ] 51 | }, 52 | "execution_count": 18, 53 | "metadata": {}, 54 | "output_type": "execute_result" 55 | } 56 | ], 57 | "source": [ 58 | "a = np.array([[1,2,3],[4,5,6]]) #create multi dimensional array\n", 59 | "a" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 19, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "data": { 69 | "text/plain": [ 70 | "(2, 3)" 71 | ] 72 | }, 73 | "execution_count": 19, 74 | "metadata": {}, 75 | "output_type": "execute_result" 76 | } 77 | ], 78 | "source": [ 79 | "#shape of the array\n", 80 | "a.shape" 81 | ] 82 | }, 83 | { 84 | 
"cell_type": "code", 85 | "execution_count": 20, 86 | "metadata": {}, 87 | "outputs": [ 88 | { 89 | "data": { 90 | "text/plain": [ 91 | "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29])" 92 | ] 93 | }, 94 | "execution_count": 20, 95 | "metadata": {}, 96 | "output_type": "execute_result" 97 | } 98 | ], 99 | "source": [ 100 | "#evenly spaced values between the interval\n", 101 | "b = np.arange(1,30,2)\n", 102 | "b" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 21, 108 | "metadata": {}, 109 | "outputs": [ 110 | { 111 | "data": { 112 | "text/plain": [ 113 | "array([[ 1, 3, 5],\n", 114 | " [ 7, 9, 11],\n", 115 | " [13, 15, 17],\n", 116 | " [19, 21, 23],\n", 117 | " [25, 27, 29]])" 118 | ] 119 | }, 120 | "execution_count": 21, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "#create new shape of data\n", 127 | "b.reshape(5,3)" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 174, 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "array([ 1. , 1.21052632, 1.42105263, 1.63157895, 1.84210526,\n", 139 | " 2.05263158, 2.26315789, 2.47368421, 2.68421053, 2.89473684,\n", 140 | " 3.10526316, 3.31578947, 3.52631579, 3.73684211, 3.94736842,\n", 141 | " 4.15789474, 4.36842105, 4.57894737, 4.78947368, 5. 
])" 142 | ] 143 | }, 144 | "execution_count": 174, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "#get evenly spaced numbers between specified interval\n", 151 | "c = np.linspace(1,5,20)\n", 152 | "c" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 175, 158 | "metadata": {}, 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "(20,)" 164 | ] 165 | }, 166 | "execution_count": 175, 167 | "metadata": {}, 168 | "output_type": "execute_result" 169 | } 170 | ], 171 | "source": [ 172 | "c.shape" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": 176, 178 | "metadata": {}, 179 | "outputs": [ 180 | { 181 | "data": { 182 | "text/plain": [ 183 | "array([[ 1. , 1.21052632, 1.42105263, 1.63157895],\n", 184 | " [ 1.84210526, 2.05263158, 2.26315789, 2.47368421],\n", 185 | " [ 2.68421053, 2.89473684, 3.10526316, 3.31578947],\n", 186 | " [ 3.52631579, 3.73684211, 3.94736842, 4.15789474],\n", 187 | " [ 4.36842105, 4.57894737, 4.78947368, 5. 
]])" 188 | ] 189 | }, 190 | "execution_count": 176, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "#change shape of array in-place\n", 197 | "c.resize(5,4)\n", 198 | "c" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 177, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": [ 209 | "(5, 4)" 210 | ] 211 | }, 212 | "execution_count": 177, 213 | "metadata": {}, 214 | "output_type": "execute_result" 215 | } 216 | ], 217 | "source": [ 218 | "c.shape" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": 181, 224 | "metadata": {}, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/plain": [ 229 | "array([[ 1., 1.],\n", 230 | " [ 1., 1.]])" 231 | ] 232 | }, 233 | "execution_count": 181, 234 | "metadata": {}, 235 | "output_type": "execute_result" 236 | } 237 | ], 238 | "source": [ 239 | "#create array filled with ones\n", 240 | "d = np.ones((2,2))\n", 241 | "d" 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 185, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "data": { 251 | "text/plain": [ 252 | "array([[ 0., 0., 0.],\n", 253 | " [ 0., 0., 0.],\n", 254 | " [ 0., 0., 0.]])" 255 | ] 256 | }, 257 | "execution_count": 185, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | "source": [ 263 | "#create array filled with zeros\n", 264 | "e = np.zeros((3,3))\n", 265 | "e" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 189, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "array([[ 1., 0.],\n", 277 | " [ 0., 1.]])" 278 | ] 279 | }, 280 | "execution_count": 189, 281 | "metadata": {}, 282 | "output_type": "execute_result" 283 | } 284 | ], 285 | "source": [ 286 | "#create diagonal matrix with diagonal values =1\n", 287 | "f = np.eye(2)\n", 288 | "f" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 
293 | "execution_count": 191, 294 | "metadata": {}, 295 | "outputs": [ 296 | { 297 | "data": { 298 | "text/plain": [ 299 | "array([ 1., 1.])" 300 | ] 301 | }, 302 | "execution_count": 191, 303 | "metadata": {}, 304 | "output_type": "execute_result" 305 | } 306 | ], 307 | "source": [ 308 | "#extract only the diagonal values from array\n", 309 | "np.diag(f)" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 194, 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "data": { 319 | "text/plain": [ 320 | "array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])" 321 | ] 322 | }, 323 | "execution_count": 194, 324 | "metadata": {}, 325 | "output_type": "execute_result" 326 | } 327 | ], 328 | "source": [ 329 | "#create array using repeating list\n", 330 | "g = np.array([1,2,3]*5)\n", 331 | "g" 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": 196, 337 | "metadata": {}, 338 | "outputs": [ 339 | { 340 | "data": { 341 | "text/plain": [ 342 | "array([1, 1, 1, 2, 2, 2, 3, 3, 3])" 343 | ] 344 | }, 345 | "execution_count": 196, 346 | "metadata": {}, 347 | "output_type": "execute_result" 348 | } 349 | ], 350 | "source": [ 351 | "#repeat elements using repeat\n", 352 | "np.repeat([1,2,3],3)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": 23, 358 | "metadata": {}, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/plain": [ 363 | "array([[ 0.61243315, 0.22618119, 0.67464992],\n", 364 | " [ 0.0344245 , 0.2948191 , 0.52865199]])" 365 | ] 366 | }, 367 | "execution_count": 23, 368 | "metadata": {}, 369 | "output_type": "execute_result" 370 | } 371 | ], 372 | "source": [ 373 | "g = np.random.rand(2,3) #this generates a random array\n", 374 | "g" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 221, 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "text/plain": [ 385 | "array([[ 1. , 1. , 1. ],\n", 386 | " [ 1. , 1. , 1. 
],\n", 387 | " [ 0.50725359, 0.28610842, 0.98268379],\n", 388 | " [ 0.52425524, 0.23682556, 0.21598467]])" 389 | ] 390 | }, 391 | "execution_count": 221, 392 | "metadata": {}, 393 | "output_type": "execute_result" 394 | } 395 | ], 396 | "source": [ 397 | "#stack the above two arrays vertically\n", 398 | "i = np.vstack([h,g])\n", 399 | "i" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 222, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/plain": [ 410 | "(4, 3)" 411 | ] 412 | }, 413 | "execution_count": 222, 414 | "metadata": {}, 415 | "output_type": "execute_result" 416 | } 417 | ], 418 | "source": [ 419 | "i.shape" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": 224, 425 | "metadata": {}, 426 | "outputs": [ 427 | { 428 | "data": { 429 | "text/plain": [ 430 | "array([[ 1. , 1. , 1. , 0.50725359, 0.28610842,\n", 431 | " 0.98268379],\n", 432 | " [ 1. , 1. , 1. , 0.52425524, 0.23682556,\n", 433 | " 0.21598467]])" 434 | ] 435 | }, 436 | "execution_count": 224, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "#Now, stack them horizontally\n", 443 | "j = np.hstack([h,g])\n", 444 | "j" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 225, 450 | "metadata": {}, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "(2, 6)" 456 | ] 457 | }, 458 | "execution_count": 225, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "j.shape" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 24, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | "output_type": "stream", 475 | "text": [ 476 | "[[ 0.02430146 0.14448542]\n", 477 | " [ 0.54428337 0.40332494]]\n", 478 | "[[ 0.77574886 0.08747577]\n", 479 | " [ 0.51484157 0.92319888]]\n" 480 | ] 481 | } 482 | ], 483 | "source": [ 484 | "#Array 
operations\n", 485 | "k = np.random.rand(2,2)\n", 486 | "l = np.random.rand(2,2)\n", 487 | "print(k)\n", 488 | "print(l)" 489 | ] 490 | }, 491 | { 492 | "cell_type": "code", 493 | "execution_count": 25, 494 | "metadata": {}, 495 | "outputs": [ 496 | { 497 | "data": { 498 | "text/plain": [ 499 | "array([[ 0.80005032, 0.23196118],\n", 500 | " [ 1.05912494, 1.32652381]])" 501 | ] 502 | }, 503 | "execution_count": 25, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "#element wise addition\n", 510 | "m = k + l\n", 511 | "m" 512 | ] 513 | }, 514 | { 515 | "cell_type": "code", 516 | "execution_count": 26, 517 | "metadata": {}, 518 | "outputs": [ 519 | { 520 | "data": { 521 | "text/plain": [ 522 | "array([[-0.75144739, 0.05700965],\n", 523 | " [ 0.02944179, -0.51987394]])" 524 | ] 525 | }, 526 | "execution_count": 26, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "#element wise subtraction\n", 533 | "n = k-l\n", 534 | "n" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": 234, 540 | "metadata": {}, 541 | "outputs": [ 542 | { 543 | "data": { 544 | "text/plain": [ 545 | "array([[ 0.38359018, 0.09226875],\n", 546 | " [ 0.39123512, 0.07776932]])" 547 | ] 548 | }, 549 | "execution_count": 234, 550 | "metadata": {}, 551 | "output_type": "execute_result" 552 | } 553 | ], 554 | "source": [ 555 | "#element wise multiplication\n", 556 | "o = k*l\n", 557 | "o" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": 27, 563 | "metadata": {}, 564 | "outputs": [ 565 | { 566 | "data": { 567 | "text/plain": [ 568 | "array([[ 0.00059056, 0.02087604],\n", 569 | " [ 0.29624438, 0.162671 ]])" 570 | ] 571 | }, 572 | "execution_count": 27, 573 | "metadata": {}, 574 | "output_type": "execute_result" 575 | } 576 | ], 577 | "source": [ 578 | "#element wise power\n", 579 | "p = k**2\n", 580 | "p" 581 | ] 582 | }, 583 | { 584 | "cell_type": 
"code", 585 | "execution_count": 28, 586 | "metadata": {}, 587 | "outputs": [ 588 | { 589 | "data": { 590 | "text/plain": [ 591 | "array([[ 0.09323893, 0.13551456],\n", 592 | " [ 0.62987564, 0.41996073]])" 593 | ] 594 | }, 595 | "execution_count": 28, 596 | "metadata": {}, 597 | "output_type": "execute_result" 598 | } 599 | ], 600 | "source": [ 601 | "#dot product\n", 602 | "q = k.dot(l)\n", 603 | "q" 604 | ] 605 | }, 606 | { 607 | "cell_type": "code", 608 | "execution_count": 241, 609 | "metadata": {}, 610 | "outputs": [ 611 | { 612 | "data": { 613 | "text/plain": [ 614 | "array([[1, 2],\n", 615 | " [3, 4]])" 616 | ] 617 | }, 618 | "execution_count": 241, 619 | "metadata": {}, 620 | "output_type": "execute_result" 621 | } 622 | ], 623 | "source": [ 624 | "#transpose arrays\n", 625 | "a = np.array([[1,2],[3,4]])\n", 626 | "a" 627 | ] 628 | }, 629 | { 630 | "cell_type": "code", 631 | "execution_count": 242, 632 | "metadata": {}, 633 | "outputs": [ 634 | { 635 | "data": { 636 | "text/plain": [ 637 | "array([[1, 3],\n", 638 | " [2, 4]])" 639 | ] 640 | }, 641 | "execution_count": 242, 642 | "metadata": {}, 643 | "output_type": "execute_result" 644 | } 645 | ], 646 | "source": [ 647 | "a.T #transpose" 648 | ] 649 | }, 650 | { 651 | "cell_type": "code", 652 | "execution_count": 243, 653 | "metadata": {}, 654 | "outputs": [ 655 | { 656 | "data": { 657 | "text/plain": [ 658 | "dtype('int32')" 659 | ] 660 | }, 661 | "execution_count": 243, 662 | "metadata": {}, 663 | "output_type": "execute_result" 664 | } 665 | ], 666 | "source": [ 667 | "#check datatype of elements in array\n", 668 | "a.dtype" 669 | ] 670 | }, 671 | { 672 | "cell_type": "code", 673 | "execution_count": 245, 674 | "metadata": {}, 675 | "outputs": [ 676 | { 677 | "data": { 678 | "text/plain": [ 679 | "dtype('float32')" 680 | ] 681 | }, 682 | "execution_count": 245, 683 | "metadata": {}, 684 | "output_type": "execute_result" 685 | } 686 | ], 687 | "source": [ 688 | "#change type using astype\n", 689 | "b = 
a.astype('f')\n", 690 | "b.dtype" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 246, 696 | "metadata": {}, 697 | "outputs": [], 698 | "source": [ 699 | "#Math functions in numpy\n", 700 | "c = np.array([1,2,3,4,5])" 701 | ] 702 | }, 703 | { 704 | "cell_type": "code", 705 | "execution_count": 248, 706 | "metadata": {}, 707 | "outputs": [ 708 | { 709 | "data": { 710 | "text/plain": [ 711 | "15" 712 | ] 713 | }, 714 | "execution_count": 248, 715 | "metadata": {}, 716 | "output_type": "execute_result" 717 | } 718 | ], 719 | "source": [ 720 | "c.sum()" 721 | ] 722 | }, 723 | { 724 | "cell_type": "code", 725 | "execution_count": 249, 726 | "metadata": {}, 727 | "outputs": [ 728 | { 729 | "data": { 730 | "text/plain": [ 731 | "5" 732 | ] 733 | }, 734 | "execution_count": 249, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "c.max()" 741 | ] 742 | }, 743 | { 744 | "cell_type": "code", 745 | "execution_count": 250, 746 | "metadata": {}, 747 | "outputs": [ 748 | { 749 | "data": { 750 | "text/plain": [ 751 | "3.0" 752 | ] 753 | }, 754 | "execution_count": 250, 755 | "metadata": {}, 756 | "output_type": "execute_result" 757 | } 758 | ], 759 | "source": [ 760 | "c.mean()" 761 | ] 762 | }, 763 | { 764 | "cell_type": "code", 765 | "execution_count": 252, 766 | "metadata": {}, 767 | "outputs": [ 768 | { 769 | "data": { 770 | "text/plain": [ 771 | "4" 772 | ] 773 | }, 774 | "execution_count": 252, 775 | "metadata": {}, 776 | "output_type": "execute_result" 777 | } 778 | ], 779 | "source": [ 780 | "#return index of maximum value\n", 781 | "c.argmax()" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": 253, 787 | "metadata": {}, 788 | "outputs": [ 789 | { 790 | "data": { 791 | "text/plain": [ 792 | "0" 793 | ] 794 | }, 795 | "execution_count": 253, 796 | "metadata": {}, 797 | "output_type": "execute_result" 798 | } 799 | ], 800 | "source": [ 801 | "#return index of minimum 
value\n", 802 | "c.argmin()" 803 | ] 804 | }, 805 | { 806 | "cell_type": "code", 807 | "execution_count": 30, 808 | "metadata": {}, 809 | "outputs": [ 810 | { 811 | "data": { 812 | "text/plain": [ 813 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81], dtype=int32)" 814 | ] 815 | }, 816 | "execution_count": 30, 817 | "metadata": {}, 818 | "output_type": "execute_result" 819 | } 820 | ], 821 | "source": [ 822 | "d = np.arange(10)**2\n", 823 | "d" 824 | ] 825 | }, 826 | { 827 | "cell_type": "code", 828 | "execution_count": 31, 829 | "metadata": {}, 830 | "outputs": [ 831 | { 832 | "data": { 833 | "text/plain": [ 834 | "4" 835 | ] 836 | }, 837 | "execution_count": 31, 838 | "metadata": {}, 839 | "output_type": "execute_result" 840 | } 841 | ], 842 | "source": [ 843 | "#use index for accessing values\n", 844 | "d[2]" 845 | ] 846 | }, 847 | { 848 | "cell_type": "code", 849 | "execution_count": 32, 850 | "metadata": {}, 851 | "outputs": [ 852 | { 853 | "data": { 854 | "text/plain": [ 855 | "0" 856 | ] 857 | }, 858 | "execution_count": 32, 859 | "metadata": {}, 860 | "output_type": "execute_result" 861 | } 862 | ], 863 | "source": [ 864 | "d[0]" 865 | ] 866 | }, 867 | { 868 | "cell_type": "code", 869 | "execution_count": 33, 870 | "metadata": {}, 871 | "outputs": [ 872 | { 873 | "data": { 874 | "text/plain": [ 875 | "array([ 1, 4, 9, 16], dtype=int32)" 876 | ] 877 | }, 878 | "execution_count": 33, 879 | "metadata": {}, 880 | "output_type": "execute_result" 881 | } 882 | ], 883 | "source": [ 884 | "d[1:5]" 885 | ] 886 | }, 887 | { 888 | "cell_type": "code", 889 | "execution_count": 34, 890 | "metadata": {}, 891 | "outputs": [ 892 | { 893 | "data": { 894 | "text/plain": [ 895 | "array([81], dtype=int32)" 896 | ] 897 | }, 898 | "execution_count": 34, 899 | "metadata": {}, 900 | "output_type": "execute_result" 901 | } 902 | ], 903 | "source": [ 904 | "#use negatives to count from back\n", 905 | "d[-1:]" 906 | ] 907 | }, 908 | { 909 | "cell_type": "code", 910 | "execution_count": 
35, 911 | "metadata": {}, 912 | "outputs": [ 913 | { 914 | "data": { 915 | "text/plain": [ 916 | "array([ 1, 9, 25, 49, 81], dtype=int32)" 917 | ] 918 | }, 919 | "execution_count": 35, 920 | "metadata": {}, 921 | "output_type": "execute_result" 922 | } 923 | ], 924 | "source": [ 925 | "#use two :: to include step size\n", 926 | "d[1:10:2] #d[start:stop:stepsize]" 927 | ] 928 | }, 929 | { 930 | "cell_type": "code", 931 | "execution_count": 46, 932 | "metadata": {}, 933 | "outputs": [ 934 | { 935 | "data": { 936 | "text/plain": [ 937 | "array([[ 0, 1, 2, 3, 4, 5],\n", 938 | " [ 6, 7, 8, 9, 10, 11],\n", 939 | " [12, 13, 14, 15, 16, 17],\n", 940 | " [18, 19, 20, 21, 22, 23],\n", 941 | " [24, 25, 26, 27, 28, 29],\n", 942 | " [30, 31, 32, 33, 34, 35]])" 943 | ] 944 | }, 945 | "execution_count": 46, 946 | "metadata": {}, 947 | "output_type": "execute_result" 948 | } 949 | ], 950 | "source": [ 951 | "#multidimensional arrays\n", 952 | "e = np.arange(36)\n", 953 | "e.resize(6,6)\n", 954 | "e" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": 47, 960 | "metadata": {}, 961 | "outputs": [ 962 | { 963 | "data": { 964 | "text/plain": [ 965 | "8" 966 | ] 967 | }, 968 | "execution_count": 47, 969 | "metadata": {}, 970 | "output_type": "execute_result" 971 | } 972 | ], 973 | "source": [ 974 | "#Access 2nd row and 3rd column\n", 975 | "e[1,2]" 976 | ] 977 | }, 978 | { 979 | "cell_type": "code", 980 | "execution_count": 48, 981 | "metadata": {}, 982 | "outputs": [ 983 | { 984 | "data": { 985 | "text/plain": [ 986 | "array([ 8, 9, 10, 11])" 987 | ] 988 | }, 989 | "execution_count": 48, 990 | "metadata": {}, 991 | "output_type": "execute_result" 992 | } 993 | ], 994 | "source": [ 995 | "#use : to select range of columns\n", 996 | "e[1, 2:6]" 997 | ] 998 | }, 999 | { 1000 | "cell_type": "code", 1001 | "execution_count": 49, 1002 | "metadata": {}, 1003 | "outputs": [ 1004 | { 1005 | "data": { 1006 | "text/plain": [ 1007 | "array([[ 0, 1, 2, 3, 4],\n", 1008 | " 
[ 6, 7, 8, 9, 10]])" 1009 | ] 1010 | }, 1011 | "execution_count": 49, 1012 | "metadata": {}, 1013 | "output_type": "execute_result" 1014 | } 1015 | ], 1016 | "source": [ 1017 | "#Select all rows till 2nd row and all columns except last column\n", 1018 | "e[:2,:-1]" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "code", 1023 | "execution_count": 50, 1024 | "metadata": {}, 1025 | "outputs": [ 1026 | { 1027 | "data": { 1028 | "text/plain": [ 1029 | "array([30, 32, 34])" 1030 | ] 1031 | }, 1032 | "execution_count": 50, 1033 | "metadata": {}, 1034 | "output_type": "execute_result" 1035 | } 1036 | ], 1037 | "source": [ 1038 | "#select last row and alternate columns\n", 1039 | "e[-1,::2]" 1040 | ] 1041 | }, 1042 | { 1043 | "cell_type": "code", 1044 | "execution_count": 51, 1045 | "metadata": {}, 1046 | "outputs": [ 1047 | { 1048 | "data": { 1049 | "text/plain": [ 1050 | "array([32, 34])" 1051 | ] 1052 | }, 1053 | "execution_count": 51, 1054 | "metadata": {}, 1055 | "output_type": "execute_result" 1056 | } 1057 | ], 1058 | "source": [ 1059 | "#select last row and alternate columns from 3rd column\n", 1060 | "e[-1,2::2]" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": 52, 1066 | "metadata": {}, 1067 | "outputs": [ 1068 | { 1069 | "data": { 1070 | "text/plain": [ 1071 | "array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])" 1072 | ] 1073 | }, 1074 | "execution_count": 52, 1075 | "metadata": {}, 1076 | "output_type": "execute_result" 1077 | } 1078 | ], 1079 | "source": [ 1080 | "#select values from array greater than 20\n", 1081 | "e[e>20]" 1082 | ] 1083 | }, 1084 | { 1085 | "cell_type": "code", 1086 | "execution_count": 53, 1087 | "metadata": {}, 1088 | "outputs": [ 1089 | { 1090 | "data": { 1091 | "text/plain": [ 1092 | "array([[ 0, 1, 2, 3, 4, 5],\n", 1093 | " [ 6, 7, 8, 9, 10, 11],\n", 1094 | " [12, 13, 14, 15, 16, 17],\n", 1095 | " [18, 19, 20, 20, 20, 20],\n", 1096 | " [20, 20, 20, 20, 20, 20],\n", 1097 | " [20, 20, 20, 
20, 20, 20]])" 1098 | ] 1099 | }, 1100 | "execution_count": 53, 1101 | "metadata": {}, 1102 | "output_type": "execute_result" 1103 | } 1104 | ], 1105 | "source": [ 1106 | "#Assign element value as 20 if value is greater than 20\n", 1107 | "e[e>20] = 20\n", 1108 | "e" 1109 | ] 1110 | }, 1111 | { 1112 | "cell_type": "code", 1113 | "execution_count": 45, 1114 | "metadata": {}, 1115 | "outputs": [ 1116 | { 1117 | "data": { 1118 | "text/plain": [ 1119 | "array([[ 0, 1, 2],\n", 1120 | " [ 6, 7, 8],\n", 1121 | " [12, 13, 14]])" 1122 | ] 1123 | }, 1124 | "execution_count": 45, 1125 | "metadata": {}, 1126 | "output_type": "execute_result" 1127 | } 1128 | ], 1129 | "source": [ 1130 | "#BE CAREFUL WHILE COPYING ARRAYS\n", 1131 | "f = e[:3,:3]\n", 1132 | "f" 1133 | ] 1134 | }, 1135 | { 1136 | "cell_type": "code", 1137 | "execution_count": 309, 1138 | "metadata": {}, 1139 | "outputs": [ 1140 | { 1141 | "data": { 1142 | "text/plain": [ 1143 | "array([[0, 0, 0],\n", 1144 | " [0, 0, 0],\n", 1145 | " [0, 0, 0]])" 1146 | ] 1147 | }, 1148 | "execution_count": 309, 1149 | "metadata": {}, 1150 | "output_type": "execute_result" 1151 | } 1152 | ], 1153 | "source": [ 1154 | "f[:] = 0\n", 1155 | "f" 1156 | ] 1157 | }, 1158 | { 1159 | "cell_type": "code", 1160 | "execution_count": 310, 1161 | "metadata": {}, 1162 | "outputs": [ 1163 | { 1164 | "data": { 1165 | "text/plain": [ 1166 | "array([[ 0, 0, 0, 3, 4, 5],\n", 1167 | " [ 0, 0, 0, 9, 10, 11],\n", 1168 | " [ 0, 0, 0, 15, 16, 17],\n", 1169 | " [18, 19, 20, 20, 20, 20],\n", 1170 | " [20, 20, 20, 20, 20, 20],\n", 1171 | " [20, 20, 20, 20, 20, 20]])" 1172 | ] 1173 | }, 1174 | "execution_count": 310, 1175 | "metadata": {}, 1176 | "output_type": "execute_result" 1177 | } 1178 | ], 1179 | "source": [ 1180 | "e #e also got changed" 1181 | ] 1182 | }, 1183 | { 1184 | "cell_type": "code", 1185 | "execution_count": 54, 1186 | "metadata": {}, 1187 | "outputs": [ 1188 | { 1189 | "data": { 1190 | "text/plain": [ 1191 | "array([[ 0, 1, 2, 3, 4, 5],\n", 
1192 | " [ 6, 7, 8, 9, 10, 11],\n", 1193 | " [12, 13, 14, 15, 16, 17],\n", 1194 | " [18, 19, 20, 20, 20, 20],\n", 1195 | " [20, 20, 20, 20, 20, 20],\n", 1196 | " [20, 20, 20, 20, 20, 20]])" 1197 | ] 1198 | }, 1199 | "execution_count": 54, 1200 | "metadata": {}, 1201 | "output_type": "execute_result" 1202 | } 1203 | ], 1204 | "source": [ 1205 | "#copy using copy function\n", 1206 | "f = e.copy()\n", 1207 | "f" 1208 | ] 1209 | }, 1210 | { 1211 | "cell_type": "code", 1212 | "execution_count": 315, 1213 | "metadata": {}, 1214 | "outputs": [ 1215 | { 1216 | "data": { 1217 | "text/plain": [ 1218 | "array([[ 0, 0, 0, 3, 4, 5],\n", 1219 | " [ 0, 0, 0, 9, 10, 11],\n", 1220 | " [ 0, 0, 0, 15, 16, 17],\n", 1221 | " [18, 19, 20, 0, 0, 0],\n", 1222 | " [20, 20, 20, 0, 0, 0],\n", 1223 | " [20, 20, 20, 0, 0, 0]])" 1224 | ] 1225 | }, 1226 | "execution_count": 315, 1227 | "metadata": {}, 1228 | "output_type": "execute_result" 1229 | } 1230 | ], 1231 | "source": [ 1232 | "f[3:,3:] = 0\n", 1233 | "f" 1234 | ] 1235 | }, 1236 | { 1237 | "cell_type": "code", 1238 | "execution_count": 316, 1239 | "metadata": {}, 1240 | "outputs": [ 1241 | { 1242 | "data": { 1243 | "text/plain": [ 1244 | "array([[ 0, 0, 0, 3, 4, 5],\n", 1245 | " [ 0, 0, 0, 9, 10, 11],\n", 1246 | " [ 0, 0, 0, 15, 16, 17],\n", 1247 | " [18, 19, 20, 20, 20, 20],\n", 1248 | " [20, 20, 20, 20, 20, 20],\n", 1249 | " [20, 20, 20, 20, 20, 20]])" 1250 | ] 1251 | }, 1252 | "execution_count": 316, 1253 | "metadata": {}, 1254 | "output_type": "execute_result" 1255 | } 1256 | ], 1257 | "source": [ 1258 | "e" 1259 | ] 1260 | }, 1261 | { 1262 | "cell_type": "code", 1263 | "execution_count": 329, 1264 | "metadata": {}, 1265 | "outputs": [ 1266 | { 1267 | "data": { 1268 | "text/plain": [ 1269 | "array([[9, 7, 1, 4],\n", 1270 | " [1, 4, 3, 6],\n", 1271 | " [2, 5, 5, 1],\n", 1272 | " [2, 2, 9, 9]])" 1273 | ] 1274 | }, 1275 | "execution_count": 329, 1276 | "metadata": {}, 1277 | "output_type": "execute_result" 1278 | } 1279 | ], 1280 | 
"source": [ 1281 | "#iterating over arrays\n", 1282 | "g = np.random.randint(1,10,(4,4))\n", 1283 | "g" 1284 | ] 1285 | }, 1286 | { 1287 | "cell_type": "code", 1288 | "execution_count": 330, 1289 | "metadata": {}, 1290 | "outputs": [ 1291 | { 1292 | "name": "stdout", 1293 | "output_type": "stream", 1294 | "text": [ 1295 | "[9 7 1 4]\n", 1296 | "[1 4 3 6]\n", 1297 | "[2 5 5 1]\n", 1298 | "[2 2 9 9]\n" 1299 | ] 1300 | } 1301 | ], 1302 | "source": [ 1303 | "#iterate over row\n", 1304 | "for row in g:\n", 1305 | " print(row)" 1306 | ] 1307 | }, 1308 | { 1309 | "cell_type": "code", 1310 | "execution_count": 332, 1311 | "metadata": {}, 1312 | "outputs": [ 1313 | { 1314 | "name": "stdout", 1315 | "output_type": "stream", 1316 | "text": [ 1317 | "[9 7 1 4]\n", 1318 | "[1 4 3 6]\n", 1319 | "[2 5 5 1]\n", 1320 | "[2 2 9 9]\n" 1321 | ] 1322 | } 1323 | ], 1324 | "source": [ 1325 | "#iterate by index\n", 1326 | "for i in range(len(g)):\n", 1327 | " print(g[i])" 1328 | ] 1329 | } 1330 | ], 1331 | "metadata": { 1332 | "kernelspec": { 1333 | "display_name": "Python 3", 1334 | "language": "python", 1335 | "name": "python3" 1336 | }, 1337 | "language_info": { 1338 | "codemirror_mode": { 1339 | "name": "ipython", 1340 | "version": 3 1341 | }, 1342 | "file_extension": ".py", 1343 | "mimetype": "text/x-python", 1344 | "name": "python", 1345 | "nbconvert_exporter": "python", 1346 | "pygments_lexer": "ipython3", 1347 | "version": "3.6.5" 1348 | } 1349 | }, 1350 | "nbformat": 4, 1351 | "nbformat_minor": 2 1352 | } 1353 | -------------------------------------------------------------------------------- /Grid Search - Breast Cancer.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Data import and pre-processing" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#import 
libraries\n", 17 | "import warnings\n", 18 | "warnings.filterwarnings('ignore')\n", 19 | "import pandas as pd\n", 20 | "import numpy as np\n", 21 | "import matplotlib.pyplot as plt\n", 22 | "import os\n", 23 | "os.chdir('C:\\\\Users\\\\rohan\\\\Documents\\\\Analytics\\\\Data')" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 2, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "#import data\n", 33 | "data = pd.read_csv('breast-cancer-wisconsin.csv',header=None)\n", 34 | "\n", 35 | "#set column names\n", 36 | "data.columns = ['Sample Code Number','Clump Thickness','Uniformity of Cell Size',\n", 37 | " 'Uniformity of Cell Shape','Marginal Adhesion','Single Epithelial Cell Size',\n", 38 | " 'Bare Nuclei','Bland Chromatin','Normal Nucleoli','Mitoses','Class']" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/html": [ 49 | "
\n", 50 | "\n", 63 | "\n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | 
" \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | "
Sample Code NumberClump ThicknessUniformity of Cell SizeUniformity of Cell ShapeMarginal AdhesionSingle Epithelial Cell SizeBare NucleiBland ChromatinNormal NucleoliMitosesClass
010000255111213112
1100294554457103212
210154253111223112
310162776881343712
410170234113213112
510171228101087109714
6101809911112103112
710185612121213112
810330782111211152
910330784211212112
\n", 223 | "
" 224 | ], 225 | "text/plain": [ 226 | " Sample Code Number Clump Thickness Uniformity of Cell Size \\\n", 227 | "0 1000025 5 1 \n", 228 | "1 1002945 5 4 \n", 229 | "2 1015425 3 1 \n", 230 | "3 1016277 6 8 \n", 231 | "4 1017023 4 1 \n", 232 | "5 1017122 8 10 \n", 233 | "6 1018099 1 1 \n", 234 | "7 1018561 2 1 \n", 235 | "8 1033078 2 1 \n", 236 | "9 1033078 4 2 \n", 237 | "\n", 238 | " Uniformity of Cell Shape Marginal Adhesion Single Epithelial Cell Size \\\n", 239 | "0 1 1 2 \n", 240 | "1 4 5 7 \n", 241 | "2 1 1 2 \n", 242 | "3 8 1 3 \n", 243 | "4 1 3 2 \n", 244 | "5 10 8 7 \n", 245 | "6 1 1 2 \n", 246 | "7 2 1 2 \n", 247 | "8 1 1 2 \n", 248 | "9 1 1 2 \n", 249 | "\n", 250 | " Bare Nuclei Bland Chromatin Normal Nucleoli Mitoses Class \n", 251 | "0 1 3 1 1 2 \n", 252 | "1 10 3 2 1 2 \n", 253 | "2 2 3 1 1 2 \n", 254 | "3 4 3 7 1 2 \n", 255 | "4 1 3 1 1 2 \n", 256 | "5 10 9 7 1 4 \n", 257 | "6 10 3 1 1 2 \n", 258 | "7 1 3 1 1 2 \n", 259 | "8 1 1 1 5 2 \n", 260 | "9 1 2 1 1 2 " 261 | ] 262 | }, 263 | "execution_count": 4, 264 | "metadata": {}, 265 | "output_type": "execute_result" 266 | } 267 | ], 268 | "source": [ 269 | "#view top rows\n", 270 | "data.head(10)" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 5, 276 | "metadata": {}, 277 | "outputs": [ 278 | { 279 | "data": { 280 | "text/html": [ 281 | "
\n", 282 | "\n", 295 | "\n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | "
Clump ThicknessUniformity of Cell SizeUniformity of Cell ShapeMarginal AdhesionSingle Epithelial Cell SizeBare NucleiBland ChromatinNormal NucleoliMitosesClass
05111213110
154457103210
23111223110
36881343710
44113213110
\n", 379 | "
" 380 | ], 381 | "text/plain": [ 382 | " Clump Thickness Uniformity of Cell Size Uniformity of Cell Shape \\\n", 383 | "0 5 1 1 \n", 384 | "1 5 4 4 \n", 385 | "2 3 1 1 \n", 386 | "3 6 8 8 \n", 387 | "4 4 1 1 \n", 388 | "\n", 389 | " Marginal Adhesion Single Epithelial Cell Size Bare Nuclei \\\n", 390 | "0 1 2 1 \n", 391 | "1 5 7 10 \n", 392 | "2 1 2 2 \n", 393 | "3 1 3 4 \n", 394 | "4 3 2 1 \n", 395 | "\n", 396 | " Bland Chromatin Normal Nucleoli Mitoses Class \n", 397 | "0 3 1 1 0 \n", 398 | "1 3 2 1 0 \n", 399 | "2 3 1 1 0 \n", 400 | "3 3 7 1 0 \n", 401 | "4 3 1 1 0 " 402 | ] 403 | }, 404 | "execution_count": 5, 405 | "metadata": {}, 406 | "output_type": "execute_result" 407 | } 408 | ], 409 | "source": [ 410 | "#Data cleaning\n", 411 | "data = data.drop(['Sample Code Number'],axis=1) #Drop 1st column\n", 412 | "data = data[data['Bare Nuclei'] != '?'] #Remove rows with missing data\n", 413 | "data['Class'] = np.where(data['Class'] ==2,0,1) #Change the Class representation\n", 414 | "data.head() #View top 10 rows" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 6, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "data": { 424 | "text/plain": [ 425 | "0 444\n", 426 | "1 239\n", 427 | "Name: Class, dtype: int64" 428 | ] 429 | }, 430 | "execution_count": 6, 431 | "metadata": {}, 432 | "output_type": "execute_result" 433 | } 434 | ], 435 | "source": [ 436 | "#Distribution of Class\n", 437 | "data['Class'].value_counts()" 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 7, 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "#Split data into attributes and class\n", 447 | "X = data.drop(['Class'],axis=1)\n", 448 | "y = data['Class']" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": 8, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [ 457 | "#perform training and test split\n", 458 | "from sklearn.model_selection import train_test_split\n", 459 | 
"X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)" 460 | ] 461 | }, 462 | { 463 | "cell_type": "markdown", 464 | "metadata": {}, 465 | "source": [ 466 | "# 1. Dummy Classifier" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 158, 472 | "metadata": {}, 473 | "outputs": [ 474 | { 475 | "name": "stdout", 476 | "output_type": "stream", 477 | "text": [ 478 | "y actual : \n", 479 | "0 103\n", 480 | "1 68\n", 481 | "Name: Class, dtype: int64\n", 482 | "y predicted : \n", 483 | "0 171\n", 484 | "dtype: int64\n", 485 | "Accuracy Score : 0.6023391812865497\n", 486 | "Precision Score : 0.0\n", 487 | "Recall Score : 0.0\n", 488 | "F1 Score : 0.0\n", 489 | "Confusion Matrix : \n", 490 | "[[103 0]\n", 491 | " [ 68 0]]\n" 492 | ] 493 | } 494 | ], 495 | "source": [ 496 | "#Dummy Classifier\n", 497 | "from sklearn.dummy import DummyClassifier\n", 498 | "clf = DummyClassifier(strategy= 'most_frequent').fit(X_train,y_train)\n", 499 | "y_pred = clf.predict(X_test)\n", 500 | "\n", 501 | "#Distribution of y test\n", 502 | "print('y actual : \\n' + str(y_test.value_counts()))\n", 503 | "\n", 504 | "#Distribution of y predicted\n", 505 | "print('y predicted : \\n' + str(pd.Series(y_pred).value_counts()))\n", 506 | "\n", 507 | "# Model Evaluation metrics \n", 508 | "from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n", 509 | "print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))\n", 510 | "print('Precision Score : ' + str(precision_score(y_test,y_pred)))\n", 511 | "print('Recall Score : ' + str(recall_score(y_test,y_pred)))\n", 512 | "print('F1 Score : ' + str(f1_score(y_test,y_pred)))\n", 513 | "\n", 514 | "#Dummy Classifier Confusion matrix\n", 515 | "from sklearn.metrics import confusion_matrix\n", 516 | "print('Confusion Matrix : \\n' + str(confusion_matrix(y_test,y_pred)))" 517 | ] 518 | }, 519 | { 520 | "cell_type": "markdown", 521 | "metadata": {}, 522 | "source": [ 523 | "# 2. 
Logistic Regression" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 159, 529 | "metadata": {}, 530 | "outputs": [ 531 | { 532 | "name": "stdout", 533 | "output_type": "stream", 534 | "text": [ 535 | "Accuracy Score : 0.9473684210526315\n", 536 | "Precision Score : 0.9836065573770492\n", 537 | "Recall Score : 0.8823529411764706\n", 538 | "F1 Score : 0.9302325581395349\n", 539 | "Confusion Matrix : \n", 540 | "[[102 1]\n", 541 | " [ 8 60]]\n" 542 | ] 543 | } 544 | ], 545 | "source": [ 546 | "#Logistic regression\n", 547 | "from sklearn.linear_model import LogisticRegression\n", 548 | "\n", 549 | "clf = LogisticRegression().fit(X_train,y_train)\n", 550 | "y_pred = clf.predict(X_test)\n", 551 | "\n", 552 | "# Model Evaluation metrics \n", 553 | "from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n", 554 | "print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))\n", 555 | "print('Precision Score : ' + str(precision_score(y_test,y_pred)))\n", 556 | "print('Recall Score : ' + str(recall_score(y_test,y_pred)))\n", 557 | "print('F1 Score : ' + str(f1_score(y_test,y_pred)))\n", 558 | "\n", 559 | "#Logistic Regression Confusion matrix\n", 560 | "from sklearn.metrics import confusion_matrix\n", 561 | "print('Confusion Matrix : \\n' + str(confusion_matrix(y_test,y_pred)))" 562 | ] 563 | }, 564 | { 565 | "cell_type": "markdown", 566 | "metadata": {}, 567 | "source": [ 568 | "# 3. 
Logistic Regression + Grid Search" 569 | ] 570 | }, 571 | { 572 | "cell_type": "code", 573 | "execution_count": 161, 574 | "metadata": {}, 575 | "outputs": [ 576 | { 577 | "name": "stdout", 578 | "output_type": "stream", 579 | "text": [ 580 | "LogisticRegression(C=0.009, class_weight=None, dual=False, fit_intercept=True,\n", 581 | " intercept_scaling=1, max_iter=100, multi_class='warn',\n", 582 | " n_jobs=None, penalty='l1', random_state=None, solver='warn',\n", 583 | " tol=0.0001, verbose=0, warm_start=False)\n", 584 | "Accuracy Score : 0.9122807017543859\n", 585 | "Precision Score : 0.8732394366197183\n", 586 | "Recall Score : 0.9117647058823529\n", 587 | "F1 Score : 0.8920863309352517\n", 588 | "Confusion Matrix : \n", 589 | "[[94 9]\n", 590 | " [ 6 62]]\n" 591 | ] 592 | } 593 | ], 594 | "source": [ 595 | "from sklearn.model_selection import GridSearchCV\n", 596 | "clf = LogisticRegression()\n", 597 | "grid_values = {'penalty': ['l1', 'l2'],'C':[0.001,.009,0.01,.09,1,5,10,25]}\n", 598 | "grid_clf_acc = GridSearchCV(clf, param_grid = grid_values,scoring = 'recall')\n", 599 | "grid_clf_acc.fit(X_train, y_train)\n", 600 | "\n", 601 | "print(grid_clf_acc.best_estimator_)\n", 602 | "\n", 603 | "y_pred_acc = grid_clf_acc.predict(X_test)\n", 604 | "\n", 605 | "# Model Evaluation metrics \n", 606 | "from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score\n", 607 | "print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))\n", 608 | "print('Precision Score : ' + str(precision_score(y_test,y_pred_acc)))\n", 609 | "print('Recall Score : ' + str(recall_score(y_test,y_pred_acc)))\n", 610 | "print('F1 Score : ' + str(f1_score(y_test,y_pred_acc)))\n", 611 | "\n", 612 | "#Logistic Regression + Grid Search Confusion matrix\n", 613 | "from sklearn.metrics import confusion_matrix\n", 614 | "print('Confusion Matrix : \\n' + str(confusion_matrix(y_test,y_pred_acc)))" 615 | ] 616 | }, 617 | { 618 | "cell_type": "markdown", 619 | "metadata": {}, 620 | "source": [ 621 
| "## Function for confusion matrix plot" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 162, 627 | "metadata": {}, 628 | "outputs": [], 629 | "source": [ 630 | "#Function to plot intuitive confusion matrix\n", 631 | "import itertools\n", 632 | "def plot_confusion_matrix(cm, classes,\n", 633 | " normalize=False,\n", 634 | " title='Confusion matrix',\n", 635 | " cmap=plt.cm.Blues):\n", 636 | " \"\"\"\n", 637 | " This function prints and plots the confusion matrix.\n", 638 | " Normalization can be applied by setting `normalize=True`.\n", 639 | " \"\"\"\n", 640 | " if normalize:\n", 641 | " cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]\n", 642 | " print(\"Normalized confusion matrix\")\n", 643 | " else:\n", 644 | " print('Confusion matrix, without normalization')\n", 645 | "\n", 646 | " print(cm)\n", 647 | "\n", 648 | " plt.imshow(cm, interpolation='nearest', cmap=cmap)\n", 649 | " plt.title(title)\n", 650 | " plt.colorbar()\n", 651 | " tick_marks = np.arange(len(classes))\n", 652 | " plt.xticks(tick_marks, classes, rotation=45)\n", 653 | " plt.yticks(tick_marks, classes)\n", 654 | "\n", 655 | " fmt = '.2f' if normalize else 'd'\n", 656 | " thresh = cm.max() / 2.\n", 657 | " for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):\n", 658 | " plt.text(j, i, format(cm[i, j], fmt),\n", 659 | " horizontalalignment=\"center\",\n", 660 | " color=\"white\" if cm[i, j] > thresh else \"black\")\n", 661 | "\n", 662 | " plt.ylabel('True label')\n", 663 | " plt.xlabel('Predicted label')\n", 664 | " plt.tight_layout()" 665 | ] 666 | }, 667 | { 668 | "cell_type": "code", 669 | "execution_count": 163, 670 | "metadata": {}, 671 | "outputs": [], 672 | "source": [ 673 | "# Compute confusion matrix\n", 674 | "cnf_matrix = confusion_matrix(y_test, y_pred_acc)\n", 675 | "np.set_printoptions(precision=2)" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": 164, 681 | "metadata": {}, 682 | "outputs": [ 683 | { 
684 | "name": "stdout", 685 | "output_type": "stream", 686 | "text": [ 687 | "Confusion matrix, without normalization\n", 688 | "[[94 9]\n", 689 | " [ 6 62]]\n" 690 | ] 691 | }, 692 | { 693 | "data": { 694 | "image/png": "iVBORw0KGgoAAAANSUhEUgAAAVUAAAEYCAYAAADsymWcAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvqOYd8AAAIABJREFUeJzt3Xm8HFWZxvHfc28SQhZIIKwJO2Gf\nCUvYEZFNVBREGQXUoFEUV8SFRWbEGRdQFFAQDKLEURBcEAQRYwQlTATDTlgS9iUhKwFCwpLwzh/n\nXGgu997uTiq3usLzzac+t2vpU293Vb85darqlCICMzMrRlvZAZiZrUycVM3MCuSkamZWICdVM7MC\nOamamRXISdXMrEClJlVJq0r6o6RnJP1mOco5StJfioytLJLeIun+suNolKQNJS2U1L4M7z1f0n+u\niLhamaSTJf207DiWV719VdJFkr7ZmzE1S9Kpkn5ZZJkNJVVJR0qakn88MyVdI2mvAtb/fmAdYM2I\nOHxZC4mIX0XEgQXEs0JJCkmb97RMRNwQEVuuoPUXvpNHxGMRMSgiltZZ99GSJnV676ci4n+aXaek\nRyQtzvvjU/lzDWq2nLJExLcj4uNlx1FL0khJv5Y0R9KzkqZL+pGkEd29Z3n2VUn9JH1f0hN5Oz4s\n6cxl/wSto25SlXQ8cBbwbVIC3BD4MXBIAevfCJgWEUsKKKvyJPUpO4YKeXdEDAK2B3YATloRK1mW\nGnirkNTQnT35P/qbgBnADhGxGrAn8CDQZeWpgH31JGA0sAswGHgbcNtylvkGpfymIqLbAVgdWAgc\n3sMyq5CS7ow8nAWskuftAzwBfAmYDcwEPprnfQN4CXg5r2MscCrwy5qyNwYC6JPHjwYeAp4DHgaO\nqpk+qeZ9ewD/Ap7Jf/eomXc98D/AjbmcvwDDuvlsHfF/tSb+Q4F3AtOA+cDJNcvvAkwGFuRlzwH6\n5Xn/yJ/l+fx5P1BT/gnAU8D/dkzL79ksr2PHPL4+MBfYp6ft1sO2ugj4ZjfzevrONsnxPwf8FTi3\nYzs1so2ArYEXgKX5sy/oKh7Sf9S3A8+SftAHdRPrI8D+NePfBa7utE+eATwGzALOB1atmf/VvH1m\nAB/P8W9eE9N5wJ/yttq/p/KAYcBVeZvPB24A2vK8E4An83dxP7Bfnn4qr9/P3wNMzWVcD2zd6bN+\nGbgzb5tLgf4Nbu9ocLlfAn+ss8w+9LCv5mV2AG7Nn/dS4Nc97G9XAcf1sL71gd8Bc/J+9PlGfmcd\nnxv4DDAdeDhP2xaYkLfRLPLvNm+Ly4Bf5LinAqOX5ff16vrrfJEHAUvIP5hulvlv4J/A2sBawP8B\n/1OzIZbkZfqSktEiYGg3O1fn8Y3zF9QHGEj6sW2Z560HbFvzQ56UX68BPA18OL/viDy+Zp5/PekH\nuwWwah4/rYcdaQnwXzn+T+SNfDHpf9dtScli07z8TsBueb0bA/fW7jjU/Hg7lX866Ye7Km/cUT+R\nyxkAXAucscwbu5uk2sB3NpmUVPqRai7P0kVSbXQbdRUP6YfyDHAA6QhqOLBVN5/jEXJSBUYAdwFn\n18w/C7gyf67BwB+B79Ts00/lbTeAlBw6J9VnSDW1NqB/nfK+Q0qyffPwFkDAlsDjwPo139Nmnfdz\n0n74fP7cfUkJ/wFe+8/4EeBmUpJZI+8Ln2pwe0eDyz0FHF1nmX3oYV/N+8ajwB
fz53g/qcLUXVI9\nhfSf1KeBfwNUM68NuIX0u+sHbEr6j/rtTfzOJuTva9W8zWaSKnf98/iuNdviBVJuas/b85/L+huL\nBpLqUcBTdZZ5EHhnzfjbgUdqNsRiapIyqca3W+edq5vxjXn9D3YB8D5qah2df7CkxHBzp/mTO3Ya\nUhI9pWbep4E/97AjLQba8/jgHM+uNcvcAhzazfuPAy7vtLE7J9WXqKl50Cmp5mlXkhLHneSjgGXa\n2N0n1W6/M1JzzxJgQM28X9J9Uq27jbqKB/gJcGaDn+MRUo33ubzuicCQPE+kJLVZzfK781qN5Wfk\nhJjHN+eNSfUXNfPrlfffwBW127Wm3Nmkmm7fTvNOrfn+/hO4rGZeG6l2u0/NZ/1QzfzvAuc3+D1F\ng8stoeaoAPhs3o4LgQsa2VeBvUk1/9rk+H9d7W95XjupNnkj8GJ+75g8b1fgsU7LnwT8vInf2b41\n40cAt3Xz3lOBv9aMbwMsXtbfWETUbVOdBwyr0y6xPul/qA6P5mmvlhGvbzNdBDR9UiEinicdMn8K\nmCnpaklbNRBPR0zDa8afaiKeefHaSZjF+e+smvmLO94vaQtJV+WTJ8+S2qGH9VA2wJyIeKHOMhcA\n2wE/iogXu1ogXwGxMA/X1Cmvs56+s/WB+RGxqGbe410V0sQ26soGpP+gG3VoRAwm/bC34rXveS1S\nDfQWSQskLQD+nKeTP09t/F19ltpp9cr7Hqlm+RdJD0k6ESAiHiD92E8FZueTQLW/iw6v++4j4pW8\n/qb3V0l7dcSY46R2vIeTy/NIRxUdMZwTEUNINfS+Ncv1tK+uDzwZOTNlnfepV0XE0og4NyL2BIYA\n3wJ+Jmlr0rmW9Tt9lpNJ53Qa/Z3VbsN6+1bn77f/8rTF1kuqk0lV40N7WGYG6UvosGGetiyeJ+3A\nHdatnRkR10bEAaQd4D5SsqkXT0dMTy5jTM04jxTXyEiN/SeTajo9iZ5m5rPaZwEXAqdKWqPLQtIV\nEIPy8I4m4+7pO5sJrCGpdrts0F1BPWyjHj8n6UewWTNB5/X9nVS7PCNPmkv6j27biBiSh9UjndSC\n9Hlqz2h39VlqY+2xvIh4LiK+FBGbAu8Gjpe0X553cUTsRfpug3To3NnrvntJyjE1vb9GxKSaGIfk\naUNqhkndvHUicFgjq+hh3kxgeI6/w4YNxr04Is4lNTltQ9oXHu4U++CIeGd+SyO/s9pYl2nfWlY9\nJtWIeIbUrnGupEMlDZDUV9I7JH03L3YJcIqktSQNy8sv63VftwN752sfV6fmjK6kdSS9R9JA0uHC\nQtKJj87+BGyRLwPrI+kDpA111TLG1IzBpDbFhbmGdmyn+bNI7UPNOBu4JdIlOFeT2u+WR7uk/jVD\nP3r4ziLiUWAKKaH3k7Q7KXm8QZ1tNAsYkdfXlQuBj0raT1KbpOFN1HLPAg6QtH2u6V0AnClp7RzX\ncElvz8teltezdf6P4r96KrheeZIOlrR5TibP5s+7VNKWkvaVtAqpYrKYrvfXy4B35c/dl9Tu9yLp\n0Lm3nAq8RdIPJA0HyL/lrZsoYzKpGeHzeR86jNRO3iVJx0naR+la9T6SxpB+P7eR2pCflXRCnt8u\naTtJO+e31/uddXYVsG5e5yqSBkvatYnP1pS6l1RFxA+A40kNy3NIWf+zwB/yIt8k/ejuJLX73Zqn\nNS0iJpDOGt5JaqusTYRtpB1uBukM3ltJ7aGdy5gHHJyXnUdq+D84IuYuS0xN+jJwJKmt7wLSZ6l1\nKjA+H9L8R73CJB1COrHyqTzpeGBHSUctR4wnkn7gHcPfGvjOjiK1I84jbdtLST/8znraRn8jnVl9\nStIbtkVE3Ax8FDiTdKLo77yx9tyliJhDOnvbcSPBCaRD8n/mw8O/kk4cERHXAD8ErsvLTM7v6bJZ\npV55wMg8vjCX9eOIuJ50Muc0Uk33KdKJ3J
O7iP1+4EPAj/Ky7yZdLvZSI5+9CBExjXTiZwRwh6Tn\nSG2dM3jtO61Xxkuk2u7RpBrnB4Df9/CWxcD3Sd/NXFL76vsi4qHc3PZu0uVyD+f5PyVdjQT1f2ed\nY3uOdCLw3Xl900mXcK0Qen0TiFl9ki4F7ouIr5cdy/LKbXh3k04A+nppW26+99/qkrSzpM3yYflB\npOtJ/1Dvfa1K0ntzU8ZQUjvnH51QrShOqtaIdUmXoi0kHTofGxGF3/3Siz5Jasp6kNTOWa9Nzqxh\nPvw3MyuQa6pmZgV603TgoT6rhvoNLjsMa8KorRq6zNFayO233TI3Itaqv+Tya19to4gli+svCMTi\nOddGxEErOCTgzZRU+w1mlS3rXsVkLeT6G88uOwRr0pABfbq9i6posWRxw7/pF24/t96djYV50yRV\nM1vZCNR6LZhOqmZWTQLaWq+7WydVM6su1etao/c5qZpZRfnw38ysWC1YU229NG9m1giRaqqNDI0U\nJ31B0t2Spko6Lk9bQ9IEpQchTsi3NvfISdXMKkqpptrIUK8kaTvSo4t2AUYBB0saSerVbWJEjCT1\nO3tivbKcVM2sutraGxvq25r0bKpFuXOdvwPvJXUeND4vM56eO+xPIS3jRzEzK5maOfwfJmlKzXBM\np8LuJnWQv2buvPydpCcwrBMRMwHy37XrReUTVWZWTaKZE1VzI2J0dzMj4l5Jp5OewroQuIP0JIOm\nuaZqZtVV4ImqiLgwInaMiL1JT66YDsyStB5A/ju7XjlOqmZWUU0d/tcv7bVnkG1IejTMJaTHw4/J\ni4whPY68Rz78N7Pqaiv0OtXfSVoTeBn4TEQ8Lek04DJJY4HHgMPrFeKkambVVPC9/xHxli6mzQP2\na6YcJ1UzqyjfpmpmVqwWvE3VSdXMqss1VTOzgjR4C2pvc1I1s+pyJ9VmZkXxiSozs2L58N/MrCAd\n/am2GCdVM6soH/6bmRXLh/9mZgXy2X8zs4LIh/9mZsXy4b+ZWXHkpGpmVoz0NBUnVTOzYigPLab1\nWnnNzBoi2traGhoaKk36oqSpku6WdImk/pI2kXSTpOmSLpXUr145TqpmVlmSGhoaKGc48HlgdERs\nB7QDHwROB86MiJHA08DYemU5qZpZZRWVVLM+wKqS+gADgJnAvsBv8/zxwKH1CnFSNbNqUhMDDJM0\npWY4praoiHgSOIP0cL+ZwDPALcCCiFiSF3sCGF4vLJ+oMrNKEk3VQudGxOhuy5KGAocAmwALgN8A\n7+hi0ai3IidVM6usRk9CNWB/4OGImAMg6ffAHsAQSX1ybXUEMKNuTEVFZGbW2wpsU30M2E3SAKU3\n7AfcA1wHvD8vMwa4ol5BTqpmVk3Ntan2KCJuIp2QuhW4i5QbxwEnAMdLegBYE7iwXlk+/Dezyiry\njqqI+Drw9U6THwJ2aaYcJ1Uzq6QmT1T1GidVM6ssJ1Uzs6II1OakamZWGNdUzcwK5KRqZlYQn6gy\nMyta6+VUX/xfNZ85Yh+m/OZkbvnt1/jskfu8bt5xH96Pxbedw5pDBpYTnNV13rk/ZPfRo9htp3/n\nx+ecXXY41abCe6kqhJNqhWyz2Xp89LA9eMuHv8cuH/gO79h7OzbbcC0ARqwzhH1324rHZs4vOUrr\nzj1T7+YXP7+Qif+YzKSbbuXaa67mwQemlx1WpRXZSXVhMfXq2my5bLXJutx81yMsfuFlli59hRtu\neYBD3jYKgO9++X187ew/EFG3Ex0rybT772P0zrsyYMAA+vTpw5577c1VV/6h7LCqraDbVIvkpFoh\nUx+cwV47bs4aqw9k1f59OWivbRmx7lDe9dZ/Y8bsBdw17cmyQ7QebL3NtvzfjTcwf948Fi1axIRr\nr+GJJ54oO6xKa8XD/0qfqJJ0EHA26dEHP42I00oOaYW6/+FZfP+iCVx13md5fvGL3DntSZYsWcoJ\nY9/OwZ
8+p+zwrI4tt9qaLxz/FQ49+CAGDhrIdv82ij592ssOq7LKSJiNqGxNVVI7cC6pI9ltgCMk\nbVNuVCve+D9MZo8jT+eAsWfx9DPP8+iM+Ww0fE1uvvQk7rv6GwxfewiTLz6BddYcXHao1oWPHP0x\n/jH5X1wz4XqGDh3KZpuNLDukSnNNtVi7AA9ExEMAkn5N6rn7nlKjWsHWGjqIOU8vZIN1h3LIvqPY\nZ8z3OfeS61+df9/V32DPo77LvAXPlxekdWvO7NmstfbaPP74Y/zxyj8w4bpJZYdUaa1YU61yUh0O\nPF4z/gSwa+0C+Tk06Vk0fQf1WmAr0iVnfJw1hgzk5SVLOe60y1jw3OKyQ7ImfOTIw5k/fz59+vbl\njDN/yJChQ8sOqdJ873+xuvo2X3fqOyLGkTqapW3A2ivFafH9x57V4/yt3tW5O0hrJdf89e9lh7Dy\nUGvWVCvbpkqqmW5QM97Q82PMbOUgQGpsqFuWtKWk22uGZyUdJ2kNSRMkTc9/6x5aVDmp/gsYKWkT\nSf2ADwJXlhyTmfWaxk5SNVKbjYj7I2L7iNge2AlYBFwOnAhMjIiRwMQ83qPKJtX8dMPPAtcC9wKX\nRcTUcqMys95UVE21k/2AByPiUdLJ7/F5+njg0HpvrnKbKhHxJ+BPZcdhZiUQtDV+omqYpCk14+Py\nOZeufBC4JL9eJyJmAkTETElr11tRpZOqmb15iaaS6tyIGF23zNSU+B7gpGWNq7KH/2ZmK+Dw/x3A\nrRExK4/PkrReWpfWA2bXK8BJ1cwqawXcUXUErx36Qzr5PSa/HgNcUa8AJ1Uzq6YGa6mN5lRJA4AD\ngN/XTD4NOEDS9Dyvbv8iblM1s0pK16kWd/F/RCwC1uw0bR7paoCGOamaWUWpmRNVvcZJ1cwqqxVv\nU3VSNbNqWrYL+1c4J1Uzq6Si21SL4qRqZpXVgjnVSdXMqss1VTOzojR373+vcVI1s0rq6E+11Tip\nmllFtebTVJ1UzayyWjCnOqmaWXW5pmpmVhD5RJWZWbFcUzUzK1AL5lQnVTOrrlasqbqTajOrpuI7\nqR4i6beS7pN0r6TdJa0haYKk6fnv0HrlOKmaWSWJxh6l0kRt9mzgzxGxFTAKuBc4EZgYESOBiXm8\nR06qZlZZ7W1qaKhH0mrA3sCFABHxUkQsAA4BxufFxgOH1ivLSdXMKqvAw/9NgTnAzyXdJumnkgYC\n60TETID8d+16BTmpmlklpYTZ8OH/MElTaoZjOhXXB9gROC8idgCep4FD/a6UdvY/V7e7FRHP9lYs\nZlZNTVz7PzciRvcw/wngiYi4KY//lpRUZ0laLyJmSloPmF1vRWVeUjUVCFJnMx06xgPYsIygzKw6\nirqkKiKekvS4pC0j4n7SE1TvycMY0qOpxwBX1CurtKQaERuUtW4zWzkUfJnq54BfSeoHPAR8lNRE\nepmkscBjwOH1CmmJi/8lfRDYNCK+LWkEqXH4lrLjMrPWJaC9wKwaEbcDXTUR7NdMOaWfqJJ0DvA2\n4MN50iLg/PIiMrNKaPAkVW/fddUKNdU9ImJHSbcBRMT8XP02M+tRC96l2hJJ9WVJbaSTU0haE3il\n3JDMrNUJaGvBrFr64T9wLvA7YC1J3wAmAaeXG5KZVUGR9/4XpfSaakT8QtItwP550uERcXeZMZlZ\n63Mn1T1rB14mNQG0Qu3ZzCrAh/9dkPQ14BJgfWAEcLGkk8qNysyqQA0OvakVaqofAnaKiEUAkr4F\n3AJ8p9SozKzltWIn1a2QVB/l9XH0Id3NYGbWrXT2v+wo3qjMDlXOJLWhLgKmSro2jx9IugLAzKx7\nJVzY34gya6odZ/inAlfXTP9nCbGYWQX57H+NiLiwrHWbWfX58L8bkjYDvgVsA/TvmB4RW5QWlJlV\nQise/pd+SRVwEfBz0n887wAuA35dZkBmVg2teElVKyTVARFxLUBEPBgR
p5B6rTIz65aULv5vZOhN\npR/+Ay8q1eEflPQp4EkaeLiWmVkLHv23RFL9IjAI+DypbXV14GOlRmRmlVDk2X9JjwDPAUuBJREx\nWtIawKXAxsAjwH9ExNM9lVN6Uq150NZzvNZRtZlZj8QKObR/W0TMrRk/EZgYEadJOjGPn9BTAWVe\n/H85uQ/VrkTEYb0YjplVTe9063cIsE9+PR64nlZNqsA5vbmyHbbekBtv6tVV2nI66Jwbyw7BWlwT\nl1QNkzSlZnxcRIzrtEwAf5EUwE/y/HUiYiZAfkx13fM9ZV78P7GsdZvZyqGJy5fmRkRXD/WrtWdE\nzMiJc4Kk+1ZwTGZmrUNQ6IP/ImJG/jsbuBzYBZglaT3SutYDZtcrx0nVzCqrT1tjQz2SBkoa3PGa\n1LHT3cCVwJi82BjgiroxLeuHKZqkVSLixbLjMLNqSM+fKuxM1TrA5bm8PsDFEfFnSf8CLpM0FngM\nOLxeQaUnVUm7ABeSrk/dUNIo4OMR8blyIzOzVlfUZaoR8RAwqovp84D9moqpmJCWyw+Bg4F5ABFx\nB75N1cwa4Kepdq0tIh7tVI1fWlYwZlYNqeu/1rtPtRWS6uO5CSAktQOfA6aVHJOZVUB76+XUlkiq\nx5KaADYEZgF/zdPMzLqlEnqgakTpSTVfE/bBsuMws+ppwZxaflKVdAFd9AEQEceUEI6ZVYgfp9K1\nv9a87g+8F3i8pFjMrCJ8oqobEXFp7bik/wUmlBSOmVVIC+bU8pNqFzYBNio7CDNrcYL2FsyqpSdV\nSU/zWptqGzCf1BGsmVm3/IjqLuRnU40iPZcK4JWI6LbjajOzWq2YVEu9TTUn0MsjYmkenFDNrGFF\ndv1XlFa49/9mSTuWHYSZVUvH4X8jQ28q8xlVfSJiCbAX8AlJDwLPk76riAgnWjPrXgmdpTSizDbV\nm4EdgUNLjMHMKkpAnxZsVC0zqQogIh4sMQYzqzDXVF9vLUnHdzczIn7Qm8GYWdWINorNqrmnvCnA\nkxFxsKRNgF8DawC3Ah+OiJd6KqPME1XtwCBgcDeDmVm30oP/Cu+k+gvAvTXjpwNnRsRI4GlgbL0C\nyqypzoyI/y5x/WZWZQWf2Zc0AngX8C3g+Hwd/b7AkXmR8cCpwHk9lVN6m6qZ2bIQ0N54Vh0maUrN\n+LiIGNdpmbOAr/LakfKawIJ8lRLAE8DweisqM6k29TAtM7POmuilam5EjO5upqSDgdkRcYukfTom\nd7Fo3RuUSkuqETG/rHWb2cqhwLP/ewLvkfROUhekq5FqrkNqrqkfAcyoV1Ar3FFlZtY0kRJYI0M9\nEXFSRIyIiI1JTyL5W0QcBVwHvD8vNga4ol5ZTqpmVk3qlXv/TyCdtHqA1MZ6Yb03lN71n5nZsloR\nZ7sj4nrg+vz6IWCXZt7vpGpmlSTcSbWZWaFaMKc6qZpZVfV+X6mNcFI1s0rqOPvfapxUzayyXFM1\nMytQ66VUJ1Uzqyj5EdVmZsXy4b+ZWYFaL6U6qZpZhbVgRdVJ1cyqKV1S1XpZ1UnVzCrLNVUzs8Ko\nmU6qe42TqplVkg//zcyK1PyTUnuFk6qZVVYrJtVW7I/AzKwhavBf3XKk/pJulnSHpKmSvpGnbyLp\nJknTJV0qqV+9slxTrbAFCxZw7Cc/zj1T70YS54/7GbvtvnvZYVkng1Zp5yv7b84maw4ggNMnPMDe\nm63JHpsO5eWlwYxnXuD0CdNZ+OLSskOtlII7qX4R2DciFkrqC0ySdA1wPHBmRPxa0vnAWOC8ngpy\nUq2wL3/xCxx44EFcculveemll1i0aFHZIVkXPvvWTbn5kQV8/er76dMm+vdtY0rfdi648RGWBhyz\n10YcufMIxk16tOxQK6eonBoRASzMo33zEMC+wJF5+njgVOokVR/+V9Szzz7LpEn/4OiPjQWgX79+\nDBkypOSorLMB/doZNXw1rp46C4Al
rwQLX1zKlMcWsDQ/Qf6emc+x1qBVSoyyupo4/B8maUrNcMwb\nypLaJd0OzAYmAA8CC/LjqQGeAIbXi8k11Yp6+KGHGDZsLY4Z+1HuuvMOdthxJ84482wGDhxYdmhW\nY/3V+7Ng8cuceODmbDZsINNmP8+Prn+IF5a88uoy79x2Ha6bNrfEKKtJQFvjNdW5ETG6pwUiYimw\nvaQhwOXA1l0tVm9Fla2pSvqZpNmS7i47ljIsWbKE22+7lU988lj+OeU2BgwcyBnfPa3ssKyTdokt\n1h7EFXc+xScuvoPFLy/lyJ1HvDr/QzuPYOkrwYT75pQYZVU1Wk9tro0gIhaQnqa6GzBEUkflcwQw\no977K5tUgYuAg8oOoizDR4xg+IgR7LLrrgC8933v5/bbbi05KutszsIXmbPwRe59KjXX/X36PEau\nnY4m3r71Wuy+6VC++edpZYZYXfk61UaGukVJa+UaKpJWBfYH7gWuA96fFxsDXFGvrMom1Yj4BzC/\n7DjKsu666zJixAZMu/9+AK7/20S22nqbkqOyzuYvepnZz73IBkNXBWCnDVfn0XmL2WWjIRwxegQn\nX3kvL9Y0BVjjOs7+NzI0YD3gOkl3Av8CJkTEVcAJwPGSHgDWBC6sV9BK3aaaG6OPAdhgww1LjqZ4\nPzjrR3z0I0fx0ksvsfGmmzLupz8vOyTrwg+vf5hTDtqCPm1i5rMvcNpfpvOTI0bRt72N7x+2LQD3\nzFzID/72YMmRVk9RF1RFxJ3ADl1MfwjYpZmyVuqkGhHjgHEAO+00um4Dc9WM2n57brxpStlhWB0P\nzHmeT15yx+umHXWRm2oK0YJ3VK3USdXMVm7NnoTqDU6qZlZZvve/QJIuASYDW0p6QtLYsmMys96l\nBofeVNmaakQcUXYMZlYe4aepmpkVx/2pmpkVqwVzqpOqmVVYC2ZVJ1Uzq6jm7+vvDU6qZlZJTfZS\n1WucVM2supxUzcyK48N/M7MC+ZIqM7MCtWBOdVI1s4oq4x7UBlT23n8ze3NLZ//V0FC3LGkDSddJ\nulfSVElfyNPXkDRB0vT8d2i9spxUzayyCuxQZQnwpYjYmvRsqs9I2gY4EZgYESOBiXm8R06qZlZd\nBWXViJgZEbfm18+Rnk81HDgEGJ8XGw8cWq8st6maWWWtiEuqJG1MerTKTcA6ETETUuKVtHa99zup\nmlllNXFJ1TBJtc8eGpcft9SpPA0CfgccFxHPLkvXgk6qZlZZTaS8uRExuseypL6khPqriPh9njxL\n0nq5lroeMLveitymamaV1NFJdSND3bLSQhcC90bED2pmXQmMya/HAFfUK8s1VTOrpmI7qd4T+DBw\nl6Tb87STgdOAy/Ljmh4DDq9XkJOqmVVWUTk1Iib1UNx+zZTlpGpm1dWCd1Q5qZpZRbmTajOzwriT\najOzojmpmpkVx4f/ZmYFcifVZmYFasGc6qRqZhVV7MX/hXFSNbNK6rhNtdU4qZpZZbVeSnVSNbMK\na8GKqpOqmVWXL6kyMytS6+VUJ1Uzq64WzKlOqmZWTRINPX66tzmpmll1tV5O9eNUzKy6CnpCNZJ+\nJmm2pLtrpq0haYKk6fnv0EaoYO6/AAAF4ElEQVRiclI1s8qSGhsacBFwUKdpJwITI2IkMDGP1+Wk\namYVpYb/1RMR/wDmd5p8CDA+vx4PHNpIVG5TNbNKSrepNrz4MElTasbHRcS4Ou9ZJyJmAuRHVK/d\nyIqcVM2ssppIqnMjYvQKDOVVPvw3s8oq6vC/G7MkrQeQ/85u5E1OqmZWTQ2epFqOS1mvBMbk12OA\nKxp5k5OqmVVSo5dTNXhJ1SXAZGBLSU9IGgucBhwgaTpwQB6vy22qZlZdBV38HxFHdDNrv2bLclI1\ns8rybapmZgVqvZTqpGpmVdaCWdVJ1cwqqxU7qVZElB1Dr5A0B3i07DhWkGHA3LKDsKasrNtso4hY\n
qzdWJOnPpO+xEXMjovO9/SvEmyaprswkTemtu0WsGN5mKy9fp2pmViAnVTOzAjmprhzq9bZjrcfb\nbCXlNlUzswK5pmpmViAnVTOzAjmpmpkVyEm1giRtKWl3SX0ltZcdjzXO22vl5xNVFSPpMODbwJN5\nmAJcFBHPlhqY9UjSFhExLb9uj4ilZcdkK4ZrqhUiqS/wAWBsROxH6ol8A+CrklYrNTjrlqSDgdsl\nXQwQEUtdY115OalWz2rAyPz6cuAqoB9wpNSCnUu+yUkaCHwWOA54SdIvwYl1ZeakWiER8TLwA+Aw\nSW+JiFeAScDtwF6lBmddiojngY8BFwNfBvrXJtYyY7MVw0m1em4A/gJ8WNLeEbE0Ii4G1gdGlRua\ndSUiZkTEwoiYC3wSWLUjsUraUdJW5UZoRXJ/qhUTES9I+hUQwEn5B/kisA4ws9TgrK6ImCfpk8D3\nJN0HtANvKzksK5CTagVFxNOSLgDuIdV8XgA+FBGzyo3MGhERcyXdCbwDOCAinig7JiuOL6mquHyy\nI3L7qlWApKHAZcCXIuLOsuOxYjmpmpVAUv+IeKHsOKx4TqpmZgXy2X8zswI5qZqZFchJ1cysQE6q\nZmYFclK1V0laKul2SXdL+o2kActR1j6Srsqv3yPpxB6WHSLp08uwjlMlfbnR6Z2WuUjS+5tY18aS\n7m42RnvzcVK1WosjYvuI2A54CfhU7UwlTe8zEXFlRJzWwyJDgKaTqlkrclK17twAbJ5raPdK+jFw\nK7CBpAMlTZZ0a67RDgKQdJCk+yRNAg7rKEjS0ZLOya/XkXS5pDvysAdwGrBZriV/Ly/3FUn/knSn\npG/UlPU1SfdL+iuwZb0PIekTuZw7JP2uU+17f0k3SJqWu+dDUruk79Ws+5PL+0Xam4uTqr2BpD6k\nWyjvypO2BH4RETsAzwOnAPtHxI6kTrKPl9QfuAB4N/AWYN1uiv8h8PeIGAXsCEwFTgQezLXkr0g6\nkNS94S7A9sBOkvaWtBPwQWAHUtLeuYGP8/uI2Dmv715gbM28jYG3Au8Czs+fYSzwTETsnMv/hKRN\nGliPGeB7/+31VpV0e359A3AhqferRyPin3n6bsA2wI25+9Z+wGRgK+DhiJgOkHthOqaLdewLfARe\n7frumXzbZq0D83BbHh9ESrKDgcsjYlFex5UNfKbtJH2T1MQwCLi2Zt5l+fbe6ZIeyp/hQODfa9pb\nV8/rntbAusycVO11FkfE9rUTcuJ8vnYSMCEijui03PaknrOKIOA7EfGTTus4bhnWcRFwaETcIelo\nYJ+aeZ3Lirzuz0VEbfJF0sZNrtfepHz4b836J7CnpM0BJA2QtAVwH7CJpM3yckd08/6JwLH5ve35\nMTDPkWqhHa4FPlbTVjtc0trAP4D3SlpV0mBSU0M9g4GZ+VE0R3Wad7ikthzzpsD9ed3H5uWRtEXu\nvd+sIa6pWlMiYk6u8V0iaZU8+ZSImCbpGOBqSXNJTyTYrosivgCMkzQWWAocGxGTJd2YL1m6Jrer\nbg1MzjXlhaSuDW+VdCnpSQePkpoo6vlP4Ka8/F28PnnfD/yd1Bftp3JftT8ltbXeqrTyOcChjX07\nZu5QxcysUD78NzMrkJOqmVmBnFTNzArkpGpmViAnVTOzAjmpmpkVyEnVzKxA/w+eaOKzMirJMgAA\nAABJRU5ErkJggg==\n", 695 | "text/plain": [ 696 | "
" 697 | ] 698 | }, 699 | "metadata": {}, 700 | "output_type": "display_data" 701 | } 702 | ], 703 | "source": [ 704 | "# Plot non-normalized confusion matrix\n", 705 | "plt.figure()\n", 706 | "class_names = [0,1]\n", 707 | "plot_confusion_matrix(cnf_matrix, classes=class_names,\n", 708 | " title='Confusion matrix - Logistic Regression + Grid Search')\n", 709 | "a = plt.gcf()\n", 710 | "a.set_size_inches(8,4)\n", 711 | "plt.show()" 712 | ] 713 | } 714 | ], 715 | "metadata": { 716 | "kernelspec": { 717 | "display_name": "Python 3", 718 | "language": "python", 719 | "name": "python3" 720 | }, 721 | "language_info": { 722 | "codemirror_mode": { 723 | "name": "ipython", 724 | "version": 3 725 | }, 726 | "file_extension": ".py", 727 | "mimetype": "text/x-python", 728 | "name": "python", 729 | "nbconvert_exporter": "python", 730 | "pygments_lexer": "ipython3", 731 | "version": "3.6.5" 732 | } 733 | }, 734 | "nbformat": 4, 735 | "nbformat_minor": 2 736 | } 737 | -------------------------------------------------------------------------------- /Python for data science - Part 4.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 69, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "#import libraries\n", 10 | "import pandas as pd\n", 11 | "import os\n", 12 | "import numpy as np" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 70, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "#Set working directory\n", 22 | "os.chdir('C:\\\\Users\\\\rohan\\\\Documents\\\\Analytics\\\\Data')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 71, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "#import iris dataset\n", 32 | "a = pd.read_csv('iris.csv')" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 72, 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | 
"text/html": [ 43 | "
\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | "
SepalLengthSepalWidthPetalLengthPetalWidthName
05.13.51.40.2setosa
14.93.01.40.2setosa
24.73.21.30.2setosa
34.63.11.50.2setosa
45.03.61.40.2setosa
\n", 111 | "
" 112 | ], 113 | "text/plain": [ 114 | " SepalLength SepalWidth PetalLength PetalWidth Name\n", 115 | "0 5.1 3.5 1.4 0.2 setosa\n", 116 | "1 4.9 3.0 1.4 0.2 setosa\n", 117 | "2 4.7 3.2 1.3 0.2 setosa\n", 118 | "3 4.6 3.1 1.5 0.2 setosa\n", 119 | "4 5.0 3.6 1.4 0.2 setosa" 120 | ] 121 | }, 122 | "execution_count": 72, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "#View top 5 rows of dataset\n", 129 | "a.head()" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "# 1. Descriptive statistics of numerical columns" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 73, 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "data": { 146 | "text/html": [ 147 | "
\n", 148 | "\n", 161 | "\n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | "
SepalLengthSepalWidthPetalLengthPetalWidth
count150.000000150.000000150.000000150.000000
mean5.8433333.0540003.7586671.198667
std0.8280660.4335941.7644200.763161
min4.3000002.0000001.0000000.100000
25%5.1000002.8000001.6000000.300000
50%5.8000003.0000004.3500001.300000
75%6.4000003.3000005.1000001.800000
max7.9000004.4000006.9000002.500000
\n", 230 | "
" 231 | ], 232 | "text/plain": [ 233 | " SepalLength SepalWidth PetalLength PetalWidth\n", 234 | "count 150.000000 150.000000 150.000000 150.000000\n", 235 | "mean 5.843333 3.054000 3.758667 1.198667\n", 236 | "std 0.828066 0.433594 1.764420 0.763161\n", 237 | "min 4.300000 2.000000 1.000000 0.100000\n", 238 | "25% 5.100000 2.800000 1.600000 0.300000\n", 239 | "50% 5.800000 3.000000 4.350000 1.300000\n", 240 | "75% 6.400000 3.300000 5.100000 1.800000\n", 241 | "max 7.900000 4.400000 6.900000 2.500000" 242 | ] 243 | }, 244 | "execution_count": 73, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "#Summarize the numerical columns of dataset\n", 251 | "a.describe()" 252 | ] 253 | }, 254 | { 255 | "cell_type": "markdown", 256 | "metadata": {}, 257 | "source": [ 258 | "# 2. Conditional column " 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 74, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/html": [ 269 | "
\n", 270 | "\n", 283 | "\n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Size
05.13.51.40.2setosaHigh
14.93.01.40.2setosaLow
24.73.21.30.2setosaLow
34.63.11.50.2setosaLow
45.03.61.40.2setosaHigh
\n", 343 | "
" 344 | ], 345 | "text/plain": [ 346 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n", 347 | "0 5.1 3.5 1.4 0.2 setosa High\n", 348 | "1 4.9 3.0 1.4 0.2 setosa Low\n", 349 | "2 4.7 3.2 1.3 0.2 setosa Low\n", 350 | "3 4.6 3.1 1.5 0.2 setosa Low\n", 351 | "4 5.0 3.6 1.4 0.2 setosa High" 352 | ] 353 | }, 354 | "execution_count": 74, 355 | "metadata": {}, 356 | "output_type": "execute_result" 357 | } 358 | ], 359 | "source": [ 360 | "#Create new column for bucketing the Sepal Length as High or Low\n", 361 | "a['SepalLength_Size'] = np.where(a['SepalLength']>=5,'High','Low')\n", 362 | "a.head()" 363 | ] 364 | }, 365 | { 366 | "cell_type": "code", 367 | "execution_count": 75, 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/html": [ 373 | "
\n", 374 | "\n", 387 | "\n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Size
05.13.51.40.2setosaHigh
14.93.01.40.2setosaLow
24.73.21.30.2setosaLow
34.63.11.50.2setosaLow
45.03.61.40.2setosaHigh
\n", 447 | "
" 448 | ], 449 | "text/plain": [ 450 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n", 451 | "0 5.1 3.5 1.4 0.2 setosa High\n", 452 | "1 4.9 3.0 1.4 0.2 setosa Low\n", 453 | "2 4.7 3.2 1.3 0.2 setosa Low\n", 454 | "3 4.6 3.1 1.5 0.2 setosa Low\n", 455 | "4 5.0 3.6 1.4 0.2 setosa High" 456 | ] 457 | }, 458 | "execution_count": 75, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "#Using list comprehension\n", 465 | "a['SepalLength_Size'] = ['High' if x >= 5 else 'Low' for x in a['SepalLength'] ]\n", 466 | "a.head()" 467 | ] 468 | }, 469 | { 470 | "cell_type": "code", 471 | "execution_count": 76, 472 | "metadata": { 473 | "scrolled": true 474 | }, 475 | "outputs": [ 476 | { 477 | "data": { 478 | "text/html": [ 479 | "
\n", 480 | "\n", 493 | "\n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Size
05.13.51.40.2setosaHigh
14.93.01.40.2setosaLow
24.73.21.30.2setosaLow
34.63.11.50.2setosaLow
45.03.61.40.2setosaHigh
\n", 553 | "
" 554 | ], 555 | "text/plain": [ 556 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n", 557 | "0 5.1 3.5 1.4 0.2 setosa High\n", 558 | "1 4.9 3.0 1.4 0.2 setosa Low\n", 559 | "2 4.7 3.2 1.3 0.2 setosa Low\n", 560 | "3 4.6 3.1 1.5 0.2 setosa Low\n", 561 | "4 5.0 3.6 1.4 0.2 setosa High" 562 | ] 563 | }, 564 | "execution_count": 76, 565 | "metadata": {}, 566 | "output_type": "execute_result" 567 | } 568 | ], 569 | "source": [ 570 | "#Using assign function\n", 571 | "def size(row_number):\n", 572 | " if row_number[\"SepalLength\"] >=5:\n", 573 | " return 'High'\n", 574 | " else:\n", 575 | " return 'Low'\n", 576 | "\n", 577 | "a = a.assign(SepalLength_Size=a.apply(size, axis=1))\n", 578 | "\n", 579 | "a.head()" 580 | ] 581 | }, 582 | { 583 | "cell_type": "code", 584 | "execution_count": 77, 585 | "metadata": {}, 586 | "outputs": [ 587 | { 588 | "data": { 589 | "text/html": [ 590 | "
\n", 591 | "\n", 604 | "\n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Size
05.13.51.40.2setosaHigh
14.93.01.40.2setosaLow
24.73.21.30.2setosaLow
34.63.11.50.2setosaLow
45.03.61.40.2setosaHigh
\n", 664 | "
" 665 | ], 666 | "text/plain": [ 667 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n", 668 | "0 5.1 3.5 1.4 0.2 setosa High\n", 669 | "1 4.9 3.0 1.4 0.2 setosa Low\n", 670 | "2 4.7 3.2 1.3 0.2 setosa Low\n", 671 | "3 4.6 3.1 1.5 0.2 setosa Low\n", 672 | "4 5.0 3.6 1.4 0.2 setosa High" 673 | ] 674 | }, 675 | "execution_count": 77, 676 | "metadata": {}, 677 | "output_type": "execute_result" 678 | } 679 | ], 680 | "source": [ 681 | "#Using lambda function\n", 682 | "a['SepalLength_Size'] = a['SepalLength'].map( lambda x: 'High' if x >= 5 else 'Low')\n", 683 | "a.head()" 684 | ] 685 | }, 686 | { 687 | "cell_type": "markdown", 688 | "metadata": {}, 689 | "source": [ 690 | "# 3. Unique of a column" 691 | ] 692 | }, 693 | { 694 | "cell_type": "code", 695 | "execution_count": 78, 696 | "metadata": {}, 697 | "outputs": [ 698 | { 699 | "data": { 700 | "text/plain": [ 701 | "array(['setosa', 'versicolor', 'virginica'], dtype=object)" 702 | ] 703 | }, 704 | "execution_count": 78, 705 | "metadata": {}, 706 | "output_type": "execute_result" 707 | } 708 | ], 709 | "source": [ 710 | "a['Name'].unique()" 711 | ] 712 | }, 713 | { 714 | "cell_type": "code", 715 | "execution_count": 79, 716 | "metadata": {}, 717 | "outputs": [ 718 | { 719 | "data": { 720 | "text/plain": [ 721 | "3" 722 | ] 723 | }, 724 | "execution_count": 79, 725 | "metadata": {}, 726 | "output_type": "execute_result" 727 | } 728 | ], 729 | "source": [ 730 | "a['Name'].nunique()" 731 | ] 732 | }, 733 | { 734 | "cell_type": "markdown", 735 | "metadata": {}, 736 | "source": [ 737 | "# 4. Cross tab function" 738 | ] 739 | }, 740 | { 741 | "cell_type": "code", 742 | "execution_count": 80, 743 | "metadata": {}, 744 | "outputs": [ 745 | { 746 | "data": { 747 | "text/html": [ 748 | "
\n", 749 | "\n", 762 | "\n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | "
SepalLength_SizeHighLow
Name
setosa3020
versicolor491
virginica491
\n", 793 | "
" 794 | ], 795 | "text/plain": [ 796 | "SepalLength_Size High Low\n", 797 | "Name \n", 798 | "setosa 30 20\n", 799 | "versicolor 49 1\n", 800 | "virginica 49 1" 801 | ] 802 | }, 803 | "execution_count": 80, 804 | "metadata": {}, 805 | "output_type": "execute_result" 806 | } 807 | ], 808 | "source": [ 809 | "pd.crosstab(a['Name'],a['SepalLength_Size'])" 810 | ] 811 | }, 812 | { 813 | "cell_type": "code", 814 | "execution_count": 81, 815 | "metadata": {}, 816 | "outputs": [ 817 | { 818 | "data": { 819 | "text/html": [ 820 | "
\n", 821 | "\n", 834 | "\n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | "
SepalLength_SizeHighLow
Name
setosa5.2300004.67
versicolor5.9571434.90
virginica6.6224494.90
\n", 865 | "
" 866 | ], 867 | "text/plain": [ 868 | "SepalLength_Size High Low\n", 869 | "Name \n", 870 | "setosa 5.230000 4.67\n", 871 | "versicolor 5.957143 4.90\n", 872 | "virginica 6.622449 4.90" 873 | ] 874 | }, 875 | "execution_count": 81, 876 | "metadata": {}, 877 | "output_type": "execute_result" 878 | } 879 | ], 880 | "source": [ 881 | "pd.crosstab(a['Name'],a['SepalLength_Size'],values=a['SepalLength'],aggfunc=np.mean)" 882 | ] 883 | }, 884 | { 885 | "cell_type": "markdown", 886 | "metadata": {}, 887 | "source": [ 888 | "# 5. Sorting a column" 889 | ] 890 | }, 891 | { 892 | "cell_type": "code", 893 | "execution_count": 82, 894 | "metadata": {}, 895 | "outputs": [ 896 | { 897 | "data": { 898 | "text/html": [ 899 | "
\n", 900 | "\n", 913 | "\n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | " \n", 962 | " \n", 963 | " \n", 964 | " \n", 965 | " \n", 966 | " \n", 967 | " \n", 968 | " \n", 969 | " \n", 970 | " \n", 971 | " \n", 972 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Size
134.33.01.10.1setosaLow
424.43.21.30.2setosaLow
384.43.01.30.2setosaLow
84.42.91.40.2setosaLow
414.52.31.30.3setosaLow
\n", 973 | "
" 974 | ], 975 | "text/plain": [ 976 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n", 977 | "13 4.3 3.0 1.1 0.1 setosa Low\n", 978 | "42 4.4 3.2 1.3 0.2 setosa Low\n", 979 | "38 4.4 3.0 1.3 0.2 setosa Low\n", 980 | "8 4.4 2.9 1.4 0.2 setosa Low\n", 981 | "41 4.5 2.3 1.3 0.3 setosa Low" 982 | ] 983 | }, 984 | "execution_count": 82, 985 | "metadata": {}, 986 | "output_type": "execute_result" 987 | } 988 | ], 989 | "source": [ 990 | "b = a.sort_values('SepalLength')\n", 991 | "b.head()" 992 | ] 993 | }, 994 | { 995 | "cell_type": "code", 996 | "execution_count": 83, 997 | "metadata": {}, 998 | "outputs": [ 999 | { 1000 | "data": { 1001 | "text/html": [ 1002 | "
\n", 1003 | "\n", 1016 | "\n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Size
145.84.01.20.2setosaHigh
155.74.41.50.4setosaHigh
185.73.81.70.3setosaHigh
335.54.21.40.2setosaHigh
365.53.51.30.2setosaHigh
\n", 1076 | "
" 1077 | ], 1078 | "text/plain": [ 1079 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size\n", 1080 | "14 5.8 4.0 1.2 0.2 setosa High\n", 1081 | "15 5.7 4.4 1.5 0.4 setosa High\n", 1082 | "18 5.7 3.8 1.7 0.3 setosa High\n", 1083 | "33 5.5 4.2 1.4 0.2 setosa High\n", 1084 | "36 5.5 3.5 1.3 0.2 setosa High" 1085 | ] 1086 | }, 1087 | "execution_count": 83, 1088 | "metadata": {}, 1089 | "output_type": "execute_result" 1090 | } 1091 | ], 1092 | "source": [ 1093 | "c = a.sort_values(['Name','SepalLength'],ascending=[True,False])\n", 1094 | "c.head()" 1095 | ] 1096 | }, 1097 | { 1098 | "cell_type": "markdown", 1099 | "metadata": {}, 1100 | "source": [ 1101 | "# 6. Column datatypes" 1102 | ] 1103 | }, 1104 | { 1105 | "cell_type": "code", 1106 | "execution_count": 84, 1107 | "metadata": {}, 1108 | "outputs": [ 1109 | { 1110 | "data": { 1111 | "text/plain": [ 1112 | "SepalLength float64\n", 1113 | "SepalWidth float64\n", 1114 | "PetalLength float64\n", 1115 | "PetalWidth float64\n", 1116 | "Name object\n", 1117 | "SepalLength_Size object\n", 1118 | "dtype: object" 1119 | ] 1120 | }, 1121 | "execution_count": 84, 1122 | "metadata": {}, 1123 | "output_type": "execute_result" 1124 | } 1125 | ], 1126 | "source": [ 1127 | "#check datatype of columns\n", 1128 | "a.dtypes" 1129 | ] 1130 | }, 1131 | { 1132 | "cell_type": "markdown", 1133 | "metadata": {}, 1134 | "source": [ 1135 | "# 7. Binning a column" 1136 | ] 1137 | }, 1138 | { 1139 | "cell_type": "code", 1140 | "execution_count": 92, 1141 | "metadata": {}, 1142 | "outputs": [ 1143 | { 1144 | "data": { 1145 | "text/html": [ 1146 | "
\n", 1147 | "\n", 1160 | "\n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | "
SepalLengthSepalWidthPetalLengthPetalWidthNameSepalLength_Sizebins
05.13.51.40.2setosaHigh(5, 6]
14.93.01.40.2setosaLow(4, 5]
24.73.21.30.2setosaLow(4, 5]
34.63.11.50.2setosaLow(4, 5]
45.03.61.40.2setosaHigh(4, 5]
\n", 1226 | "
" 1227 | ], 1228 | "text/plain": [ 1229 | " SepalLength SepalWidth PetalLength PetalWidth Name SepalLength_Size \\\n", 1230 | "0 5.1 3.5 1.4 0.2 setosa High \n", 1231 | "1 4.9 3.0 1.4 0.2 setosa Low \n", 1232 | "2 4.7 3.2 1.3 0.2 setosa Low \n", 1233 | "3 4.6 3.1 1.5 0.2 setosa Low \n", 1234 | "4 5.0 3.6 1.4 0.2 setosa High \n", 1235 | "\n", 1236 | " bins \n", 1237 | "0 (5, 6] \n", 1238 | "1 (4, 5] \n", 1239 | "2 (4, 5] \n", 1240 | "3 (4, 5] \n", 1241 | "4 (4, 5] " 1242 | ] 1243 | }, 1244 | "execution_count": 92, 1245 | "metadata": {}, 1246 | "output_type": "execute_result" 1247 | } 1248 | ], 1249 | "source": [ 1250 | "bins = [0, 1, 2, 3, 4, 5,6,7]\n", 1251 | "a['bins'] = pd.cut(a['SepalLength'], bins)\n", 1252 | "a.head()" 1253 | ] 1254 | }, 1255 | { 1256 | "cell_type": "code", 1257 | "execution_count": 93, 1258 | "metadata": {}, 1259 | "outputs": [ 1260 | { 1261 | "data": { 1262 | "text/plain": [ 1263 | "(5, 6] 57\n", 1264 | "(6, 7] 49\n", 1265 | "(4, 5] 32\n", 1266 | "(3, 4] 0\n", 1267 | "(2, 3] 0\n", 1268 | "(1, 2] 0\n", 1269 | "(0, 1] 0\n", 1270 | "Name: bins, dtype: int64" 1271 | ] 1272 | }, 1273 | "execution_count": 93, 1274 | "metadata": {}, 1275 | "output_type": "execute_result" 1276 | } 1277 | ], 1278 | "source": [ 1279 | "a['bins'].value_counts()" 1280 | ] 1281 | } 1282 | ], 1283 | "metadata": { 1284 | "kernelspec": { 1285 | "display_name": "Python 3", 1286 | "language": "python", 1287 | "name": "python3" 1288 | }, 1289 | "language_info": { 1290 | "codemirror_mode": { 1291 | "name": "ipython", 1292 | "version": 3 1293 | }, 1294 | "file_extension": ".py", 1295 | "mimetype": "text/x-python", 1296 | "name": "python", 1297 | "nbconvert_exporter": "python", 1298 | "pygments_lexer": "ipython3", 1299 | "version": "3.6.5" 1300 | } 1301 | }, 1302 | "nbformat": 4, 1303 | "nbformat_minor": 2 1304 | } 1305 | -------------------------------------------------------------------------------- /Python for Data Science - Part 1.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# 1. Python function" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "#multiply two numbers using a python function\n", 17 | "def multiply(x,y):\n", 18 | " z = x*y\n", 19 | " return z" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 4, 25 | "metadata": {}, 26 | "outputs": [ 27 | { 28 | "data": { 29 | "text/plain": [ 30 | "6" 31 | ] 32 | }, 33 | "execution_count": 4, 34 | "metadata": {}, 35 | "output_type": "execute_result" 36 | } 37 | ], 38 | "source": [ 39 | "multiply(2,3)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "# 2. Python types" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 6, 52 | "metadata": {}, 53 | "outputs": [ 54 | { 55 | "data": { 56 | "text/plain": [ 57 | "str" 58 | ] 59 | }, 60 | "execution_count": 6, 61 | "metadata": {}, 62 | "output_type": "execute_result" 63 | } 64 | ], 65 | "source": [ 66 | "type(' My name is Rohan')" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 7, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/plain": [ 77 | "int" 78 | ] 79 | }, 80 | "execution_count": 7, 81 | "metadata": {}, 82 | "output_type": "execute_result" 83 | } 84 | ], 85 | "source": [ 86 | "type(1)" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 8, 92 | "metadata": {}, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "float" 98 | ] 99 | }, 100 | "execution_count": 8, 101 | "metadata": {}, 102 | "output_type": "execute_result" 103 | } 104 | ], 105 | "source": [ 106 | "type(1.0)" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": 10, 112 | "metadata": {}, 113 | "outputs": [ 114 | { 115 | 
"data": { 116 | "text/plain": [ 117 | "NoneType" 118 | ] 119 | }, 120 | "execution_count": 10, 121 | "metadata": {}, 122 | "output_type": "execute_result" 123 | } 124 | ], 125 | "source": [ 126 | "type(None)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 15, 132 | "metadata": {}, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/plain": [ 137 | "function" 138 | ] 139 | }, 140 | "execution_count": 15, 141 | "metadata": {}, 142 | "output_type": "execute_result" 143 | } 144 | ], 145 | "source": [ 146 | "type(multiply)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "# 3. Python sequences" 154 | ] 155 | }, 156 | { 157 | "cell_type": "markdown", 158 | "metadata": {}, 159 | "source": [ 160 | "##### Tuples are an immutable data structure which cannot be altered (unlike lists)" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 23, 166 | "metadata": {}, 167 | "outputs": [], 168 | "source": [ 169 | "a = (1,2,3,4)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "code", 174 | "execution_count": 24, 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "tuple" 181 | ] 182 | }, 183 | "execution_count": 24, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "type(a)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "markdown", 194 | "metadata": {}, 195 | "source": [ 196 | "#### Lists are mutable objects" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 28, 202 | "metadata": {}, 203 | "outputs": [], 204 | "source": [ 205 | "b = [1,2,3,4]" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 29, 211 | "metadata": {}, 212 | "outputs": [ 213 | { 214 | "data": { 215 | "text/plain": [ 216 | "list" 217 | ] 218 | }, 219 | "execution_count": 29, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 
| "type(b)" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 30, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "name": "stdout", 235 | "output_type": "stream", 236 | "text": [ 237 | "[1, 2, 3, 4, 2.2]\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "b.append(2.2) #append to list using this function\n", 243 | "print(b)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 32, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "1\n", 256 | "2\n", 257 | "3\n", 258 | "4\n", 259 | "2.2\n" 260 | ] 261 | } 262 | ], 263 | "source": [ 264 | "for number in b: #looping through list\n", 265 | " print(number)" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": 34, 271 | "metadata": {}, 272 | "outputs": [ 273 | { 274 | "name": "stdout", 275 | "output_type": "stream", 276 | "text": [ 277 | "1\n", 278 | "2\n", 279 | "3\n", 280 | "4\n", 281 | "2.2\n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "for i in range(len(b)): #looping using index of a list\n", 287 | " print(b[i])" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 38, 293 | "metadata": {}, 294 | "outputs": [ 295 | { 296 | "data": { 297 | "text/plain": [ 298 | "[1, 2, 3, 1, 'abc', 'de']" 299 | ] 300 | }, 301 | "execution_count": 38, 302 | "metadata": {}, 303 | "output_type": "execute_result" 304 | } 305 | ], 306 | "source": [ 307 | "[1,2,3] + [1,'abc','de'] #concatenate lists" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 39, 313 | "metadata": {}, 314 | "outputs": [ 315 | { 316 | "data": { 317 | "text/plain": [ 318 | "[1, 2, 1, 2, 1, 2]" 319 | ] 320 | }, 321 | "execution_count": 39, 322 | "metadata": {}, 323 | "output_type": "execute_result" 324 | } 325 | ], 326 | "source": [ 327 | "[1,2]*3 #repeat lists" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 40, 333 | 
"metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "True" 339 | ] 340 | }, 341 | "execution_count": 40, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "3 in b #in operator to check if required object is in list" 348 | ] 349 | }, 350 | { 351 | "cell_type": "markdown", 352 | "metadata": {}, 353 | "source": [ 354 | "#### Strings store character objects" 355 | ] 356 | }, 357 | { 358 | "cell_type": "code", 359 | "execution_count": 42, 360 | "metadata": {}, 361 | "outputs": [], 362 | "source": [ 363 | "x = 'My name is Rohan'" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": 43, 369 | "metadata": {}, 370 | "outputs": [ 371 | { 372 | "data": { 373 | "text/plain": [ 374 | "'M'" 375 | ] 376 | }, 377 | "execution_count": 43, 378 | "metadata": {}, 379 | "output_type": "execute_result" 380 | } 381 | ], 382 | "source": [ 383 | "x[0] #Access first letter" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": 45, 389 | "metadata": {}, 390 | "outputs": [ 391 | { 392 | "data": { 393 | "text/plain": [ 394 | "'M'" 395 | ] 396 | }, 397 | "execution_count": 45, 398 | "metadata": {}, 399 | "output_type": "execute_result" 400 | } 401 | ], 402 | "source": [ 403 | "x[0:1] #Still accesses only first letter" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 47, 409 | "metadata": {}, 410 | "outputs": [ 411 | { 412 | "data": { 413 | "text/plain": [ 414 | "'My'" 415 | ] 416 | }, 417 | "execution_count": 47, 418 | "metadata": {}, 419 | "output_type": "execute_result" 420 | } 421 | ], 422 | "source": [ 423 | "x[0:2] #Accesses two letters" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 49, 429 | "metadata": {}, 430 | "outputs": [ 431 | { 432 | "data": { 433 | "text/plain": [ 434 | "'My name is Roha'" 435 | ] 436 | }, 437 | "execution_count": 49, 438 | "metadata": {}, 439 | "output_type": "execute_result" 
440 | } 441 | ], 442 | "source": [ 443 | "x[:-1] #Accesses everything except last letter" 444 | ] 445 | }, 446 | { 447 | "cell_type": "code", 448 | "execution_count": 56, 449 | "metadata": {}, 450 | "outputs": [ 451 | { 452 | "data": { 453 | "text/plain": [ 454 | "' Rohan'" 455 | ] 456 | }, 457 | "execution_count": 56, 458 | "metadata": {}, 459 | "output_type": "execute_result" 460 | } 461 | ], 462 | "source": [ 463 | "x[10:] #returns all the characters from 10th position till end" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 64, 469 | "metadata": {}, 470 | "outputs": [], 471 | "source": [ 472 | "first = 'Rohan'\n", 473 | "last = 'Joseph'\n", 474 | "\n", 475 | "Name = first + ' ' + last #string concatenation" 476 | ] 477 | }, 478 | { 479 | "cell_type": "code", 480 | "execution_count": 65, 481 | "metadata": {}, 482 | "outputs": [ 483 | { 484 | "name": "stdout", 485 | "output_type": "stream", 486 | "text": [ 487 | "Rohan Joseph\n" 488 | ] 489 | } 490 | ], 491 | "source": [ 492 | "print(Name)" 493 | ] 494 | }, 495 | { 496 | "cell_type": "code", 497 | "execution_count": 69, 498 | "metadata": {}, 499 | "outputs": [ 500 | { 501 | "data": { 502 | "text/plain": [ 503 | "['Rohan', 'Joseph']" 504 | ] 505 | }, 506 | "execution_count": 69, 507 | "metadata": {}, 508 | "output_type": "execute_result" 509 | } 510 | ], 511 | "source": [ 512 | "Name.split(' ') #split the words in a string using split function" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 74, 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "'Rohan'" 524 | ] 525 | }, 526 | "execution_count": 74, 527 | "metadata": {}, 528 | "output_type": "execute_result" 529 | } 530 | ], 531 | "source": [ 532 | "Name.split(' ')[0] #Show the first word" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": 75, 538 | "metadata": {}, 539 | "outputs": [ 540 | { 541 | "data": { 542 | "text/plain": [ 543 | 
"'Joseph'" 544 | ] 545 | }, 546 | "execution_count": 75, 547 | "metadata": {}, 548 | "output_type": "execute_result" 549 | } 550 | ], 551 | "source": [ 552 | "Name.split(' ')[1] #Show the second word" 553 | ] 554 | }, 555 | { 556 | "cell_type": "code", 557 | "execution_count": 76, 558 | "metadata": {}, 559 | "outputs": [ 560 | { 561 | "data": { 562 | "text/plain": [ 563 | "'Joseph'" 564 | ] 565 | }, 566 | "execution_count": 76, 567 | "metadata": {}, 568 | "output_type": "execute_result" 569 | } 570 | ], 571 | "source": [ 572 | "Name.split(' ')[-1] #Last word from the end" 573 | ] 574 | }, 575 | { 576 | "cell_type": "code", 577 | "execution_count": 79, 578 | "metadata": {}, 579 | "outputs": [ 580 | { 581 | "data": { 582 | "text/plain": [ 583 | "'Rohan2'" 584 | ] 585 | }, 586 | "execution_count": 79, 587 | "metadata": {}, 588 | "output_type": "execute_result" 589 | } 590 | ], 591 | "source": [ 592 | "#for concatenation convert objects to strings\n", 593 | "'Rohan' + str(2)" 594 | ] 595 | }, 596 | { 597 | "cell_type": "markdown", 598 | "metadata": {}, 599 | "source": [ 600 | "#### Dictionaries link keys and values" 601 | ] 602 | }, 603 | { 604 | "cell_type": "code", 605 | "execution_count": 16, 606 | "metadata": {}, 607 | "outputs": [], 608 | "source": [ 609 | "c = {\"Name\" : \"Rohan\", \"Height\" : 176}" 610 | ] 611 | }, 612 | { 613 | "cell_type": "code", 614 | "execution_count": 82, 615 | "metadata": {}, 616 | "outputs": [ 617 | { 618 | "data": { 619 | "text/plain": [ 620 | "dict" 621 | ] 622 | }, 623 | "execution_count": 82, 624 | "metadata": {}, 625 | "output_type": "execute_result" 626 | } 627 | ], 628 | "source": [ 629 | "type(c)" 630 | ] 631 | }, 632 | { 633 | "cell_type": "code", 634 | "execution_count": 85, 635 | "metadata": {}, 636 | "outputs": [ 637 | { 638 | "name": "stdout", 639 | "output_type": "stream", 640 | "text": [ 641 | "{'Name': 'Rohan', 'Height': 176}\n" 642 | ] 643 | } 644 | ], 645 | "source": [ 646 | "print(c)" 647 | ] 648 | }, 649 | { 650 | 
"cell_type": "code", 651 | "execution_count": 84, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/plain": [ 657 | "'Rohan'" 658 | ] 659 | }, 660 | "execution_count": 84, 661 | "metadata": {}, 662 | "output_type": "execute_result" 663 | } 664 | ], 665 | "source": [ 666 | "c['Name'] #Access Name" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 86, 672 | "metadata": {}, 673 | "outputs": [ 674 | { 675 | "data": { 676 | "text/plain": [ 677 | "176" 678 | ] 679 | }, 680 | "execution_count": 86, 681 | "metadata": {}, 682 | "output_type": "execute_result" 683 | } 684 | ], 685 | "source": [ 686 | "c['Height']" 687 | ] 688 | }, 689 | { 690 | "cell_type": "code", 691 | "execution_count": 87, 692 | "metadata": {}, 693 | "outputs": [ 694 | { 695 | "name": "stdout", 696 | "output_type": "stream", 697 | "text": [ 698 | "Name\n", 699 | "Height\n" 700 | ] 701 | } 702 | ], 703 | "source": [ 704 | "#print all the keys\n", 705 | "for i in c:\n", 706 | " print(i)" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 92, 712 | "metadata": {}, 713 | "outputs": [ 714 | { 715 | "name": "stdout", 716 | "output_type": "stream", 717 | "text": [ 718 | "Rohan\n", 719 | "176\n" 720 | ] 721 | } 722 | ], 723 | "source": [ 724 | "#print all the values\n", 725 | "for i in c.values():\n", 726 | " print(i)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": 93, 732 | "metadata": {}, 733 | "outputs": [ 734 | { 735 | "name": "stdout", 736 | "output_type": "stream", 737 | "text": [ 738 | "Name\n", 739 | "Rohan\n", 740 | "Height\n", 741 | "176\n" 742 | ] 743 | } 744 | ], 745 | "source": [ 746 | "#iterate over all the items\n", 747 | "for name, height in c.items():\n", 748 | " print(name)\n", 749 | " print(height)" 750 | ] 751 | }, 752 | { 753 | "cell_type": "code", 754 | "execution_count": 99, 755 | "metadata": {}, 756 | "outputs": [ 757 | { 758 | "name": "stdout", 759 | "output_type": "stream", 760 
| "text": [ 761 | "abc\n", 762 | "def\n" 763 | ] 764 | } 765 | ], 766 | "source": [ 767 | "#Unpacking a tuple into different variables\n", 768 | "a,b = ('abc','def')\n", 769 | "print(a)\n", 770 | "print(b)" 771 | ] 772 | }, 773 | { 774 | "cell_type": "markdown", 775 | "metadata": {}, 776 | "source": [ 777 | "# 4. Python dates and times" 778 | ] 779 | }, 780 | { 781 | "cell_type": "code", 782 | "execution_count": 100, 783 | "metadata": {}, 784 | "outputs": [], 785 | "source": [ 786 | "import datetime as dt\n", 787 | "import time as tm" 788 | ] 789 | }, 790 | { 791 | "cell_type": "code", 792 | "execution_count": 103, 793 | "metadata": {}, 794 | "outputs": [ 795 | { 796 | "data": { 797 | "text/plain": [ 798 | "1532483980.5827992" 799 | ] 800 | }, 801 | "execution_count": 103, 802 | "metadata": {}, 803 | "output_type": "execute_result" 804 | } 805 | ], 806 | "source": [ 807 | "tm.time() #print current time in seconds from January 1, 1970" 808 | ] 809 | }, 810 | { 811 | "cell_type": "code", 812 | "execution_count": 109, 813 | "metadata": {}, 814 | "outputs": [], 815 | "source": [ 816 | "dtnow = dt.datetime.fromtimestamp(tm.time()) #convert timestamp to datetime" 817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": 111, 822 | "metadata": {}, 823 | "outputs": [ 824 | { 825 | "data": { 826 | "text/plain": [ 827 | "2018" 828 | ] 829 | }, 830 | "execution_count": 111, 831 | "metadata": {}, 832 | "output_type": "execute_result" 833 | } 834 | ], 835 | "source": [ 836 | "dtnow.year" 837 | ] 838 | }, 839 | { 840 | "cell_type": "code", 841 | "execution_count": 112, 842 | "metadata": {}, 843 | "outputs": [ 844 | { 845 | "data": { 846 | "text/plain": [ 847 | "7" 848 | ] 849 | }, 850 | "execution_count": 112, 851 | "metadata": {}, 852 | "output_type": "execute_result" 853 | } 854 | ], 855 | "source": [ 856 | "dtnow.month" 857 | ] 858 | }, 859 | { 860 | "cell_type": "code", 861 | "execution_count": 119, 862 | "metadata": {}, 863 | "outputs": [], 864 | "source": 
[ 865 | "delta = dt.timedelta(days=100)" 866 | ] 867 | }, 868 | { 869 | "cell_type": "code", 870 | "execution_count": 115, 871 | "metadata": {}, 872 | "outputs": [], 873 | "source": [ 874 | "today = dt.date.today()" 875 | ] 876 | }, 877 | { 878 | "cell_type": "code", 879 | "execution_count": 116, 880 | "metadata": {}, 881 | "outputs": [ 882 | { 883 | "data": { 884 | "text/plain": [ 885 | "datetime.date(2018, 7, 25)" 886 | ] 887 | }, 888 | "execution_count": 116, 889 | "metadata": {}, 890 | "output_type": "execute_result" 891 | } 892 | ], 893 | "source": [ 894 | "today" 895 | ] 896 | }, 897 | { 898 | "cell_type": "code", 899 | "execution_count": 120, 900 | "metadata": {}, 901 | "outputs": [ 902 | { 903 | "data": { 904 | "text/plain": [ 905 | "datetime.date(2018, 4, 16)" 906 | ] 907 | }, 908 | "execution_count": 120, 909 | "metadata": {}, 910 | "output_type": "execute_result" 911 | } 912 | ], 913 | "source": [ 914 | "today - delta #subtract 100 days from today's date" 915 | ] 916 | }, 917 | { 918 | "cell_type": "markdown", 919 | "metadata": {}, 920 | "source": [ 921 | "# 5. Map function" 922 | ] 923 | }, 924 | { 925 | "cell_type": "code", 926 | "execution_count": 135, 927 | "metadata": {}, 928 | "outputs": [], 929 | "source": [ 930 | "a = [1,2,3,10]\n", 931 | "b = [5,6,2,9]\n", 932 | "\n", 933 | "c = map(min,a,b) #Find the element-wise minimum of the two lists\n" 934 | ] 935 | }, 936 | { 937 | "cell_type": "code", 938 | "execution_count": 136, 939 | "metadata": {}, 940 | "outputs": [ 941 | { 942 | "name": "stdout", 943 | "output_type": "stream", 944 | "text": [ 945 | "1\n", 946 | "2\n", 947 | "2\n", 948 | "9\n" 949 | ] 950 | } 951 | ], 952 | "source": [ 953 | "for item in c:\n", 954 | " print(item) #print the minimum of the pairs" 955 | ] 956 | }, 957 | { 958 | "cell_type": "markdown", 959 | "metadata": {}, 960 | "source": [ 961 | "# 6. 
Lambda function" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": 2, 967 | "metadata": {}, 968 | "outputs": [], 969 | "source": [ 970 | "function = lambda a,b,c : a+b+c #function to add three numbers" 971 | ] 972 | }, 973 | { 974 | "cell_type": "code", 975 | "execution_count": 3, 976 | "metadata": {}, 977 | "outputs": [ 978 | { 979 | "data": { 980 | "text/plain": [ 981 | "7" 982 | ] 983 | }, 984 | "execution_count": 3, 985 | "metadata": {}, 986 | "output_type": "execute_result" 987 | } 988 | ], 989 | "source": [ 990 | "function(2,2,3)" 991 | ] 992 | }, 993 | { 994 | "cell_type": "markdown", 995 | "metadata": {}, 996 | "source": [ 997 | "# 7. Filter function" 998 | ] 999 | }, 1000 | { 1001 | "cell_type": "code", 1002 | "execution_count": 5, 1003 | "metadata": {}, 1004 | "outputs": [], 1005 | "source": [ 1006 | "x = [1,2,3,4,5,6,7,8,9] #create a list\n", 1007 | "x2 = filter(lambda a : a>5, x) #filter using filter function" 1008 | ] 1009 | }, 1010 | { 1011 | "cell_type": "code", 1012 | "execution_count": 8, 1013 | "metadata": {}, 1014 | "outputs": [ 1015 | { 1016 | "name": "stdout", 1017 | "output_type": "stream", 1018 | "text": [ 1019 | "[6, 7, 8, 9]\n" 1020 | ] 1021 | } 1022 | ], 1023 | "source": [ 1024 | "print(list(x2))" 1025 | ] 1026 | }, 1027 | { 1028 | "cell_type": "markdown", 1029 | "metadata": {}, 1030 | "source": [ 1031 | "# 8. 
Reduce function" 1032 | ] 1033 | }, 1034 | { 1035 | "cell_type": "code", 1036 | "execution_count": 10, 1037 | "metadata": {}, 1038 | "outputs": [ 1039 | { 1040 | "data": { 1041 | "text/plain": [ 1042 | "120" 1043 | ] 1044 | }, 1045 | "execution_count": 10, 1046 | "metadata": {}, 1047 | "output_type": "execute_result" 1048 | } 1049 | ], 1050 | "source": [ 1051 | "from functools import reduce #import reduce function\n", 1052 | "y = [1,2,3,4,5] #create list\n", 1053 | "reduce(lambda a,b : a*b,y) #use reduce" 1054 | ] 1055 | }, 1056 | { 1057 | "cell_type": "markdown", 1058 | "metadata": {}, 1059 | "source": [ 1060 | "# 9. Zip function" 1061 | ] 1062 | }, 1063 | { 1064 | "cell_type": "code", 1065 | "execution_count": 14, 1066 | "metadata": {}, 1067 | "outputs": [ 1068 | { 1069 | "name": "stdout", 1070 | "output_type": "stream", 1071 | "text": [ 1072 | "[(1, 5), (2, 6), (3, 7), (4, 8)]\n" 1073 | ] 1074 | } 1075 | ], 1076 | "source": [ 1077 | "a = [1,2,3,4] #create two lists\n", 1078 | "b = [5,6,7,8]\n", 1079 | "c = zip(a,b) #Use the zip function\n", 1080 | "print(list(c))" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "markdown", 1085 | "metadata": {}, 1086 | "source": [ 1087 | "# 10. 
List comprehension" 1088 | ] 1089 | }, 1090 | { 1091 | "cell_type": "code", 1092 | "execution_count": 148, 1093 | "metadata": {}, 1094 | "outputs": [], 1095 | "source": [ 1096 | "#return even numbers from 0 to 99\n", 1097 | "\n", 1098 | "even=[]\n", 1099 | "for i in range(100):\n", 1100 | " if i%2 ==0:\n", 1101 | " even.append(i)\n", 1102 | " else:\n", 1103 | " None\n", 1104 | " " 1105 | ] 1106 | }, 1107 | { 1108 | "cell_type": "code", 1109 | "execution_count": 149, 1110 | "metadata": {}, 1111 | "outputs": [ 1112 | { 1113 | "name": "stdout", 1114 | "output_type": "stream", 1115 | "text": [ 1116 | "[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]\n" 1117 | ] 1118 | } 1119 | ], 1120 | "source": [ 1121 | "print(even)" 1122 | ] 1123 | }, 1124 | { 1125 | "cell_type": "code", 1126 | "execution_count": 154, 1127 | "metadata": {}, 1128 | "outputs": [ 1129 | { 1130 | "name": "stdout", 1131 | "output_type": "stream", 1132 | "text": [ 1133 | "[0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]\n" 1134 | ] 1135 | } 1136 | ], 1137 | "source": [ 1138 | "#Let's do the same using list comprehension\n", 1139 | "\n", 1140 | "even = [i for i in range(100) if i%2==0]\n", 1141 | "print(even)" 1142 | ] 1143 | }, 1144 | { 1145 | "cell_type": "markdown", 1146 | "metadata": {}, 1147 | "source": [ 1148 | "# 11. 
Numpy" 1149 | ] 1150 | }, 1151 | { 1152 | "cell_type": "code", 1153 | "execution_count": 155, 1154 | "metadata": {}, 1155 | "outputs": [], 1156 | "source": [ 1157 | "import numpy as np #import" 1158 | ] 1159 | }, 1160 | { 1161 | "cell_type": "code", 1162 | "execution_count": 157, 1163 | "metadata": {}, 1164 | "outputs": [ 1165 | { 1166 | "data": { 1167 | "text/plain": [ 1168 | "array([1, 2, 3])" 1169 | ] 1170 | }, 1171 | "execution_count": 157, 1172 | "metadata": {}, 1173 | "output_type": "execute_result" 1174 | } 1175 | ], 1176 | "source": [ 1177 | "#create numpy array\n", 1178 | "np.array([1,2,3])" 1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "code", 1183 | "execution_count": 160, 1184 | "metadata": {}, 1185 | "outputs": [ 1186 | { 1187 | "data": { 1188 | "text/plain": [ 1189 | "array([[1, 2, 3],\n", 1190 | " [4, 5, 6]])" 1191 | ] 1192 | }, 1193 | "execution_count": 160, 1194 | "metadata": {}, 1195 | "output_type": "execute_result" 1196 | } 1197 | ], 1198 | "source": [ 1199 | "a = np.array([[1,2,3],[4,5,6]]) #create multi dimensional array\n", 1200 | "a" 1201 | ] 1202 | }, 1203 | { 1204 | "cell_type": "code", 1205 | "execution_count": 161, 1206 | "metadata": {}, 1207 | "outputs": [ 1208 | { 1209 | "data": { 1210 | "text/plain": [ 1211 | "(2, 3)" 1212 | ] 1213 | }, 1214 | "execution_count": 161, 1215 | "metadata": {}, 1216 | "output_type": "execute_result" 1217 | } 1218 | ], 1219 | "source": [ 1220 | "#shape of the array\n", 1221 | "a.shape" 1222 | ] 1223 | }, 1224 | { 1225 | "cell_type": "code", 1226 | "execution_count": 163, 1227 | "metadata": {}, 1228 | "outputs": [ 1229 | { 1230 | "data": { 1231 | "text/plain": [ 1232 | "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29])" 1233 | ] 1234 | }, 1235 | "execution_count": 163, 1236 | "metadata": {}, 1237 | "output_type": "execute_result" 1238 | } 1239 | ], 1240 | "source": [ 1241 | "#evenly spaced values between the interval\n", 1242 | "b = np.arange(1,30,2)\n", 1243 | "b" 1244 | ] 1245 | }, 1246 | { 
1247 | "cell_type": "code", 1248 | "execution_count": 170, 1249 | "metadata": {}, 1250 | "outputs": [ 1251 | { 1252 | "data": { 1253 | "text/plain": [ 1254 | "array([ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29])" 1255 | ] 1256 | }, 1257 | "execution_count": 170, 1258 | "metadata": {}, 1259 | "output_type": "execute_result" 1260 | } 1261 | ], 1262 | "source": [ 1263 | "#create new shape of data\n", 1264 | "b.reshape(5,3)" 1265 | ] 1266 | }, 1267 | { 1268 | "cell_type": "code", 1269 | "execution_count": 174, 1270 | "metadata": {}, 1271 | "outputs": [ 1272 | { 1273 | "data": { 1274 | "text/plain": [ 1275 | "array([ 1. , 1.21052632, 1.42105263, 1.63157895, 1.84210526,\n", 1276 | " 2.05263158, 2.26315789, 2.47368421, 2.68421053, 2.89473684,\n", 1277 | " 3.10526316, 3.31578947, 3.52631579, 3.73684211, 3.94736842,\n", 1278 | " 4.15789474, 4.36842105, 4.57894737, 4.78947368, 5. ])" 1279 | ] 1280 | }, 1281 | "execution_count": 174, 1282 | "metadata": {}, 1283 | "output_type": "execute_result" 1284 | } 1285 | ], 1286 | "source": [ 1287 | "#get evenly spaced numbers between specified interval\n", 1288 | "c = np.linspace(1,5,20)\n", 1289 | "c" 1290 | ] 1291 | }, 1292 | { 1293 | "cell_type": "code", 1294 | "execution_count": 175, 1295 | "metadata": {}, 1296 | "outputs": [ 1297 | { 1298 | "data": { 1299 | "text/plain": [ 1300 | "(20,)" 1301 | ] 1302 | }, 1303 | "execution_count": 175, 1304 | "metadata": {}, 1305 | "output_type": "execute_result" 1306 | } 1307 | ], 1308 | "source": [ 1309 | "c.shape" 1310 | ] 1311 | }, 1312 | { 1313 | "cell_type": "code", 1314 | "execution_count": 176, 1315 | "metadata": {}, 1316 | "outputs": [ 1317 | { 1318 | "data": { 1319 | "text/plain": [ 1320 | "array([[ 1. 
, 1.21052632, 1.42105263, 1.63157895],\n", 1321 | " [ 1.84210526, 2.05263158, 2.26315789, 2.47368421],\n", 1322 | " [ 2.68421053, 2.89473684, 3.10526316, 3.31578947],\n", 1323 | " [ 3.52631579, 3.73684211, 3.94736842, 4.15789474],\n", 1324 | " [ 4.36842105, 4.57894737, 4.78947368, 5. ]])" 1325 | ] 1326 | }, 1327 | "execution_count": 176, 1328 | "metadata": {}, 1329 | "output_type": "execute_result" 1330 | } 1331 | ], 1332 | "source": [ 1333 | "#change shape of array in-place\n", 1334 | "c.resize(5,4)\n", 1335 | "c" 1336 | ] 1337 | }, 1338 | { 1339 | "cell_type": "code", 1340 | "execution_count": 177, 1341 | "metadata": {}, 1342 | "outputs": [ 1343 | { 1344 | "data": { 1345 | "text/plain": [ 1346 | "(5, 4)" 1347 | ] 1348 | }, 1349 | "execution_count": 177, 1350 | "metadata": {}, 1351 | "output_type": "execute_result" 1352 | } 1353 | ], 1354 | "source": [ 1355 | "c.shape" 1356 | ] 1357 | }, 1358 | { 1359 | "cell_type": "code", 1360 | "execution_count": 181, 1361 | "metadata": {}, 1362 | "outputs": [ 1363 | { 1364 | "data": { 1365 | "text/plain": [ 1366 | "array([[ 1., 1.],\n", 1367 | " [ 1., 1.]])" 1368 | ] 1369 | }, 1370 | "execution_count": 181, 1371 | "metadata": {}, 1372 | "output_type": "execute_result" 1373 | } 1374 | ], 1375 | "source": [ 1376 | "#create array filled with ones\n", 1377 | "d = np.ones((2,2))\n", 1378 | "d" 1379 | ] 1380 | }, 1381 | { 1382 | "cell_type": "code", 1383 | "execution_count": 185, 1384 | "metadata": {}, 1385 | "outputs": [ 1386 | { 1387 | "data": { 1388 | "text/plain": [ 1389 | "array([[ 0., 0., 0.],\n", 1390 | " [ 0., 0., 0.],\n", 1391 | " [ 0., 0., 0.]])" 1392 | ] 1393 | }, 1394 | "execution_count": 185, 1395 | "metadata": {}, 1396 | "output_type": "execute_result" 1397 | } 1398 | ], 1399 | "source": [ 1400 | "#create array filled with zeros\n", 1401 | "e = np.zeros((3,3))\n", 1402 | "e" 1403 | ] 1404 | }, 1405 | { 1406 | "cell_type": "code", 1407 | "execution_count": 189, 1408 | "metadata": {}, 1409 | "outputs": [ 1410 | { 1411 | 
"data": { 1412 | "text/plain": [ 1413 | "array([[ 1., 0.],\n", 1414 | " [ 0., 1.]])" 1415 | ] 1416 | }, 1417 | "execution_count": 189, 1418 | "metadata": {}, 1419 | "output_type": "execute_result" 1420 | } 1421 | ], 1422 | "source": [ 1423 | "#create diagonal matrix with diagonal values =1\n", 1424 | "f = np.eye(2)\n", 1425 | "f" 1426 | ] 1427 | }, 1428 | { 1429 | "cell_type": "code", 1430 | "execution_count": 191, 1431 | "metadata": {}, 1432 | "outputs": [ 1433 | { 1434 | "data": { 1435 | "text/plain": [ 1436 | "array([ 1., 1.])" 1437 | ] 1438 | }, 1439 | "execution_count": 191, 1440 | "metadata": {}, 1441 | "output_type": "execute_result" 1442 | } 1443 | ], 1444 | "source": [ 1445 | "#extract only the diagonal values from array\n", 1446 | "np.diag(f)" 1447 | ] 1448 | }, 1449 | { 1450 | "cell_type": "code", 1451 | "execution_count": 194, 1452 | "metadata": {}, 1453 | "outputs": [ 1454 | { 1455 | "data": { 1456 | "text/plain": [ 1457 | "array([1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3, 1, 2, 3])" 1458 | ] 1459 | }, 1460 | "execution_count": 194, 1461 | "metadata": {}, 1462 | "output_type": "execute_result" 1463 | } 1464 | ], 1465 | "source": [ 1466 | "#create array using repeating list\n", 1467 | "g = np.array([1,2,3]*5)\n", 1468 | "g" 1469 | ] 1470 | }, 1471 | { 1472 | "cell_type": "code", 1473 | "execution_count": 196, 1474 | "metadata": {}, 1475 | "outputs": [ 1476 | { 1477 | "data": { 1478 | "text/plain": [ 1479 | "array([1, 1, 1, 2, 2, 2, 3, 3, 3])" 1480 | ] 1481 | }, 1482 | "execution_count": 196, 1483 | "metadata": {}, 1484 | "output_type": "execute_result" 1485 | } 1486 | ], 1487 | "source": [ 1488 | "#repeat elements using repeat\n", 1489 | "np.repeat([1,2,3],3)" 1490 | ] 1491 | }, 1492 | { 1493 | "cell_type": "code", 1494 | "execution_count": 215, 1495 | "metadata": {}, 1496 | "outputs": [], 1497 | "source": [ 1498 | "h = np.ones((2,3))\n", 1499 | "g = np.random.rand(2,3) #this generates a random array" 1500 | ] 1501 | }, 1502 | { 1503 | "cell_type": "code", 
1504 | "execution_count": 221, 1505 | "metadata": {}, 1506 | "outputs": [ 1507 | { 1508 | "data": { 1509 | "text/plain": [ 1510 | "array([[ 1. , 1. , 1. ],\n", 1511 | " [ 1. , 1. , 1. ],\n", 1512 | " [ 0.50725359, 0.28610842, 0.98268379],\n", 1513 | " [ 0.52425524, 0.23682556, 0.21598467]])" 1514 | ] 1515 | }, 1516 | "execution_count": 221, 1517 | "metadata": {}, 1518 | "output_type": "execute_result" 1519 | } 1520 | ], 1521 | "source": [ 1522 | "#stack the above two arrays vertically\n", 1523 | "i = np.vstack([h,g])\n", 1524 | "i" 1525 | ] 1526 | }, 1527 | { 1528 | "cell_type": "code", 1529 | "execution_count": 222, 1530 | "metadata": {}, 1531 | "outputs": [ 1532 | { 1533 | "data": { 1534 | "text/plain": [ 1535 | "(4, 3)" 1536 | ] 1537 | }, 1538 | "execution_count": 222, 1539 | "metadata": {}, 1540 | "output_type": "execute_result" 1541 | } 1542 | ], 1543 | "source": [ 1544 | "i.shape" 1545 | ] 1546 | }, 1547 | { 1548 | "cell_type": "code", 1549 | "execution_count": 224, 1550 | "metadata": {}, 1551 | "outputs": [ 1552 | { 1553 | "data": { 1554 | "text/plain": [ 1555 | "array([[ 1. , 1. , 1. , 0.50725359, 0.28610842,\n", 1556 | " 0.98268379],\n", 1557 | " [ 1. , 1. , 1. 
, 0.52425524, 0.23682556,\n", 1558 | " 0.21598467]])" 1559 | ] 1560 | }, 1561 | "execution_count": 224, 1562 | "metadata": {}, 1563 | "output_type": "execute_result" 1564 | } 1565 | ], 1566 | "source": [ 1567 | "#Now, stack them horizontally\n", 1568 | "j = np.hstack([h,g])\n", 1569 | "j" 1570 | ] 1571 | }, 1572 | { 1573 | "cell_type": "code", 1574 | "execution_count": 225, 1575 | "metadata": {}, 1576 | "outputs": [ 1577 | { 1578 | "data": { 1579 | "text/plain": [ 1580 | "(2, 6)" 1581 | ] 1582 | }, 1583 | "execution_count": 225, 1584 | "metadata": {}, 1585 | "output_type": "execute_result" 1586 | } 1587 | ], 1588 | "source": [ 1589 | "j.shape" 1590 | ] 1591 | }, 1592 | { 1593 | "cell_type": "code", 1594 | "execution_count": 228, 1595 | "metadata": {}, 1596 | "outputs": [ 1597 | { 1598 | "name": "stdout", 1599 | "output_type": "stream", 1600 | "text": [ 1601 | "[[ 0.42443005 0.28611242]\n", 1602 | " [ 0.65396499 0.57360354]]\n", 1603 | "[[ 0.90377716 0.32249125]\n", 1604 | " [ 0.59825087 0.13558027]]\n" 1605 | ] 1606 | } 1607 | ], 1608 | "source": [ 1609 | "#Array operations\n", 1610 | "k = np.random.rand(2,2)\n", 1611 | "l = np.random.rand(2,2)\n", 1612 | "print(k)\n", 1613 | "#print(l)" 1614 | ] 1615 | }, 1616 | { 1617 | "cell_type": "code", 1618 | "execution_count": 230, 1619 | "metadata": {}, 1620 | "outputs": [ 1621 | { 1622 | "data": { 1623 | "text/plain": [ 1624 | "array([[ 1.32820721, 0.60860367],\n", 1625 | " [ 1.25221586, 0.70918381]])" 1626 | ] 1627 | }, 1628 | "execution_count": 230, 1629 | "metadata": {}, 1630 | "output_type": "execute_result" 1631 | } 1632 | ], 1633 | "source": [ 1634 | "#element wise addition\n", 1635 | "m = k + l\n", 1636 | "m" 1637 | ] 1638 | }, 1639 | { 1640 | "cell_type": "code", 1641 | "execution_count": 232, 1642 | "metadata": {}, 1643 | "outputs": [ 1644 | { 1645 | "data": { 1646 | "text/plain": [ 1647 | "array([[-0.47934711, -0.03637883],\n", 1648 | " [ 0.05571412, 0.43802327]])" 1649 | ] 1650 | }, 1651 | "execution_count": 
232, 1652 | "metadata": {}, 1653 | "output_type": "execute_result" 1654 | } 1655 | ], 1656 | "source": [ 1657 | "#element wise subtraction\n", 1658 | "n = k-l\n", 1659 | "n" 1660 | ] 1661 | }, 1662 | { 1663 | "cell_type": "code", 1664 | "execution_count": 234, 1665 | "metadata": {}, 1666 | "outputs": [ 1667 | { 1668 | "data": { 1669 | "text/plain": [ 1670 | "array([[ 0.38359018, 0.09226875],\n", 1671 | " [ 0.39123512, 0.07776932]])" 1672 | ] 1673 | }, 1674 | "execution_count": 234, 1675 | "metadata": {}, 1676 | "output_type": "execute_result" 1677 | } 1678 | ], 1679 | "source": [ 1680 | "#element wise multiplication\n", 1681 | "o = k*l\n", 1682 | "o" 1683 | ] 1684 | }, 1685 | { 1686 | "cell_type": "code", 1687 | "execution_count": 236, 1688 | "metadata": {}, 1689 | "outputs": [ 1690 | { 1691 | "data": { 1692 | "text/plain": [ 1693 | "array([[ 0.18014086, 0.08186032],\n", 1694 | " [ 0.42767021, 0.32902102]])" 1695 | ] 1696 | }, 1697 | "execution_count": 236, 1698 | "metadata": {}, 1699 | "output_type": "execute_result" 1700 | } 1701 | ], 1702 | "source": [ 1703 | "#element wise power\n", 1704 | "p = k**2\n", 1705 | "p" 1706 | ] 1707 | }, 1708 | { 1709 | "cell_type": "code", 1710 | "execution_count": 238, 1711 | "metadata": {}, 1712 | "outputs": [ 1713 | { 1714 | "data": { 1715 | "text/plain": [ 1716 | "array([[ 0.55475719, 0.17566617],\n", 1717 | " [ 0.93419744, 0.28866731]])" 1718 | ] 1719 | }, 1720 | "execution_count": 238, 1721 | "metadata": {}, 1722 | "output_type": "execute_result" 1723 | } 1724 | ], 1725 | "source": [ 1726 | "#dot product\n", 1727 | "q = k.dot(l)\n", 1728 | "q" 1729 | ] 1730 | }, 1731 | { 1732 | "cell_type": "code", 1733 | "execution_count": 241, 1734 | "metadata": {}, 1735 | "outputs": [ 1736 | { 1737 | "data": { 1738 | "text/plain": [ 1739 | "array([[1, 2],\n", 1740 | " [3, 4]])" 1741 | ] 1742 | }, 1743 | "execution_count": 241, 1744 | "metadata": {}, 1745 | "output_type": "execute_result" 1746 | } 1747 | ], 1748 | "source": [ 1749 | 
"#transpose arrays\n", 1750 | "a = np.array([[1,2],[3,4]])\n", 1751 | "a" 1752 | ] 1753 | }, 1754 | { 1755 | "cell_type": "code", 1756 | "execution_count": 242, 1757 | "metadata": {}, 1758 | "outputs": [ 1759 | { 1760 | "data": { 1761 | "text/plain": [ 1762 | "array([[1, 3],\n", 1763 | " [2, 4]])" 1764 | ] 1765 | }, 1766 | "execution_count": 242, 1767 | "metadata": {}, 1768 | "output_type": "execute_result" 1769 | } 1770 | ], 1771 | "source": [ 1772 | "a.T #transpose" 1773 | ] 1774 | }, 1775 | { 1776 | "cell_type": "code", 1777 | "execution_count": 243, 1778 | "metadata": {}, 1779 | "outputs": [ 1780 | { 1781 | "data": { 1782 | "text/plain": [ 1783 | "dtype('int32')" 1784 | ] 1785 | }, 1786 | "execution_count": 243, 1787 | "metadata": {}, 1788 | "output_type": "execute_result" 1789 | } 1790 | ], 1791 | "source": [ 1792 | "#check datatype of elements in array\n", 1793 | "a.dtype" 1794 | ] 1795 | }, 1796 | { 1797 | "cell_type": "code", 1798 | "execution_count": 245, 1799 | "metadata": {}, 1800 | "outputs": [ 1801 | { 1802 | "data": { 1803 | "text/plain": [ 1804 | "dtype('float32')" 1805 | ] 1806 | }, 1807 | "execution_count": 245, 1808 | "metadata": {}, 1809 | "output_type": "execute_result" 1810 | } 1811 | ], 1812 | "source": [ 1813 | "#change type using astype\n", 1814 | "b = a.astype('f')\n", 1815 | "b.dtype" 1816 | ] 1817 | }, 1818 | { 1819 | "cell_type": "code", 1820 | "execution_count": 246, 1821 | "metadata": {}, 1822 | "outputs": [], 1823 | "source": [ 1824 | "#Math functions in numpy\n", 1825 | "c = np.array([1,2,3,4,5])" 1826 | ] 1827 | }, 1828 | { 1829 | "cell_type": "code", 1830 | "execution_count": 248, 1831 | "metadata": {}, 1832 | "outputs": [ 1833 | { 1834 | "data": { 1835 | "text/plain": [ 1836 | "15" 1837 | ] 1838 | }, 1839 | "execution_count": 248, 1840 | "metadata": {}, 1841 | "output_type": "execute_result" 1842 | } 1843 | ], 1844 | "source": [ 1845 | "c.sum()" 1846 | ] 1847 | }, 1848 | { 1849 | "cell_type": "code", 1850 | "execution_count": 249, 
1851 | "metadata": {}, 1852 | "outputs": [ 1853 | { 1854 | "data": { 1855 | "text/plain": [ 1856 | "5" 1857 | ] 1858 | }, 1859 | "execution_count": 249, 1860 | "metadata": {}, 1861 | "output_type": "execute_result" 1862 | } 1863 | ], 1864 | "source": [ 1865 | "c.max()" 1866 | ] 1867 | }, 1868 | { 1869 | "cell_type": "code", 1870 | "execution_count": 250, 1871 | "metadata": {}, 1872 | "outputs": [ 1873 | { 1874 | "data": { 1875 | "text/plain": [ 1876 | "3.0" 1877 | ] 1878 | }, 1879 | "execution_count": 250, 1880 | "metadata": {}, 1881 | "output_type": "execute_result" 1882 | } 1883 | ], 1884 | "source": [ 1885 | "c.mean()" 1886 | ] 1887 | }, 1888 | { 1889 | "cell_type": "code", 1890 | "execution_count": 252, 1891 | "metadata": {}, 1892 | "outputs": [ 1893 | { 1894 | "data": { 1895 | "text/plain": [ 1896 | "4" 1897 | ] 1898 | }, 1899 | "execution_count": 252, 1900 | "metadata": {}, 1901 | "output_type": "execute_result" 1902 | } 1903 | ], 1904 | "source": [ 1905 | "#return index of maximum value\n", 1906 | "c.argmax()" 1907 | ] 1908 | }, 1909 | { 1910 | "cell_type": "code", 1911 | "execution_count": 253, 1912 | "metadata": {}, 1913 | "outputs": [ 1914 | { 1915 | "data": { 1916 | "text/plain": [ 1917 | "0" 1918 | ] 1919 | }, 1920 | "execution_count": 253, 1921 | "metadata": {}, 1922 | "output_type": "execute_result" 1923 | } 1924 | ], 1925 | "source": [ 1926 | "#return index of minimum value\n", 1927 | "c.argmin()" 1928 | ] 1929 | }, 1930 | { 1931 | "cell_type": "code", 1932 | "execution_count": 262, 1933 | "metadata": {}, 1934 | "outputs": [ 1935 | { 1936 | "data": { 1937 | "text/plain": [ 1938 | "array([ 0, 1, 4, 9, 16, 25, 36, 49, 64, 81], dtype=int32)" 1939 | ] 1940 | }, 1941 | "execution_count": 262, 1942 | "metadata": {}, 1943 | "output_type": "execute_result" 1944 | } 1945 | ], 1946 | "source": [ 1947 | "d = np.arange(10)**2\n", 1948 | "d" 1949 | ] 1950 | }, 1951 | { 1952 | "cell_type": "code", 1953 | "execution_count": 263, 1954 | "metadata": {}, 1955 | 
"outputs": [ 1956 | { 1957 | "data": { 1958 | "text/plain": [ 1959 | "4" 1960 | ] 1961 | }, 1962 | "execution_count": 263, 1963 | "metadata": {}, 1964 | "output_type": "execute_result" 1965 | } 1966 | ], 1967 | "source": [ 1968 | "#use index for accessing values\n", 1969 | "d[2]" 1970 | ] 1971 | }, 1972 | { 1973 | "cell_type": "code", 1974 | "execution_count": 264, 1975 | "metadata": {}, 1976 | "outputs": [ 1977 | { 1978 | "data": { 1979 | "text/plain": [ 1980 | "0" 1981 | ] 1982 | }, 1983 | "execution_count": 264, 1984 | "metadata": {}, 1985 | "output_type": "execute_result" 1986 | } 1987 | ], 1988 | "source": [ 1989 | "d[0]" 1990 | ] 1991 | }, 1992 | { 1993 | "cell_type": "code", 1994 | "execution_count": 266, 1995 | "metadata": {}, 1996 | "outputs": [ 1997 | { 1998 | "data": { 1999 | "text/plain": [ 2000 | "array([ 1, 4, 9, 16], dtype=int32)" 2001 | ] 2002 | }, 2003 | "execution_count": 266, 2004 | "metadata": {}, 2005 | "output_type": "execute_result" 2006 | } 2007 | ], 2008 | "source": [ 2009 | "d[1:5]" 2010 | ] 2011 | }, 2012 | { 2013 | "cell_type": "code", 2014 | "execution_count": 276, 2015 | "metadata": {}, 2016 | "outputs": [ 2017 | { 2018 | "data": { 2019 | "text/plain": [ 2020 | "array([81], dtype=int32)" 2021 | ] 2022 | }, 2023 | "execution_count": 276, 2024 | "metadata": {}, 2025 | "output_type": "execute_result" 2026 | } 2027 | ], 2028 | "source": [ 2029 | "#use negatives to count from back\n", 2030 | "d[-1:]" 2031 | ] 2032 | }, 2033 | { 2034 | "cell_type": "code", 2035 | "execution_count": 280, 2036 | "metadata": {}, 2037 | "outputs": [ 2038 | { 2039 | "data": { 2040 | "text/plain": [ 2041 | "array([ 1, 9, 25, 49, 81], dtype=int32)" 2042 | ] 2043 | }, 2044 | "execution_count": 280, 2045 | "metadata": {}, 2046 | "output_type": "execute_result" 2047 | } 2048 | ], 2049 | "source": [ 2050 | "#use two :: to include step size\n", 2051 | "d[1:10:2] #d[start:stop:stepsize]" 2052 | ] 2053 | }, 2054 | { 2055 | "cell_type": "code", 2056 | "execution_count": 
282, 2057 | "metadata": {}, 2058 | "outputs": [ 2059 | { 2060 | "data": { 2061 | "text/plain": [ 2062 | "array([[ 0, 1, 2, 3, 4, 5],\n", 2063 | " [ 6, 7, 8, 9, 10, 11],\n", 2064 | " [12, 13, 14, 15, 16, 17],\n", 2065 | " [18, 19, 20, 21, 22, 23],\n", 2066 | " [24, 25, 26, 27, 28, 29],\n", 2067 | " [30, 31, 32, 33, 34, 35]])" 2068 | ] 2069 | }, 2070 | "execution_count": 282, 2071 | "metadata": {}, 2072 | "output_type": "execute_result" 2073 | } 2074 | ], 2075 | "source": [ 2076 | "#multidimensional arrays\n", 2077 | "e = np.arange(36)\n", 2078 | "e.resize(6,6)\n", 2079 | "e" 2080 | ] 2081 | }, 2082 | { 2083 | "cell_type": "code", 2084 | "execution_count": 284, 2085 | "metadata": {}, 2086 | "outputs": [ 2087 | { 2088 | "data": { 2089 | "text/plain": [ 2090 | "8" 2091 | ] 2092 | }, 2093 | "execution_count": 284, 2094 | "metadata": {}, 2095 | "output_type": "execute_result" 2096 | } 2097 | ], 2098 | "source": [ 2099 | "#Access 2nd row and 3rd column\n", 2100 | "e[1,2]" 2101 | ] 2102 | }, 2103 | { 2104 | "cell_type": "code", 2105 | "execution_count": 286, 2106 | "metadata": {}, 2107 | "outputs": [ 2108 | { 2109 | "data": { 2110 | "text/plain": [ 2111 | "array([ 8, 9, 10, 11])" 2112 | ] 2113 | }, 2114 | "execution_count": 286, 2115 | "metadata": {}, 2116 | "output_type": "execute_result" 2117 | } 2118 | ], 2119 | "source": [ 2120 | "#use : to select range of columns\n", 2121 | "e[1, 2:6]" 2122 | ] 2123 | }, 2124 | { 2125 | "cell_type": "code", 2126 | "execution_count": 287, 2127 | "metadata": {}, 2128 | "outputs": [ 2129 | { 2130 | "data": { 2131 | "text/plain": [ 2132 | "array([[ 0, 1, 2, 3, 4],\n", 2133 | " [ 6, 7, 8, 9, 10]])" 2134 | ] 2135 | }, 2136 | "execution_count": 287, 2137 | "metadata": {}, 2138 | "output_type": "execute_result" 2139 | } 2140 | ], 2141 | "source": [ 2142 | "#Select all rows till 2nd row and all columns except last column\n", 2143 | "e[:2,:-1]" 2144 | ] 2145 | }, 2146 | { 2147 | "cell_type": "code", 2148 | "execution_count": 290, 2149 | 
"metadata": {}, 2150 | "outputs": [ 2151 | { 2152 | "data": { 2153 | "text/plain": [ 2154 | "array([30, 32, 34])" 2155 | ] 2156 | }, 2157 | "execution_count": 290, 2158 | "metadata": {}, 2159 | "output_type": "execute_result" 2160 | } 2161 | ], 2162 | "source": [ 2163 | "#select last row and alternate columns\n", 2164 | "e[-1,::2]" 2165 | ] 2166 | }, 2167 | { 2168 | "cell_type": "code", 2169 | "execution_count": 291, 2170 | "metadata": {}, 2171 | "outputs": [ 2172 | { 2173 | "data": { 2174 | "text/plain": [ 2175 | "array([32, 34])" 2176 | ] 2177 | }, 2178 | "execution_count": 291, 2179 | "metadata": {}, 2180 | "output_type": "execute_result" 2181 | } 2182 | ], 2183 | "source": [ 2184 | "#select last row and alternate columns from 3rd column\n", 2185 | "e[-1,2::2]" 2186 | ] 2187 | }, 2188 | { 2189 | "cell_type": "code", 2190 | "execution_count": 292, 2191 | "metadata": {}, 2192 | "outputs": [ 2193 | { 2194 | "data": { 2195 | "text/plain": [ 2196 | "array([21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35])" 2197 | ] 2198 | }, 2199 | "execution_count": 292, 2200 | "metadata": {}, 2201 | "output_type": "execute_result" 2202 | } 2203 | ], 2204 | "source": [ 2205 | "#select values from array greater than 20\n", 2206 | "e[e>20]" 2207 | ] 2208 | }, 2209 | { 2210 | "cell_type": "code", 2211 | "execution_count": 294, 2212 | "metadata": {}, 2213 | "outputs": [ 2214 | { 2215 | "data": { 2216 | "text/plain": [ 2217 | "array([[ 0, 1, 2, 3, 4, 5],\n", 2218 | " [ 6, 7, 8, 9, 10, 11],\n", 2219 | " [12, 13, 14, 15, 16, 17],\n", 2220 | " [18, 19, 20, 20, 20, 20],\n", 2221 | " [20, 20, 20, 20, 20, 20],\n", 2222 | " [20, 20, 20, 20, 20, 20]])" 2223 | ] 2224 | }, 2225 | "execution_count": 294, 2226 | "metadata": {}, 2227 | "output_type": "execute_result" 2228 | } 2229 | ], 2230 | "source": [ 2231 | "#Assign element value as 20 if value is greater than 20\n", 2232 | "e[e>20] = 20\n", 2233 | "e" 2234 | ] 2235 | }, 2236 | { 2237 | "cell_type": "code", 2238 | "execution_count": 
308, 2239 | "metadata": {}, 2240 | "outputs": [ 2241 | { 2242 | "data": { 2243 | "text/plain": [ 2244 | "array([[0, 0, 0],\n", 2245 | " [0, 0, 0],\n", 2246 | " [0, 0, 0]])" 2247 | ] 2248 | }, 2249 | "execution_count": 308, 2250 | "metadata": {}, 2251 | "output_type": "execute_result" 2252 | } 2253 | ], 2254 | "source": [ 2255 | "#BE CAREFUL WHILE COPYING ARRAYS\n", 2256 | "f = e[:3,:3]\n", 2257 | "f" 2258 | ] 2259 | }, 2260 | { 2261 | "cell_type": "code", 2262 | "execution_count": 309, 2263 | "metadata": {}, 2264 | "outputs": [ 2265 | { 2266 | "data": { 2267 | "text/plain": [ 2268 | "array([[0, 0, 0],\n", 2269 | " [0, 0, 0],\n", 2270 | " [0, 0, 0]])" 2271 | ] 2272 | }, 2273 | "execution_count": 309, 2274 | "metadata": {}, 2275 | "output_type": "execute_result" 2276 | } 2277 | ], 2278 | "source": [ 2279 | "f[:] = 0\n", 2280 | "f" 2281 | ] 2282 | }, 2283 | { 2284 | "cell_type": "code", 2285 | "execution_count": 310, 2286 | "metadata": {}, 2287 | "outputs": [ 2288 | { 2289 | "data": { 2290 | "text/plain": [ 2291 | "array([[ 0, 0, 0, 3, 4, 5],\n", 2292 | " [ 0, 0, 0, 9, 10, 11],\n", 2293 | " [ 0, 0, 0, 15, 16, 17],\n", 2294 | " [18, 19, 20, 20, 20, 20],\n", 2295 | " [20, 20, 20, 20, 20, 20],\n", 2296 | " [20, 20, 20, 20, 20, 20]])" 2297 | ] 2298 | }, 2299 | "execution_count": 310, 2300 | "metadata": {}, 2301 | "output_type": "execute_result" 2302 | } 2303 | ], 2304 | "source": [ 2305 | "e #e also got changed" 2306 | ] 2307 | }, 2308 | { 2309 | "cell_type": "code", 2310 | "execution_count": 313, 2311 | "metadata": {}, 2312 | "outputs": [ 2313 | { 2314 | "data": { 2315 | "text/plain": [ 2316 | "array([[ 0, 0, 0, 3, 4, 5],\n", 2317 | " [ 0, 0, 0, 9, 10, 11],\n", 2318 | " [ 0, 0, 0, 15, 16, 17],\n", 2319 | " [18, 19, 20, 20, 20, 20],\n", 2320 | " [20, 20, 20, 20, 20, 20],\n", 2321 | " [20, 20, 20, 20, 20, 20]])" 2322 | ] 2323 | }, 2324 | "execution_count": 313, 2325 | "metadata": {}, 2326 | "output_type": "execute_result" 2327 | } 2328 | ], 2329 | "source": [ 2330 | "#copy 
using copy function\n", 2331 | "f = e.copy()\n", 2332 | "f" 2333 | ] 2334 | }, 2335 | { 2336 | "cell_type": "code", 2337 | "execution_count": 315, 2338 | "metadata": {}, 2339 | "outputs": [ 2340 | { 2341 | "data": { 2342 | "text/plain": [ 2343 | "array([[ 0, 0, 0, 3, 4, 5],\n", 2344 | " [ 0, 0, 0, 9, 10, 11],\n", 2345 | " [ 0, 0, 0, 15, 16, 17],\n", 2346 | " [18, 19, 20, 0, 0, 0],\n", 2347 | " [20, 20, 20, 0, 0, 0],\n", 2348 | " [20, 20, 20, 0, 0, 0]])" 2349 | ] 2350 | }, 2351 | "execution_count": 315, 2352 | "metadata": {}, 2353 | "output_type": "execute_result" 2354 | } 2355 | ], 2356 | "source": [ 2357 | "f[3:,3:] = 0\n", 2358 | "f" 2359 | ] 2360 | }, 2361 | { 2362 | "cell_type": "code", 2363 | "execution_count": 316, 2364 | "metadata": {}, 2365 | "outputs": [ 2366 | { 2367 | "data": { 2368 | "text/plain": [ 2369 | "array([[ 0, 0, 0, 3, 4, 5],\n", 2370 | " [ 0, 0, 0, 9, 10, 11],\n", 2371 | " [ 0, 0, 0, 15, 16, 17],\n", 2372 | " [18, 19, 20, 20, 20, 20],\n", 2373 | " [20, 20, 20, 20, 20, 20],\n", 2374 | " [20, 20, 20, 20, 20, 20]])" 2375 | ] 2376 | }, 2377 | "execution_count": 316, 2378 | "metadata": {}, 2379 | "output_type": "execute_result" 2380 | } 2381 | ], 2382 | "source": [ 2383 | "e" 2384 | ] 2385 | }, 2386 | { 2387 | "cell_type": "code", 2388 | "execution_count": 329, 2389 | "metadata": {}, 2390 | "outputs": [ 2391 | { 2392 | "data": { 2393 | "text/plain": [ 2394 | "array([[9, 7, 1, 4],\n", 2395 | " [1, 4, 3, 6],\n", 2396 | " [2, 5, 5, 1],\n", 2397 | " [2, 2, 9, 9]])" 2398 | ] 2399 | }, 2400 | "execution_count": 329, 2401 | "metadata": {}, 2402 | "output_type": "execute_result" 2403 | } 2404 | ], 2405 | "source": [ 2406 | "#iterating over arrays\n", 2407 | "g = np.random.randint(1,10,(4,4))\n", 2408 | "g" 2409 | ] 2410 | }, 2411 | { 2412 | "cell_type": "code", 2413 | "execution_count": 330, 2414 | "metadata": {}, 2415 | "outputs": [ 2416 | { 2417 | "name": "stdout", 2418 | "output_type": "stream", 2419 | "text": [ 2420 | "[9 7 1 4]\n", 2421 | "[1 4 3 
6]\n", 2422 | "[2 5 5 1]\n", 2423 | "[2 2 9 9]\n" 2424 | ] 2425 | } 2426 | ], 2427 | "source": [ 2428 | "#iterate over row\n", 2429 | "for row in g:\n", 2430 | " print(row)" 2431 | ] 2432 | }, 2433 | { 2434 | "cell_type": "code", 2435 | "execution_count": 332, 2436 | "metadata": {}, 2437 | "outputs": [ 2438 | { 2439 | "name": "stdout", 2440 | "output_type": "stream", 2441 | "text": [ 2442 | "[9 7 1 4]\n", 2443 | "[1 4 3 6]\n", 2444 | "[2 5 5 1]\n", 2445 | "[2 2 9 9]\n" 2446 | ] 2447 | } 2448 | ], 2449 | "source": [ 2450 | "#iterate by index\n", 2451 | "for i in range(len(g)):\n", 2452 | " print(g[i])" 2453 | ] 2454 | } 2455 | ], 2456 | "metadata": { 2457 | "kernelspec": { 2458 | "display_name": "Python 3", 2459 | "language": "python", 2460 | "name": "python3" 2461 | }, 2462 | "language_info": { 2463 | "codemirror_mode": { 2464 | "name": "ipython", 2465 | "version": 3 2466 | }, 2467 | "file_extension": ".py", 2468 | "mimetype": "text/x-python", 2469 | "name": "python", 2470 | "nbconvert_exporter": "python", 2471 | "pygments_lexer": "ipython3", 2472 | "version": "3.6.5" 2473 | } 2474 | }, 2475 | "nbformat": 4, 2476 | "nbformat_minor": 2 2477 | } 2478 | --------------------------------------------------------------------------------