├── README.md └── 12 Amazing Pandas & Numpy Functions.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # Medium : 12-Amazing-Pandas-NumPy-Functions 2 | 3 | ### This Jupyter Notebook is linked to my article published on medium.com 4 | 5 | https://towardsdatascience.com/12-amazing-pandas-numpy-functions-22e5671a45b8 6 | -------------------------------------------------------------------------------- /12 Amazing Pandas & Numpy Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# NumPy Functions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 102, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import numpy as np" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Argpartition()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 103, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Random array\n", 33 | "x = np.array([12, 10, 12, 0, 6, 8, 9, 1, 16, 4, 6, 0])" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": 104, 39 | "metadata": {}, 40 | "outputs": [ 41 | { 42 | "data": { 43 | "text/plain": [ 44 | "array([1, 8, 2, 0], dtype=int64)" 45 | ] 46 | }, 47 | "execution_count": 104, 48 | "metadata": {}, 49 | "output_type": "execute_result" 50 | } 51 | ], 52 | "source": [ 53 | "index_val = np.argpartition(x, -4)[-4:]\n", 54 | "index_val" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 105, 60 | "metadata": {}, 61 | "outputs": [ 62 | { 63 | "data": { 64 | "text/plain": [ 65 | "array([10, 12, 12, 16])" 66 | ] 67 | }, 68 | "execution_count": 105, 69 | "metadata": {}, 70 | "output_type": "execute_result" 71 | } 72 | ], 73 | "source": [ 74 | "np.sort(x[index_val])" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 
| "metadata": {}, 80 | "source": [ 81 | "## Allclose()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 106, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "array1 = np.array([0.12,0.17,0.24,0.29])\n", 91 | "array2 = np.array([0.13,0.19,0.26,0.31])" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": 107, 97 | "metadata": {}, 98 | "outputs": [ 99 | { 100 | "data": { 101 | "text/plain": [ 102 | "False" 103 | ] 104 | }, 105 | "execution_count": 107, 106 | "metadata": {}, 107 | "output_type": "execute_result" 108 | } 109 | ], 110 | "source": [ 111 | "# with a relative tolerance (rtol) of 0.1, it should return False:\n", 112 | "np.allclose(array1,array2,0.1)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 108, 118 | "metadata": {}, 119 | "outputs": [ 120 | { 121 | "data": { 122 | "text/plain": [ 123 | "True" 124 | ] 125 | }, 126 | "execution_count": 108, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "# with a relative tolerance (rtol) of 0.2, it should return True:\n", 133 | "np.allclose(array1,array2,0.2)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "metadata": {}, 139 | "source": [ 140 | "## Clip()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 109, 146 | "metadata": {}, 147 | "outputs": [], 148 | "source": [ 149 | "x = np.array([3, 17, 14, 23, 2, 2, 6, 8, 1, 2, 16, 0])" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 110, 155 | "metadata": {}, 156 | "outputs": [ 157 | { 158 | "data": { 159 | "text/plain": [ 160 | "array([3, 5, 5, 5, 2, 2, 5, 5, 2, 2, 5, 2])" 161 | ] 162 | }, 163 | "execution_count": 110, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "np.clip(x,2,5)" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "## Extract()" 177 | ] 178 | }, 179 | { 180 | "cell_type": 
"code", 181 | "execution_count": 111, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "data": { 186 | "text/plain": [ 187 | "array([17, 14, 6, 10, 12, 4, 13, 4, 3, 11, 0, 10])" 188 | ] 189 | }, 190 | "execution_count": 111, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "# Random integers\n", 197 | "array = np.random.randint(20, size=12)\n", 198 | "array" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 112, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": [ 209 | "array([ True, False, False, False, False, False, True, False, True,\n", 210 | " True, False, False])" 211 | ] 212 | }, 213 | "execution_count": 112, 214 | "metadata": {}, 215 | "output_type": "execute_result" 216 | } 217 | ], 218 | "source": [ 219 | "# Divide by 2 and check if remainder is 1\n", 220 | "cond = np.mod(array, 2)==1\n", 221 | "cond" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 113, 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/plain": [ 232 | "array([17, 13, 3, 11])" 233 | ] 234 | }, 235 | "execution_count": 113, 236 | "metadata": {}, 237 | "output_type": "execute_result" 238 | } 239 | ], 240 | "source": [ 241 | "# Use extract to get the values\n", 242 | "np.extract(cond, array)" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 114, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "array([17, 0])" 254 | ] 255 | }, 256 | "execution_count": 114, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "# Apply condition on extract directly\n", 263 | "np.extract(((array < 3) | (array > 15)), array)" 264 | ] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "metadata": {}, 269 | "source": [ 270 | "## Where()" 271 | ] 272 | }, 273 | { 274 | "cell_type": "code", 275 | "execution_count": 115, 276 
| "metadata": {}, 277 | "outputs": [], 278 | "source": [ 279 | "y = np.array([1,5,6,8,1,7,3,6,9])" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 116, 285 | "metadata": {}, 286 | "outputs": [ 287 | { 288 | "data": { 289 | "text/plain": [ 290 | "(array([2, 3, 5, 7, 8], dtype=int64),)" 291 | ] 292 | }, 293 | "execution_count": 116, 294 | "metadata": {}, 295 | "output_type": "execute_result" 296 | } 297 | ], 298 | "source": [ 299 | "# Where y is greater than 5, returns index position\n", 300 | "np.where(y>5)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": 117, 306 | "metadata": {}, 307 | "outputs": [ 308 | { 309 | "data": { 310 | "text/plain": [ 311 | "array(['Miss', 'Miss', 'Hit', 'Hit', 'Miss', 'Hit', 'Miss', 'Hit', 'Hit'],\n", 312 | " dtype='5, \"Hit\", \"Miss\")" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "## Percentile()" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 118, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "a = np.array([1,5,6,8,1,7,3,6,9])" 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": 119, 344 | "metadata": {}, 345 | "outputs": [ 346 | { 347 | "name": "stdout", 348 | "output_type": "stream", 349 | "text": [ 350 | "50th Percentile of arr, axis = 0 : 6.0\n" 351 | ] 352 | } 353 | ], 354 | "source": [ 355 | "print(\"50th Percentile of arr, axis = 0 : \", \n", 356 | " np.percentile(a, 50, axis =0))" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 120, 362 | "metadata": {}, 363 | "outputs": [], 364 | "source": [ 365 | "b = np.array([[10, 7, 4], [3, 2, 1]])" 366 | ] 367 | }, 368 | { 369 | "cell_type": "code", 370 | "execution_count": 121, 371 | "metadata": {}, 372 | "outputs": [ 373 | { 374 | "name": "stdout", 375 | "output_type": "stream", 376 | "text": [ 377 | "30th Percentile of arr, axis = 0 : [5.1 3.5 1.9]\n" 378 | ] 379 | } 
380 | ], 381 | "source": [ 382 | "print(\"30th Percentile of arr, axis = 0 : \", \n", 383 | " np.percentile(b, 30, axis =0))" 384 | ] 385 | }, 386 | { 387 | "cell_type": "code", 388 | "execution_count": null, 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | }, 393 | { 394 | "cell_type": "markdown", 395 | "metadata": {}, 396 | "source": [ 397 | "# Pandas Functions" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": 122, 403 | "metadata": {}, 404 | "outputs": [], 405 | "source": [ 406 | "import pandas as pd" 407 | ] 408 | }, 409 | { 410 | "cell_type": "markdown", 411 | "metadata": {}, 412 | "source": [ 413 | "## read_csv(nrows=10)" 414 | ] 415 | }, 416 | { 417 | "cell_type": "code", 418 | "execution_count": 128, 419 | "metadata": {}, 420 | "outputs": [], 421 | "source": [ 422 | "import io\n", 423 | "import requests\n", 424 | "\n", 425 | "# I am using this online data set just to make things easier for you guys\n", 426 | "url = \"https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/datasets/AirPassengers.csv\"\n", 427 | "s = requests.get(url).content" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": 132, 433 | "metadata": {}, 434 | "outputs": [ 435 | { 436 | "data": { 437 | "text/html": [ 438 | "
\n", 439 | "\n", 452 | "\n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | "
timevalue
11949.000000112
21949.083333118
31949.166667132
41949.250000129
51949.333333121
61949.416667135
71949.500000148
81949.583333148
91949.666667136
101949.750000119
\n", 513 | "
" 514 | ], 515 | "text/plain": [ 516 | " time value\n", 517 | "1 1949.000000 112\n", 518 | "2 1949.083333 118\n", 519 | "3 1949.166667 132\n", 520 | "4 1949.250000 129\n", 521 | "5 1949.333333 121\n", 522 | "6 1949.416667 135\n", 523 | "7 1949.500000 148\n", 524 | "8 1949.583333 148\n", 525 | "9 1949.666667 136\n", 526 | "10 1949.750000 119" 527 | ] 528 | }, 529 | "execution_count": 132, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | } 533 | ], 534 | "source": [ 535 | "# read only first 10 rows\n", 536 | "df = pd.read_csv(io.StringIO(s.decode('utf-8')),nrows=10 , index_col=0)\n", 537 | "df" 538 | ] 539 | }, 540 | { 541 | "cell_type": "markdown", 542 | "metadata": {}, 543 | "source": [ 544 | "## map()" 545 | ] 546 | }, 547 | { 548 | "cell_type": "code", 549 | "execution_count": 138, 550 | "metadata": {}, 551 | "outputs": [ 552 | { 553 | "data": { 554 | "text/html": [ 555 | "
\n", 556 | "\n", 569 | "\n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | "
bde
India1.3022450.5719170.573815
USA-0.481980-0.4645180.528452
China-0.3395891.244659-0.518640
Russia1.5902901.839274-0.250006
\n", 605 | "
" 606 | ], 607 | "text/plain": [ 608 | " b d e\n", 609 | "India 1.302245 0.571917 0.573815\n", 610 | "USA -0.481980 -0.464518 0.528452\n", 611 | "China -0.339589 1.244659 -0.518640\n", 612 | "Russia 1.590290 1.839274 -0.250006" 613 | ] 614 | }, 615 | "execution_count": 138, 616 | "metadata": {}, 617 | "output_type": "execute_result" 618 | } 619 | ], 620 | "source": [ 621 | "# create a dataframe\n", 622 | "dframe = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'), index=['India', 'USA', 'China', 'Russia'])\n", 623 | "dframe" 624 | ] 625 | }, 626 | { 627 | "cell_type": "code", 628 | "execution_count": 141, 629 | "metadata": {}, 630 | "outputs": [], 631 | "source": [ 632 | "# compute a formatted string from each floating-point value in the frame\n", 633 | "changefn = lambda x: '%.2f' % x" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 142, 639 | "metadata": {}, 640 | "outputs": [ 641 | { 642 | "data": { 643 | "text/plain": [ 644 | "India 0.57\n", 645 | "USA -0.46\n", 646 | "China 1.24\n", 647 | "Russia 1.84\n", 648 | "Name: d, dtype: object" 649 | ] 650 | }, 651 | "execution_count": 142, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "# Make changes element-wise\n", 658 | "dframe['d'].map(changefn)" 659 | ] 660 | }, 661 | { 662 | "cell_type": "markdown", 663 | "metadata": {}, 664 | "source": [ 665 | "## Apply()" 666 | ] 667 | }, 668 | { 669 | "cell_type": "code", 670 | "execution_count": 143, 671 | "metadata": {}, 672 | "outputs": [], 673 | "source": [ 674 | "# max minus min lambda fn\n", 675 | "fn = lambda x: x.max() - x.min()" 676 | ] 677 | }, 678 | { 679 | "cell_type": "code", 680 | "execution_count": 144, 681 | "metadata": {}, 682 | "outputs": [ 683 | { 684 | "data": { 685 | "text/plain": [ 686 | "b 2.072270\n", 687 | "d 2.303792\n", 688 | "e 1.092456\n", 689 | "dtype: float64" 690 | ] 691 | }, 692 | "execution_count": 144, 693 | "metadata": {}, 694 | "output_type": 
"execute_result" 695 | } 696 | ], 697 | "source": [ 698 | "# Apply this on dframe\n", 699 | "dframe.apply(fn)" 700 | ] 701 | }, 702 | { 703 | "cell_type": "markdown", 704 | "metadata": {}, 705 | "source": [ 706 | "## isin()" 707 | ] 708 | }, 709 | { 710 | "cell_type": "code", 711 | "execution_count": 158, 712 | "metadata": {}, 713 | "outputs": [], 714 | "source": [ 715 | "# Using the dataframe we created for read_csv\n", 716 | "filter1 = df[\"value\"].isin([112]) \n", 717 | "filter2 = df[\"time\"].isin([1949.000000]) " 718 | ] 719 | }, 720 | { 721 | "cell_type": "code", 722 | "execution_count": 159, 723 | "metadata": {}, 724 | "outputs": [ 725 | { 726 | "data": { 727 | "text/html": [ 728 | "
\n", 729 | "\n", 742 | "\n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | "
timevalue
11949.0112
\n", 758 | "
" 759 | ], 760 | "text/plain": [ 761 | " time value\n", 762 | "1 1949.0 112" 763 | ] 764 | }, 765 | "execution_count": 159, 766 | "metadata": {}, 767 | "output_type": "execute_result" 768 | } 769 | ], 770 | "source": [ 771 | "df [filter1 & filter2]" 772 | ] 773 | }, 774 | { 775 | "cell_type": "markdown", 776 | "metadata": {}, 777 | "source": [ 778 | "## copy()" 779 | ] 780 | }, 781 | { 782 | "cell_type": "code", 783 | "execution_count": 160, 784 | "metadata": {}, 785 | "outputs": [], 786 | "source": [ 787 | "# creating sample series \n", 788 | "data = pd.Series(['India', 'Pakistan', 'China', 'Mongolia']) " 789 | ] 790 | }, 791 | { 792 | "cell_type": "code", 793 | "execution_count": 173, 794 | "metadata": {}, 795 | "outputs": [ 796 | { 797 | "data": { 798 | "text/plain": [ 799 | "0 USA\n", 800 | "1 Pakistan\n", 801 | "2 China\n", 802 | "3 Mongolia\n", 803 | "dtype: object" 804 | ] 805 | }, 806 | "execution_count": 173, 807 | "metadata": {}, 808 | "output_type": "execute_result" 809 | } 810 | ], 811 | "source": [ 812 | "# Pitfall: plain assignment creates an alias, not a copy\n", 813 | "data1= data\n", 814 | "# Change a value\n", 815 | "data1[0]='USA'\n", 816 | "# This also changes the value in the original Series\n", 817 | "data" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": 161, 823 | "metadata": {}, 824 | "outputs": [], 825 | "source": [ 826 | "# creating copy of series \n", 827 | "new = data.copy() " 828 | ] 829 | }, 830 | { 831 | "cell_type": "code", 832 | "execution_count": 162, 833 | "metadata": {}, 834 | "outputs": [], 835 | "source": [ 836 | "# assigning new values \n", 837 | "new[1]='Changed value'" 838 | ] 839 | }, 840 | { 841 | "cell_type": "code", 842 | "execution_count": 169, 843 | "metadata": {}, 844 | "outputs": [ 845 | { 846 | "name": "stdout", 847 | "output_type": "stream", 848 | "text": [ 849 | "0 India\n", 850 | "1 Changed value\n", 851 | "2 China\n", 852 | "3 Mongolia\n", 853 | "dtype: object\n", 854 | "0 India\n", 855 | "1 Pakistan\n", 856 | "2 China\n", 
857 | "3 Mongolia\n", 858 | "dtype: object\n" 859 | ] 860 | } 861 | ], 862 | "source": [ 863 | "# printing data \n", 864 | "print(new) \n", 865 | "print(data) " 866 | ] 867 | }, 868 | { 869 | "cell_type": "markdown", 870 | "metadata": {}, 871 | "source": [ 872 | "## select_dtypes()" 873 | ] 874 | }, 875 | { 876 | "cell_type": "code", 877 | "execution_count": 176, 878 | "metadata": {}, 879 | "outputs": [], 880 | "source": [ 881 | "framex = df.select_dtypes(include=\"float64\")" 882 | ] 883 | }, 884 | { 885 | "cell_type": "code", 886 | "execution_count": 177, 887 | "metadata": {}, 888 | "outputs": [ 889 | { 890 | "data": { 891 | "text/html": [ 892 | "
\n", 893 | "\n", 906 | "\n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | "
time
11949.000000
21949.083333
31949.166667
41949.250000
51949.333333
61949.416667
71949.500000
81949.583333
91949.666667
101949.750000
\n", 956 | "
" 957 | ], 958 | "text/plain": [ 959 | " time\n", 960 | "1 1949.000000\n", 961 | "2 1949.083333\n", 962 | "3 1949.166667\n", 963 | "4 1949.250000\n", 964 | "5 1949.333333\n", 965 | "6 1949.416667\n", 966 | "7 1949.500000\n", 967 | "8 1949.583333\n", 968 | "9 1949.666667\n", 969 | "10 1949.750000" 970 | ] 971 | }, 972 | "execution_count": 177, 973 | "metadata": {}, 974 | "output_type": "execute_result" 975 | } 976 | ], 977 | "source": [ 978 | "framex" 979 | ] 980 | }, 981 | { 982 | "cell_type": "markdown", 983 | "metadata": {}, 984 | "source": [ 985 | "# Bonus:" 986 | ] 987 | }, 988 | { 989 | "cell_type": "markdown", 990 | "metadata": {}, 991 | "source": [ 992 | "## pivot_table()" 993 | ] 994 | }, 995 | { 996 | "cell_type": "code", 997 | "execution_count": 182, 998 | "metadata": {}, 999 | "outputs": [], 1000 | "source": [ 1001 | "# Create a sample dataframe\n", 1002 | "school = pd.DataFrame({'A': ['Jay', 'Usher', 'Nicky', 'Romero', 'Will'], \n", 1003 | " 'B': ['Masters', 'Graduate', 'Graduate', 'Masters', 'Graduate'], \n", 1004 | " 'C': [26, 22, 20, 23, 24]}) " 1005 | ] 1006 | }, 1007 | { 1008 | "cell_type": "code", 1009 | "execution_count": 181, 1010 | "metadata": {}, 1011 | "outputs": [ 1012 | { 1013 | "data": { 1014 | "text/html": [ 1015 | "
\n", 1016 | "\n", 1029 | "\n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | "
BGraduateMasters
BC
Graduate20NickyNot Available
22UsherNot Available
24WillNot Available
Masters23Not AvailableRomero
26Not AvailableJay
\n", 1074 | "
" 1075 | ], 1076 | "text/plain": [ 1077 | "B Graduate Masters\n", 1078 | "B C \n", 1079 | "Graduate 20 Nicky Not Available\n", 1080 | " 22 Usher Not Available\n", 1081 | " 24 Will Not Available\n", 1082 | "Masters 23 Not Available Romero\n", 1083 | " 26 Not Available Jay" 1084 | ] 1085 | }, 1086 | "execution_count": 181, 1087 | "metadata": {}, 1088 | "output_type": "execute_result" 1089 | } 1090 | ], 1091 | "source": [ 1092 | "# Let's create a pivot table to segregate students based on age and course\n", 1093 | "table = pd.pivot_table(school, values ='A', index =['B', 'C'], \n", 1094 | " columns =['B'], aggfunc = np.sum, fill_value=\"Not Available\") \n", 1095 | " \n", 1096 | "table" 1097 | ] 1098 | }, 1099 | { 1100 | "cell_type": "markdown", 1101 | "metadata": {}, 1102 | "source": [ 1103 | "# Thank yoouuuuuuuu ! " 1104 | ] 1105 | } 1106 | ], 1107 | "metadata": { 1108 | "kernelspec": { 1109 | "display_name": "Python 3", 1110 | "language": "python", 1111 | "name": "python3" 1112 | }, 1113 | "language_info": { 1114 | "codemirror_mode": { 1115 | "name": "ipython", 1116 | "version": 3 1117 | }, 1118 | "file_extension": ".py", 1119 | "mimetype": "text/x-python", 1120 | "name": "python", 1121 | "nbconvert_exporter": "python", 1122 | "pygments_lexer": "ipython3", 1123 | "version": "3.7.3" 1124 | } 1125 | }, 1126 | "nbformat": 4, 1127 | "nbformat_minor": 2 1128 | } 1129 | --------------------------------------------------------------------------------