└── PANDAS_NOTEBOOK_3.ipynb /PANDAS_NOTEBOOK_3.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "f15d6659", 6 | "metadata": {}, 7 | "source": [ 8 | "## Advanced Aggregation Functions: Aggregate() Function" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "6185cdeb", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "import numpy as np" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "id": "2ba48364", 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "df = pd.DataFrame({'groups': ['X', 'Y', 'Z', 'X', 'Y', 'Z'],\n", 30 | " 'val1': [2, 15, 25, 14, 3, 91],\n", 31 | " 'val2': [92,245,325,254,103,961]})" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "id": "f9bfd7a6", 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "data": { 42 | "text/html": [ 43 | "
\n", 44 | "\n", 57 | "\n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | "
groupsval1val2
0X292
1Y15245
2Z25325
3X14254
4Y3103
5Z91961
\n", 105 | "
" 106 | ], 107 | "text/plain": [ 108 | " groups val1 val2\n", 109 | "0 X 2 92\n", 110 | "1 Y 15 245\n", 111 | "2 Z 25 325\n", 112 | "3 X 14 254\n", 113 | "4 Y 3 103\n", 114 | "5 Z 91 961" 115 | ] 116 | }, 117 | "execution_count": 3, 118 | "metadata": {}, 119 | "output_type": "execute_result" 120 | } 121 | ], 122 | "source": [ 123 | "df" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 4, 129 | "id": "03518204", 130 | "metadata": { 131 | "scrolled": false 132 | }, 133 | "outputs": [ 134 | { 135 | "data": { 136 | "text/html": [ 137 | "
\n", 138 | "\n", 151 | "\n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | "
val1val2
groups
X8173
Y9174
Z58643
\n", 182 | "
" 183 | ], 184 | "text/plain": [ 185 | " val1 val2\n", 186 | "groups \n", 187 | "X 8 173\n", 188 | "Y 9 174\n", 189 | "Z 58 643" 190 | ] 191 | }, 192 | "execution_count": 4, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "df.groupby(\"groups\").mean()" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 5, 204 | "id": "ccd5b657", 205 | "metadata": {}, 206 | "outputs": [ 207 | { 208 | "data": { 209 | "text/html": [ 210 | "
\n", 211 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | "
val1val2
meanmedianminsummeanmedianminsum
groups
X8821617317392346
Y99318174174103348
Z5858251166436433251286
\n", 294 | "
" 295 | ], 296 | "text/plain": [ 297 | " val1 val2 \n", 298 | " mean median min sum mean median min sum\n", 299 | "groups \n", 300 | "X 8 8 2 16 173 173 92 346\n", 301 | "Y 9 9 3 18 174 174 103 348\n", 302 | "Z 58 58 25 116 643 643 325 1286" 303 | ] 304 | }, 305 | "execution_count": 5, 306 | "metadata": {}, 307 | "output_type": "execute_result" 308 | } 309 | ], 310 | "source": [ 311 | "df.groupby(\"groups\").aggregate([\"mean\", np.median, min, \"sum\"])" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": 6, 317 | "id": "5c3fcf02", 318 | "metadata": {}, 319 | "outputs": [ 320 | { 321 | "data": { 322 | "text/html": [ 323 | "
\n", 324 | "\n", 337 | "\n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | "
val1val2
groups
X8173
Y9174
Z58643
\n", 368 | "
" 369 | ], 370 | "text/plain": [ 371 | " val1 val2\n", 372 | "groups \n", 373 | "X 8 173\n", 374 | "Y 9 174\n", 375 | "Z 58 643" 376 | ] 377 | }, 378 | "execution_count": 6, 379 | "metadata": {}, 380 | "output_type": "execute_result" 381 | } 382 | ], 383 | "source": [ 384 | "df.groupby(\"groups\").aggregate({\"val1\" : \"mean\", \"val2\" : \"median\"})" 385 | ] 386 | }, 387 | { 388 | "cell_type": "markdown", 389 | "id": "0f2b9d90", 390 | "metadata": {}, 391 | "source": [ 392 | "## Advanced Aggregation Functions: Filter() Function" 393 | ] 394 | }, 395 | { 396 | "cell_type": "code", 397 | "execution_count": 7, 398 | "id": "e103a217", 399 | "metadata": {}, 400 | "outputs": [], 401 | "source": [ 402 | "import pandas as pd\n", 403 | "import numpy as np" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 8, 409 | "id": "77ce20db", 410 | "metadata": {}, 411 | "outputs": [], 412 | "source": [ 413 | "df = pd.DataFrame({'groups': ['X', 'Y', 'Z', 'X', 'Y', 'Z'],\n", 414 | " 'val1': [2, 15, 25, 14, 3, 91],\n", 415 | " 'val2': [92,245,325,254,103,961]})" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 9, 421 | "id": "78184ade", 422 | "metadata": {}, 423 | "outputs": [ 424 | { 425 | "data": { 426 | "text/html": [ 427 | "
\n", 428 | "\n", 441 | "\n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | "
groupsval1val2
0X292
1Y15245
2Z25325
3X14254
4Y3103
5Z91961
\n", 489 | "
" 490 | ], 491 | "text/plain": [ 492 | " groups val1 val2\n", 493 | "0 X 2 92\n", 494 | "1 Y 15 245\n", 495 | "2 Z 25 325\n", 496 | "3 X 14 254\n", 497 | "4 Y 3 103\n", 498 | "5 Z 91 961" 499 | ] 500 | }, 501 | "execution_count": 9, 502 | "metadata": {}, 503 | "output_type": "execute_result" 504 | } 505 | ], 506 | "source": [ 507 | "df" 508 | ] 509 | }, 510 | { 511 | "cell_type": "code", 512 | "execution_count": 10, 513 | "id": "c8b04fe2", 514 | "metadata": {}, 515 | "outputs": [ 516 | { 517 | "data": { 518 | "text/html": [ 519 | "
\n", 520 | "\n", 533 | "\n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | "
val1val2
groups
X8173
Y9174
Z58643
\n", 564 | "
" 565 | ], 566 | "text/plain": [ 567 | " val1 val2\n", 568 | "groups \n", 569 | "X 8 173\n", 570 | "Y 9 174\n", 571 | "Z 58 643" 572 | ] 573 | }, 574 | "execution_count": 10, 575 | "metadata": {}, 576 | "output_type": "execute_result" 577 | } 578 | ], 579 | "source": [ 580 | "df.groupby(\"groups\").mean()" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": 11, 586 | "id": "0bef7174", 587 | "metadata": {}, 588 | "outputs": [], 589 | "source": [ 590 | "def example_function(x):\n", 591 | " return x[\"val2\"].mean() < 200" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": 12, 597 | "id": "3c5ac82d", 598 | "metadata": {}, 599 | "outputs": [ 600 | { 601 | "data": { 602 | "text/html": [ 603 | "
\n", 604 | "\n", 617 | "\n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | "
groupsval1val2
0X292
1Y15245
3X14254
4Y3103
\n", 653 | "
" 654 | ], 655 | "text/plain": [ 656 | " groups val1 val2\n", 657 | "0 X 2 92\n", 658 | "1 Y 15 245\n", 659 | "3 X 14 254\n", 660 | "4 Y 3 103" 661 | ] 662 | }, 663 | "execution_count": 12, 664 | "metadata": {}, 665 | "output_type": "execute_result" 666 | } 667 | ], 668 | "source": [ 669 | "df.groupby(\"groups\").filter(example_function)" 670 | ] 671 | }, 672 | { 673 | "cell_type": "code", 674 | "execution_count": 13, 675 | "id": "adadfb0f", 676 | "metadata": {}, 677 | "outputs": [ 678 | { 679 | "data": { 680 | "text/html": [ 681 | "
\n", 682 | "\n", 695 | "\n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | "
val1val2
groups
X8.485281114.551299
Y8.485281100.409163
Z46.669048449.719913
\n", 726 | "
" 727 | ], 728 | "text/plain": [ 729 | " val1 val2\n", 730 | "groups \n", 731 | "X 8.485281 114.551299\n", 732 | "Y 8.485281 100.409163\n", 733 | "Z 46.669048 449.719913" 734 | ] 735 | }, 736 | "execution_count": 13, 737 | "metadata": {}, 738 | "output_type": "execute_result" 739 | } 740 | ], 741 | "source": [ 742 | "df.groupby(\"groups\").std()" 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 14, 748 | "id": "ecb47ae5", 749 | "metadata": {}, 750 | "outputs": [ 751 | { 752 | "data": { 753 | "text/html": [ 754 | "
\n", 755 | "\n", 768 | "\n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | "
groupsval1val2
0X292
2Z25325
3X14254
5Z91961
\n", 804 | "
" 805 | ], 806 | "text/plain": [ 807 | " groups val1 val2\n", 808 | "0 X 2 92\n", 809 | "2 Z 25 325\n", 810 | "3 X 14 254\n", 811 | "5 Z 91 961" 812 | ] 813 | }, 814 | "execution_count": 14, 815 | "metadata": {}, 816 | "output_type": "execute_result" 817 | } 818 | ], 819 | "source": [ 820 | "df.groupby(\"groups\").filter(lambda x : x[\"val2\"].std() > 105)" 821 | ] 822 | }, 823 | { 824 | "cell_type": "markdown", 825 | "id": "e7eac850", 826 | "metadata": {}, 827 | "source": [ 828 | "## Advanced Aggregation Functions: Transform() Function" 829 | ] 830 | }, 831 | { 832 | "cell_type": "code", 833 | "execution_count": 15, 834 | "id": "73f28b47", 835 | "metadata": {}, 836 | "outputs": [], 837 | "source": [ 838 | "import pandas as pd\n", 839 | "import numpy as np" 840 | ] 841 | }, 842 | { 843 | "cell_type": "code", 844 | "execution_count": 16, 845 | "id": "cd8edd85", 846 | "metadata": {}, 847 | "outputs": [], 848 | "source": [ 849 | "df = pd.DataFrame({'groups': ['X', 'Y', 'Z', 'X', 'Y', 'Z'],\n", 850 | " 'val1': [2, 15, 25, 14, 3, 91],\n", 851 | " 'val2': [92,245,325,254,103,961]})" 852 | ] 853 | }, 854 | { 855 | "cell_type": "code", 856 | "execution_count": 17, 857 | "id": "f39a11b3", 858 | "metadata": {}, 859 | "outputs": [ 860 | { 861 | "data": { 862 | "text/html": [ 863 | "
\n", 864 | "\n", 877 | "\n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | "
groupsval1val2
0X292
1Y15245
2Z25325
3X14254
4Y3103
5Z91961
\n", 925 | "
" 926 | ], 927 | "text/plain": [ 928 | " groups val1 val2\n", 929 | "0 X 2 92\n", 930 | "1 Y 15 245\n", 931 | "2 Z 25 325\n", 932 | "3 X 14 254\n", 933 | "4 Y 3 103\n", 934 | "5 Z 91 961" 935 | ] 936 | }, 937 | "execution_count": 17, 938 | "metadata": {}, 939 | "output_type": "execute_result" 940 | } 941 | ], 942 | "source": [ 943 | "df" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": 18, 949 | "id": "2f15e109", 950 | "metadata": {}, 951 | "outputs": [], 952 | "source": [ 953 | "df_new = df.loc[:, \"val1\":\"val2\"]" 954 | ] 955 | }, 956 | { 957 | "cell_type": "code", 958 | "execution_count": 19, 959 | "id": "869b3bd1", 960 | "metadata": { 961 | "scrolled": true 962 | }, 963 | "outputs": [ 964 | { 965 | "data": { 966 | "text/html": [ 967 | "
\n", 968 | "\n", 981 | "\n", 982 | " \n", 983 | " \n", 984 | " \n", 985 | " \n", 986 | " \n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | "
val1val2
0292
115245
225325
314254
43103
591961
\n", 1022 | "
" 1023 | ], 1024 | "text/plain": [ 1025 | " val1 val2\n", 1026 | "0 2 92\n", 1027 | "1 15 245\n", 1028 | "2 25 325\n", 1029 | "3 14 254\n", 1030 | "4 3 103\n", 1031 | "5 91 961" 1032 | ] 1033 | }, 1034 | "execution_count": 19, 1035 | "metadata": {}, 1036 | "output_type": "execute_result" 1037 | } 1038 | ], 1039 | "source": [ 1040 | "df_new" 1041 | ] 1042 | }, 1043 | { 1044 | "cell_type": "code", 1045 | "execution_count": 20, 1046 | "id": "0e717299", 1047 | "metadata": {}, 1048 | "outputs": [], 1049 | "source": [ 1050 | "def normalize(x):\n", 1051 | " return (x - x.min()) / (x.max() - x.min())" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 21, 1057 | "id": "9f20ee93", 1058 | "metadata": {}, 1059 | "outputs": [ 1060 | { 1061 | "data": { 1062 | "text/html": [ 1063 | "
\n", 1064 | "\n", 1077 | "\n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | "
val1val2
00.0000000.000000
10.1460670.176064
20.2584270.268124
30.1348310.186421
40.0112360.012658
51.0000001.000000
\n", 1118 | "
" 1119 | ], 1120 | "text/plain": [ 1121 | " val1 val2\n", 1122 | "0 0.000000 0.000000\n", 1123 | "1 0.146067 0.176064\n", 1124 | "2 0.258427 0.268124\n", 1125 | "3 0.134831 0.186421\n", 1126 | "4 0.011236 0.012658\n", 1127 | "5 1.000000 1.000000" 1128 | ] 1129 | }, 1130 | "execution_count": 21, 1131 | "metadata": {}, 1132 | "output_type": "execute_result" 1133 | } 1134 | ], 1135 | "source": [ 1136 | "df_new.transform(normalize)" 1137 | ] 1138 | }, 1139 | { 1140 | "cell_type": "code", 1141 | "execution_count": 22, 1142 | "id": "0f194072", 1143 | "metadata": {}, 1144 | "outputs": [ 1145 | { 1146 | "data": { 1147 | "text/plain": [ 1148 | "15" 1149 | ] 1150 | }, 1151 | "execution_count": 22, 1152 | "metadata": {}, 1153 | "output_type": "execute_result" 1154 | } 1155 | ], 1156 | "source": [ 1157 | "df_new.loc[1, \"val1\"]" 1158 | ] 1159 | }, 1160 | { 1161 | "cell_type": "code", 1162 | "execution_count": 23, 1163 | "id": "96f69f3b", 1164 | "metadata": {}, 1165 | "outputs": [ 1166 | { 1167 | "data": { 1168 | "text/plain": [ 1169 | "0.14606741573033707" 1170 | ] 1171 | }, 1172 | "execution_count": 23, 1173 | "metadata": {}, 1174 | "output_type": "execute_result" 1175 | } 1176 | ], 1177 | "source": [ 1178 | "(df_new.loc[1, \"val1\"] - df[\"val1\"].min()) / (df[\"val1\"].max() - df[\"val1\"].min())" 1179 | ] 1180 | }, 1181 | { 1182 | "cell_type": "code", 1183 | "execution_count": 24, 1184 | "id": "7cee8da6", 1185 | "metadata": {}, 1186 | "outputs": [ 1187 | { 1188 | "data": { 1189 | "text/html": [ 1190 | "
\n", 1191 | "\n", 1204 | "\n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | "
val1val2
0292
115245
225325
314254
43103
591961
\n", 1245 | "
" 1246 | ], 1247 | "text/plain": [ 1248 | " val1 val2\n", 1249 | "0 2 92\n", 1250 | "1 15 245\n", 1251 | "2 25 325\n", 1252 | "3 14 254\n", 1253 | "4 3 103\n", 1254 | "5 91 961" 1255 | ] 1256 | }, 1257 | "execution_count": 24, 1258 | "metadata": {}, 1259 | "output_type": "execute_result" 1260 | } 1261 | ], 1262 | "source": [ 1263 | "df_new" 1264 | ] 1265 | }, 1266 | { 1267 | "cell_type": "code", 1268 | "execution_count": 25, 1269 | "id": "5a10fc63", 1270 | "metadata": {}, 1271 | "outputs": [ 1272 | { 1273 | "data": { 1274 | "text/html": [ 1275 | "
\n", 1276 | "\n", 1289 | "\n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | "
val1val2
00.909297-0.779466
10.650288-0.044213
2-0.132352-0.988036
30.9906070.451999
40.1411200.622989
50.105988-0.321537
\n", 1330 | "
" 1331 | ], 1332 | "text/plain": [ 1333 | " val1 val2\n", 1334 | "0 0.909297 -0.779466\n", 1335 | "1 0.650288 -0.044213\n", 1336 | "2 -0.132352 -0.988036\n", 1337 | "3 0.990607 0.451999\n", 1338 | "4 0.141120 0.622989\n", 1339 | "5 0.105988 -0.321537" 1340 | ] 1341 | }, 1342 | "execution_count": 25, 1343 | "metadata": {}, 1344 | "output_type": "execute_result" 1345 | } 1346 | ], 1347 | "source": [ 1348 | "df_new.transform(lambda a : np.sin(a))" 1349 | ] 1350 | }, 1351 | { 1352 | "cell_type": "code", 1353 | "execution_count": 26, 1354 | "id": "6d7b6b4f", 1355 | "metadata": {}, 1356 | "outputs": [ 1357 | { 1358 | "data": { 1359 | "text/plain": [ 1360 | "0 -0.779466\n", 1361 | "1 -0.044213\n", 1362 | "2 -0.988036\n", 1363 | "3 0.451999\n", 1364 | "4 0.622989\n", 1365 | "5 -0.321537\n", 1366 | "Name: val2, dtype: float64" 1367 | ] 1368 | }, 1369 | "execution_count": 26, 1370 | "metadata": {}, 1371 | "output_type": "execute_result" 1372 | } 1373 | ], 1374 | "source": [ 1375 | "np.sin(df_new.val2)" 1376 | ] 1377 | }, 1378 | { 1379 | "cell_type": "markdown", 1380 | "id": "5f8a2091", 1381 | "metadata": {}, 1382 | "source": [ 1383 | "## Advanced Aggregation Functions: Apply() Function" 1384 | ] 1385 | }, 1386 | { 1387 | "cell_type": "code", 1388 | "execution_count": 1, 1389 | "id": "ffe5d394", 1390 | "metadata": {}, 1391 | "outputs": [], 1392 | "source": [ 1393 | "import pandas as pd\n", 1394 | "import numpy as np" 1395 | ] 1396 | }, 1397 | { 1398 | "cell_type": "code", 1399 | "execution_count": 2, 1400 | "id": "9130568c", 1401 | "metadata": {}, 1402 | "outputs": [], 1403 | "source": [ 1404 | "df = pd.DataFrame({'groups': ['X', 'Y', 'Z', 'X', 'Y', 'Z'],\n", 1405 | " 'val1': [2, 15, 25, 14, 3, 91],\n", 1406 | " 'val2': [92,245,325,254,103,961]})" 1407 | ] 1408 | }, 1409 | { 1410 | "cell_type": "code", 1411 | "execution_count": 3, 1412 | "id": "6a0b50d2", 1413 | "metadata": { 1414 | "scrolled": true 1415 | }, 1416 | "outputs": [ 1417 | { 1418 | "data": { 1419 | "text/html": [ 
1420 | "
\n", 1421 | "\n", 1434 | "\n", 1435 | " \n", 1436 | " \n", 1437 | " \n", 1438 | " \n", 1439 | " \n", 1440 | " \n", 1441 | " \n", 1442 | " \n", 1443 | " \n", 1444 | " \n", 1445 | " \n", 1446 | " \n", 1447 | " \n", 1448 | " \n", 1449 | " \n", 1450 | " \n", 1451 | " \n", 1452 | " \n", 1453 | " \n", 1454 | " \n", 1455 | " \n", 1456 | " \n", 1457 | " \n", 1458 | " \n", 1459 | " \n", 1460 | " \n", 1461 | " \n", 1462 | " \n", 1463 | " \n", 1464 | " \n", 1465 | " \n", 1466 | " \n", 1467 | " \n", 1468 | " \n", 1469 | " \n", 1470 | " \n", 1471 | " \n", 1472 | " \n", 1473 | " \n", 1474 | " \n", 1475 | " \n", 1476 | " \n", 1477 | " \n", 1478 | " \n", 1479 | " \n", 1480 | " \n", 1481 | "
groupsval1val2
0X292
1Y15245
2Z25325
3X14254
4Y3103
5Z91961
\n", 1482 | "
" 1483 | ], 1484 | "text/plain": [ 1485 | " groups val1 val2\n", 1486 | "0 X 2 92\n", 1487 | "1 Y 15 245\n", 1488 | "2 Z 25 325\n", 1489 | "3 X 14 254\n", 1490 | "4 Y 3 103\n", 1491 | "5 Z 91 961" 1492 | ] 1493 | }, 1494 | "execution_count": 3, 1495 | "metadata": {}, 1496 | "output_type": "execute_result" 1497 | } 1498 | ], 1499 | "source": [ 1500 | "df" 1501 | ] 1502 | }, 1503 | { 1504 | "cell_type": "code", 1505 | "execution_count": 4, 1506 | "id": "263a19dd", 1507 | "metadata": {}, 1508 | "outputs": [ 1509 | { 1510 | "data": { 1511 | "text/plain": [ 1512 | "groups XYZXYZ\n", 1513 | "val1 150\n", 1514 | "val2 1980\n", 1515 | "dtype: object" 1516 | ] 1517 | }, 1518 | "execution_count": 4, 1519 | "metadata": {}, 1520 | "output_type": "execute_result" 1521 | } 1522 | ], 1523 | "source": [ 1524 | "df.apply(np.sum)" 1525 | ] 1526 | }, 1527 | { 1528 | "cell_type": "code", 1529 | "execution_count": 5, 1530 | "id": "c14ca735", 1531 | "metadata": {}, 1532 | "outputs": [ 1533 | { 1534 | "ename": "TypeError", 1535 | "evalue": "Could not convert XYZXYZ to numeric", 1536 | "output_type": "error", 1537 | "traceback": [ 1538 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 1539 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 1540 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36m_ensure_numeric\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 1536\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1537\u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1538\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1541 | 
"\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'XYZXYZ'", 1542 | "\nDuring handling of the above exception, another exception occurred:\n", 1543 | "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", 1544 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36m_ensure_numeric\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 1540\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1541\u001b[1;33m \u001b[0mx\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcomplex\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mx\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1542\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1545 | "\u001b[1;31mValueError\u001b[0m: complex() arg is a malformed string", 1546 | "\nThe above exception was the direct cause of the following exception:\n", 1547 | "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)", 1548 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m", 1549 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\frame.py\u001b[0m in \u001b[0;36mapply\u001b[1;34m(self, func, axis, raw, result_type, args, **kwds)\u001b[0m\n\u001b[0;32m 7766\u001b[0m \u001b[0mkwds\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7767\u001b[0m )\n\u001b[1;32m-> 7768\u001b[1;33m \u001b[1;32mreturn\u001b[0m 
\u001b[0mop\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 7769\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7770\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapplymap\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mna_action\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mOptional\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1550 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mget_result\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 183\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_raw\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 184\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 185\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 186\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 187\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mapply_empty_result\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1551 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_standard\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 274\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 275\u001b[0m \u001b[1;32mdef\u001b[0m 
\u001b[0mapply_standard\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 276\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mres_index\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mapply_series_generator\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 277\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 278\u001b[0m \u001b[1;31m# wrap results\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1552 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\apply.py\u001b[0m in \u001b[0;36mapply_series_generator\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 288\u001b[0m \u001b[1;32mfor\u001b[0m \u001b[0mi\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mv\u001b[0m \u001b[1;32min\u001b[0m \u001b[0menumerate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mseries_gen\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 289\u001b[0m \u001b[1;31m# ignore SettingWithCopy here in case the user mutates\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 290\u001b[1;33m \u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mv\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 291\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mi\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mABCSeries\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 292\u001b[0m \u001b[1;31m# If we have a view on v, we need to make a copy because\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1553 | "\u001b[1;32m<__array_function__ internals>\u001b[0m in \u001b[0;36mmean\u001b[1;34m(*args, **kwargs)\u001b[0m\n", 1554 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\numpy\\core\\fromnumeric.py\u001b[0m in \u001b[0;36mmean\u001b[1;34m(a, axis, dtype, out, keepdims, where)\u001b[0m\n\u001b[0;32m 3415\u001b[0m \u001b[1;32mpass\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3416\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 3417\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mout\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mout\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3418\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3419\u001b[0m return _methods._mean(a, axis=axis, dtype=dtype,\n", 1555 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mmean\u001b[1;34m(self, axis, skipna, level, numeric_only, **kwargs)\u001b[0m\n\u001b[0;32m 11116\u001b[0m )\n\u001b[0;32m 11117\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnumeric_only\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m> 11118\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mNDFrame\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnumeric_only\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 11119\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11120\u001b[0m \u001b[1;31m# pandas\\core\\generic.py:10924: error: Cannot assign to a method\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1556 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36mmean\u001b[1;34m(self, axis, skipna, level, numeric_only, **kwargs)\u001b[0m\n\u001b[0;32m 10724\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10725\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mmean\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnumeric_only\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m> 10726\u001b[1;33m return self._stat_function(\n\u001b[0m\u001b[0;32m 10727\u001b[0m \u001b[1;34m\"mean\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnanops\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mnanmean\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnumeric_only\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10728\u001b[0m )\n", 1557 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m_stat_function\u001b[1;34m(self, name, func, axis, skipna, level, numeric_only, **kwargs)\u001b[0m\n\u001b[0;32m 10709\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mlevel\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10710\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_agg_by_level\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mlevel\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mlevel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m> 10711\u001b[1;33m return self._reduce(\n\u001b[0m\u001b[0;32m 10712\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mname\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mname\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnumeric_only\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mnumeric_only\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10713\u001b[0m )\n", 1558 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\series.py\u001b[0m in \u001b[0;36m_reduce\u001b[1;34m(self, op, name, axis, skipna, numeric_only, filter_type, **kwds)\u001b[0m\n\u001b[0;32m 4180\u001b[0m )\n\u001b[0;32m 4181\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mall\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 4182\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdelegate\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 4183\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4184\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_reindex_indexer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mnew_index\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindexer\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1559 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36m_f\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 71\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 72\u001b[0m \u001b[1;32mwith\u001b[0m 
\u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0merrstate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0minvalid\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"ignore\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 73\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mf\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 74\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 75\u001b[0m \u001b[1;31m# we want to transform an object array\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1560 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36mf\u001b[1;34m(values, axis, skipna, **kwds)\u001b[0m\n\u001b[0;32m 133\u001b[0m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0malt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 134\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 135\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0malt\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 136\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 137\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1561 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36mnew_func\u001b[1;34m(values, axis, skipna, mask, **kwargs)\u001b[0m\n\u001b[0;32m 392\u001b[0m \u001b[0mmask\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0misna\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 393\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 394\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mfunc\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mskipna\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mskipna\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmask\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 395\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 396\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mdatetimelike\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1562 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36mnanmean\u001b[1;34m(values, axis, skipna, mask)\u001b[0m\n\u001b[0;32m 631\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 632\u001b[0m \u001b[0mcount\u001b[0m \u001b[1;33m=\u001b[0m 
\u001b[0m_get_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmask\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype_count\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 633\u001b[1;33m \u001b[0mthe_sum\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_ensure_numeric\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msum\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype_sum\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 634\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 635\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0maxis\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mthe_sum\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"ndim\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 1563 | "\u001b[1;32m~\\anaconda3\\lib\\site-packages\\pandas\\core\\nanops.py\u001b[0m in \u001b[0;36m_ensure_numeric\u001b[1;34m(x)\u001b[0m\n\u001b[0;32m 1542\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1543\u001b[0m \u001b[1;31m# e.g. 
\"foo\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1544\u001b[1;33m \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34mf\"Could not convert {x} to numeric\"\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0merr\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1545\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mx\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1546\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n", 1564 | "\u001b[1;31mTypeError\u001b[0m: Could not convert XYZXYZ to numeric" 1565 | ] 1566 | } 1567 | ], 1568 | "source": [ 1569 | "df.apply(np.mean)" 1570 | ] 1571 | }, 1572 | { 1573 | "cell_type": "code", 1574 | "execution_count": 6, 1575 | "id": "e3de4789", 1576 | "metadata": {}, 1577 | "outputs": [], 1578 | "source": [ 1579 | "df_new = df.loc[:, \"val1\":\"val2\"]" 1580 | ] 1581 | }, 1582 | { 1583 | "cell_type": "code", 1584 | "execution_count": 7, 1585 | "id": "7157fc82", 1586 | "metadata": {}, 1587 | "outputs": [ 1588 | { 1589 | "data": { 1590 | "text/html": [ 1591 | "
\n", 1592 | "\n", 1605 | "\n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | " \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | " \n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1644 | " \n", 1645 | "
val1val2
0292
115245
225325
314254
43103
591961
\n", 1646 | "
" 1647 | ], 1648 | "text/plain": [ 1649 | " val1 val2\n", 1650 | "0 2 92\n", 1651 | "1 15 245\n", 1652 | "2 25 325\n", 1653 | "3 14 254\n", 1654 | "4 3 103\n", 1655 | "5 91 961" 1656 | ] 1657 | }, 1658 | "execution_count": 7, 1659 | "metadata": {}, 1660 | "output_type": "execute_result" 1661 | } 1662 | ], 1663 | "source": [ 1664 | "df_new" 1665 | ] 1666 | }, 1667 | { 1668 | "cell_type": "code", 1669 | "execution_count": 8, 1670 | "id": "7999e9ec", 1671 | "metadata": { 1672 | "scrolled": true 1673 | }, 1674 | "outputs": [ 1675 | { 1676 | "data": { 1677 | "text/plain": [ 1678 | "val1 25.0\n", 1679 | "val2 330.0\n", 1680 | "dtype: float64" 1681 | ] 1682 | }, 1683 | "execution_count": 8, 1684 | "metadata": {}, 1685 | "output_type": "execute_result" 1686 | } 1687 | ], 1688 | "source": [ 1689 | "df_new.apply(np.mean)" 1690 | ] 1691 | }, 1692 | { 1693 | "cell_type": "code", 1694 | "execution_count": 33, 1695 | "id": "38093ea4", 1696 | "metadata": {}, 1697 | "outputs": [ 1698 | { 1699 | "data": { 1700 | "text/plain": [ 1701 | "330.0" 1702 | ] 1703 | }, 1704 | "execution_count": 33, 1705 | "metadata": {}, 1706 | "output_type": "execute_result" 1707 | } 1708 | ], 1709 | "source": [ 1710 | "df_new[\"val2\"].mean()" 1711 | ] 1712 | }, 1713 | { 1714 | "cell_type": "code", 1715 | "execution_count": 34, 1716 | "id": "f9a05aaa", 1717 | "metadata": {}, 1718 | "outputs": [ 1719 | { 1720 | "data": { 1721 | "text/plain": [ 1722 | "0 47.0\n", 1723 | "1 130.0\n", 1724 | "2 175.0\n", 1725 | "3 134.0\n", 1726 | "4 53.0\n", 1727 | "5 526.0\n", 1728 | "dtype: float64" 1729 | ] 1730 | }, 1731 | "execution_count": 34, 1732 | "metadata": {}, 1733 | "output_type": "execute_result" 1734 | } 1735 | ], 1736 | "source": [ 1737 | "df_new.apply(np.mean, axis = 1)" 1738 | ] 1739 | }, 1740 | { 1741 | "cell_type": "code", 1742 | "execution_count": 35, 1743 | "id": "7b431561", 1744 | "metadata": {}, 1745 | "outputs": [ 1746 | { 1747 | "data": { 1748 | "text/html": [ 1749 | "
\n", 1750 | "\n", 1763 | "\n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | "
val1val2
groups
X8.0173.0
Y9.0174.0
Z58.0643.0
\n", 1794 | "
" 1795 | ], 1796 | "text/plain": [ 1797 | " val1 val2\n", 1798 | "groups \n", 1799 | "X 8.0 173.0\n", 1800 | "Y 9.0 174.0\n", 1801 | "Z 58.0 643.0" 1802 | ] 1803 | }, 1804 | "execution_count": 35, 1805 | "metadata": {}, 1806 | "output_type": "execute_result" 1807 | } 1808 | ], 1809 | "source": [ 1810 | "df.groupby(\"groups\").apply(np.mean)" 1811 | ] 1812 | }, 1813 | { 1814 | "cell_type": "code", 1815 | "execution_count": 36, 1816 | "id": "fe0671f9", 1817 | "metadata": { 1818 | "scrolled": true 1819 | }, 1820 | "outputs": [], 1821 | "source": [ 1822 | "df2 = pd.DataFrame({'val1':[2, 4, 6, 8, 10], 'val2':['Turkey', 'UK', 'Australia','Philippines', 'Egypt']})" 1823 | ] 1824 | }, 1825 | { 1826 | "cell_type": "code", 1827 | "execution_count": 37, 1828 | "id": "99c1f576", 1829 | "metadata": { 1830 | "scrolled": true 1831 | }, 1832 | "outputs": [ 1833 | { 1834 | "data": { 1835 | "text/html": [ 1836 | "
\n", 1837 | "\n", 1850 | "\n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | "
val1val2
02Turkey
14UK
26Australia
38Philippines
410Egypt
\n", 1886 | "
" 1887 | ], 1888 | "text/plain": [ 1889 | " val1 val2\n", 1890 | "0 2 Turkey\n", 1891 | "1 4 UK\n", 1892 | "2 6 Australia\n", 1893 | "3 8 Philippines\n", 1894 | "4 10 Egypt" 1895 | ] 1896 | }, 1897 | "execution_count": 37, 1898 | "metadata": {}, 1899 | "output_type": "execute_result" 1900 | } 1901 | ], 1902 | "source": [ 1903 | "df2" 1904 | ] 1905 | }, 1906 | { 1907 | "cell_type": "code", 1908 | "execution_count": 38, 1909 | "id": "a9ead634", 1910 | "metadata": {}, 1911 | "outputs": [], 1912 | "source": [ 1913 | "def cube(x):\n", 1914 | " return x ** 3" 1915 | ] 1916 | }, 1917 | { 1918 | "cell_type": "code", 1919 | "execution_count": 39, 1920 | "id": "8b5e0492", 1921 | "metadata": { 1922 | "scrolled": true 1923 | }, 1924 | "outputs": [ 1925 | { 1926 | "data": { 1927 | "text/plain": [ 1928 | "0 8\n", 1929 | "1 64\n", 1930 | "2 216\n", 1931 | "3 512\n", 1932 | "4 1000\n", 1933 | "Name: val1, dtype: int64" 1934 | ] 1935 | }, 1936 | "execution_count": 39, 1937 | "metadata": {}, 1938 | "output_type": "execute_result" 1939 | } 1940 | ], 1941 | "source": [ 1942 | "df2.val1.apply(cube)" 1943 | ] 1944 | }, 1945 | { 1946 | "cell_type": "code", 1947 | "execution_count": 40, 1948 | "id": "4cd9e724", 1949 | "metadata": {}, 1950 | "outputs": [ 1951 | { 1952 | "data": { 1953 | "text/plain": [ 1954 | "0 6\n", 1955 | "1 2\n", 1956 | "2 9\n", 1957 | "3 11\n", 1958 | "4 5\n", 1959 | "Name: val2, dtype: int64" 1960 | ] 1961 | }, 1962 | "execution_count": 40, 1963 | "metadata": {}, 1964 | "output_type": "execute_result" 1965 | } 1966 | ], 1967 | "source": [ 1968 | "df2.val2.apply(len)" 1969 | ] 1970 | }, 1971 | { 1972 | "cell_type": "markdown", 1973 | "id": "d99b9023", 1974 | "metadata": {}, 1975 | "source": [ 1976 | "## Examining the Data Set 3" 1977 | ] 1978 | }, 1979 | { 1980 | "cell_type": "code", 1981 | "execution_count": 41, 1982 | "id": "ebf223a1", 1983 | "metadata": {}, 1984 | "outputs": [], 1985 | "source": [ 1986 | "import pandas as pd\n", 1987 | "import seaborn as sns\n", 
1988 | "import numpy as np" 1989 | ] 1990 | }, 1991 | { 1992 | "cell_type": "code", 1993 | "execution_count": 42, 1994 | "id": "af020fef", 1995 | "metadata": {}, 1996 | "outputs": [], 1997 | "source": [ 1998 | "df = sns.load_dataset(\"titanic\")" 1999 | ] 2000 | }, 2001 | { 2002 | "cell_type": "code", 2003 | "execution_count": 43, 2004 | "id": "51bcc364", 2005 | "metadata": { 2006 | "scrolled": false 2007 | }, 2008 | "outputs": [ 2009 | { 2010 | "data": { 2011 | "text/html": [ 2012 | "
\n", 2013 | "\n", 2026 | "\n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | " \n", 2091 | " \n", 2092 | " \n", 2093 | " \n", 2094 | " \n", 2095 | " \n", 2096 | " \n", 2097 | " \n", 2098 | " \n", 2099 | " \n", 2100 | " \n", 2101 | " \n", 2102 | " \n", 2103 | " \n", 2104 | " \n", 2105 | " \n", 2106 | " \n", 2107 | " \n", 2108 | " \n", 2109 | " \n", 2110 | " \n", 2111 | " \n", 2112 | " \n", 2113 | " \n", 2114 | " \n", 2115 | " \n", 2116 | " \n", 2117 | " \n", 2118 | " \n", 2119 | " \n", 2120 | " \n", 2121 | " \n", 2122 | " \n", 2123 | " \n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
\n", 2140 | "
" 2141 | ], 2142 | "text/plain": [ 2143 | " survived pclass sex age sibsp parch fare embarked class \\\n", 2144 | "0 0 3 male 22.0 1 0 7.2500 S Third \n", 2145 | "1 1 1 female 38.0 1 0 71.2833 C First \n", 2146 | "2 1 3 female 26.0 0 0 7.9250 S Third \n", 2147 | "3 1 1 female 35.0 1 0 53.1000 S First \n", 2148 | "4 0 3 male 35.0 0 0 8.0500 S Third \n", 2149 | "\n", 2150 | " who adult_male deck embark_town alive alone \n", 2151 | "0 man True NaN Southampton no False \n", 2152 | "1 woman False C Cherbourg yes False \n", 2153 | "2 woman False NaN Southampton yes True \n", 2154 | "3 woman False C Southampton yes False \n", 2155 | "4 man True NaN Southampton no True " 2156 | ] 2157 | }, 2158 | "execution_count": 43, 2159 | "metadata": {}, 2160 | "output_type": "execute_result" 2161 | } 2162 | ], 2163 | "source": [ 2164 | "df.head()" 2165 | ] 2166 | }, 2167 | { 2168 | "cell_type": "code", 2169 | "execution_count": 44, 2170 | "id": "45902a00", 2171 | "metadata": { 2172 | "scrolled": false 2173 | }, 2174 | "outputs": [ 2175 | { 2176 | "name": "stdout", 2177 | "output_type": "stream", 2178 | "text": [ 2179 | "\n", 2180 | "RangeIndex: 891 entries, 0 to 890\n", 2181 | "Data columns (total 15 columns):\n", 2182 | " # Column Non-Null Count Dtype \n", 2183 | "--- ------ -------------- ----- \n", 2184 | " 0 survived 891 non-null int64 \n", 2185 | " 1 pclass 891 non-null int64 \n", 2186 | " 2 sex 891 non-null object \n", 2187 | " 3 age 714 non-null float64 \n", 2188 | " 4 sibsp 891 non-null int64 \n", 2189 | " 5 parch 891 non-null int64 \n", 2190 | " 6 fare 891 non-null float64 \n", 2191 | " 7 embarked 889 non-null object \n", 2192 | " 8 class 891 non-null category\n", 2193 | " 9 who 891 non-null object \n", 2194 | " 10 adult_male 891 non-null bool \n", 2195 | " 11 deck 203 non-null category\n", 2196 | " 12 embark_town 889 non-null object \n", 2197 | " 13 alive 891 non-null object \n", 2198 | " 14 alone 891 non-null bool \n", 2199 | "dtypes: bool(2), category(2), float64(2), 
int64(4), object(5)\n", 2200 | "memory usage: 80.7+ KB\n" 2201 | ] 2202 | } 2203 | ], 2204 | "source": [ 2205 | "df.info()" 2206 | ] 2207 | }, 2208 | { 2209 | "cell_type": "markdown", 2210 | "id": "0de1ad71", 2211 | "metadata": {}, 2212 | "source": [ 2213 | "## Pivot Tables in Pandas Library" 2214 | ] 2215 | }, 2216 | { 2217 | "cell_type": "code", 2218 | "execution_count": 45, 2219 | "id": "9b80afdf", 2220 | "metadata": {}, 2221 | "outputs": [], 2222 | "source": [ 2223 | "import pandas as pd\n", 2224 | "import seaborn as sns\n", 2225 | "import numpy as np" 2226 | ] 2227 | }, 2228 | { 2229 | "cell_type": "code", 2230 | "execution_count": 46, 2231 | "id": "933388ab", 2232 | "metadata": {}, 2233 | "outputs": [], 2234 | "source": [ 2235 | "df = sns.load_dataset(\"titanic\")" 2236 | ] 2237 | }, 2238 | { 2239 | "cell_type": "code", 2240 | "execution_count": 47, 2241 | "id": "2faa9a4a", 2242 | "metadata": {}, 2243 | "outputs": [ 2244 | { 2245 | "data": { 2246 | "text/html": [ 2247 | "
\n", 2248 | "\n", 2261 | "\n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | " \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | " \n", 2291 | " \n", 2292 | " \n", 2293 | " \n", 2294 | " \n", 2295 | " \n", 2296 | " \n", 2297 | " \n", 2298 | " \n", 2299 | " \n", 2300 | " \n", 2301 | " \n", 2302 | " \n", 2303 | " \n", 2304 | " \n", 2305 | " \n", 2306 | " \n", 2307 | " \n", 2308 | " \n", 2309 | " \n", 2310 | " \n", 2311 | " \n", 2312 | " \n", 2313 | " \n", 2314 | " \n", 2315 | " \n", 2316 | " \n", 2317 | " \n", 2318 | " \n", 2319 | " \n", 2320 | " \n", 2321 | " \n", 2322 | " \n", 2323 | " \n", 2324 | " \n", 2325 | " \n", 2326 | " \n", 2327 | " \n", 2328 | " \n", 2329 | " \n", 2330 | " \n", 2331 | " \n", 2332 | " \n", 2333 | " \n", 2334 | " \n", 2335 | " \n", 2336 | " \n", 2337 | " \n", 2338 | " \n", 2339 | " \n", 2340 | " \n", 2341 | " \n", 2342 | " \n", 2343 | " \n", 2344 | " \n", 2345 | " \n", 2346 | " \n", 2347 | " \n", 2348 | " \n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | "
survivedpclasssexagesibspparchfareembarkedclasswhoadult_maledeckembark_townalivealone
003male22.0107.2500SThirdmanTrueNaNSouthamptonnoFalse
111female38.01071.2833CFirstwomanFalseCCherbourgyesFalse
213female26.0007.9250SThirdwomanFalseNaNSouthamptonyesTrue
311female35.01053.1000SFirstwomanFalseCSouthamptonyesFalse
403male35.0008.0500SThirdmanTrueNaNSouthamptonnoTrue
\n", 2375 | "
" 2376 | ], 2377 | "text/plain": [ 2378 | " survived pclass sex age sibsp parch fare embarked class \\\n", 2379 | "0 0 3 male 22.0 1 0 7.2500 S Third \n", 2380 | "1 1 1 female 38.0 1 0 71.2833 C First \n", 2381 | "2 1 3 female 26.0 0 0 7.9250 S Third \n", 2382 | "3 1 1 female 35.0 1 0 53.1000 S First \n", 2383 | "4 0 3 male 35.0 0 0 8.0500 S Third \n", 2384 | "\n", 2385 | " who adult_male deck embark_town alive alone \n", 2386 | "0 man True NaN Southampton no False \n", 2387 | "1 woman False C Cherbourg yes False \n", 2388 | "2 woman False NaN Southampton yes True \n", 2389 | "3 woman False C Southampton yes False \n", 2390 | "4 man True NaN Southampton no True " 2391 | ] 2392 | }, 2393 | "execution_count": 47, 2394 | "metadata": {}, 2395 | "output_type": "execute_result" 2396 | } 2397 | ], 2398 | "source": [ 2399 | "df.head()" 2400 | ] 2401 | }, 2402 | { 2403 | "cell_type": "code", 2404 | "execution_count": 48, 2405 | "id": "90071115", 2406 | "metadata": { 2407 | "scrolled": true 2408 | }, 2409 | "outputs": [ 2410 | { 2411 | "data": { 2412 | "text/html": [ 2413 | "
\n", 2414 | "\n", 2427 | "\n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | "
survived
sex
female0.742038
male0.188908
\n", 2449 | "
" 2450 | ], 2451 | "text/plain": [ 2452 | " survived\n", 2453 | "sex \n", 2454 | "female 0.742038\n", 2455 | "male 0.188908" 2456 | ] 2457 | }, 2458 | "execution_count": 48, 2459 | "metadata": {}, 2460 | "output_type": "execute_result" 2461 | } 2462 | ], 2463 | "source": [ 2464 | "df.groupby(\"sex\")[[\"survived\"]].mean()" 2465 | ] 2466 | }, 2467 | { 2468 | "cell_type": "code", 2469 | "execution_count": 49, 2470 | "id": "7754e53f", 2471 | "metadata": { 2472 | "scrolled": false 2473 | }, 2474 | "outputs": [ 2475 | { 2476 | "data": { 2477 | "text/html": [ 2478 | "
\n", 2479 | "\n", 2492 | "\n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | " \n", 2497 | " \n", 2498 | " \n", 2499 | " \n", 2500 | " \n", 2501 | " \n", 2502 | " \n", 2503 | " \n", 2504 | " \n", 2505 | " \n", 2506 | " \n", 2507 | " \n", 2508 | " \n", 2509 | " \n", 2510 | " \n", 2511 | " \n", 2512 | " \n", 2513 | " \n", 2514 | " \n", 2515 | " \n", 2516 | " \n", 2517 | " \n", 2518 | " \n", 2519 | " \n", 2520 | " \n", 2521 | " \n", 2522 | " \n", 2523 | " \n", 2524 | " \n", 2525 | " \n", 2526 | " \n", 2527 | " \n", 2528 | " \n", 2529 | " \n", 2530 | " \n", 2531 | " \n", 2532 | " \n", 2533 | "
survived
sexembark_town
femaleCherbourg0.876712
Queenstown0.750000
Southampton0.689655
maleCherbourg0.305263
Queenstown0.073171
Southampton0.174603
\n", 2534 | "
" 2535 | ], 2536 | "text/plain": [ 2537 | " survived\n", 2538 | "sex embark_town \n", 2539 | "female Cherbourg 0.876712\n", 2540 | " Queenstown 0.750000\n", 2541 | " Southampton 0.689655\n", 2542 | "male Cherbourg 0.305263\n", 2543 | " Queenstown 0.073171\n", 2544 | " Southampton 0.174603" 2545 | ] 2546 | }, 2547 | "execution_count": 49, 2548 | "metadata": {}, 2549 | "output_type": "execute_result" 2550 | } 2551 | ], 2552 | "source": [ 2553 | "df.groupby([\"sex\", \"embark_town\"])[[\"survived\"]].mean()" 2554 | ] 2555 | }, 2556 | { 2557 | "cell_type": "code", 2558 | "execution_count": 50, 2559 | "id": "f0133d55", 2560 | "metadata": {}, 2561 | "outputs": [ 2562 | { 2563 | "data": { 2564 | "text/html": [ 2565 | "
\n", 2566 | "\n", 2583 | "\n", 2584 | " \n", 2585 | " \n", 2586 | " \n", 2587 | " \n", 2588 | " \n", 2589 | " \n", 2590 | " \n", 2591 | " \n", 2592 | " \n", 2593 | " \n", 2594 | " \n", 2595 | " \n", 2596 | " \n", 2597 | " \n", 2598 | " \n", 2599 | " \n", 2600 | " \n", 2601 | " \n", 2602 | " \n", 2603 | " \n", 2604 | " \n", 2605 | " \n", 2606 | " \n", 2607 | " \n", 2608 | " \n", 2609 | " \n", 2610 | " \n", 2611 | " \n", 2612 | " \n", 2613 | " \n", 2614 | " \n", 2615 | " \n", 2616 | "
survived
embark_townCherbourgQueenstownSouthampton
sex
female0.8767120.7500000.689655
male0.3052630.0731710.174603
\n", 2617 | "
" 2618 | ], 2619 | "text/plain": [ 2620 | " survived \n", 2621 | "embark_town Cherbourg Queenstown Southampton\n", 2622 | "sex \n", 2623 | "female 0.876712 0.750000 0.689655\n", 2624 | "male 0.305263 0.073171 0.174603" 2625 | ] 2626 | }, 2627 | "execution_count": 50, 2628 | "metadata": {}, 2629 | "output_type": "execute_result" 2630 | } 2631 | ], 2632 | "source": [ 2633 | "df.groupby([\"sex\", \"embark_town\"])[[\"survived\"]].mean().unstack()" 2634 | ] 2635 | }, 2636 | { 2637 | "cell_type": "code", 2638 | "execution_count": 51, 2639 | "id": "9855d1e6", 2640 | "metadata": {}, 2641 | "outputs": [ 2642 | { 2643 | "data": { 2644 | "text/html": [ 2645 | "
\n", 2646 | "\n", 2659 | "\n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | " \n", 2676 | " \n", 2677 | " \n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | "
embark_townCherbourgQueenstownSouthampton
sex
female0.8767120.7500000.689655
male0.3052630.0731710.174603
\n", 2689 | "
" 2690 | ], 2691 | "text/plain": [ 2692 | "embark_town Cherbourg Queenstown Southampton\n", 2693 | "sex \n", 2694 | "female 0.876712 0.750000 0.689655\n", 2695 | "male 0.305263 0.073171 0.174603" 2696 | ] 2697 | }, 2698 | "execution_count": 51, 2699 | "metadata": {}, 2700 | "output_type": "execute_result" 2701 | } 2702 | ], 2703 | "source": [ 2704 | "df.pivot_table(\"survived\", index = \"sex\", columns = \"embark_town\")" 2705 | ] 2706 | }, 2707 | { 2708 | "cell_type": "code", 2709 | "execution_count": 52, 2710 | "id": "04e050d2", 2711 | "metadata": { 2712 | "scrolled": true 2713 | }, 2714 | "outputs": [ 2715 | { 2716 | "data": { 2717 | "text/html": [ 2718 | "
\n", 2719 | "\n", 2736 | "\n", 2737 | " \n", 2738 | " \n", 2739 | " \n", 2740 | " \n", 2741 | " \n", 2742 | " \n", 2743 | " \n", 2744 | " \n", 2745 | " \n", 2746 | " \n", 2747 | " \n", 2748 | " \n", 2749 | " \n", 2750 | " \n", 2751 | " \n", 2752 | " \n", 2753 | " \n", 2754 | " \n", 2755 | " \n", 2756 | " \n", 2757 | " \n", 2758 | " \n", 2759 | " \n", 2760 | " \n", 2761 | " \n", 2762 | " \n", 2763 | " \n", 2764 | " \n", 2765 | " \n", 2766 | " \n", 2767 | " \n", 2768 | " \n", 2769 | " \n", 2770 | " \n", 2771 | " \n", 2772 | " \n", 2773 | " \n", 2774 | " \n", 2775 | " \n", 2776 | " \n", 2777 | " \n", 2778 | " \n", 2779 | " \n", 2780 | " \n", 2781 | " \n", 2782 | " \n", 2783 | " \n", 2784 | " \n", 2785 | " \n", 2786 | " \n", 2787 | " \n", 2788 | " \n", 2789 | " \n", 2790 | " \n", 2791 | " \n", 2792 | " \n", 2793 | " \n", 2794 | " \n", 2795 | "
summeanstd
embark_townCherbourgQueenstownSouthamptonCherbourgQueenstownSouthamptonCherbourgQueenstownSouthampton
sex
female64271400.8767120.7500000.6896550.3310420.4391550.463778
male293770.3052630.0731710.1746030.4629620.2636520.380058
\n", 2796 | "
" 2797 | ], 2798 | "text/plain": [ 2799 | " sum mean \\\n", 2800 | "embark_town Cherbourg Queenstown Southampton Cherbourg Queenstown Southampton \n", 2801 | "sex \n", 2802 | "female 64 27 140 0.876712 0.750000 0.689655 \n", 2803 | "male 29 3 77 0.305263 0.073171 0.174603 \n", 2804 | "\n", 2805 | " std \n", 2806 | "embark_town Cherbourg Queenstown Southampton \n", 2807 | "sex \n", 2808 | "female 0.331042 0.439155 0.463778 \n", 2809 | "male 0.462962 0.263652 0.380058 " 2810 | ] 2811 | }, 2812 | "execution_count": 52, 2813 | "metadata": {}, 2814 | "output_type": "execute_result" 2815 | } 2816 | ], 2817 | "source": [ 2818 | "df.pivot_table(\"survived\", index = \"sex\", columns = \"embark_town\", aggfunc = [\"sum\", \"mean\", \"std\"])" 2819 | ] 2820 | }, 2821 | { 2822 | "cell_type": "code", 2823 | "execution_count": 53, 2824 | "id": "9985ede3", 2825 | "metadata": {}, 2826 | "outputs": [ 2827 | { 2828 | "data": { 2829 | "text/html": [ 2830 | "
\n", 2831 | "\n", 2848 | "\n", 2849 | " \n", 2850 | " \n", 2851 | " \n", 2852 | " \n", 2853 | " \n", 2854 | " \n", 2855 | " \n", 2856 | " \n", 2857 | " \n", 2858 | " \n", 2859 | " \n", 2860 | " \n", 2861 | " \n", 2862 | " \n", 2863 | " \n", 2864 | " \n", 2865 | " \n", 2866 | " \n", 2867 | " \n", 2868 | " \n", 2869 | " \n", 2870 | " \n", 2871 | " \n", 2872 | " \n", 2873 | " \n", 2874 | " \n", 2875 | " \n", 2876 | " \n", 2877 | " \n", 2878 | " \n", 2879 | " \n", 2880 | " \n", 2881 | " \n", 2882 | " \n", 2883 | " \n", 2884 | " \n", 2885 | " \n", 2886 | " \n", 2887 | " \n", 2888 | " \n", 2889 | " \n", 2890 | " \n", 2891 | " \n", 2892 | " \n", 2893 | " \n", 2894 | " \n", 2895 | " \n", 2896 | " \n", 2897 | " \n", 2898 | " \n", 2899 | " \n", 2900 | " \n", 2901 | " \n", 2902 | " \n", 2903 | " \n", 2904 | " \n", 2905 | " \n", 2906 | " \n", 2907 | " \n", 2908 | " \n", 2909 | " \n", 2910 | " \n", 2911 | " \n", 2912 | " \n", 2913 | " \n", 2914 | " \n", 2915 | " \n", 2916 | " \n", 2917 | " \n", 2918 | " \n", 2919 | " \n", 2920 | " \n", 2921 | " \n", 2922 | " \n", 2923 | " \n", 2924 | " \n", 2925 | " \n", 2926 | " \n", 2927 | " \n", 2928 | " \n", 2929 | " \n", 2930 | " \n", 2931 | " \n", 2932 | " \n", 2933 | " \n", 2934 | " \n", 2935 | " \n", 2936 | " \n", 2937 | " \n", 2938 | " \n", 2939 | " \n", 2940 | " \n", 2941 | " \n", 2942 | " \n", 2943 | " \n", 2944 | " \n", 2945 | " \n", 2946 | " \n", 2947 | " \n", 2948 | " \n", 2949 | " \n", 2950 | " \n", 2951 | " \n", 2952 | " \n", 2953 | " \n", 2954 | " \n", 2955 | " \n", 2956 | " \n", 2957 | " \n", 2958 | " \n", 2959 | " \n", 2960 | "
summeanstd
embark_townCherbourgQueenstownSouthamptonCherbourgQueenstownSouthamptonCherbourgQueenstownSouthampton
sexclass
femaleFirst421460.9767441.0000000.9583330.152499NaN0.201941
Second72611.0000001.0000000.9104480.0000000.0000000.287694
Third1524330.6521740.7272730.3750000.4869850.4522670.486897
maleFirst170280.4047620.0000000.3544300.496796NaN0.481397
Second20150.2000000.0000000.1546390.421637NaN0.363439
Third103340.2325580.0769230.1283020.4274630.2699530.335058
\n", 2961 | "
" 2962 | ], 2963 | "text/plain": [ 2964 | " sum mean \\\n", 2965 | "embark_town Cherbourg Queenstown Southampton Cherbourg Queenstown \n", 2966 | "sex class \n", 2967 | "female First 42 1 46 0.976744 1.000000 \n", 2968 | " Second 7 2 61 1.000000 1.000000 \n", 2969 | " Third 15 24 33 0.652174 0.727273 \n", 2970 | "male First 17 0 28 0.404762 0.000000 \n", 2971 | " Second 2 0 15 0.200000 0.000000 \n", 2972 | " Third 10 3 34 0.232558 0.076923 \n", 2973 | "\n", 2974 | " std \n", 2975 | "embark_town Southampton Cherbourg Queenstown Southampton \n", 2976 | "sex class \n", 2977 | "female First 0.958333 0.152499 NaN 0.201941 \n", 2978 | " Second 0.910448 0.000000 0.000000 0.287694 \n", 2979 | " Third 0.375000 0.486985 0.452267 0.486897 \n", 2980 | "male First 0.354430 0.496796 NaN 0.481397 \n", 2981 | " Second 0.154639 0.421637 NaN 0.363439 \n", 2982 | " Third 0.128302 0.427463 0.269953 0.335058 " 2983 | ] 2984 | }, 2985 | "execution_count": 53, 2986 | "metadata": {}, 2987 | "output_type": "execute_result" 2988 | } 2989 | ], 2990 | "source": [ 2991 | "df.pivot_table(\"survived\", index = [\"sex\", \"class\"], columns = \"embark_town\", aggfunc = [\"sum\", \"mean\", \"std\"])" 2992 | ] 2993 | }, 2994 | { 2995 | "cell_type": "markdown", 2996 | "id": "6266834b", 2997 | "metadata": {}, 2998 | "source": [ 2999 | "## External Data Entry to Pandas Library" 3000 | ] 3001 | }, 3002 | { 3003 | "cell_type": "markdown", 3004 | "id": "58c0393a", 3005 | "metadata": {}, 3006 | "source": [ 3007 | "## Data Entry with Csv and Txt Files" 3008 | ] 3009 | }, 3010 | { 3011 | "cell_type": "code", 3012 | "execution_count": 1, 3013 | "id": "9c6f4bd4", 3014 | "metadata": {}, 3015 | "outputs": [], 3016 | "source": [ 3017 | "import pandas as pd" 3018 | ] 3019 | }, 3020 | { 3021 | "cell_type": "code", 3022 | "execution_count": 2, 3023 | "id": "c31e005e", 3024 | "metadata": { 3025 | "scrolled": true 3026 | }, 3027 | "outputs": [ 3028 | { 3029 | "data": { 3030 | "text/html": [ 3031 | "
\n", 3032 | "\n", 3045 | "\n", 3046 | " \n", 3047 | " \n", 3048 | " \n", 3049 | " \n", 3050 | " \n", 3051 | " \n", 3052 | " \n", 3053 | " \n", 3054 | " \n", 3055 | " \n", 3056 | " \n", 3057 | " \n", 3058 | " \n", 3059 | " \n", 3060 | " \n", 3061 | " \n", 3062 | " \n", 3063 | " \n", 3064 | " \n", 3065 | " \n", 3066 | " \n", 3067 | " \n", 3068 | " \n", 3069 | " \n", 3070 | " \n", 3071 | " \n", 3072 | " \n", 3073 | " \n", 3074 | " \n", 3075 | " \n", 3076 | " \n", 3077 | " \n", 3078 | " \n", 3079 | " \n", 3080 | " \n", 3081 | " \n", 3082 | " \n", 3083 | " \n", 3084 | " \n", 3085 | " \n", 3086 | " \n", 3087 | " \n", 3088 | " \n", 3089 | " \n", 3090 | " \n", 3091 | " \n", 3092 | " \n", 3093 | " \n", 3094 | " \n", 3095 | " \n", 3096 | " \n", 3097 | " \n", 3098 | " \n", 3099 | " \n", 3100 | " \n", 3101 | " \n", 3102 | " \n", 3103 | " \n", 3104 | " \n", 3105 | " \n", 3106 | "
VAL1VAL2VAL3VAL4VAL5
02.7068500.6281330.9079690.5038260.651118
1-0.319318-0.8480770.605965-2.0181680.740122
20.528813-0.5890010.188695-0.758872-0.933237
30.9550570.1907941.9787572.6059670.683509
40.3026651.693723-1.706086-1.159119-0.134841
50.3905280.1669050.1845020.8077060.072960
\n", 3107 | "
" 3108 | ], 3109 | "text/plain": [ 3110 | " VAL1 VAL2 VAL3 VAL4 VAL5\n", 3111 | "0 2.706850 0.628133 0.907969 0.503826 0.651118\n", 3112 | "1 -0.319318 -0.848077 0.605965 -2.018168 0.740122\n", 3113 | "2 0.528813 -0.589001 0.188695 -0.758872 -0.933237\n", 3114 | "3 0.955057 0.190794 1.978757 2.605967 0.683509\n", 3115 | "4 0.302665 1.693723 -1.706086 -1.159119 -0.134841\n", 3116 | "5 0.390528 0.166905 0.184502 0.807706 0.072960" 3117 | ] 3118 | }, 3119 | "execution_count": 2, 3120 | "metadata": {}, 3121 | "output_type": "execute_result" 3122 | } 3123 | ], 3124 | "source": [ 3125 | "pd.read_csv(\"pandas/csv_example_data.csv\")  # forward slashes are portable across operating systems" 3126 | ] 3127 | }, 3128 | { 3129 | "cell_type": "code", 3130 | "execution_count": 3, 3131 | "id": "a310c007", 3132 | "metadata": {}, 3133 | "outputs": [ 3134 | { 3135 | "data": { 3136 | "text/html": [ 3137 | "
\n", 3138 | "\n", 3151 | "\n", 3152 | " \n", 3153 | " \n", 3154 | " \n", 3155 | " \n", 3156 | " \n", 3157 | " \n", 3158 | " \n", 3159 | " \n", 3160 | " \n", 3161 | " \n", 3162 | " \n", 3163 | " \n", 3164 | " \n", 3165 | " \n", 3166 | " \n", 3167 | " \n", 3168 | " \n", 3169 | " \n", 3170 | " \n", 3171 | " \n", 3172 | " \n", 3173 | " \n", 3174 | " \n", 3175 | " \n", 3176 | " \n", 3177 | " \n", 3178 | " \n", 3179 | " \n", 3180 | " \n", 3181 | " \n", 3182 | " \n", 3183 | " \n", 3184 | "
employee;department;Year
0Julia;Data Science;2005
1Marie;Web Development;2008
2Adam;Data Science;2011
3Nicole;Cyber Security;2002
4Joseph;App Developer;2007
5Kayra;Data Science;2008
\n", 3185 | "
" 3186 | ], 3187 | "text/plain": [ 3188 | " employee;department;Year\n", 3189 | "0 Julia;Data Science;2005\n", 3190 | "1 Marie;Web Development;2008\n", 3191 | "2 Adam;Data Science;2011\n", 3192 | "3 Nicole;Cyber Security;2002\n", 3193 | "4 Joseph;App Developer;2007\n", 3194 | "5 Kayra;Data Science;2008" 3195 | ] 3196 | }, 3197 | "execution_count": 3, 3198 | "metadata": {}, 3199 | "output_type": "execute_result" 3200 | } 3201 | ], 3202 | "source": [ 3203 | "pd.read_csv(\"pandas/csv_example_data_2.csv\")  # forward slashes are portable across operating systems" 3204 | ] 3205 | }, 3206 | { 3207 | "cell_type": "code", 3208 | "execution_count": 4, 3209 | "id": "04d1b1a0", 3210 | "metadata": {}, 3211 | "outputs": [ 3212 | { 3213 | "data": { 3214 | "text/html": [ 3215 | "
\n", 3216 | "\n", 3229 | "\n", 3230 | " \n", 3231 | " \n", 3232 | " \n", 3233 | " \n", 3234 | " \n", 3235 | " \n", 3236 | " \n", 3237 | " \n", 3238 | " \n", 3239 | " \n", 3240 | " \n", 3241 | " \n", 3242 | " \n", 3243 | " \n", 3244 | " \n", 3245 | " \n", 3246 | " \n", 3247 | " \n", 3248 | " \n", 3249 | " \n", 3250 | " \n", 3251 | " \n", 3252 | " \n", 3253 | " \n", 3254 | " \n", 3255 | " \n", 3256 | " \n", 3257 | " \n", 3258 | " \n", 3259 | " \n", 3260 | " \n", 3261 | " \n", 3262 | " \n", 3263 | " \n", 3264 | " \n", 3265 | " \n", 3266 | " \n", 3267 | " \n", 3268 | " \n", 3269 | " \n", 3270 | " \n", 3271 | " \n", 3272 | " \n", 3273 | " \n", 3274 | " \n", 3275 | " \n", 3276 | "
employeedepartmentYear
0JuliaData Science2005
1MarieWeb Development2008
2AdamData Science2011
3NicoleCyber Security2002
4JosephApp Developer2007
5KayraData Science2008
\n", 3277 | "
" 3278 | ], 3279 | "text/plain": [ 3280 | " employee department Year\n", 3281 | "0 Julia Data Science 2005\n", 3282 | "1 Marie Web Development 2008\n", 3283 | "2 Adam Data Science 2011\n", 3284 | "3 Nicole Cyber Security 2002\n", 3285 | "4 Joseph App Developer 2007\n", 3286 | "5 Kayra Data Science 2008" 3287 | ] 3288 | }, 3289 | "execution_count": 4, 3290 | "metadata": {}, 3291 | "output_type": "execute_result" 3292 | } 3293 | ], 3294 | "source": [ 3295 | "pd.read_csv(\"pandas/csv_example_data_2.csv\", sep = \";\")  # forward slashes are portable across operating systems" 3296 | ] 3297 | }, 3298 | { 3299 | "cell_type": "code", 3300 | "execution_count": 5, 3301 | "id": "3b93f8a8", 3302 | "metadata": { 3303 | "scrolled": false 3304 | }, 3305 | "outputs": [ 3306 | { 3307 | "data": { 3308 | "text/html": [ 3309 | "
\n", 3310 | "\n", 3323 | "\n", 3324 | " \n", 3325 | " \n", 3326 | " \n", 3327 | " \n", 3328 | " \n", 3329 | " \n", 3330 | " \n", 3331 | " \n", 3332 | " \n", 3333 | " \n", 3334 | " \n", 3335 | " \n", 3336 | " \n", 3337 | " \n", 3338 | " \n", 3339 | " \n", 3340 | " \n", 3341 | " \n", 3342 | " \n", 3343 | " \n", 3344 | " \n", 3345 | " \n", 3346 | " \n", 3347 | " \n", 3348 | " \n", 3349 | " \n", 3350 | " \n", 3351 | " \n", 3352 | " \n", 3353 | " \n", 3354 | " \n", 3355 | " \n", 3356 | " \n", 3357 | " \n", 3358 | " \n", 3359 | " \n", 3360 | " \n", 3361 | " \n", 3362 | " \n", 3363 | " \n", 3364 | " \n", 3365 | " \n", 3366 | " \n", 3367 | " \n", 3368 | " \n", 3369 | " \n", 3370 | "
employeedepartmentYear
0JuliaData Science2005
1MarieWeb Development2008
2AdamData Science2011
3NicoleCyber Security2002
4JosephApp Developer2007
5KayraData Science2008
\n", 3371 | "
" 3372 | ], 3373 | "text/plain": [ 3374 | " employee department Year\n", 3375 | "0 Julia Data Science 2005\n", 3376 | "1 Marie Web Development 2008\n", 3377 | "2 Adam Data Science 2011\n", 3378 | "3 Nicole Cyber Security 2002\n", 3379 | "4 Joseph App Developer 2007\n", 3380 | "5 Kayra Data Science 2008" 3381 | ] 3382 | }, 3383 | "execution_count": 5, 3384 | "metadata": {}, 3385 | "output_type": "execute_result" 3386 | } 3387 | ], 3388 | "source": [ 3389 | "pd.read_csv(\"pandas/csv_example_data_2.csv\", delimiter = \";\")  # forward slashes are portable across operating systems" 3390 | ] 3391 | }, 3392 | { 3393 | "cell_type": "code", 3394 | "execution_count": 59, 3395 | "id": "168c3b9b", 3396 | "metadata": {}, 3397 | "outputs": [ 3398 | { 3399 | "data": { 3400 | "text/html": [ 3401 | "
\n", 3402 | "\n", 3415 | "\n", 3416 | " \n", 3417 | " \n", 3418 | " \n", 3419 | " \n", 3420 | " \n", 3421 | " \n", 3422 | " \n", 3423 | " \n", 3424 | " \n", 3425 | " \n", 3426 | " \n", 3427 | " \n", 3428 | " \n", 3429 | " \n", 3430 | " \n", 3431 | " \n", 3432 | " \n", 3433 | " \n", 3434 | " \n", 3435 | " \n", 3436 | " \n", 3437 | " \n", 3438 | " \n", 3439 | " \n", 3440 | " \n", 3441 | " \n", 3442 | " \n", 3443 | " \n", 3444 | " \n", 3445 | " \n", 3446 | " \n", 3447 | " \n", 3448 | " \n", 3449 | " \n", 3450 | " \n", 3451 | " \n", 3452 | " \n", 3453 | " \n", 3454 | " \n", 3455 | " \n", 3456 | " \n", 3457 | " \n", 3458 | " \n", 3459 | " \n", 3460 | " \n", 3461 | " \n", 3462 | " \n", 3463 | " \n", 3464 | " \n", 3465 | " \n", 3466 | " \n", 3467 | " \n", 3468 | " \n", 3469 | "
Unnamed: 0employeedepartmentYear
00JuliaData Science2005
11MarieWeb Development2008
22AdamData Science2011
33NicoleCyber Security2002
44JosephApp Developer2007
55KayraData Science2008
\n", 3470 | "
" 3471 | ], 3472 | "text/plain": [ 3473 | " Unnamed: 0 employee department Year\n", 3474 | "0 0 Julia Data Science 2005\n", 3475 | "1 1 Marie Web Development 2008\n", 3476 | "2 2 Adam Data Science 2011\n", 3477 | "3 3 Nicole Cyber Security 2002\n", 3478 | "4 4 Joseph App Developer 2007\n", 3479 | "5 5 Kayra Data Science 2008" 3480 | ] 3481 | }, 3482 | "execution_count": 59, 3483 | "metadata": {}, 3484 | "output_type": "execute_result" 3485 | } 3486 | ], 3487 | "source": [ 3488 | "pd.read_csv(\"pandas/csv_example_data_3.csv\")  # forward slashes are portable across operating systems" 3489 | ] 3490 | }, 3491 | { 3492 | "cell_type": "code", 3493 | "execution_count": 60, 3494 | "id": "a3818718", 3495 | "metadata": { 3496 | "scrolled": true 3497 | }, 3498 | "outputs": [ 3499 | { 3500 | "data": { 3501 | "text/html": [ 3502 | "
\n", 3503 | "\n", 3516 | "\n", 3517 | " \n", 3518 | " \n", 3519 | " \n", 3520 | " \n", 3521 | " \n", 3522 | " \n", 3523 | " \n", 3524 | " \n", 3525 | " \n", 3526 | " \n", 3527 | " \n", 3528 | " \n", 3529 | " \n", 3530 | " \n", 3531 | " \n", 3532 | " \n", 3533 | " \n", 3534 | " \n", 3535 | " \n", 3536 | " \n", 3537 | " \n", 3538 | " \n", 3539 | " \n", 3540 | " \n", 3541 | " \n", 3542 | " \n", 3543 | " \n", 3544 | " \n", 3545 | " \n", 3546 | " \n", 3547 | " \n", 3548 | " \n", 3549 | " \n", 3550 | " \n", 3551 | " \n", 3552 | " \n", 3553 | " \n", 3554 | " \n", 3555 | " \n", 3556 | " \n", 3557 | " \n", 3558 | " \n", 3559 | " \n", 3560 | " \n", 3561 | " \n", 3562 | " \n", 3563 | "
employeedepartmentYear
0JuliaData Science2005
1MarieWeb Development2008
2AdamData Science2011
3NicoleCyber Security2002
4JosephApp Developer2007
5KayraData Science2008
\n", 3564 | "
" 3565 | ], 3566 | "text/plain": [ 3567 | " employee department Year\n", 3568 | "0 Julia Data Science 2005\n", 3569 | "1 Marie Web Development 2008\n", 3570 | "2 Adam Data Science 2011\n", 3571 | "3 Nicole Cyber Security 2002\n", 3572 | "4 Joseph App Developer 2007\n", 3573 | "5 Kayra Data Science 2008" 3574 | ] 3575 | }, 3576 | "execution_count": 60, 3577 | "metadata": {}, 3578 | "output_type": "execute_result" 3579 | } 3580 | ], 3581 | "source": [ 3582 | "pd.read_csv(\"pandas/csv_example_data_3.csv\", index_col=0)  # forward slashes are portable across operating systems" 3583 | ] 3584 | }, 3585 | { 3586 | "cell_type": "code", 3587 | "execution_count": 61, 3588 | "id": "8c831319", 3589 | "metadata": {}, 3590 | "outputs": [], 3591 | "source": [ 3592 | "df = pd.read_csv(\"pandas/csv_example_data_3.csv\", index_col=0)  # forward slashes are portable across operating systems" 3593 | ] 3594 | }, 3595 | { 3596 | "cell_type": "code", 3597 | "execution_count": 62, 3598 | "id": "e5ebe664", 3599 | "metadata": {}, 3600 | "outputs": [ 3601 | { 3602 | "data": { 3603 | "text/html": [ 3604 | "
\n", 3605 | "\n", 3618 | "\n", 3619 | " \n", 3620 | " \n", 3621 | " \n", 3622 | " \n", 3623 | " \n", 3624 | " \n", 3625 | " \n", 3626 | " \n", 3627 | " \n", 3628 | " \n", 3629 | " \n", 3630 | " \n", 3631 | " \n", 3632 | " \n", 3633 | " \n", 3634 | " \n", 3635 | " \n", 3636 | " \n", 3637 | " \n", 3638 | " \n", 3639 | " \n", 3640 | " \n", 3641 | " \n", 3642 | " \n", 3643 | " \n", 3644 | " \n", 3645 | " \n", 3646 | " \n", 3647 | " \n", 3648 | " \n", 3649 | " \n", 3650 | " \n", 3651 | " \n", 3652 | " \n", 3653 | " \n", 3654 | " \n", 3655 | " \n", 3656 | " \n", 3657 | " \n", 3658 | " \n", 3659 | "
employeedepartmentYear
0JuliaData Science2005
1MarieWeb Development2008
2AdamData Science2011
3NicoleCyber Security2002
4JosephApp Developer2007
\n", 3660 | "
" 3661 | ], 3662 | "text/plain": [ 3663 | " employee department Year\n", 3664 | "0 Julia Data Science 2005\n", 3665 | "1 Marie Web Development 2008\n", 3666 | "2 Adam Data Science 2011\n", 3667 | "3 Nicole Cyber Security 2002\n", 3668 | "4 Joseph App Developer 2007" 3669 | ] 3670 | }, 3671 | "execution_count": 62, 3672 | "metadata": {}, 3673 | "output_type": "execute_result" 3674 | } 3675 | ], 3676 | "source": [ 3677 | "df.head()" 3678 | ] 3679 | }, 3680 | { 3681 | "cell_type": "code", 3682 | "execution_count": 63, 3683 | "id": "494c7b2c", 3684 | "metadata": {}, 3685 | "outputs": [], 3686 | "source": [ 3687 | "df2 = pd.read_csv(\"pandas/example_text_data.txt\")  # forward slashes are portable across operating systems" 3688 | ] 3689 | }, 3690 | { 3691 | "cell_type": "code", 3692 | "execution_count": 64, 3693 | "id": "15c903f2", 3694 | "metadata": {}, 3695 | "outputs": [ 3696 | { 3697 | "data": { 3698 | "text/html": [ 3699 | "
\n", 3700 | "\n", 3713 | "\n", 3714 | " \n", 3715 | " \n", 3716 | " \n", 3717 | " \n", 3718 | " \n", 3719 | " \n", 3720 | " \n", 3721 | " \n", 3722 | " \n", 3723 | " \n", 3724 | " \n", 3725 | " \n", 3726 | " \n", 3727 | " \n", 3728 | " \n", 3729 | " \n", 3730 | " \n", 3731 | " \n", 3732 | " \n", 3733 | " \n", 3734 | " \n", 3735 | " \n", 3736 | " \n", 3737 | " \n", 3738 | " \n", 3739 | " \n", 3740 | " \n", 3741 | " \n", 3742 | " \n", 3743 | " \n", 3744 | " \n", 3745 | " \n", 3746 | " \n", 3747 | " \n", 3748 | " \n", 3749 | " \n", 3750 | " \n", 3751 | " \n", 3752 | " \n", 3753 | " \n", 3754 | " \n", 3755 | " \n", 3756 | " \n", 3757 | " \n", 3758 | "
VAL1 VAL2
010 100
125 250
230 300
35 50
48 80
53 30
615 150
77 70
86 60
\n", 3759 | "
" 3760 | ], 3761 | "text/plain": [ 3762 | " VAL1 VAL2\n", 3763 | "0 10 100\n", 3764 | "1 25 250\n", 3765 | "2 30 300\n", 3766 | "3 5 50\n", 3767 | "4 8 80\n", 3768 | "5 3 30\n", 3769 | "6 15 150\n", 3770 | "7 7 70\n", 3771 | "8 6 60" 3772 | ] 3773 | }, 3774 | "execution_count": 64, 3775 | "metadata": {}, 3776 | "output_type": "execute_result" 3777 | } 3778 | ], 3779 | "source": [ 3780 | "df2" 3781 | ] 3782 | }, 3783 | { 3784 | "cell_type": "code", 3785 | "execution_count": 65, 3786 | "id": "1f11d542", 3787 | "metadata": {}, 3788 | "outputs": [], 3789 | "source": [ 3790 | "df3 = pd.read_csv(\"pandas/example_text_data.txt\", delimiter = \" \")  # forward slashes are portable across operating systems" 3791 | ] 3792 | }, 3793 | { 3794 | "cell_type": "code", 3795 | "execution_count": 66, 3796 | "id": "2650f64f", 3797 | "metadata": {}, 3798 | "outputs": [ 3799 | { 3800 | "data": { 3801 | "text/html": [ 3802 | "
\n", 3803 | "\n", 3816 | "\n", 3817 | " \n", 3818 | " \n", 3819 | " \n", 3820 | " \n", 3821 | " \n", 3822 | " \n", 3823 | " \n", 3824 | " \n", 3825 | " \n", 3826 | " \n", 3827 | " \n", 3828 | " \n", 3829 | " \n", 3830 | " \n", 3831 | " \n", 3832 | " \n", 3833 | " \n", 3834 | " \n", 3835 | " \n", 3836 | " \n", 3837 | " \n", 3838 | " \n", 3839 | " \n", 3840 | " \n", 3841 | " \n", 3842 | " \n", 3843 | " \n", 3844 | " \n", 3845 | " \n", 3846 | " \n", 3847 | " \n", 3848 | " \n", 3849 | " \n", 3850 | " \n", 3851 | " \n", 3852 | " \n", 3853 | " \n", 3854 | " \n", 3855 | " \n", 3856 | " \n", 3857 | " \n", 3858 | " \n", 3859 | " \n", 3860 | " \n", 3861 | " \n", 3862 | " \n", 3863 | " \n", 3864 | " \n", 3865 | " \n", 3866 | " \n", 3867 | " \n", 3868 | " \n", 3869 | " \n", 3870 | " \n", 3871 | "
VAL1VAL2
010100
125250
230300
3550
4880
5330
615150
7770
8660
\n", 3872 | "
" 3873 | ], 3874 | "text/plain": [ 3875 | " VAL1 VAL2\n", 3876 | "0 10 100\n", 3877 | "1 25 250\n", 3878 | "2 30 300\n", 3879 | "3 5 50\n", 3880 | "4 8 80\n", 3881 | "5 3 30\n", 3882 | "6 15 150\n", 3883 | "7 7 70\n", 3884 | "8 6 60" 3885 | ] 3886 | }, 3887 | "execution_count": 66, 3888 | "metadata": {}, 3889 | "output_type": "execute_result" 3890 | } 3891 | ], 3892 | "source": [ 3893 | "df3" 3894 | ] 3895 | }, 3896 | { 3897 | "cell_type": "markdown", 3898 | "id": "f2534fab", 3899 | "metadata": {}, 3900 | "source": [ 3901 | "## Data Entry with Excel Files" 3902 | ] 3903 | }, 3904 | { 3905 | "cell_type": "code", 3906 | "execution_count": 67, 3907 | "id": "5d70e36b", 3908 | "metadata": {}, 3909 | "outputs": [], 3910 | "source": [ 3911 | "import pandas as pd" 3912 | ] 3913 | }, 3914 | { 3915 | "cell_type": "code", 3916 | "execution_count": 68, 3917 | "id": "79833759", 3918 | "metadata": {}, 3919 | "outputs": [], 3920 | "source": [ 3921 | "df1 = pd.read_excel(\"pandas/excel_example_data.xlsx\")  # forward slashes are portable across operating systems" 3922 | ] 3923 | }, 3924 | { 3925 | "cell_type": "code", 3926 | "execution_count": 69, 3927 | "id": "d2b1a127", 3928 | "metadata": {}, 3929 | "outputs": [ 3930 | { 3931 | "data": { 3932 | "text/html": [ 3933 | "
\n", 3934 | "\n", 3947 | "\n", 3948 | " \n", 3949 | " \n", 3950 | " \n", 3951 | " \n", 3952 | " \n", 3953 | " \n", 3954 | " \n", 3955 | " \n", 3956 | " \n", 3957 | " \n", 3958 | " \n", 3959 | " \n", 3960 | " \n", 3961 | " \n", 3962 | " \n", 3963 | " \n", 3964 | " \n", 3965 | " \n", 3966 | " \n", 3967 | " \n", 3968 | " \n", 3969 | " \n", 3970 | " \n", 3971 | " \n", 3972 | " \n", 3973 | " \n", 3974 | " \n", 3975 | " \n", 3976 | " \n", 3977 | " \n", 3978 | " \n", 3979 | " \n", 3980 | " \n", 3981 | " \n", 3982 | " \n", 3983 | " \n", 3984 | " \n", 3985 | " \n", 3986 | " \n", 3987 | " \n", 3988 | " \n", 3989 | " \n", 3990 | " \n", 3991 | " \n", 3992 | " \n", 3993 | " \n", 3994 | "
employeedepartmentYear
0JuliaData Science2005
1MarieWeb Development2008
2AdamData Science2011
3NicoleCyber Security2002
4JosephApp Developer2007
5KayraData Science2008
\n", 3995 | "
" 3996 | ], 3997 | "text/plain": [ 3998 | " employee department Year\n", 3999 | "0 Julia Data Science 2005\n", 4000 | "1 Marie Web Development 2008\n", 4001 | "2 Adam Data Science 2011\n", 4002 | "3 Nicole Cyber Security 2002\n", 4003 | "4 Joseph App Developer 2007\n", 4004 | "5 Kayra Data Science 2008" 4005 | ] 4006 | }, 4007 | "execution_count": 69, 4008 | "metadata": {}, 4009 | "output_type": "execute_result" 4010 | } 4011 | ], 4012 | "source": [ 4013 | "df1" 4014 | ] 4015 | }, 4016 | { 4017 | "cell_type": "code", 4018 | "execution_count": 70, 4019 | "id": "6e0fa9ef", 4020 | "metadata": {}, 4021 | "outputs": [], 4022 | "source": [ 4023 | "df2 = pd.read_excel(\"pandas/excel_example_data.xlsx\", sheet_name = \"Sheet2\")  # forward slashes are portable across operating systems" 4024 | ] 4025 | }, 4026 | { 4027 | "cell_type": "code", 4028 | "execution_count": 71, 4029 | "id": "440e2492", 4030 | "metadata": {}, 4031 | "outputs": [ 4032 | { 4033 | "data": { 4034 | "text/html": [ 4035 | "
\n", 4036 | "\n", 4049 | "\n", 4050 | " \n", 4051 | " \n", 4052 | " \n", 4053 | " \n", 4054 | " \n", 4055 | " \n", 4056 | " \n", 4057 | " \n", 4058 | " \n", 4059 | " \n", 4060 | " \n", 4061 | " \n", 4062 | " \n", 4063 | " \n", 4064 | " \n", 4065 | " \n", 4066 | " \n", 4067 | " \n", 4068 | " \n", 4069 | " \n", 4070 | " \n", 4071 | " \n", 4072 | " \n", 4073 | " \n", 4074 | " \n", 4075 | " \n", 4076 | " \n", 4077 | " \n", 4078 | " \n", 4079 | " \n", 4080 | " \n", 4081 | " \n", 4082 | " \n", 4083 | " \n", 4084 | " \n", 4085 | " \n", 4086 | " \n", 4087 | " \n", 4088 | " \n", 4089 | "
employeeage
0Julia35
1Marie40
2Adam38
3Nicole33
4Joseph29
5Kayra27
\n", 4090 | "
" 4091 | ], 4092 | "text/plain": [ 4093 | " employee age\n", 4094 | "0 Julia 35\n", 4095 | "1 Marie 40\n", 4096 | "2 Adam 38\n", 4097 | "3 Nicole 33\n", 4098 | "4 Joseph 29\n", 4099 | "5 Kayra 27" 4100 | ] 4101 | }, 4102 | "execution_count": 71, 4103 | "metadata": {}, 4104 | "output_type": "execute_result" 4105 | } 4106 | ], 4107 | "source": [ 4108 | "df2" 4109 | ] 4110 | }, 4111 | { 4112 | "cell_type": "code", 4113 | "execution_count": 72, 4114 | "id": "831b0c23", 4115 | "metadata": {}, 4116 | "outputs": [], 4117 | "source": [ 4118 | "df3 = pd.merge(df1, df2)" 4119 | ] 4120 | }, 4121 | { 4122 | "cell_type": "code", 4123 | "execution_count": 73, 4124 | "id": "b5914a5f", 4125 | "metadata": {}, 4126 | "outputs": [ 4127 | { 4128 | "data": { 4129 | "text/html": [ 4130 | "
\n", 4131 | "\n", 4144 | "\n", 4145 | " \n", 4146 | " \n", 4147 | " \n", 4148 | " \n", 4149 | " \n", 4150 | " \n", 4151 | " \n", 4152 | " \n", 4153 | " \n", 4154 | " \n", 4155 | " \n", 4156 | " \n", 4157 | " \n", 4158 | " \n", 4159 | " \n", 4160 | " \n", 4161 | " \n", 4162 | " \n", 4163 | " \n", 4164 | " \n", 4165 | " \n", 4166 | " \n", 4167 | " \n", 4168 | " \n", 4169 | " \n", 4170 | " \n", 4171 | " \n", 4172 | " \n", 4173 | " \n", 4174 | " \n", 4175 | " \n", 4176 | " \n", 4177 | " \n", 4178 | " \n", 4179 | " \n", 4180 | " \n", 4181 | " \n", 4182 | " \n", 4183 | " \n", 4184 | " \n", 4185 | " \n", 4186 | " \n", 4187 | " \n", 4188 | " \n", 4189 | " \n", 4190 | " \n", 4191 | " \n", 4192 | " \n", 4193 | " \n", 4194 | " \n", 4195 | " \n", 4196 | " \n", 4197 | " \n", 4198 | "
employeedepartmentYearage
0JuliaData Science200535
1MarieWeb Development200840
2AdamData Science201138
3NicoleCyber Security200233
4JosephApp Developer200729
5KayraData Science200827
\n", 4199 | "
" 4200 | ], 4201 | "text/plain": [ 4202 | " employee department Year age\n", 4203 | "0 Julia Data Science 2005 35\n", 4204 | "1 Marie Web Development 2008 40\n", 4205 | "2 Adam Data Science 2011 38\n", 4206 | "3 Nicole Cyber Security 2002 33\n", 4207 | "4 Joseph App Developer 2007 29\n", 4208 | "5 Kayra Data Science 2008 27" 4209 | ] 4210 | }, 4211 | "execution_count": 73, 4212 | "metadata": {}, 4213 | "output_type": "execute_result" 4214 | } 4215 | ], 4216 | "source": [ 4217 | "df3" 4218 | ] 4219 | }, 4220 | { 4221 | "cell_type": "markdown", 4222 | "id": "15ed8aa4", 4223 | "metadata": {}, 4224 | "source": [ 4225 | "## Data Output with Pandas Library\n", 4226 | "## Output of File with CSV Extension\n", 4227 | " " 4228 | ] 4229 | }, 4230 | { 4231 | "cell_type": "code", 4232 | "execution_count": 76, 4233 | "id": "7c6654da", 4234 | "metadata": {}, 4235 | "outputs": [], 4236 | "source": [ 4237 | "import pandas as pd" 4238 | ] 4239 | }, 4240 | { 4241 | "cell_type": "code", 4242 | "execution_count": 77, 4243 | "id": "08742a87", 4244 | "metadata": {}, 4245 | "outputs": [], 4246 | "source": [ 4247 | "df1 = pd.DataFrame({'employee': ['Julia', 'Marie', 'Adam', 'Nicole'],\n", 4248 | " 'department': ['Data Science', 'Web Development', 'Data Science', 'Cyber Security'],\n", 4249 | " 'Year': ['2005', '2008', '2011', '2002']})\n", 4250 | " \n", 4251 | "df2 = pd.DataFrame({'employee': ['Nicole', 'Adam', 'Julia', 'Marie'],\n", 4252 | " 'country': ['Canada', 'England', 'USA', 'Germany'],\n", 4253 | " 'salary': ['22000', '16000', '20000', '17500']}) " 4254 | ] 4255 | }, 4256 | { 4257 | "cell_type": "code", 4258 | "execution_count": 78, 4259 | "id": "03759175", 4260 | "metadata": { 4261 | "scrolled": true 4262 | }, 4263 | "outputs": [ 4264 | { 4265 | "data": { 4266 | "text/html": [ 4267 | "
\n", 4268 | "\n", 4281 | "\n", 4282 | " \n", 4283 | " \n", 4284 | " \n", 4285 | " \n", 4286 | " \n", 4287 | " \n", 4288 | " \n", 4289 | " \n", 4290 | " \n", 4291 | " \n", 4292 | " \n", 4293 | " \n", 4294 | " \n", 4295 | " \n", 4296 | " \n", 4297 | " \n", 4298 | " \n", 4299 | " \n", 4300 | " \n", 4301 | " \n", 4302 | " \n", 4303 | " \n", 4304 | " \n", 4305 | " \n", 4306 | " \n", 4307 | " \n", 4308 | " \n", 4309 | " \n", 4310 | " \n", 4311 | " \n", 4312 | " \n", 4313 | " \n", 4314 | " \n", 4315 | " \n", 4316 | "
employeedepartmentYear
0JuliaData Science2005
1MarieWeb Development2008
2AdamData Science2011
3NicoleCyber Security2002
\n", 4317 | "
" 4318 | ], 4319 | "text/plain": [ 4320 | " employee department Year\n", 4321 | "0 Julia Data Science 2005\n", 4322 | "1 Marie Web Development 2008\n", 4323 | "2 Adam Data Science 2011\n", 4324 | "3 Nicole Cyber Security 2002" 4325 | ] 4326 | }, 4327 | "execution_count": 78, 4328 | "metadata": {}, 4329 | "output_type": "execute_result" 4330 | } 4331 | ], 4332 | "source": [ 4333 | "df1" 4334 | ] 4335 | }, 4336 | { 4337 | "cell_type": "code", 4338 | "execution_count": 79, 4339 | "id": "8951838a", 4340 | "metadata": { 4341 | "scrolled": true 4342 | }, 4343 | "outputs": [ 4344 | { 4345 | "data": { 4346 | "text/html": [ 4347 | "
\n", 4348 | "\n", 4361 | "\n", 4362 | " \n", 4363 | " \n", 4364 | " \n", 4365 | " \n", 4366 | " \n", 4367 | " \n", 4368 | " \n", 4369 | " \n", 4370 | " \n", 4371 | " \n", 4372 | " \n", 4373 | " \n", 4374 | " \n", 4375 | " \n", 4376 | " \n", 4377 | " \n", 4378 | " \n", 4379 | " \n", 4380 | " \n", 4381 | " \n", 4382 | " \n", 4383 | " \n", 4384 | " \n", 4385 | " \n", 4386 | " \n", 4387 | " \n", 4388 | " \n", 4389 | " \n", 4390 | " \n", 4391 | " \n", 4392 | " \n", 4393 | " \n", 4394 | " \n", 4395 | " \n", 4396 | "
employeecountrysalary
0NicoleCanada22000
1AdamEngland16000
2JuliaUSA20000
3MarieGermany17500
\n", 4397 | "
" 4398 | ], 4399 | "text/plain": [ 4400 | " employee country salary\n", 4401 | "0 Nicole Canada 22000\n", 4402 | "1 Adam England 16000\n", 4403 | "2 Julia USA 20000\n", 4404 | "3 Marie Germany 17500" 4405 | ] 4406 | }, 4407 | "execution_count": 79, 4408 | "metadata": {}, 4409 | "output_type": "execute_result" 4410 | } 4411 | ], 4412 | "source": [ 4413 | "df2" 4414 | ] 4415 | }, 4416 | { 4417 | "cell_type": "code", 4418 | "execution_count": 80, 4419 | "id": "affaebf0", 4420 | "metadata": {}, 4421 | "outputs": [], 4422 | "source": [ 4423 | "df3 = pd.merge(df1, df2)" 4424 | ] 4425 | }, 4426 | { 4427 | "cell_type": "code", 4428 | "execution_count": 81, 4429 | "id": "c7b86528", 4430 | "metadata": {}, 4431 | "outputs": [ 4432 | { 4433 | "data": { 4434 | "text/html": [ 4435 | "
\n", 4436 | "\n", 4449 | "\n", 4450 | " \n", 4451 | " \n", 4452 | " \n", 4453 | " \n", 4454 | " \n", 4455 | " \n", 4456 | " \n", 4457 | " \n", 4458 | " \n", 4459 | " \n", 4460 | " \n", 4461 | " \n", 4462 | " \n", 4463 | " \n", 4464 | " \n", 4465 | " \n", 4466 | " \n", 4467 | " \n", 4468 | " \n", 4469 | " \n", 4470 | " \n", 4471 | " \n", 4472 | " \n", 4473 | " \n", 4474 | " \n", 4475 | " \n", 4476 | " \n", 4477 | " \n", 4478 | " \n", 4479 | " \n", 4480 | " \n", 4481 | " \n", 4482 | " \n", 4483 | " \n", 4484 | " \n", 4485 | " \n", 4486 | " \n", 4487 | " \n", 4488 | " \n", 4489 | " \n", 4490 | " \n", 4491 | " \n", 4492 | " \n", 4493 | " \n", 4494 | "
employeedepartmentYearcountrysalary
0JuliaData Science2005USA20000
1MarieWeb Development2008Germany17500
2AdamData Science2011England16000
3NicoleCyber Security2002Canada22000
\n", 4495 | "
" 4496 | ], 4497 | "text/plain": [ 4498 | " employee department Year country salary\n", 4499 | "0 Julia Data Science 2005 USA 20000\n", 4500 | "1 Marie Web Development 2008 Germany 17500\n", 4501 | "2 Adam Data Science 2011 England 16000\n", 4502 | "3 Nicole Cyber Security 2002 Canada 22000" 4503 | ] 4504 | }, 4505 | "execution_count": 81, 4506 | "metadata": {}, 4507 | "output_type": "execute_result" 4508 | } 4509 | ], 4510 | "source": [ 4511 | "df3" 4512 | ] 4513 | }, 4514 | { 4515 | "cell_type": "code", 4516 | "execution_count": 82, 4517 | "id": "20772126", 4518 | "metadata": {}, 4519 | "outputs": [], 4520 | "source": [ 4521 | "df3.to_csv(\"new_data_set.csv\")" 4522 | ] 4523 | }, 4524 | { 4525 | "cell_type": "code", 4526 | "execution_count": 83, 4527 | "id": "39ff724c", 4528 | "metadata": {}, 4529 | "outputs": [], 4530 | "source": [ 4531 | "df4 = pd.read_csv(\"new_data_set.csv\")" 4532 | ] 4533 | }, 4534 | { 4535 | "cell_type": "code", 4536 | "execution_count": 84, 4537 | "id": "24ffa12c", 4538 | "metadata": { 4539 | "scrolled": true 4540 | }, 4541 | "outputs": [ 4542 | { 4543 | "data": { 4544 | "text/html": [ 4545 | "
\n", 4546 | "\n", 4559 | "\n", 4560 | " \n", 4561 | " \n", 4562 | " \n", 4563 | " \n", 4564 | " \n", 4565 | " \n", 4566 | " \n", 4567 | " \n", 4568 | " \n", 4569 | " \n", 4570 | " \n", 4571 | " \n", 4572 | " \n", 4573 | " \n", 4574 | " \n", 4575 | " \n", 4576 | " \n", 4577 | " \n", 4578 | " \n", 4579 | " \n", 4580 | " \n", 4581 | " \n", 4582 | " \n", 4583 | " \n", 4584 | " \n", 4585 | " \n", 4586 | " \n", 4587 | " \n", 4588 | " \n", 4589 | " \n", 4590 | " \n", 4591 | " \n", 4592 | " \n", 4593 | " \n", 4594 | " \n", 4595 | " \n", 4596 | " \n", 4597 | " \n", 4598 | " \n", 4599 | " \n", 4600 | " \n", 4601 | " \n", 4602 | " \n", 4603 | " \n", 4604 | " \n", 4605 | " \n", 4606 | " \n", 4607 | " \n", 4608 | " \n", 4609 | "
Unnamed: 0employeedepartmentYearcountrysalary
00JuliaData Science2005USA20000
11MarieWeb Development2008Germany17500
22AdamData Science2011England16000
33NicoleCyber Security2002Canada22000
\n", 4610 | "
" 4611 | ], 4612 | "text/plain": [ 4613 | " Unnamed: 0 employee department Year country salary\n", 4614 | "0 0 Julia Data Science 2005 USA 20000\n", 4615 | "1 1 Marie Web Development 2008 Germany 17500\n", 4616 | "2 2 Adam Data Science 2011 England 16000\n", 4617 | "3 3 Nicole Cyber Security 2002 Canada 22000" 4618 | ] 4619 | }, 4620 | "execution_count": 84, 4621 | "metadata": {}, 4622 | "output_type": "execute_result" 4623 | } 4624 | ], 4625 | "source": [ 4626 | "df4" 4627 | ] 4628 | }, 4629 | { 4630 | "cell_type": "code", 4631 | "execution_count": 85, 4632 | "id": "da56db67", 4633 | "metadata": {}, 4634 | "outputs": [], 4635 | "source": [ 4636 | "df5 = pd.read_csv(\"new_data_set.csv\", index_col = 0)" 4637 | ] 4638 | }, 4639 | { 4640 | "cell_type": "code", 4641 | "execution_count": 86, 4642 | "id": "d76351a3", 4643 | "metadata": {}, 4644 | "outputs": [ 4645 | { 4646 | "data": { 4647 | "text/html": [ 4648 | "
\n", 4649 | "\n", 4662 | "\n", 4663 | " \n", 4664 | " \n", 4665 | " \n", 4666 | " \n", 4667 | " \n", 4668 | " \n", 4669 | " \n", 4670 | " \n", 4671 | " \n", 4672 | " \n", 4673 | " \n", 4674 | " \n", 4675 | " \n", 4676 | " \n", 4677 | " \n", 4678 | " \n", 4679 | " \n", 4680 | " \n", 4681 | " \n", 4682 | " \n", 4683 | " \n", 4684 | " \n", 4685 | " \n", 4686 | " \n", 4687 | " \n", 4688 | " \n", 4689 | " \n", 4690 | " \n", 4691 | " \n", 4692 | " \n", 4693 | " \n", 4694 | " \n", 4695 | " \n", 4696 | " \n", 4697 | " \n", 4698 | " \n", 4699 | " \n", 4700 | " \n", 4701 | " \n", 4702 | " \n", 4703 | " \n", 4704 | " \n", 4705 | " \n", 4706 | " \n", 4707 | "
employeedepartmentYearcountrysalary
0JuliaData Science2005USA20000
1MarieWeb Development2008Germany17500
2AdamData Science2011England16000
3NicoleCyber Security2002Canada22000
\n", 4708 | "
" 4709 | ], 4710 | "text/plain": [ 4711 | " employee department Year country salary\n", 4712 | "0 Julia Data Science 2005 USA 20000\n", 4713 | "1 Marie Web Development 2008 Germany 17500\n", 4714 | "2 Adam Data Science 2011 England 16000\n", 4715 | "3 Nicole Cyber Security 2002 Canada 22000" 4716 | ] 4717 | }, 4718 | "execution_count": 86, 4719 | "metadata": {}, 4720 | "output_type": "execute_result" 4721 | } 4722 | ], 4723 | "source": [ 4724 | "df5" 4725 | ] 4726 | }, 4727 | { 4728 | "cell_type": "code", 4729 | "execution_count": 87, 4730 | "id": "ba2c642e", 4731 | "metadata": {}, 4732 | "outputs": [], 4733 | "source": [ 4734 | "df3.to_csv(\"new_data_set2.csv\", index = False)" 4735 | ] 4736 | }, 4737 | { 4738 | "cell_type": "code", 4739 | "execution_count": 88, 4740 | "id": "37c53d52", 4741 | "metadata": {}, 4742 | "outputs": [], 4743 | "source": [ 4744 | "df6 = pd.read_csv(\"new_data_set2.csv\")" 4745 | ] 4746 | }, 4747 | { 4748 | "cell_type": "code", 4749 | "execution_count": 89, 4750 | "id": "cdeac636", 4751 | "metadata": { 4752 | "scrolled": true 4753 | }, 4754 | "outputs": [ 4755 | { 4756 | "data": { 4757 | "text/html": [ 4758 | "
\n", 4759 | "\n", 4772 | "\n", 4773 | " \n", 4774 | " \n", 4775 | " \n", 4776 | " \n", 4777 | " \n", 4778 | " \n", 4779 | " \n", 4780 | " \n", 4781 | " \n", 4782 | " \n", 4783 | " \n", 4784 | " \n", 4785 | " \n", 4786 | " \n", 4787 | " \n", 4788 | " \n", 4789 | " \n", 4790 | " \n", 4791 | " \n", 4792 | " \n", 4793 | " \n", 4794 | " \n", 4795 | " \n", 4796 | " \n", 4797 | " \n", 4798 | " \n", 4799 | " \n", 4800 | " \n", 4801 | " \n", 4802 | " \n", 4803 | " \n", 4804 | " \n", 4805 | " \n", 4806 | " \n", 4807 | " \n", 4808 | " \n", 4809 | " \n", 4810 | " \n", 4811 | " \n", 4812 | " \n", 4813 | " \n", 4814 | " \n", 4815 | " \n", 4816 | " \n", 4817 | "
employeedepartmentYearcountrysalary
0JuliaData Science2005USA20000
1MarieWeb Development2008Germany17500
2AdamData Science2011England16000
3NicoleCyber Security2002Canada22000
\n", 4818 | "
" 4819 | ], 4820 | "text/plain": [ 4821 | " employee department Year country salary\n", 4822 | "0 Julia Data Science 2005 USA 20000\n", 4823 | "1 Marie Web Development 2008 Germany 17500\n", 4824 | "2 Adam Data Science 2011 England 16000\n", 4825 | "3 Nicole Cyber Security 2002 Canada 22000" 4826 | ] 4827 | }, 4828 | "execution_count": 89, 4829 | "metadata": {}, 4830 | "output_type": "execute_result" 4831 | } 4832 | ], 4833 | "source": [ 4834 | "df6" 4835 | ] 4836 | }, 4837 | { 4838 | "cell_type": "markdown", 4839 | "id": "3b7e4e3b", 4840 | "metadata": {}, 4841 | "source": [ 4842 | "## Outputting as an Excel File" 4843 | ] 4844 | }, 4845 | { 4846 | "cell_type": "code", 4847 | "execution_count": 90, 4848 | "id": "40e30ce6", 4849 | "metadata": {}, 4850 | "outputs": [], 4851 | "source": [ 4852 | "import pandas as pd" 4853 | ] 4854 | }, 4855 | { 4856 | "cell_type": "code", 4857 | "execution_count": 91, 4858 | "id": "ecaaf3fc", 4859 | "metadata": {}, 4860 | "outputs": [], 4861 | "source": [ 4862 | "df1 = pd.DataFrame({'employee': ['Julia', 'Marie', 'Adam', 'Nicole'],\n", 4863 | " 'department': ['Data Science', 'Web Development', 'Data Science', 'Cyber Security'],\n", 4864 | " 'Year': ['2005', '2008', '2011', '2002']})\n", 4865 | " \n", 4866 | "df2 = pd.DataFrame({'employee': ['Nicole', 'Adam', 'Julia', 'Marie'],\n", 4867 | " 'country': ['Canada', 'England', 'USA', 'Germany'],\n", 4868 | " 'salary': ['22000', '16000', '20000', '17500']}) " 4869 | ] 4870 | }, 4871 | { 4872 | "cell_type": "code", 4873 | "execution_count": 92, 4874 | "id": "6bd6a284", 4875 | "metadata": {}, 4876 | "outputs": [], 4877 | "source": [ 4878 | "df3 = pd.merge(df1, df2)" 4879 | ] 4880 | }, 4881 | { 4882 | "cell_type": "code", 4883 | "execution_count": 93, 4884 | "id": "fe73506a", 4885 | "metadata": {}, 4886 | "outputs": [ 4887 | { 4888 | "data": { 4889 | "text/html": [ 4890 | "
\n", 4891 | "\n", 4904 | "\n", 4905 | " \n", 4906 | " \n", 4907 | " \n", 4908 | " \n", 4909 | " \n", 4910 | " \n", 4911 | " \n", 4912 | " \n", 4913 | " \n", 4914 | " \n", 4915 | " \n", 4916 | " \n", 4917 | " \n", 4918 | " \n", 4919 | " \n", 4920 | " \n", 4921 | " \n", 4922 | " \n", 4923 | " \n", 4924 | " \n", 4925 | " \n", 4926 | " \n", 4927 | " \n", 4928 | " \n", 4929 | " \n", 4930 | " \n", 4931 | " \n", 4932 | " \n", 4933 | " \n", 4934 | " \n", 4935 | " \n", 4936 | " \n", 4937 | " \n", 4938 | " \n", 4939 | " \n", 4940 | " \n", 4941 | " \n", 4942 | " \n", 4943 | " \n", 4944 | " \n", 4945 | " \n", 4946 | " \n", 4947 | " \n", 4948 | " \n", 4949 | "
employeedepartmentYearcountrysalary
0JuliaData Science2005USA20000
1MarieWeb Development2008Germany17500
2AdamData Science2011England16000
3NicoleCyber Security2002Canada22000
\n", 4950 | "
" 4951 | ], 4952 | "text/plain": [ 4953 | " employee department Year country salary\n", 4954 | "0 Julia Data Science 2005 USA 20000\n", 4955 | "1 Marie Web Development 2008 Germany 17500\n", 4956 | "2 Adam Data Science 2011 England 16000\n", 4957 | "3 Nicole Cyber Security 2002 Canada 22000" 4958 | ] 4959 | }, 4960 | "execution_count": 93, 4961 | "metadata": {}, 4962 | "output_type": "execute_result" 4963 | } 4964 | ], 4965 | "source": [ 4966 | "df3" 4967 | ] 4968 | }, 4969 | { 4970 | "cell_type": "code", 4971 | "execution_count": 94, 4972 | "id": "c8142a9c", 4973 | "metadata": {}, 4974 | "outputs": [], 4975 | "source": [ 4976 | "df3.to_excel(\"excel_sample.xlsx\", sheet_name = \"new_sheet1\", index = False)" 4977 | ] 4978 | }, 4979 | { 4980 | "cell_type": "code", 4981 | "execution_count": 95, 4982 | "id": "00497451", 4983 | "metadata": {}, 4984 | "outputs": [], 4985 | "source": [ 4986 | "df4 = pd.read_excel(\"excel_sample.xlsx\")" 4987 | ] 4988 | }, 4989 | { 4990 | "cell_type": "code", 4991 | "execution_count": 96, 4992 | "id": "7c8c0584", 4993 | "metadata": {}, 4994 | "outputs": [ 4995 | { 4996 | "data": { 4997 | "text/html": [ 4998 | "
\n", 4999 | "\n", 5012 | "\n", 5013 | " \n", 5014 | " \n", 5015 | " \n", 5016 | " \n", 5017 | " \n", 5018 | " \n", 5019 | " \n", 5020 | " \n", 5021 | " \n", 5022 | " \n", 5023 | " \n", 5024 | " \n", 5025 | " \n", 5026 | " \n", 5027 | " \n", 5028 | " \n", 5029 | " \n", 5030 | " \n", 5031 | " \n", 5032 | " \n", 5033 | " \n", 5034 | " \n", 5035 | " \n", 5036 | " \n", 5037 | " \n", 5038 | " \n", 5039 | " \n", 5040 | " \n", 5041 | " \n", 5042 | " \n", 5043 | " \n", 5044 | " \n", 5045 | " \n", 5046 | " \n", 5047 | " \n", 5048 | " \n", 5049 | " \n", 5050 | " \n", 5051 | " \n", 5052 | " \n", 5053 | " \n", 5054 | " \n", 5055 | " \n", 5056 | " \n", 5057 | "
employeedepartmentYearcountrysalary
0JuliaData Science2005USA20000
1MarieWeb Development2008Germany17500
2AdamData Science2011England16000
3NicoleCyber Security2002Canada22000
\n", 5058 | "
" 5059 | ], 5060 | "text/plain": [ 5061 | " employee department Year country salary\n", 5062 | "0 Julia Data Science 2005 USA 20000\n", 5063 | "1 Marie Web Development 2008 Germany 17500\n", 5064 | "2 Adam Data Science 2011 England 16000\n", 5065 | "3 Nicole Cyber Security 2002 Canada 22000" 5066 | ] 5067 | }, 5068 | "execution_count": 96, 5069 | "metadata": {}, 5070 | "output_type": "execute_result" 5071 | } 5072 | ], 5073 | "source": [ 5074 | "df4" 5075 | ] 5076 | } 5077 | ], 5078 | "metadata": { 5079 | "kernelspec": { 5080 | "display_name": "Python 3", 5081 | "language": "python", 5082 | "name": "python3" 5083 | }, 5084 | "language_info": { 5085 | "codemirror_mode": { 5086 | "name": "ipython", 5087 | "version": 3 5088 | }, 5089 | "file_extension": ".py", 5090 | "mimetype": "text/x-python", 5091 | "name": "python", 5092 | "nbconvert_exporter": "python", 5093 | "pygments_lexer": "ipython3", 5094 | "version": "3.8.8" 5095 | } 5096 | }, 5097 | "nbformat": 4, 5098 | "nbformat_minor": 5 5099 | } 5100 | --------------------------------------------------------------------------------