├── .gitignore ├── 01-Loading Data.ipynb ├── 02-DataFrame and Series.ipynb ├── 03-Indexes.ipynb ├── 04-Filtering.ipynb ├── 05-Updating Rows and Columns.ipynb ├── 06-Add Remove Rows and Columns.ipynb ├── 07-Sorting Data.ipynb ├── 08-Grouping and Aggregating.ipynb ├── 09-Cleaning Data.ipynb ├── 10-Working with Dates and Time Series Data.ipynb ├── 11-Reading and Writing Data.ipynb ├── README.md ├── data ├── ETH_1h.csv.zip ├── README_2019.txt ├── so_survey_2019.pdf ├── survey_results_public.csv.zip ├── survey_results_schema.csv └── survey_results_schema.csv.zip └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 152 | #.idea/ 153 | 154 | *.csv 155 | *.json 156 | *.tsv 157 | *.xlsx 158 | 159 | -------------------------------------------------------------------------------- /02-DataFrame and Series.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# DataFrame and Series Basics" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 3, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 4, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "person = {\n", 26 | " \"first\": \"Phil\",\n", 27 | " \"last\": \"Lembo\",\n", 28 | " \"email\": \"phil.lembo@gmail.com\"\n", 29 | "}" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 5, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "people = {\n", 39 | " \"first\": [\"Phil\"],\n", 40 | " \"last\": [\"Lembo\"],\n", 41 | " \"email\": [\"phil.lembo@gmail.com\"]\n", 42 | "}" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 6, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "people = {\n", 52 | " \"first\": [\"Phil\", \"Jane\", \"Rob\"],\n", 53 | " \"last\": [\"Lembo\", \"Doe\", \"Roe\"],\n", 54 | " \"email\": [\"phil.lembo@gmail.com\", \"janedoe@email.com\", \"robroe@email.com\"]\n", 55 | "}" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 7, 61 | "metadata": {}, 62 | "outputs": [ 63 | { 64 | "data": { 65 | "text/plain": [ 66 | "['phil.lembo@gmail.com', 'janedoe@email.com', 'robroe@email.com']" 67 | ] 68 | }, 69 | "execution_count": 7, 70 | "metadata": {}, 71 | "output_type": "execute_result" 72 | } 73 | ], 74 | "source": [ 75 | "people[\"email\"]" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 8, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [ 84 | "df = pd.DataFrame(people)" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 9, 90 | "metadata": {}, 91 | "outputs": [ 92 | { 93 | "data": { 94 | "text/html": [ 95 | "
\n", 96 | "\n", 109 | "\n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | "
firstlastemail
0PhilLembophil.lembo@gmail.com
1JaneDoejanedoe@email.com
2RobRoerobroe@email.com
\n", 139 | "
" 140 | ], 141 | "text/plain": [ 142 | " first last email\n", 143 | "0 Phil Lembo phil.lembo@gmail.com\n", 144 | "1 Jane Doe janedoe@email.com\n", 145 | "2 Rob Roe robroe@email.com" 146 | ] 147 | }, 148 | "execution_count": 9, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "df" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "metadata": {}, 160 | "source": [ 161 | "There are two major data types in pandas: DataFrames and Series, and df here has the DataFrame data type." 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": 40, 167 | "metadata": {}, 168 | "outputs": [ 169 | { 170 | "data": { 171 | "text/plain": [ 172 | "pandas.core.frame.DataFrame" 173 | ] 174 | }, 175 | "execution_count": 40, 176 | "metadata": {}, 177 | "output_type": "execute_result" 178 | } 179 | ], 180 | "source": [ 181 | "type(df)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 10, 187 | "metadata": {}, 188 | "outputs": [ 189 | { 190 | "data": { 191 | "text/plain": [ 192 | "0 phil.lembo@gmail.com\n", 193 | "1 janedoe@email.com\n", 194 | "2 robroe@email.com\n", 195 | "Name: email, dtype: object" 196 | ] 197 | }, 198 | "execution_count": 10, 199 | "metadata": {}, 200 | "output_type": "execute_result" 201 | } 202 | ], 203 | "source": [ 204 | "df['email']" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "The type of the data stored in email is a pandas Series." 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 11, 217 | "metadata": {}, 218 | "outputs": [ 219 | { 220 | "data": { 221 | "text/plain": [ 222 | "pandas.core.series.Series" 223 | ] 224 | }, 225 | "execution_count": 11, 226 | "metadata": {}, 227 | "output_type": "execute_result" 228 | } 229 | ], 230 | "source": [ 231 | "type(df['email'])" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "This is an alternative way of calling the email column, but using it risks it being confused with methods." 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 12, 244 | "metadata": {}, 245 | "outputs": [ 246 | { 247 | "data": { 248 | "text/plain": [ 249 | "0 phil.lembo@gmail.com\n", 250 | "1 janedoe@email.com\n", 251 | "2 robroe@email.com\n", 252 | "Name: email, dtype: object" 253 | ] 254 | }, 255 | "execution_count": 12, 256 | "metadata": {}, 257 | "output_type": "execute_result" 258 | } 259 | ], 260 | "source": [ 261 | "df.email" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "Pass a list of columns using double-bracket notation." 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 13, 274 | "metadata": {}, 275 | "outputs": [ 276 | { 277 | "data": { 278 | "text/html": [ 279 | "
\n", 280 | "\n", 293 | "\n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | "
lastemail
0Lembophil.lembo@gmail.com
1Doejanedoe@email.com
2Roerobroe@email.com
\n", 319 | "
" 320 | ], 321 | "text/plain": [ 322 | " last email\n", 323 | "0 Lembo phil.lembo@gmail.com\n", 324 | "1 Doe janedoe@email.com\n", 325 | "2 Roe robroe@email.com" 326 | ] 327 | }, 328 | "execution_count": 13, 329 | "metadata": {}, 330 | "output_type": "execute_result" 331 | } 332 | ], 333 | "source": [ 334 | "df[['last', 'email']]" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "This retrieves a DataFrame." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": 14, 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/plain": [ 352 | "pandas.core.frame.DataFrame" 353 | ] 354 | }, 355 | "execution_count": 14, 356 | "metadata": {}, 357 | "output_type": "execute_result" 358 | } 359 | ], 360 | "source": [ 361 | "type(df[['last', 'email']])" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "Show columns in dataframe." 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 15, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/plain": [ 379 | "Index(['first', 'last', 'email'], dtype='object')" 380 | ] 381 | }, 382 | "execution_count": 15, 383 | "metadata": {}, 384 | "output_type": "execute_result" 385 | } 386 | ], 387 | "source": [ 388 | "df.columns" 389 | ] 390 | }, 391 | { 392 | "cell_type": "markdown", 393 | "metadata": {}, 394 | "source": [ 395 | "To get rows, use loc and iloc indexers.\n", 396 | "\n", 397 | "iloc = \"integer location\", seach by numeric index\n", 398 | "\n", 399 | "loc = search by label\n" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 16, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "data": { 409 | "text/plain": [ 410 | "first Phil\n", 411 | "last Lembo\n", 412 | "email phil.lembo@gmail.com\n", 413 | "Name: 0, dtype: object" 414 | ] 415 | }, 416 | "execution_count": 16, 417 | "metadata": {}, 418 | "output_type": "execute_result" 419 | } 420 | ], 421 | "source": [ 422 | "df.iloc[0]" 423 | ] 424 | }, 425 | { 426 | "cell_type": "code", 427 | "execution_count": 17, 428 | "metadata": {}, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "text/plain": [ 433 | "pandas.core.series.Series" 434 | ] 435 | }, 436 | "execution_count": 17, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "type(df.iloc[0])" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 18, 448 | "metadata": {}, 449 | "outputs": [ 450 | { 451 | "data": { 452 | "text/html": [ 453 | "
\n", 454 | "\n", 467 | "\n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | "
firstlastemail
0PhilLembophil.lembo@gmail.com
1JaneDoejanedoe@email.com
\n", 491 | "
" 492 | ], 493 | "text/plain": [ 494 | " first last email\n", 495 | "0 Phil Lembo phil.lembo@gmail.com\n", 496 | "1 Jane Doe janedoe@email.com" 497 | ] 498 | }, 499 | "execution_count": 18, 500 | "metadata": {}, 501 | "output_type": "execute_result" 502 | } 503 | ], 504 | "source": [ 505 | "df.iloc[[0, 1]]" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": {}, 511 | "source": [ 512 | "Grab first two rows of the email column (column 3, index 2)." 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 19, 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "data": { 522 | "text/plain": [ 523 | "0 phil.lembo@gmail.com\n", 524 | "1 janedoe@email.com\n", 525 | "Name: email, dtype: object" 526 | ] 527 | }, 528 | "execution_count": 19, 529 | "metadata": {}, 530 | "output_type": "execute_result" 531 | } 532 | ], 533 | "source": [ 534 | "df.iloc[[0, 1], 2]" 535 | ] 536 | }, 537 | { 538 | "cell_type": "markdown", 539 | "metadata": {}, 540 | "source": [ 541 | "Using loc without custom labels, looks alot like iloc because you have to use a numeric value." 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 20, 547 | "metadata": {}, 548 | "outputs": [ 549 | { 550 | "data": { 551 | "text/plain": [ 552 | "first Phil\n", 553 | "last Lembo\n", 554 | "email phil.lembo@gmail.com\n", 555 | "Name: 0, dtype: object" 556 | ] 557 | }, 558 | "execution_count": 20, 559 | "metadata": {}, 560 | "output_type": "execute_result" 561 | } 562 | ], 563 | "source": [ 564 | "df.loc[0]" 565 | ] 566 | }, 567 | { 568 | "cell_type": "code", 569 | "execution_count": 21, 570 | "metadata": {}, 571 | "outputs": [ 572 | { 573 | "data": { 574 | "text/html": [ 575 | "
\n", 576 | "\n", 589 | "\n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | "
firstlastemail
0PhilLembophil.lembo@gmail.com
1JaneDoejanedoe@email.com
\n", 613 | "
" 614 | ], 615 | "text/plain": [ 616 | " first last email\n", 617 | "0 Phil Lembo phil.lembo@gmail.com\n", 618 | "1 Jane Doe janedoe@email.com" 619 | ] 620 | }, 621 | "execution_count": 21, 622 | "metadata": {}, 623 | "output_type": "execute_result" 624 | } 625 | ], 626 | "source": [ 627 | "df.loc[[0, 1]]" 628 | ] 629 | }, 630 | { 631 | "cell_type": "markdown", 632 | "metadata": {}, 633 | "source": [ 634 | "But now we can use a column label." 635 | ] 636 | }, 637 | { 638 | "cell_type": "code", 639 | "execution_count": 22, 640 | "metadata": {}, 641 | "outputs": [ 642 | { 643 | "data": { 644 | "text/plain": [ 645 | "0 phil.lembo@gmail.com\n", 646 | "1 janedoe@email.com\n", 647 | "Name: email, dtype: object" 648 | ] 649 | }, 650 | "execution_count": 22, 651 | "metadata": {}, 652 | "output_type": "execute_result" 653 | } 654 | ], 655 | "source": [ 656 | "df.loc[[0, 1], 'email']" 657 | ] 658 | }, 659 | { 660 | "cell_type": "markdown", 661 | "metadata": {}, 662 | "source": [ 663 | "... or a list of labels!" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 23, 669 | "metadata": {}, 670 | "outputs": [ 671 | { 672 | "data": { 673 | "text/html": [ 674 | "
\n", 675 | "\n", 688 | "\n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | "
emaillast
0phil.lembo@gmail.comLembo
1janedoe@email.comDoe
\n", 709 | "
" 710 | ], 711 | "text/plain": [ 712 | " email last\n", 713 | "0 phil.lembo@gmail.com Lembo\n", 714 | "1 janedoe@email.com Doe" 715 | ] 716 | }, 717 | "execution_count": 23, 718 | "metadata": {}, 719 | "output_type": "execute_result" 720 | } 721 | ], 722 | "source": [ 723 | "df.loc[[0, 1], ['email', 'last']]" 724 | ] 725 | }, 726 | { 727 | "cell_type": "code", 728 | "execution_count": 24, 729 | "metadata": {}, 730 | "outputs": [], 731 | "source": [ 732 | "res_df = pd.read_csv('data/survey_results_public.csv')\n", 733 | "schema_df = pd.read_csv('data/survey_results_schema.csv')\n", 734 | "pd.set_option('display.max_columns', 85)\n", 735 | "pd.set_option('display.max_rows', 85)" 736 | ] 737 | }, 738 | { 739 | "cell_type": "markdown", 740 | "metadata": {}, 741 | "source": [ 742 | "Basic characteristics of dataframe (number of rows, number of columns)." 743 | ] 744 | }, 745 | { 746 | "cell_type": "code", 747 | "execution_count": 25, 748 | "metadata": {}, 749 | "outputs": [ 750 | { 751 | "data": { 752 | "text/plain": [ 753 | "(88883, 85)" 754 | ] 755 | }, 756 | "execution_count": 25, 757 | "metadata": {}, 758 | "output_type": "execute_result" 759 | } 760 | ], 761 | "source": [ 762 | "res_df.shape" 763 | ] 764 | }, 765 | { 766 | "cell_type": "markdown", 767 | "metadata": {}, 768 | "source": [ 769 | "List all the column labels." 770 | ] 771 | }, 772 | { 773 | "cell_type": "code", 774 | "execution_count": 26, 775 | "metadata": {}, 776 | "outputs": [ 777 | { 778 | "data": { 779 | "text/plain": [ 780 | "Index(['Respondent', 'MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource',\n", 781 | " 'Employment', 'Country', 'Student', 'EdLevel', 'UndergradMajor',\n", 782 | " 'EduOther', 'OrgSize', 'DevType', 'YearsCode', 'Age1stCode',\n", 783 | " 'YearsCodePro', 'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney',\n", 784 | " 'MgrWant', 'JobSeek', 'LastHireDate', 'LastInt', 'FizzBuzz',\n", 785 | " 'JobFactors', 'ResumeUpdate', 'CurrencySymbol', 'CurrencyDesc',\n", 786 | " 'CompTotal', 'CompFreq', 'ConvertedComp', 'WorkWeekHrs', 'WorkPlan',\n", 787 | " 'WorkChallenge', 'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev',\n", 788 | " 'CodeRevHrs', 'UnitTests', 'PurchaseHow', 'PurchaseWhat',\n", 789 | " 'LanguageWorkedWith', 'LanguageDesireNextYear', 'DatabaseWorkedWith',\n", 790 | " 'DatabaseDesireNextYear', 'PlatformWorkedWith',\n", 791 | " 'PlatformDesireNextYear', 'WebFrameWorkedWith',\n", 792 | " 'WebFrameDesireNextYear', 'MiscTechWorkedWith',\n", 793 | " 'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',\n", 794 | " 'BlockchainOrg', 'BlockchainIs', 'BetterLife', 'ITperson', 'OffOn',\n", 795 | " 'SocialMedia', 'Extraversion', 'ScreenName', 'SOVisit1st',\n", 796 | " 'SOVisitFreq', 'SOVisitTo', 'SOFindAnswer', 'SOTimeSaved',\n", 797 | " 'SOHowMuchTime', 'SOAccount', 'SOPartFreq', 'SOJobs', 'EntTeams',\n", 798 | " 'SOComm', 'WelcomeChange', 'SONewContent', 'Age', 'Gender', 'Trans',\n", 799 | " 'Sexuality', 'Ethnicity', 'Dependents', 'SurveyLength', 'SurveyEase'],\n", 800 | " dtype='object')" 801 | ] 802 | }, 803 | "execution_count": 26, 804 | "metadata": {}, 805 | "output_type": "execute_result" 806 | } 807 | ], 808 | "source": [ 809 | "res_df.columns" 810 | ] 811 | }, 812 | { 813 | "cell_type": "markdown", 814 | "metadata": {}, 815 | "source": [ 816 | "All responses in the Hobbyist column." 
817 | ] 818 | }, 819 | { 820 | "cell_type": "code", 821 | "execution_count": 27, 822 | "metadata": {}, 823 | "outputs": [ 824 | { 825 | "data": { 826 | "text/plain": [ 827 | "0 Yes\n", 828 | "1 No\n", 829 | "2 Yes\n", 830 | "3 No\n", 831 | "4 Yes\n", 832 | " ... \n", 833 | "88878 Yes\n", 834 | "88879 No\n", 835 | "88880 No\n", 836 | "88881 No\n", 837 | "88882 Yes\n", 838 | "Name: Hobbyist, Length: 88883, dtype: object" 839 | ] 840 | }, 841 | "execution_count": 27, 842 | "metadata": {}, 843 | "output_type": "execute_result" 844 | } 845 | ], 846 | "source": [ 847 | "res_df['Hobbyist']" 848 | ] 849 | }, 850 | { 851 | "cell_type": "markdown", 852 | "metadata": {}, 853 | "source": [ 854 | "Number of each response to question ('Yeses' and 'Nos')." 855 | ] 856 | }, 857 | { 858 | "cell_type": "code", 859 | "execution_count": 28, 860 | "metadata": {}, 861 | "outputs": [ 862 | { 863 | "data": { 864 | "text/plain": [ 865 | "Yes 71257\n", 866 | "No 17626\n", 867 | "Name: Hobbyist, dtype: int64" 868 | ] 869 | }, 870 | "execution_count": 28, 871 | "metadata": {}, 872 | "output_type": "execute_result" 873 | } 874 | ], 875 | "source": [ 876 | "res_df['Hobbyist'].value_counts()" 877 | ] 878 | }, 879 | { 880 | "cell_type": "markdown", 881 | "metadata": {}, 882 | "source": [ 883 | "All responses from first row." 884 | ] 885 | }, 886 | { 887 | "cell_type": "code", 888 | "execution_count": 29, 889 | "metadata": {}, 890 | "outputs": [ 891 | { 892 | "data": { 893 | "text/plain": [ 894 | "Respondent 1\n", 895 | "MainBranch I am a student who is learning to code\n", 896 | "Hobbyist Yes\n", 897 | "OpenSourcer Never\n", 898 | "OpenSource The quality of OSS and closed source software ...\n", 899 | "Employment Not employed, and not looking for work\n", 900 | "Country United Kingdom\n", 901 | "Student No\n", 902 | "EdLevel Primary/elementary school\n", 903 | "UndergradMajor NaN\n", 904 | "EduOther Taught yourself a new language, framework, or ...\n", 905 | "OrgSize NaN\n", 906 | "DevType NaN\n", 907 | "YearsCode 4\n", 908 | "Age1stCode 10\n", 909 | "YearsCodePro NaN\n", 910 | "CareerSat NaN\n", 911 | "JobSat NaN\n", 912 | "MgrIdiot NaN\n", 913 | "MgrMoney NaN\n", 914 | "MgrWant NaN\n", 915 | "JobSeek NaN\n", 916 | "LastHireDate NaN\n", 917 | "LastInt NaN\n", 918 | "FizzBuzz NaN\n", 919 | "JobFactors NaN\n", 920 | "ResumeUpdate NaN\n", 921 | "CurrencySymbol NaN\n", 922 | "CurrencyDesc NaN\n", 923 | "CompTotal NaN\n", 924 | "CompFreq NaN\n", 925 | "ConvertedComp NaN\n", 926 | "WorkWeekHrs NaN\n", 927 | "WorkPlan NaN\n", 928 | "WorkChallenge NaN\n", 929 | "WorkRemote NaN\n", 930 | "WorkLoc NaN\n", 931 | "ImpSyn NaN\n", 932 | "CodeRev NaN\n", 933 | "CodeRevHrs NaN\n", 934 | "UnitTests NaN\n", 935 | "PurchaseHow NaN\n", 936 | "PurchaseWhat NaN\n", 937 | "LanguageWorkedWith HTML/CSS;Java;JavaScript;Python\n", 938 | "LanguageDesireNextYear C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL\n", 939 | "DatabaseWorkedWith SQLite\n", 940 | "DatabaseDesireNextYear MySQL\n", 941 | "PlatformWorkedWith MacOS;Windows\n", 942 | "PlatformDesireNextYear Android;Arduino;Windows\n", 943 | "WebFrameWorkedWith Django;Flask\n", 944 | "WebFrameDesireNextYear Flask;jQuery\n", 945 | "MiscTechWorkedWith Node.js\n", 946 | "MiscTechDesireNextYear Node.js\n", 947 | "DevEnviron IntelliJ;Notepad++;PyCharm\n", 948 | "OpSys Windows\n", 949 | "Containers I do not use containers\n", 950 | "BlockchainOrg NaN\n", 951 | "BlockchainIs NaN\n", 952 | "BetterLife Yes\n", 953 | "ITperson Fortunately, someone else has that title\n", 954 | "OffOn Yes\n", 955 | 
"SocialMedia Twitter\n", 956 | "Extraversion Online\n", 957 | "ScreenName Username\n", 958 | "SOVisit1st 2017\n", 959 | "SOVisitFreq A few times per month or weekly\n", 960 | "SOVisitTo Find answers to specific questions;Learn how t...\n", 961 | "SOFindAnswer 3-5 times per week\n", 962 | "SOTimeSaved Stack Overflow was much faster\n", 963 | "SOHowMuchTime 31-60 minutes\n", 964 | "SOAccount No\n", 965 | "SOPartFreq NaN\n", 966 | "SOJobs No, I didn't know that Stack Overflow had a jo...\n", 967 | "EntTeams No, and I don't know what those are\n", 968 | "SOComm Neutral\n", 969 | "WelcomeChange Just as welcome now as I felt last year\n", 970 | "SONewContent Tech articles written by other developers;Indu...\n", 971 | "Age 14\n", 972 | "Gender Man\n", 973 | "Trans No\n", 974 | "Sexuality Straight / Heterosexual\n", 975 | "Ethnicity NaN\n", 976 | "Dependents No\n", 977 | "SurveyLength Appropriate in length\n", 978 | "SurveyEase Neither easy nor difficult\n", 979 | "Name: 0, dtype: object" 980 | ] 981 | }, 982 | "execution_count": 29, 983 | "metadata": {}, 984 | "output_type": "execute_result" 985 | } 986 | ], 987 | "source": [ 988 | "res_df.loc[0]" 989 | ] 990 | }, 991 | { 992 | "cell_type": "markdown", 993 | "metadata": {}, 994 | "source": [ 995 | "Get responses to Hobbyist question in first three rows by passing in a list of rows." 996 | ] 997 | }, 998 | { 999 | "cell_type": "code", 1000 | "execution_count": 30, 1001 | "metadata": {}, 1002 | "outputs": [ 1003 | { 1004 | "data": { 1005 | "text/plain": [ 1006 | "0 Yes\n", 1007 | "1 No\n", 1008 | "2 Yes\n", 1009 | "Name: Hobbyist, dtype: object" 1010 | ] 1011 | }, 1012 | "execution_count": 30, 1013 | "metadata": {}, 1014 | "output_type": "execute_result" 1015 | } 1016 | ], 1017 | "source": [ 1018 | "res_df.loc[[0, 1, 2], 'Hobbyist']" 1019 | ] 1020 | }, 1021 | { 1022 | "cell_type": "markdown", 1023 | "metadata": {}, 1024 | "source": [ 1025 | "Can also use slice notation to retrieve a range of rows." 1026 | ] 1027 | }, 1028 | { 1029 | "cell_type": "code", 1030 | "execution_count": 34, 1031 | "metadata": {}, 1032 | "outputs": [ 1033 | { 1034 | "data": { 1035 | "text/html": [ 1036 | "
\n", 1037 | "\n", 1050 | "\n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | " \n", 1197 | " \n", 1198 | " \n", 1199 | " \n", 1200 | " \n", 1201 | " \n", 1202 | " \n", 1203 | " \n", 1204 | " \n", 1205 | " \n", 1206 | " \n", 1207 | " \n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | " \n", 1241 | " \n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " 
\n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | " \n", 1347 | " \n", 1348 | " \n", 1349 | " \n", 1350 | " \n", 1351 | " \n", 1352 | " \n", 1353 | " \n", 1354 | " \n", 1355 | " \n", 1356 | " \n", 1357 | " \n", 1358 | " \n", 1359 | " \n", 1360 | " \n", 1361 | " \n", 1362 | " \n", 1363 | " \n", 1364 | " \n", 1365 | " \n", 1366 | " \n", 1367 | " \n", 1368 | " \n", 1369 | " \n", 1370 | " \n", 1371 | " \n", 1372 | " \n", 1373 | " \n", 1374 | " \n", 1375 | " \n", 1376 | " \n", 1377 | " \n", 1378 | " \n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | "
RespondentMainBranchHobbyistOpenSourcerOpenSourceEmploymentCountryStudentEdLevelUndergradMajorEduOtherOrgSizeDevTypeYearsCodeAge1stCodeYearsCodeProCareerSatJobSatMgrIdiotMgrMoneyMgrWantJobSeekLastHireDateLastIntFizzBuzzJobFactorsResumeUpdateCurrencySymbolCurrencyDescCompTotalCompFreqConvertedCompWorkWeekHrsWorkPlanWorkChallengeWorkRemoteWorkLocImpSynCodeRevCodeRevHrsUnitTestsPurchaseHowPurchaseWhatLanguageWorkedWithLanguageDesireNextYearDatabaseWorkedWithDatabaseDesireNextYearPlatformWorkedWithPlatformDesireNextYearWebFrameWorkedWithWebFrameDesireNextYearMiscTechWorkedWithMiscTechDesireNextYearDevEnvironOpSysContainersBlockchainOrgBlockchainIsBetterLifeITpersonOffOnSocialMediaExtraversionScreenNameSOVisit1stSOVisitFreqSOVisitToSOFindAnswerSOTimeSavedSOHowMuchTimeSOAccountSOPartFreqSOJobsEntTeamsSOCommWelcomeChangeSONewContentAgeGenderTransSexualityEthnicityDependentsSurveyLengthSurveyEase
01I am a student who is learning to codeYesNeverThe quality of OSS and closed source software ...Not employed, and not looking for workUnited KingdomNoPrimary/elementary schoolNaNTaught yourself a new language, framework, or ...NaNNaN410NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNHTML/CSS;Java;JavaScript;PythonC;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQLSQLiteMySQLMacOS;WindowsAndroid;Arduino;WindowsDjango;FlaskFlask;jQueryNode.jsNode.jsIntelliJ;Notepad++;PyCharmWindowsI do not use containersNaNNaNYesFortunately, someone else has that titleYesTwitterOnlineUsername2017A few times per month or weeklyFind answers to specific questions;Learn how t...3-5 times per weekStack Overflow was much faster31-60 minutesNoNaNNo, I didn't know that Stack Overflow had a jo...No, and I don't know what those areNeutralJust as welcome now as I felt last yearTech articles written by other developers;Indu...14.0ManNoStraight / HeterosexualNaNNoAppropriate in lengthNeither easy nor difficult
12I am a student who is learning to codeNoLess than once per yearThe quality of OSS and closed source software ...Not employed, but looking for workBosnia and HerzegovinaYes, full-timeSecondary school (e.g. American high school, G...NaNTaken an online course in programming or softw...NaNDeveloper, desktop or enterprise applications;...NaN17NaNNaNNaNNaNNaNNaNI am actively looking for a jobI've never had a jobNaNNaNFinancial performance or funding status of the...Something else changed (education, award, medi...NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNC++;HTML/CSS;PythonC++;HTML/CSS;JavaScript;SQLNaNMySQLWindowsWindowsDjangoDjangoNaNNaNAtom;PyCharmWindowsI do not use containersNaNUseful across many domains and could change ma...YesYesYesInstagramOnlineUsername2017Daily or almost dailyFind answers to specific questions;Learn how t...3-5 times per weekStack Overflow was much faster11-30 minutesYesA few times per month or weeklyNo, I knew that Stack Overflow had a job board...No, and I don't know what those areYes, somewhatJust as welcome now as I felt last yearTech articles written by other developers;Indu...19.0ManNoStraight / HeterosexualNaNNoAppropriate in lengthNeither easy nor difficult
23I am not primarily a developer, but I write co...YesNeverThe quality of OSS and closed source software ...Employed full-timeThailandNoBachelor’s degree (BA, BS, B.Eng., etc.)Web development or web designTaught yourself a new language, framework, or ...100 to 499 employeesDesigner;Developer, back-end;Developer, front-...3221Slightly satisfiedSlightly satisfiedNot at all confidentNot sureNot sureI’m not actively looking, but I am open to new...1-2 years agoInterview with people in peer rolesNoLanguages, frameworks, and other technologies ...I was preparing for a job searchTHBThai baht23000.0Monthly8820.040.0There's no schedule or spec; I work on what se...Distracting work environment;Inadequate access...Less than once per month / NeverHomeAverageNoNaNNo, but I think we shouldNot sureI have little or no influenceHTML/CSSElixir;HTML/CSSPostgreSQLPostgreSQLNaNNaNNaNOther(s):NaNNaNVim;Visual Studio CodeLinux-basedI do not use containersNaNNaNYesYesYesRedditIn real life (in person)Username2011A few times per weekFind answers to specific questions;Learn how t...6-10 times per weekThey were about the sameNaNYesLess than once per month or monthlyYesNo, I've heard of them, but I am not part of a...NeutralJust as welcome now as I felt last yearTech meetups or events in your area;Courses on...28.0ManNoStraight / HeterosexualNaNYesAppropriate in lengthNeither easy nor difficult
\n", 1408 | "
" 1409 | ], 1410 | "text/plain": [ 1411 | " Respondent MainBranch Hobbyist \\\n", 1412 | "0 1 I am a student who is learning to code Yes \n", 1413 | "1 2 I am a student who is learning to code No \n", 1414 | "2 3 I am not primarily a developer, but I write co... Yes \n", 1415 | "\n", 1416 | " OpenSourcer OpenSource \\\n", 1417 | "0 Never The quality of OSS and closed source software ... \n", 1418 | "1 Less than once per year The quality of OSS and closed source software ... \n", 1419 | "2 Never The quality of OSS and closed source software ... \n", 1420 | "\n", 1421 | " Employment Country \\\n", 1422 | "0 Not employed, and not looking for work United Kingdom \n", 1423 | "1 Not employed, but looking for work Bosnia and Herzegovina \n", 1424 | "2 Employed full-time Thailand \n", 1425 | "\n", 1426 | " Student EdLevel \\\n", 1427 | "0 No Primary/elementary school \n", 1428 | "1 Yes, full-time Secondary school (e.g. American high school, G... \n", 1429 | "2 No Bachelor’s degree (BA, BS, B.Eng., etc.) \n", 1430 | "\n", 1431 | " UndergradMajor \\\n", 1432 | "0 NaN \n", 1433 | "1 NaN \n", 1434 | "2 Web development or web design \n", 1435 | "\n", 1436 | " EduOther OrgSize \\\n", 1437 | "0 Taught yourself a new language, framework, or ... NaN \n", 1438 | "1 Taken an online course in programming or softw... NaN \n", 1439 | "2 Taught yourself a new language, framework, or ... 100 to 499 employees \n", 1440 | "\n", 1441 | " DevType YearsCode Age1stCode \\\n", 1442 | "0 NaN 4 10 \n", 1443 | "1 Developer, desktop or enterprise applications;... NaN 17 \n", 1444 | "2 Designer;Developer, back-end;Developer, front-... 3 22 \n", 1445 | "\n", 1446 | " YearsCodePro CareerSat JobSat MgrIdiot \\\n", 1447 | "0 NaN NaN NaN NaN \n", 1448 | "1 NaN NaN NaN NaN \n", 1449 | "2 1 Slightly satisfied Slightly satisfied Not at all confident \n", 1450 | "\n", 1451 | " MgrMoney MgrWant JobSeek \\\n", 1452 | "0 NaN NaN NaN \n", 1453 | "1 NaN NaN I am actively looking for a job \n", 1454 | "2 Not sure Not sure I’m not actively looking, but I am open to new... \n", 1455 | "\n", 1456 | " LastHireDate LastInt FizzBuzz \\\n", 1457 | "0 NaN NaN NaN \n", 1458 | "1 I've never had a job NaN NaN \n", 1459 | "2 1-2 years ago Interview with people in peer roles No \n", 1460 | "\n", 1461 | " JobFactors \\\n", 1462 | "0 NaN \n", 1463 | "1 Financial performance or funding status of the... \n", 1464 | "2 Languages, frameworks, and other technologies ... \n", 1465 | "\n", 1466 | " ResumeUpdate CurrencySymbol \\\n", 1467 | "0 NaN NaN \n", 1468 | "1 Something else changed (education, award, medi... NaN \n", 1469 | "2 I was preparing for a job search THB \n", 1470 | "\n", 1471 | " CurrencyDesc CompTotal CompFreq ConvertedComp WorkWeekHrs \\\n", 1472 | "0 NaN NaN NaN NaN NaN \n", 1473 | "1 NaN NaN NaN NaN NaN \n", 1474 | "2 Thai baht 23000.0 Monthly 8820.0 40.0 \n", 1475 | "\n", 1476 | " WorkPlan \\\n", 1477 | "0 NaN \n", 1478 | "1 NaN \n", 1479 | "2 There's no schedule or spec; I work on what se... \n", 1480 | "\n", 1481 | " WorkChallenge \\\n", 1482 | "0 NaN \n", 1483 | "1 NaN \n", 1484 | "2 Distracting work environment;Inadequate access... 
\n", 1485 | "\n", 1486 | " WorkRemote WorkLoc ImpSyn CodeRev CodeRevHrs \\\n", 1487 | "0 NaN NaN NaN NaN NaN \n", 1488 | "1 NaN NaN NaN NaN NaN \n", 1489 | "2 Less than once per month / Never Home Average No NaN \n", 1490 | "\n", 1491 | " UnitTests PurchaseHow PurchaseWhat \\\n", 1492 | "0 NaN NaN NaN \n", 1493 | "1 NaN NaN NaN \n", 1494 | "2 No, but I think we should Not sure I have little or no influence \n", 1495 | "\n", 1496 | " LanguageWorkedWith \\\n", 1497 | "0 HTML/CSS;Java;JavaScript;Python \n", 1498 | "1 C++;HTML/CSS;Python \n", 1499 | "2 HTML/CSS \n", 1500 | "\n", 1501 | " LanguageDesireNextYear DatabaseWorkedWith \\\n", 1502 | "0 C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL SQLite \n", 1503 | "1 C++;HTML/CSS;JavaScript;SQL NaN \n", 1504 | "2 Elixir;HTML/CSS PostgreSQL \n", 1505 | "\n", 1506 | " DatabaseDesireNextYear PlatformWorkedWith PlatformDesireNextYear \\\n", 1507 | "0 MySQL MacOS;Windows Android;Arduino;Windows \n", 1508 | "1 MySQL Windows Windows \n", 1509 | "2 PostgreSQL NaN NaN \n", 1510 | "\n", 1511 | " WebFrameWorkedWith WebFrameDesireNextYear MiscTechWorkedWith \\\n", 1512 | "0 Django;Flask Flask;jQuery Node.js \n", 1513 | "1 Django Django NaN \n", 1514 | "2 NaN Other(s): NaN \n", 1515 | "\n", 1516 | " MiscTechDesireNextYear DevEnviron OpSys \\\n", 1517 | "0 Node.js IntelliJ;Notepad++;PyCharm Windows \n", 1518 | "1 NaN Atom;PyCharm Windows \n", 1519 | "2 NaN Vim;Visual Studio Code Linux-based \n", 1520 | "\n", 1521 | " Containers BlockchainOrg \\\n", 1522 | "0 I do not use containers NaN \n", 1523 | "1 I do not use containers NaN \n", 1524 | "2 I do not use containers NaN \n", 1525 | "\n", 1526 | " BlockchainIs BetterLife \\\n", 1527 | "0 NaN Yes \n", 1528 | "1 Useful across many domains and could change ma... Yes \n", 1529 | "2 NaN Yes \n", 1530 | "\n", 1531 | " ITperson OffOn SocialMedia \\\n", 1532 | "0 Fortunately, someone else has that title Yes Twitter \n", 1533 | "1 Yes Yes Instagram \n", 1534 | "2 Yes Yes Reddit \n", 1535 | "\n", 1536 | " Extraversion ScreenName SOVisit1st \\\n", 1537 | "0 Online Username 2017 \n", 1538 | "1 Online Username 2017 \n", 1539 | "2 In real life (in person) Username 2011 \n", 1540 | "\n", 1541 | " SOVisitFreq \\\n", 1542 | "0 A few times per month or weekly \n", 1543 | "1 Daily or almost daily \n", 1544 | "2 A few times per week \n", 1545 | "\n", 1546 | " SOVisitTo SOFindAnswer \\\n", 1547 | "0 Find answers to specific questions;Learn how t... 3-5 times per week \n", 1548 | "1 Find answers to specific questions;Learn how t... 3-5 times per week \n", 1549 | "2 Find answers to specific questions;Learn how t... 6-10 times per week \n", 1550 | "\n", 1551 | " SOTimeSaved SOHowMuchTime SOAccount \\\n", 1552 | "0 Stack Overflow was much faster 31-60 minutes No \n", 1553 | "1 Stack Overflow was much faster 11-30 minutes Yes \n", 1554 | "2 They were about the same NaN Yes \n", 1555 | "\n", 1556 | " SOPartFreq \\\n", 1557 | "0 NaN \n", 1558 | "1 A few times per month or weekly \n", 1559 | "2 Less than once per month or monthly \n", 1560 | "\n", 1561 | " SOJobs \\\n", 1562 | "0 No, I didn't know that Stack Overflow had a jo... \n", 1563 | "1 No, I knew that Stack Overflow had a job board... \n", 1564 | "2 Yes \n", 1565 | "\n", 1566 | " EntTeams SOComm \\\n", 1567 | "0 No, and I don't know what those are Neutral \n", 1568 | "1 No, and I don't know what those are Yes, somewhat \n", 1569 | "2 No, I've heard of them, but I am not part of a... 
Neutral \n", 1570 | "\n", 1571 | " WelcomeChange \\\n", 1572 | "0 Just as welcome now as I felt last year \n", 1573 | "1 Just as welcome now as I felt last year \n", 1574 | "2 Just as welcome now as I felt last year \n", 1575 | "\n", 1576 | " SONewContent Age Gender Trans \\\n", 1577 | "0 Tech articles written by other developers;Indu... 14.0 Man No \n", 1578 | "1 Tech articles written by other developers;Indu... 19.0 Man No \n", 1579 | "2 Tech meetups or events in your area;Courses on... 28.0 Man No \n", 1580 | "\n", 1581 | " Sexuality Ethnicity Dependents SurveyLength \\\n", 1582 | "0 Straight / Heterosexual NaN No Appropriate in length \n", 1583 | "1 Straight / Heterosexual NaN No Appropriate in length \n", 1584 | "2 Straight / Heterosexual NaN Yes Appropriate in length \n", 1585 | "\n", 1586 | " SurveyEase \n", 1587 | "0 Neither easy nor difficult \n", 1588 | "1 Neither easy nor difficult \n", 1589 | "2 Neither easy nor difficult " 1590 | ] 1591 | }, 1592 | "execution_count": 34, 1593 | "metadata": {}, 1594 | "output_type": "execute_result" 1595 | } 1596 | ], 1597 | "source": [ 1598 | "res_df.loc[0:2]" 1599 | ] 1600 | }, 1601 | { 1602 | "cell_type": "markdown", 1603 | "metadata": {}, 1604 | "source": [ 1605 | "How the first row responded to the Hobbyist question." 1606 | ] 1607 | }, 1608 | { 1609 | "cell_type": "code", 1610 | "execution_count": 38, 1611 | "metadata": {}, 1612 | "outputs": [ 1613 | { 1614 | "data": { 1615 | "text/plain": [ 1616 | "'Yes'" 1617 | ] 1618 | }, 1619 | "execution_count": 38, 1620 | "metadata": {}, 1621 | "output_type": "execute_result" 1622 | } 1623 | ], 1624 | "source": [ 1625 | "res_df.loc[0, 'Hobbyist']" 1626 | ] 1627 | }, 1628 | { 1629 | "cell_type": "markdown", 1630 | "metadata": {}, 1631 | "source": [ 1632 | "Note we can drop the brackets when selecting rows _and_ a column together." 1633 | ] 1634 | }, 1635 | { 1636 | "cell_type": "markdown", 1637 | "metadata": {}, 1638 | "source": [ 1639 | "Get the responses of the first three rows to the Hobbyist question by passing in a slice of rows and the column label." 1640 | ] 1641 | }, 1642 | { 1643 | "cell_type": "code", 1644 | "execution_count": 37, 1645 | "metadata": {}, 1646 | "outputs": [ 1647 | { 1648 | "data": { 1649 | "text/plain": [ 1650 | "0 Yes\n", 1651 | "1 No\n", 1652 | "2 Yes\n", 1653 | "Name: Hobbyist, dtype: object" 1654 | ] 1655 | }, 1656 | "execution_count": 37, 1657 | "metadata": {}, 1658 | "output_type": "execute_result" 1659 | } 1660 | ], 1661 | "source": [ 1662 | "res_df.loc[0:2, 'Hobbyist']" 1663 | ] 1664 | }, 1665 | { 1666 | "cell_type": "markdown", 1667 | "metadata": {}, 1668 | "source": [ 1669 | "When selecting a slice of rows and columns, drop the brackets to avoid a syntax error." 1670 | ] 1671 | }, 1672 | { 1673 | "cell_type": "markdown", 1674 | "metadata": {}, 1675 | "source": [ 1676 | "Retrieve a slice of rows together with a slice of columns." 1677 | ] 1678 | }, 1679 | { 1680 | "cell_type": "code", 1681 | "execution_count": 39, 1682 | "metadata": {}, 1683 | "outputs": [ 1684 | { 1685 | "data": { 1686 | "text/html": [ 1687 | "
\n", 1688 | "\n", 1701 | "\n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | "
HobbyistOpenSourcerOpenSourceEmployment
0YesNeverThe quality of OSS and closed source software ...Not employed, and not looking for work
1NoLess than once per yearThe quality of OSS and closed source software ...Not employed, but looking for work
2YesNeverThe quality of OSS and closed source software ...Employed full-time
\n", 1735 | "
" 1736 | ], 1737 | "text/plain": [ 1738 | " Hobbyist OpenSourcer \\\n", 1739 | "0 Yes Never \n", 1740 | "1 No Less than once per year \n", 1741 | "2 Yes Never \n", 1742 | "\n", 1743 | " OpenSource \\\n", 1744 | "0 The quality of OSS and closed source software ... \n", 1745 | "1 The quality of OSS and closed source software ... \n", 1746 | "2 The quality of OSS and closed source software ... \n", 1747 | "\n", 1748 | " Employment \n", 1749 | "0 Not employed, and not looking for work \n", 1750 | "1 Not employed, but looking for work \n", 1751 | "2 Employed full-time " 1752 | ] 1753 | }, 1754 | "execution_count": 39, 1755 | "metadata": {}, 1756 | "output_type": "execute_result" 1757 | } 1758 | ], 1759 | "source": [ 1760 | "res_df.loc[0:2, 'Hobbyist':'Employment']" 1761 | ] 1762 | }, 1763 | { 1764 | "cell_type": "markdown", 1765 | "metadata": {}, 1766 | "source": [ 1767 | "Note slicing is inclusive to avoid driving users insane." 1768 | ] 1769 | }, 1770 | { 1771 | "cell_type": "code", 1772 | "execution_count": null, 1773 | "metadata": {}, 1774 | "outputs": [], 1775 | "source": [] 1776 | } 1777 | ], 1778 | "metadata": { 1779 | "kernelspec": { 1780 | "display_name": "Python 3", 1781 | "language": "python", 1782 | "name": "python3" 1783 | }, 1784 | "language_info": { 1785 | "codemirror_mode": { 1786 | "name": "ipython", 1787 | "version": 3 1788 | }, 1789 | "file_extension": ".py", 1790 | "mimetype": "text/x-python", 1791 | "name": "python", 1792 | "nbconvert_exporter": "python", 1793 | "pygments_lexer": "ipython3", 1794 | "version": "3.6.9" 1795 | } 1796 | }, 1797 | "nbformat": 4, 1798 | "nbformat_minor": 2 1799 | } 1800 | -------------------------------------------------------------------------------- /03-Indexes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Indexes: How to Set, Reset and Use Indexes" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import pandas as pd" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "people = {\n", 26 | " \"first\": [\"Phil\", \"Jane\", \"Rob\"],\n", 27 | " \"last\": [\"Lembo\", \"Doe\", \"Roe\"],\n", 28 | " \"email\": [\"phil.lembo@gmail.com\", \"janedoe@email.com\", \"robroe@email.com\"]\n", 29 | "}" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "df = pd.DataFrame(people)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/html": [ 49 | "
\n", 50 | "\n", 63 | "\n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
firstlastemail
0PhilLembophil.lembo@gmail.com
1JaneDoejanedoe@email.com
2RobRoerobroe@email.com
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " first last email\n", 97 | "0 Phil Lembo phil.lembo@gmail.com\n", 98 | "1 Jane Doe janedoe@email.com\n", 99 | "2 Rob Roe robroe@email.com" 100 | ] 101 | }, 102 | "execution_count": 4, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "df" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 5, 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "data": { 118 | "text/plain": [ 119 | "0 phil.lembo@gmail.com\n", 120 | "1 janedoe@email.com\n", 121 | "2 robroe@email.com\n", 122 | "Name: email, dtype: object" 123 | ] 124 | }, 125 | "execution_count": 5, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "df['email']" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 6, 137 | "metadata": {}, 138 | "outputs": [ 139 | { 140 | "data": { 141 | "text/html": [ 142 | "
\n", 143 | "\n", 156 | "\n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | "
firstlast
email
phil.lembo@gmail.comPhilLembo
janedoe@email.comJaneDoe
robroe@email.comRobRoe
\n", 187 | "
" 188 | ], 189 | "text/plain": [ 190 | " first last\n", 191 | "email \n", 192 | "phil.lembo@gmail.com Phil Lembo\n", 193 | "janedoe@email.com Jane Doe\n", 194 | "robroe@email.com Rob Roe" 195 | ] 196 | }, 197 | "execution_count": 6, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "df.set_index('email')" 204 | ] 205 | }, 206 | { 207 | "cell_type": "markdown", 208 | "metadata": {}, 209 | "source": [ 210 | "By default, pandas won't change the original DataFrame." 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 7, 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/html": [ 221 | "
\n", 222 | "\n", 235 | "\n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | "
firstlastemail
0PhilLembophil.lembo@gmail.com
1JaneDoejanedoe@email.com
2RobRoerobroe@email.com
\n", 265 | "
" 266 | ], 267 | "text/plain": [ 268 | " first last email\n", 269 | "0 Phil Lembo phil.lembo@gmail.com\n", 270 | "1 Jane Doe janedoe@email.com\n", 271 | "2 Rob Roe robroe@email.com" 272 | ] 273 | }, 274 | "execution_count": 7, 275 | "metadata": {}, 276 | "output_type": "execute_result" 277 | } 278 | ], 279 | "source": [ 280 | "df" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "To change the index in place, you need to use the \"inplace=True\" flag." 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 8, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "df.set_index('email', inplace=True)" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 9, 302 | "metadata": {}, 303 | "outputs": [ 304 | { 305 | "data": { 306 | "text/html": [ 307 | "
\n", 308 | "\n", 321 | "\n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | "
firstlast
email
phil.lembo@gmail.comPhilLembo
janedoe@email.comJaneDoe
robroe@email.comRobRoe
\n", 352 | "
" 353 | ], 354 | "text/plain": [ 355 | " first last\n", 356 | "email \n", 357 | "phil.lembo@gmail.com Phil Lembo\n", 358 | "janedoe@email.com Jane Doe\n", 359 | "robroe@email.com Rob Roe" 360 | ] 361 | }, 362 | "execution_count": 9, 363 | "metadata": {}, 364 | "output_type": "execute_result" 365 | } 366 | ], 367 | "source": [ 368 | "df" 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": 10, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "data": { 378 | "text/plain": [ 379 | "Index(['phil.lembo@gmail.com', 'janedoe@email.com', 'robroe@email.com'], dtype='object', name='email')" 380 | ] 381 | }, 382 | "execution_count": 10, 383 | "metadata": {}, 384 | "output_type": "execute_result" 385 | } 386 | ], 387 | "source": [ 388 | "df.index" 389 | ] 390 | }, 391 | { 392 | "cell_type": "code", 393 | "execution_count": 11, 394 | "metadata": {}, 395 | "outputs": [ 396 | { 397 | "data": { 398 | "text/plain": [ 399 | "first Phil\n", 400 | "last Lembo\n", 401 | "Name: phil.lembo@gmail.com, dtype: object" 402 | ] 403 | }, 404 | "execution_count": 11, 405 | "metadata": {}, 406 | "output_type": "execute_result" 407 | } 408 | ], 409 | "source": [ 410 | "df.loc['phil.lembo@gmail.com']" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 12, 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "'Lembo'" 422 | ] 423 | }, 424 | "execution_count": 12, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "df.loc['phil.lembo@gmail.com', 'last']" 431 | ] 432 | }, 433 | { 434 | "cell_type": "markdown", 435 | "metadata": {}, 436 | "source": [ 437 | "Note, we no longer have those integers as our index." 438 | ] 439 | }, 440 | { 441 | "cell_type": "code", 442 | "execution_count": 13, 443 | "metadata": {}, 444 | "outputs": [ 445 | { 446 | "ename": "TypeError", 447 | "evalue": "cannot do label indexing on with these indexers [0] of ", 448 | "output_type": "error", 449 | "traceback": [ 450 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 451 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 452 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 453 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1422\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1426\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_is_scalar_access\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 454 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1847\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1848\u001b[0m \u001b[0;31m# fall thru to straight lookup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1849\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1850\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_label\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1851\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 455 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_validate_key\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1724\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_list_like_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1725\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_scalar_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1726\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1727\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 456 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_convert_scalar_indexer\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[0max\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[0;31m# a scalar\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 274\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_scalar_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 275\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_convert_slice_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 457 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_convert_scalar_indexer\u001b[0;34m(self, key, kind)\u001b[0m\n\u001b[1;32m 3136\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"loc\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3137\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mholds_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3138\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_invalid_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"label\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3140\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 458 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_invalid_indexer\u001b[0;34m(self, form, key)\u001b[0m\n\u001b[1;32m 3338\u001b[0m \u001b[0;34m\"cannot do {form} indexing on {klass} with these \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3339\u001b[0m \"indexers [{key}] of {kind}\".format(\n\u001b[0;32m-> 3340\u001b[0;31m \u001b[0mform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mform\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mklass\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3341\u001b[0m )\n\u001b[1;32m 3342\u001b[0m )\n", 459 | "\u001b[0;31mTypeError\u001b[0m: cannot do label indexing on with these indexers [0] of " 460 | ] 461 | } 462 | ], 463 | "source": [ 464 | "df.loc[0]" 465 | ] 466 | }, 467 | { 468 | "cell_type": "markdown", 469 | "metadata": {}, 470 | "source": [ 471 | "Instead, we now need to employ iloc to use integers." 
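A minimal standalone sketch of the label-based vs. position-based lookup described above, rebuilding the same small frame (an illustration only, not a cell stored in the notebook):

import pandas as pd

# Same small frame as in the cells above, with the email column set as the index.
people = {
    "first": ["Phil", "Jane", "Rob"],
    "last": ["Lembo", "Doe", "Roe"],
    "email": ["phil.lembo@gmail.com", "janedoe@email.com", "robroe@email.com"],
}
df = pd.DataFrame(people).set_index("email")

print(df.loc["phil.lembo@gmail.com", "last"])  # label-based lookup -> 'Lembo'
print(df.iloc[0])                              # position-based lookup: first row as a Series
print(df.iloc[0, 1])                           # row 0, column 1 -> 'Lembo'
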
472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "metadata": {}, 478 | "outputs": [], 479 | "source": [ 480 | "df.iloc[0]" 481 | ] 482 | }, 483 | { 484 | "cell_type": "markdown", 485 | "metadata": {}, 486 | "source": [ 487 | "To reset, use the reset_index method." 488 | ] 489 | }, 490 | { 491 | "cell_type": "code", 492 | "execution_count": null, 493 | "metadata": {}, 494 | "outputs": [], 495 | "source": [ 496 | "df.reset_index(inplace=True)" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": null, 502 | "metadata": {}, 503 | "outputs": [], 504 | "source": [ 505 | "df" 506 | ] 507 | }, 508 | { 509 | "cell_type": "markdown", 510 | "metadata": {}, 511 | "source": [ 512 | "Now turn to survey data." 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": {}, 519 | "outputs": [], 520 | "source": [ 521 | "res_df = pd.read_csv('data/survey_results_public.csv')\n", 522 | "schema_df = pd.read_csv('data/survey_results_schema.csv')" 523 | ] 524 | }, 525 | { 526 | "cell_type": "code", 527 | "execution_count": null, 528 | "metadata": {}, 529 | "outputs": [], 530 | "source": [ 531 | "pd.set_option('display.max_columns', 85)\n", 532 | "pd.set_option('display.max_rows', 85)" 533 | ] 534 | }, 535 | { 536 | "cell_type": "code", 537 | "execution_count": null, 538 | "metadata": {}, 539 | "outputs": [], 540 | "source": [ 541 | "df" 542 | ] 543 | }, 544 | { 545 | "cell_type": "markdown", 546 | "metadata": {}, 547 | "source": [ 548 | "Set index when loading data." 549 | ] 550 | }, 551 | { 552 | "cell_type": "code", 553 | "execution_count": null, 554 | "metadata": {}, 555 | "outputs": [], 556 | "source": [ 557 | "res_df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')" 558 | ] 559 | }, 560 | { 561 | "cell_type": "code", 562 | "execution_count": null, 563 | "metadata": {}, 564 | "outputs": [], 565 | "source": [ 566 | "res_df" 567 | ] 568 | }, 569 | { 570 | "cell_type": "markdown", 571 | "metadata": {}, 572 | "source": [ 573 | "To retrieve respondent number 1." 574 | ] 575 | }, 576 | { 577 | "cell_type": "code", 578 | "execution_count": null, 579 | "metadata": {}, 580 | "outputs": [], 581 | "source": [ 582 | "res_df.loc[1]" 583 | ] 584 | }, 585 | { 586 | "cell_type": "code", 587 | "execution_count": null, 588 | "metadata": {}, 589 | "outputs": [], 590 | "source": [ 591 | "schema_df" 592 | ] 593 | }, 594 | { 595 | "cell_type": "markdown", 596 | "metadata": {}, 597 | "source": [ 598 | "What if I want to be able to call up a schema definition without having to scroll through frame? Set \"Column\" as the index!" 599 | ] 600 | }, 601 | { 602 | "cell_type": "code", 603 | "execution_count": null, 604 | "metadata": {}, 605 | "outputs": [], 606 | "source": [ 607 | "schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": null, 613 | "metadata": {}, 614 | "outputs": [], 615 | "source": [ 616 | "schema_df" 617 | ] 618 | }, 619 | { 620 | "cell_type": "code", 621 | "execution_count": null, 622 | "metadata": {}, 623 | "outputs": [], 624 | "source": [ 625 | "schema_df.loc['Hobbyist']" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": null, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "schema_df.loc['MgrIdiot']" 635 | ] 636 | }, 637 | { 638 | "cell_type": "markdown", 639 | "metadata": {}, 640 | "source": [ 641 | "By default, pandas truncates its response. 
This can be changed, but you can also retrieve the full text by specifying both the index _and_ column names (in this case \"QuestionText\")." 642 | ] 643 | }, 644 | { 645 | "cell_type": "code", 646 | "execution_count": null, 647 | "metadata": {}, 648 | "outputs": [], 649 | "source": [ 650 | "schema_df.loc['MgrIdiot', 'QuestionText']" 651 | ] 652 | }, 653 | { 654 | "cell_type": "markdown", 655 | "metadata": {}, 656 | "source": [ 657 | "We can sort to make life easier!" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": null, 663 | "metadata": {}, 664 | "outputs": [], 665 | "source": [ 666 | "schema_df.sort_index()" 667 | ] 668 | }, 669 | { 670 | "cell_type": "markdown", 671 | "metadata": {}, 672 | "source": [ 673 | "To reverse order, use \"ascending\" flag." 674 | ] 675 | }, 676 | { 677 | "cell_type": "code", 678 | "execution_count": null, 679 | "metadata": {}, 680 | "outputs": [], 681 | "source": [ 682 | "schema_df.sort_index(ascending=False)" 683 | ] 684 | }, 685 | { 686 | "cell_type": "markdown", 687 | "metadata": {}, 688 | "source": [ 689 | "To make change persistent, use \"inplace=True\" flag." 690 | ] 691 | }, 692 | { 693 | "cell_type": "code", 694 | "execution_count": null, 695 | "metadata": {}, 696 | "outputs": [], 697 | "source": [ 698 | "schema_df.sort_index(inplace=True)\n", 699 | "schema_df" 700 | ] 701 | }, 702 | { 703 | "cell_type": "code", 704 | "execution_count": null, 705 | "metadata": {}, 706 | "outputs": [], 707 | "source": [] 708 | } 709 | ], 710 | "metadata": { 711 | "kernelspec": { 712 | "display_name": "Python 3", 713 | "language": "python", 714 | "name": "python3" 715 | }, 716 | "language_info": { 717 | "codemirror_mode": { 718 | "name": "ipython", 719 | "version": 3 720 | }, 721 | "file_extension": ".py", 722 | "mimetype": "text/x-python", 723 | "name": "python", 724 | "nbconvert_exporter": "python", 725 | "pygments_lexer": "ipython3", 726 | "version": "3.6.9" 727 | } 728 | }, 729 | "nbformat": 4, 730 | "nbformat_minor": 2 731 | } 732 | -------------------------------------------------------------------------------- /06-Add Remove Rows and Columns.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Add/Remove Rows and Columns from DataFrames" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 17, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "people = {\n", 17 | " \"first\": [\"Corey\", \"Jane\", \"John\"],\n", 18 | " \"last\": [\"Schafer\", \"Doe\", \"Doe\"],\n", 19 | " \"email\": [\"CoreyMSchafer@gmail.com\", \"JaneDoe@email.com\", \"JohnDoe@email.com\"]\n", 20 | "}" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 18, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "import pandas as pd" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 19, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "df = pd.DataFrame(people)" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 20, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/html": [ 49 | "
\n", 50 | "\n", 63 | "\n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
firstlastemail
0CoreySchaferCoreyMSchafer@gmail.com
1JaneDoeJaneDoe@email.com
2JohnDoeJohnDoe@email.com
\n", 93 | "
" 94 | ], 95 | "text/plain": [ 96 | " first last email\n", 97 | "0 Corey Schafer CoreyMSchafer@gmail.com\n", 98 | "1 Jane Doe JaneDoe@email.com\n", 99 | "2 John Doe JohnDoe@email.com" 100 | ] 101 | }, 102 | "execution_count": 20, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "df" 109 | ] 110 | }, 111 | { 112 | "cell_type": "markdown", 113 | "metadata": {}, 114 | "source": [ 115 | "Combine first and last name column:" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 21, 121 | "metadata": {}, 122 | "outputs": [ 123 | { 124 | "data": { 125 | "text/plain": [ 126 | "0 Corey Schafer\n", 127 | "1 Jane Doe\n", 128 | "2 John Doe\n", 129 | "dtype: object" 130 | ] 131 | }, 132 | "execution_count": 21, 133 | "metadata": {}, 134 | "output_type": "execute_result" 135 | } 136 | ], 137 | "source": [ 138 | "df['first'] + ' ' + df['last']" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": 22, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "df['full_name'] = df['first'] + ' ' + df['last']" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 23, 153 | "metadata": {}, 154 | "outputs": [ 155 | { 156 | "data": { 157 | "text/html": [ 158 | "
\n", 159 | "\n", 172 | "\n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | "
firstlastemailfull_name
0CoreySchaferCoreyMSchafer@gmail.comCorey Schafer
1JaneDoeJaneDoe@email.comJane Doe
2JohnDoeJohnDoe@email.comJohn Doe
\n", 206 | "
" 207 | ], 208 | "text/plain": [ 209 | " first last email full_name\n", 210 | "0 Corey Schafer CoreyMSchafer@gmail.com Corey Schafer\n", 211 | "1 Jane Doe JaneDoe@email.com Jane Doe\n", 212 | "2 John Doe JohnDoe@email.com John Doe" 213 | ] 214 | }, 215 | "execution_count": 23, 216 | "metadata": {}, 217 | "output_type": "execute_result" 218 | } 219 | ], 220 | "source": [ 221 | "df" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "metadata": {}, 227 | "source": [ 228 | "Note: Cannot use dot notation when assigning columns like this, must use brackets (python will think you're trying to assign an attribute)." 229 | ] 230 | }, 231 | { 232 | "cell_type": "markdown", 233 | "metadata": {}, 234 | "source": [ 235 | "To delete columns:" 236 | ] 237 | }, 238 | { 239 | "cell_type": "code", 240 | "execution_count": 24, 241 | "metadata": {}, 242 | "outputs": [ 243 | { 244 | "data": { 245 | "text/html": [ 246 | "
\n", 247 | "\n", 260 | "\n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | "
emailfull_name
0CoreyMSchafer@gmail.comCorey Schafer
1JaneDoe@email.comJane Doe
2JohnDoe@email.comJohn Doe
\n", 286 | "
" 287 | ], 288 | "text/plain": [ 289 | " email full_name\n", 290 | "0 CoreyMSchafer@gmail.com Corey Schafer\n", 291 | "1 JaneDoe@email.com Jane Doe\n", 292 | "2 JohnDoe@email.com John Doe" 293 | ] 294 | }, 295 | "execution_count": 24, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "df.drop(columns=['first', 'last'])" 302 | ] 303 | }, 304 | { 305 | "cell_type": "markdown", 306 | "metadata": {}, 307 | "source": [ 308 | "This change not yet applied to df, need to explicitly use \"inplace=True\"." 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 25, 314 | "metadata": {}, 315 | "outputs": [ 316 | { 317 | "data": { 318 | "text/html": [ 319 | "
\n", 320 | "\n", 333 | "\n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | "
firstlastemailfull_name
0CoreySchaferCoreyMSchafer@gmail.comCorey Schafer
1JaneDoeJaneDoe@email.comJane Doe
2JohnDoeJohnDoe@email.comJohn Doe
\n", 367 | "
" 368 | ], 369 | "text/plain": [ 370 | " first last email full_name\n", 371 | "0 Corey Schafer CoreyMSchafer@gmail.com Corey Schafer\n", 372 | "1 Jane Doe JaneDoe@email.com Jane Doe\n", 373 | "2 John Doe JohnDoe@email.com John Doe" 374 | ] 375 | }, 376 | "execution_count": 25, 377 | "metadata": {}, 378 | "output_type": "execute_result" 379 | } 380 | ], 381 | "source": [ 382 | "df" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": 26, 388 | "metadata": {}, 389 | "outputs": [], 390 | "source": [ 391 | "df.drop(columns=['first', 'last'], inplace=True)" 392 | ] 393 | }, 394 | { 395 | "cell_type": "code", 396 | "execution_count": 27, 397 | "metadata": {}, 398 | "outputs": [ 399 | { 400 | "data": { 401 | "text/html": [ 402 | "
\n", 403 | "\n", 416 | "\n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | "
emailfull_name
0CoreyMSchafer@gmail.comCorey Schafer
1JaneDoe@email.comJane Doe
2JohnDoe@email.comJohn Doe
\n", 442 | "
" 443 | ], 444 | "text/plain": [ 445 | " email full_name\n", 446 | "0 CoreyMSchafer@gmail.com Corey Schafer\n", 447 | "1 JaneDoe@email.com Jane Doe\n", 448 | "2 JohnDoe@email.com John Doe" 449 | ] 450 | }, 451 | "execution_count": 27, 452 | "metadata": {}, 453 | "output_type": "execute_result" 454 | } 455 | ], 456 | "source": [ 457 | "df" 458 | ] 459 | }, 460 | { 461 | "cell_type": "markdown", 462 | "metadata": {}, 463 | "source": [ 464 | "To split full_name into separate columns for each part of name:" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 28, 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "data": { 474 | "text/plain": [ 475 | "0 [Corey, Schafer]\n", 476 | "1 [Jane, Doe]\n", 477 | "2 [John, Doe]\n", 478 | "Name: full_name, dtype: object" 479 | ] 480 | }, 481 | "execution_count": 28, 482 | "metadata": {}, 483 | "output_type": "execute_result" 484 | } 485 | ], 486 | "source": [ 487 | "df['full_name'].str.split(' ')" 488 | ] 489 | }, 490 | { 491 | "cell_type": "markdown", 492 | "metadata": {}, 493 | "source": [ 494 | "Result is first and last name in a list." 495 | ] 496 | }, 497 | { 498 | "cell_type": "markdown", 499 | "metadata": {}, 500 | "source": [ 501 | "To assign to separate columns, use expand argument:" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": 29, 507 | "metadata": {}, 508 | "outputs": [ 509 | { 510 | "data": { 511 | "text/html": [ 512 | "
\n", 513 | "\n", 526 | "\n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | "
01
0CoreySchafer
1JaneDoe
2JohnDoe
\n", 552 | "
" 553 | ], 554 | "text/plain": [ 555 | " 0 1\n", 556 | "0 Corey Schafer\n", 557 | "1 Jane Doe\n", 558 | "2 John Doe" 559 | ] 560 | }, 561 | "execution_count": 29, 562 | "metadata": {}, 563 | "output_type": "execute_result" 564 | } 565 | ], 566 | "source": [ 567 | "df['full_name'].str.split(' ', expand=True)" 568 | ] 569 | }, 570 | { 571 | "cell_type": "markdown", 572 | "metadata": {}, 573 | "source": [ 574 | "Now set two columns in data from for these, by passing in the list:" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 30, 580 | "metadata": {}, 581 | "outputs": [], 582 | "source": [ 583 | "df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)" 584 | ] 585 | }, 586 | { 587 | "cell_type": "code", 588 | "execution_count": 31, 589 | "metadata": {}, 590 | "outputs": [ 591 | { 592 | "data": { 593 | "text/html": [ 594 | "
\n", 595 | "\n", 608 | "\n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
1JaneDoe@email.comJane DoeJaneDoe
2JohnDoe@email.comJohn DoeJohnDoe
\n", 642 | "
" 643 | ], 644 | "text/plain": [ 645 | " email full_name first last\n", 646 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 647 | "1 JaneDoe@email.com Jane Doe Jane Doe\n", 648 | "2 JohnDoe@email.com John Doe John Doe" 649 | ] 650 | }, 651 | "execution_count": 31, 652 | "metadata": {}, 653 | "output_type": "execute_result" 654 | } 655 | ], 656 | "source": [ 657 | "df" 658 | ] 659 | }, 660 | { 661 | "cell_type": "markdown", 662 | "metadata": {}, 663 | "source": [ 664 | "On to adding and removing rows...\n", 665 | "\n", 666 | "First, add a single row of data with append:" 667 | ] 668 | }, 669 | { 670 | "cell_type": "code", 671 | "execution_count": 32, 672 | "metadata": {}, 673 | "outputs": [ 674 | { 675 | "ename": "TypeError", 676 | "evalue": "Can only append a Series if ignore_index=True or if the Series has a name", 677 | "output_type": "error", 678 | "traceback": [ 679 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 680 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", 681 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'first'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'Tony'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 682 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mappend\u001b[0;34m(self, other, ignore_index, verify_integrity, sort)\u001b[0m\n\u001b[1;32m 7096\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7097\u001b[0m raise TypeError(\n\u001b[0;32m-> 7098\u001b[0;31m \u001b[0;34m\"Can only append a Series if ignore_index=True\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7099\u001b[0m \u001b[0;34m\" or if the Series has a name\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7100\u001b[0m )\n", 683 | "\u001b[0;31mTypeError\u001b[0m: Can only append a Series if ignore_index=True or if the Series has a name" 684 | ] 685 | } 686 | ], 687 | "source": [ 688 | "df.append({'first': 'Tony'})" 689 | ] 690 | }, 691 | { 692 | "cell_type": "markdown", 693 | "metadata": {}, 694 | "source": [ 695 | "Error because there's no index, but if we say \"ignore_index=True\" the existing df will assign an index by default." 696 | ] 697 | }, 698 | { 699 | "cell_type": "code", 700 | "execution_count": 33, 701 | "metadata": {}, 702 | "outputs": [ 703 | { 704 | "data": { 705 | "text/html": [ 706 | "
\n", 707 | "\n", 720 | "\n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
1JaneDoe@email.comJane DoeJaneDoe
2JohnDoe@email.comJohn DoeJohnDoe
3NaNNaNTonyNaN
\n", 761 | "
" 762 | ], 763 | "text/plain": [ 764 | " email full_name first last\n", 765 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 766 | "1 JaneDoe@email.com Jane Doe Jane Doe\n", 767 | "2 JohnDoe@email.com John Doe John Doe\n", 768 | "3 NaN NaN Tony NaN" 769 | ] 770 | }, 771 | "execution_count": 33, 772 | "metadata": {}, 773 | "output_type": "execute_result" 774 | } 775 | ], 776 | "source": [ 777 | "df.append({'first': 'Tony'}, ignore_index=True)" 778 | ] 779 | }, 780 | { 781 | "cell_type": "markdown", 782 | "metadata": {}, 783 | "source": [ 784 | "New name was appended, but since we only assigned one value the other cells are \"NaN\".\n", 785 | "\n", 786 | "We can also append a new dataframe to an existing dataframe. First create the second dataframe:" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 34, 792 | "metadata": {}, 793 | "outputs": [], 794 | "source": [ 795 | "people = {\n", 796 | " \"first\": [\"Tony\", \"Steve\"],\n", 797 | " \"last\": [\"Stark\", \"Rogers\"],\n", 798 | " \"email\": [\"ironman@avenge.com\", \"cap@avenge.com\"]\n", 799 | "}\n", 800 | "df2 = pd.DataFrame(people)" 801 | ] 802 | }, 803 | { 804 | "cell_type": "code", 805 | "execution_count": 35, 806 | "metadata": {}, 807 | "outputs": [ 808 | { 809 | "data": { 810 | "text/html": [ 811 | "
\n", 812 | "\n", 825 | "\n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | "
firstlastemail
0TonyStarkironman@avenge.com
1SteveRogerscap@avenge.com
\n", 849 | "
" 850 | ], 851 | "text/plain": [ 852 | " first last email\n", 853 | "0 Tony Stark ironman@avenge.com\n", 854 | "1 Steve Rogers cap@avenge.com" 855 | ] 856 | }, 857 | "execution_count": 35, 858 | "metadata": {}, 859 | "output_type": "execute_result" 860 | } 861 | ], 862 | "source": [ 863 | "df2" 864 | ] 865 | }, 866 | { 867 | "cell_type": "markdown", 868 | "metadata": {}, 869 | "source": [ 870 | "Now append, remembering to ignore_index:" 871 | ] 872 | }, 873 | { 874 | "cell_type": "code", 875 | "execution_count": 36, 876 | "metadata": {}, 877 | "outputs": [ 878 | { 879 | "name": "stderr", 880 | "output_type": "stream", 881 | "text": [ 882 | "/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py:7138: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", 883 | "of pandas will change to not sort by default.\n", 884 | "\n", 885 | "To accept the future behavior, pass 'sort=False'.\n", 886 | "\n", 887 | "To retain the current behavior and silence the warning, pass 'sort=True'.\n", 888 | "\n", 889 | " sort=sort,\n" 890 | ] 891 | }, 892 | { 893 | "data": { 894 | "text/html": [ 895 | "
\n", 896 | "\n", 909 | "\n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | "
emailfirstfull_namelast
0CoreyMSchafer@gmail.comCoreyCorey SchaferSchafer
1JaneDoe@email.comJaneJane DoeDoe
2JohnDoe@email.comJohnJohn DoeDoe
3ironman@avenge.comTonyNaNStark
4cap@avenge.comSteveNaNRogers
\n", 957 | "
" 958 | ], 959 | "text/plain": [ 960 | " email first full_name last\n", 961 | "0 CoreyMSchafer@gmail.com Corey Corey Schafer Schafer\n", 962 | "1 JaneDoe@email.com Jane Jane Doe Doe\n", 963 | "2 JohnDoe@email.com John John Doe Doe\n", 964 | "3 ironman@avenge.com Tony NaN Stark\n", 965 | "4 cap@avenge.com Steve NaN Rogers" 966 | ] 967 | }, 968 | "execution_count": 36, 969 | "metadata": {}, 970 | "output_type": "execute_result" 971 | } 972 | ], 973 | "source": [ 974 | "df.append(df2, ignore_index=True)" 975 | ] 976 | }, 977 | { 978 | "cell_type": "markdown", 979 | "metadata": {}, 980 | "source": [ 981 | "Added new rows. Reason for the warning in that we didn't pass all columns in same order. In future will set sort to False by default.\n", 982 | "\n", 983 | "If option \"sort=False\" is set, warning will be suppressed.\n", 984 | "\n", 985 | "The append method doesn't have an \"inplace\" argument, so we have to redefine df to make permanent:" 986 | ] 987 | }, 988 | { 989 | "cell_type": "code", 990 | "execution_count": 37, 991 | "metadata": {}, 992 | "outputs": [], 993 | "source": [ 994 | "df = df.append(df2, ignore_index=True, sort=False)" 995 | ] 996 | }, 997 | { 998 | "cell_type": "code", 999 | "execution_count": 38, 1000 | "metadata": {}, 1001 | "outputs": [ 1002 | { 1003 | "data": { 1004 | "text/html": [ 1005 | "
\n", 1006 | "\n", 1019 | "\n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
1JaneDoe@email.comJane DoeJaneDoe
2JohnDoe@email.comJohn DoeJohnDoe
3ironman@avenge.comNaNTonyStark
4cap@avenge.comNaNSteveRogers
\n", 1067 | "
" 1068 | ], 1069 | "text/plain": [ 1070 | " email full_name first last\n", 1071 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 1072 | "1 JaneDoe@email.com Jane Doe Jane Doe\n", 1073 | "2 JohnDoe@email.com John Doe John Doe\n", 1074 | "3 ironman@avenge.com NaN Tony Stark\n", 1075 | "4 cap@avenge.com NaN Steve Rogers" 1076 | ] 1077 | }, 1078 | "execution_count": 38, 1079 | "metadata": {}, 1080 | "output_type": "execute_result" 1081 | } 1082 | ], 1083 | "source": [ 1084 | "df" 1085 | ] 1086 | }, 1087 | { 1088 | "cell_type": "markdown", 1089 | "metadata": {}, 1090 | "source": [ 1091 | "Now let's remove rows. Instead of specifying columns to drops, specify indexes:" 1092 | ] 1093 | }, 1094 | { 1095 | "cell_type": "code", 1096 | "execution_count": 39, 1097 | "metadata": {}, 1098 | "outputs": [ 1099 | { 1100 | "data": { 1101 | "text/html": [ 1102 | "
\n", 1103 | "\n", 1116 | "\n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
1JaneDoe@email.comJane DoeJaneDoe
2JohnDoe@email.comJohn DoeJohnDoe
3ironman@avenge.comNaNTonyStark
\n", 1157 | "
" 1158 | ], 1159 | "text/plain": [ 1160 | " email full_name first last\n", 1161 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 1162 | "1 JaneDoe@email.com Jane Doe Jane Doe\n", 1163 | "2 JohnDoe@email.com John Doe John Doe\n", 1164 | "3 ironman@avenge.com NaN Tony Stark" 1165 | ] 1166 | }, 1167 | "execution_count": 39, 1168 | "metadata": {}, 1169 | "output_type": "execute_result" 1170 | } 1171 | ], 1172 | "source": [ 1173 | "df.drop(index=4)" 1174 | ] 1175 | }, 1176 | { 1177 | "cell_type": "markdown", 1178 | "metadata": {}, 1179 | "source": [ 1180 | "To apply change permanently, use \"inplace=True\".\n", 1181 | "\n", 1182 | "Can use filter with drop method by passing in indexes of filter:" 1183 | ] 1184 | }, 1185 | { 1186 | "cell_type": "code", 1187 | "execution_count": 40, 1188 | "metadata": {}, 1189 | "outputs": [ 1190 | { 1191 | "data": { 1192 | "text/html": [ 1193 | "
\n", 1194 | "\n", 1207 | "\n", 1208 | " \n", 1209 | " \n", 1210 | " \n", 1211 | " \n", 1212 | " \n", 1213 | " \n", 1214 | " \n", 1215 | " \n", 1216 | " \n", 1217 | " \n", 1218 | " \n", 1219 | " \n", 1220 | " \n", 1221 | " \n", 1222 | " \n", 1223 | " \n", 1224 | " \n", 1225 | " \n", 1226 | " \n", 1227 | " \n", 1228 | " \n", 1229 | " \n", 1230 | " \n", 1231 | " \n", 1232 | " \n", 1233 | " \n", 1234 | " \n", 1235 | " \n", 1236 | " \n", 1237 | " \n", 1238 | " \n", 1239 | " \n", 1240 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
3ironman@avenge.comNaNTonyStark
4cap@avenge.comNaNSteveRogers
\n", 1241 | "
" 1242 | ], 1243 | "text/plain": [ 1244 | " email full_name first last\n", 1245 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 1246 | "3 ironman@avenge.com NaN Tony Stark\n", 1247 | "4 cap@avenge.com NaN Steve Rogers" 1248 | ] 1249 | }, 1250 | "execution_count": 40, 1251 | "metadata": {}, 1252 | "output_type": "execute_result" 1253 | } 1254 | ], 1255 | "source": [ 1256 | "df.drop(index=df[df['last'] == 'Doe'].index)" 1257 | ] 1258 | }, 1259 | { 1260 | "cell_type": "code", 1261 | "execution_count": 42, 1262 | "metadata": {}, 1263 | "outputs": [ 1264 | { 1265 | "data": { 1266 | "text/html": [ 1267 | "
\n", 1268 | "\n", 1281 | "\n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
1JaneDoe@email.comJane DoeJaneDoe
2JohnDoe@email.comJohn DoeJohnDoe
3ironman@avenge.comNaNTonyStark
4cap@avenge.comNaNSteveRogers
\n", 1329 | "
" 1330 | ], 1331 | "text/plain": [ 1332 | " email full_name first last\n", 1333 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 1334 | "1 JaneDoe@email.com Jane Doe Jane Doe\n", 1335 | "2 JohnDoe@email.com John Doe John Doe\n", 1336 | "3 ironman@avenge.com NaN Tony Stark\n", 1337 | "4 cap@avenge.com NaN Steve Rogers" 1338 | ] 1339 | }, 1340 | "execution_count": 42, 1341 | "metadata": {}, 1342 | "output_type": "execute_result" 1343 | } 1344 | ], 1345 | "source": [ 1346 | "df" 1347 | ] 1348 | }, 1349 | { 1350 | "cell_type": "markdown", 1351 | "metadata": {}, 1352 | "source": [ 1353 | "Corey thinks this is hard to read. Instead do this:" 1354 | ] 1355 | }, 1356 | { 1357 | "cell_type": "code", 1358 | "execution_count": 43, 1359 | "metadata": {}, 1360 | "outputs": [ 1361 | { 1362 | "data": { 1363 | "text/html": [ 1364 | "
\n", 1365 | "\n", 1378 | "\n", 1379 | " \n", 1380 | " \n", 1381 | " \n", 1382 | " \n", 1383 | " \n", 1384 | " \n", 1385 | " \n", 1386 | " \n", 1387 | " \n", 1388 | " \n", 1389 | " \n", 1390 | " \n", 1391 | " \n", 1392 | " \n", 1393 | " \n", 1394 | " \n", 1395 | " \n", 1396 | " \n", 1397 | " \n", 1398 | " \n", 1399 | " \n", 1400 | " \n", 1401 | " \n", 1402 | " \n", 1403 | " \n", 1404 | " \n", 1405 | " \n", 1406 | " \n", 1407 | " \n", 1408 | " \n", 1409 | " \n", 1410 | " \n", 1411 | "
emailfull_namefirstlast
0CoreyMSchafer@gmail.comCorey SchaferCoreySchafer
3ironman@avenge.comNaNTonyStark
4cap@avenge.comNaNSteveRogers
\n", 1412 | "
" 1413 | ], 1414 | "text/plain": [ 1415 | " email full_name first last\n", 1416 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n", 1417 | "3 ironman@avenge.com NaN Tony Stark\n", 1418 | "4 cap@avenge.com NaN Steve Rogers" 1419 | ] 1420 | }, 1421 | "execution_count": 43, 1422 | "metadata": {}, 1423 | "output_type": "execute_result" 1424 | } 1425 | ], 1426 | "source": [ 1427 | "filt = df['last'] == 'Doe'\n", 1428 | "df.drop(index=df[filt].index)" 1429 | ] 1430 | }, 1431 | { 1432 | "cell_type": "code", 1433 | "execution_count": null, 1434 | "metadata": {}, 1435 | "outputs": [], 1436 | "source": [] 1437 | } 1438 | ], 1439 | "metadata": { 1440 | "kernelspec": { 1441 | "display_name": "Python 3", 1442 | "language": "python", 1443 | "name": "python3" 1444 | }, 1445 | "language_info": { 1446 | "codemirror_mode": { 1447 | "name": "ipython", 1448 | "version": 3 1449 | }, 1450 | "file_extension": ".py", 1451 | "mimetype": "text/x-python", 1452 | "name": "python", 1453 | "nbconvert_exporter": "python", 1454 | "pygments_lexer": "ipython3", 1455 | "version": "3.6.9" 1456 | } 1457 | }, 1458 | "nbformat": 4, 1459 | "nbformat_minor": 2 1460 | } 1461 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pandas Tutorials 2 | Tutorials on how to use python pandas from [Corey Schafer](https://github.com/CoreyMSchafer). 3 | 4 | YouTube playlist: 5 | 6 | https://www.youtube.com/playlist?list=PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS 7 | 8 | Code snippets: 9 | 10 | https://github.com/CoreyMSchafer/code_snippets/tree/master/Python/Pandas 11 | 12 | Updating as course progresses. 13 | 14 | -------------------------------------------------------------------------------- /data/ETH_1h.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/ETH_1h.csv.zip -------------------------------------------------------------------------------- /data/README_2019.txt: -------------------------------------------------------------------------------- 1 | The Public 2019 Stack Overflow Developer Survey Results 2 | 3 | Description: 4 | 5 | The enclosed data set is the full, cleaned results of the 2019 Stack Overflow Developer Survey. Free response submissions and personally identifying information have been removed from the results to protect the privacy of respondents. There are three files besides this README: 6 | 7 | 1. survey_results_public.csv - CSV file with main survey results, one respondent per row and one column per answer 8 | 2. survey_results_schema.csv - CSV file with survey schema, i.e., the questions that correspond to each column name 9 | 3. so_survey_2019.pdf - PDF file of survey instrument 10 | 11 | The survey was fielded from January 23 to February 14, 2019. The median time spent on the survey for qualified responses was 23.3 minutes. 12 | 13 | Respondents were recruited primarily through channels owned by Stack Overflow. The top 5 sources of respondents were onsite messaging, blog posts, email lists, Meta posts, banner ads, and social media posts. Since respondents were recruited in this way, highly engaged users on Stack Overflow were more likely to notice the links for the survey and click to begin it. 14 | 15 | As an incentive, respondents who finished the survey could opt in to a "Census" badge if they completed the survey. 
16 | 17 | You can find the official published results here: 18 | 19 | https://insights.stackoverflow.com/survey/2019 20 | 21 | Find previous survey results here: 22 | 23 | https://insights.stackoverflow.com/survey 24 | 25 | Legal: 26 | 27 | This database - The Public 2019 Stack Overflow Developer Survey Results - is made available under the Open Database License (ODbL): http://opendatacommons.org/licenses/odbl/1.0/. Any rights in individual contents of the database are licensed under the Database Contents License: http://opendatacommons.org/licenses/dbcl/1.0/ 28 | 29 | TLDR: You are free to share, adapt, and create derivative works from The Public 2019 Stack Overflow Developer Survey Results as long as you attribute Stack Overflow, keep the database open (if you redistribute it), and continue to share-alike any adapted database under the ODbl. 30 | 31 | Acknowledgment: 32 | 33 | Massive, heartfelt thanks to all Stack Overflow contributors and lurking developers of the world who took part in the survey this year. We value your generous participation more than you know. <3 34 | -------------------------------------------------------------------------------- /data/so_survey_2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/so_survey_2019.pdf -------------------------------------------------------------------------------- /data/survey_results_public.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/survey_results_public.csv.zip -------------------------------------------------------------------------------- /data/survey_results_schema.csv: -------------------------------------------------------------------------------- 1 | Column,QuestionText 2 | Respondent,Randomized respondent ID number (not in order of survey response time) 3 | MainBranch,"Which of the following options best describes you today? Here, by ""developer"" we mean ""someone who writes code.""" 4 | Hobbyist,Do you code as a hobby? 5 | OpenSourcer,How often do you contribute to open source? 6 | OpenSource,How do you feel about the quality of open source software (OSS)? 7 | Employment,Which of the following best describes your current employment status? 8 | Country,In which country do you currently reside? 9 | Student,"Are you currently enrolled in a formal, degree-granting college or university program?" 10 | EdLevel,Which of the following best describes the highest level of formal education that you’ve completed? 11 | UndergradMajor,What was your main or most important field of study? 12 | EduOther,Which of the following types of non-degree education have you used or participated in? Please select all that apply. 13 | OrgSize,Approximately how many people are employed by the company or organization you work for? 14 | DevType,Which of the following describe you? Please select all that apply. 15 | YearsCode,"Including any education, how many years have you been coding?" 16 | Age1stCode,"At what age did you write your first line of code or program? (E.g., webpage, Hello World, Scratch project)" 17 | YearsCodePro,How many years have you coded professionally (as a part of your work)? 18 | CareerSat,"Overall, how satisfied are you with your career thus far?" 19 | JobSat,"How satisfied are you with your current job? 
(If you work multiple jobs, answer for the one you spend the most hours on.)" 20 | MgrIdiot,How confident are you that your manager knows what they’re doing? 21 | MgrMoney,Do you believe that you need to be a manager to make more money? 22 | MgrWant,Do you want to become a manager yourself in the future? 23 | JobSeek,Which of the following best describes your current job-seeking status? 24 | LastHireDate,When was the last time that you took a job with a new employer? 25 | LastInt,"In your most recent successful job interview (resulting in a job offer), you were asked to... (check all that apply)" 26 | FizzBuzz,Have you ever been asked to solve FizzBuzz in an interview? 27 | JobFactors,"Imagine that you are deciding between two job offers with the same compensation, benefits, and location. Of the following factors, which 3 are MOST important to you?" 28 | ResumeUpdate,"Think back to the last time you updated your resumé, CV, or an online profile on a job site. What is the PRIMARY reason that you did so?" 29 | CurrencySymbol,"Which currency do you use day-to-day? If your answer is complicated, please pick the one you're most comfortable estimating in." 30 | CurrencyDesc,"Which currency do you use day-to-day? If your answer is complicated, please pick the one you're most comfortable estimating in." 31 | CompTotal,"What is your current total compensation (salary, bonuses, and perks, before taxes and deductions), in `CurrencySymbol`? Please enter a whole number in the box below, without any punctuation. If you are paid hourly, please estimate an equivalent weekly, monthly, or yearly salary. If you prefer not to answer, please leave the box empty." 32 | CompFreq,"Is that compensation weekly, monthly, or yearly?" 33 | ConvertedComp,"Salary converted to annual USD salaries using the exchange rate on 2019-02-01, assuming 12 working months and 50 working weeks." 34 | WorkWeekHrs,"On average, how many hours per week do you work?" 35 | WorkPlan,How structured or planned is your work? 36 | WorkChallenge,"Of these options, what are your greatest challenges to productivity as a developer? Select up to 3:" 37 | WorkRemote,How often do you work remotely? 38 | WorkLoc,Where would you prefer to work? 39 | ImpSyn,"For the specific work you do, and the years of experience you have, how do you rate your own level of competence?" 40 | CodeRev,Do you review code as part of your work? 41 | CodeRevHrs,"On average, how many hours per week do you spend on code review?" 42 | UnitTests,Does your company regularly employ unit tests in the development of their products? 43 | PurchaseHow,"How does your company make decisions about purchasing new technology (cloud, AI, IoT, databases)?" 44 | PurchaseWhat,"What level of influence do you, personally, have over new technology purchases at your organization?" 45 | LanguageWorkedWith,"Which of the following programming, scripting, and markup languages have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the language and want to continue to do so, please check both boxes in that row.)" 46 | LanguageDesireNextYear,"Which of the following programming, scripting, and markup languages have you done extensive development work in over the past year, and which do you want to work in over the next year? 
(If you both worked with the language and want to continue to do so, please check both boxes in that row.)" 47 | DatabaseWorkedWith,"Which of the following database environments have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the database and want to continue to do so, please check both boxes in that row.)" 48 | DatabaseDesireNextYear,"Which of the following database environments have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the database and want to continue to do so, please check both boxes in that row.)" 49 | PlatformWorkedWith,"Which of the following platforms have you done extensive development work for over the past year? (If you both developed for the platform and want to continue to do so, please check both boxes in that row.)" 50 | PlatformDesireNextYear,"Which of the following platforms have you done extensive development work for over the past year? (If you both developed for the platform and want to continue to do so, please check both boxes in that row.)" 51 | WebFrameWorkedWith,"Which of the following web frameworks have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the framework and want to continue to do so, please check both boxes in that row.)" 52 | WebFrameDesireNextYear,"Which of the following web frameworks have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the framework and want to continue to do so, please check both boxes in that row.)" 53 | MiscTechWorkedWith,"Which of the following other frameworks, libraries, and tools have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the technology and want to continue to do so, please check both boxes in that row.)" 54 | MiscTechDesireNextYear,"Which of the following other frameworks, libraries, and tools have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the technology and want to continue to do so, please check both boxes in that row.)" 55 | DevEnviron,Which development environment(s) do you use regularly? Please check all that apply. 56 | OpSys,What is the primary operating system in which you work? 57 | Containers,"How do you use containers (Docker, Open Container Initiative (OCI), etc.)?" 58 | BlockchainOrg,How is your organization thinking about or implementing blockchain technology? 59 | BlockchainIs,Blockchain / cryptocurrency technology is primarily: 60 | BetterLife,Do you think people born today will have a better life than their parents? 61 | ITperson,"Are you the ""IT support person"" for your family?" 62 | OffOn,Have you tried turning it off and on again? 63 | SocialMedia,What social media site do you use the most? 64 | Extraversion,Do you prefer online chat or IRL conversations? 65 | ScreenName,What do you call it? 66 | SOVisit1st,"To the best of your memory, when did you first visit Stack Overflow?" 67 | SOVisitFreq,How frequently would you say you visit Stack Overflow? 68 | SOVisitTo,I visit Stack Overflow to... (check all that apply) 69 | SOFindAnswer,"On average, how many times a week do you find (and use) an answer on Stack Overflow?" 
70 | SOTimeSaved,"Think back to the last time you solved a coding problem using Stack Overflow, as well as the last time you solved a problem using a different resource. Which was faster?" 71 | SOHowMuchTime,"About how much time did you save? If you're not sure, please use your best estimate." 72 | SOAccount,Do you have a Stack Overflow account? 73 | SOPartFreq,"How frequently would you say you participate in Q&A on Stack Overflow? By participate we mean ask, answer, vote for, or comment on questions." 74 | SOJobs,Have you ever used or visited Stack Overflow Jobs? 75 | EntTeams,Have you ever used Stack Overflow for Enterprise or Stack Overflow for Teams? 76 | SOComm,Do you consider yourself a member of the Stack Overflow community? 77 | WelcomeChange,"Compared to last year, how welcome do you feel on Stack Overflow?" 78 | SONewContent,Would you like to see any of the following on Stack Overflow? Check all that apply. 79 | Age,"What is your age (in years)? If you prefer not to answer, you may leave this question blank." 80 | Gender,"Which of the following do you currently identify as? Please select all that apply. If you prefer not to answer, you may leave this question blank." 81 | Trans,Do you identify as transgender? 82 | Sexuality,"Which of the following do you currently identify as? Please select all that apply. If you prefer not to answer, you may leave this question blank." 83 | Ethnicity,"Which of the following do you identify as? Please check all that apply. If you prefer not to answer, you may leave this question blank." 84 | Dependents,"Do you have any dependents (e.g., children, elders, or others) that you care for?" 85 | SurveyLength,How do you feel about the length of the survey this year? 86 | SurveyEase,How easy or difficult was this survey to complete? 87 | -------------------------------------------------------------------------------- /data/survey_results_schema.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/survey_results_schema.csv.zip -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | matplotlib 3 | pandas 4 | sqlalchemy 5 | psycopg2 6 | --------------------------------------------------------------------------------
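A closing note on the notebooks above: the row additions in 06-Add Remove Rows and Columns.ipynb rely on DataFrame.append, which was deprecated in pandas 1.4 and removed in pandas 2.0. Since requirements.txt does not pin a pandas version, a fresh install will likely pull a release where only the pd.concat form works. A minimal sketch of the equivalent steps with pd.concat, rebuilding the df and df2 frames from that notebook:

import pandas as pd

# Rebuild the two frames used in the 06 notebook.
df = pd.DataFrame({
    "first": ["Corey", "Jane", "John"],
    "last": ["Schafer", "Doe", "Doe"],
    "email": ["CoreyMSchafer@gmail.com", "JaneDoe@email.com", "JohnDoe@email.com"],
})
df2 = pd.DataFrame({
    "first": ["Tony", "Steve"],
    "last": ["Stark", "Rogers"],
    "email": ["ironman@avenge.com", "cap@avenge.com"],
})

# Appending a single partial row: wrap the dict in a one-row DataFrame, then concat.
df = pd.concat([df, pd.DataFrame([{"first": "Tony"}])], ignore_index=True)

# Appending another DataFrame; missing columns are filled with NaN, just as with append.
df = pd.concat([df, df2], ignore_index=True)
print(df)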