├── .gitignore
├── 01-Loading Data.ipynb
├── 02-DataFrame and Series.ipynb
├── 03-Indexes.ipynb
├── 04-Filtering.ipynb
├── 05-Updating Rows and Columns.ipynb
├── 06-Add Remove Rows and Columns.ipynb
├── 07-Sorting Data.ipynb
├── 08-Grouping and Aggregating.ipynb
├── 09-Cleaning Data.ipynb
├── 10-Working with Dates and Time Series Data.ipynb
├── 11-Reading and Writing Data.ipynb
├── README.md
├── data
│   ├── ETH_1h.csv.zip
│   ├── README_2019.txt
│   ├── so_survey_2019.pdf
│   ├── survey_results_public.csv.zip
│   ├── survey_results_schema.csv
│   └── survey_results_schema.csv.zip
└── requirements.txt
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
105 | __pypackages__/
106 |
107 | # Celery stuff
108 | celerybeat-schedule
109 | celerybeat.pid
110 |
111 | # SageMath parsed files
112 | *.sage.py
113 |
114 | # Environments
115 | .env
116 | .venv
117 | env/
118 | venv/
119 | ENV/
120 | env.bak/
121 | venv.bak/
122 |
123 | # Spyder project settings
124 | .spyderproject
125 | .spyproject
126 |
127 | # Rope project settings
128 | .ropeproject
129 |
130 | # mkdocs documentation
131 | /site
132 |
133 | # mypy
134 | .mypy_cache/
135 | .dmypy.json
136 | dmypy.json
137 |
138 | # Pyre type checker
139 | .pyre/
140 |
141 | # pytype static type analyzer
142 | .pytype/
143 |
144 | # Cython debug symbols
145 | cython_debug/
146 |
147 | # PyCharm
148 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
150 | # and can be added to the global gitignore or merged into this file. For a more nuclear
151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
152 | #.idea/
153 |
154 | *.csv
155 | *.json
156 | *.tsv
157 | *.xlsx
158 |
159 |
--------------------------------------------------------------------------------
/02-DataFrame and Series.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# DataFrame and Series Basics"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 3,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 4,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "person = {\n",
26 | " \"first\": \"Phil\",\n",
27 | " \"last\": \"Lembo\",\n",
28 | " \"email\": \"phil.lembo@gmail.com\"\n",
29 | "}"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 5,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "people = {\n",
39 | " \"first\": [\"Phil\"],\n",
40 | " \"last\": [\"Lembo\"],\n",
41 | " \"email\": [\"phil.lembo@gmail.com\"]\n",
42 | "}"
43 | ]
44 | },
45 | {
46 | "cell_type": "code",
47 | "execution_count": 6,
48 | "metadata": {},
49 | "outputs": [],
50 | "source": [
51 | "people = {\n",
52 | " \"first\": [\"Phil\", \"Jane\", \"Rob\"],\n",
53 | " \"last\": [\"Lembo\", \"Doe\", \"Roe\"],\n",
54 | " \"email\": [\"phil.lembo@gmail.com\", \"janedoe@email.com\", \"robroe@email.com\"]\n",
55 | "}"
56 | ]
57 | },
58 | {
59 | "cell_type": "code",
60 | "execution_count": 7,
61 | "metadata": {},
62 | "outputs": [
63 | {
64 | "data": {
65 | "text/plain": [
66 | "['phil.lembo@gmail.com', 'janedoe@email.com', 'robroe@email.com']"
67 | ]
68 | },
69 | "execution_count": 7,
70 | "metadata": {},
71 | "output_type": "execute_result"
72 | }
73 | ],
74 | "source": [
75 | "people[\"email\"]"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": 8,
81 | "metadata": {},
82 | "outputs": [],
83 | "source": [
84 | "df = pd.DataFrame(people)"
85 | ]
86 | },
87 | {
88 | "cell_type": "code",
89 | "execution_count": 9,
90 | "metadata": {},
91 | "outputs": [
92 | {
93 | "data": {
94 | "text/html": [
95 | "
\n",
96 | "\n",
109 | "
\n",
110 | " \n",
111 | " \n",
112 | " | \n",
113 | " first | \n",
114 | " last | \n",
115 | " email | \n",
116 | "
\n",
117 | " \n",
118 | " \n",
119 | " \n",
120 | " 0 | \n",
121 | " Phil | \n",
122 | " Lembo | \n",
123 | " phil.lembo@gmail.com | \n",
124 | "
\n",
125 | " \n",
126 | " 1 | \n",
127 | " Jane | \n",
128 | " Doe | \n",
129 | " janedoe@email.com | \n",
130 | "
\n",
131 | " \n",
132 | " 2 | \n",
133 | " Rob | \n",
134 | " Roe | \n",
135 | " robroe@email.com | \n",
136 | "
\n",
137 | " \n",
138 | "
\n",
139 | "
"
140 | ],
141 | "text/plain": [
142 | " first last email\n",
143 | "0 Phil Lembo phil.lembo@gmail.com\n",
144 | "1 Jane Doe janedoe@email.com\n",
145 | "2 Rob Roe robroe@email.com"
146 | ]
147 | },
148 | "execution_count": 9,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "df"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "metadata": {},
160 | "source": [
161 | "There are two major data structures in pandas: DataFrame and Series. Here, df is a DataFrame."
162 | ]
163 | },
164 | {
165 | "cell_type": "code",
166 | "execution_count": 40,
167 | "metadata": {},
168 | "outputs": [
169 | {
170 | "data": {
171 | "text/plain": [
172 | "pandas.core.frame.DataFrame"
173 | ]
174 | },
175 | "execution_count": 40,
176 | "metadata": {},
177 | "output_type": "execute_result"
178 | }
179 | ],
180 | "source": [
181 | "type(df)"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 10,
187 | "metadata": {},
188 | "outputs": [
189 | {
190 | "data": {
191 | "text/plain": [
192 | "0 phil.lembo@gmail.com\n",
193 | "1 janedoe@email.com\n",
194 | "2 robroe@email.com\n",
195 | "Name: email, dtype: object"
196 | ]
197 | },
198 | "execution_count": 10,
199 | "metadata": {},
200 | "output_type": "execute_result"
201 | }
202 | ],
203 | "source": [
204 | "df['email']"
205 | ]
206 | },
207 | {
208 | "cell_type": "markdown",
209 | "metadata": {},
210 | "source": [
211 | "The type of the data stored in email is a pandas Series."
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 11,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": [
222 | "pandas.core.series.Series"
223 | ]
224 | },
225 | "execution_count": 11,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "type(df['email'])"
232 | ]
233 | },
234 | {
235 | "cell_type": "markdown",
236 | "metadata": {},
237 | "source": [
238 | "This is an alternative way of accessing the email column (attribute access), but the column name can clash with an existing DataFrame attribute or method, so bracket notation is safer."
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": 12,
244 | "metadata": {},
245 | "outputs": [
246 | {
247 | "data": {
248 | "text/plain": [
249 | "0 phil.lembo@gmail.com\n",
250 | "1 janedoe@email.com\n",
251 | "2 robroe@email.com\n",
252 | "Name: email, dtype: object"
253 | ]
254 | },
255 | "execution_count": 12,
256 | "metadata": {},
257 | "output_type": "execute_result"
258 | }
259 | ],
260 | "source": [
261 | "df.email"
262 | ]
263 | },
264 | {
265 | "cell_type": "markdown",
266 | "metadata": {},
267 | "source": [
268 | "Pass a list of columns using double-bracket notation."
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 13,
274 | "metadata": {},
275 | "outputs": [
276 | {
277 | "data": {
278 | "text/html": [
279 | "\n",
280 | "\n",
293 | "
\n",
294 | " \n",
295 | " \n",
296 | " | \n",
297 | " last | \n",
298 | " email | \n",
299 | "
\n",
300 | " \n",
301 | " \n",
302 | " \n",
303 | " 0 | \n",
304 | " Lembo | \n",
305 | " phil.lembo@gmail.com | \n",
306 | "
\n",
307 | " \n",
308 | " 1 | \n",
309 | " Doe | \n",
310 | " janedoe@email.com | \n",
311 | "
\n",
312 | " \n",
313 | " 2 | \n",
314 | " Roe | \n",
315 | " robroe@email.com | \n",
316 | "
\n",
317 | " \n",
318 | "
\n",
319 | "
"
320 | ],
321 | "text/plain": [
322 | " last email\n",
323 | "0 Lembo phil.lembo@gmail.com\n",
324 | "1 Doe janedoe@email.com\n",
325 | "2 Roe robroe@email.com"
326 | ]
327 | },
328 | "execution_count": 13,
329 | "metadata": {},
330 | "output_type": "execute_result"
331 | }
332 | ],
333 | "source": [
334 | "df[['last', 'email']]"
335 | ]
336 | },
337 | {
338 | "cell_type": "markdown",
339 | "metadata": {},
340 | "source": [
341 | "This retrieves a DataFrame."
342 | ]
343 | },
344 | {
345 | "cell_type": "code",
346 | "execution_count": 14,
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "data": {
351 | "text/plain": [
352 | "pandas.core.frame.DataFrame"
353 | ]
354 | },
355 | "execution_count": 14,
356 | "metadata": {},
357 | "output_type": "execute_result"
358 | }
359 | ],
360 | "source": [
361 | "type(df[['last', 'email']])"
362 | ]
363 | },
364 | {
365 | "cell_type": "markdown",
366 | "metadata": {},
367 | "source": [
368 | "Show columns in dataframe."
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": 15,
374 | "metadata": {},
375 | "outputs": [
376 | {
377 | "data": {
378 | "text/plain": [
379 | "Index(['first', 'last', 'email'], dtype='object')"
380 | ]
381 | },
382 | "execution_count": 15,
383 | "metadata": {},
384 | "output_type": "execute_result"
385 | }
386 | ],
387 | "source": [
388 | "df.columns"
389 | ]
390 | },
391 | {
392 | "cell_type": "markdown",
393 | "metadata": {},
394 | "source": [
395 | "To get rows, use loc and iloc indexers.\n",
396 | "\n",
397 | "iloc = \"integer location\", search by numeric index\n",
398 | "\n",
399 | "loc = search by label\n"
400 | ]
401 | },
402 | {
403 | "cell_type": "code",
404 | "execution_count": 16,
405 | "metadata": {},
406 | "outputs": [
407 | {
408 | "data": {
409 | "text/plain": [
410 | "first Phil\n",
411 | "last Lembo\n",
412 | "email phil.lembo@gmail.com\n",
413 | "Name: 0, dtype: object"
414 | ]
415 | },
416 | "execution_count": 16,
417 | "metadata": {},
418 | "output_type": "execute_result"
419 | }
420 | ],
421 | "source": [
422 | "df.iloc[0]"
423 | ]
424 | },
425 | {
426 | "cell_type": "code",
427 | "execution_count": 17,
428 | "metadata": {},
429 | "outputs": [
430 | {
431 | "data": {
432 | "text/plain": [
433 | "pandas.core.series.Series"
434 | ]
435 | },
436 | "execution_count": 17,
437 | "metadata": {},
438 | "output_type": "execute_result"
439 | }
440 | ],
441 | "source": [
442 | "type(df.iloc[0])"
443 | ]
444 | },
445 | {
446 | "cell_type": "code",
447 | "execution_count": 18,
448 | "metadata": {},
449 | "outputs": [
450 | {
451 | "data": {
452 | "text/html": [
453 | "\n",
454 | "\n",
467 | "
\n",
468 | " \n",
469 | " \n",
470 | " | \n",
471 | " first | \n",
472 | " last | \n",
473 | " email | \n",
474 | "
\n",
475 | " \n",
476 | " \n",
477 | " \n",
478 | " 0 | \n",
479 | " Phil | \n",
480 | " Lembo | \n",
481 | " phil.lembo@gmail.com | \n",
482 | "
\n",
483 | " \n",
484 | " 1 | \n",
485 | " Jane | \n",
486 | " Doe | \n",
487 | " janedoe@email.com | \n",
488 | "
\n",
489 | " \n",
490 | "
\n",
491 | "
"
492 | ],
493 | "text/plain": [
494 | " first last email\n",
495 | "0 Phil Lembo phil.lembo@gmail.com\n",
496 | "1 Jane Doe janedoe@email.com"
497 | ]
498 | },
499 | "execution_count": 18,
500 | "metadata": {},
501 | "output_type": "execute_result"
502 | }
503 | ],
504 | "source": [
505 | "df.iloc[[0, 1]]"
506 | ]
507 | },
508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "Grab the first two rows of the email column (the third column, integer position 2)."
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": 19,
518 | "metadata": {},
519 | "outputs": [
520 | {
521 | "data": {
522 | "text/plain": [
523 | "0 phil.lembo@gmail.com\n",
524 | "1 janedoe@email.com\n",
525 | "Name: email, dtype: object"
526 | ]
527 | },
528 | "execution_count": 19,
529 | "metadata": {},
530 | "output_type": "execute_result"
531 | }
532 | ],
533 | "source": [
534 | "df.iloc[[0, 1], 2]"
535 | ]
536 | },
537 | {
538 | "cell_type": "markdown",
539 | "metadata": {},
540 | "source": [
541 | "Using loc without custom labels looks a lot like iloc, because the default index labels are the integers 0, 1, 2, ..."
542 | ]
543 | },
544 | {
545 | "cell_type": "code",
546 | "execution_count": 20,
547 | "metadata": {},
548 | "outputs": [
549 | {
550 | "data": {
551 | "text/plain": [
552 | "first Phil\n",
553 | "last Lembo\n",
554 | "email phil.lembo@gmail.com\n",
555 | "Name: 0, dtype: object"
556 | ]
557 | },
558 | "execution_count": 20,
559 | "metadata": {},
560 | "output_type": "execute_result"
561 | }
562 | ],
563 | "source": [
564 | "df.loc[0]"
565 | ]
566 | },
567 | {
568 | "cell_type": "code",
569 | "execution_count": 21,
570 | "metadata": {},
571 | "outputs": [
572 | {
573 | "data": {
574 | "text/html": [
575 | "\n",
576 | "\n",
589 | "
\n",
590 | " \n",
591 | " \n",
592 | " | \n",
593 | " first | \n",
594 | " last | \n",
595 | " email | \n",
596 | "
\n",
597 | " \n",
598 | " \n",
599 | " \n",
600 | " 0 | \n",
601 | " Phil | \n",
602 | " Lembo | \n",
603 | " phil.lembo@gmail.com | \n",
604 | "
\n",
605 | " \n",
606 | " 1 | \n",
607 | " Jane | \n",
608 | " Doe | \n",
609 | " janedoe@email.com | \n",
610 | "
\n",
611 | " \n",
612 | "
\n",
613 | "
"
614 | ],
615 | "text/plain": [
616 | " first last email\n",
617 | "0 Phil Lembo phil.lembo@gmail.com\n",
618 | "1 Jane Doe janedoe@email.com"
619 | ]
620 | },
621 | "execution_count": 21,
622 | "metadata": {},
623 | "output_type": "execute_result"
624 | }
625 | ],
626 | "source": [
627 | "df.loc[[0, 1]]"
628 | ]
629 | },
630 | {
631 | "cell_type": "markdown",
632 | "metadata": {},
633 | "source": [
634 | "But now we can use a column label."
635 | ]
636 | },
637 | {
638 | "cell_type": "code",
639 | "execution_count": 22,
640 | "metadata": {},
641 | "outputs": [
642 | {
643 | "data": {
644 | "text/plain": [
645 | "0 phil.lembo@gmail.com\n",
646 | "1 janedoe@email.com\n",
647 | "Name: email, dtype: object"
648 | ]
649 | },
650 | "execution_count": 22,
651 | "metadata": {},
652 | "output_type": "execute_result"
653 | }
654 | ],
655 | "source": [
656 | "df.loc[[0, 1], 'email']"
657 | ]
658 | },
659 | {
660 | "cell_type": "markdown",
661 | "metadata": {},
662 | "source": [
663 | "... or a list of labels!"
664 | ]
665 | },
666 | {
667 | "cell_type": "code",
668 | "execution_count": 23,
669 | "metadata": {},
670 | "outputs": [
671 | {
672 | "data": {
673 | "text/html": [
674 | "\n",
675 | "\n",
688 | "
\n",
689 | " \n",
690 | " \n",
691 | " | \n",
692 | " email | \n",
693 | " last | \n",
694 | "
\n",
695 | " \n",
696 | " \n",
697 | " \n",
698 | " 0 | \n",
699 | " phil.lembo@gmail.com | \n",
700 | " Lembo | \n",
701 | "
\n",
702 | " \n",
703 | " 1 | \n",
704 | " janedoe@email.com | \n",
705 | " Doe | \n",
706 | "
\n",
707 | " \n",
708 | "
\n",
709 | "
"
710 | ],
711 | "text/plain": [
712 | " email last\n",
713 | "0 phil.lembo@gmail.com Lembo\n",
714 | "1 janedoe@email.com Doe"
715 | ]
716 | },
717 | "execution_count": 23,
718 | "metadata": {},
719 | "output_type": "execute_result"
720 | }
721 | ],
722 | "source": [
723 | "df.loc[[0, 1], ['email', 'last']]"
724 | ]
725 | },
726 | {
727 | "cell_type": "code",
728 | "execution_count": 24,
729 | "metadata": {},
730 | "outputs": [],
731 | "source": [
732 | "res_df = pd.read_csv('data/survey_results_public.csv')\n",
733 | "schema_df = pd.read_csv('data/survey_results_schema.csv')\n",
734 | "pd.set_option('display.max_columns', 85)\n",
735 | "pd.set_option('display.max_rows', 85)"
736 | ]
737 | },
738 | {
739 | "cell_type": "markdown",
740 | "metadata": {},
741 | "source": [
742 | "Basic characteristics of dataframe (number of rows, number of columns)."
743 | ]
744 | },
745 | {
746 | "cell_type": "code",
747 | "execution_count": 25,
748 | "metadata": {},
749 | "outputs": [
750 | {
751 | "data": {
752 | "text/plain": [
753 | "(88883, 85)"
754 | ]
755 | },
756 | "execution_count": 25,
757 | "metadata": {},
758 | "output_type": "execute_result"
759 | }
760 | ],
761 | "source": [
762 | "res_df.shape"
763 | ]
764 | },
765 | {
766 | "cell_type": "markdown",
767 | "metadata": {},
768 | "source": [
769 | "List all the column labels."
770 | ]
771 | },
772 | {
773 | "cell_type": "code",
774 | "execution_count": 26,
775 | "metadata": {},
776 | "outputs": [
777 | {
778 | "data": {
779 | "text/plain": [
780 | "Index(['Respondent', 'MainBranch', 'Hobbyist', 'OpenSourcer', 'OpenSource',\n",
781 | " 'Employment', 'Country', 'Student', 'EdLevel', 'UndergradMajor',\n",
782 | " 'EduOther', 'OrgSize', 'DevType', 'YearsCode', 'Age1stCode',\n",
783 | " 'YearsCodePro', 'CareerSat', 'JobSat', 'MgrIdiot', 'MgrMoney',\n",
784 | " 'MgrWant', 'JobSeek', 'LastHireDate', 'LastInt', 'FizzBuzz',\n",
785 | " 'JobFactors', 'ResumeUpdate', 'CurrencySymbol', 'CurrencyDesc',\n",
786 | " 'CompTotal', 'CompFreq', 'ConvertedComp', 'WorkWeekHrs', 'WorkPlan',\n",
787 | " 'WorkChallenge', 'WorkRemote', 'WorkLoc', 'ImpSyn', 'CodeRev',\n",
788 | " 'CodeRevHrs', 'UnitTests', 'PurchaseHow', 'PurchaseWhat',\n",
789 | " 'LanguageWorkedWith', 'LanguageDesireNextYear', 'DatabaseWorkedWith',\n",
790 | " 'DatabaseDesireNextYear', 'PlatformWorkedWith',\n",
791 | " 'PlatformDesireNextYear', 'WebFrameWorkedWith',\n",
792 | " 'WebFrameDesireNextYear', 'MiscTechWorkedWith',\n",
793 | " 'MiscTechDesireNextYear', 'DevEnviron', 'OpSys', 'Containers',\n",
794 | " 'BlockchainOrg', 'BlockchainIs', 'BetterLife', 'ITperson', 'OffOn',\n",
795 | " 'SocialMedia', 'Extraversion', 'ScreenName', 'SOVisit1st',\n",
796 | " 'SOVisitFreq', 'SOVisitTo', 'SOFindAnswer', 'SOTimeSaved',\n",
797 | " 'SOHowMuchTime', 'SOAccount', 'SOPartFreq', 'SOJobs', 'EntTeams',\n",
798 | " 'SOComm', 'WelcomeChange', 'SONewContent', 'Age', 'Gender', 'Trans',\n",
799 | " 'Sexuality', 'Ethnicity', 'Dependents', 'SurveyLength', 'SurveyEase'],\n",
800 | " dtype='object')"
801 | ]
802 | },
803 | "execution_count": 26,
804 | "metadata": {},
805 | "output_type": "execute_result"
806 | }
807 | ],
808 | "source": [
809 | "res_df.columns"
810 | ]
811 | },
812 | {
813 | "cell_type": "markdown",
814 | "metadata": {},
815 | "source": [
816 | "All responses in the Hobbyist column."
817 | ]
818 | },
819 | {
820 | "cell_type": "code",
821 | "execution_count": 27,
822 | "metadata": {},
823 | "outputs": [
824 | {
825 | "data": {
826 | "text/plain": [
827 | "0 Yes\n",
828 | "1 No\n",
829 | "2 Yes\n",
830 | "3 No\n",
831 | "4 Yes\n",
832 | " ... \n",
833 | "88878 Yes\n",
834 | "88879 No\n",
835 | "88880 No\n",
836 | "88881 No\n",
837 | "88882 Yes\n",
838 | "Name: Hobbyist, Length: 88883, dtype: object"
839 | ]
840 | },
841 | "execution_count": 27,
842 | "metadata": {},
843 | "output_type": "execute_result"
844 | }
845 | ],
846 | "source": [
847 | "res_df['Hobbyist']"
848 | ]
849 | },
850 | {
851 | "cell_type": "markdown",
852 | "metadata": {},
853 | "source": [
854 | "Count of each response to the question ('Yes' and 'No')."
855 | ]
856 | },
857 | {
858 | "cell_type": "code",
859 | "execution_count": 28,
860 | "metadata": {},
861 | "outputs": [
862 | {
863 | "data": {
864 | "text/plain": [
865 | "Yes 71257\n",
866 | "No 17626\n",
867 | "Name: Hobbyist, dtype: int64"
868 | ]
869 | },
870 | "execution_count": 28,
871 | "metadata": {},
872 | "output_type": "execute_result"
873 | }
874 | ],
875 | "source": [
876 | "res_df['Hobbyist'].value_counts()"
877 | ]
878 | },
879 | {
880 | "cell_type": "markdown",
881 | "metadata": {},
882 | "source": [
883 | "All responses from first row."
884 | ]
885 | },
886 | {
887 | "cell_type": "code",
888 | "execution_count": 29,
889 | "metadata": {},
890 | "outputs": [
891 | {
892 | "data": {
893 | "text/plain": [
894 | "Respondent 1\n",
895 | "MainBranch I am a student who is learning to code\n",
896 | "Hobbyist Yes\n",
897 | "OpenSourcer Never\n",
898 | "OpenSource The quality of OSS and closed source software ...\n",
899 | "Employment Not employed, and not looking for work\n",
900 | "Country United Kingdom\n",
901 | "Student No\n",
902 | "EdLevel Primary/elementary school\n",
903 | "UndergradMajor NaN\n",
904 | "EduOther Taught yourself a new language, framework, or ...\n",
905 | "OrgSize NaN\n",
906 | "DevType NaN\n",
907 | "YearsCode 4\n",
908 | "Age1stCode 10\n",
909 | "YearsCodePro NaN\n",
910 | "CareerSat NaN\n",
911 | "JobSat NaN\n",
912 | "MgrIdiot NaN\n",
913 | "MgrMoney NaN\n",
914 | "MgrWant NaN\n",
915 | "JobSeek NaN\n",
916 | "LastHireDate NaN\n",
917 | "LastInt NaN\n",
918 | "FizzBuzz NaN\n",
919 | "JobFactors NaN\n",
920 | "ResumeUpdate NaN\n",
921 | "CurrencySymbol NaN\n",
922 | "CurrencyDesc NaN\n",
923 | "CompTotal NaN\n",
924 | "CompFreq NaN\n",
925 | "ConvertedComp NaN\n",
926 | "WorkWeekHrs NaN\n",
927 | "WorkPlan NaN\n",
928 | "WorkChallenge NaN\n",
929 | "WorkRemote NaN\n",
930 | "WorkLoc NaN\n",
931 | "ImpSyn NaN\n",
932 | "CodeRev NaN\n",
933 | "CodeRevHrs NaN\n",
934 | "UnitTests NaN\n",
935 | "PurchaseHow NaN\n",
936 | "PurchaseWhat NaN\n",
937 | "LanguageWorkedWith HTML/CSS;Java;JavaScript;Python\n",
938 | "LanguageDesireNextYear C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL\n",
939 | "DatabaseWorkedWith SQLite\n",
940 | "DatabaseDesireNextYear MySQL\n",
941 | "PlatformWorkedWith MacOS;Windows\n",
942 | "PlatformDesireNextYear Android;Arduino;Windows\n",
943 | "WebFrameWorkedWith Django;Flask\n",
944 | "WebFrameDesireNextYear Flask;jQuery\n",
945 | "MiscTechWorkedWith Node.js\n",
946 | "MiscTechDesireNextYear Node.js\n",
947 | "DevEnviron IntelliJ;Notepad++;PyCharm\n",
948 | "OpSys Windows\n",
949 | "Containers I do not use containers\n",
950 | "BlockchainOrg NaN\n",
951 | "BlockchainIs NaN\n",
952 | "BetterLife Yes\n",
953 | "ITperson Fortunately, someone else has that title\n",
954 | "OffOn Yes\n",
955 | "SocialMedia Twitter\n",
956 | "Extraversion Online\n",
957 | "ScreenName Username\n",
958 | "SOVisit1st 2017\n",
959 | "SOVisitFreq A few times per month or weekly\n",
960 | "SOVisitTo Find answers to specific questions;Learn how t...\n",
961 | "SOFindAnswer 3-5 times per week\n",
962 | "SOTimeSaved Stack Overflow was much faster\n",
963 | "SOHowMuchTime 31-60 minutes\n",
964 | "SOAccount No\n",
965 | "SOPartFreq NaN\n",
966 | "SOJobs No, I didn't know that Stack Overflow had a jo...\n",
967 | "EntTeams No, and I don't know what those are\n",
968 | "SOComm Neutral\n",
969 | "WelcomeChange Just as welcome now as I felt last year\n",
970 | "SONewContent Tech articles written by other developers;Indu...\n",
971 | "Age 14\n",
972 | "Gender Man\n",
973 | "Trans No\n",
974 | "Sexuality Straight / Heterosexual\n",
975 | "Ethnicity NaN\n",
976 | "Dependents No\n",
977 | "SurveyLength Appropriate in length\n",
978 | "SurveyEase Neither easy nor difficult\n",
979 | "Name: 0, dtype: object"
980 | ]
981 | },
982 | "execution_count": 29,
983 | "metadata": {},
984 | "output_type": "execute_result"
985 | }
986 | ],
987 | "source": [
988 | "res_df.loc[0]"
989 | ]
990 | },
991 | {
992 | "cell_type": "markdown",
993 | "metadata": {},
994 | "source": [
995 | "Get responses to Hobbyist question in first three rows by passing in a list of rows."
996 | ]
997 | },
998 | {
999 | "cell_type": "code",
1000 | "execution_count": 30,
1001 | "metadata": {},
1002 | "outputs": [
1003 | {
1004 | "data": {
1005 | "text/plain": [
1006 | "0 Yes\n",
1007 | "1 No\n",
1008 | "2 Yes\n",
1009 | "Name: Hobbyist, dtype: object"
1010 | ]
1011 | },
1012 | "execution_count": 30,
1013 | "metadata": {},
1014 | "output_type": "execute_result"
1015 | }
1016 | ],
1017 | "source": [
1018 | "res_df.loc[[0, 1, 2], 'Hobbyist']"
1019 | ]
1020 | },
1021 | {
1022 | "cell_type": "markdown",
1023 | "metadata": {},
1024 | "source": [
1025 | "Can also use slice notation to retrieve a range of rows."
1026 | ]
1027 | },
1028 | {
1029 | "cell_type": "code",
1030 | "execution_count": 34,
1031 | "metadata": {},
1032 | "outputs": [
1033 | {
1034 | "data": {
1035 | "text/html": [
1036 | "\n",
1037 | "\n",
1050 | "
\n",
1051 | " \n",
1052 | " \n",
1053 | " | \n",
1054 | " Respondent | \n",
1055 | " MainBranch | \n",
1056 | " Hobbyist | \n",
1057 | " OpenSourcer | \n",
1058 | " OpenSource | \n",
1059 | " Employment | \n",
1060 | " Country | \n",
1061 | " Student | \n",
1062 | " EdLevel | \n",
1063 | " UndergradMajor | \n",
1064 | " EduOther | \n",
1065 | " OrgSize | \n",
1066 | " DevType | \n",
1067 | " YearsCode | \n",
1068 | " Age1stCode | \n",
1069 | " YearsCodePro | \n",
1070 | " CareerSat | \n",
1071 | " JobSat | \n",
1072 | " MgrIdiot | \n",
1073 | " MgrMoney | \n",
1074 | " MgrWant | \n",
1075 | " JobSeek | \n",
1076 | " LastHireDate | \n",
1077 | " LastInt | \n",
1078 | " FizzBuzz | \n",
1079 | " JobFactors | \n",
1080 | " ResumeUpdate | \n",
1081 | " CurrencySymbol | \n",
1082 | " CurrencyDesc | \n",
1083 | " CompTotal | \n",
1084 | " CompFreq | \n",
1085 | " ConvertedComp | \n",
1086 | " WorkWeekHrs | \n",
1087 | " WorkPlan | \n",
1088 | " WorkChallenge | \n",
1089 | " WorkRemote | \n",
1090 | " WorkLoc | \n",
1091 | " ImpSyn | \n",
1092 | " CodeRev | \n",
1093 | " CodeRevHrs | \n",
1094 | " UnitTests | \n",
1095 | " PurchaseHow | \n",
1096 | " PurchaseWhat | \n",
1097 | " LanguageWorkedWith | \n",
1098 | " LanguageDesireNextYear | \n",
1099 | " DatabaseWorkedWith | \n",
1100 | " DatabaseDesireNextYear | \n",
1101 | " PlatformWorkedWith | \n",
1102 | " PlatformDesireNextYear | \n",
1103 | " WebFrameWorkedWith | \n",
1104 | " WebFrameDesireNextYear | \n",
1105 | " MiscTechWorkedWith | \n",
1106 | " MiscTechDesireNextYear | \n",
1107 | " DevEnviron | \n",
1108 | " OpSys | \n",
1109 | " Containers | \n",
1110 | " BlockchainOrg | \n",
1111 | " BlockchainIs | \n",
1112 | " BetterLife | \n",
1113 | " ITperson | \n",
1114 | " OffOn | \n",
1115 | " SocialMedia | \n",
1116 | " Extraversion | \n",
1117 | " ScreenName | \n",
1118 | " SOVisit1st | \n",
1119 | " SOVisitFreq | \n",
1120 | " SOVisitTo | \n",
1121 | " SOFindAnswer | \n",
1122 | " SOTimeSaved | \n",
1123 | " SOHowMuchTime | \n",
1124 | " SOAccount | \n",
1125 | " SOPartFreq | \n",
1126 | " SOJobs | \n",
1127 | " EntTeams | \n",
1128 | " SOComm | \n",
1129 | " WelcomeChange | \n",
1130 | " SONewContent | \n",
1131 | " Age | \n",
1132 | " Gender | \n",
1133 | " Trans | \n",
1134 | " Sexuality | \n",
1135 | " Ethnicity | \n",
1136 | " Dependents | \n",
1137 | " SurveyLength | \n",
1138 | " SurveyEase | \n",
1139 | "
\n",
1140 | " \n",
1141 | " \n",
1142 | " \n",
1143 | " 0 | \n",
1144 | " 1 | \n",
1145 | " I am a student who is learning to code | \n",
1146 | " Yes | \n",
1147 | " Never | \n",
1148 | " The quality of OSS and closed source software ... | \n",
1149 | " Not employed, and not looking for work | \n",
1150 | " United Kingdom | \n",
1151 | " No | \n",
1152 | " Primary/elementary school | \n",
1153 | " NaN | \n",
1154 | " Taught yourself a new language, framework, or ... | \n",
1155 | " NaN | \n",
1156 | " NaN | \n",
1157 | " 4 | \n",
1158 | " 10 | \n",
1159 | " NaN | \n",
1160 | " NaN | \n",
1161 | " NaN | \n",
1162 | " NaN | \n",
1163 | " NaN | \n",
1164 | " NaN | \n",
1165 | " NaN | \n",
1166 | " NaN | \n",
1167 | " NaN | \n",
1168 | " NaN | \n",
1169 | " NaN | \n",
1170 | " NaN | \n",
1171 | " NaN | \n",
1172 | " NaN | \n",
1173 | " NaN | \n",
1174 | " NaN | \n",
1175 | " NaN | \n",
1176 | " NaN | \n",
1177 | " NaN | \n",
1178 | " NaN | \n",
1179 | " NaN | \n",
1180 | " NaN | \n",
1181 | " NaN | \n",
1182 | " NaN | \n",
1183 | " NaN | \n",
1184 | " NaN | \n",
1185 | " NaN | \n",
1186 | " NaN | \n",
1187 | " HTML/CSS;Java;JavaScript;Python | \n",
1188 | " C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL | \n",
1189 | " SQLite | \n",
1190 | " MySQL | \n",
1191 | " MacOS;Windows | \n",
1192 | " Android;Arduino;Windows | \n",
1193 | " Django;Flask | \n",
1194 | " Flask;jQuery | \n",
1195 | " Node.js | \n",
1196 | " Node.js | \n",
1197 | " IntelliJ;Notepad++;PyCharm | \n",
1198 | " Windows | \n",
1199 | " I do not use containers | \n",
1200 | " NaN | \n",
1201 | " NaN | \n",
1202 | " Yes | \n",
1203 | " Fortunately, someone else has that title | \n",
1204 | " Yes | \n",
1205 | " Twitter | \n",
1206 | " Online | \n",
1207 | " Username | \n",
1208 | " 2017 | \n",
1209 | " A few times per month or weekly | \n",
1210 | " Find answers to specific questions;Learn how t... | \n",
1211 | " 3-5 times per week | \n",
1212 | " Stack Overflow was much faster | \n",
1213 | " 31-60 minutes | \n",
1214 | " No | \n",
1215 | " NaN | \n",
1216 | " No, I didn't know that Stack Overflow had a jo... | \n",
1217 | " No, and I don't know what those are | \n",
1218 | " Neutral | \n",
1219 | " Just as welcome now as I felt last year | \n",
1220 | " Tech articles written by other developers;Indu... | \n",
1221 | " 14.0 | \n",
1222 | " Man | \n",
1223 | " No | \n",
1224 | " Straight / Heterosexual | \n",
1225 | " NaN | \n",
1226 | " No | \n",
1227 | " Appropriate in length | \n",
1228 | " Neither easy nor difficult | \n",
1229 | "
\n",
1230 | " \n",
1231 | " 1 | \n",
1232 | " 2 | \n",
1233 | " I am a student who is learning to code | \n",
1234 | " No | \n",
1235 | " Less than once per year | \n",
1236 | " The quality of OSS and closed source software ... | \n",
1237 | " Not employed, but looking for work | \n",
1238 | " Bosnia and Herzegovina | \n",
1239 | " Yes, full-time | \n",
1240 | " Secondary school (e.g. American high school, G... | \n",
1241 | " NaN | \n",
1242 | " Taken an online course in programming or softw... | \n",
1243 | " NaN | \n",
1244 | " Developer, desktop or enterprise applications;... | \n",
1245 | " NaN | \n",
1246 | " 17 | \n",
1247 | " NaN | \n",
1248 | " NaN | \n",
1249 | " NaN | \n",
1250 | " NaN | \n",
1251 | " NaN | \n",
1252 | " NaN | \n",
1253 | " I am actively looking for a job | \n",
1254 | " I've never had a job | \n",
1255 | " NaN | \n",
1256 | " NaN | \n",
1257 | " Financial performance or funding status of the... | \n",
1258 | " Something else changed (education, award, medi... | \n",
1259 | " NaN | \n",
1260 | " NaN | \n",
1261 | " NaN | \n",
1262 | " NaN | \n",
1263 | " NaN | \n",
1264 | " NaN | \n",
1265 | " NaN | \n",
1266 | " NaN | \n",
1267 | " NaN | \n",
1268 | " NaN | \n",
1269 | " NaN | \n",
1270 | " NaN | \n",
1271 | " NaN | \n",
1272 | " NaN | \n",
1273 | " NaN | \n",
1274 | " NaN | \n",
1275 | " C++;HTML/CSS;Python | \n",
1276 | " C++;HTML/CSS;JavaScript;SQL | \n",
1277 | " NaN | \n",
1278 | " MySQL | \n",
1279 | " Windows | \n",
1280 | " Windows | \n",
1281 | " Django | \n",
1282 | " Django | \n",
1283 | " NaN | \n",
1284 | " NaN | \n",
1285 | " Atom;PyCharm | \n",
1286 | " Windows | \n",
1287 | " I do not use containers | \n",
1288 | " NaN | \n",
1289 | " Useful across many domains and could change ma... | \n",
1290 | " Yes | \n",
1291 | " Yes | \n",
1292 | " Yes | \n",
1293 | " Instagram | \n",
1294 | " Online | \n",
1295 | " Username | \n",
1296 | " 2017 | \n",
1297 | " Daily or almost daily | \n",
1298 | " Find answers to specific questions;Learn how t... | \n",
1299 | " 3-5 times per week | \n",
1300 | " Stack Overflow was much faster | \n",
1301 | " 11-30 minutes | \n",
1302 | " Yes | \n",
1303 | " A few times per month or weekly | \n",
1304 | " No, I knew that Stack Overflow had a job board... | \n",
1305 | " No, and I don't know what those are | \n",
1306 | " Yes, somewhat | \n",
1307 | " Just as welcome now as I felt last year | \n",
1308 | " Tech articles written by other developers;Indu... | \n",
1309 | " 19.0 | \n",
1310 | " Man | \n",
1311 | " No | \n",
1312 | " Straight / Heterosexual | \n",
1313 | " NaN | \n",
1314 | " No | \n",
1315 | " Appropriate in length | \n",
1316 | " Neither easy nor difficult | \n",
1317 | "
\n",
1318 | " \n",
1319 | " 2 | \n",
1320 | " 3 | \n",
1321 | " I am not primarily a developer, but I write co... | \n",
1322 | " Yes | \n",
1323 | " Never | \n",
1324 | " The quality of OSS and closed source software ... | \n",
1325 | " Employed full-time | \n",
1326 | " Thailand | \n",
1327 | " No | \n",
1328 | " Bachelor’s degree (BA, BS, B.Eng., etc.) | \n",
1329 | " Web development or web design | \n",
1330 | " Taught yourself a new language, framework, or ... | \n",
1331 | " 100 to 499 employees | \n",
1332 | " Designer;Developer, back-end;Developer, front-... | \n",
1333 | " 3 | \n",
1334 | " 22 | \n",
1335 | " 1 | \n",
1336 | " Slightly satisfied | \n",
1337 | " Slightly satisfied | \n",
1338 | " Not at all confident | \n",
1339 | " Not sure | \n",
1340 | " Not sure | \n",
1341 | " I’m not actively looking, but I am open to new... | \n",
1342 | " 1-2 years ago | \n",
1343 | " Interview with people in peer roles | \n",
1344 | " No | \n",
1345 | " Languages, frameworks, and other technologies ... | \n",
1346 | " I was preparing for a job search | \n",
1347 | " THB | \n",
1348 | " Thai baht | \n",
1349 | " 23000.0 | \n",
1350 | " Monthly | \n",
1351 | " 8820.0 | \n",
1352 | " 40.0 | \n",
1353 | " There's no schedule or spec; I work on what se... | \n",
1354 | " Distracting work environment;Inadequate access... | \n",
1355 | " Less than once per month / Never | \n",
1356 | " Home | \n",
1357 | " Average | \n",
1358 | " No | \n",
1359 | " NaN | \n",
1360 | " No, but I think we should | \n",
1361 | " Not sure | \n",
1362 | " I have little or no influence | \n",
1363 | " HTML/CSS | \n",
1364 | " Elixir;HTML/CSS | \n",
1365 | " PostgreSQL | \n",
1366 | " PostgreSQL | \n",
1367 | " NaN | \n",
1368 | " NaN | \n",
1369 | " NaN | \n",
1370 | " Other(s): | \n",
1371 | " NaN | \n",
1372 | " NaN | \n",
1373 | " Vim;Visual Studio Code | \n",
1374 | " Linux-based | \n",
1375 | " I do not use containers | \n",
1376 | " NaN | \n",
1377 | " NaN | \n",
1378 | " Yes | \n",
1379 | " Yes | \n",
1380 | " Yes | \n",
1381 | " Reddit | \n",
1382 | " In real life (in person) | \n",
1383 | " Username | \n",
1384 | " 2011 | \n",
1385 | " A few times per week | \n",
1386 | " Find answers to specific questions;Learn how t... | \n",
1387 | " 6-10 times per week | \n",
1388 | " They were about the same | \n",
1389 | " NaN | \n",
1390 | " Yes | \n",
1391 | " Less than once per month or monthly | \n",
1392 | " Yes | \n",
1393 | " No, I've heard of them, but I am not part of a... | \n",
1394 | " Neutral | \n",
1395 | " Just as welcome now as I felt last year | \n",
1396 | " Tech meetups or events in your area;Courses on... | \n",
1397 | " 28.0 | \n",
1398 | " Man | \n",
1399 | " No | \n",
1400 | " Straight / Heterosexual | \n",
1401 | " NaN | \n",
1402 | " Yes | \n",
1403 | " Appropriate in length | \n",
1404 | " Neither easy nor difficult | \n",
1405 | "
\n",
1406 | " \n",
1407 | "
\n",
1408 | "
"
1409 | ],
1410 | "text/plain": [
1411 | " Respondent MainBranch Hobbyist \\\n",
1412 | "0 1 I am a student who is learning to code Yes \n",
1413 | "1 2 I am a student who is learning to code No \n",
1414 | "2 3 I am not primarily a developer, but I write co... Yes \n",
1415 | "\n",
1416 | " OpenSourcer OpenSource \\\n",
1417 | "0 Never The quality of OSS and closed source software ... \n",
1418 | "1 Less than once per year The quality of OSS and closed source software ... \n",
1419 | "2 Never The quality of OSS and closed source software ... \n",
1420 | "\n",
1421 | " Employment Country \\\n",
1422 | "0 Not employed, and not looking for work United Kingdom \n",
1423 | "1 Not employed, but looking for work Bosnia and Herzegovina \n",
1424 | "2 Employed full-time Thailand \n",
1425 | "\n",
1426 | " Student EdLevel \\\n",
1427 | "0 No Primary/elementary school \n",
1428 | "1 Yes, full-time Secondary school (e.g. American high school, G... \n",
1429 | "2 No Bachelor’s degree (BA, BS, B.Eng., etc.) \n",
1430 | "\n",
1431 | " UndergradMajor \\\n",
1432 | "0 NaN \n",
1433 | "1 NaN \n",
1434 | "2 Web development or web design \n",
1435 | "\n",
1436 | " EduOther OrgSize \\\n",
1437 | "0 Taught yourself a new language, framework, or ... NaN \n",
1438 | "1 Taken an online course in programming or softw... NaN \n",
1439 | "2 Taught yourself a new language, framework, or ... 100 to 499 employees \n",
1440 | "\n",
1441 | " DevType YearsCode Age1stCode \\\n",
1442 | "0 NaN 4 10 \n",
1443 | "1 Developer, desktop or enterprise applications;... NaN 17 \n",
1444 | "2 Designer;Developer, back-end;Developer, front-... 3 22 \n",
1445 | "\n",
1446 | " YearsCodePro CareerSat JobSat MgrIdiot \\\n",
1447 | "0 NaN NaN NaN NaN \n",
1448 | "1 NaN NaN NaN NaN \n",
1449 | "2 1 Slightly satisfied Slightly satisfied Not at all confident \n",
1450 | "\n",
1451 | " MgrMoney MgrWant JobSeek \\\n",
1452 | "0 NaN NaN NaN \n",
1453 | "1 NaN NaN I am actively looking for a job \n",
1454 | "2 Not sure Not sure I’m not actively looking, but I am open to new... \n",
1455 | "\n",
1456 | " LastHireDate LastInt FizzBuzz \\\n",
1457 | "0 NaN NaN NaN \n",
1458 | "1 I've never had a job NaN NaN \n",
1459 | "2 1-2 years ago Interview with people in peer roles No \n",
1460 | "\n",
1461 | " JobFactors \\\n",
1462 | "0 NaN \n",
1463 | "1 Financial performance or funding status of the... \n",
1464 | "2 Languages, frameworks, and other technologies ... \n",
1465 | "\n",
1466 | " ResumeUpdate CurrencySymbol \\\n",
1467 | "0 NaN NaN \n",
1468 | "1 Something else changed (education, award, medi... NaN \n",
1469 | "2 I was preparing for a job search THB \n",
1470 | "\n",
1471 | " CurrencyDesc CompTotal CompFreq ConvertedComp WorkWeekHrs \\\n",
1472 | "0 NaN NaN NaN NaN NaN \n",
1473 | "1 NaN NaN NaN NaN NaN \n",
1474 | "2 Thai baht 23000.0 Monthly 8820.0 40.0 \n",
1475 | "\n",
1476 | " WorkPlan \\\n",
1477 | "0 NaN \n",
1478 | "1 NaN \n",
1479 | "2 There's no schedule or spec; I work on what se... \n",
1480 | "\n",
1481 | " WorkChallenge \\\n",
1482 | "0 NaN \n",
1483 | "1 NaN \n",
1484 | "2 Distracting work environment;Inadequate access... \n",
1485 | "\n",
1486 | " WorkRemote WorkLoc ImpSyn CodeRev CodeRevHrs \\\n",
1487 | "0 NaN NaN NaN NaN NaN \n",
1488 | "1 NaN NaN NaN NaN NaN \n",
1489 | "2 Less than once per month / Never Home Average No NaN \n",
1490 | "\n",
1491 | " UnitTests PurchaseHow PurchaseWhat \\\n",
1492 | "0 NaN NaN NaN \n",
1493 | "1 NaN NaN NaN \n",
1494 | "2 No, but I think we should Not sure I have little or no influence \n",
1495 | "\n",
1496 | " LanguageWorkedWith \\\n",
1497 | "0 HTML/CSS;Java;JavaScript;Python \n",
1498 | "1 C++;HTML/CSS;Python \n",
1499 | "2 HTML/CSS \n",
1500 | "\n",
1501 | " LanguageDesireNextYear DatabaseWorkedWith \\\n",
1502 | "0 C;C++;C#;Go;HTML/CSS;Java;JavaScript;Python;SQL SQLite \n",
1503 | "1 C++;HTML/CSS;JavaScript;SQL NaN \n",
1504 | "2 Elixir;HTML/CSS PostgreSQL \n",
1505 | "\n",
1506 | " DatabaseDesireNextYear PlatformWorkedWith PlatformDesireNextYear \\\n",
1507 | "0 MySQL MacOS;Windows Android;Arduino;Windows \n",
1508 | "1 MySQL Windows Windows \n",
1509 | "2 PostgreSQL NaN NaN \n",
1510 | "\n",
1511 | " WebFrameWorkedWith WebFrameDesireNextYear MiscTechWorkedWith \\\n",
1512 | "0 Django;Flask Flask;jQuery Node.js \n",
1513 | "1 Django Django NaN \n",
1514 | "2 NaN Other(s): NaN \n",
1515 | "\n",
1516 | " MiscTechDesireNextYear DevEnviron OpSys \\\n",
1517 | "0 Node.js IntelliJ;Notepad++;PyCharm Windows \n",
1518 | "1 NaN Atom;PyCharm Windows \n",
1519 | "2 NaN Vim;Visual Studio Code Linux-based \n",
1520 | "\n",
1521 | " Containers BlockchainOrg \\\n",
1522 | "0 I do not use containers NaN \n",
1523 | "1 I do not use containers NaN \n",
1524 | "2 I do not use containers NaN \n",
1525 | "\n",
1526 | " BlockchainIs BetterLife \\\n",
1527 | "0 NaN Yes \n",
1528 | "1 Useful across many domains and could change ma... Yes \n",
1529 | "2 NaN Yes \n",
1530 | "\n",
1531 | " ITperson OffOn SocialMedia \\\n",
1532 | "0 Fortunately, someone else has that title Yes Twitter \n",
1533 | "1 Yes Yes Instagram \n",
1534 | "2 Yes Yes Reddit \n",
1535 | "\n",
1536 | " Extraversion ScreenName SOVisit1st \\\n",
1537 | "0 Online Username 2017 \n",
1538 | "1 Online Username 2017 \n",
1539 | "2 In real life (in person) Username 2011 \n",
1540 | "\n",
1541 | " SOVisitFreq \\\n",
1542 | "0 A few times per month or weekly \n",
1543 | "1 Daily or almost daily \n",
1544 | "2 A few times per week \n",
1545 | "\n",
1546 | " SOVisitTo SOFindAnswer \\\n",
1547 | "0 Find answers to specific questions;Learn how t... 3-5 times per week \n",
1548 | "1 Find answers to specific questions;Learn how t... 3-5 times per week \n",
1549 | "2 Find answers to specific questions;Learn how t... 6-10 times per week \n",
1550 | "\n",
1551 | " SOTimeSaved SOHowMuchTime SOAccount \\\n",
1552 | "0 Stack Overflow was much faster 31-60 minutes No \n",
1553 | "1 Stack Overflow was much faster 11-30 minutes Yes \n",
1554 | "2 They were about the same NaN Yes \n",
1555 | "\n",
1556 | " SOPartFreq \\\n",
1557 | "0 NaN \n",
1558 | "1 A few times per month or weekly \n",
1559 | "2 Less than once per month or monthly \n",
1560 | "\n",
1561 | " SOJobs \\\n",
1562 | "0 No, I didn't know that Stack Overflow had a jo... \n",
1563 | "1 No, I knew that Stack Overflow had a job board... \n",
1564 | "2 Yes \n",
1565 | "\n",
1566 | " EntTeams SOComm \\\n",
1567 | "0 No, and I don't know what those are Neutral \n",
1568 | "1 No, and I don't know what those are Yes, somewhat \n",
1569 | "2 No, I've heard of them, but I am not part of a... Neutral \n",
1570 | "\n",
1571 | " WelcomeChange \\\n",
1572 | "0 Just as welcome now as I felt last year \n",
1573 | "1 Just as welcome now as I felt last year \n",
1574 | "2 Just as welcome now as I felt last year \n",
1575 | "\n",
1576 | " SONewContent Age Gender Trans \\\n",
1577 | "0 Tech articles written by other developers;Indu... 14.0 Man No \n",
1578 | "1 Tech articles written by other developers;Indu... 19.0 Man No \n",
1579 | "2 Tech meetups or events in your area;Courses on... 28.0 Man No \n",
1580 | "\n",
1581 | " Sexuality Ethnicity Dependents SurveyLength \\\n",
1582 | "0 Straight / Heterosexual NaN No Appropriate in length \n",
1583 | "1 Straight / Heterosexual NaN No Appropriate in length \n",
1584 | "2 Straight / Heterosexual NaN Yes Appropriate in length \n",
1585 | "\n",
1586 | " SurveyEase \n",
1587 | "0 Neither easy nor difficult \n",
1588 | "1 Neither easy nor difficult \n",
1589 | "2 Neither easy nor difficult "
1590 | ]
1591 | },
1592 | "execution_count": 34,
1593 | "metadata": {},
1594 | "output_type": "execute_result"
1595 | }
1596 | ],
1597 | "source": [
1598 | "res_df.loc[0:2]"
1599 | ]
1600 | },
1601 | {
1602 | "cell_type": "markdown",
1603 | "metadata": {},
1604 | "source": [
1605 | "Look up how the first respondent (row label 0) answered the Hobbyist question by passing the row label and the column label to `.loc`."
1606 | ]
1607 | },
1608 | {
1609 | "cell_type": "code",
1610 | "execution_count": 38,
1611 | "metadata": {},
1612 | "outputs": [
1613 | {
1614 | "data": {
1615 | "text/plain": [
1616 | "'Yes'"
1617 | ]
1618 | },
1619 | "execution_count": 38,
1620 | "metadata": {},
1621 | "output_type": "execute_result"
1622 | }
1623 | ],
1624 | "source": [
1625 | "res_df.loc[0, 'Hobbyist']"
1626 | ]
1627 | },
1628 | {
1629 | "cell_type": "markdown",
1630 | "metadata": {},
1631 | "source": [
1632 | "Note we can drop the brackets when selecting rows _and_ a column together."
1633 | ]
1634 | },
1635 | {
1636 | "cell_type": "markdown",
1637 | "metadata": {},
1638 | "source": [
1639 | "Get the responses of the first three rows to the Hobbyist question by passing in a slice of rows and the column label."
1640 | ]
1641 | },
1642 | {
1643 | "cell_type": "code",
1644 | "execution_count": 37,
1645 | "metadata": {},
1646 | "outputs": [
1647 | {
1648 | "data": {
1649 | "text/plain": [
1650 | "0 Yes\n",
1651 | "1 No\n",
1652 | "2 Yes\n",
1653 | "Name: Hobbyist, dtype: object"
1654 | ]
1655 | },
1656 | "execution_count": 37,
1657 | "metadata": {},
1658 | "output_type": "execute_result"
1659 | }
1660 | ],
1661 | "source": [
1662 | "res_df.loc[0:2, 'Hobbyist']"
1663 | ]
1664 | },
1665 | {
1666 | "cell_type": "markdown",
1667 | "metadata": {},
1668 | "source": [
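1669 | "When selecting a slice of rows and a slice of columns, write the slices directly inside `.loc[...]` without inner list brackets; a slice wrapped in a list (e.g. `[0:2]`) is a syntax error."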
1670 | ]
1671 | },
1672 | {
1673 | "cell_type": "markdown",
1674 | "metadata": {},
1675 | "source": [
1676 | "Retrieve a slice of rows together with a slice of columns."
1677 | ]
1678 | },
1679 | {
1680 | "cell_type": "code",
1681 | "execution_count": 39,
1682 | "metadata": {},
1683 | "outputs": [
1684 | {
1685 | "data": {
1686 | "text/html": [
1687 | "\n",
1688 | "\n",
1701 | "
\n",
1702 | " \n",
1703 | " \n",
1704 | " | \n",
1705 | " Hobbyist | \n",
1706 | " OpenSourcer | \n",
1707 | " OpenSource | \n",
1708 | " Employment | \n",
1709 | "
\n",
1710 | " \n",
1711 | " \n",
1712 | " \n",
1713 | " 0 | \n",
1714 | " Yes | \n",
1715 | " Never | \n",
1716 | " The quality of OSS and closed source software ... | \n",
1717 | " Not employed, and not looking for work | \n",
1718 | "
\n",
1719 | " \n",
1720 | " 1 | \n",
1721 | " No | \n",
1722 | " Less than once per year | \n",
1723 | " The quality of OSS and closed source software ... | \n",
1724 | " Not employed, but looking for work | \n",
1725 | "
\n",
1726 | " \n",
1727 | " 2 | \n",
1728 | " Yes | \n",
1729 | " Never | \n",
1730 | " The quality of OSS and closed source software ... | \n",
1731 | " Employed full-time | \n",
1732 | "
\n",
1733 | " \n",
1734 | "
\n",
1735 | "
"
1736 | ],
1737 | "text/plain": [
1738 | " Hobbyist OpenSourcer \\\n",
1739 | "0 Yes Never \n",
1740 | "1 No Less than once per year \n",
1741 | "2 Yes Never \n",
1742 | "\n",
1743 | " OpenSource \\\n",
1744 | "0 The quality of OSS and closed source software ... \n",
1745 | "1 The quality of OSS and closed source software ... \n",
1746 | "2 The quality of OSS and closed source software ... \n",
1747 | "\n",
1748 | " Employment \n",
1749 | "0 Not employed, and not looking for work \n",
1750 | "1 Not employed, but looking for work \n",
1751 | "2 Employed full-time "
1752 | ]
1753 | },
1754 | "execution_count": 39,
1755 | "metadata": {},
1756 | "output_type": "execute_result"
1757 | }
1758 | ],
1759 | "source": [
1760 | "res_df.loc[0:2, 'Hobbyist':'Employment']"
1761 | ]
1762 | },
1763 | {
1764 | "cell_type": "markdown",
1765 | "metadata": {},
1766 | "source": [
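1767 | "Note that label-based slicing with `.loc` is inclusive of both endpoints (unlike ordinary Python slicing): rows 0 through 2 and columns 'Hobbyist' through 'Employment' are all returned. This spares users from having to name the label that comes after the last one they actually want."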
1768 | ]
1769 | },
1770 | {
1771 | "cell_type": "code",
1772 | "execution_count": null,
1773 | "metadata": {},
1774 | "outputs": [],
1775 | "source": []
1776 | }
1777 | ],
1778 | "metadata": {
1779 | "kernelspec": {
1780 | "display_name": "Python 3",
1781 | "language": "python",
1782 | "name": "python3"
1783 | },
1784 | "language_info": {
1785 | "codemirror_mode": {
1786 | "name": "ipython",
1787 | "version": 3
1788 | },
1789 | "file_extension": ".py",
1790 | "mimetype": "text/x-python",
1791 | "name": "python",
1792 | "nbconvert_exporter": "python",
1793 | "pygments_lexer": "ipython3",
1794 | "version": "3.6.9"
1795 | }
1796 | },
1797 | "nbformat": 4,
1798 | "nbformat_minor": 2
1799 | }
1800 |
--------------------------------------------------------------------------------
/03-Indexes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Indexes: How to Set, Reset and Use Indexes"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 1,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "import pandas as pd"
17 | ]
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 2,
22 | "metadata": {},
23 | "outputs": [],
24 | "source": [
25 | "people = {\n",
26 | " \"first\": [\"Phil\", \"Jane\", \"Rob\"],\n",
27 | " \"last\": [\"Lembo\", \"Doe\", \"Roe\"],\n",
28 | " \"email\": [\"phil.lembo@gmail.com\", \"janedoe@email.com\", \"robroe@email.com\"]\n",
29 | "}"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 3,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "df = pd.DataFrame(people)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 4,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/html": [
49 | "\n",
50 | "\n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " | \n",
67 | " first | \n",
68 | " last | \n",
69 | " email | \n",
70 | "
\n",
71 | " \n",
72 | " \n",
73 | " \n",
74 | " 0 | \n",
75 | " Phil | \n",
76 | " Lembo | \n",
77 | " phil.lembo@gmail.com | \n",
78 | "
\n",
79 | " \n",
80 | " 1 | \n",
81 | " Jane | \n",
82 | " Doe | \n",
83 | " janedoe@email.com | \n",
84 | "
\n",
85 | " \n",
86 | " 2 | \n",
87 | " Rob | \n",
88 | " Roe | \n",
89 | " robroe@email.com | \n",
90 | "
\n",
91 | " \n",
92 | "
\n",
93 | "
"
94 | ],
95 | "text/plain": [
96 | " first last email\n",
97 | "0 Phil Lembo phil.lembo@gmail.com\n",
98 | "1 Jane Doe janedoe@email.com\n",
99 | "2 Rob Roe robroe@email.com"
100 | ]
101 | },
102 | "execution_count": 4,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "df"
109 | ]
110 | },
111 | {
112 | "cell_type": "code",
113 | "execution_count": 5,
114 | "metadata": {},
115 | "outputs": [
116 | {
117 | "data": {
118 | "text/plain": [
119 | "0 phil.lembo@gmail.com\n",
120 | "1 janedoe@email.com\n",
121 | "2 robroe@email.com\n",
122 | "Name: email, dtype: object"
123 | ]
124 | },
125 | "execution_count": 5,
126 | "metadata": {},
127 | "output_type": "execute_result"
128 | }
129 | ],
130 | "source": [
131 | "df['email']"
132 | ]
133 | },
134 | {
135 | "cell_type": "code",
136 | "execution_count": 6,
137 | "metadata": {},
138 | "outputs": [
139 | {
140 | "data": {
141 | "text/html": [
142 | "\n",
143 | "\n",
156 | "
\n",
157 | " \n",
158 | " \n",
159 | " | \n",
160 | " first | \n",
161 | " last | \n",
162 | "
\n",
163 | " \n",
164 | " email | \n",
165 | " | \n",
166 | " | \n",
167 | "
\n",
168 | " \n",
169 | " \n",
170 | " \n",
171 | " phil.lembo@gmail.com | \n",
172 | " Phil | \n",
173 | " Lembo | \n",
174 | "
\n",
175 | " \n",
176 | " janedoe@email.com | \n",
177 | " Jane | \n",
178 | " Doe | \n",
179 | "
\n",
180 | " \n",
181 | " robroe@email.com | \n",
182 | " Rob | \n",
183 | " Roe | \n",
184 | "
\n",
185 | " \n",
186 | "
\n",
187 | "
"
188 | ],
189 | "text/plain": [
190 | " first last\n",
191 | "email \n",
192 | "phil.lembo@gmail.com Phil Lembo\n",
193 | "janedoe@email.com Jane Doe\n",
194 | "robroe@email.com Rob Roe"
195 | ]
196 | },
197 | "execution_count": 6,
198 | "metadata": {},
199 | "output_type": "execute_result"
200 | }
201 | ],
202 | "source": [
203 | "df.set_index('email')"
204 | ]
205 | },
206 | {
207 | "cell_type": "markdown",
208 | "metadata": {},
209 | "source": [
210 | "By default, `set_index` returns a new DataFrame and leaves the original DataFrame unchanged."
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": 7,
216 | "metadata": {},
217 | "outputs": [
218 | {
219 | "data": {
220 | "text/html": [
221 | "\n",
222 | "\n",
235 | "
\n",
236 | " \n",
237 | " \n",
238 | " | \n",
239 | " first | \n",
240 | " last | \n",
241 | " email | \n",
242 | "
\n",
243 | " \n",
244 | " \n",
245 | " \n",
246 | " 0 | \n",
247 | " Phil | \n",
248 | " Lembo | \n",
249 | " phil.lembo@gmail.com | \n",
250 | "
\n",
251 | " \n",
252 | " 1 | \n",
253 | " Jane | \n",
254 | " Doe | \n",
255 | " janedoe@email.com | \n",
256 | "
\n",
257 | " \n",
258 | " 2 | \n",
259 | " Rob | \n",
260 | " Roe | \n",
261 | " robroe@email.com | \n",
262 | "
\n",
263 | " \n",
264 | "
\n",
265 | "
"
266 | ],
267 | "text/plain": [
268 | " first last email\n",
269 | "0 Phil Lembo phil.lembo@gmail.com\n",
270 | "1 Jane Doe janedoe@email.com\n",
271 | "2 Rob Roe robroe@email.com"
272 | ]
273 | },
274 | "execution_count": 7,
275 | "metadata": {},
276 | "output_type": "execute_result"
277 | }
278 | ],
279 | "source": [
280 | "df"
281 | ]
282 | },
283 | {
284 | "cell_type": "markdown",
285 | "metadata": {},
286 | "source": [
287 | "To change the index of the existing DataFrame in place, pass `inplace=True` (alternatively, reassign the result: `df = df.set_index('email')`)."
288 | ]
289 | },
290 | {
291 | "cell_type": "code",
292 | "execution_count": 8,
293 | "metadata": {},
294 | "outputs": [],
295 | "source": [
296 | "df.set_index('email', inplace=True)"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 9,
302 | "metadata": {},
303 | "outputs": [
304 | {
305 | "data": {
306 | "text/html": [
307 | "\n",
308 | "\n",
321 | "
\n",
322 | " \n",
323 | " \n",
324 | " | \n",
325 | " first | \n",
326 | " last | \n",
327 | "
\n",
328 | " \n",
329 | " email | \n",
330 | " | \n",
331 | " | \n",
332 | "
\n",
333 | " \n",
334 | " \n",
335 | " \n",
336 | " phil.lembo@gmail.com | \n",
337 | " Phil | \n",
338 | " Lembo | \n",
339 | "
\n",
340 | " \n",
341 | " janedoe@email.com | \n",
342 | " Jane | \n",
343 | " Doe | \n",
344 | "
\n",
345 | " \n",
346 | " robroe@email.com | \n",
347 | " Rob | \n",
348 | " Roe | \n",
349 | "
\n",
350 | " \n",
351 | "
\n",
352 | "
"
353 | ],
354 | "text/plain": [
355 | " first last\n",
356 | "email \n",
357 | "phil.lembo@gmail.com Phil Lembo\n",
358 | "janedoe@email.com Jane Doe\n",
359 | "robroe@email.com Rob Roe"
360 | ]
361 | },
362 | "execution_count": 9,
363 | "metadata": {},
364 | "output_type": "execute_result"
365 | }
366 | ],
367 | "source": [
368 | "df"
369 | ]
370 | },
371 | {
372 | "cell_type": "code",
373 | "execution_count": 10,
374 | "metadata": {},
375 | "outputs": [
376 | {
377 | "data": {
378 | "text/plain": [
379 | "Index(['phil.lembo@gmail.com', 'janedoe@email.com', 'robroe@email.com'], dtype='object', name='email')"
380 | ]
381 | },
382 | "execution_count": 10,
383 | "metadata": {},
384 | "output_type": "execute_result"
385 | }
386 | ],
387 | "source": [
388 | "df.index"
389 | ]
390 | },
391 | {
392 | "cell_type": "code",
393 | "execution_count": 11,
394 | "metadata": {},
395 | "outputs": [
396 | {
397 | "data": {
398 | "text/plain": [
399 | "first Phil\n",
400 | "last Lembo\n",
401 | "Name: phil.lembo@gmail.com, dtype: object"
402 | ]
403 | },
404 | "execution_count": 11,
405 | "metadata": {},
406 | "output_type": "execute_result"
407 | }
408 | ],
409 | "source": [
410 | "df.loc['phil.lembo@gmail.com']"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 12,
416 | "metadata": {},
417 | "outputs": [
418 | {
419 | "data": {
420 | "text/plain": [
421 | "'Lembo'"
422 | ]
423 | },
424 | "execution_count": 12,
425 | "metadata": {},
426 | "output_type": "execute_result"
427 | }
428 | ],
429 | "source": [
430 | "df.loc['phil.lembo@gmail.com', 'last']"
431 | ]
432 | },
433 | {
434 | "cell_type": "markdown",
435 | "metadata": {},
436 | "source": [
437 | "Note that the default integer labels (0, 1, 2) are no longer our index, so looking up the label 0 with `.loc` now raises an error."
438 | ]
439 | },
440 | {
441 | "cell_type": "code",
442 | "execution_count": 13,
443 | "metadata": {},
444 | "outputs": [
445 | {
446 | "ename": "TypeError",
447 | "evalue": "cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>",
448 | "output_type": "error",
449 | "traceback": [
450 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
451 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
452 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mloc\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
453 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, key)\u001b[0m\n\u001b[1;32m 1422\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1423\u001b[0m \u001b[0mmaybe_callable\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mapply_if_callable\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1424\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_getitem_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmaybe_callable\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1426\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
454 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_getitem_axis\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1847\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1848\u001b[0m \u001b[0;31m# fall thru to straight lookup\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1849\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_key\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1850\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_label\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1851\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
455 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_validate_key\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 1723\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1724\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mis_list_like_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1725\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_scalar_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1726\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1727\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_is_scalar_access\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mTuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
456 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexing.py\u001b[0m in \u001b[0;36m_convert_scalar_indexer\u001b[0;34m(self, key, axis)\u001b[0m\n\u001b[1;32m 272\u001b[0m \u001b[0max\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mobj\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_get_axis\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0maxis\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mndim\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[0;31m# a scalar\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 274\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0max\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_convert_scalar_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 275\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 276\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m_convert_slice_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
457 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_convert_scalar_indexer\u001b[0;34m(self, key, kind)\u001b[0m\n\u001b[1;32m 3136\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mkind\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m\"loc\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mis_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3137\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mholds_integer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 3138\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_invalid_indexer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"label\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3139\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3140\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
458 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/indexes/base.py\u001b[0m in \u001b[0;36m_invalid_indexer\u001b[0;34m(self, form, key)\u001b[0m\n\u001b[1;32m 3338\u001b[0m \u001b[0;34m\"cannot do {form} indexing on {klass} with these \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3339\u001b[0m \"indexers [{key}] of {kind}\".format(\n\u001b[0;32m-> 3340\u001b[0;31m \u001b[0mform\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mform\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mklass\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkey\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkind\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 3341\u001b[0m )\n\u001b[1;32m 3342\u001b[0m )\n",
459 | "\u001b[0;31mTypeError\u001b[0m: cannot do label indexing on with these indexers [0] of "
460 | ]
461 | }
462 | ],
463 | "source": [
464 | "df.loc[0]"
465 | ]
466 | },
467 | {
468 | "cell_type": "markdown",
469 | "metadata": {},
470 | "source": [
471 | "Instead, we now need to employ iloc to use integers."
472 | ]
473 | },
474 | {
475 | "cell_type": "code",
476 | "execution_count": null,
477 | "metadata": {},
478 | "outputs": [],
479 | "source": [
480 | "df.iloc[0]"
481 | ]
482 | },
483 | {
484 | "cell_type": "markdown",
485 | "metadata": {},
486 | "source": [
487 | "To reset, use the reset_index method."
488 | ]
489 | },
490 | {
491 | "cell_type": "code",
492 | "execution_count": null,
493 | "metadata": {},
494 | "outputs": [],
495 | "source": [
496 | "df.reset_index(inplace=True)"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": null,
502 | "metadata": {},
503 | "outputs": [],
504 | "source": [
505 | "df"
506 | ]
507 | },
508 | {
509 | "cell_type": "markdown",
510 | "metadata": {},
511 | "source": [
512 | "Now turn to survey data."
513 | ]
514 | },
515 | {
516 | "cell_type": "code",
517 | "execution_count": null,
518 | "metadata": {},
519 | "outputs": [],
520 | "source": [
521 | "res_df = pd.read_csv('data/survey_results_public.csv')\n",
522 | "schema_df = pd.read_csv('data/survey_results_schema.csv')"
523 | ]
524 | },
525 | {
526 | "cell_type": "code",
527 | "execution_count": null,
528 | "metadata": {},
529 | "outputs": [],
530 | "source": [
531 | "pd.set_option('display.max_columns', 85)\n",
532 | "pd.set_option('display.max_rows', 85)"
533 | ]
534 | },
535 | {
536 | "cell_type": "code",
537 | "execution_count": null,
538 | "metadata": {},
539 | "outputs": [],
540 | "source": [
541 | "df"
542 | ]
543 | },
544 | {
545 | "cell_type": "markdown",
546 | "metadata": {},
547 | "source": [
548 | "Set index when loading data."
549 | ]
550 | },
551 | {
552 | "cell_type": "code",
553 | "execution_count": null,
554 | "metadata": {},
555 | "outputs": [],
556 | "source": [
557 | "res_df = pd.read_csv('data/survey_results_public.csv', index_col='Respondent')"
558 | ]
559 | },
560 | {
561 | "cell_type": "code",
562 | "execution_count": null,
563 | "metadata": {},
564 | "outputs": [],
565 | "source": [
566 | "res_df"
567 | ]
568 | },
569 | {
570 | "cell_type": "markdown",
571 | "metadata": {},
572 | "source": [
573 | "To retrieve respondent number 1."
574 | ]
575 | },
576 | {
577 | "cell_type": "code",
578 | "execution_count": null,
579 | "metadata": {},
580 | "outputs": [],
581 | "source": [
582 | "res_df.loc[1]"
583 | ]
584 | },
585 | {
586 | "cell_type": "code",
587 | "execution_count": null,
588 | "metadata": {},
589 | "outputs": [],
590 | "source": [
591 | "schema_df"
592 | ]
593 | },
594 | {
595 | "cell_type": "markdown",
596 | "metadata": {},
597 | "source": [
598 | "What if I want to be able to call up a schema definition without having to scroll through the whole frame? Set \"Column\" as the index!"
599 | ]
600 | },
601 | {
602 | "cell_type": "code",
603 | "execution_count": null,
604 | "metadata": {},
605 | "outputs": [],
606 | "source": [
607 | "schema_df = pd.read_csv('data/survey_results_schema.csv', index_col='Column')"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "execution_count": null,
613 | "metadata": {},
614 | "outputs": [],
615 | "source": [
616 | "schema_df"
617 | ]
618 | },
619 | {
620 | "cell_type": "code",
621 | "execution_count": null,
622 | "metadata": {},
623 | "outputs": [],
624 | "source": [
625 | "schema_df.loc['Hobbyist']"
626 | ]
627 | },
628 | {
629 | "cell_type": "code",
630 | "execution_count": null,
631 | "metadata": {},
632 | "outputs": [],
633 | "source": [
634 | "schema_df.loc['MgrIdiot']"
635 | ]
636 | },
637 | {
638 | "cell_type": "markdown",
639 | "metadata": {},
640 | "source": [
641 | "By default, pandas truncates its response. This can be changed, but you can also retrieve the full text by specifying both the index _and_ column names (in this case \"QuestionText\")."
642 | ]
643 | },
644 | {
645 | "cell_type": "code",
646 | "execution_count": null,
647 | "metadata": {},
648 | "outputs": [],
649 | "source": [
650 | "schema_df.loc['MgrIdiot', 'QuestionText']"
651 | ]
652 | },
653 | {
654 | "cell_type": "markdown",
655 | "metadata": {},
656 | "source": [
657 | "We can sort to make life easier!"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": null,
663 | "metadata": {},
664 | "outputs": [],
665 | "source": [
666 | "schema_df.sort_index()"
667 | ]
668 | },
669 | {
670 | "cell_type": "markdown",
671 | "metadata": {},
672 | "source": [
673 | "To reverse order, use \"ascending\" flag."
674 | ]
675 | },
676 | {
677 | "cell_type": "code",
678 | "execution_count": null,
679 | "metadata": {},
680 | "outputs": [],
681 | "source": [
682 | "schema_df.sort_index(ascending=False)"
683 | ]
684 | },
685 | {
686 | "cell_type": "markdown",
687 | "metadata": {},
688 | "source": [
689 | "To make change persistent, use \"inplace=True\" flag."
690 | ]
691 | },
692 | {
693 | "cell_type": "code",
694 | "execution_count": null,
695 | "metadata": {},
696 | "outputs": [],
697 | "source": [
698 | "schema_df.sort_index(inplace=True)\n",
699 | "schema_df"
700 | ]
701 | },
702 | {
703 | "cell_type": "code",
704 | "execution_count": null,
705 | "metadata": {},
706 | "outputs": [],
707 | "source": []
708 | }
709 | ],
710 | "metadata": {
711 | "kernelspec": {
712 | "display_name": "Python 3",
713 | "language": "python",
714 | "name": "python3"
715 | },
716 | "language_info": {
717 | "codemirror_mode": {
718 | "name": "ipython",
719 | "version": 3
720 | },
721 | "file_extension": ".py",
722 | "mimetype": "text/x-python",
723 | "name": "python",
724 | "nbconvert_exporter": "python",
725 | "pygments_lexer": "ipython3",
726 | "version": "3.6.9"
727 | }
728 | },
729 | "nbformat": 4,
730 | "nbformat_minor": 2
731 | }
732 |
--------------------------------------------------------------------------------
/06-Add Remove Rows and Columns.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Add/Remove Rows and Columns from DataFrames"
8 | ]
9 | },
10 | {
11 | "cell_type": "code",
12 | "execution_count": 17,
13 | "metadata": {},
14 | "outputs": [],
15 | "source": [
16 | "people = {\n",
17 | " \"first\": [\"Corey\", \"Jane\", \"John\"],\n",
18 | " \"last\": [\"Schafer\", \"Doe\", \"Doe\"],\n",
19 | " \"email\": [\"CoreyMSchafer@gmail.com\", \"JaneDoe@email.com\", \"JohnDoe@email.com\"]\n",
20 | "}"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 18,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "import pandas as pd"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 19,
35 | "metadata": {},
36 | "outputs": [],
37 | "source": [
38 | "df = pd.DataFrame(people)"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 20,
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/html": [
49 | "\n",
50 | "\n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " | \n",
67 | " first | \n",
68 | " last | \n",
69 | " email | \n",
70 | "
\n",
71 | " \n",
72 | " \n",
73 | " \n",
74 | " 0 | \n",
75 | " Corey | \n",
76 | " Schafer | \n",
77 | " CoreyMSchafer@gmail.com | \n",
78 | "
\n",
79 | " \n",
80 | " 1 | \n",
81 | " Jane | \n",
82 | " Doe | \n",
83 | " JaneDoe@email.com | \n",
84 | "
\n",
85 | " \n",
86 | " 2 | \n",
87 | " John | \n",
88 | " Doe | \n",
89 | " JohnDoe@email.com | \n",
90 | "
\n",
91 | " \n",
92 | "
\n",
93 | "
"
94 | ],
95 | "text/plain": [
96 | " first last email\n",
97 | "0 Corey Schafer CoreyMSchafer@gmail.com\n",
98 | "1 Jane Doe JaneDoe@email.com\n",
99 | "2 John Doe JohnDoe@email.com"
100 | ]
101 | },
102 | "execution_count": 20,
103 | "metadata": {},
104 | "output_type": "execute_result"
105 | }
106 | ],
107 | "source": [
108 | "df"
109 | ]
110 | },
111 | {
112 | "cell_type": "markdown",
113 | "metadata": {},
114 | "source": [
115 | "Combine first and last name column:"
116 | ]
117 | },
118 | {
119 | "cell_type": "code",
120 | "execution_count": 21,
121 | "metadata": {},
122 | "outputs": [
123 | {
124 | "data": {
125 | "text/plain": [
126 | "0 Corey Schafer\n",
127 | "1 Jane Doe\n",
128 | "2 John Doe\n",
129 | "dtype: object"
130 | ]
131 | },
132 | "execution_count": 21,
133 | "metadata": {},
134 | "output_type": "execute_result"
135 | }
136 | ],
137 | "source": [
138 | "df['first'] + ' ' + df['last']"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": 22,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "df['full_name'] = df['first'] + ' ' + df['last']"
148 | ]
149 | },
150 | {
151 | "cell_type": "code",
152 | "execution_count": 23,
153 | "metadata": {},
154 | "outputs": [
155 | {
156 | "data": {
157 | "text/html": [
158 | "\n",
159 | "\n",
172 | "
\n",
173 | " \n",
174 | " \n",
175 | " | \n",
176 | " first | \n",
177 | " last | \n",
178 | " email | \n",
179 | " full_name | \n",
180 | "
\n",
181 | " \n",
182 | " \n",
183 | " \n",
184 | " 0 | \n",
185 | " Corey | \n",
186 | " Schafer | \n",
187 | " CoreyMSchafer@gmail.com | \n",
188 | " Corey Schafer | \n",
189 | "
\n",
190 | " \n",
191 | " 1 | \n",
192 | " Jane | \n",
193 | " Doe | \n",
194 | " JaneDoe@email.com | \n",
195 | " Jane Doe | \n",
196 | "
\n",
197 | " \n",
198 | " 2 | \n",
199 | " John | \n",
200 | " Doe | \n",
201 | " JohnDoe@email.com | \n",
202 | " John Doe | \n",
203 | "
\n",
204 | " \n",
205 | "
\n",
206 | "
"
207 | ],
208 | "text/plain": [
209 | " first last email full_name\n",
210 | "0 Corey Schafer CoreyMSchafer@gmail.com Corey Schafer\n",
211 | "1 Jane Doe JaneDoe@email.com Jane Doe\n",
212 | "2 John Doe JohnDoe@email.com John Doe"
213 | ]
214 | },
215 | "execution_count": 23,
216 | "metadata": {},
217 | "output_type": "execute_result"
218 | }
219 | ],
220 | "source": [
221 | "df"
222 | ]
223 | },
224 | {
225 | "cell_type": "markdown",
226 | "metadata": {},
227 | "source": [
228 | "Note: You cannot use dot notation when assigning a new column like this; you must use brackets (otherwise Python will think you're trying to assign an attribute)."
229 | ]
230 | },
231 | {
232 | "cell_type": "markdown",
233 | "metadata": {},
234 | "source": [
235 | "To delete columns:"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 24,
241 | "metadata": {},
242 | "outputs": [
243 | {
244 | "data": {
245 | "text/html": [
246 | "\n",
247 | "\n",
260 | "
\n",
261 | " \n",
262 | " \n",
263 | " | \n",
264 | " email | \n",
265 | " full_name | \n",
266 | "
\n",
267 | " \n",
268 | " \n",
269 | " \n",
270 | " 0 | \n",
271 | " CoreyMSchafer@gmail.com | \n",
272 | " Corey Schafer | \n",
273 | "
\n",
274 | " \n",
275 | " 1 | \n",
276 | " JaneDoe@email.com | \n",
277 | " Jane Doe | \n",
278 | "
\n",
279 | " \n",
280 | " 2 | \n",
281 | " JohnDoe@email.com | \n",
282 | " John Doe | \n",
283 | "
\n",
284 | " \n",
285 | "
\n",
286 | "
"
287 | ],
288 | "text/plain": [
289 | " email full_name\n",
290 | "0 CoreyMSchafer@gmail.com Corey Schafer\n",
291 | "1 JaneDoe@email.com Jane Doe\n",
292 | "2 JohnDoe@email.com John Doe"
293 | ]
294 | },
295 | "execution_count": 24,
296 | "metadata": {},
297 | "output_type": "execute_result"
298 | }
299 | ],
300 | "source": [
301 | "df.drop(columns=['first', 'last'])"
302 | ]
303 | },
304 | {
305 | "cell_type": "markdown",
306 | "metadata": {},
307 | "source": [
308 | "This change is not yet applied to df; we need to explicitly pass \"inplace=True\"."
309 | ]
310 | },
311 | {
312 | "cell_type": "code",
313 | "execution_count": 25,
314 | "metadata": {},
315 | "outputs": [
316 | {
317 | "data": {
318 | "text/html": [
319 | "\n",
320 | "\n",
333 | "
\n",
334 | " \n",
335 | " \n",
336 | " | \n",
337 | " first | \n",
338 | " last | \n",
339 | " email | \n",
340 | " full_name | \n",
341 | "
\n",
342 | " \n",
343 | " \n",
344 | " \n",
345 | " 0 | \n",
346 | " Corey | \n",
347 | " Schafer | \n",
348 | " CoreyMSchafer@gmail.com | \n",
349 | " Corey Schafer | \n",
350 | "
\n",
351 | " \n",
352 | " 1 | \n",
353 | " Jane | \n",
354 | " Doe | \n",
355 | " JaneDoe@email.com | \n",
356 | " Jane Doe | \n",
357 | "
\n",
358 | " \n",
359 | " 2 | \n",
360 | " John | \n",
361 | " Doe | \n",
362 | " JohnDoe@email.com | \n",
363 | " John Doe | \n",
364 | "
\n",
365 | " \n",
366 | "
\n",
367 | "
"
368 | ],
369 | "text/plain": [
370 | " first last email full_name\n",
371 | "0 Corey Schafer CoreyMSchafer@gmail.com Corey Schafer\n",
372 | "1 Jane Doe JaneDoe@email.com Jane Doe\n",
373 | "2 John Doe JohnDoe@email.com John Doe"
374 | ]
375 | },
376 | "execution_count": 25,
377 | "metadata": {},
378 | "output_type": "execute_result"
379 | }
380 | ],
381 | "source": [
382 | "df"
383 | ]
384 | },
385 | {
386 | "cell_type": "code",
387 | "execution_count": 26,
388 | "metadata": {},
389 | "outputs": [],
390 | "source": [
391 | "df.drop(columns=['first', 'last'], inplace=True)"
392 | ]
393 | },
394 | {
395 | "cell_type": "code",
396 | "execution_count": 27,
397 | "metadata": {},
398 | "outputs": [
399 | {
400 | "data": {
401 | "text/html": [
402 | "\n",
403 | "\n",
416 | "
\n",
417 | " \n",
418 | " \n",
419 | " | \n",
420 | " email | \n",
421 | " full_name | \n",
422 | "
\n",
423 | " \n",
424 | " \n",
425 | " \n",
426 | " 0 | \n",
427 | " CoreyMSchafer@gmail.com | \n",
428 | " Corey Schafer | \n",
429 | "
\n",
430 | " \n",
431 | " 1 | \n",
432 | " JaneDoe@email.com | \n",
433 | " Jane Doe | \n",
434 | "
\n",
435 | " \n",
436 | " 2 | \n",
437 | " JohnDoe@email.com | \n",
438 | " John Doe | \n",
439 | "
\n",
440 | " \n",
441 | "
\n",
442 | "
"
443 | ],
444 | "text/plain": [
445 | " email full_name\n",
446 | "0 CoreyMSchafer@gmail.com Corey Schafer\n",
447 | "1 JaneDoe@email.com Jane Doe\n",
448 | "2 JohnDoe@email.com John Doe"
449 | ]
450 | },
451 | "execution_count": 27,
452 | "metadata": {},
453 | "output_type": "execute_result"
454 | }
455 | ],
456 | "source": [
457 | "df"
458 | ]
459 | },
460 | {
461 | "cell_type": "markdown",
462 | "metadata": {},
463 | "source": [
464 | "To split full_name into separate columns for each part of name:"
465 | ]
466 | },
467 | {
468 | "cell_type": "code",
469 | "execution_count": 28,
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "data": {
474 | "text/plain": [
475 | "0 [Corey, Schafer]\n",
476 | "1 [Jane, Doe]\n",
477 | "2 [John, Doe]\n",
478 | "Name: full_name, dtype: object"
479 | ]
480 | },
481 | "execution_count": 28,
482 | "metadata": {},
483 | "output_type": "execute_result"
484 | }
485 | ],
486 | "source": [
487 | "df['full_name'].str.split(' ')"
488 | ]
489 | },
490 | {
491 | "cell_type": "markdown",
492 | "metadata": {},
493 | "source": [
494 | "Result is first and last name in a list."
495 | ]
496 | },
497 | {
498 | "cell_type": "markdown",
499 | "metadata": {},
500 | "source": [
501 | "To assign to separate columns, use expand argument:"
502 | ]
503 | },
504 | {
505 | "cell_type": "code",
506 | "execution_count": 29,
507 | "metadata": {},
508 | "outputs": [
509 | {
510 | "data": {
511 | "text/html": [
512 | "\n",
513 | "\n",
526 | "
\n",
527 | " \n",
528 | " \n",
529 | " | \n",
530 | " 0 | \n",
531 | " 1 | \n",
532 | "
\n",
533 | " \n",
534 | " \n",
535 | " \n",
536 | " 0 | \n",
537 | " Corey | \n",
538 | " Schafer | \n",
539 | "
\n",
540 | " \n",
541 | " 1 | \n",
542 | " Jane | \n",
543 | " Doe | \n",
544 | "
\n",
545 | " \n",
546 | " 2 | \n",
547 | " John | \n",
548 | " Doe | \n",
549 | "
\n",
550 | " \n",
551 | "
\n",
552 | "
"
553 | ],
554 | "text/plain": [
555 | " 0 1\n",
556 | "0 Corey Schafer\n",
557 | "1 Jane Doe\n",
558 | "2 John Doe"
559 | ]
560 | },
561 | "execution_count": 29,
562 | "metadata": {},
563 | "output_type": "execute_result"
564 | }
565 | ],
566 | "source": [
567 | "df['full_name'].str.split(' ', expand=True)"
568 | ]
569 | },
570 | {
571 | "cell_type": "markdown",
572 | "metadata": {},
573 | "source": [
574 | "Now set two columns in the dataframe for these, by passing in the list of column names:"
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": 30,
580 | "metadata": {},
581 | "outputs": [],
582 | "source": [
583 | "df[['first', 'last']] = df['full_name'].str.split(' ', expand=True)"
584 | ]
585 | },
586 | {
587 | "cell_type": "code",
588 | "execution_count": 31,
589 | "metadata": {},
590 | "outputs": [
591 | {
592 | "data": {
593 | "text/html": [
594 | "\n",
595 | "\n",
608 | "
\n",
609 | " \n",
610 | " \n",
611 | " | \n",
612 | " email | \n",
613 | " full_name | \n",
614 | " first | \n",
615 | " last | \n",
616 | "
\n",
617 | " \n",
618 | " \n",
619 | " \n",
620 | " 0 | \n",
621 | " CoreyMSchafer@gmail.com | \n",
622 | " Corey Schafer | \n",
623 | " Corey | \n",
624 | " Schafer | \n",
625 | "
\n",
626 | " \n",
627 | " 1 | \n",
628 | " JaneDoe@email.com | \n",
629 | " Jane Doe | \n",
630 | " Jane | \n",
631 | " Doe | \n",
632 | "
\n",
633 | " \n",
634 | " 2 | \n",
635 | " JohnDoe@email.com | \n",
636 | " John Doe | \n",
637 | " John | \n",
638 | " Doe | \n",
639 | "
\n",
640 | " \n",
641 | "
\n",
642 | "
"
643 | ],
644 | "text/plain": [
645 | " email full_name first last\n",
646 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
647 | "1 JaneDoe@email.com Jane Doe Jane Doe\n",
648 | "2 JohnDoe@email.com John Doe John Doe"
649 | ]
650 | },
651 | "execution_count": 31,
652 | "metadata": {},
653 | "output_type": "execute_result"
654 | }
655 | ],
656 | "source": [
657 | "df"
658 | ]
659 | },
660 | {
661 | "cell_type": "markdown",
662 | "metadata": {},
663 | "source": [
664 | "On to adding and removing rows...\n",
665 | "\n",
666 | "First, add a single row of data with append:"
667 | ]
668 | },
669 | {
670 | "cell_type": "code",
671 | "execution_count": 32,
672 | "metadata": {},
673 | "outputs": [
674 | {
675 | "ename": "TypeError",
676 | "evalue": "Can only append a Series if ignore_index=True or if the Series has a name",
677 | "output_type": "error",
678 | "traceback": [
679 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
680 | "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
681 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m{\u001b[0m\u001b[0;34m'first'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'Tony'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
682 | "\u001b[0;32m/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mappend\u001b[0;34m(self, other, ignore_index, verify_integrity, sort)\u001b[0m\n\u001b[1;32m 7096\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mother\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mignore_index\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7097\u001b[0m raise TypeError(\n\u001b[0;32m-> 7098\u001b[0;31m \u001b[0;34m\"Can only append a Series if ignore_index=True\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7099\u001b[0m \u001b[0;34m\" or if the Series has a name\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7100\u001b[0m )\n",
683 | "\u001b[0;31mTypeError\u001b[0m: Can only append a Series if ignore_index=True or if the Series has a name"
684 | ]
685 | }
686 | ],
687 | "source": [
688 | "df.append({'first': 'Tony'})"
689 | ]
690 | },
691 | {
692 | "cell_type": "markdown",
693 | "metadata": {},
694 | "source": [
695 | "Error because the row we are appending has no name to use as its index label. If we pass \"ignore_index=True\", the existing index is ignored and the new row is assigned a default index."
696 | ]
697 | },
698 | {
699 | "cell_type": "code",
700 | "execution_count": 33,
701 | "metadata": {},
702 | "outputs": [
703 | {
704 | "data": {
705 | "text/html": [
706 | "\n",
707 | "\n",
720 | "
\n",
721 | " \n",
722 | " \n",
723 | " | \n",
724 | " email | \n",
725 | " full_name | \n",
726 | " first | \n",
727 | " last | \n",
728 | "
\n",
729 | " \n",
730 | " \n",
731 | " \n",
732 | " 0 | \n",
733 | " CoreyMSchafer@gmail.com | \n",
734 | " Corey Schafer | \n",
735 | " Corey | \n",
736 | " Schafer | \n",
737 | "
\n",
738 | " \n",
739 | " 1 | \n",
740 | " JaneDoe@email.com | \n",
741 | " Jane Doe | \n",
742 | " Jane | \n",
743 | " Doe | \n",
744 | "
\n",
745 | " \n",
746 | " 2 | \n",
747 | " JohnDoe@email.com | \n",
748 | " John Doe | \n",
749 | " John | \n",
750 | " Doe | \n",
751 | "
\n",
752 | " \n",
753 | " 3 | \n",
754 | " NaN | \n",
755 | " NaN | \n",
756 | " Tony | \n",
757 | " NaN | \n",
758 | "
\n",
759 | " \n",
760 | "
\n",
761 | "
"
762 | ],
763 | "text/plain": [
764 | " email full_name first last\n",
765 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
766 | "1 JaneDoe@email.com Jane Doe Jane Doe\n",
767 | "2 JohnDoe@email.com John Doe John Doe\n",
768 | "3 NaN NaN Tony NaN"
769 | ]
770 | },
771 | "execution_count": 33,
772 | "metadata": {},
773 | "output_type": "execute_result"
774 | }
775 | ],
776 | "source": [
777 | "df.append({'first': 'Tony'}, ignore_index=True)"
778 | ]
779 | },
780 | {
781 | "cell_type": "markdown",
782 | "metadata": {},
783 | "source": [
784 | "New name was appended, but since we only assigned one value the other cells are \"NaN\".\n",
785 | "\n",
786 | "We can also append a new dataframe to an existing dataframe. First create the second dataframe:"
787 | ]
788 | },
789 | {
790 | "cell_type": "code",
791 | "execution_count": 34,
792 | "metadata": {},
793 | "outputs": [],
794 | "source": [
795 | "people = {\n",
796 | " \"first\": [\"Tony\", \"Steve\"],\n",
797 | " \"last\": [\"Stark\", \"Rogers\"],\n",
798 | " \"email\": [\"ironman@avenge.com\", \"cap@avenge.com\"]\n",
799 | "}\n",
800 | "df2 = pd.DataFrame(people)"
801 | ]
802 | },
803 | {
804 | "cell_type": "code",
805 | "execution_count": 35,
806 | "metadata": {},
807 | "outputs": [
808 | {
809 | "data": {
810 | "text/html": [
811 | "\n",
812 | "\n",
825 | "
\n",
826 | " \n",
827 | " \n",
828 | " | \n",
829 | " first | \n",
830 | " last | \n",
831 | " email | \n",
832 | "
\n",
833 | " \n",
834 | " \n",
835 | " \n",
836 | " 0 | \n",
837 | " Tony | \n",
838 | " Stark | \n",
839 | " ironman@avenge.com | \n",
840 | "
\n",
841 | " \n",
842 | " 1 | \n",
843 | " Steve | \n",
844 | " Rogers | \n",
845 | " cap@avenge.com | \n",
846 | "
\n",
847 | " \n",
848 | "
\n",
849 | "
"
850 | ],
851 | "text/plain": [
852 | " first last email\n",
853 | "0 Tony Stark ironman@avenge.com\n",
854 | "1 Steve Rogers cap@avenge.com"
855 | ]
856 | },
857 | "execution_count": 35,
858 | "metadata": {},
859 | "output_type": "execute_result"
860 | }
861 | ],
862 | "source": [
863 | "df2"
864 | ]
865 | },
866 | {
867 | "cell_type": "markdown",
868 | "metadata": {},
869 | "source": [
870 | "Now append, remembering to ignore_index:"
871 | ]
872 | },
873 | {
874 | "cell_type": "code",
875 | "execution_count": 36,
876 | "metadata": {},
877 | "outputs": [
878 | {
879 | "name": "stderr",
880 | "output_type": "stream",
881 | "text": [
882 | "/usr/local/lib/python3.6/dist-packages/pandas/core/frame.py:7138: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
883 | "of pandas will change to not sort by default.\n",
884 | "\n",
885 | "To accept the future behavior, pass 'sort=False'.\n",
886 | "\n",
887 | "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
888 | "\n",
889 | " sort=sort,\n"
890 | ]
891 | },
892 | {
893 | "data": {
894 | "text/html": [
895 | "\n",
896 | "\n",
909 | "
\n",
910 | " \n",
911 | " \n",
912 | " | \n",
913 | " email | \n",
914 | " first | \n",
915 | " full_name | \n",
916 | " last | \n",
917 | "
\n",
918 | " \n",
919 | " \n",
920 | " \n",
921 | " 0 | \n",
922 | " CoreyMSchafer@gmail.com | \n",
923 | " Corey | \n",
924 | " Corey Schafer | \n",
925 | " Schafer | \n",
926 | "
\n",
927 | " \n",
928 | " 1 | \n",
929 | " JaneDoe@email.com | \n",
930 | " Jane | \n",
931 | " Jane Doe | \n",
932 | " Doe | \n",
933 | "
\n",
934 | " \n",
935 | " 2 | \n",
936 | " JohnDoe@email.com | \n",
937 | " John | \n",
938 | " John Doe | \n",
939 | " Doe | \n",
940 | "
\n",
941 | " \n",
942 | " 3 | \n",
943 | " ironman@avenge.com | \n",
944 | " Tony | \n",
945 | " NaN | \n",
946 | " Stark | \n",
947 | "
\n",
948 | " \n",
949 | " 4 | \n",
950 | " cap@avenge.com | \n",
951 | " Steve | \n",
952 | " NaN | \n",
953 | " Rogers | \n",
954 | "
\n",
955 | " \n",
956 | "
\n",
957 | "
"
958 | ],
959 | "text/plain": [
960 | " email first full_name last\n",
961 | "0 CoreyMSchafer@gmail.com Corey Corey Schafer Schafer\n",
962 | "1 JaneDoe@email.com Jane Jane Doe Doe\n",
963 | "2 JohnDoe@email.com John John Doe Doe\n",
964 | "3 ironman@avenge.com Tony NaN Stark\n",
965 | "4 cap@avenge.com Steve NaN Rogers"
966 | ]
967 | },
968 | "execution_count": 36,
969 | "metadata": {},
970 | "output_type": "execute_result"
971 | }
972 | ],
973 | "source": [
974 | "df.append(df2, ignore_index=True)"
975 | ]
976 | },
977 | {
978 | "cell_type": "markdown",
979 | "metadata": {},
980 | "source": [
981 | "Added new rows. The reason for the warning is that the columns of the two dataframes are not aligned (we didn't pass all columns in the same order), so pandas sorted them. In a future version, pandas will not sort by default.\n",
982 | "\n",
983 | "If option \"sort=False\" is set, warning will be suppressed.\n",
984 | "\n",
985 | "The append method doesn't have an \"inplace\" argument, so we have to redefine df to make permanent:"
986 | ]
987 | },
988 | {
989 | "cell_type": "code",
990 | "execution_count": 37,
991 | "metadata": {},
992 | "outputs": [],
993 | "source": [
994 | "df = df.append(df2, ignore_index=True, sort=False)"
995 | ]
996 | },
997 | {
998 | "cell_type": "code",
999 | "execution_count": 38,
1000 | "metadata": {},
1001 | "outputs": [
1002 | {
1003 | "data": {
1004 | "text/html": [
1005 | "\n",
1006 | "\n",
1019 | "
\n",
1020 | " \n",
1021 | " \n",
1022 | " | \n",
1023 | " email | \n",
1024 | " full_name | \n",
1025 | " first | \n",
1026 | " last | \n",
1027 | "
\n",
1028 | " \n",
1029 | " \n",
1030 | " \n",
1031 | " 0 | \n",
1032 | " CoreyMSchafer@gmail.com | \n",
1033 | " Corey Schafer | \n",
1034 | " Corey | \n",
1035 | " Schafer | \n",
1036 | "
\n",
1037 | " \n",
1038 | " 1 | \n",
1039 | " JaneDoe@email.com | \n",
1040 | " Jane Doe | \n",
1041 | " Jane | \n",
1042 | " Doe | \n",
1043 | "
\n",
1044 | " \n",
1045 | " 2 | \n",
1046 | " JohnDoe@email.com | \n",
1047 | " John Doe | \n",
1048 | " John | \n",
1049 | " Doe | \n",
1050 | "
\n",
1051 | " \n",
1052 | " 3 | \n",
1053 | " ironman@avenge.com | \n",
1054 | " NaN | \n",
1055 | " Tony | \n",
1056 | " Stark | \n",
1057 | "
\n",
1058 | " \n",
1059 | " 4 | \n",
1060 | " cap@avenge.com | \n",
1061 | " NaN | \n",
1062 | " Steve | \n",
1063 | " Rogers | \n",
1064 | "
\n",
1065 | " \n",
1066 | "
\n",
1067 | "
"
1068 | ],
1069 | "text/plain": [
1070 | " email full_name first last\n",
1071 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
1072 | "1 JaneDoe@email.com Jane Doe Jane Doe\n",
1073 | "2 JohnDoe@email.com John Doe John Doe\n",
1074 | "3 ironman@avenge.com NaN Tony Stark\n",
1075 | "4 cap@avenge.com NaN Steve Rogers"
1076 | ]
1077 | },
1078 | "execution_count": 38,
1079 | "metadata": {},
1080 | "output_type": "execute_result"
1081 | }
1082 | ],
1083 | "source": [
1084 | "df"
1085 | ]
1086 | },
1087 | {
1088 | "cell_type": "markdown",
1089 | "metadata": {},
1090 | "source": [
1091 | "Now let's remove rows. Instead of specifying columns to drop, specify indexes:"
1092 | ]
1093 | },
1094 | {
1095 | "cell_type": "code",
1096 | "execution_count": 39,
1097 | "metadata": {},
1098 | "outputs": [
1099 | {
1100 | "data": {
1101 | "text/html": [
1102 | "\n",
1103 | "\n",
1116 | "
\n",
1117 | " \n",
1118 | " \n",
1119 | " | \n",
1120 | " email | \n",
1121 | " full_name | \n",
1122 | " first | \n",
1123 | " last | \n",
1124 | "
\n",
1125 | " \n",
1126 | " \n",
1127 | " \n",
1128 | " 0 | \n",
1129 | " CoreyMSchafer@gmail.com | \n",
1130 | " Corey Schafer | \n",
1131 | " Corey | \n",
1132 | " Schafer | \n",
1133 | "
\n",
1134 | " \n",
1135 | " 1 | \n",
1136 | " JaneDoe@email.com | \n",
1137 | " Jane Doe | \n",
1138 | " Jane | \n",
1139 | " Doe | \n",
1140 | "
\n",
1141 | " \n",
1142 | " 2 | \n",
1143 | " JohnDoe@email.com | \n",
1144 | " John Doe | \n",
1145 | " John | \n",
1146 | " Doe | \n",
1147 | "
\n",
1148 | " \n",
1149 | " 3 | \n",
1150 | " ironman@avenge.com | \n",
1151 | " NaN | \n",
1152 | " Tony | \n",
1153 | " Stark | \n",
1154 | "
\n",
1155 | " \n",
1156 | "
\n",
1157 | "
"
1158 | ],
1159 | "text/plain": [
1160 | " email full_name first last\n",
1161 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
1162 | "1 JaneDoe@email.com Jane Doe Jane Doe\n",
1163 | "2 JohnDoe@email.com John Doe John Doe\n",
1164 | "3 ironman@avenge.com NaN Tony Stark"
1165 | ]
1166 | },
1167 | "execution_count": 39,
1168 | "metadata": {},
1169 | "output_type": "execute_result"
1170 | }
1171 | ],
1172 | "source": [
1173 | "df.drop(index=4)"
1174 | ]
1175 | },
1176 | {
1177 | "cell_type": "markdown",
1178 | "metadata": {},
1179 | "source": [
1180 | "To apply change permanently, use \"inplace=True\".\n",
1181 | "\n",
1182 | "We can also use a filter with the drop method by passing in the index of the filtered rows:"
1183 | ]
1184 | },
1185 | {
1186 | "cell_type": "code",
1187 | "execution_count": 40,
1188 | "metadata": {},
1189 | "outputs": [
1190 | {
1191 | "data": {
1192 | "text/html": [
1193 | "\n",
1194 | "\n",
1207 | "
\n",
1208 | " \n",
1209 | " \n",
1210 | " | \n",
1211 | " email | \n",
1212 | " full_name | \n",
1213 | " first | \n",
1214 | " last | \n",
1215 | "
\n",
1216 | " \n",
1217 | " \n",
1218 | " \n",
1219 | " 0 | \n",
1220 | " CoreyMSchafer@gmail.com | \n",
1221 | " Corey Schafer | \n",
1222 | " Corey | \n",
1223 | " Schafer | \n",
1224 | "
\n",
1225 | " \n",
1226 | " 3 | \n",
1227 | " ironman@avenge.com | \n",
1228 | " NaN | \n",
1229 | " Tony | \n",
1230 | " Stark | \n",
1231 | "
\n",
1232 | " \n",
1233 | " 4 | \n",
1234 | " cap@avenge.com | \n",
1235 | " NaN | \n",
1236 | " Steve | \n",
1237 | " Rogers | \n",
1238 | "
\n",
1239 | " \n",
1240 | "
\n",
1241 | "
"
1242 | ],
1243 | "text/plain": [
1244 | " email full_name first last\n",
1245 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
1246 | "3 ironman@avenge.com NaN Tony Stark\n",
1247 | "4 cap@avenge.com NaN Steve Rogers"
1248 | ]
1249 | },
1250 | "execution_count": 40,
1251 | "metadata": {},
1252 | "output_type": "execute_result"
1253 | }
1254 | ],
1255 | "source": [
1256 | "df.drop(index=df[df['last'] == 'Doe'].index)"
1257 | ]
1258 | },
1259 | {
1260 | "cell_type": "code",
1261 | "execution_count": 42,
1262 | "metadata": {},
1263 | "outputs": [
1264 | {
1265 | "data": {
1266 | "text/html": [
1267 | "\n",
1268 | "\n",
1281 | "
\n",
1282 | " \n",
1283 | " \n",
1284 | " | \n",
1285 | " email | \n",
1286 | " full_name | \n",
1287 | " first | \n",
1288 | " last | \n",
1289 | "
\n",
1290 | " \n",
1291 | " \n",
1292 | " \n",
1293 | " 0 | \n",
1294 | " CoreyMSchafer@gmail.com | \n",
1295 | " Corey Schafer | \n",
1296 | " Corey | \n",
1297 | " Schafer | \n",
1298 | "
\n",
1299 | " \n",
1300 | " 1 | \n",
1301 | " JaneDoe@email.com | \n",
1302 | " Jane Doe | \n",
1303 | " Jane | \n",
1304 | " Doe | \n",
1305 | "
\n",
1306 | " \n",
1307 | " 2 | \n",
1308 | " JohnDoe@email.com | \n",
1309 | " John Doe | \n",
1310 | " John | \n",
1311 | " Doe | \n",
1312 | "
\n",
1313 | " \n",
1314 | " 3 | \n",
1315 | " ironman@avenge.com | \n",
1316 | " NaN | \n",
1317 | " Tony | \n",
1318 | " Stark | \n",
1319 | "
\n",
1320 | " \n",
1321 | " 4 | \n",
1322 | " cap@avenge.com | \n",
1323 | " NaN | \n",
1324 | " Steve | \n",
1325 | " Rogers | \n",
1326 | "
\n",
1327 | " \n",
1328 | "
\n",
1329 | "
"
1330 | ],
1331 | "text/plain": [
1332 | " email full_name first last\n",
1333 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
1334 | "1 JaneDoe@email.com Jane Doe Jane Doe\n",
1335 | "2 JohnDoe@email.com John Doe John Doe\n",
1336 | "3 ironman@avenge.com NaN Tony Stark\n",
1337 | "4 cap@avenge.com NaN Steve Rogers"
1338 | ]
1339 | },
1340 | "execution_count": 42,
1341 | "metadata": {},
1342 | "output_type": "execute_result"
1343 | }
1344 | ],
1345 | "source": [
1346 | "df"
1347 | ]
1348 | },
1349 | {
1350 | "cell_type": "markdown",
1351 | "metadata": {},
1352 | "source": [
1353 | "Corey thinks the one-line drop shown earlier is hard to read. Instead, store the filter in a variable first, then drop the rows it matches:"
1354 | ]
1355 | },
1356 | {
1357 | "cell_type": "code",
1358 | "execution_count": 43,
1359 | "metadata": {},
1360 | "outputs": [
1361 | {
1362 | "data": {
1363 | "text/html": [
1364 | "\n",
1365 | "\n",
1378 | "
\n",
1379 | " \n",
1380 | " \n",
1381 | " | \n",
1382 | " email | \n",
1383 | " full_name | \n",
1384 | " first | \n",
1385 | " last | \n",
1386 | "
\n",
1387 | " \n",
1388 | " \n",
1389 | " \n",
1390 | " 0 | \n",
1391 | " CoreyMSchafer@gmail.com | \n",
1392 | " Corey Schafer | \n",
1393 | " Corey | \n",
1394 | " Schafer | \n",
1395 | "
\n",
1396 | " \n",
1397 | " 3 | \n",
1398 | " ironman@avenge.com | \n",
1399 | " NaN | \n",
1400 | " Tony | \n",
1401 | " Stark | \n",
1402 | "
\n",
1403 | " \n",
1404 | " 4 | \n",
1405 | " cap@avenge.com | \n",
1406 | " NaN | \n",
1407 | " Steve | \n",
1408 | " Rogers | \n",
1409 | "
\n",
1410 | " \n",
1411 | "
\n",
1412 | "
"
1413 | ],
1414 | "text/plain": [
1415 | " email full_name first last\n",
1416 | "0 CoreyMSchafer@gmail.com Corey Schafer Corey Schafer\n",
1417 | "3 ironman@avenge.com NaN Tony Stark\n",
1418 | "4 cap@avenge.com NaN Steve Rogers"
1419 | ]
1420 | },
1421 | "execution_count": 43,
1422 | "metadata": {},
1423 | "output_type": "execute_result"
1424 | }
1425 | ],
1426 | "source": [
1427 | "filt = df['last'] == 'Doe'\n",
1428 | "df.drop(index=df[filt].index)"
1429 | ]
1430 | },
1431 | {
1432 | "cell_type": "code",
1433 | "execution_count": null,
1434 | "metadata": {},
1435 | "outputs": [],
1436 | "source": []
1437 | }
1438 | ],
1439 | "metadata": {
1440 | "kernelspec": {
1441 | "display_name": "Python 3",
1442 | "language": "python",
1443 | "name": "python3"
1444 | },
1445 | "language_info": {
1446 | "codemirror_mode": {
1447 | "name": "ipython",
1448 | "version": 3
1449 | },
1450 | "file_extension": ".py",
1451 | "mimetype": "text/x-python",
1452 | "name": "python",
1453 | "nbconvert_exporter": "python",
1454 | "pygments_lexer": "ipython3",
1455 | "version": "3.6.9"
1456 | }
1457 | },
1458 | "nbformat": 4,
1459 | "nbformat_minor": 2
1460 | }
1461 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Pandas Tutorials
2 | Tutorials on how to use the Python pandas library, from [Corey Schafer](https://github.com/CoreyMSchafer).
3 |
4 | YouTube playlist:
5 |
6 | https://www.youtube.com/playlist?list=PL-osiE80TeTsWmV9i9c58mdDCSskIFdDS
7 |
8 | Code snippets:
9 |
10 | https://github.com/CoreyMSchafer/code_snippets/tree/master/Python/Pandas
11 |
12 | This repository is updated as the course progresses.
13 |
14 |
--------------------------------------------------------------------------------
/data/ETH_1h.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/ETH_1h.csv.zip
--------------------------------------------------------------------------------
/data/README_2019.txt:
--------------------------------------------------------------------------------
1 | The Public 2019 Stack Overflow Developer Survey Results
2 |
3 | Description:
4 |
5 | The enclosed data set is the full, cleaned results of the 2019 Stack Overflow Developer Survey. Free response submissions and personally identifying information have been removed from the results to protect the privacy of respondents. There are three files besides this README:
6 |
7 | 1. survey_results_public.csv - CSV file with main survey results, one respondent per row and one column per answer
8 | 2. survey_results_schema.csv - CSV file with survey schema, i.e., the questions that correspond to each column name
9 | 3. so_survey_2019.pdf - PDF file of survey instrument
10 |
11 | The survey was fielded from January 23 to February 14, 2019. The median time spent on the survey for qualified responses was 23.3 minutes.
12 |
13 | Respondents were recruited primarily through channels owned by Stack Overflow. The top sources of respondents were onsite messaging, blog posts, email lists, Meta posts, banner ads, and social media posts. Since respondents were recruited in this way, highly engaged users on Stack Overflow were more likely to notice the links for the survey and click to begin it.
14 |
15 | As an incentive, respondents who completed the survey could opt in to a "Census" badge.
16 |
17 | You can find the official published results here:
18 |
19 | https://insights.stackoverflow.com/survey/2019
20 |
21 | Find previous survey results here:
22 |
23 | https://insights.stackoverflow.com/survey
24 |
25 | Legal:
26 |
27 | This database - The Public 2019 Stack Overflow Developer Survey Results - is made available under the Open Database License (ODbL): http://opendatacommons.org/licenses/odbl/1.0/. Any rights in individual contents of the database are licensed under the Database Contents License: http://opendatacommons.org/licenses/dbcl/1.0/
28 |
29 | TLDR: You are free to share, adapt, and create derivative works from The Public 2019 Stack Overflow Developer Survey Results as long as you attribute Stack Overflow, keep the database open (if you redistribute it), and continue to share-alike any adapted database under the ODbL.
30 |
31 | Acknowledgment:
32 |
33 | Massive, heartfelt thanks to all Stack Overflow contributors and lurking developers of the world who took part in the survey this year. We value your generous participation more than you know. <3
34 |
--------------------------------------------------------------------------------
/data/so_survey_2019.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/so_survey_2019.pdf
--------------------------------------------------------------------------------
/data/survey_results_public.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/survey_results_public.csv.zip
--------------------------------------------------------------------------------
/data/survey_results_schema.csv:
--------------------------------------------------------------------------------
1 | Column,QuestionText
2 | Respondent,Randomized respondent ID number (not in order of survey response time)
3 | MainBranch,"Which of the following options best describes you today? Here, by ""developer"" we mean ""someone who writes code."""
4 | Hobbyist,Do you code as a hobby?
5 | OpenSourcer,How often do you contribute to open source?
6 | OpenSource,How do you feel about the quality of open source software (OSS)?
7 | Employment,Which of the following best describes your current employment status?
8 | Country,In which country do you currently reside?
9 | Student,"Are you currently enrolled in a formal, degree-granting college or university program?"
10 | EdLevel,Which of the following best describes the highest level of formal education that you’ve completed?
11 | UndergradMajor,What was your main or most important field of study?
12 | EduOther,Which of the following types of non-degree education have you used or participated in? Please select all that apply.
13 | OrgSize,Approximately how many people are employed by the company or organization you work for?
14 | DevType,Which of the following describe you? Please select all that apply.
15 | YearsCode,"Including any education, how many years have you been coding?"
16 | Age1stCode,"At what age did you write your first line of code or program? (E.g., webpage, Hello World, Scratch project)"
17 | YearsCodePro,How many years have you coded professionally (as a part of your work)?
18 | CareerSat,"Overall, how satisfied are you with your career thus far?"
19 | JobSat,"How satisfied are you with your current job? (If you work multiple jobs, answer for the one you spend the most hours on.)"
20 | MgrIdiot,How confident are you that your manager knows what they’re doing?
21 | MgrMoney,Do you believe that you need to be a manager to make more money?
22 | MgrWant,Do you want to become a manager yourself in the future?
23 | JobSeek,Which of the following best describes your current job-seeking status?
24 | LastHireDate,When was the last time that you took a job with a new employer?
25 | LastInt,"In your most recent successful job interview (resulting in a job offer), you were asked to... (check all that apply)"
26 | FizzBuzz,Have you ever been asked to solve FizzBuzz in an interview?
27 | JobFactors,"Imagine that you are deciding between two job offers with the same compensation, benefits, and location. Of the following factors, which 3 are MOST important to you?"
28 | ResumeUpdate,"Think back to the last time you updated your resumé, CV, or an online profile on a job site. What is the PRIMARY reason that you did so?"
29 | CurrencySymbol,"Which currency do you use day-to-day? If your answer is complicated, please pick the one you're most comfortable estimating in."
30 | CurrencyDesc,"Which currency do you use day-to-day? If your answer is complicated, please pick the one you're most comfortable estimating in."
31 | CompTotal,"What is your current total compensation (salary, bonuses, and perks, before taxes and deductions), in `CurrencySymbol`? Please enter a whole number in the box below, without any punctuation. If you are paid hourly, please estimate an equivalent weekly, monthly, or yearly salary. If you prefer not to answer, please leave the box empty."
32 | CompFreq,"Is that compensation weekly, monthly, or yearly?"
33 | ConvertedComp,"Salary converted to annual USD salaries using the exchange rate on 2019-02-01, assuming 12 working months and 50 working weeks."
34 | WorkWeekHrs,"On average, how many hours per week do you work?"
35 | WorkPlan,How structured or planned is your work?
36 | WorkChallenge,"Of these options, what are your greatest challenges to productivity as a developer? Select up to 3:"
37 | WorkRemote,How often do you work remotely?
38 | WorkLoc,Where would you prefer to work?
39 | ImpSyn,"For the specific work you do, and the years of experience you have, how do you rate your own level of competence?"
40 | CodeRev,Do you review code as part of your work?
41 | CodeRevHrs,"On average, how many hours per week do you spend on code review?"
42 | UnitTests,Does your company regularly employ unit tests in the development of their products?
43 | PurchaseHow,"How does your company make decisions about purchasing new technology (cloud, AI, IoT, databases)?"
44 | PurchaseWhat,"What level of influence do you, personally, have over new technology purchases at your organization?"
45 | LanguageWorkedWith,"Which of the following programming, scripting, and markup languages have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the language and want to continue to do so, please check both boxes in that row.)"
46 | LanguageDesireNextYear,"Which of the following programming, scripting, and markup languages have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the language and want to continue to do so, please check both boxes in that row.)"
47 | DatabaseWorkedWith,"Which of the following database environments have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the database and want to continue to do so, please check both boxes in that row.)"
48 | DatabaseDesireNextYear,"Which of the following database environments have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the database and want to continue to do so, please check both boxes in that row.)"
49 | PlatformWorkedWith,"Which of the following platforms have you done extensive development work for over the past year? (If you both developed for the platform and want to continue to do so, please check both boxes in that row.)"
50 | PlatformDesireNextYear,"Which of the following platforms have you done extensive development work for over the past year? (If you both developed for the platform and want to continue to do so, please check both boxes in that row.)"
51 | WebFrameWorkedWith,"Which of the following web frameworks have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the framework and want to continue to do so, please check both boxes in that row.)"
52 | WebFrameDesireNextYear,"Which of the following web frameworks have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the framework and want to continue to do so, please check both boxes in that row.)"
53 | MiscTechWorkedWith,"Which of the following other frameworks, libraries, and tools have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the technology and want to continue to do so, please check both boxes in that row.)"
54 | MiscTechDesireNextYear,"Which of the following other frameworks, libraries, and tools have you done extensive development work in over the past year, and which do you want to work in over the next year? (If you both worked with the technology and want to continue to do so, please check both boxes in that row.)"
55 | DevEnviron,Which development environment(s) do you use regularly? Please check all that apply.
56 | OpSys,What is the primary operating system in which you work?
57 | Containers,"How do you use containers (Docker, Open Container Initiative (OCI), etc.)?"
58 | BlockchainOrg,How is your organization thinking about or implementing blockchain technology?
59 | BlockchainIs,Blockchain / cryptocurrency technology is primarily:
60 | BetterLife,Do you think people born today will have a better life than their parents?
61 | ITperson,"Are you the ""IT support person"" for your family?"
62 | OffOn,Have you tried turning it off and on again?
63 | SocialMedia,What social media site do you use the most?
64 | Extraversion,Do you prefer online chat or IRL conversations?
65 | ScreenName,What do you call it?
66 | SOVisit1st,"To the best of your memory, when did you first visit Stack Overflow?"
67 | SOVisitFreq,How frequently would you say you visit Stack Overflow?
68 | SOVisitTo,I visit Stack Overflow to... (check all that apply)
69 | SOFindAnswer,"On average, how many times a week do you find (and use) an answer on Stack Overflow?"
70 | SOTimeSaved,"Think back to the last time you solved a coding problem using Stack Overflow, as well as the last time you solved a problem using a different resource. Which was faster?"
71 | SOHowMuchTime,"About how much time did you save? If you're not sure, please use your best estimate."
72 | SOAccount,Do you have a Stack Overflow account?
73 | SOPartFreq,"How frequently would you say you participate in Q&A on Stack Overflow? By participate we mean ask, answer, vote for, or comment on questions."
74 | SOJobs,Have you ever used or visited Stack Overflow Jobs?
75 | EntTeams,Have you ever used Stack Overflow for Enterprise or Stack Overflow for Teams?
76 | SOComm,Do you consider yourself a member of the Stack Overflow community?
77 | WelcomeChange,"Compared to last year, how welcome do you feel on Stack Overflow?"
78 | SONewContent,Would you like to see any of the following on Stack Overflow? Check all that apply.
79 | Age,"What is your age (in years)? If you prefer not to answer, you may leave this question blank."
80 | Gender,"Which of the following do you currently identify as? Please select all that apply. If you prefer not to answer, you may leave this question blank."
81 | Trans,Do you identify as transgender?
82 | Sexuality,"Which of the following do you currently identify as? Please select all that apply. If you prefer not to answer, you may leave this question blank."
83 | Ethnicity,"Which of the following do you identify as? Please check all that apply. If you prefer not to answer, you may leave this question blank."
84 | Dependents,"Do you have any dependents (e.g., children, elders, or others) that you care for?"
85 | SurveyLength,How do you feel about the length of the survey this year?
86 | SurveyEase,How easy or difficult was this survey to complete?
87 |
--------------------------------------------------------------------------------
/data/survey_results_schema.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/plembo/pandas-tutorials/02fdbb8196586bdeec1da9e89a0934c15b44b53d/data/survey_results_schema.csv.zip
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | matplotlib
3 | pandas
4 | sqlalchemy
5 | psycopg2
6 |
--------------------------------------------------------------------------------