├── 1.Python Crash Course
└── Python Crash Course.ipynb
├── 2.Intro to Pandas
├── 1.Creating a Dataframe.ipynb
├── 2.Displaying a Dataframe.ipynb
├── 3. Basic Attributes, Methods and Functions.ipynb
├── 4.Selecting One Column from a Dataframe.ipynb
├── 5.Selecting Two or More Columns from a Dataframe.ipynb
├── 6.Add New Column to a Dataframe.ipynb
├── 7.Operations on Dataframes.ipynb
├── 8.The value_counts() method.ipynb
├── 9.Sort a Dataframe with sort_values().ipynb
└── StudentsPerformance.csv
├── 3.Pivot Table
├── 1.pivot() and pivot_table().ipynb
├── gdp.csv
└── supermarket_sales.xlsx
├── 4.Data Visualization
├── 1.Dataset Overview and Making Pivot Table.ipynb
├── 2.Data Visualization with Pandas.ipynb
├── 3.Adding Matplotlib to Pandas.ipynb
└── population_total.csv
├── Exercises
├── Intro to Pandas
│ ├── Introduction to Pandas-Exercise.ipynb
│ ├── Introduction to Pandas-Solution.ipynb
│ └── bestsellers with categories.csv
└── Merging and Concatenating DataFrames
│       ├── IMDb movies.csv.zip
│       ├── IMDb ratings.csv.zip
│       ├── Merging and Concatenating DataFrames-Exercise.ipynb
│       └── Merging and Concatenating DataFrames-Solution.ipynb
├── README.md
├── Web Scraping with Pandas.ipynb
├── loc vs iloc.ipynb
└── players_20.csv
/2.Intro to Pandas/1.Creating a Dataframe.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 2,
6 | "id": "7fa9cddf",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd\n",
11 | "import numpy as np"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "id": "81a09ffd",
17 | "metadata": {},
18 | "source": [
19 | "# Creating a dataframe from an array"
20 | ]
21 | },
22 | {
23 | "cell_type": "markdown",
24 | "id": "6db0cb3f",
25 | "metadata": {},
26 | "source": [
27 | "## Option 1"
28 | ]
29 | },
30 | {
31 | "cell_type": "code",
32 | "execution_count": 3,
33 | "id": "93d02bc5",
34 | "metadata": {},
35 | "outputs": [],
36 | "source": [
37 | "# creating an array\n",
38 | "data = np.array([[1, 4], [2, 5], [3, 6]])"
39 | ]
40 | },
41 | {
42 | "cell_type": "code",
43 | "execution_count": 4,
44 | "id": "b9be7874",
45 | "metadata": {},
46 | "outputs": [],
47 | "source": [
48 | "# creating a dataframe\n",
49 | "df = pd.DataFrame(data, index=['row1', 'row2', 'row3'],\n",
50 | " columns=['col1', 'col2'])"
51 | ]
52 | },
53 | {
54 | "cell_type": "code",
55 | "execution_count": 5,
56 | "id": "7787533c",
57 | "metadata": {},
58 | "outputs": [
59 | {
60 | "data": {
61 | "text/html": [
62 | "
\n",
63 | "\n",
76 | "
\n",
77 | " \n",
78 | " \n",
79 | " | \n",
80 | " col1 | \n",
81 | " col2 | \n",
82 | "
\n",
83 | " \n",
84 | " \n",
85 | " \n",
86 | " row1 | \n",
87 | " 1 | \n",
88 | " 4 | \n",
89 | "
\n",
90 | " \n",
91 | " row2 | \n",
92 | " 2 | \n",
93 | " 5 | \n",
94 | "
\n",
95 | " \n",
96 | " row3 | \n",
97 | " 3 | \n",
98 | " 6 | \n",
99 | "
\n",
100 | " \n",
101 | "
\n",
102 | "
"
103 | ],
104 | "text/plain": [
105 | " col1 col2\n",
106 | "row1 1 4\n",
107 | "row2 2 5\n",
108 | "row3 3 6"
109 | ]
110 | },
111 | "execution_count": 5,
112 | "metadata": {},
113 | "output_type": "execute_result"
114 | }
115 | ],
116 | "source": [
117 | "# showing the dataframe\n",
118 | "df"
119 | ]
120 | },
121 | {
122 | "cell_type": "markdown",
123 | "id": "9a66d12d",
124 | "metadata": {},
125 | "source": [
126 | "## Option 2"
127 | ]
128 | },
129 | {
130 | "cell_type": "code",
131 | "execution_count": 6,
132 | "id": "39caa078",
133 | "metadata": {},
134 | "outputs": [],
135 | "source": [
136 | "# creating an array with list shape\n",
137 | "data = [[1, 4], [2, 5], [3, 6]]"
138 | ]
139 | },
140 | {
141 | "cell_type": "code",
142 | "execution_count": 7,
143 | "id": "dfca7ffa",
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "# creating a dataframe\n",
148 | "df = pd.DataFrame(data, index=['row1', 'row2', 'row3'],\n",
149 | " columns=['col1', 'col2'])"
150 | ]
151 | },
152 | {
153 | "cell_type": "code",
154 | "execution_count": 8,
155 | "id": "7c057728",
156 | "metadata": {},
157 | "outputs": [
158 | {
159 | "data": {
160 | "text/html": [
161 | "\n",
162 | "\n",
175 | "
\n",
176 | " \n",
177 | " \n",
178 | " | \n",
179 | " col1 | \n",
180 | " col2 | \n",
181 | "
\n",
182 | " \n",
183 | " \n",
184 | " \n",
185 | " row1 | \n",
186 | " 1 | \n",
187 | " 4 | \n",
188 | "
\n",
189 | " \n",
190 | " row2 | \n",
191 | " 2 | \n",
192 | " 5 | \n",
193 | "
\n",
194 | " \n",
195 | " row3 | \n",
196 | " 3 | \n",
197 | " 6 | \n",
198 | "
\n",
199 | " \n",
200 | "
\n",
201 | "
"
202 | ],
203 | "text/plain": [
204 | " col1 col2\n",
205 | "row1 1 4\n",
206 | "row2 2 5\n",
207 | "row3 3 6"
208 | ]
209 | },
210 | "execution_count": 8,
211 | "metadata": {},
212 | "output_type": "execute_result"
213 | }
214 | ],
215 | "source": [
216 | "# showing the dataframe\n",
217 | "df"
218 | ]
219 | },
220 | {
221 | "cell_type": "markdown",
222 | "id": "d7ee2ed9",
223 | "metadata": {},
224 | "source": [
225 | "# Creating a DataFrame from a dictionary"
226 | ]
227 | },
228 | {
229 | "cell_type": "code",
230 | "execution_count": 9,
231 | "id": "63bf85b8",
232 | "metadata": {},
233 | "outputs": [],
234 | "source": [
235 | "# lists used for this example\n",
236 | "states = [\"California\", \"Texas\", \"Florida\", \"New York\"]\n",
237 | "population = [39613493, 29730311, 21944577, 19299981]"
238 | ]
239 | },
240 | {
241 | "cell_type": "code",
242 | "execution_count": 10,
243 | "id": "b309ebe8",
244 | "metadata": {},
245 | "outputs": [],
246 | "source": [
247 | "# Storing lists within a dictionary\n",
248 | "dict_states = {'States': states, 'Population': population}"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": 11,
254 | "id": "4e774499",
255 | "metadata": {
256 | "scrolled": false
257 | },
258 | "outputs": [],
259 | "source": [
260 | "# Creating the dataframe\n",
261 | "df_population = pd.DataFrame(dict_states)\n",
262 | "# df_population = pd.DataFrame.from_dict(dict_states)"
263 | ]
264 | },
265 | {
266 | "cell_type": "code",
267 | "execution_count": 12,
268 | "id": "585c7ff6",
269 | "metadata": {},
270 | "outputs": [
271 | {
272 | "data": {
273 | "text/html": [
274 | "\n",
275 | "\n",
288 | "
\n",
289 | " \n",
290 | " \n",
291 | " | \n",
292 | " States | \n",
293 | " Population | \n",
294 | "
\n",
295 | " \n",
296 | " \n",
297 | " \n",
298 | " 0 | \n",
299 | " California | \n",
300 | " 39613493 | \n",
301 | "
\n",
302 | " \n",
303 | " 1 | \n",
304 | " Texas | \n",
305 | " 29730311 | \n",
306 | "
\n",
307 | " \n",
308 | " 2 | \n",
309 | " Florida | \n",
310 | " 21944577 | \n",
311 | "
\n",
312 | " \n",
313 | " 3 | \n",
314 | " New York | \n",
315 | " 19299981 | \n",
316 | "
\n",
317 | " \n",
318 | "
\n",
319 | "
"
320 | ],
321 | "text/plain": [
322 | " States Population\n",
323 | "0 California 39613493\n",
324 | "1 Texas 29730311\n",
325 | "2 Florida 21944577\n",
326 | "3 New York 19299981"
327 | ]
328 | },
329 | "execution_count": 12,
330 | "metadata": {},
331 | "output_type": "execute_result"
332 | }
333 | ],
334 | "source": [
335 | "# showing the dataframe\n",
336 | "df_population"
337 | ]
338 | },
339 | {
340 | "cell_type": "markdown",
341 | "id": "245b58bf",
342 | "metadata": {},
343 | "source": [
344 | "# Creating a DataFrame from a csv file"
345 | ]
346 | },
347 | {
348 | "cell_type": "code",
349 | "execution_count": 13,
350 | "id": "5a3dcc1e",
351 | "metadata": {},
352 | "outputs": [],
353 | "source": [
354 | "# reading the csv file\n",
355 | "df_exams = pd.read_csv('StudentsPerformance.csv')"
356 | ]
357 | },
358 | {
359 | "cell_type": "code",
360 | "execution_count": 14,
361 | "id": "585ed37d",
362 | "metadata": {},
363 | "outputs": [
364 | {
365 | "data": {
366 | "text/html": [
367 | "\n",
368 | "\n",
381 | "
\n",
382 | " \n",
383 | " \n",
384 | " | \n",
385 | " gender | \n",
386 | " race/ethnicity | \n",
387 | " parental level of education | \n",
388 | " lunch | \n",
389 | " test preparation course | \n",
390 | " math score | \n",
391 | " reading score | \n",
392 | " writing score | \n",
393 | "
\n",
394 | " \n",
395 | " \n",
396 | " \n",
397 | " 0 | \n",
398 | " female | \n",
399 | " group B | \n",
400 | " bachelor's degree | \n",
401 | " standard | \n",
402 | " none | \n",
403 | " 72 | \n",
404 | " 72 | \n",
405 | " 74 | \n",
406 | "
\n",
407 | " \n",
408 | " 1 | \n",
409 | " female | \n",
410 | " group C | \n",
411 | " some college | \n",
412 | " standard | \n",
413 | " completed | \n",
414 | " 69 | \n",
415 | " 90 | \n",
416 | " 88 | \n",
417 | "
\n",
418 | " \n",
419 | " 2 | \n",
420 | " female | \n",
421 | " group B | \n",
422 | " master's degree | \n",
423 | " standard | \n",
424 | " none | \n",
425 | " 90 | \n",
426 | " 95 | \n",
427 | " 93 | \n",
428 | "
\n",
429 | " \n",
430 | " 3 | \n",
431 | " male | \n",
432 | " group A | \n",
433 | " associate's degree | \n",
434 | " free/reduced | \n",
435 | " none | \n",
436 | " 47 | \n",
437 | " 57 | \n",
438 | " 44 | \n",
439 | "
\n",
440 | " \n",
441 | " 4 | \n",
442 | " male | \n",
443 | " group C | \n",
444 | " some college | \n",
445 | " standard | \n",
446 | " none | \n",
447 | " 76 | \n",
448 | " 78 | \n",
449 | " 75 | \n",
450 | "
\n",
451 | " \n",
452 | " ... | \n",
453 | " ... | \n",
454 | " ... | \n",
455 | " ... | \n",
456 | " ... | \n",
457 | " ... | \n",
458 | " ... | \n",
459 | " ... | \n",
460 | " ... | \n",
461 | "
\n",
462 | " \n",
463 | " 995 | \n",
464 | " female | \n",
465 | " group E | \n",
466 | " master's degree | \n",
467 | " standard | \n",
468 | " completed | \n",
469 | " 88 | \n",
470 | " 99 | \n",
471 | " 95 | \n",
472 | "
\n",
473 | " \n",
474 | " 996 | \n",
475 | " male | \n",
476 | " group C | \n",
477 | " high school | \n",
478 | " free/reduced | \n",
479 | " none | \n",
480 | " 62 | \n",
481 | " 55 | \n",
482 | " 55 | \n",
483 | "
\n",
484 | " \n",
485 | " 997 | \n",
486 | " female | \n",
487 | " group C | \n",
488 | " high school | \n",
489 | " free/reduced | \n",
490 | " completed | \n",
491 | " 59 | \n",
492 | " 71 | \n",
493 | " 65 | \n",
494 | "
\n",
495 | " \n",
496 | " 998 | \n",
497 | " female | \n",
498 | " group D | \n",
499 | " some college | \n",
500 | " standard | \n",
501 | " completed | \n",
502 | " 68 | \n",
503 | " 78 | \n",
504 | " 77 | \n",
505 | "
\n",
506 | " \n",
507 | " 999 | \n",
508 | " female | \n",
509 | " group D | \n",
510 | " some college | \n",
511 | " free/reduced | \n",
512 | " none | \n",
513 | " 77 | \n",
514 | " 86 | \n",
515 | " 86 | \n",
516 | "
\n",
517 | " \n",
518 | "
\n",
519 | "
1000 rows × 8 columns
\n",
520 | "
"
521 | ],
522 | "text/plain": [
523 | " gender race/ethnicity parental level of education lunch \\\n",
524 | "0 female group B bachelor's degree standard \n",
525 | "1 female group C some college standard \n",
526 | "2 female group B master's degree standard \n",
527 | "3 male group A associate's degree free/reduced \n",
528 | "4 male group C some college standard \n",
529 | ".. ... ... ... ... \n",
530 | "995 female group E master's degree standard \n",
531 | "996 male group C high school free/reduced \n",
532 | "997 female group C high school free/reduced \n",
533 | "998 female group D some college standard \n",
534 | "999 female group D some college free/reduced \n",
535 | "\n",
536 | " test preparation course math score reading score writing score \n",
537 | "0 none 72 72 74 \n",
538 | "1 completed 69 90 88 \n",
539 | "2 none 90 95 93 \n",
540 | "3 none 47 57 44 \n",
541 | "4 none 76 78 75 \n",
542 | ".. ... ... ... ... \n",
543 | "995 completed 88 99 95 \n",
544 | "996 none 62 55 55 \n",
545 | "997 completed 59 71 65 \n",
546 | "998 completed 68 78 77 \n",
547 | "999 none 77 86 86 \n",
548 | "\n",
549 | "[1000 rows x 8 columns]"
550 | ]
551 | },
552 | "execution_count": 14,
553 | "metadata": {},
554 | "output_type": "execute_result"
555 | }
556 | ],
557 | "source": [
558 | "# showing the full dataframe\n",
559 | "df_exams"
560 | ]
561 | }
562 | ],
563 | "metadata": {
564 | "kernelspec": {
565 | "display_name": "Python 3",
566 | "language": "python",
567 | "name": "python3"
568 | },
569 | "language_info": {
570 | "codemirror_mode": {
571 | "name": "ipython",
572 | "version": 3
573 | },
574 | "file_extension": ".py",
575 | "mimetype": "text/x-python",
576 | "name": "python",
577 | "nbconvert_exporter": "python",
578 | "pygments_lexer": "ipython3",
579 | "version": "3.8.8"
580 | },
581 | "toc": {
582 | "base_numbering": 1,
583 | "nav_menu": {},
584 | "number_sections": true,
585 | "sideBar": true,
586 | "skip_h1_title": false,
587 | "title_cell": "Table of Contents",
588 | "title_sidebar": "Contents",
589 | "toc_cell": false,
590 | "toc_position": {},
591 | "toc_section_display": true,
592 | "toc_window_display": false
593 | }
594 | },
595 | "nbformat": 4,
596 | "nbformat_minor": 5
597 | }
598 |
--------------------------------------------------------------------------------
/2.Intro to Pandas/3. Basic Attributes, Methods and Functions.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "3cadeaac",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "faf4a761",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# reading the csv file\n",
21 | "df_exams = pd.read_csv('StudentsPerformance.csv')"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 5,
27 | "id": "e6159fe8",
28 | "metadata": {},
29 | "outputs": [
30 | {
31 | "data": {
32 | "text/html": [
33 | "\n",
34 | "\n",
47 | "
\n",
48 | " \n",
49 | " \n",
50 | " | \n",
51 | " gender | \n",
52 | " race/ethnicity | \n",
53 | " parental level of education | \n",
54 | " lunch | \n",
55 | " test preparation course | \n",
56 | " math score | \n",
57 | " reading score | \n",
58 | " writing score | \n",
59 | "
\n",
60 | " \n",
61 | " \n",
62 | " \n",
63 | " 0 | \n",
64 | " female | \n",
65 | " group B | \n",
66 | " bachelor's degree | \n",
67 | " standard | \n",
68 | " none | \n",
69 | " 72 | \n",
70 | " 72 | \n",
71 | " 74 | \n",
72 | "
\n",
73 | " \n",
74 | " 1 | \n",
75 | " female | \n",
76 | " group C | \n",
77 | " some college | \n",
78 | " standard | \n",
79 | " completed | \n",
80 | " 69 | \n",
81 | " 90 | \n",
82 | " 88 | \n",
83 | "
\n",
84 | " \n",
85 | " 2 | \n",
86 | " female | \n",
87 | " group B | \n",
88 | " master's degree | \n",
89 | " standard | \n",
90 | " none | \n",
91 | " 90 | \n",
92 | " 95 | \n",
93 | " 93 | \n",
94 | "
\n",
95 | " \n",
96 | " 3 | \n",
97 | " male | \n",
98 | " group A | \n",
99 | " associate's degree | \n",
100 | " free/reduced | \n",
101 | " none | \n",
102 | " 47 | \n",
103 | " 57 | \n",
104 | " 44 | \n",
105 | "
\n",
106 | " \n",
107 | " 4 | \n",
108 | " male | \n",
109 | " group C | \n",
110 | " some college | \n",
111 | " standard | \n",
112 | " none | \n",
113 | " 76 | \n",
114 | " 78 | \n",
115 | " 75 | \n",
116 | "
\n",
117 | " \n",
118 | " ... | \n",
119 | " ... | \n",
120 | " ... | \n",
121 | " ... | \n",
122 | " ... | \n",
123 | " ... | \n",
124 | " ... | \n",
125 | " ... | \n",
126 | " ... | \n",
127 | "
\n",
128 | " \n",
129 | " 995 | \n",
130 | " female | \n",
131 | " group E | \n",
132 | " master's degree | \n",
133 | " standard | \n",
134 | " completed | \n",
135 | " 88 | \n",
136 | " 99 | \n",
137 | " 95 | \n",
138 | "
\n",
139 | " \n",
140 | " 996 | \n",
141 | " male | \n",
142 | " group C | \n",
143 | " high school | \n",
144 | " free/reduced | \n",
145 | " none | \n",
146 | " 62 | \n",
147 | " 55 | \n",
148 | " 55 | \n",
149 | "
\n",
150 | " \n",
151 | " 997 | \n",
152 | " female | \n",
153 | " group C | \n",
154 | " high school | \n",
155 | " free/reduced | \n",
156 | " completed | \n",
157 | " 59 | \n",
158 | " 71 | \n",
159 | " 65 | \n",
160 | "
\n",
161 | " \n",
162 | " 998 | \n",
163 | " female | \n",
164 | " group D | \n",
165 | " some college | \n",
166 | " standard | \n",
167 | " completed | \n",
168 | " 68 | \n",
169 | " 78 | \n",
170 | " 77 | \n",
171 | "
\n",
172 | " \n",
173 | " 999 | \n",
174 | " female | \n",
175 | " group D | \n",
176 | " some college | \n",
177 | " free/reduced | \n",
178 | " none | \n",
179 | " 77 | \n",
180 | " 86 | \n",
181 | " 86 | \n",
182 | "
\n",
183 | " \n",
184 | "
\n",
185 | "
1000 rows × 8 columns
\n",
186 | "
"
187 | ],
188 | "text/plain": [
189 | " gender race/ethnicity parental level of education lunch \\\n",
190 | "0 female group B bachelor's degree standard \n",
191 | "1 female group C some college standard \n",
192 | "2 female group B master's degree standard \n",
193 | "3 male group A associate's degree free/reduced \n",
194 | "4 male group C some college standard \n",
195 | ".. ... ... ... ... \n",
196 | "995 female group E master's degree standard \n",
197 | "996 male group C high school free/reduced \n",
198 | "997 female group C high school free/reduced \n",
199 | "998 female group D some college standard \n",
200 | "999 female group D some college free/reduced \n",
201 | "\n",
202 | " test preparation course math score reading score writing score \n",
203 | "0 none 72 72 74 \n",
204 | "1 completed 69 90 88 \n",
205 | "2 none 90 95 93 \n",
206 | "3 none 47 57 44 \n",
207 | "4 none 76 78 75 \n",
208 | ".. ... ... ... ... \n",
209 | "995 completed 88 99 95 \n",
210 | "996 none 62 55 55 \n",
211 | "997 completed 59 71 65 \n",
212 | "998 completed 68 78 77 \n",
213 | "999 none 77 86 86 \n",
214 | "\n",
215 | "[1000 rows x 8 columns]"
216 | ]
217 | },
218 | "execution_count": 5,
219 | "metadata": {},
220 | "output_type": "execute_result"
221 | }
222 | ],
223 | "source": [
224 | "# showing the dataframe\n",
225 | "df_exams"
226 | ]
227 | },
228 | {
229 | "cell_type": "markdown",
230 | "id": "63bf386f",
231 | "metadata": {},
232 | "source": [
233 | "# Attributes"
234 | ]
235 | },
236 | {
237 | "cell_type": "code",
238 | "execution_count": 6,
239 | "id": "8889cc6c",
240 | "metadata": {},
241 | "outputs": [
242 | {
243 | "data": {
244 | "text/plain": [
245 | "(1000, 8)"
246 | ]
247 | },
248 | "execution_count": 6,
249 | "metadata": {},
250 | "output_type": "execute_result"
251 | }
252 | ],
253 | "source": [
254 | "# getting access to the shape attribute\n",
255 | "df_exams.shape"
256 | ]
257 | },
258 | {
259 | "cell_type": "code",
260 | "execution_count": 8,
261 | "id": "c1d18854",
262 | "metadata": {},
263 | "outputs": [
264 | {
265 | "data": {
266 | "text/plain": [
267 | "RangeIndex(start=0, stop=1000, step=1)"
268 | ]
269 | },
270 | "execution_count": 8,
271 | "metadata": {},
272 | "output_type": "execute_result"
273 | }
274 | ],
275 | "source": [
276 | "# getting access to the index attribute\n",
277 | "df_exams.index"
278 | ]
279 | },
280 | {
281 | "cell_type": "code",
282 | "execution_count": 9,
283 | "id": "002ebb03",
284 | "metadata": {},
285 | "outputs": [
286 | {
287 | "data": {
288 | "text/plain": [
289 | "Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',\n",
290 | " 'test preparation course', 'math score', 'reading score',\n",
291 | " 'writing score'],\n",
292 | " dtype='object')"
293 | ]
294 | },
295 | "execution_count": 9,
296 | "metadata": {},
297 | "output_type": "execute_result"
298 | }
299 | ],
300 | "source": [
301 | "# getting access to the column attribute\n",
302 | "df_exams.columns"
303 | ]
304 | },
305 | {
306 | "cell_type": "code",
307 | "execution_count": 12,
308 | "id": "1928828c",
309 | "metadata": {},
310 | "outputs": [
311 | {
312 | "data": {
313 | "text/plain": [
314 | "gender object\n",
315 | "race/ethnicity object\n",
316 | "parental level of education object\n",
317 | "lunch object\n",
318 | "test preparation course object\n",
319 | "math score int64\n",
320 | "reading score int64\n",
321 | "writing score int64\n",
322 | "dtype: object"
323 | ]
324 | },
325 | "execution_count": 12,
326 | "metadata": {},
327 | "output_type": "execute_result"
328 | }
329 | ],
330 | "source": [
331 | "# data types of each column\n",
332 | "df_exams.dtypes"
333 | ]
334 | },
335 | {
336 | "cell_type": "markdown",
337 | "id": "3ee3d60a",
338 | "metadata": {},
339 | "source": [
340 | "# Methods"
341 | ]
342 | },
343 | {
344 | "cell_type": "code",
345 | "execution_count": 10,
346 | "id": "d6ff61f9",
347 | "metadata": {},
348 | "outputs": [
349 | {
350 | "data": {
351 | "text/html": [
352 | "\n",
353 | "\n",
366 | "
\n",
367 | " \n",
368 | " \n",
369 | " | \n",
370 | " gender | \n",
371 | " race/ethnicity | \n",
372 | " parental level of education | \n",
373 | " lunch | \n",
374 | " test preparation course | \n",
375 | " math score | \n",
376 | " reading score | \n",
377 | " writing score | \n",
378 | "
\n",
379 | " \n",
380 | " \n",
381 | " \n",
382 | " 0 | \n",
383 | " female | \n",
384 | " group B | \n",
385 | " bachelor's degree | \n",
386 | " standard | \n",
387 | " none | \n",
388 | " 72 | \n",
389 | " 72 | \n",
390 | " 74 | \n",
391 | "
\n",
392 | " \n",
393 | " 1 | \n",
394 | " female | \n",
395 | " group C | \n",
396 | " some college | \n",
397 | " standard | \n",
398 | " completed | \n",
399 | " 69 | \n",
400 | " 90 | \n",
401 | " 88 | \n",
402 | "
\n",
403 | " \n",
404 | " 2 | \n",
405 | " female | \n",
406 | " group B | \n",
407 | " master's degree | \n",
408 | " standard | \n",
409 | " none | \n",
410 | " 90 | \n",
411 | " 95 | \n",
412 | " 93 | \n",
413 | "
\n",
414 | " \n",
415 | " 3 | \n",
416 | " male | \n",
417 | " group A | \n",
418 | " associate's degree | \n",
419 | " free/reduced | \n",
420 | " none | \n",
421 | " 47 | \n",
422 | " 57 | \n",
423 | " 44 | \n",
424 | "
\n",
425 | " \n",
426 | " 4 | \n",
427 | " male | \n",
428 | " group C | \n",
429 | " some college | \n",
430 | " standard | \n",
431 | " none | \n",
432 | " 76 | \n",
433 | " 78 | \n",
434 | " 75 | \n",
435 | "
\n",
436 | " \n",
437 | "
\n",
438 | "
"
439 | ],
440 | "text/plain": [
441 | " gender race/ethnicity parental level of education lunch \\\n",
442 | "0 female group B bachelor's degree standard \n",
443 | "1 female group C some college standard \n",
444 | "2 female group B master's degree standard \n",
445 | "3 male group A associate's degree free/reduced \n",
446 | "4 male group C some college standard \n",
447 | "\n",
448 | " test preparation course math score reading score writing score \n",
449 | "0 none 72 72 74 \n",
450 | "1 completed 69 90 88 \n",
451 | "2 none 90 95 93 \n",
452 | "3 none 47 57 44 \n",
453 | "4 none 76 78 75 "
454 | ]
455 | },
456 | "execution_count": 10,
457 | "metadata": {},
458 | "output_type": "execute_result"
459 | }
460 | ],
461 | "source": [
462 | "# showing the first 5 rows\n",
463 | "df_exams.head()"
464 | ]
465 | },
466 | {
467 | "cell_type": "code",
468 | "execution_count": 15,
469 | "id": "7852fee9",
470 | "metadata": {},
471 | "outputs": [
472 | {
473 | "name": "stdout",
474 | "output_type": "stream",
475 | "text": [
476 | "\n",
477 | "RangeIndex: 1000 entries, 0 to 999\n",
478 | "Data columns (total 8 columns):\n",
479 | " # Column Non-Null Count Dtype \n",
480 | "--- ------ -------------- ----- \n",
481 | " 0 gender 1000 non-null object\n",
482 | " 1 race/ethnicity 1000 non-null object\n",
483 | " 2 parental level of education 1000 non-null object\n",
484 | " 3 lunch 1000 non-null object\n",
485 | " 4 test preparation course 1000 non-null object\n",
486 | " 5 math score 1000 non-null int64 \n",
487 | " 6 reading score 1000 non-null int64 \n",
488 | " 7 writing score 1000 non-null int64 \n",
489 | "dtypes: int64(3), object(5)\n",
490 | "memory usage: 62.6+ KB\n"
491 | ]
492 | }
493 | ],
494 | "source": [
495 | "# showing the info of the dataframe\n",
496 | "df_exams.info()"
497 | ]
498 | },
499 | {
500 | "cell_type": "code",
501 | "execution_count": 16,
502 | "id": "7e44a628",
503 | "metadata": {},
504 | "outputs": [
505 | {
506 | "data": {
507 | "text/html": [
508 | "\n",
509 | "\n",
522 | "
\n",
523 | " \n",
524 | " \n",
525 | " | \n",
526 | " math score | \n",
527 | " reading score | \n",
528 | " writing score | \n",
529 | "
\n",
530 | " \n",
531 | " \n",
532 | " \n",
533 | " count | \n",
534 | " 1000.00000 | \n",
535 | " 1000.000000 | \n",
536 | " 1000.000000 | \n",
537 | "
\n",
538 | " \n",
539 | " mean | \n",
540 | " 66.08900 | \n",
541 | " 69.169000 | \n",
542 | " 68.054000 | \n",
543 | "
\n",
544 | " \n",
545 | " std | \n",
546 | " 15.16308 | \n",
547 | " 14.600192 | \n",
548 | " 15.195657 | \n",
549 | "
\n",
550 | " \n",
551 | " min | \n",
552 | " 0.00000 | \n",
553 | " 17.000000 | \n",
554 | " 10.000000 | \n",
555 | "
\n",
556 | " \n",
557 | " 25% | \n",
558 | " 57.00000 | \n",
559 | " 59.000000 | \n",
560 | " 57.750000 | \n",
561 | "
\n",
562 | " \n",
563 | " 50% | \n",
564 | " 66.00000 | \n",
565 | " 70.000000 | \n",
566 | " 69.000000 | \n",
567 | "
\n",
568 | " \n",
569 | " 75% | \n",
570 | " 77.00000 | \n",
571 | " 79.000000 | \n",
572 | " 79.000000 | \n",
573 | "
\n",
574 | " \n",
575 | " max | \n",
576 | " 100.00000 | \n",
577 | " 100.000000 | \n",
578 | " 100.000000 | \n",
579 | "
\n",
580 | " \n",
581 | "
\n",
582 | "
"
583 | ],
584 | "text/plain": [
585 | " math score reading score writing score\n",
586 | "count 1000.00000 1000.000000 1000.000000\n",
587 | "mean 66.08900 69.169000 68.054000\n",
588 | "std 15.16308 14.600192 15.195657\n",
589 | "min 0.00000 17.000000 10.000000\n",
590 | "25% 57.00000 59.000000 57.750000\n",
591 | "50% 66.00000 70.000000 69.000000\n",
592 | "75% 77.00000 79.000000 79.000000\n",
593 | "max 100.00000 100.000000 100.000000"
594 | ]
595 | },
596 | "execution_count": 16,
597 | "metadata": {},
598 | "output_type": "execute_result"
599 | }
600 | ],
601 | "source": [
602 | "# describing basic statistics of the dataframe\n",
603 | "df_exams.describe()"
604 | ]
605 | },
606 | {
607 | "cell_type": "markdown",
608 | "id": "22a32e4e",
609 | "metadata": {},
610 | "source": [
611 | "# Functions"
612 | ]
613 | },
614 | {
615 | "cell_type": "code",
616 | "execution_count": 17,
617 | "id": "a71af478",
618 | "metadata": {},
619 | "outputs": [
620 | {
621 | "data": {
622 | "text/plain": [
623 | "1000"
624 | ]
625 | },
626 | "execution_count": 17,
627 | "metadata": {},
628 | "output_type": "execute_result"
629 | }
630 | ],
631 | "source": [
632 | "# obtaining the length of the dataframe (number of rows)\n",
633 | "len(df_exams)"
634 | ]
635 | },
636 | {
637 | "cell_type": "code",
638 | "execution_count": 22,
639 | "id": "595fcbde",
640 | "metadata": {
641 | "scrolled": true
642 | },
643 | "outputs": [
644 | {
645 | "data": {
646 | "text/plain": [
647 | "999"
648 | ]
649 | },
650 | "execution_count": 22,
651 | "metadata": {},
652 | "output_type": "execute_result"
653 | }
654 | ],
655 | "source": [
656 | "# obtaining the highest index of the dataframe\n",
657 | "max(df_exams.index)"
658 | ]
659 | },
660 | {
661 | "cell_type": "code",
662 | "execution_count": 26,
663 | "id": "7aba282f",
664 | "metadata": {},
665 | "outputs": [
666 | {
667 | "data": {
668 | "text/plain": [
669 | "0"
670 | ]
671 | },
672 | "execution_count": 26,
673 | "metadata": {},
674 | "output_type": "execute_result"
675 | }
676 | ],
677 | "source": [
678 | "# obtaining the lowest index of the dataframe\n",
679 | "min(df_exams.index)"
680 | ]
681 | },
682 | {
683 | "cell_type": "code",
684 | "execution_count": 27,
685 | "id": "8ce53edc",
686 | "metadata": {},
687 | "outputs": [
688 | {
689 | "data": {
690 | "text/plain": [
691 | "pandas.core.frame.DataFrame"
692 | ]
693 | },
694 | "execution_count": 27,
695 | "metadata": {},
696 | "output_type": "execute_result"
697 | }
698 | ],
699 | "source": [
700 | "# obtaining the data type\n",
701 | "type(df_exams)"
702 | ]
703 | },
704 | {
705 | "cell_type": "code",
706 | "execution_count": 28,
707 | "id": "224775a3",
708 | "metadata": {},
709 | "outputs": [
710 | {
711 | "data": {
712 | "text/html": [
713 | "\n",
714 | "\n",
727 | "
\n",
728 | " \n",
729 | " \n",
730 | " | \n",
731 | " gender | \n",
732 | " race/ethnicity | \n",
733 | " parental level of education | \n",
734 | " lunch | \n",
735 | " test preparation course | \n",
736 | " math score | \n",
737 | " reading score | \n",
738 | " writing score | \n",
739 | "
\n",
740 | " \n",
741 | " \n",
742 | " \n",
743 | " 0 | \n",
744 | " female | \n",
745 | " group B | \n",
746 | " bachelor's degree | \n",
747 | " standard | \n",
748 | " none | \n",
749 | " 72 | \n",
750 | " 72 | \n",
751 | " 74 | \n",
752 | "
\n",
753 | " \n",
754 | " 1 | \n",
755 | " female | \n",
756 | " group C | \n",
757 | " some college | \n",
758 | " standard | \n",
759 | " completed | \n",
760 | " 69 | \n",
761 | " 90 | \n",
762 | " 88 | \n",
763 | "
\n",
764 | " \n",
765 | " 2 | \n",
766 | " female | \n",
767 | " group B | \n",
768 | " master's degree | \n",
769 | " standard | \n",
770 | " none | \n",
771 | " 90 | \n",
772 | " 95 | \n",
773 | " 93 | \n",
774 | "
\n",
775 | " \n",
776 | " 3 | \n",
777 | " male | \n",
778 | " group A | \n",
779 | " associate's degree | \n",
780 | " free/reduced | \n",
781 | " none | \n",
782 | " 47 | \n",
783 | " 57 | \n",
784 | " 44 | \n",
785 | "
\n",
786 | " \n",
787 | " 4 | \n",
788 | " male | \n",
789 | " group C | \n",
790 | " some college | \n",
791 | " standard | \n",
792 | " none | \n",
793 | " 76 | \n",
794 | " 78 | \n",
795 | " 75 | \n",
796 | "
\n",
797 | " \n",
798 | " ... | \n",
799 | " ... | \n",
800 | " ... | \n",
801 | " ... | \n",
802 | " ... | \n",
803 | " ... | \n",
804 | " ... | \n",
805 | " ... | \n",
806 | " ... | \n",
807 | "
\n",
808 | " \n",
809 | " 995 | \n",
810 | " female | \n",
811 | " group E | \n",
812 | " master's degree | \n",
813 | " standard | \n",
814 | " completed | \n",
815 | " 88 | \n",
816 | " 99 | \n",
817 | " 95 | \n",
818 | "
\n",
819 | " \n",
820 | " 996 | \n",
821 | " male | \n",
822 | " group C | \n",
823 | " high school | \n",
824 | " free/reduced | \n",
825 | " none | \n",
826 | " 62 | \n",
827 | " 55 | \n",
828 | " 55 | \n",
829 | "
\n",
830 | " \n",
831 | " 997 | \n",
832 | " female | \n",
833 | " group C | \n",
834 | " high school | \n",
835 | " free/reduced | \n",
836 | " completed | \n",
837 | " 59 | \n",
838 | " 71 | \n",
839 | " 65 | \n",
840 | "
\n",
841 | " \n",
842 | " 998 | \n",
843 | " female | \n",
844 | " group D | \n",
845 | " some college | \n",
846 | " standard | \n",
847 | " completed | \n",
848 | " 68 | \n",
849 | " 78 | \n",
850 | " 77 | \n",
851 | "
\n",
852 | " \n",
853 | " 999 | \n",
854 | " female | \n",
855 | " group D | \n",
856 | " some college | \n",
857 | " free/reduced | \n",
858 | " none | \n",
859 | " 77 | \n",
860 | " 86 | \n",
861 | " 86 | \n",
862 | "
\n",
863 | " \n",
864 | "
\n",
865 | "
1000 rows × 8 columns
\n",
866 | "
"
867 | ],
868 | "text/plain": [
869 | " gender race/ethnicity parental level of education lunch \\\n",
870 | "0 female group B bachelor's degree standard \n",
871 | "1 female group C some college standard \n",
872 | "2 female group B master's degree standard \n",
873 | "3 male group A associate's degree free/reduced \n",
874 | "4 male group C some college standard \n",
875 | ".. ... ... ... ... \n",
876 | "995 female group E master's degree standard \n",
877 | "996 male group C high school free/reduced \n",
878 | "997 female group C high school free/reduced \n",
879 | "998 female group D some college standard \n",
880 | "999 female group D some college free/reduced \n",
881 | "\n",
882 | " test preparation course math score reading score writing score \n",
883 | "0 none 72 72 74 \n",
884 | "1 completed 69 90 88 \n",
885 | "2 none 90 95 93 \n",
886 | "3 none 47 57 44 \n",
887 | "4 none 76 78 75 \n",
888 | ".. ... ... ... ... \n",
889 | "995 completed 88 99 95 \n",
890 | "996 none 62 55 55 \n",
891 | "997 completed 59 71 65 \n",
892 | "998 completed 68 78 77 \n",
893 | "999 none 77 86 86 \n",
894 | "\n",
895 | "[1000 rows x 8 columns]"
896 | ]
897 | },
898 | "execution_count": 28,
899 | "metadata": {},
900 | "output_type": "execute_result"
901 | }
902 | ],
903 | "source": [
904 | "# rounding the values of the dataset\n",
905 | "round(df_exams, 2)"
906 | ]
907 | }
908 | ],
909 | "metadata": {
910 | "kernelspec": {
911 | "display_name": "Python 3",
912 | "language": "python",
913 | "name": "python3"
914 | },
915 | "language_info": {
916 | "codemirror_mode": {
917 | "name": "ipython",
918 | "version": 3
919 | },
920 | "file_extension": ".py",
921 | "mimetype": "text/x-python",
922 | "name": "python",
923 | "nbconvert_exporter": "python",
924 | "pygments_lexer": "ipython3",
925 | "version": "3.8.8"
926 | },
927 | "toc": {
928 | "base_numbering": 1,
929 | "nav_menu": {},
930 | "number_sections": true,
931 | "sideBar": true,
932 | "skip_h1_title": false,
933 | "title_cell": "Table of Contents",
934 | "title_sidebar": "Contents",
935 | "toc_cell": false,
936 | "toc_position": {},
937 | "toc_section_display": true,
938 | "toc_window_display": false
939 | }
940 | },
941 | "nbformat": 4,
942 | "nbformat_minor": 5
943 | }
944 |
--------------------------------------------------------------------------------
/2.Intro to Pandas/4.Selecting One Column from a Dataframe.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "4e3fdade",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "9dfaf0d2",
17 | "metadata": {
18 | "scrolled": false
19 | },
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " | \n",
42 | " gender | \n",
43 | " race/ethnicity | \n",
44 | " parental level of education | \n",
45 | " lunch | \n",
46 | " test preparation course | \n",
47 | " math score | \n",
48 | " reading score | \n",
49 | " writing score | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " female | \n",
56 | " group B | \n",
57 | " bachelor's degree | \n",
58 | " standard | \n",
59 | " none | \n",
60 | " 72 | \n",
61 | " 72 | \n",
62 | " 74 | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " female | \n",
67 | " group C | \n",
68 | " some college | \n",
69 | " standard | \n",
70 | " completed | \n",
71 | " 69 | \n",
72 | " 90 | \n",
73 | " 88 | \n",
74 | "
\n",
75 | " \n",
76 | " 2 | \n",
77 | " female | \n",
78 | " group B | \n",
79 | " master's degree | \n",
80 | " standard | \n",
81 | " none | \n",
82 | " 90 | \n",
83 | " 95 | \n",
84 | " 93 | \n",
85 | "
\n",
86 | " \n",
87 | " 3 | \n",
88 | " male | \n",
89 | " group A | \n",
90 | " associate's degree | \n",
91 | " free/reduced | \n",
92 | " none | \n",
93 | " 47 | \n",
94 | " 57 | \n",
95 | " 44 | \n",
96 | "
\n",
97 | " \n",
98 | " 4 | \n",
99 | " male | \n",
100 | " group C | \n",
101 | " some college | \n",
102 | " standard | \n",
103 | " none | \n",
104 | " 76 | \n",
105 | " 78 | \n",
106 | " 75 | \n",
107 | "
\n",
108 | " \n",
109 | "
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " gender race/ethnicity parental level of education lunch \\\n",
114 | "0 female group B bachelor's degree standard \n",
115 | "1 female group C some college standard \n",
116 | "2 female group B master's degree standard \n",
117 | "3 male group A associate's degree free/reduced \n",
118 | "4 male group C some college standard \n",
119 | "\n",
120 | " test preparation course math score reading score writing score \n",
121 | "0 none 72 72 74 \n",
122 | "1 completed 69 90 88 \n",
123 | "2 none 90 95 93 \n",
124 | "3 none 47 57 44 \n",
125 | "4 none 76 78 75 "
126 | ]
127 | },
128 | "execution_count": 2,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "# reading the csv file\n",
135 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n",
136 | "df_exams.head()"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "id": "de860322",
142 | "metadata": {},
143 | "source": [
144 | "# Selecting one column"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "id": "4869f1d5",
150 | "metadata": {},
151 | "source": [
152 | "## Syntax 1"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 3,
158 | "id": "79ba5c16",
159 | "metadata": {},
160 | "outputs": [
161 | {
162 | "data": {
163 | "text/plain": [
164 | "0 female\n",
165 | "1 female\n",
166 | "2 female\n",
167 | "3 male\n",
168 | "4 male\n",
169 | " ... \n",
170 | "995 female\n",
171 | "996 male\n",
172 | "997 female\n",
173 | "998 female\n",
174 | "999 female\n",
175 | "Name: gender, Length: 1000, dtype: object"
176 | ]
177 | },
178 | "execution_count": 3,
179 | "metadata": {},
180 | "output_type": "execute_result"
181 | }
182 | ],
183 | "source": [
184 | "# select a column with [] (preferred way to select a column)\n",
185 | "df_exams['gender']"
186 | ]
187 | },
188 | {
189 | "cell_type": "code",
190 | "execution_count": 4,
191 | "id": "1da5d438",
192 | "metadata": {},
193 | "outputs": [
194 | {
195 | "data": {
196 | "text/plain": [
197 | "pandas.core.series.Series"
198 | ]
199 | },
200 | "execution_count": 4,
201 | "metadata": {},
202 | "output_type": "execute_result"
203 | }
204 | ],
205 | "source": [
206 | "# check out the data type of a column\n",
207 | "type(df_exams['gender'])"
208 | ]
209 | },
210 | {
211 | "cell_type": "code",
212 | "execution_count": 5,
213 | "id": "d7041d6f",
214 | "metadata": {},
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/plain": [
219 | "0 female\n",
220 | "1 female\n",
221 | "2 female\n",
222 | "3 male\n",
223 | "4 male\n",
224 | "Name: gender, dtype: object"
225 | ]
226 | },
227 | "execution_count": 5,
228 | "metadata": {},
229 | "output_type": "execute_result"
230 | }
231 | ],
232 | "source": [
233 | "# series: attributes and methods\n",
234 | "df_exams['gender'].index\n",
235 | "df_exams['gender'].head()"
236 | ]
237 | },
238 | {
239 | "cell_type": "markdown",
240 | "id": "025f3788",
241 | "metadata": {},
242 | "source": [
243 | "## Syntax 2"
244 | ]
245 | },
246 | {
247 | "cell_type": "code",
248 | "execution_count": 6,
249 | "id": "5250d3b6",
250 | "metadata": {},
251 | "outputs": [
252 | {
253 | "data": {
254 | "text/plain": [
255 | "0 female\n",
256 | "1 female\n",
257 | "2 female\n",
258 | "3 male\n",
259 | "4 male\n",
260 | " ... \n",
261 | "995 female\n",
262 | "996 male\n",
263 | "997 female\n",
264 | "998 female\n",
265 | "999 female\n",
266 | "Name: gender, Length: 1000, dtype: object"
267 | ]
268 | },
269 | "execution_count": 6,
270 | "metadata": {},
271 | "output_type": "execute_result"
272 | }
273 | ],
274 | "source": [
275 | "# select a column with .\n",
276 | "df_exams.gender"
277 | ]
278 | },
279 | {
280 | "cell_type": "code",
281 | "execution_count": 7,
282 | "id": "aa3433f7",
283 | "metadata": {},
284 | "outputs": [
285 | {
286 | "ename": "SyntaxError",
287 | "evalue": "invalid syntax (, line 2)",
288 | "output_type": "error",
289 | "traceback": [
290 | "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m df_exams.math score\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
291 | ]
292 | }
293 | ],
294 | "source": [
295 | "# pitfall: selecting a column with . fails when the column name contains a space\n",
296 | "df_exams.math score"
297 | ]
298 | },
299 | {
300 | "cell_type": "code",
301 | "execution_count": 8,
302 | "id": "92b4ae17",
303 | "metadata": {},
304 | "outputs": [
305 | {
306 | "data": {
307 | "text/plain": [
308 | "0 72\n",
309 | "1 69\n",
310 | "2 90\n",
311 | "3 47\n",
312 | "4 76\n",
313 | " ..\n",
314 | "995 88\n",
315 | "996 62\n",
316 | "997 59\n",
317 | "998 68\n",
318 | "999 77\n",
319 | "Name: math score, Length: 1000, dtype: int64"
320 | ]
321 | },
322 | "execution_count": 8,
323 | "metadata": {},
324 | "output_type": "execute_result"
325 | }
326 | ],
327 | "source": [
328 | "# select the same column using []\n",
329 | "df_exams[\"math score\"]"
330 | ]
331 | }
332 | ],
333 | "metadata": {
334 | "kernelspec": {
335 | "display_name": "Python 3",
336 | "language": "python",
337 | "name": "python3"
338 | },
339 | "language_info": {
340 | "codemirror_mode": {
341 | "name": "ipython",
342 | "version": 3
343 | },
344 | "file_extension": ".py",
345 | "mimetype": "text/x-python",
346 | "name": "python",
347 | "nbconvert_exporter": "python",
348 | "pygments_lexer": "ipython3",
349 | "version": "3.8.8"
350 | },
351 | "toc": {
352 | "base_numbering": 1,
353 | "nav_menu": {},
354 | "number_sections": true,
355 | "sideBar": true,
356 | "skip_h1_title": false,
357 | "title_cell": "Table of Contents",
358 | "title_sidebar": "Contents",
359 | "toc_cell": false,
360 | "toc_position": {},
361 | "toc_section_display": true,
362 | "toc_window_display": false
363 | }
364 | },
365 | "nbformat": 4,
366 | "nbformat_minor": 5
367 | }
368 |
--------------------------------------------------------------------------------
/2.Intro to Pandas/5.Selecting Two or More Columns from a Dataframe.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "4e3fdade",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "9dfaf0d2",
17 | "metadata": {
18 | "scrolled": false
19 | },
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " | \n",
42 | " gender | \n",
43 | " race/ethnicity | \n",
44 | " parental level of education | \n",
45 | " lunch | \n",
46 | " test preparation course | \n",
47 | " math score | \n",
48 | " reading score | \n",
49 | " writing score | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " female | \n",
56 | " group B | \n",
57 | " bachelor's degree | \n",
58 | " standard | \n",
59 | " none | \n",
60 | " 72 | \n",
61 | " 72 | \n",
62 | " 74 | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " female | \n",
67 | " group C | \n",
68 | " some college | \n",
69 | " standard | \n",
70 | " completed | \n",
71 | " 69 | \n",
72 | " 90 | \n",
73 | " 88 | \n",
74 | "
\n",
75 | " \n",
76 | " 2 | \n",
77 | " female | \n",
78 | " group B | \n",
79 | " master's degree | \n",
80 | " standard | \n",
81 | " none | \n",
82 | " 90 | \n",
83 | " 95 | \n",
84 | " 93 | \n",
85 | "
\n",
86 | " \n",
87 | " 3 | \n",
88 | " male | \n",
89 | " group A | \n",
90 | " associate's degree | \n",
91 | " free/reduced | \n",
92 | " none | \n",
93 | " 47 | \n",
94 | " 57 | \n",
95 | " 44 | \n",
96 | "
\n",
97 | " \n",
98 | " 4 | \n",
99 | " male | \n",
100 | " group C | \n",
101 | " some college | \n",
102 | " standard | \n",
103 | " none | \n",
104 | " 76 | \n",
105 | " 78 | \n",
106 | " 75 | \n",
107 | "
\n",
108 | " \n",
109 | "
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " gender race/ethnicity parental level of education lunch \\\n",
114 | "0 female group B bachelor's degree standard \n",
115 | "1 female group C some college standard \n",
116 | "2 female group B master's degree standard \n",
117 | "3 male group A associate's degree free/reduced \n",
118 | "4 male group C some college standard \n",
119 | "\n",
120 | " test preparation course math score reading score writing score \n",
121 | "0 none 72 72 74 \n",
122 | "1 completed 69 90 88 \n",
123 | "2 none 90 95 93 \n",
124 | "3 none 47 57 44 \n",
125 | "4 none 76 78 75 "
126 | ]
127 | },
128 | "execution_count": 2,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "# reading the csv file\n",
135 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n",
136 | "df_exams.head()"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "id": "de860322",
142 | "metadata": {},
143 | "source": [
144 | "# Selecting two or more columns"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": 3,
150 | "id": "79ba5c16",
151 | "metadata": {},
152 | "outputs": [
153 | {
154 | "data": {
155 | "text/html": [
156 | "\n",
157 | "\n",
170 | "
\n",
171 | " \n",
172 | " \n",
173 | " | \n",
174 | " gender | \n",
175 | " math score | \n",
176 | "
\n",
177 | " \n",
178 | " \n",
179 | " \n",
180 | " 0 | \n",
181 | " female | \n",
182 | " 72 | \n",
183 | "
\n",
184 | " \n",
185 | " 1 | \n",
186 | " female | \n",
187 | " 69 | \n",
188 | "
\n",
189 | " \n",
190 | " 2 | \n",
191 | " female | \n",
192 | " 90 | \n",
193 | "
\n",
194 | " \n",
195 | " 3 | \n",
196 | " male | \n",
197 | " 47 | \n",
198 | "
\n",
199 | " \n",
200 | " 4 | \n",
201 | " male | \n",
202 | " 76 | \n",
203 | "
\n",
204 | " \n",
205 | " ... | \n",
206 | " ... | \n",
207 | " ... | \n",
208 | "
\n",
209 | " \n",
210 | " 995 | \n",
211 | " female | \n",
212 | " 88 | \n",
213 | "
\n",
214 | " \n",
215 | " 996 | \n",
216 | " male | \n",
217 | " 62 | \n",
218 | "
\n",
219 | " \n",
220 | " 997 | \n",
221 | " female | \n",
222 | " 59 | \n",
223 | "
\n",
224 | " \n",
225 | " 998 | \n",
226 | " female | \n",
227 | " 68 | \n",
228 | "
\n",
229 | " \n",
230 | " 999 | \n",
231 | " female | \n",
232 | " 77 | \n",
233 | "
\n",
234 | " \n",
235 | "
\n",
236 | "
1000 rows × 2 columns
\n",
237 | "
"
238 | ],
239 | "text/plain": [
240 | " gender math score\n",
241 | "0 female 72\n",
242 | "1 female 69\n",
243 | "2 female 90\n",
244 | "3 male 47\n",
245 | "4 male 76\n",
246 | ".. ... ...\n",
247 | "995 female 88\n",
248 | "996 male 62\n",
249 | "997 female 59\n",
250 | "998 female 68\n",
251 | "999 female 77\n",
252 | "\n",
253 | "[1000 rows x 2 columns]"
254 | ]
255 | },
256 | "execution_count": 3,
257 | "metadata": {},
258 | "output_type": "execute_result"
259 | }
260 | ],
261 | "source": [
262 | "# select 2 columns using [[]]\n",
263 | "df_exams[['gender', 'math score']]"
264 | ]
265 | },
266 | {
267 | "cell_type": "code",
268 | "execution_count": 4,
269 | "id": "1da5d438",
270 | "metadata": {},
271 | "outputs": [
272 | {
273 | "data": {
274 | "text/plain": [
275 | "pandas.core.frame.DataFrame"
276 | ]
277 | },
278 | "execution_count": 4,
279 | "metadata": {},
280 | "output_type": "execute_result"
281 | }
282 | ],
283 | "source": [
284 | "# check out the data type of the selection\n",
285 | "type(df_exams[['gender', 'math score']])"
286 | ]
287 | },
288 | {
289 | "cell_type": "code",
290 | "execution_count": 5,
291 | "id": "42b11033",
292 | "metadata": {},
293 | "outputs": [
294 | {
295 | "data": {
296 | "text/html": [
297 | "\n",
298 | "\n",
311 | "
\n",
312 | " \n",
313 | " \n",
314 | " | \n",
315 | " gender | \n",
316 | " math score | \n",
317 | " reading score | \n",
318 | " writing score | \n",
319 | "
\n",
320 | " \n",
321 | " \n",
322 | " \n",
323 | " 0 | \n",
324 | " female | \n",
325 | " 72 | \n",
326 | " 72 | \n",
327 | " 74 | \n",
328 | "
\n",
329 | " \n",
330 | " 1 | \n",
331 | " female | \n",
332 | " 69 | \n",
333 | " 90 | \n",
334 | " 88 | \n",
335 | "
\n",
336 | " \n",
337 | " 2 | \n",
338 | " female | \n",
339 | " 90 | \n",
340 | " 95 | \n",
341 | " 93 | \n",
342 | "
\n",
343 | " \n",
344 | " 3 | \n",
345 | " male | \n",
346 | " 47 | \n",
347 | " 57 | \n",
348 | " 44 | \n",
349 | "
\n",
350 | " \n",
351 | " 4 | \n",
352 | " male | \n",
353 | " 76 | \n",
354 | " 78 | \n",
355 | " 75 | \n",
356 | "
\n",
357 | " \n",
358 | " ... | \n",
359 | " ... | \n",
360 | " ... | \n",
361 | " ... | \n",
362 | " ... | \n",
363 | "
\n",
364 | " \n",
365 | " 995 | \n",
366 | " female | \n",
367 | " 88 | \n",
368 | " 99 | \n",
369 | " 95 | \n",
370 | "
\n",
371 | " \n",
372 | " 996 | \n",
373 | " male | \n",
374 | " 62 | \n",
375 | " 55 | \n",
376 | " 55 | \n",
377 | "
\n",
378 | " \n",
379 | " 997 | \n",
380 | " female | \n",
381 | " 59 | \n",
382 | " 71 | \n",
383 | " 65 | \n",
384 | "
\n",
385 | " \n",
386 | " 998 | \n",
387 | " female | \n",
388 | " 68 | \n",
389 | " 78 | \n",
390 | " 77 | \n",
391 | "
\n",
392 | " \n",
393 | " 999 | \n",
394 | " female | \n",
395 | " 77 | \n",
396 | " 86 | \n",
397 | " 86 | \n",
398 | "
\n",
399 | " \n",
400 | "
\n",
401 | "
1000 rows × 4 columns
\n",
402 | "
"
403 | ],
404 | "text/plain": [
405 | " gender math score reading score writing score\n",
406 | "0 female 72 72 74\n",
407 | "1 female 69 90 88\n",
408 | "2 female 90 95 93\n",
409 | "3 male 47 57 44\n",
410 | "4 male 76 78 75\n",
411 | ".. ... ... ... ...\n",
412 | "995 female 88 99 95\n",
413 | "996 male 62 55 55\n",
414 | "997 female 59 71 65\n",
415 | "998 female 68 78 77\n",
416 | "999 female 77 86 86\n",
417 | "\n",
418 | "[1000 rows x 4 columns]"
419 | ]
420 | },
421 | "execution_count": 5,
422 | "metadata": {},
423 | "output_type": "execute_result"
424 | }
425 | ],
426 | "source": [
427 | "# select 2 or more columns using [[]]\n",
428 | "df_exams[['gender', 'math score', 'reading score', 'writing score']]"
429 | ]
430 | },
431 | {
432 | "cell_type": "code",
433 | "execution_count": 6,
434 | "id": "45b07093",
435 | "metadata": {},
436 | "outputs": [],
437 | "source": [
438 | "# we can't select 2 or more columns with the \".\" notation\n",
439 | "# df_exams.'gender', 'math score'"
440 | ]
441 | }
442 | ],
443 | "metadata": {
444 | "kernelspec": {
445 | "display_name": "Python 3",
446 | "language": "python",
447 | "name": "python3"
448 | },
449 | "language_info": {
450 | "codemirror_mode": {
451 | "name": "ipython",
452 | "version": 3
453 | },
454 | "file_extension": ".py",
455 | "mimetype": "text/x-python",
456 | "name": "python",
457 | "nbconvert_exporter": "python",
458 | "pygments_lexer": "ipython3",
459 | "version": "3.8.8"
460 | },
461 | "toc": {
462 | "base_numbering": 1,
463 | "nav_menu": {},
464 | "number_sections": true,
465 | "sideBar": true,
466 | "skip_h1_title": false,
467 | "title_cell": "Table of Contents",
468 | "title_sidebar": "Contents",
469 | "toc_cell": false,
470 | "toc_position": {},
471 | "toc_section_display": true,
472 | "toc_window_display": false
473 | }
474 | },
475 | "nbformat": 4,
476 | "nbformat_minor": 5
477 | }
478 |
--------------------------------------------------------------------------------
/2.Intro to Pandas/7.Operations on Dataframes.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "4e3fdade",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "9dfaf0d2",
17 | "metadata": {
18 | "scrolled": true
19 | },
20 | "outputs": [
21 | {
22 | "data": {
23 | "text/html": [
24 | "\n",
25 | "\n",
38 | "
\n",
39 | " \n",
40 | " \n",
41 | " | \n",
42 | " gender | \n",
43 | " race/ethnicity | \n",
44 | " parental level of education | \n",
45 | " lunch | \n",
46 | " test preparation course | \n",
47 | " math score | \n",
48 | " reading score | \n",
49 | " writing score | \n",
50 | "
\n",
51 | " \n",
52 | " \n",
53 | " \n",
54 | " 0 | \n",
55 | " female | \n",
56 | " group B | \n",
57 | " bachelor's degree | \n",
58 | " standard | \n",
59 | " none | \n",
60 | " 72 | \n",
61 | " 72 | \n",
62 | " 74 | \n",
63 | "
\n",
64 | " \n",
65 | " 1 | \n",
66 | " female | \n",
67 | " group C | \n",
68 | " some college | \n",
69 | " standard | \n",
70 | " completed | \n",
71 | " 69 | \n",
72 | " 90 | \n",
73 | " 88 | \n",
74 | "
\n",
75 | " \n",
76 | " 2 | \n",
77 | " female | \n",
78 | " group B | \n",
79 | " master's degree | \n",
80 | " standard | \n",
81 | " none | \n",
82 | " 90 | \n",
83 | " 95 | \n",
84 | " 93 | \n",
85 | "
\n",
86 | " \n",
87 | " 3 | \n",
88 | " male | \n",
89 | " group A | \n",
90 | " associate's degree | \n",
91 | " free/reduced | \n",
92 | " none | \n",
93 | " 47 | \n",
94 | " 57 | \n",
95 | " 44 | \n",
96 | "
\n",
97 | " \n",
98 | " 4 | \n",
99 | " male | \n",
100 | " group C | \n",
101 | " some college | \n",
102 | " standard | \n",
103 | " none | \n",
104 | " 76 | \n",
105 | " 78 | \n",
106 | " 75 | \n",
107 | "
\n",
108 | " \n",
109 | "
\n",
110 | "
"
111 | ],
112 | "text/plain": [
113 | " gender race/ethnicity parental level of education lunch \\\n",
114 | "0 female group B bachelor's degree standard \n",
115 | "1 female group C some college standard \n",
116 | "2 female group B master's degree standard \n",
117 | "3 male group A associate's degree free/reduced \n",
118 | "4 male group C some college standard \n",
119 | "\n",
120 | " test preparation course math score reading score writing score \n",
121 | "0 none 72 72 74 \n",
122 | "1 completed 69 90 88 \n",
123 | "2 none 90 95 93 \n",
124 | "3 none 47 57 44 \n",
125 | "4 none 76 78 75 "
126 | ]
127 | },
128 | "execution_count": 2,
129 | "metadata": {},
130 | "output_type": "execute_result"
131 | }
132 | ],
133 | "source": [
134 | "# reading the csv file\n",
135 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n",
136 | "df_exams.head()"
137 | ]
138 | },
139 | {
140 | "cell_type": "markdown",
141 | "id": "65a8afc2",
142 | "metadata": {},
143 | "source": [
144 | "# Math Operations"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "id": "e93c4e2b",
150 | "metadata": {},
151 | "source": [
152 | "## Operations on columns"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 3,
158 | "id": "63bd52ec",
159 | "metadata": {
160 | "scrolled": true
161 | },
162 | "outputs": [
163 | {
164 | "data": {
165 | "text/plain": [
166 | "66089"
167 | ]
168 | },
169 | "execution_count": 3,
170 | "metadata": {},
171 | "output_type": "execute_result"
172 | }
173 | ],
174 | "source": [
175 | "# select a column and calculate total sum\n",
176 | "df_exams['math score'].sum()"
177 | ]
178 | },
179 | {
180 | "cell_type": "code",
181 | "execution_count": 4,
182 | "id": "200f9c1e",
183 | "metadata": {},
184 | "outputs": [
185 | {
186 | "data": {
187 | "text/plain": [
188 | "0"
189 | ]
190 | },
191 | "execution_count": 4,
192 | "metadata": {},
193 | "output_type": "execute_result"
194 | }
195 | ],
196 | "source": [
197 | "# count, mean, std, max, and min\n",
198 | "df_exams['math score'].count()\n",
199 | "df_exams['math score'].mean()\n",
200 | "df_exams['math score'].std()\n",
201 | "df_exams['math score'].max()\n",
202 | "df_exams['math score'].min()"
203 | ]
204 | },
205 | {
206 | "cell_type": "code",
207 | "execution_count": 5,
208 | "id": "a4847df7",
209 | "metadata": {},
210 | "outputs": [
211 | {
212 | "data": {
213 | "text/html": [
214 | "\n",
215 | "\n",
228 | "
\n",
229 | " \n",
230 | " \n",
231 | " | \n",
232 | " math score | \n",
233 | " reading score | \n",
234 | " writing score | \n",
235 | "
\n",
236 | " \n",
237 | " \n",
238 | " \n",
239 | " count | \n",
240 | " 1000.00000 | \n",
241 | " 1000.000000 | \n",
242 | " 1000.000000 | \n",
243 | "
\n",
244 | " \n",
245 | " mean | \n",
246 | " 66.08900 | \n",
247 | " 69.169000 | \n",
248 | " 68.054000 | \n",
249 | "
\n",
250 | " \n",
251 | " std | \n",
252 | " 15.16308 | \n",
253 | " 14.600192 | \n",
254 | " 15.195657 | \n",
255 | "
\n",
256 | " \n",
257 | " min | \n",
258 | " 0.00000 | \n",
259 | " 17.000000 | \n",
260 | " 10.000000 | \n",
261 | "
\n",
262 | " \n",
263 | " 25% | \n",
264 | " 57.00000 | \n",
265 | " 59.000000 | \n",
266 | " 57.750000 | \n",
267 | "
\n",
268 | " \n",
269 | " 50% | \n",
270 | " 66.00000 | \n",
271 | " 70.000000 | \n",
272 | " 69.000000 | \n",
273 | "
\n",
274 | " \n",
275 | " 75% | \n",
276 | " 77.00000 | \n",
277 | " 79.000000 | \n",
278 | " 79.000000 | \n",
279 | "
\n",
280 | " \n",
281 | " max | \n",
282 | " 100.00000 | \n",
283 | " 100.000000 | \n",
284 | " 100.000000 | \n",
285 | "
\n",
286 | " \n",
287 | "
\n",
288 | "
"
289 | ],
290 | "text/plain": [
291 | " math score reading score writing score\n",
292 | "count 1000.00000 1000.000000 1000.000000\n",
293 | "mean 66.08900 69.169000 68.054000\n",
294 | "std 15.16308 14.600192 15.195657\n",
295 | "min 0.00000 17.000000 10.000000\n",
296 | "25% 57.00000 59.000000 57.750000\n",
297 | "50% 66.00000 70.000000 69.000000\n",
298 | "75% 77.00000 79.000000 79.000000\n",
299 | "max 100.00000 100.000000 100.000000"
300 | ]
301 | },
302 | "execution_count": 5,
303 | "metadata": {},
304 | "output_type": "execute_result"
305 | }
306 | ],
307 | "source": [
308 | "# easier calculation with .describe()\n",
309 | "df_exams.describe()"
310 | ]
311 | },
312 | {
313 | "cell_type": "markdown",
314 | "id": "d85482c6",
315 | "metadata": {},
316 | "source": [
317 | "## Operations on rows"
318 | ]
319 | },
320 | {
321 | "cell_type": "code",
322 | "execution_count": 6,
323 | "id": "21edc88a",
324 | "metadata": {},
325 | "outputs": [
326 | {
327 | "data": {
328 | "text/plain": [
329 | "0 218\n",
330 | "1 247\n",
331 | "2 278\n",
332 | "3 148\n",
333 | "4 229\n",
334 | " ... \n",
335 | "995 282\n",
336 | "996 172\n",
337 | "997 195\n",
338 | "998 223\n",
339 | "999 249\n",
340 | "Length: 1000, dtype: int64"
341 | ]
342 | },
343 | "execution_count": 6,
344 | "metadata": {},
345 | "output_type": "execute_result"
346 | }
347 | ],
348 | "source": [
349 | "# calculating the total score for each row (sum across the three score columns)\n",
350 | "df_exams['math score'] + df_exams['reading score'] + df_exams['writing score']"
351 | ]
352 | },
353 | {
354 | "cell_type": "code",
355 | "execution_count": 7,
356 | "id": "e62eb260",
357 | "metadata": {},
358 | "outputs": [],
359 | "source": [
360 | "# calculating the average score and assigning the result to a new column\n",
361 | "df_exams['average'] = (df_exams['math score'] + df_exams['reading score'] + df_exams['writing score'])/3"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": 8,
367 | "id": "63d8f21d",
368 | "metadata": {},
369 | "outputs": [
370 | {
371 | "data": {
372 | "text/html": [
373 | "\n",
374 | "\n",
387 | "
\n",
388 | " \n",
389 | " \n",
390 | " | \n",
391 | " gender | \n",
392 | " race/ethnicity | \n",
393 | " parental level of education | \n",
394 | " lunch | \n",
395 | " test preparation course | \n",
396 | " math score | \n",
397 | " reading score | \n",
398 | " writing score | \n",
399 | " average | \n",
400 | "
\n",
401 | " \n",
402 | " \n",
403 | " \n",
404 | " 0 | \n",
405 | " female | \n",
406 | " group B | \n",
407 | " bachelor's degree | \n",
408 | " standard | \n",
409 | " none | \n",
410 | " 72 | \n",
411 | " 72 | \n",
412 | " 74 | \n",
413 | " 72.666667 | \n",
414 | "
\n",
415 | " \n",
416 | " 1 | \n",
417 | " female | \n",
418 | " group C | \n",
419 | " some college | \n",
420 | " standard | \n",
421 | " completed | \n",
422 | " 69 | \n",
423 | " 90 | \n",
424 | " 88 | \n",
425 | " 82.333333 | \n",
426 | "
\n",
427 | " \n",
428 | " 2 | \n",
429 | " female | \n",
430 | " group B | \n",
431 | " master's degree | \n",
432 | " standard | \n",
433 | " none | \n",
434 | " 90 | \n",
435 | " 95 | \n",
436 | " 93 | \n",
437 | " 92.666667 | \n",
438 | "
\n",
439 | " \n",
440 | " 3 | \n",
441 | " male | \n",
442 | " group A | \n",
443 | " associate's degree | \n",
444 | " free/reduced | \n",
445 | " none | \n",
446 | " 47 | \n",
447 | " 57 | \n",
448 | " 44 | \n",
449 | " 49.333333 | \n",
450 | "
\n",
451 | " \n",
452 | " 4 | \n",
453 | " male | \n",
454 | " group C | \n",
455 | " some college | \n",
456 | " standard | \n",
457 | " none | \n",
458 | " 76 | \n",
459 | " 78 | \n",
460 | " 75 | \n",
461 | " 76.333333 | \n",
462 | "
\n",
463 | " \n",
464 | " ... | \n",
465 | " ... | \n",
466 | " ... | \n",
467 | " ... | \n",
468 | " ... | \n",
469 | " ... | \n",
470 | " ... | \n",
471 | " ... | \n",
472 | " ... | \n",
473 | " ... | \n",
474 | "
\n",
475 | " \n",
476 | " 995 | \n",
477 | " female | \n",
478 | " group E | \n",
479 | " master's degree | \n",
480 | " standard | \n",
481 | " completed | \n",
482 | " 88 | \n",
483 | " 99 | \n",
484 | " 95 | \n",
485 | " 94.000000 | \n",
486 | "
\n",
487 | " \n",
488 | " 996 | \n",
489 | " male | \n",
490 | " group C | \n",
491 | " high school | \n",
492 | " free/reduced | \n",
493 | " none | \n",
494 | " 62 | \n",
495 | " 55 | \n",
496 | " 55 | \n",
497 | " 57.333333 | \n",
498 | "
\n",
499 | " \n",
500 | " 997 | \n",
501 | " female | \n",
502 | " group C | \n",
503 | " high school | \n",
504 | " free/reduced | \n",
505 | " completed | \n",
506 | " 59 | \n",
507 | " 71 | \n",
508 | " 65 | \n",
509 | " 65.000000 | \n",
510 | "
\n",
511 | " \n",
512 | " 998 | \n",
513 | " female | \n",
514 | " group D | \n",
515 | " some college | \n",
516 | " standard | \n",
517 | " completed | \n",
518 | " 68 | \n",
519 | " 78 | \n",
520 | " 77 | \n",
521 | " 74.333333 | \n",
522 | "
\n",
523 | " \n",
524 | " 999 | \n",
525 | " female | \n",
526 | " group D | \n",
527 | " some college | \n",
528 | " free/reduced | \n",
529 | " none | \n",
530 | " 77 | \n",
531 | " 86 | \n",
532 | " 86 | \n",
533 | " 83.000000 | \n",
534 | "
\n",
535 | " \n",
536 | "
\n",
537 | "
1000 rows × 9 columns
\n",
538 | "
"
539 | ],
540 | "text/plain": [
541 | " gender race/ethnicity parental level of education lunch \\\n",
542 | "0 female group B bachelor's degree standard \n",
543 | "1 female group C some college standard \n",
544 | "2 female group B master's degree standard \n",
545 | "3 male group A associate's degree free/reduced \n",
546 | "4 male group C some college standard \n",
547 | ".. ... ... ... ... \n",
548 | "995 female group E master's degree standard \n",
549 | "996 male group C high school free/reduced \n",
550 | "997 female group C high school free/reduced \n",
551 | "998 female group D some college standard \n",
552 | "999 female group D some college free/reduced \n",
553 | "\n",
554 | " test preparation course math score reading score writing score \\\n",
555 | "0 none 72 72 74 \n",
556 | "1 completed 69 90 88 \n",
557 | "2 none 90 95 93 \n",
558 | "3 none 47 57 44 \n",
559 | "4 none 76 78 75 \n",
560 | ".. ... ... ... ... \n",
561 | "995 completed 88 99 95 \n",
562 | "996 none 62 55 55 \n",
563 | "997 completed 59 71 65 \n",
564 | "998 completed 68 78 77 \n",
565 | "999 none 77 86 86 \n",
566 | "\n",
567 | " average \n",
568 | "0 72.666667 \n",
569 | "1 82.333333 \n",
570 | "2 92.666667 \n",
571 | "3 49.333333 \n",
572 | "4 76.333333 \n",
573 | ".. ... \n",
574 | "995 94.000000 \n",
575 | "996 57.333333 \n",
576 | "997 65.000000 \n",
577 | "998 74.333333 \n",
578 | "999 83.000000 \n",
579 | "\n",
580 | "[1000 rows x 9 columns]"
581 | ]
582 | },
583 | "execution_count": 8,
584 | "metadata": {},
585 | "output_type": "execute_result"
586 | }
587 | ],
588 | "source": [
589 | "# showing the dataframe\n",
590 | "df_exams"
591 | ]
592 | }
593 | ],
594 | "metadata": {
595 | "kernelspec": {
596 | "display_name": "Python 3",
597 | "language": "python",
598 | "name": "python3"
599 | },
600 | "language_info": {
601 | "codemirror_mode": {
602 | "name": "ipython",
603 | "version": 3
604 | },
605 | "file_extension": ".py",
606 | "mimetype": "text/x-python",
607 | "name": "python",
608 | "nbconvert_exporter": "python",
609 | "pygments_lexer": "ipython3",
610 | "version": "3.8.8"
611 | },
612 | "toc": {
613 | "base_numbering": 1,
614 | "nav_menu": {},
615 | "number_sections": true,
616 | "sideBar": true,
617 | "skip_h1_title": false,
618 | "title_cell": "Table of Contents",
619 | "title_sidebar": "Contents",
620 | "toc_cell": false,
621 | "toc_position": {},
622 | "toc_section_display": true,
623 | "toc_window_display": false
624 | }
625 | },
626 | "nbformat": 4,
627 | "nbformat_minor": 5
628 | }
629 |
--------------------------------------------------------------------------------
/2.Intro to Pandas/8.The value_counts() method.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "4e3fdade",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "9dfaf0d2",
17 | "metadata": {},
18 | "outputs": [
19 | {
20 | "data": {
21 | "text/html": [
22 | "\n",
23 | "\n",
36 | "
\n",
37 | " \n",
38 | " \n",
39 | " | \n",
40 | " gender | \n",
41 | " race/ethnicity | \n",
42 | " parental level of education | \n",
43 | " lunch | \n",
44 | " test preparation course | \n",
45 | " math score | \n",
46 | " reading score | \n",
47 | " writing score | \n",
48 | "
\n",
49 | " \n",
50 | " \n",
51 | " \n",
52 | " 0 | \n",
53 | " female | \n",
54 | " group B | \n",
55 | " bachelor's degree | \n",
56 | " standard | \n",
57 | " none | \n",
58 | " 72 | \n",
59 | " 72 | \n",
60 | " 74 | \n",
61 | "
\n",
62 | " \n",
63 | " 1 | \n",
64 | " female | \n",
65 | " group C | \n",
66 | " some college | \n",
67 | " standard | \n",
68 | " completed | \n",
69 | " 69 | \n",
70 | " 90 | \n",
71 | " 88 | \n",
72 | "
\n",
73 | " \n",
74 | " 2 | \n",
75 | " female | \n",
76 | " group B | \n",
77 | " master's degree | \n",
78 | " standard | \n",
79 | " none | \n",
80 | " 90 | \n",
81 | " 95 | \n",
82 | " 93 | \n",
83 | "
\n",
84 | " \n",
85 | " 3 | \n",
86 | " male | \n",
87 | " group A | \n",
88 | " associate's degree | \n",
89 | " free/reduced | \n",
90 | " none | \n",
91 | " 47 | \n",
92 | " 57 | \n",
93 | " 44 | \n",
94 | "
\n",
95 | " \n",
96 | " 4 | \n",
97 | " male | \n",
98 | " group C | \n",
99 | " some college | \n",
100 | " standard | \n",
101 | " none | \n",
102 | " 76 | \n",
103 | " 78 | \n",
104 | " 75 | \n",
105 | "
\n",
106 | " \n",
107 | "
\n",
108 | "
"
109 | ],
110 | "text/plain": [
111 | " gender race/ethnicity parental level of education lunch \\\n",
112 | "0 female group B bachelor's degree standard \n",
113 | "1 female group C some college standard \n",
114 | "2 female group B master's degree standard \n",
115 | "3 male group A associate's degree free/reduced \n",
116 | "4 male group C some college standard \n",
117 | "\n",
118 | " test preparation course math score reading score writing score \n",
119 | "0 none 72 72 74 \n",
120 | "1 completed 69 90 88 \n",
121 | "2 none 90 95 93 \n",
122 | "3 none 47 57 44 \n",
123 | "4 none 76 78 75 "
124 | ]
125 | },
126 | "execution_count": 2,
127 | "metadata": {},
128 | "output_type": "execute_result"
129 | }
130 | ],
131 | "source": [
132 | "# reading the csv file\n",
133 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n",
134 | "df_exams.head()"
135 | ]
136 | },
137 | {
138 | "cell_type": "markdown",
139 | "id": "9a47f15a",
140 | "metadata": {},
141 | "source": [
142 | "# Value Counts"
143 | ]
144 | },
145 | {
146 | "cell_type": "code",
147 | "execution_count": 3,
148 | "id": "e04ff454",
149 | "metadata": {},
150 | "outputs": [
151 | {
152 | "data": {
153 | "text/plain": [
154 | "1000"
155 | ]
156 | },
157 | "execution_count": 3,
158 | "metadata": {},
159 | "output_type": "execute_result"
160 | }
161 | ],
162 | "source": [
163 | "# counting gender elements\n",
164 | "\n",
165 | "# len function\n",
166 | "len(df_exams['gender'])\n",
167 | "# .count() method\n",
168 | "df_exams['gender'].count()"
169 | ]
170 | },
171 | {
172 | "cell_type": "code",
173 | "execution_count": 4,
174 | "id": "07ab6370",
175 | "metadata": {},
176 | "outputs": [
177 | {
178 | "data": {
179 | "text/plain": [
180 | "female 518\n",
181 | "male 482\n",
182 | "Name: gender, dtype: int64"
183 | ]
184 | },
185 | "execution_count": 4,
186 | "metadata": {},
187 | "output_type": "execute_result"
188 | }
189 | ],
190 | "source": [
191 | "# counting gender elements by category\n",
192 | "df_exams['gender'].value_counts()"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 5,
198 | "id": "e4f100a9",
199 | "metadata": {},
200 | "outputs": [
201 | {
202 | "data": {
203 | "text/plain": [
204 | "female 0.518\n",
205 | "male 0.482\n",
206 | "Name: gender, dtype: float64"
207 | ]
208 | },
209 | "execution_count": 5,
210 | "metadata": {},
211 | "output_type": "execute_result"
212 | }
213 | ],
214 | "source": [
215 | "# return the relative frequency (divide all values by the sum of values)\n",
216 | "df_exams['gender'].value_counts(normalize=True)"
217 | ]
218 | },
219 | {
220 | "cell_type": "code",
221 | "execution_count": 6,
222 | "id": "620d6e12",
223 | "metadata": {},
224 | "outputs": [
225 | {
226 | "data": {
227 | "text/plain": [
228 | "some college 226\n",
229 | "associate's degree 222\n",
230 | "high school 196\n",
231 | "some high school 179\n",
232 | "bachelor's degree 118\n",
233 | "master's degree 59\n",
234 | "Name: parental level of education, dtype: int64"
235 | ]
236 | },
237 | "execution_count": 6,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "# counting \"parental level of education\" elements by category\n",
244 | "df_exams['parental level of education'].value_counts()"
245 | ]
246 | },
247 | {
248 | "cell_type": "code",
249 | "execution_count": 7,
250 | "id": "43ce51e7",
251 | "metadata": {},
252 | "outputs": [
253 | {
254 | "data": {
255 | "text/plain": [
256 | "some college 0.23\n",
257 | "associate's degree 0.22\n",
258 | "high school 0.20\n",
259 | "some high school 0.18\n",
260 | "bachelor's degree 0.12\n",
261 | "master's degree 0.06\n",
262 | "Name: parental level of education, dtype: float64"
263 | ]
264 | },
265 | "execution_count": 7,
266 | "metadata": {},
267 | "output_type": "execute_result"
268 | }
269 | ],
270 | "source": [
271 | "# return the relative frequency and round to 2 decimals\n",
272 | "df_exams['parental level of education'].value_counts(normalize=True).round(2)"
273 | ]
274 | }
275 | ],
276 | "metadata": {
277 | "kernelspec": {
278 | "display_name": "Python 3",
279 | "language": "python",
280 | "name": "python3"
281 | },
282 | "language_info": {
283 | "codemirror_mode": {
284 | "name": "ipython",
285 | "version": 3
286 | },
287 | "file_extension": ".py",
288 | "mimetype": "text/x-python",
289 | "name": "python",
290 | "nbconvert_exporter": "python",
291 | "pygments_lexer": "ipython3",
292 | "version": "3.8.8"
293 | },
294 | "toc": {
295 | "base_numbering": 1,
296 | "nav_menu": {},
297 | "number_sections": true,
298 | "sideBar": true,
299 | "skip_h1_title": false,
300 | "title_cell": "Table of Contents",
301 | "title_sidebar": "Contents",
302 | "toc_cell": false,
303 | "toc_position": {},
304 | "toc_section_display": true,
305 | "toc_window_display": false
306 | }
307 | },
308 | "nbformat": 4,
309 | "nbformat_minor": 5
310 | }
311 |
--------------------------------------------------------------------------------
/3.Pivot Table/gdp.csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/3.Pivot Table/gdp.csv
--------------------------------------------------------------------------------
/3.Pivot Table/supermarket_sales.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/3.Pivot Table/supermarket_sales.xlsx
--------------------------------------------------------------------------------
/4.Data Visualization/1.Dataset Overview and Making Pivot Table.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "270bafb9",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "47fc4e0d",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# reading the csv file\n",
21 | "df_population_raw = pd.read_csv('population_total.csv')"
22 | ]
23 | },
24 | {
25 | "cell_type": "code",
26 | "execution_count": 3,
27 | "id": "d44cbeb9",
28 | "metadata": {
29 | "scrolled": false
30 | },
31 | "outputs": [
32 | {
33 | "data": {
34 | "text/html": [
35 | "\n",
36 | "\n",
49 | "
\n",
50 | " \n",
51 | " \n",
52 | " | \n",
53 | " country | \n",
54 | " year | \n",
55 | " population | \n",
56 | "
\n",
57 | " \n",
58 | " \n",
59 | " \n",
60 | " 0 | \n",
61 | " China | \n",
62 | " 2020.0 | \n",
63 | " 1.439324e+09 | \n",
64 | "
\n",
65 | " \n",
66 | " 1 | \n",
67 | " China | \n",
68 | " 2019.0 | \n",
69 | " 1.433784e+09 | \n",
70 | "
\n",
71 | " \n",
72 | " 2 | \n",
73 | " China | \n",
74 | " 2018.0 | \n",
75 | " 1.427648e+09 | \n",
76 | "
\n",
77 | " \n",
78 | " 3 | \n",
79 | " China | \n",
80 | " 2017.0 | \n",
81 | " 1.421022e+09 | \n",
82 | "
\n",
83 | " \n",
84 | " 4 | \n",
85 | " China | \n",
86 | " 2016.0 | \n",
87 | " 1.414049e+09 | \n",
88 | "
\n",
89 | " \n",
90 | " ... | \n",
91 | " ... | \n",
92 | " ... | \n",
93 | " ... | \n",
94 | "
\n",
95 | " \n",
96 | " 4180 | \n",
97 | " United States | \n",
98 | " 1965.0 | \n",
99 | " 1.997337e+08 | \n",
100 | "
\n",
101 | " \n",
102 | " 4181 | \n",
103 | " United States | \n",
104 | " 1960.0 | \n",
105 | " 1.867206e+08 | \n",
106 | "
\n",
107 | " \n",
108 | " 4182 | \n",
109 | " United States | \n",
110 | " 1955.0 | \n",
111 | " 1.716853e+08 | \n",
112 | "
\n",
113 | " \n",
114 | " 4183 | \n",
115 | " India | \n",
116 | " 1960.0 | \n",
117 | " 4.505477e+08 | \n",
118 | "
\n",
119 | " \n",
120 | " 4184 | \n",
121 | " India | \n",
122 | " 1955.0 | \n",
123 | " 4.098806e+08 | \n",
124 | "
\n",
125 | " \n",
126 | "
\n",
127 | "
4185 rows × 3 columns
\n",
128 | "
"
129 | ],
130 | "text/plain": [
131 | " country year population\n",
132 | "0 China 2020.0 1.439324e+09\n",
133 | "1 China 2019.0 1.433784e+09\n",
134 | "2 China 2018.0 1.427648e+09\n",
135 | "3 China 2017.0 1.421022e+09\n",
136 | "4 China 2016.0 1.414049e+09\n",
137 | "... ... ... ...\n",
138 | "4180 United States 1965.0 1.997337e+08\n",
139 | "4181 United States 1960.0 1.867206e+08\n",
140 | "4182 United States 1955.0 1.716853e+08\n",
141 | "4183 India 1960.0 4.505477e+08\n",
142 | "4184 India 1955.0 4.098806e+08\n",
143 | "\n",
144 | "[4185 rows x 3 columns]"
145 | ]
146 | },
147 | "execution_count": 3,
148 | "metadata": {},
149 | "output_type": "execute_result"
150 | }
151 | ],
152 | "source": [
153 | "# first look at the dataset\n",
154 | "df_population_raw"
155 | ]
156 | },
157 | {
158 | "cell_type": "markdown",
159 | "id": "ec86a50f",
160 | "metadata": {},
161 | "source": [
162 | "# Making a Pivot Table"
163 | ]
164 | },
165 | {
166 | "cell_type": "markdown",
167 | "id": "1450c112",
168 | "metadata": {},
169 | "source": [
170 | ".pivot(): Returns reshaped DataFrame organized by given index / column values (\"pivot without aggregation\")"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 4,
176 | "id": "4d01f725",
177 | "metadata": {},
178 | "outputs": [],
179 | "source": [
180 | "# dropping null values\n",
181 | "df_population_raw.dropna(inplace=True)"
182 | ]
183 | },
184 | {
185 | "cell_type": "code",
186 | "execution_count": 5,
187 | "id": "aeb60686",
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "# making a pivot table\n",
192 | "df_pivot = df_population_raw.pivot(index='year', columns='country',\n",
193 | " values='population')"
194 | ]
195 | },
196 | {
197 | "cell_type": "code",
198 | "execution_count": 6,
199 | "id": "918dc0db",
200 | "metadata": {},
201 | "outputs": [],
202 | "source": [
203 | "# selecting some countries\n",
204 | "df_pivot = df_pivot[['United States', 'India', 'China', \n",
205 | " 'Indonesia', 'Brazil']]"
206 | ]
207 | },
208 | {
209 | "cell_type": "code",
210 | "execution_count": 7,
211 | "id": "d8c76de7",
212 | "metadata": {
213 | "scrolled": false
214 | },
215 | "outputs": [
216 | {
217 | "data": {
218 | "text/html": [
219 | "\n",
220 | "\n",
233 | "
\n",
234 | " \n",
235 | " \n",
236 | " country | \n",
237 | " United States | \n",
238 | " India | \n",
239 | " China | \n",
240 | " Indonesia | \n",
241 | " Brazil | \n",
242 | "
\n",
243 | " \n",
244 | " year | \n",
245 | " | \n",
246 | " | \n",
247 | " | \n",
248 | " | \n",
249 | " | \n",
250 | "
\n",
251 | " \n",
252 | " \n",
253 | " \n",
254 | " 1955.0 | \n",
255 | " 171685336.0 | \n",
256 | " 4.098806e+08 | \n",
257 | " 6.122416e+08 | \n",
258 | " 77273425.0 | \n",
259 | " 62533919.0 | \n",
260 | "
\n",
261 | " \n",
262 | " 1960.0 | \n",
263 | " 186720571.0 | \n",
264 | " 4.505477e+08 | \n",
265 | " 6.604081e+08 | \n",
266 | " 87751068.0 | \n",
267 | " 72179226.0 | \n",
268 | "
\n",
269 | " \n",
270 | " 1965.0 | \n",
271 | " 199733676.0 | \n",
272 | " 4.991233e+08 | \n",
273 | " 7.242190e+08 | \n",
274 | " 100267062.0 | \n",
275 | " 83373530.0 | \n",
276 | "
\n",
277 | " \n",
278 | " 1970.0 | \n",
279 | " 209513341.0 | \n",
280 | " 5.551898e+08 | \n",
281 | " 8.276014e+08 | \n",
282 | " 114793178.0 | \n",
283 | " 95113265.0 | \n",
284 | "
\n",
285 | " \n",
286 | " 1975.0 | \n",
287 | " 219081251.0 | \n",
288 | " 6.231029e+08 | \n",
289 | " 9.262409e+08 | \n",
290 | " 130680727.0 | \n",
291 | " 107216205.0 | \n",
292 | "
\n",
293 | " \n",
294 | " 1980.0 | \n",
295 | " 229476354.0 | \n",
296 | " 6.989528e+08 | \n",
297 | " 1.000089e+09 | \n",
298 | " 147447836.0 | \n",
299 | " 120694009.0 | \n",
300 | "
\n",
301 | " \n",
302 | " 1985.0 | \n",
303 | " 240499825.0 | \n",
304 | " 7.843600e+08 | \n",
305 | " 1.075589e+09 | \n",
306 | " 164982451.0 | \n",
307 | " 135274080.0 | \n",
308 | "
\n",
309 | " \n",
310 | " 1990.0 | \n",
311 | " 252120309.0 | \n",
312 | " 8.732778e+08 | \n",
313 | " 1.176884e+09 | \n",
314 | " 181413402.0 | \n",
315 | " 149003223.0 | \n",
316 | "
\n",
317 | " \n",
318 | " 1995.0 | \n",
319 | " 265163745.0 | \n",
320 | " 9.639226e+08 | \n",
321 | " 1.240921e+09 | \n",
322 | " 196934260.0 | \n",
323 | " 162019896.0 | \n",
324 | "
\n",
325 | " \n",
326 | " 2000.0 | \n",
327 | " 281710909.0 | \n",
328 | " 1.056576e+09 | \n",
329 | " 1.290551e+09 | \n",
330 | " 211513823.0 | \n",
331 | " 174790340.0 | \n",
332 | "
\n",
333 | " \n",
334 | " 2005.0 | \n",
335 | " 294993511.0 | \n",
336 | " 1.147610e+09 | \n",
337 | " 1.330776e+09 | \n",
338 | " 226289470.0 | \n",
339 | " 186127103.0 | \n",
340 | "
\n",
341 | " \n",
342 | " 2010.0 | \n",
343 | " 309011475.0 | \n",
344 | " 1.234281e+09 | \n",
345 | " 1.368811e+09 | \n",
346 | " 241834215.0 | \n",
347 | " 195713635.0 | \n",
348 | "
\n",
349 | " \n",
350 | " 2015.0 | \n",
351 | " 320878310.0 | \n",
352 | " 1.310152e+09 | \n",
353 | " 1.406848e+09 | \n",
354 | " 258383256.0 | \n",
355 | " 204471769.0 | \n",
356 | "
\n",
357 | " \n",
358 | " 2016.0 | \n",
359 | " 323015995.0 | \n",
360 | " 1.324517e+09 | \n",
361 | " 1.414049e+09 | \n",
362 | " 261556381.0 | \n",
363 | " 206163053.0 | \n",
364 | "
\n",
365 | " \n",
366 | " 2017.0 | \n",
367 | " 325084756.0 | \n",
368 | " 1.338677e+09 | \n",
369 | " 1.421022e+09 | \n",
370 | " 264650963.0 | \n",
371 | " 207833823.0 | \n",
372 | "
\n",
373 | " \n",
374 | " 2018.0 | \n",
375 | " 327096265.0 | \n",
376 | " 1.352642e+09 | \n",
377 | " 1.427648e+09 | \n",
378 | " 267670543.0 | \n",
379 | " 209469323.0 | \n",
380 | "
\n",
381 | " \n",
382 | " 2019.0 | \n",
383 | " 329064917.0 | \n",
384 | " 1.366418e+09 | \n",
385 | " 1.433784e+09 | \n",
386 | " 270625568.0 | \n",
387 | " 211049527.0 | \n",
388 | "
\n",
389 | " \n",
390 | " 2020.0 | \n",
391 | " 331002651.0 | \n",
392 | " 1.380004e+09 | \n",
393 | " 1.439324e+09 | \n",
394 | " 273523615.0 | \n",
395 | " 212559417.0 | \n",
396 | "
\n",
397 | " \n",
398 | "
\n",
399 | "
"
400 | ],
401 | "text/plain": [
402 | "country United States India China Indonesia Brazil\n",
403 | "year \n",
404 | "1955.0 171685336.0 4.098806e+08 6.122416e+08 77273425.0 62533919.0\n",
405 | "1960.0 186720571.0 4.505477e+08 6.604081e+08 87751068.0 72179226.0\n",
406 | "1965.0 199733676.0 4.991233e+08 7.242190e+08 100267062.0 83373530.0\n",
407 | "1970.0 209513341.0 5.551898e+08 8.276014e+08 114793178.0 95113265.0\n",
408 | "1975.0 219081251.0 6.231029e+08 9.262409e+08 130680727.0 107216205.0\n",
409 | "1980.0 229476354.0 6.989528e+08 1.000089e+09 147447836.0 120694009.0\n",
410 | "1985.0 240499825.0 7.843600e+08 1.075589e+09 164982451.0 135274080.0\n",
411 | "1990.0 252120309.0 8.732778e+08 1.176884e+09 181413402.0 149003223.0\n",
412 | "1995.0 265163745.0 9.639226e+08 1.240921e+09 196934260.0 162019896.0\n",
413 | "2000.0 281710909.0 1.056576e+09 1.290551e+09 211513823.0 174790340.0\n",
414 | "2005.0 294993511.0 1.147610e+09 1.330776e+09 226289470.0 186127103.0\n",
415 | "2010.0 309011475.0 1.234281e+09 1.368811e+09 241834215.0 195713635.0\n",
416 | "2015.0 320878310.0 1.310152e+09 1.406848e+09 258383256.0 204471769.0\n",
417 | "2016.0 323015995.0 1.324517e+09 1.414049e+09 261556381.0 206163053.0\n",
418 | "2017.0 325084756.0 1.338677e+09 1.421022e+09 264650963.0 207833823.0\n",
419 | "2018.0 327096265.0 1.352642e+09 1.427648e+09 267670543.0 209469323.0\n",
420 | "2019.0 329064917.0 1.366418e+09 1.433784e+09 270625568.0 211049527.0\n",
421 | "2020.0 331002651.0 1.380004e+09 1.439324e+09 273523615.0 212559417.0"
422 | ]
423 | },
424 | "execution_count": 7,
425 | "metadata": {},
426 | "output_type": "execute_result"
427 | }
428 | ],
429 | "source": [
430 | "# showing pivot table\n",
431 | "df_pivot"
432 | ]
433 | }
434 | ],
435 | "metadata": {
436 | "kernelspec": {
437 | "display_name": "Python 3",
438 | "language": "python",
439 | "name": "python3"
440 | },
441 | "language_info": {
442 | "codemirror_mode": {
443 | "name": "ipython",
444 | "version": 3
445 | },
446 | "file_extension": ".py",
447 | "mimetype": "text/x-python",
448 | "name": "python",
449 | "nbconvert_exporter": "python",
450 | "pygments_lexer": "ipython3",
451 | "version": "3.8.8"
452 | },
453 | "toc": {
454 | "base_numbering": 1,
455 | "nav_menu": {},
456 | "number_sections": true,
457 | "sideBar": true,
458 | "skip_h1_title": false,
459 | "title_cell": "Table of Contents",
460 | "title_sidebar": "Contents",
461 | "toc_cell": false,
462 | "toc_position": {},
463 | "toc_section_display": true,
464 | "toc_window_display": false
465 | }
466 | },
467 | "nbformat": 4,
468 | "nbformat_minor": 5
469 | }
470 |
--------------------------------------------------------------------------------
/Exercises/Intro to Pandas/Introduction to Pandas-Exercise.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "2bac431d",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "# import pandas\n",
11 | "import pandas as pd"
12 | ]
13 | },
14 | {
15 | "cell_type": "code",
16 | "execution_count": null,
17 | "id": "730ef2aa",
18 | "metadata": {},
19 | "outputs": [],
20 | "source": [
21 | "# Optional: use the pd.set_option() to display all rows in a dataframe by default\n"
22 | ]
23 | },
24 | {
25 | "cell_type": "markdown",
26 | "id": "05c7ec54",
27 | "metadata": {},
28 | "source": [
29 | "# Create a DataFrame"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": null,
35 | "id": "cfb39f8c",
36 | "metadata": {},
37 | "outputs": [],
38 | "source": [
39 | "# read the \"bestsellers with categories\" csv file (Dataset on Amazon's Top 50 bestselling books from 2009 to 2019.)\n"
40 | ]
41 | },
42 | {
43 | "cell_type": "code",
44 | "execution_count": null,
45 | "id": "618209c8",
46 | "metadata": {
47 | "scrolled": true
48 | },
49 | "outputs": [],
50 | "source": [
51 | "# get access to the shape attribute\n"
52 | ]
53 | },
54 | {
55 | "cell_type": "code",
56 | "execution_count": null,
57 | "id": "3acde70f",
58 | "metadata": {},
59 | "outputs": [],
60 | "source": [
61 | "# find the data types of each column\n"
62 | ]
63 | },
64 | {
65 | "cell_type": "markdown",
66 | "id": "7f15b10b",
67 | "metadata": {},
68 | "source": [
69 | "# Display a DataFrame"
70 | ]
71 | },
72 | {
73 | "cell_type": "code",
74 | "execution_count": null,
75 | "id": "ef6b8d45",
76 | "metadata": {
77 | "scrolled": true
78 | },
79 | "outputs": [],
80 | "source": [
81 | "# show first 5 rows in a dataframe\n"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": null,
87 | "id": "a3da1b0a",
88 | "metadata": {},
89 | "outputs": [],
90 | "source": [
91 | "# describe basic statistics of the dataframe (mean, std, min, max)\n"
92 | ]
93 | },
94 | {
95 | "cell_type": "markdown",
96 | "id": "51f4d331",
97 | "metadata": {},
98 | "source": [
99 | "# Add a new Column with an array"
100 | ]
101 | },
102 | {
103 | "cell_type": "code",
104 | "execution_count": null,
105 | "id": "94c526df",
106 | "metadata": {},
107 | "outputs": [],
108 | "source": [
109 | "# Your task is to create a column named 'Critic Rating' that should have random integer numbers between 1 and 4\n",
110 | "\n",
111 | "# 1. import numpy and create 550 random integer numbers between 1 and 4\n",
112 | "\n",
113 | "# 2. add new 'Critic Rating' column to dataframe using the random numbers created\n",
114 | "\n",
115 | "\n",
116 | "# Note the random numbers in this new 'Critic Rating' column will be different between your solution and mine, but we'll focus only on the code, in this section."
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "id": "af7cec1e",
123 | "metadata": {},
124 | "outputs": [],
125 | "source": [
126 | "# show first 5 rows\n"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "id": "822167f7",
132 | "metadata": {},
133 | "source": [
134 | "# Basic Attributes, Methods and Functions"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "id": "3b8dfa8e",
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "# get access to the columns attribute\n"
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "id": "6fe2c5f5",
150 | "metadata": {},
151 | "source": [
152 | "# Selecting Two or More Columns from a Dataframe"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": null,
158 | "id": "1690dbfe",
159 | "metadata": {},
160 | "outputs": [],
161 | "source": [
162 |     "# move the new 'Critic Rating' column between the columns \"User Rating\" and \"Reviews\". Then update the dataframe\n",
163 | "\n",
164 | "# Tip: Copy and paste the column names obtained with the columns attribute and then rearrange elements using [[]]\n"
165 | ]
166 | },
167 | {
168 | "cell_type": "code",
169 | "execution_count": null,
170 | "id": "4a294c74",
171 | "metadata": {},
172 | "outputs": [],
173 | "source": [
174 | "# show first 5 rows\n"
175 | ]
176 | },
177 | {
178 | "cell_type": "markdown",
179 | "id": "fedaf280",
180 | "metadata": {},
181 | "source": [
182 | "# Operations on Dataframes"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "id": "da1c2205",
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "# create a column named \"Average Rating\" by using the following formula: Average Rating = (User Rating + Critic Rating)/2\n"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": null,
198 | "id": "ab6c226b",
199 | "metadata": {},
200 | "outputs": [],
201 | "source": [
202 | "# use the round function to round the values of the dataframe to 1 decimal and update the dataframe\n"
203 | ]
204 | },
205 | {
206 | "cell_type": "markdown",
207 | "id": "5142f8af",
208 | "metadata": {},
209 | "source": [
210 | "# Value Counts"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "id": "a3a1e3fc",
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "# count elements in \"Genre\" column by category and return the relative frequency \n"
221 | ]
222 | },
223 | {
224 | "cell_type": "markdown",
225 | "id": "13ff41dd",
226 | "metadata": {},
227 | "source": [
228 | "# Rename Columns"
229 | ]
230 | },
231 | {
232 | "cell_type": "code",
233 | "execution_count": null,
234 | "id": "f86a8194",
235 | "metadata": {},
236 | "outputs": [],
237 | "source": [
238 |     "# rename columns \"User Rating,\" \"Critic Rating\" and \"Average Rating\" to \"UR,\" \"CR\" and \"AR\" then update the dataframe with the inplace parameter\n"
239 | ]
240 | },
241 | {
242 | "cell_type": "code",
243 | "execution_count": null,
244 | "id": "3df4c15c",
245 | "metadata": {},
246 | "outputs": [],
247 | "source": [
248 | "# show first 5 rows\n"
249 | ]
250 | },
251 | {
252 | "cell_type": "code",
253 | "execution_count": null,
254 | "id": "474246ef",
255 | "metadata": {},
256 | "outputs": [],
257 | "source": [
258 | "# select only \"Name\", \"Author\", \"UR\", \"CR\", \"AR\" and \"Year\" columns and update dataframe\n"
259 | ]
260 | },
261 | {
262 | "cell_type": "markdown",
263 | "id": "6c458cf9",
264 | "metadata": {},
265 | "source": [
266 | "# Sort a dataframe"
267 | ]
268 | },
269 | {
270 | "cell_type": "code",
271 | "execution_count": null,
272 | "id": "50a7d2a8",
273 | "metadata": {},
274 | "outputs": [],
275 | "source": [
276 | "# sort the dataframe descending by \"UR\" and \"CR\"\n"
277 | ]
278 | }
279 | ],
280 | "metadata": {
281 | "kernelspec": {
282 | "display_name": "Python 3",
283 | "language": "python",
284 | "name": "python3"
285 | },
286 | "language_info": {
287 | "codemirror_mode": {
288 | "name": "ipython",
289 | "version": 3
290 | },
291 | "file_extension": ".py",
292 | "mimetype": "text/x-python",
293 | "name": "python",
294 | "nbconvert_exporter": "python",
295 | "pygments_lexer": "ipython3",
296 | "version": "3.8.8"
297 | },
298 | "toc": {
299 | "base_numbering": 1,
300 | "nav_menu": {},
301 | "number_sections": true,
302 | "sideBar": true,
303 | "skip_h1_title": false,
304 | "title_cell": "Table of Contents",
305 | "title_sidebar": "Contents",
306 | "toc_cell": false,
307 | "toc_position": {},
308 | "toc_section_display": true,
309 | "toc_window_display": false
310 | }
311 | },
312 | "nbformat": 4,
313 | "nbformat_minor": 5
314 | }
315 |
--------------------------------------------------------------------------------
/Exercises/Merging and Concatenating DataFrames/IMDb movies.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/Exercises/Merging and Concatenating DataFrames/IMDb movies.csv.zip
--------------------------------------------------------------------------------
/Exercises/Merging and Concatenating DataFrames/IMDb ratings.csv.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/Exercises/Merging and Concatenating DataFrames/IMDb ratings.csv.zip
--------------------------------------------------------------------------------
/Exercises/Merging and Concatenating DataFrames/Merging and Concatenating DataFrames-Exercise.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "id": "fad49839",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": null,
16 | "id": "09c6213f",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# read 'IMDb movies.csv' and 'IMDb ratings.csv'\n",
21 | "df_movies = pd.read_csv('IMDb movies.csv', low_memory=False)\n",
22 | "df_ratings = pd.read_csv('IMDb ratings.csv')"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": null,
28 | "id": "d5670913",
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# select columns\n",
33 | "df_movies = df_movies[['imdb_title_id', 'title', 'year',\n",
34 | " 'genre', 'country']]\n",
35 | "\n",
36 | "df_ratings = df_ratings[['imdb_title_id', 'total_votes', 'mean_vote']]"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "id": "310fc4cc",
42 | "metadata": {},
43 | "source": [
44 | "# merge()"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "de4aa87a",
50 | "metadata": {},
51 | "source": [
52 | "## Inner join"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": null,
58 | "id": "e50bde09",
59 | "metadata": {},
60 | "outputs": [],
61 | "source": [
62 | "# merge df_movies and df_ratings (inner join)\n"
63 | ]
64 | },
65 | {
66 | "cell_type": "markdown",
67 | "id": "232dc587",
68 | "metadata": {},
69 | "source": [
70 | "## Outer join (Full join)"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": null,
76 | "id": "eb7cbafe",
77 | "metadata": {},
78 | "outputs": [],
79 | "source": [
80 | "# merge df_movies and df_ratings (outer join)\n"
81 | ]
82 | },
83 | {
84 | "cell_type": "markdown",
85 | "id": "554af406",
86 | "metadata": {},
87 | "source": [
88 | "## Exclusive Outer join (Exclusive Full join)"
89 | ]
90 | },
91 | {
92 | "cell_type": "code",
93 | "execution_count": null,
94 | "id": "18842159",
95 | "metadata": {},
96 | "outputs": [],
97 | "source": [
98 | "# merge df_movies and df_ratings (Exclusive Full join)\n"
99 | ]
100 | },
101 | {
102 | "cell_type": "markdown",
103 | "id": "dbee2e97",
104 | "metadata": {},
105 | "source": [
106 | "## Left join"
107 | ]
108 | },
109 | {
110 | "cell_type": "code",
111 | "execution_count": null,
112 | "id": "5317af4a",
113 | "metadata": {},
114 | "outputs": [],
115 | "source": [
116 | "# extract a 50% sample of the df_movies dataframe\n"
117 | ]
118 | },
119 | {
120 | "cell_type": "code",
121 | "execution_count": null,
122 | "id": "3e41bd9f",
123 | "metadata": {},
124 | "outputs": [],
125 | "source": [
126 | "# merge df_movies_sample and df_ratings (left join)\n"
127 | ]
128 | },
129 | {
130 | "cell_type": "markdown",
131 | "id": "3014d002",
132 | "metadata": {},
133 | "source": [
134 | "## Exclusive Left join"
135 | ]
136 | },
137 | {
138 | "cell_type": "code",
139 | "execution_count": null,
140 | "id": "06b64daa",
141 | "metadata": {},
142 | "outputs": [],
143 | "source": [
144 | "# make a copy of the df_movies dataframe\n"
145 | ]
146 | },
147 | {
148 | "cell_type": "code",
149 | "execution_count": null,
150 | "id": "192b0988",
151 | "metadata": {},
152 | "outputs": [],
153 | "source": [
154 | "# set the first 1000 values of 'imdb_title_id' column as 'tt1234567890'\n"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "id": "d79c5ba4",
161 | "metadata": {},
162 | "outputs": [],
163 | "source": [
164 | "# merge df_movies_2 and df_ratings (exclusive left join)\n"
165 | ]
166 | },
167 | {
168 | "cell_type": "markdown",
169 | "id": "4a2007ee",
170 | "metadata": {},
171 | "source": [
172 | "## Right join"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": null,
178 | "id": "fe292156",
179 | "metadata": {},
180 | "outputs": [],
181 | "source": [
182 | "# extract a 30% sample of the df_ratings dataframe\n"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": null,
188 | "id": "0ffdcd45",
189 | "metadata": {},
190 | "outputs": [],
191 | "source": [
192 | "# merge df_movies and df_ratings_sample (right join)\n"
193 | ]
194 | },
195 | {
196 | "cell_type": "markdown",
197 | "id": "c79f3005",
198 | "metadata": {},
199 | "source": [
200 | "## Exclusive Right join"
201 | ]
202 | },
203 | {
204 | "cell_type": "code",
205 | "execution_count": null,
206 | "id": "048c2de3",
207 | "metadata": {},
208 | "outputs": [],
209 | "source": [
210 | "# make a copy of the df_ratings dataframe\n"
211 | ]
212 | },
213 | {
214 | "cell_type": "code",
215 | "execution_count": null,
216 | "id": "f71e384a",
217 | "metadata": {},
218 | "outputs": [],
219 | "source": [
220 | "# set the first 1000 values of 'imdb_title_id' column as 'tt1234567890'\n"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": null,
226 | "id": "358048b3",
227 | "metadata": {},
228 | "outputs": [],
229 | "source": [
230 | "# merge df_movies and df_ratings_2 (exclusive right join)\n"
231 | ]
232 | }
233 | ],
234 | "metadata": {
235 | "kernelspec": {
236 | "display_name": "Python 3",
237 | "language": "python",
238 | "name": "python3"
239 | },
240 | "language_info": {
241 | "codemirror_mode": {
242 | "name": "ipython",
243 | "version": 3
244 | },
245 | "file_extension": ".py",
246 | "mimetype": "text/x-python",
247 | "name": "python",
248 | "nbconvert_exporter": "python",
249 | "pygments_lexer": "ipython3",
250 | "version": "3.8.8"
251 | },
252 | "toc": {
253 | "base_numbering": 1,
254 | "nav_menu": {},
255 | "number_sections": true,
256 | "sideBar": true,
257 | "skip_h1_title": false,
258 | "title_cell": "Table of Contents",
259 | "title_sidebar": "Contents",
260 | "toc_cell": false,
261 | "toc_position": {},
262 | "toc_section_display": true,
263 | "toc_window_display": false
264 | }
265 | },
266 | "nbformat": 4,
267 | "nbformat_minor": 5
268 | }
269 |
--------------------------------------------------------------------------------
/Exercises/Merging and Concatenating DataFrames/Merging and Concatenating DataFrames-Solution.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "fad49839",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "code",
15 | "execution_count": 2,
16 | "id": "09c6213f",
17 | "metadata": {},
18 | "outputs": [],
19 | "source": [
20 | "# read 'IMDb movies.csv' and 'IMDb ratings.csv'\n",
21 | "df_movies = pd.read_csv('IMDb movies.csv', low_memory=False)\n",
22 | "df_ratings = pd.read_csv('IMDb ratings.csv')"
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 3,
28 | "id": "d5670913",
29 | "metadata": {},
30 | "outputs": [],
31 | "source": [
32 | "# select columns\n",
33 | "df_movies = df_movies[['imdb_title_id', 'title', 'year',\n",
34 | " 'genre', 'country']]\n",
35 | "\n",
36 | "df_ratings = df_ratings[['imdb_title_id', 'total_votes', 'mean_vote']]"
37 | ]
38 | },
39 | {
40 | "cell_type": "markdown",
41 | "id": "310fc4cc",
42 | "metadata": {},
43 | "source": [
44 | "# merge()"
45 | ]
46 | },
47 | {
48 | "cell_type": "markdown",
49 | "id": "de4aa87a",
50 | "metadata": {},
51 | "source": [
52 | "## Inner join"
53 | ]
54 | },
55 | {
56 | "cell_type": "code",
57 | "execution_count": 4,
58 | "id": "e50bde09",
59 | "metadata": {},
60 | "outputs": [
61 | {
62 | "data": {
63 | "text/html": [
64 | "\n",
65 | "\n",
78 | "
\n",
79 | " \n",
80 | " \n",
81 | " | \n",
82 | " imdb_title_id | \n",
83 | " title | \n",
84 | " year | \n",
85 | " genre | \n",
86 | " country | \n",
87 | " total_votes | \n",
88 | " mean_vote | \n",
89 | "
\n",
90 | " \n",
91 | " \n",
92 | " \n",
93 | " 0 | \n",
94 | " tt0000009 | \n",
95 | " Miss Jerry | \n",
96 | " 1894 | \n",
97 | " Romance | \n",
98 | " USA | \n",
99 | " 154 | \n",
100 | " 5.9 | \n",
101 | "
\n",
102 | " \n",
103 | " 1 | \n",
104 | " tt0000574 | \n",
105 | " The Story of the Kelly Gang | \n",
106 | " 1906 | \n",
107 | " Biography, Crime, Drama | \n",
108 | " Australia | \n",
109 | " 589 | \n",
110 | " 6.3 | \n",
111 | "
\n",
112 | " \n",
113 | " 2 | \n",
114 | " tt0001892 | \n",
115 | " Den sorte drøm | \n",
116 | " 1911 | \n",
117 | " Drama | \n",
118 | " Germany, Denmark | \n",
119 | " 188 | \n",
120 | " 6.0 | \n",
121 | "
\n",
122 | " \n",
123 | " 3 | \n",
124 | " tt0002101 | \n",
125 | " Cleopatra | \n",
126 | " 1912 | \n",
127 | " Drama, History | \n",
128 | " USA | \n",
129 | " 446 | \n",
130 | " 5.3 | \n",
131 | "
\n",
132 | " \n",
133 | " 4 | \n",
134 | " tt0002130 | \n",
135 | " L'Inferno | \n",
136 | " 1911 | \n",
137 | " Adventure, Drama, Fantasy | \n",
138 | " Italy | \n",
139 | " 2237 | \n",
140 | " 6.9 | \n",
141 | "
\n",
142 | " \n",
143 | " ... | \n",
144 | " ... | \n",
145 | " ... | \n",
146 | " ... | \n",
147 | " ... | \n",
148 | " ... | \n",
149 | " ... | \n",
150 | " ... | \n",
151 | "
\n",
152 | " \n",
153 | " 85850 | \n",
154 | " tt9908390 | \n",
155 | " Le lion | \n",
156 | " 2020 | \n",
157 | " Comedy | \n",
158 | " France, Belgium | \n",
159 | " 398 | \n",
160 | " 5.5 | \n",
161 | "
\n",
162 | " \n",
163 | " 85851 | \n",
164 | " tt9911196 | \n",
165 | " De Beentjes van Sint-Hildegard | \n",
166 | " 2020 | \n",
167 | " Comedy, Drama | \n",
168 | " Netherlands | \n",
169 | " 724 | \n",
170 | " 7.9 | \n",
171 | "
\n",
172 | " \n",
173 | " 85852 | \n",
174 | " tt9911774 | \n",
175 | " Padmavyuhathile Abhimanyu | \n",
176 | " 2019 | \n",
177 | " Drama | \n",
178 | " India | \n",
179 | " 265 | \n",
180 | " 7.8 | \n",
181 | "
\n",
182 | " \n",
183 | " 85853 | \n",
184 | " tt9914286 | \n",
185 | " Sokagin Çocuklari | \n",
186 | " 2019 | \n",
187 | " Drama, Family | \n",
188 | " Turkey | \n",
189 | " 194 | \n",
190 | " 9.4 | \n",
191 | "
\n",
192 | " \n",
193 | " 85854 | \n",
194 | " tt9914942 | \n",
195 | " La vida sense la Sara Amat | \n",
196 | " 2019 | \n",
197 | " Drama | \n",
198 | " Spain | \n",
199 | " 102 | \n",
200 | " 6.8 | \n",
201 | "
\n",
202 | " \n",
203 | "
\n",
204 | "
85855 rows × 7 columns
\n",
205 | "
"
206 | ],
207 | "text/plain": [
208 | " imdb_title_id title year \\\n",
209 | "0 tt0000009 Miss Jerry 1894 \n",
210 | "1 tt0000574 The Story of the Kelly Gang 1906 \n",
211 | "2 tt0001892 Den sorte drøm 1911 \n",
212 | "3 tt0002101 Cleopatra 1912 \n",
213 | "4 tt0002130 L'Inferno 1911 \n",
214 | "... ... ... ... \n",
215 | "85850 tt9908390 Le lion 2020 \n",
216 | "85851 tt9911196 De Beentjes van Sint-Hildegard 2020 \n",
217 | "85852 tt9911774 Padmavyuhathile Abhimanyu 2019 \n",
218 | "85853 tt9914286 Sokagin Çocuklari 2019 \n",
219 | "85854 tt9914942 La vida sense la Sara Amat 2019 \n",
220 | "\n",
221 | " genre country total_votes mean_vote \n",
222 | "0 Romance USA 154 5.9 \n",
223 | "1 Biography, Crime, Drama Australia 589 6.3 \n",
224 | "2 Drama Germany, Denmark 188 6.0 \n",
225 | "3 Drama, History USA 446 5.3 \n",
226 | "4 Adventure, Drama, Fantasy Italy 2237 6.9 \n",
227 | "... ... ... ... ... \n",
228 | "85850 Comedy France, Belgium 398 5.5 \n",
229 | "85851 Comedy, Drama Netherlands 724 7.9 \n",
230 | "85852 Drama India 265 7.8 \n",
231 | "85853 Drama, Family Turkey 194 9.4 \n",
232 | "85854 Drama Spain 102 6.8 \n",
233 | "\n",
234 | "[85855 rows x 7 columns]"
235 | ]
236 | },
237 | "execution_count": 4,
238 | "metadata": {},
239 | "output_type": "execute_result"
240 | }
241 | ],
242 | "source": [
243 | "# merge df_movies and df_ratings (inner join)\n",
244 | "df_movies.merge(df_ratings, on='imdb_title_id')"
245 | ]
246 | },
247 | {
248 | "cell_type": "markdown",
249 | "id": "232dc587",
250 | "metadata": {},
251 | "source": [
252 | "## Outer join (Full join)"
253 | ]
254 | },
255 | {
256 | "cell_type": "code",
257 | "execution_count": 5,
258 | "id": "eb7cbafe",
259 | "metadata": {},
260 | "outputs": [
261 | {
262 | "data": {
263 | "text/html": [
264 | "\n",
265 | "\n",
278 | "
\n",
279 | " \n",
280 | " \n",
281 | " | \n",
282 | " imdb_title_id | \n",
283 | " title | \n",
284 | " year | \n",
285 | " genre | \n",
286 | " country | \n",
287 | " total_votes | \n",
288 | " mean_vote | \n",
289 | "
\n",
290 | " \n",
291 | " \n",
292 | " \n",
293 | " 0 | \n",
294 | " tt0000009 | \n",
295 | " Miss Jerry | \n",
296 | " 1894 | \n",
297 | " Romance | \n",
298 | " USA | \n",
299 | " 154 | \n",
300 | " 5.9 | \n",
301 | "
\n",
302 | " \n",
303 | " 1 | \n",
304 | " tt0000574 | \n",
305 | " The Story of the Kelly Gang | \n",
306 | " 1906 | \n",
307 | " Biography, Crime, Drama | \n",
308 | " Australia | \n",
309 | " 589 | \n",
310 | " 6.3 | \n",
311 | "
\n",
312 | " \n",
313 | " 2 | \n",
314 | " tt0001892 | \n",
315 | " Den sorte drøm | \n",
316 | " 1911 | \n",
317 | " Drama | \n",
318 | " Germany, Denmark | \n",
319 | " 188 | \n",
320 | " 6.0 | \n",
321 | "
\n",
322 | " \n",
323 | " 3 | \n",
324 | " tt0002101 | \n",
325 | " Cleopatra | \n",
326 | " 1912 | \n",
327 | " Drama, History | \n",
328 | " USA | \n",
329 | " 446 | \n",
330 | " 5.3 | \n",
331 | "
\n",
332 | " \n",
333 | " 4 | \n",
334 | " tt0002130 | \n",
335 | " L'Inferno | \n",
336 | " 1911 | \n",
337 | " Adventure, Drama, Fantasy | \n",
338 | " Italy | \n",
339 | " 2237 | \n",
340 | " 6.9 | \n",
341 | "
\n",
342 | " \n",
343 | " ... | \n",
344 | " ... | \n",
345 | " ... | \n",
346 | " ... | \n",
347 | " ... | \n",
348 | " ... | \n",
349 | " ... | \n",
350 | " ... | \n",
351 | "
\n",
352 | " \n",
353 | " 85850 | \n",
354 | " tt9908390 | \n",
355 | " Le lion | \n",
356 | " 2020 | \n",
357 | " Comedy | \n",
358 | " France, Belgium | \n",
359 | " 398 | \n",
360 | " 5.5 | \n",
361 | "
\n",
362 | " \n",
363 | " 85851 | \n",
364 | " tt9911196 | \n",
365 | " De Beentjes van Sint-Hildegard | \n",
366 | " 2020 | \n",
367 | " Comedy, Drama | \n",
368 | " Netherlands | \n",
369 | " 724 | \n",
370 | " 7.9 | \n",
371 | "
\n",
372 | " \n",
373 | " 85852 | \n",
374 | " tt9911774 | \n",
375 | " Padmavyuhathile Abhimanyu | \n",
376 | " 2019 | \n",
377 | " Drama | \n",
378 | " India | \n",
379 | " 265 | \n",
380 | " 7.8 | \n",
381 | "
\n",
382 | " \n",
383 | " 85853 | \n",
384 | " tt9914286 | \n",
385 | " Sokagin Çocuklari | \n",
386 | " 2019 | \n",
387 | " Drama, Family | \n",
388 | " Turkey | \n",
389 | " 194 | \n",
390 | " 9.4 | \n",
391 | "
\n",
392 | " \n",
393 | " 85854 | \n",
394 | " tt9914942 | \n",
395 | " La vida sense la Sara Amat | \n",
396 | " 2019 | \n",
397 | " Drama | \n",
398 | " Spain | \n",
399 | " 102 | \n",
400 | " 6.8 | \n",
401 | "
\n",
402 | " \n",
403 | "
\n",
404 | "
85855 rows × 7 columns
\n",
405 | "
"
406 | ],
407 | "text/plain": [
408 | " imdb_title_id title year \\\n",
409 | "0 tt0000009 Miss Jerry 1894 \n",
410 | "1 tt0000574 The Story of the Kelly Gang 1906 \n",
411 | "2 tt0001892 Den sorte drøm 1911 \n",
412 | "3 tt0002101 Cleopatra 1912 \n",
413 | "4 tt0002130 L'Inferno 1911 \n",
414 | "... ... ... ... \n",
415 | "85850 tt9908390 Le lion 2020 \n",
416 | "85851 tt9911196 De Beentjes van Sint-Hildegard 2020 \n",
417 | "85852 tt9911774 Padmavyuhathile Abhimanyu 2019 \n",
418 | "85853 tt9914286 Sokagin Çocuklari 2019 \n",
419 | "85854 tt9914942 La vida sense la Sara Amat 2019 \n",
420 | "\n",
421 | " genre country total_votes mean_vote \n",
422 | "0 Romance USA 154 5.9 \n",
423 | "1 Biography, Crime, Drama Australia 589 6.3 \n",
424 | "2 Drama Germany, Denmark 188 6.0 \n",
425 | "3 Drama, History USA 446 5.3 \n",
426 | "4 Adventure, Drama, Fantasy Italy 2237 6.9 \n",
427 | "... ... ... ... ... \n",
428 | "85850 Comedy France, Belgium 398 5.5 \n",
429 | "85851 Comedy, Drama Netherlands 724 7.9 \n",
430 | "85852 Drama India 265 7.8 \n",
431 | "85853 Drama, Family Turkey 194 9.4 \n",
432 | "85854 Drama Spain 102 6.8 \n",
433 | "\n",
434 | "[85855 rows x 7 columns]"
435 | ]
436 | },
437 | "execution_count": 5,
438 | "metadata": {},
439 | "output_type": "execute_result"
440 | }
441 | ],
442 | "source": [
443 | "# merge df_movies and df_ratings (outer join)\n",
444 | "df_movies.merge(df_ratings, on='imdb_title_id', how='outer')"
445 | ]
446 | },
447 | {
448 | "cell_type": "markdown",
449 | "id": "554af406",
450 | "metadata": {},
451 | "source": [
452 | "## Exclusive Outer join (Exclusive Full join)"
453 | ]
454 | },
455 | {
456 | "cell_type": "code",
457 | "execution_count": 6,
458 | "id": "18842159",
459 | "metadata": {},
460 | "outputs": [
461 | {
462 | "data": {
463 | "text/html": [
464 | "\n",
465 | "\n",
478 | "
\n",
479 | " \n",
480 | " \n",
481 | " | \n",
482 | " imdb_title_id | \n",
483 | " title | \n",
484 | " year | \n",
485 | " genre | \n",
486 | " country | \n",
487 | " total_votes | \n",
488 | " mean_vote | \n",
489 | " _merge | \n",
490 | "
\n",
491 | " \n",
492 | " \n",
493 | " \n",
494 | "
\n",
495 | "
"
496 | ],
497 | "text/plain": [
498 | "Empty DataFrame\n",
499 | "Columns: [imdb_title_id, title, year, genre, country, total_votes, mean_vote, _merge]\n",
500 | "Index: []"
501 | ]
502 | },
503 | "execution_count": 6,
504 | "metadata": {},
505 | "output_type": "execute_result"
506 | }
507 | ],
508 | "source": [
509 | "# merge df_movies and df_ratings (Exclusive Full join)\n",
510 | "df_movies.merge(df_ratings, on='imdb_title_id', how='outer', \n",
511 | " indicator=True).query(\"_merge=='left_only' or _merge=='right_only'\")"
512 | ]
513 | },
514 | {
515 | "cell_type": "markdown",
516 | "id": "dbee2e97",
517 | "metadata": {},
518 | "source": [
519 | "## Left join"
520 | ]
521 | },
522 | {
523 | "cell_type": "code",
524 | "execution_count": 7,
525 | "id": "5317af4a",
526 | "metadata": {},
527 | "outputs": [],
528 | "source": [
529 | "# extract a 50% sample of the df_movies dataframe\n",
530 | "df_movies_sample = df_movies.sample(frac=0.5)"
531 | ]
532 | },
533 | {
534 | "cell_type": "code",
535 | "execution_count": 8,
536 | "id": "3e41bd9f",
537 | "metadata": {},
538 | "outputs": [],
539 | "source": [
540 | "# merge df_movies_sample and df_ratings (left join)\n",
541 | "df_left = df_movies_sample.merge(df_ratings, on='imdb_title_id',\n",
542 | " how='left')"
543 | ]
544 | },
545 | {
546 | "cell_type": "markdown",
547 | "id": "3014d002",
548 | "metadata": {},
549 | "source": [
550 | "## Exclusive Left join"
551 | ]
552 | },
553 | {
554 | "cell_type": "code",
555 | "execution_count": 9,
556 | "id": "06b64daa",
557 | "metadata": {},
558 | "outputs": [],
559 | "source": [
560 | "# make a copy of the df_movies dataframe\n",
561 | "df_movies_2 = df_movies.copy()"
562 | ]
563 | },
564 | {
565 | "cell_type": "code",
566 | "execution_count": 10,
567 | "id": "192b0988",
568 | "metadata": {},
569 | "outputs": [],
570 | "source": [
571 | "# set the first 1000 values of the 'imdb_title_id' column to 'tt1234567890'\n",
572 | "for index in df_movies_2.index:\n",
573 | " if index < 1000:\n",
574 | " df_movies_2.loc[index, 'imdb_title_id'] = 'tt1234567890'"
575 | ]
576 | },
577 | {
578 | "cell_type": "code",
579 | "execution_count": 11,
580 | "id": "d79c5ba4",
581 | "metadata": {},
582 | "outputs": [],
583 | "source": [
584 | "# merge df_movies_2 and df_ratings (exclusive left join)\n",
585 | "df_exclusive_left = df_movies_2.merge(df_ratings,\n",
586 | " on='imdb_title_id',\n",
587 | " how='outer', \n",
588 | " indicator=True).query(\"_merge=='left_only'\")"
589 | ]
590 | },
591 | {
592 | "cell_type": "markdown",
593 | "id": "4a2007ee",
594 | "metadata": {},
595 | "source": [
596 | "## Right join"
597 | ]
598 | },
599 | {
600 | "cell_type": "code",
601 | "execution_count": 12,
602 | "id": "fe292156",
603 | "metadata": {},
604 | "outputs": [],
605 | "source": [
606 | "# extract a 30% sample of the df_ratings dataframe\n",
607 | "df_ratings_sample = df_ratings.sample(frac=0.3)"
608 | ]
609 | },
610 | {
611 | "cell_type": "code",
612 | "execution_count": 13,
613 | "id": "0ffdcd45",
614 | "metadata": {},
615 | "outputs": [],
616 | "source": [
617 | "# merge df_movies and df_ratings_sample (right join)\n",
618 | "df_right = df_movies.merge(df_ratings_sample, on='imdb_title_id',\n",
619 | " how='right')"
620 | ]
621 | },
622 | {
623 | "cell_type": "markdown",
624 | "id": "c79f3005",
625 | "metadata": {},
626 | "source": [
627 | "## Exclusive Right join"
628 | ]
629 | },
630 | {
631 | "cell_type": "code",
632 | "execution_count": 14,
633 | "id": "048c2de3",
634 | "metadata": {},
635 | "outputs": [],
636 | "source": [
637 | "# make a copy of the df_ratings dataframe\n",
638 | "df_ratings_2 = df_ratings.copy()"
639 | ]
640 | },
641 | {
642 | "cell_type": "code",
643 | "execution_count": 15,
644 | "id": "f71e384a",
645 | "metadata": {},
646 | "outputs": [],
647 | "source": [
648 | "# set the first 1000 values of the 'imdb_title_id' column to 'tt1234567890'\n",
649 | "for index in df_ratings_2.index:\n",
650 | " if index < 1000:\n",
651 | " df_ratings_2.loc[index, 'imdb_title_id'] = 'tt1234567890'"
652 | ]
653 | },
654 | {
655 | "cell_type": "code",
656 | "execution_count": 16,
657 | "id": "358048b3",
658 | "metadata": {},
659 | "outputs": [],
660 | "source": [
661 | "# merge df_movies and df_ratings_2 (exclusive right join)\n",
662 | "df_exclusive_right = df_movies.merge(df_ratings_2,\n",
663 | " on='imdb_title_id',\n",
664 | " how='outer', \n",
665 | " indicator=True).query(\"_merge=='right_only'\")"
666 | ]
667 | }
668 | ],
669 | "metadata": {
670 | "kernelspec": {
671 | "display_name": "Python 3",
672 | "language": "python",
673 | "name": "python3"
674 | },
675 | "language_info": {
676 | "codemirror_mode": {
677 | "name": "ipython",
678 | "version": 3
679 | },
680 | "file_extension": ".py",
681 | "mimetype": "text/x-python",
682 | "name": "python",
683 | "nbconvert_exporter": "python",
684 | "pygments_lexer": "ipython3",
685 | "version": "3.8.8"
686 | },
687 | "toc": {
688 | "base_numbering": 1,
689 | "nav_menu": {},
690 | "number_sections": true,
691 | "sideBar": true,
692 | "skip_h1_title": false,
693 | "title_cell": "Table of Contents",
694 | "title_sidebar": "Contents",
695 | "toc_cell": false,
696 | "toc_position": {},
697 | "toc_section_display": true,
698 | "toc_window_display": false
699 | }
700 | },
701 | "nbformat": 4,
702 | "nbformat_minor": 5
703 | }
704 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## FREE Python Cheat Sheet
2 |
3 | 🇺🇸 Link: https://artificialcorner.com/p/redeem-my-udemy-courses-for-free
4 |
5 | ## Formulario de Python Gratis
6 |
7 | 🇪🇸 Link: https://artificialcorner.com/p/formularios-gratis-de-python
8 |
--------------------------------------------------------------------------------
/Web Scraping with Pandas.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "id": "9bcec48b",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import pandas as pd"
11 | ]
12 | },
13 | {
14 | "cell_type": "markdown",
15 | "id": "b8784766",
16 | "metadata": {},
17 | "source": [
18 | "# Read a .csv from a URL with Pandas"
19 | ]
20 | },
21 | {
22 | "cell_type": "markdown",
23 | "id": "1e116955",
24 | "metadata": {},
25 | "source": [
26 | "Target website: https://www.football-data.co.uk/data.php"
27 | ]
28 | },
29 | {
30 | "cell_type": "code",
31 | "execution_count": 2,
32 | "id": "62ad1716",
33 | "metadata": {},
34 | "outputs": [],
35 | "source": [
36 | "# reading 1 csv file from the website\n",
37 | "df_premier21 = pd.read_csv('https://www.football-data.co.uk/mmz4281/2122/E0.csv')"
38 | ]
39 | },
40 | {
41 | "cell_type": "code",
42 | "execution_count": 3,
43 | "id": "e0b6be7b",
44 | "metadata": {},
45 | "outputs": [
46 | {
47 | "data": {
48 | "text/html": [
49 | "\n",
50 | "\n",
63 | "
\n",
64 | " \n",
65 | " \n",
66 | " | \n",
67 | " Div | \n",
68 | " Date | \n",
69 | " Time | \n",
70 | " HomeTeam | \n",
71 | " AwayTeam | \n",
72 | " FTHG | \n",
73 | " FTAG | \n",
74 | " FTR | \n",
75 | " HTHG | \n",
76 | " HTAG | \n",
77 | " ... | \n",
78 | " AvgC<2.5 | \n",
79 | " AHCh | \n",
80 | " B365CAHH | \n",
81 | " B365CAHA | \n",
82 | " PCAHH | \n",
83 | " PCAHA | \n",
84 | " MaxCAHH | \n",
85 | " MaxCAHA | \n",
86 | " AvgCAHH | \n",
87 | " AvgCAHA | \n",
88 | "
\n",
89 | " \n",
90 | " \n",
91 | " \n",
92 | " 0 | \n",
93 | " E0 | \n",
94 | " 13/08/2021 | \n",
95 | " 20:00 | \n",
96 | " Brentford | \n",
97 | " Arsenal | \n",
98 | " 2 | \n",
99 | " 0 | \n",
100 | " H | \n",
101 | " 1 | \n",
102 | " 0 | \n",
103 | " ... | \n",
104 | " 1.62 | \n",
105 | " 0.50 | \n",
106 | " 1.75 | \n",
107 | " 2.05 | \n",
108 | " 1.81 | \n",
109 | " 2.13 | \n",
110 | " 2.05 | \n",
111 | " 2.17 | \n",
112 | " 1.80 | \n",
113 | " 2.09 | \n",
114 | "
\n",
115 | " \n",
116 | " 1 | \n",
117 | " E0 | \n",
118 | " 14/08/2021 | \n",
119 | " 12:30 | \n",
120 | " Man United | \n",
121 | " Leeds | \n",
122 | " 5 | \n",
123 | " 1 | \n",
124 | " H | \n",
125 | " 1 | \n",
126 | " 0 | \n",
127 | " ... | \n",
128 | " 2.25 | \n",
129 | " -1.00 | \n",
130 | " 2.05 | \n",
131 | " 1.75 | \n",
132 | " 2.17 | \n",
133 | " 1.77 | \n",
134 | " 2.19 | \n",
135 | " 1.93 | \n",
136 | " 2.10 | \n",
137 | " 1.79 | \n",
138 | "
\n",
139 | " \n",
140 | " 2 | \n",
141 | " E0 | \n",
142 | " 14/08/2021 | \n",
143 | " 15:00 | \n",
144 | " Burnley | \n",
145 | " Brighton | \n",
146 | " 1 | \n",
147 | " 2 | \n",
148 | " A | \n",
149 | " 1 | \n",
150 | " 0 | \n",
151 | " ... | \n",
152 | " 1.62 | \n",
153 | " 0.25 | \n",
154 | " 1.79 | \n",
155 | " 2.15 | \n",
156 | " 1.81 | \n",
157 | " 2.14 | \n",
158 | " 1.82 | \n",
159 | " 2.19 | \n",
160 | " 1.79 | \n",
161 | " 2.12 | \n",
162 | "
\n",
163 | " \n",
164 | " 3 | \n",
165 | " E0 | \n",
166 | " 14/08/2021 | \n",
167 | " 15:00 | \n",
168 | " Chelsea | \n",
169 | " Crystal Palace | \n",
170 | " 3 | \n",
171 | " 0 | \n",
172 | " H | \n",
173 | " 2 | \n",
174 | " 0 | \n",
175 | " ... | \n",
176 | " 1.94 | \n",
177 | " -1.50 | \n",
178 | " 2.05 | \n",
179 | " 1.75 | \n",
180 | " 2.12 | \n",
181 | " 1.81 | \n",
182 | " 2.16 | \n",
183 | " 1.93 | \n",
184 | " 2.06 | \n",
185 | " 1.82 | \n",
186 | "
\n",
187 | " \n",
188 | " 4 | \n",
189 | " E0 | \n",
190 | " 14/08/2021 | \n",
191 | " 15:00 | \n",
192 | " Everton | \n",
193 | " Southampton | \n",
194 | " 3 | \n",
195 | " 1 | \n",
196 | " H | \n",
197 | " 0 | \n",
198 | " 1 | \n",
199 | " ... | \n",
200 | " 1.67 | \n",
201 | " -0.50 | \n",
202 | " 2.05 | \n",
203 | " 1.88 | \n",
204 | " 2.05 | \n",
205 | " 1.88 | \n",
206 | " 2.08 | \n",
207 | " 1.90 | \n",
208 | " 2.03 | \n",
209 | " 1.86 | \n",
210 | "
\n",
211 | " \n",
212 | " ... | \n",
213 | " ... | \n",
214 | " ... | \n",
215 | " ... | \n",
216 | " ... | \n",
217 | " ... | \n",
218 | " ... | \n",
219 | " ... | \n",
220 | " ... | \n",
221 | " ... | \n",
222 | " ... | \n",
223 | " ... | \n",
224 | " ... | \n",
225 | " ... | \n",
226 | " ... | \n",
227 | " ... | \n",
228 | " ... | \n",
229 | " ... | \n",
230 | " ... | \n",
231 | " ... | \n",
232 | " ... | \n",
233 | " ... | \n",
234 | "
\n",
235 | " \n",
236 | " 210 | \n",
237 | " E0 | \n",
238 | " 23/01/2022 | \n",
239 | " 14:00 | \n",
240 | " Arsenal | \n",
241 | " Burnley | \n",
242 | " 0 | \n",
243 | " 0 | \n",
244 | " D | \n",
245 | " 0 | \n",
246 | " 0 | \n",
247 | " ... | \n",
248 | " 2.32 | \n",
249 | " -1.50 | \n",
250 | " 1.92 | \n",
251 | " 2.01 | \n",
252 | " 1.93 | \n",
253 | " 2.00 | \n",
254 | " 1.93 | \n",
255 | " 2.11 | \n",
256 | " 1.90 | \n",
257 | " 1.98 | \n",
258 | "
\n",
259 | " \n",
260 | " 211 | \n",
261 | " E0 | \n",
262 | " 23/01/2022 | \n",
263 | " 14:00 | \n",
264 | " Crystal Palace | \n",
265 | " Liverpool | \n",
266 | " 1 | \n",
267 | " 3 | \n",
268 | " A | \n",
269 | " 0 | \n",
270 | " 2 | \n",
271 | " ... | \n",
272 | " 2.11 | \n",
273 | " 1.00 | \n",
274 | " 1.96 | \n",
275 | " 1.97 | \n",
276 | " 1.94 | \n",
277 | " 1.97 | \n",
278 | " 2.07 | \n",
279 | " 2.02 | \n",
280 | " 1.92 | \n",
281 | " 1.96 | \n",
282 | "
\n",
283 | " \n",
284 | " 212 | \n",
285 | " E0 | \n",
286 | " 23/01/2022 | \n",
287 | " 14:00 | \n",
288 | " Leicester | \n",
289 | " Brighton | \n",
290 | " 1 | \n",
291 | " 1 | \n",
292 | " D | \n",
293 | " 0 | \n",
294 | " 0 | \n",
295 | " ... | \n",
296 | " 2.10 | \n",
297 | " 0.00 | \n",
298 | " 1.92 | \n",
299 | " 2.01 | \n",
300 | " 1.93 | \n",
301 | " 1.99 | \n",
302 | " 1.94 | \n",
303 | " 2.13 | \n",
304 | " 1.87 | \n",
305 | " 2.01 | \n",
306 | "
\n",
307 | " \n",
308 | " 213 | \n",
309 | " E0 | \n",
310 | " 23/01/2022 | \n",
311 | " 16:30 | \n",
312 | " Chelsea | \n",
313 | " Tottenham | \n",
314 | " 2 | \n",
315 | " 0 | \n",
316 | " H | \n",
317 | " 0 | \n",
318 | " 0 | \n",
319 | " ... | \n",
320 | " 1.84 | \n",
321 | " -0.75 | \n",
322 | " 1.99 | \n",
323 | " 1.94 | \n",
324 | " 1.97 | \n",
325 | " 1.95 | \n",
326 | " 2.03 | \n",
327 | " 2.07 | \n",
328 | " 1.96 | \n",
329 | " 1.92 | \n",
330 | "
\n",
331 | " \n",
332 | " 214 | \n",
333 | " E0 | \n",
334 | " 05/02/2022 | \n",
335 | " 18:00 | \n",
336 | " Burnley | \n",
337 | " Watford | \n",
338 | " 0 | \n",
339 | " 0 | \n",
340 | " D | \n",
341 | " 0 | \n",
342 | " 0 | \n",
343 | " ... | \n",
344 | " 1.73 | \n",
345 | " -0.25 | \n",
346 | " 1.90 | \n",
347 | " 2.00 | \n",
348 | " 1.89 | \n",
349 | " 2.03 | \n",
350 | " 1.93 | \n",
351 | " 2.07 | \n",
352 | " 1.87 | \n",
353 | " 2.01 | \n",
354 | "
\n",
355 | " \n",
356 | "
\n",
357 | "
215 rows × 106 columns
\n",
358 | "
"
359 | ],
360 | "text/plain": [
361 | " Div Date Time HomeTeam AwayTeam FTHG FTAG FTR \\\n",
362 | "0 E0 13/08/2021 20:00 Brentford Arsenal 2 0 H \n",
363 | "1 E0 14/08/2021 12:30 Man United Leeds 5 1 H \n",
364 | "2 E0 14/08/2021 15:00 Burnley Brighton 1 2 A \n",
365 | "3 E0 14/08/2021 15:00 Chelsea Crystal Palace 3 0 H \n",
366 | "4 E0 14/08/2021 15:00 Everton Southampton 3 1 H \n",
367 | ".. .. ... ... ... ... ... ... .. \n",
368 | "210 E0 23/01/2022 14:00 Arsenal Burnley 0 0 D \n",
369 | "211 E0 23/01/2022 14:00 Crystal Palace Liverpool 1 3 A \n",
370 | "212 E0 23/01/2022 14:00 Leicester Brighton 1 1 D \n",
371 | "213 E0 23/01/2022 16:30 Chelsea Tottenham 2 0 H \n",
372 | "214 E0 05/02/2022 18:00 Burnley Watford 0 0 D \n",
373 | "\n",
374 | " HTHG HTAG ... AvgC<2.5 AHCh B365CAHH B365CAHA PCAHH PCAHA \\\n",
375 | "0 1 0 ... 1.62 0.50 1.75 2.05 1.81 2.13 \n",
376 | "1 1 0 ... 2.25 -1.00 2.05 1.75 2.17 1.77 \n",
377 | "2 1 0 ... 1.62 0.25 1.79 2.15 1.81 2.14 \n",
378 | "3 2 0 ... 1.94 -1.50 2.05 1.75 2.12 1.81 \n",
379 | "4 0 1 ... 1.67 -0.50 2.05 1.88 2.05 1.88 \n",
380 | ".. ... ... ... ... ... ... ... ... ... \n",
381 | "210 0 0 ... 2.32 -1.50 1.92 2.01 1.93 2.00 \n",
382 | "211 0 2 ... 2.11 1.00 1.96 1.97 1.94 1.97 \n",
383 | "212 0 0 ... 2.10 0.00 1.92 2.01 1.93 1.99 \n",
384 | "213 0 0 ... 1.84 -0.75 1.99 1.94 1.97 1.95 \n",
385 | "214 0 0 ... 1.73 -0.25 1.90 2.00 1.89 2.03 \n",
386 | "\n",
387 | " MaxCAHH MaxCAHA AvgCAHH AvgCAHA \n",
388 | "0 2.05 2.17 1.80 2.09 \n",
389 | "1 2.19 1.93 2.10 1.79 \n",
390 | "2 1.82 2.19 1.79 2.12 \n",
391 | "3 2.16 1.93 2.06 1.82 \n",
392 | "4 2.08 1.90 2.03 1.86 \n",
393 | ".. ... ... ... ... \n",
394 | "210 1.93 2.11 1.90 1.98 \n",
395 | "211 2.07 2.02 1.92 1.96 \n",
396 | "212 1.94 2.13 1.87 2.01 \n",
397 | "213 2.03 2.07 1.96 1.92 \n",
398 | "214 1.93 2.07 1.87 2.01 \n",
399 | "\n",
400 | "[215 rows x 106 columns]"
401 | ]
402 | },
403 | "execution_count": 3,
404 | "metadata": {},
405 | "output_type": "execute_result"
406 | }
407 | ],
408 | "source": [
409 | "# showing dataframe\n",
410 | "df_premier21"
411 | ]
412 | },
413 | {
414 | "cell_type": "code",
415 | "execution_count": 4,
416 | "id": "f9bcd9f5",
417 | "metadata": {},
418 | "outputs": [],
419 | "source": [
420 | "# rename columns\n",
421 | "df_premier21 = df_premier21.rename(columns={'Date':'date',\n",
422 | " 'HomeTeam':'home_team',\n",
423 | " 'AwayTeam':'away_team',\n",
424 | " 'FTHG': 'home_goals',\n",
425 | " 'FTAG': 'away_goals'})"
426 | ]
427 | },
428 | {
429 | "cell_type": "code",
430 | "execution_count": 5,
431 | "id": "a650ca82",
432 | "metadata": {},
433 | "outputs": [
434 | {
435 | "data": {
436 | "text/html": [
437 | "\n",
438 | "\n",
451 | "
\n",
452 | " \n",
453 | " \n",
454 | " | \n",
455 | " Div | \n",
456 | " date | \n",
457 | " Time | \n",
458 | " home_team | \n",
459 | " away_team | \n",
460 | " home_goals | \n",
461 | " away_goals | \n",
462 | " FTR | \n",
463 | " HTHG | \n",
464 | " HTAG | \n",
465 | " ... | \n",
466 | " AvgC<2.5 | \n",
467 | " AHCh | \n",
468 | " B365CAHH | \n",
469 | " B365CAHA | \n",
470 | " PCAHH | \n",
471 | " PCAHA | \n",
472 | " MaxCAHH | \n",
473 | " MaxCAHA | \n",
474 | " AvgCAHH | \n",
475 | " AvgCAHA | \n",
476 | "
\n",
477 | " \n",
478 | " \n",
479 | " \n",
480 | " 0 | \n",
481 | " E0 | \n",
482 | " 13/08/2021 | \n",
483 | " 20:00 | \n",
484 | " Brentford | \n",
485 | " Arsenal | \n",
486 | " 2 | \n",
487 | " 0 | \n",
488 | " H | \n",
489 | " 1 | \n",
490 | " 0 | \n",
491 | " ... | \n",
492 | " 1.62 | \n",
493 | " 0.50 | \n",
494 | " 1.75 | \n",
495 | " 2.05 | \n",
496 | " 1.81 | \n",
497 | " 2.13 | \n",
498 | " 2.05 | \n",
499 | " 2.17 | \n",
500 | " 1.80 | \n",
501 | " 2.09 | \n",
502 | "
\n",
503 | " \n",
504 | " 1 | \n",
505 | " E0 | \n",
506 | " 14/08/2021 | \n",
507 | " 12:30 | \n",
508 | " Man United | \n",
509 | " Leeds | \n",
510 | " 5 | \n",
511 | " 1 | \n",
512 | " H | \n",
513 | " 1 | \n",
514 | " 0 | \n",
515 | " ... | \n",
516 | " 2.25 | \n",
517 | " -1.00 | \n",
518 | " 2.05 | \n",
519 | " 1.75 | \n",
520 | " 2.17 | \n",
521 | " 1.77 | \n",
522 | " 2.19 | \n",
523 | " 1.93 | \n",
524 | " 2.10 | \n",
525 | " 1.79 | \n",
526 | "
\n",
527 | " \n",
528 | " 2 | \n",
529 | " E0 | \n",
530 | " 14/08/2021 | \n",
531 | " 15:00 | \n",
532 | " Burnley | \n",
533 | " Brighton | \n",
534 | " 1 | \n",
535 | " 2 | \n",
536 | " A | \n",
537 | " 1 | \n",
538 | " 0 | \n",
539 | " ... | \n",
540 | " 1.62 | \n",
541 | " 0.25 | \n",
542 | " 1.79 | \n",
543 | " 2.15 | \n",
544 | " 1.81 | \n",
545 | " 2.14 | \n",
546 | " 1.82 | \n",
547 | " 2.19 | \n",
548 | " 1.79 | \n",
549 | " 2.12 | \n",
550 | "
\n",
551 | " \n",
552 | " 3 | \n",
553 | " E0 | \n",
554 | " 14/08/2021 | \n",
555 | " 15:00 | \n",
556 | " Chelsea | \n",
557 | " Crystal Palace | \n",
558 | " 3 | \n",
559 | " 0 | \n",
560 | " H | \n",
561 | " 2 | \n",
562 | " 0 | \n",
563 | " ... | \n",
564 | " 1.94 | \n",
565 | " -1.50 | \n",
566 | " 2.05 | \n",
567 | " 1.75 | \n",
568 | " 2.12 | \n",
569 | " 1.81 | \n",
570 | " 2.16 | \n",
571 | " 1.93 | \n",
572 | " 2.06 | \n",
573 | " 1.82 | \n",
574 | "
\n",
575 | " \n",
576 | " 4 | \n",
577 | " E0 | \n",
578 | " 14/08/2021 | \n",
579 | " 15:00 | \n",
580 | " Everton | \n",
581 | " Southampton | \n",
582 | " 3 | \n",
583 | " 1 | \n",
584 | " H | \n",
585 | " 0 | \n",
586 | " 1 | \n",
587 | " ... | \n",
588 | " 1.67 | \n",
589 | " -0.50 | \n",
590 | " 2.05 | \n",
591 | " 1.88 | \n",
592 | " 2.05 | \n",
593 | " 1.88 | \n",
594 | " 2.08 | \n",
595 | " 1.90 | \n",
596 | " 2.03 | \n",
597 | " 1.86 | \n",
598 | "
\n",
599 | " \n",
600 | " ... | \n",
601 | " ... | \n",
602 | " ... | \n",
603 | " ... | \n",
604 | " ... | \n",
605 | " ... | \n",
606 | " ... | \n",
607 | " ... | \n",
608 | " ... | \n",
609 | " ... | \n",
610 | " ... | \n",
611 | " ... | \n",
612 | " ... | \n",
613 | " ... | \n",
614 | " ... | \n",
615 | " ... | \n",
616 | " ... | \n",
617 | " ... | \n",
618 | " ... | \n",
619 | " ... | \n",
620 | " ... | \n",
621 | " ... | \n",
622 | "
\n",
623 | " \n",
624 | " 210 | \n",
625 | " E0 | \n",
626 | " 23/01/2022 | \n",
627 | " 14:00 | \n",
628 | " Arsenal | \n",
629 | " Burnley | \n",
630 | " 0 | \n",
631 | " 0 | \n",
632 | " D | \n",
633 | " 0 | \n",
634 | " 0 | \n",
635 | " ... | \n",
636 | " 2.32 | \n",
637 | " -1.50 | \n",
638 | " 1.92 | \n",
639 | " 2.01 | \n",
640 | " 1.93 | \n",
641 | " 2.00 | \n",
642 | " 1.93 | \n",
643 | " 2.11 | \n",
644 | " 1.90 | \n",
645 | " 1.98 | \n",
646 | "
\n",
647 | " \n",
648 | " 211 | \n",
649 | " E0 | \n",
650 | " 23/01/2022 | \n",
651 | " 14:00 | \n",
652 | " Crystal Palace | \n",
653 | " Liverpool | \n",
654 | " 1 | \n",
655 | " 3 | \n",
656 | " A | \n",
657 | " 0 | \n",
658 | " 2 | \n",
659 | " ... | \n",
660 | " 2.11 | \n",
661 | " 1.00 | \n",
662 | " 1.96 | \n",
663 | " 1.97 | \n",
664 | " 1.94 | \n",
665 | " 1.97 | \n",
666 | " 2.07 | \n",
667 | " 2.02 | \n",
668 | " 1.92 | \n",
669 | " 1.96 | \n",
670 | "
\n",
671 | " \n",
672 | " 212 | \n",
673 | " E0 | \n",
674 | " 23/01/2022 | \n",
675 | " 14:00 | \n",
676 | " Leicester | \n",
677 | " Brighton | \n",
678 | " 1 | \n",
679 | " 1 | \n",
680 | " D | \n",
681 | " 0 | \n",
682 | " 0 | \n",
683 | " ... | \n",
684 | " 2.10 | \n",
685 | " 0.00 | \n",
686 | " 1.92 | \n",
687 | " 2.01 | \n",
688 | " 1.93 | \n",
689 | " 1.99 | \n",
690 | " 1.94 | \n",
691 | " 2.13 | \n",
692 | " 1.87 | \n",
693 | " 2.01 | \n",
694 | "
\n",
695 | " \n",
696 | " 213 | \n",
697 | " E0 | \n",
698 | " 23/01/2022 | \n",
699 | " 16:30 | \n",
700 | " Chelsea | \n",
701 | " Tottenham | \n",
702 | " 2 | \n",
703 | " 0 | \n",
704 | " H | \n",
705 | " 0 | \n",
706 | " 0 | \n",
707 | " ... | \n",
708 | " 1.84 | \n",
709 | " -0.75 | \n",
710 | " 1.99 | \n",
711 | " 1.94 | \n",
712 | " 1.97 | \n",
713 | " 1.95 | \n",
714 | " 2.03 | \n",
715 | " 2.07 | \n",
716 | " 1.96 | \n",
717 | " 1.92 | \n",
718 | "
\n",
719 | " \n",
720 | " 214 | \n",
721 | " E0 | \n",
722 | " 05/02/2022 | \n",
723 | " 18:00 | \n",
724 | " Burnley | \n",
725 | " Watford | \n",
726 | " 0 | \n",
727 | " 0 | \n",
728 | " D | \n",
729 | " 0 | \n",
730 | " 0 | \n",
731 | " ... | \n",
732 | " 1.73 | \n",
733 | " -0.25 | \n",
734 | " 1.90 | \n",
735 | " 2.00 | \n",
736 | " 1.89 | \n",
737 | " 2.03 | \n",
738 | " 1.93 | \n",
739 | " 2.07 | \n",
740 | " 1.87 | \n",
741 | " 2.01 | \n",
742 | "
\n",
743 | " \n",
744 | "
\n",
745 | "
215 rows × 106 columns
\n",
746 | "
"
747 | ],
748 | "text/plain": [
749 | " Div date Time home_team away_team home_goals \\\n",
750 | "0 E0 13/08/2021 20:00 Brentford Arsenal 2 \n",
751 | "1 E0 14/08/2021 12:30 Man United Leeds 5 \n",
752 | "2 E0 14/08/2021 15:00 Burnley Brighton 1 \n",
753 | "3 E0 14/08/2021 15:00 Chelsea Crystal Palace 3 \n",
754 | "4 E0 14/08/2021 15:00 Everton Southampton 3 \n",
755 | ".. .. ... ... ... ... ... \n",
756 | "210 E0 23/01/2022 14:00 Arsenal Burnley 0 \n",
757 | "211 E0 23/01/2022 14:00 Crystal Palace Liverpool 1 \n",
758 | "212 E0 23/01/2022 14:00 Leicester Brighton 1 \n",
759 | "213 E0 23/01/2022 16:30 Chelsea Tottenham 2 \n",
760 | "214 E0 05/02/2022 18:00 Burnley Watford 0 \n",
761 | "\n",
762 | " away_goals FTR HTHG HTAG ... AvgC<2.5 AHCh B365CAHH B365CAHA \\\n",
763 | "0 0 H 1 0 ... 1.62 0.50 1.75 2.05 \n",
764 | "1 1 H 1 0 ... 2.25 -1.00 2.05 1.75 \n",
765 | "2 2 A 1 0 ... 1.62 0.25 1.79 2.15 \n",
766 | "3 0 H 2 0 ... 1.94 -1.50 2.05 1.75 \n",
767 | "4 1 H 0 1 ... 1.67 -0.50 2.05 1.88 \n",
768 | ".. ... .. ... ... ... ... ... ... ... \n",
769 | "210 0 D 0 0 ... 2.32 -1.50 1.92 2.01 \n",
770 | "211 3 A 0 2 ... 2.11 1.00 1.96 1.97 \n",
771 | "212 1 D 0 0 ... 2.10 0.00 1.92 2.01 \n",
772 | "213 0 H 0 0 ... 1.84 -0.75 1.99 1.94 \n",
773 | "214 0 D 0 0 ... 1.73 -0.25 1.90 2.00 \n",
774 | "\n",
775 | " PCAHH PCAHA MaxCAHH MaxCAHA AvgCAHH AvgCAHA \n",
776 | "0 1.81 2.13 2.05 2.17 1.80 2.09 \n",
777 | "1 2.17 1.77 2.19 1.93 2.10 1.79 \n",
778 | "2 1.81 2.14 1.82 2.19 1.79 2.12 \n",
779 | "3 2.12 1.81 2.16 1.93 2.06 1.82 \n",
780 | "4 2.05 1.88 2.08 1.90 2.03 1.86 \n",
781 | ".. ... ... ... ... ... ... \n",
782 | "210 1.93 2.00 1.93 2.11 1.90 1.98 \n",
783 | "211 1.94 1.97 2.07 2.02 1.92 1.96 \n",
784 | "212 1.93 1.99 1.94 2.13 1.87 2.01 \n",
785 | "213 1.97 1.95 2.03 2.07 1.96 1.92 \n",
786 | "214 1.89 2.03 1.93 2.07 1.87 2.01 \n",
787 | "\n",
788 | "[215 rows x 106 columns]"
789 | ]
790 | },
791 | "execution_count": 5,
792 | "metadata": {},
793 | "output_type": "execute_result"
794 | }
795 | ],
796 | "source": [
797 | "# show dataframe\n",
798 | "df_premier21"
799 | ]
800 | },
801 | {
802 | "cell_type": "markdown",
803 | "id": "sacred-march",
804 | "metadata": {},
805 | "source": []
806 | },
807 | {
808 | "cell_type": "markdown",
809 | "id": "sacred-directive",
810 | "metadata": {},
811 | "source": [
812 | "# Read HTML"
813 | ]
814 | },
815 | {
816 | "cell_type": "markdown",
817 | "id": "dental-cradle",
818 | "metadata": {},
819 | "source": [
820 | "Target Website: https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)"
821 | ]
822 | },
823 | {
824 | "cell_type": "code",
825 | "execution_count": 6,
826 | "id": "sacred-louisville",
827 | "metadata": {},
828 | "outputs": [],
829 | "source": [
830 | "simpsons = pd.read_html('https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)')"
831 | ]
832 | },
833 | {
834 | "cell_type": "code",
835 | "execution_count": 7,
836 | "id": "pending-plaza",
837 | "metadata": {},
838 | "outputs": [
839 | {
840 | "data": {
841 | "text/html": [
842 | "\n",
843 | "\n",
856 | "
\n",
857 | " \n",
858 | " \n",
859 | " | \n",
860 | " No.overall | \n",
861 | " No. inseason | \n",
862 | " Title | \n",
863 | " Directed by | \n",
864 | " Written by | \n",
865 | " Original air date | \n",
866 | " Prod.code | \n",
867 | " U.S. viewers(millions) | \n",
868 | "
\n",
869 | " \n",
870 | " \n",
871 | " \n",
872 | " 0 | \n",
873 | " 1 | \n",
874 | " 1 | \n",
875 | " \"Simpsons Roasting on an Open Fire\" | \n",
876 | " David Silverman | \n",
877 | " Mimi Pond | \n",
878 | " December 17, 1989 | \n",
879 | " 7G08 | \n",
880 | " 26.7[46] | \n",
881 | "
\n",
882 | " \n",
883 | " 1 | \n",
884 | " 2 | \n",
885 | " 2 | \n",
886 | " \"Bart the Genius\" | \n",
887 | " David Silverman | \n",
888 | " Jon Vitti | \n",
889 | " January 14, 1990 | \n",
890 | " 7G02 | \n",
891 | " 24.5[46] | \n",
892 | "
\n",
893 | " \n",
894 | " 2 | \n",
895 | " 3 | \n",
896 | " 3 | \n",
897 | " \"Homer's Odyssey\" | \n",
898 | " Wesley Archer | \n",
899 | " Jay Kogen & Wallace Wolodarsky | \n",
900 | " January 21, 1990 | \n",
901 | " 7G03 | \n",
902 | " 27.5[47] | \n",
903 | "
\n",
904 | " \n",
905 | " 3 | \n",
906 | " 4 | \n",
907 | " 4 | \n",
908 | " \"There's No Disgrace Like Home\" | \n",
909 | " Gregg Vanzo & Kent Butterworth | \n",
910 | " Al Jean & Mike Reiss | \n",
911 | " January 28, 1990 | \n",
912 | " 7G04 | \n",
913 | " 20.2[48] | \n",
914 | "
\n",
915 | " \n",
916 | " 4 | \n",
917 | " 5 | \n",
918 | " 5 | \n",
919 | " \"Bart the General\" | \n",
920 | " David Silverman | \n",
921 | " John Swartzwelder | \n",
922 | " February 4, 1990 | \n",
923 | " 7G05 | \n",
924 | " 27.1[49] | \n",
925 | "
\n",
926 | " \n",
927 | "
\n",
928 | "
"
929 | ],
930 | "text/plain": [
931 | " No.overall No. inseason Title \\\n",
932 | "0 1 1 \"Simpsons Roasting on an Open Fire\" \n",
933 | "1 2 2 \"Bart the Genius\" \n",
934 | "2 3 3 \"Homer's Odyssey\" \n",
935 | "3 4 4 \"There's No Disgrace Like Home\" \n",
936 | "4 5 5 \"Bart the General\" \n",
937 | "\n",
938 | " Directed by Written by \\\n",
939 | "0 David Silverman Mimi Pond \n",
940 | "1 David Silverman Jon Vitti \n",
941 | "2 Wesley Archer Jay Kogen & Wallace Wolodarsky \n",
942 | "3 Gregg Vanzo & Kent Butterworth Al Jean & Mike Reiss \n",
943 | "4 David Silverman John Swartzwelder \n",
944 | "\n",
945 | " Original air date Prod.code U.S. viewers(millions) \n",
946 | "0 December 17, 1989 7G08 26.7[46] \n",
947 | "1 January 14, 1990 7G02 24.5[46] \n",
948 | "2 January 21, 1990 7G03 27.5[47] \n",
949 | "3 January 28, 1990 7G04 20.2[48] \n",
950 | "4 February 4, 1990 7G05 27.1[49] "
951 | ]
952 | },
953 | "execution_count": 7,
954 | "metadata": {},
955 | "output_type": "execute_result"
956 | }
957 | ],
958 | "source": [
959 | "simpsons[1].head()"
960 | ]
961 | }
962 | ],
963 | "metadata": {
964 | "hide_input": false,
965 | "kernelspec": {
966 | "display_name": "Python 3",
967 | "language": "python",
968 | "name": "python3"
969 | },
970 | "language_info": {
971 | "codemirror_mode": {
972 | "name": "ipython",
973 | "version": 3
974 | },
975 | "file_extension": ".py",
976 | "mimetype": "text/x-python",
977 | "name": "python",
978 | "nbconvert_exporter": "python",
979 | "pygments_lexer": "ipython3",
980 | "version": "3.7.3"
981 | },
982 | "nbTranslate": {
983 | "displayLangs": [
984 | "es",
985 | "en"
986 | ],
987 | "hotkey": "alt-t",
988 | "langInMainMenu": true,
989 | "sourceLang": "en",
990 | "targetLang": "es",
991 | "useGoogleTranslate": true
992 | },
993 | "toc": {
994 | "base_numbering": 1,
995 | "nav_menu": {},
996 | "number_sections": true,
997 | "sideBar": true,
998 | "skip_h1_title": false,
999 | "title_cell": "Table of Contents",
1000 | "title_sidebar": "Contents",
1001 | "toc_cell": false,
1002 | "toc_position": {},
1003 | "toc_section_display": true,
1004 | "toc_window_display": false
1005 | }
1006 | },
1007 | "nbformat": 4,
1008 | "nbformat_minor": 5
1009 | }
1010 |
--------------------------------------------------------------------------------
/loc vs iloc.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "id": "90a2167f",
6 | "metadata": {},
7 | "source": [
8 | "# loc vs iloc"
9 | ]
10 | },
11 | {
12 | "cell_type": "code",
13 | "execution_count": 1,
14 | "id": "bc43ac4b",
15 | "metadata": {},
16 | "outputs": [],
17 | "source": [
18 | "import pandas as pd"
19 | ]
20 | },
21 | {
22 | "cell_type": "code",
23 | "execution_count": 2,
24 | "id": "bc70a60e",
25 | "metadata": {},
26 | "outputs": [],
27 | "source": [
28 | "names = ['L. Messi', 'Cristiano Ronaldo', 'Neymar Jr', 'J. Oblak', 'E. Hazard']\n",
29 | "age = [32, 34, 27, 26, 28]\n",
30 | "height_cm = [170, 187, 175, 188, 175]\n",
31 | "nationality = ['Argentina', 'Portugal', 'Brazil', 'Slovenia', 'Belgium']\n",
32 | "club = ['Paris Saint-Germain', 'Manchester United', 'Paris Saint-Germain', 'Atlético Madrid', 'Real Madrid']\n",
33 | "\n",
34 | "df = pd.DataFrame(index=names, data={'age':age, 'height_cm':height_cm, 'nationality':nationality, 'club':club})"
35 | ]
36 | },
37 | {
38 | "cell_type": "code",
39 | "execution_count": 3,
40 | "id": "f85e1494",
41 | "metadata": {},
42 | "outputs": [
43 | {
44 | "data": {
45 | "text/html": [
46 | "\n",
47 | "\n",
60 | "
\n",
61 | " \n",
62 | " \n",
63 | " | \n",
64 | " age | \n",
65 | " height_cm | \n",
66 | " nationality | \n",
67 | " club | \n",
68 | "
\n",
69 | " \n",
70 | " \n",
71 | " \n",
72 | " L. Messi | \n",
73 | " 32 | \n",
74 | " 170 | \n",
75 | " Argentina | \n",
76 | " Paris Saint-Germain | \n",
77 | "
\n",
78 | " \n",
79 | " Cristiano Ronaldo | \n",
80 | " 34 | \n",
81 | " 187 | \n",
82 | " Portugal | \n",
83 | " Manchester United | \n",
84 | "
\n",
85 | " \n",
86 | " Neymar Jr | \n",
87 | " 27 | \n",
88 | " 175 | \n",
89 | " Brazil | \n",
90 | " Paris Saint-Germain | \n",
91 | "
\n",
92 | " \n",
93 | " J. Oblak | \n",
94 | " 26 | \n",
95 | " 188 | \n",
96 | " Slovenia | \n",
97 | " Atlético Madrid | \n",
98 | "
\n",
99 | " \n",
100 | " E. Hazard | \n",
101 | " 28 | \n",
102 | " 175 | \n",
103 | " Belgium | \n",
104 | " Real Madrid | \n",
105 | "
\n",
106 | " \n",
107 | "
\n",
108 | "
"
109 | ],
110 | "text/plain": [
111 | " age height_cm nationality club\n",
112 | "L. Messi 32 170 Argentina Paris Saint-Germain\n",
113 | "Cristiano Ronaldo 34 187 Portugal Manchester United\n",
114 | "Neymar Jr 27 175 Brazil Paris Saint-Germain\n",
115 | "J. Oblak 26 188 Slovenia Atlético Madrid\n",
116 | "E. Hazard 28 175 Belgium Real Madrid"
117 | ]
118 | },
119 | "execution_count": 3,
120 | "metadata": {},
121 | "output_type": "execute_result"
122 | }
123 | ],
124 | "source": [
125 | "df"
126 | ]
127 | },
128 | {
129 | "cell_type": "markdown",
130 | "id": "afe4e373",
131 | "metadata": {},
132 | "source": [
133 | "## Selecting with a single value"
134 | ]
135 | },
136 | {
137 | "cell_type": "code",
138 | "execution_count": 4,
139 | "id": "e67bf7a1",
140 | "metadata": {},
141 | "outputs": [
142 | {
143 | "data": {
144 | "text/plain": [
145 | "170"
146 | ]
147 | },
148 | "execution_count": 4,
149 | "metadata": {},
150 | "output_type": "execute_result"
151 | }
152 | ],
153 | "source": [
154 | "# get the height of L.Messi\n",
155 | "# loc\n",
156 | "df.loc['L. Messi', 'height_cm']\n",
157 | "# iloc\n",
158 | "df.iloc[0, 1]"
159 | ]
160 | },
161 | {
162 | "cell_type": "code",
163 | "execution_count": 5,
164 | "id": "fcebb39c",
165 | "metadata": {},
166 | "outputs": [
167 | {
168 | "data": {
169 | "text/plain": [
170 | "187"
171 | ]
172 | },
173 | "execution_count": 5,
174 | "metadata": {},
175 | "output_type": "execute_result"
176 | }
177 | ],
178 | "source": [
179 | "# get the height of Cristiano Ronaldo\n",
180 | "# loc\n",
181 | "df.loc['Cristiano Ronaldo', 'height_cm']\n",
182 | "# iloc\n",
183 | "df.iloc[1, 1]"
184 | ]
185 | },
186 | {
187 | "cell_type": "code",
188 | "execution_count": 6,
189 | "id": "e86ba675",
190 | "metadata": {},
191 | "outputs": [
192 | {
193 | "data": {
194 | "text/plain": [
195 | "age 32\n",
196 | "height_cm 170\n",
197 | "nationality Argentina\n",
198 | "club Paris Saint-Germain\n",
199 | "Name: L. Messi, dtype: object"
200 | ]
201 | },
202 | "execution_count": 6,
203 | "metadata": {},
204 | "output_type": "execute_result"
205 | }
206 | ],
207 | "source": [
208 | "# get all the data about L.Messi\n",
209 | "# loc\n",
210 | "df.loc['L. Messi', :]\n",
211 | "# iloc\n",
212 | "df.iloc[0, :]"
213 | ]
214 | },
215 | {
216 | "cell_type": "markdown",
217 | "id": "71d2b2b8",
218 | "metadata": {},
219 | "source": [
220 | "## Selecting with a list of values"
221 | ]
222 | },
223 | {
224 | "cell_type": "code",
225 | "execution_count": 7,
226 | "id": "f62aa834",
227 | "metadata": {},
228 | "outputs": [
229 | {
230 | "data": {
231 | "text/html": [
232 | "\n",
233 | "\n",
246 | "
\n",
247 | " \n",
248 | " \n",
249 | " | \n",
250 | " age | \n",
251 | " height_cm | \n",
252 | " nationality | \n",
253 | " club | \n",
254 | "
\n",
255 | " \n",
256 | " \n",
257 | " \n",
258 | " L. Messi | \n",
259 | " 32 | \n",
260 | " 170 | \n",
261 | " Argentina | \n",
262 | " Paris Saint-Germain | \n",
263 | "
\n",
264 | " \n",
265 | " Cristiano Ronaldo | \n",
266 | " 34 | \n",
267 | " 187 | \n",
268 | " Portugal | \n",
269 | " Manchester United | \n",
270 | "
\n",
271 | " \n",
272 | "
\n",
273 | "
"
274 | ],
275 | "text/plain": [
276 | " age height_cm nationality club\n",
277 | "L. Messi 32 170 Argentina Paris Saint-Germain\n",
278 | "Cristiano Ronaldo 34 187 Portugal Manchester United"
279 | ]
280 | },
281 | "execution_count": 7,
282 | "metadata": {},
283 | "output_type": "execute_result"
284 | }
285 | ],
286 | "source": [
287 | "# get all data about L.Messi and Cristiano Ronaldo\n",
288 | "# loc\n",
289 | "df.loc[['L. Messi', 'Cristiano Ronaldo']]\n",
290 | "# iloc\n",
291 | "df.iloc[[0, 1]]"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": 8,
297 | "id": "78a4fed7",
298 | "metadata": {},
299 | "outputs": [
300 | {
301 | "data": {
302 | "text/plain": [
303 | "L. Messi 170\n",
304 | "Cristiano Ronaldo 187\n",
305 | "Name: height_cm, dtype: int64"
306 | ]
307 | },
308 | "execution_count": 8,
309 | "metadata": {},
310 | "output_type": "execute_result"
311 | }
312 | ],
313 | "source": [
314 | "# get the height of L.Messi and Cristiano Ronaldo\n",
315 | "df.loc[['L. Messi', 'Cristiano Ronaldo'], 'height_cm']\n",
316 | "\n",
317 |     "# iloc\n",
318 | "df.iloc[[0, 1], 1]"
319 | ]
320 | },
321 | {
322 | "cell_type": "markdown",
323 | "id": "4501bd3d",
324 | "metadata": {},
325 | "source": [
326 | "## Selecting a range of data with a slice"
327 | ]
328 | },
329 | {
330 | "cell_type": "code",
331 | "execution_count": 9,
332 | "id": "06fba14b",
333 | "metadata": {},
334 | "outputs": [
335 | {
336 | "data": {
337 | "text/html": [
338 | "\n",
339 | "\n",
352 | "
\n",
353 | " \n",
354 | " \n",
355 | " | \n",
356 | " age | \n",
357 | " height_cm | \n",
358 | " nationality | \n",
359 | "
\n",
360 | " \n",
361 | " \n",
362 | " \n",
363 | " L. Messi | \n",
364 | " 32 | \n",
365 | " 170 | \n",
366 | " Argentina | \n",
367 | "
\n",
368 | " \n",
369 | " Cristiano Ronaldo | \n",
370 | " 34 | \n",
371 | " 187 | \n",
372 | " Portugal | \n",
373 | "
\n",
374 | " \n",
375 | "
\n",
376 | "
"
377 | ],
378 | "text/plain": [
379 | " age height_cm nationality\n",
380 | "L. Messi 32 170 Argentina\n",
381 | "Cristiano Ronaldo 34 187 Portugal"
382 | ]
383 | },
384 | "execution_count": 9,
385 | "metadata": {},
386 | "output_type": "execute_result"
387 | }
388 | ],
389 | "source": [
390 | "# slice column labels: from age to nationality\n",
391 | "# loc\n",
392 | "players = ['L. Messi', 'Cristiano Ronaldo']\n",
393 | "df.loc[players, 'age':'nationality']\n",
394 | "\n",
395 | "# iloc\n",
396 | "players = [0, 1]\n",
397 | "df.iloc[players, 0:3] # age:nationality+1"
398 | ]
399 | },
400 | {
401 | "cell_type": "markdown",
402 | "id": "c17b2be7",
403 | "metadata": {},
404 | "source": [
405 | "## Selecting with conditions"
406 | ]
407 | },
408 | {
409 | "cell_type": "code",
410 | "execution_count": 10,
411 | "id": "355313ed",
412 | "metadata": {},
413 | "outputs": [
414 | {
415 | "data": {
416 | "text/html": [
417 | "\n",
418 | "\n",
431 | "
\n",
432 | " \n",
433 | " \n",
434 | " | \n",
435 | " age | \n",
436 | " height_cm | \n",
437 | " club | \n",
438 | "
\n",
439 | " \n",
440 | " \n",
441 | " \n",
442 | " Cristiano Ronaldo | \n",
443 | " 34 | \n",
444 | " 187 | \n",
445 | " Manchester United | \n",
446 | "
\n",
447 | " \n",
448 | " J. Oblak | \n",
449 | " 26 | \n",
450 | " 188 | \n",
451 | " Atlético Madrid | \n",
452 | "
\n",
453 | " \n",
454 | "
\n",
455 | "
"
456 | ],
457 | "text/plain": [
458 | " age height_cm club\n",
459 | "Cristiano Ronaldo 34 187 Manchester United\n",
460 | "J. Oblak 26 188 Atlético Madrid"
461 | ]
462 | },
463 | "execution_count": 10,
464 | "metadata": {},
465 | "output_type": "execute_result"
466 | }
467 | ],
468 | "source": [
469 |     "# one condition: select players with height above 180cm\n",
470 | "# loc\n",
471 | "columns = ['age', 'height_cm', 'club']\n",
472 | "df.loc[df['height_cm']>180, columns]\n",
473 | "\n",
474 | "# iloc\n",
475 | "columns = [0,1,3]\n",
476 | "df.iloc[list(df['height_cm']>180), columns]"
477 | ]
478 | },
479 | {
480 | "cell_type": "code",
481 | "execution_count": 11,
482 | "id": "2515c8f2",
483 | "metadata": {},
484 | "outputs": [
485 | {
486 | "data": {
487 | "text/html": [
488 | "\n",
489 | "\n",
502 | "
\n",
503 | " \n",
504 | " \n",
505 | " | \n",
506 | " age | \n",
507 | " height_cm | \n",
508 | " nationality | \n",
509 | " club | \n",
510 | "
\n",
511 | " \n",
512 | " \n",
513 | " \n",
514 | " Neymar Jr | \n",
515 | " 27 | \n",
516 | " 175 | \n",
517 | " Brazil | \n",
518 | " Paris Saint-Germain | \n",
519 | "
\n",
520 | " \n",
521 | "
\n",
522 | "
"
523 | ],
524 | "text/plain": [
525 | " age height_cm nationality club\n",
526 | "Neymar Jr 27 175 Brazil Paris Saint-Germain"
527 | ]
528 | },
529 | "execution_count": 11,
530 | "metadata": {},
531 | "output_type": "execute_result"
532 | }
533 | ],
534 | "source": [
535 |     "# multiple conditions: select players with height above 170cm that played in PSG\n",
536 | "# loc\n",
537 | "df.loc[(df['height_cm']>170) & (df['club']=='Paris Saint-Germain'), :]\n",
538 | "\n",
539 | "# iloc\n",
540 | "df.iloc[list((df['height_cm']>170) & (df['club']=='Paris Saint-Germain')), :]"
541 | ]
542 | }
543 | ],
544 | "metadata": {
545 | "kernelspec": {
546 | "display_name": "Python 3",
547 | "language": "python",
548 | "name": "python3"
549 | },
550 | "language_info": {
551 | "codemirror_mode": {
552 | "name": "ipython",
553 | "version": 3
554 | },
555 | "file_extension": ".py",
556 | "mimetype": "text/x-python",
557 | "name": "python",
558 | "nbconvert_exporter": "python",
559 | "pygments_lexer": "ipython3",
560 | "version": "3.8.8"
561 | },
562 | "toc": {
563 | "base_numbering": 1,
564 | "nav_menu": {},
565 | "number_sections": true,
566 | "sideBar": true,
567 | "skip_h1_title": false,
568 | "title_cell": "Table of Contents",
569 | "title_sidebar": "Contents",
570 | "toc_cell": false,
571 | "toc_position": {},
572 | "toc_section_display": true,
573 | "toc_window_display": false
574 | }
575 | },
576 | "nbformat": 4,
577 | "nbformat_minor": 5
578 | }
579 |
--------------------------------------------------------------------------------