├── 1.Python Crash Course └── Python Crash Course.ipynb ├── 2.Intro to Pandas ├── 1.Creating a Dataframe.ipynb ├── 2.Displaying a Dataframe.ipynb ├── 3. Basic Attributes, Methods and Functions.ipynb ├── 4.Selecting One Column from a Dataframe.ipynb ├── 5.Selecting Two or More Columns from a Dataframe.ipynb ├── 6.Add New Column to a Dataframe.ipynb ├── 7.Operations on Dataframes.ipynb ├── 8.The value_counts() method.ipynb ├── 9.Sort a Dataframe with sort_values().ipynb └── StudentsPerformance.csv ├── 3.Pivot Table ├── 1.pivot() and pivot_table().ipynb ├── gdp.csv └── supermarket_sales.xlsx ├── 4.Data Visualization ├── 1.Dataset Overview and Making Pivot Table.ipynb ├── 2.Data Visualization with Pandas.ipynb ├── 3.Adding Matplotlib to Pandas.ipynb └── population_total.csv ├── Exercises ├── Intro to Pandas │ ├── Introduction to Pandas-Exercise.ipynb │ ├── Introduction to Pandas-Solution.ipynb │ └── bestsellers with categories.csv └── Merging and Concatenating DataFrames │ ├── IMDb movies.csv.zip │ ├── IMDb ratings.csv.zip │ ├── Merging and Concatenating DataFrames-Exercise.ipynb │ └── Merging and Concatenating DataFrames-Solution.ipynb ├── README.md ├── Web Scraping with Pandas.ipynb ├── loc vs iloc.ipynb └── players_20.csv /2.Intro to Pandas/1.Creating a Dataframe.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "id": "7fa9cddf", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd\n", 11 | "import numpy as np" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "id": "81a09ffd", 17 | "metadata": {}, 18 | "source": [ 19 | "# Creating a dataframe from an array" 20 | ] 21 | }, 22 | { 23 | "cell_type": "markdown", 24 | "id": "6db0cb3f", 25 | "metadata": {}, 26 | "source": [ 27 | "## Option 1" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 3, 33 | "id": "93d02bc5", 34 | "metadata": {}, 35 | 
"outputs": [], 36 | "source": [ 37 | "# creating an array\n", 38 | "data = np.array([[1, 4], [2, 5], [3, 6]])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 4, 44 | "id": "b9be7874", 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "# creating a dataframe\n", 49 | "df = pd.DataFrame(data, index=['row1', 'row2', 'row3'],\n", 50 | " columns=['col1', 'col2'])" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 5, 56 | "id": "7787533c", 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "data": { 61 | "text/html": [ 62 | "
\n", 63 | "\n", 76 | "\n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | "
col1col2
row114
row225
row336
\n", 102 | "
" 103 | ], 104 | "text/plain": [ 105 | " col1 col2\n", 106 | "row1 1 4\n", 107 | "row2 2 5\n", 108 | "row3 3 6" 109 | ] 110 | }, 111 | "execution_count": 5, 112 | "metadata": {}, 113 | "output_type": "execute_result" 114 | } 115 | ], 116 | "source": [ 117 | "# showing the datafrane\n", 118 | "df" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "9a66d12d", 124 | "metadata": {}, 125 | "source": [ 126 | "## Option 2" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 6, 132 | "id": "39caa078", 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [ 136 | "# creating an array with list shape\n", 137 | "data = [[1, 4], [2, 5], [3, 6]]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 7, 143 | "id": "dfca7ffa", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "# creating a dataframe\n", 148 | "df = pd.DataFrame(data, index=['row1', 'row2', 'row3'],\n", 149 | " columns=['col1', 'col2'])" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 8, 155 | "id": "7c057728", 156 | "metadata": {}, 157 | "outputs": [ 158 | { 159 | "data": { 160 | "text/html": [ 161 | "
\n", 162 | "\n", 175 | "\n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | "
col1col2
row114
row225
row336
\n", 201 | "
" 202 | ], 203 | "text/plain": [ 204 | " col1 col2\n", 205 | "row1 1 4\n", 206 | "row2 2 5\n", 207 | "row3 3 6" 208 | ] 209 | }, 210 | "execution_count": 8, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "# showing the datafrane\n", 217 | "df" 218 | ] 219 | }, 220 | { 221 | "cell_type": "markdown", 222 | "id": "d7ee2ed9", 223 | "metadata": {}, 224 | "source": [ 225 | "# Creating a DataFrame from a dictionary" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 9, 231 | "id": "63bf85b8", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "# lists used for this example\n", 236 | "states = [\"California\", \"Texas\", \"Florida\", \"New York\"]\n", 237 | "population = [39613493, 29730311, 21944577, 19299981]" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": 10, 243 | "id": "b309ebe8", 244 | "metadata": {}, 245 | "outputs": [], 246 | "source": [ 247 | "# Storing lists within a dictionary\n", 248 | "dict_states = {'States': states, 'Population': population}" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": 11, 254 | "id": "4e774499", 255 | "metadata": { 256 | "scrolled": false 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "# Creating the dataframe\n", 261 | "df_population = pd.DataFrame(dict_states)\n", 262 | "# df_population = pd.DataFrame.from_dict(dict_states)" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 12, 268 | "id": "585c7ff6", 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "data": { 273 | "text/html": [ 274 | "
\n", 275 | "\n", 288 | "\n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | "
StatesPopulation
0California39613493
1Texas29730311
2Florida21944577
3New York19299981
\n", 319 | "
" 320 | ], 321 | "text/plain": [ 322 | " States Population\n", 323 | "0 California 39613493\n", 324 | "1 Texas 29730311\n", 325 | "2 Florida 21944577\n", 326 | "3 New York 19299981" 327 | ] 328 | }, 329 | "execution_count": 12, 330 | "metadata": {}, 331 | "output_type": "execute_result" 332 | } 333 | ], 334 | "source": [ 335 | "# showing the datafrane\n", 336 | "df_population" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "id": "245b58bf", 342 | "metadata": {}, 343 | "source": [ 344 | "# Creating a DataFrame from a csv file" 345 | ] 346 | }, 347 | { 348 | "cell_type": "code", 349 | "execution_count": 13, 350 | "id": "5a3dcc1e", 351 | "metadata": {}, 352 | "outputs": [], 353 | "source": [ 354 | "# reading the csv file\n", 355 | "df_exams = pd.read_csv('StudentsPerformance.csv')" 356 | ] 357 | }, 358 | { 359 | "cell_type": "code", 360 | "execution_count": 14, 361 | "id": "585ed37d", 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/html": [ 367 | "
\n", 368 | "\n", 381 | "\n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
...........................
995femalegroup Emaster's degreestandardcompleted889995
996malegroup Chigh schoolfree/reducednone625555
997femalegroup Chigh schoolfree/reducedcompleted597165
998femalegroup Dsome collegestandardcompleted687877
999femalegroup Dsome collegefree/reducednone778686
\n", 519 | "

1000 rows × 8 columns

\n", 520 | "
" 521 | ], 522 | "text/plain": [ 523 | " gender race/ethnicity parental level of education lunch \\\n", 524 | "0 female group B bachelor's degree standard \n", 525 | "1 female group C some college standard \n", 526 | "2 female group B master's degree standard \n", 527 | "3 male group A associate's degree free/reduced \n", 528 | "4 male group C some college standard \n", 529 | ".. ... ... ... ... \n", 530 | "995 female group E master's degree standard \n", 531 | "996 male group C high school free/reduced \n", 532 | "997 female group C high school free/reduced \n", 533 | "998 female group D some college standard \n", 534 | "999 female group D some college free/reduced \n", 535 | "\n", 536 | " test preparation course math score reading score writing score \n", 537 | "0 none 72 72 74 \n", 538 | "1 completed 69 90 88 \n", 539 | "2 none 90 95 93 \n", 540 | "3 none 47 57 44 \n", 541 | "4 none 76 78 75 \n", 542 | ".. ... ... ... ... \n", 543 | "995 completed 88 99 95 \n", 544 | "996 none 62 55 55 \n", 545 | "997 completed 59 71 65 \n", 546 | "998 completed 68 78 77 \n", 547 | "999 none 77 86 86 \n", 548 | "\n", 549 | "[1000 rows x 8 columns]" 550 | ] 551 | }, 552 | "execution_count": 14, 553 | "metadata": {}, 554 | "output_type": "execute_result" 555 | } 556 | ], 557 | "source": [ 558 | "# show first 5 rows in a dataframe\n", 559 | "df_exams" 560 | ] 561 | } 562 | ], 563 | "metadata": { 564 | "kernelspec": { 565 | "display_name": "Python 3", 566 | "language": "python", 567 | "name": "python3" 568 | }, 569 | "language_info": { 570 | "codemirror_mode": { 571 | "name": "ipython", 572 | "version": 3 573 | }, 574 | "file_extension": ".py", 575 | "mimetype": "text/x-python", 576 | "name": "python", 577 | "nbconvert_exporter": "python", 578 | "pygments_lexer": "ipython3", 579 | "version": "3.8.8" 580 | }, 581 | "toc": { 582 | "base_numbering": 1, 583 | "nav_menu": {}, 584 | "number_sections": true, 585 | "sideBar": true, 586 | "skip_h1_title": false, 587 | "title_cell": "Table of 
Contents", 588 | "title_sidebar": "Contents", 589 | "toc_cell": false, 590 | "toc_position": {}, 591 | "toc_section_display": true, 592 | "toc_window_display": false 593 | } 594 | }, 595 | "nbformat": 4, 596 | "nbformat_minor": 5 597 | } 598 | -------------------------------------------------------------------------------- /2.Intro to Pandas/3. Basic Attributes, Methods and Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "3cadeaac", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "faf4a761", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# reading the csv file\n", 21 | "df_exams = pd.read_csv('StudentsPerformance.csv')" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 5, 27 | "id": "e6159fe8", 28 | "metadata": {}, 29 | "outputs": [ 30 | { 31 | "data": { 32 | "text/html": [ 33 | "
\n", 34 | "\n", 47 | "\n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
...........................
995femalegroup Emaster's degreestandardcompleted889995
996malegroup Chigh schoolfree/reducednone625555
997femalegroup Chigh schoolfree/reducedcompleted597165
998femalegroup Dsome collegestandardcompleted687877
999femalegroup Dsome collegefree/reducednone778686
\n", 185 | "

1000 rows × 8 columns

\n", 186 | "
" 187 | ], 188 | "text/plain": [ 189 | " gender race/ethnicity parental level of education lunch \\\n", 190 | "0 female group B bachelor's degree standard \n", 191 | "1 female group C some college standard \n", 192 | "2 female group B master's degree standard \n", 193 | "3 male group A associate's degree free/reduced \n", 194 | "4 male group C some college standard \n", 195 | ".. ... ... ... ... \n", 196 | "995 female group E master's degree standard \n", 197 | "996 male group C high school free/reduced \n", 198 | "997 female group C high school free/reduced \n", 199 | "998 female group D some college standard \n", 200 | "999 female group D some college free/reduced \n", 201 | "\n", 202 | " test preparation course math score reading score writing score \n", 203 | "0 none 72 72 74 \n", 204 | "1 completed 69 90 88 \n", 205 | "2 none 90 95 93 \n", 206 | "3 none 47 57 44 \n", 207 | "4 none 76 78 75 \n", 208 | ".. ... ... ... ... \n", 209 | "995 completed 88 99 95 \n", 210 | "996 none 62 55 55 \n", 211 | "997 completed 59 71 65 \n", 212 | "998 completed 68 78 77 \n", 213 | "999 none 77 86 86 \n", 214 | "\n", 215 | "[1000 rows x 8 columns]" 216 | ] 217 | }, 218 | "execution_count": 5, 219 | "metadata": {}, 220 | "output_type": "execute_result" 221 | } 222 | ], 223 | "source": [ 224 | "# showing the dataframe\n", 225 | "df_exams" 226 | ] 227 | }, 228 | { 229 | "cell_type": "markdown", 230 | "id": "63bf386f", 231 | "metadata": {}, 232 | "source": [ 233 | "# Attributes" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 6, 239 | "id": "8889cc6c", 240 | "metadata": {}, 241 | "outputs": [ 242 | { 243 | "data": { 244 | "text/plain": [ 245 | "(1000, 8)" 246 | ] 247 | }, 248 | "execution_count": 6, 249 | "metadata": {}, 250 | "output_type": "execute_result" 251 | } 252 | ], 253 | "source": [ 254 | "# getting access to the shape attribute\n", 255 | "df_exams.shape" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": 8, 261 | 
"id": "c1d18854", 262 | "metadata": {}, 263 | "outputs": [ 264 | { 265 | "data": { 266 | "text/plain": [ 267 | "RangeIndex(start=0, stop=1000, step=1)" 268 | ] 269 | }, 270 | "execution_count": 8, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "# getting access to the index attribute\n", 277 | "df_exams.index" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 9, 283 | "id": "002ebb03", 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/plain": [ 289 | "Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',\n", 290 | " 'test preparation course', 'math score', 'reading score',\n", 291 | " 'writing score'],\n", 292 | " dtype='object')" 293 | ] 294 | }, 295 | "execution_count": 9, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "# getting access to the column attribute\n", 302 | "df_exams.columns" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 12, 308 | "id": "1928828c", 309 | "metadata": {}, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "gender object\n", 315 | "race/ethnicity object\n", 316 | "parental level of education object\n", 317 | "lunch object\n", 318 | "test preparation course object\n", 319 | "math score int64\n", 320 | "reading score int64\n", 321 | "writing score int64\n", 322 | "dtype: object" 323 | ] 324 | }, 325 | "execution_count": 12, 326 | "metadata": {}, 327 | "output_type": "execute_result" 328 | } 329 | ], 330 | "source": [ 331 | "# data types of each column\n", 332 | "df_exams.dtypes" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "id": "3ee3d60a", 338 | "metadata": {}, 339 | "source": [ 340 | "# Methods" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": 10, 346 | "id": "d6ff61f9", 347 | "metadata": {}, 348 | "outputs": [ 349 | { 350 | "data": { 351 | "text/html": [ 352 | "
\n", 353 | "\n", 366 | "\n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
\n", 438 | "
" 439 | ], 440 | "text/plain": [ 441 | " gender race/ethnicity parental level of education lunch \\\n", 442 | "0 female group B bachelor's degree standard \n", 443 | "1 female group C some college standard \n", 444 | "2 female group B master's degree standard \n", 445 | "3 male group A associate's degree free/reduced \n", 446 | "4 male group C some college standard \n", 447 | "\n", 448 | " test preparation course math score reading score writing score \n", 449 | "0 none 72 72 74 \n", 450 | "1 completed 69 90 88 \n", 451 | "2 none 90 95 93 \n", 452 | "3 none 47 57 44 \n", 453 | "4 none 76 78 75 " 454 | ] 455 | }, 456 | "execution_count": 10, 457 | "metadata": {}, 458 | "output_type": "execute_result" 459 | } 460 | ], 461 | "source": [ 462 | "# showing the first 5 columns\n", 463 | "df_exams.head()" 464 | ] 465 | }, 466 | { 467 | "cell_type": "code", 468 | "execution_count": 15, 469 | "id": "7852fee9", 470 | "metadata": {}, 471 | "outputs": [ 472 | { 473 | "name": "stdout", 474 | "output_type": "stream", 475 | "text": [ 476 | "\n", 477 | "RangeIndex: 1000 entries, 0 to 999\n", 478 | "Data columns (total 8 columns):\n", 479 | " # Column Non-Null Count Dtype \n", 480 | "--- ------ -------------- ----- \n", 481 | " 0 gender 1000 non-null object\n", 482 | " 1 race/ethnicity 1000 non-null object\n", 483 | " 2 parental level of education 1000 non-null object\n", 484 | " 3 lunch 1000 non-null object\n", 485 | " 4 test preparation course 1000 non-null object\n", 486 | " 5 math score 1000 non-null int64 \n", 487 | " 6 reading score 1000 non-null int64 \n", 488 | " 7 writing score 1000 non-null int64 \n", 489 | "dtypes: int64(3), object(5)\n", 490 | "memory usage: 62.6+ KB\n" 491 | ] 492 | } 493 | ], 494 | "source": [ 495 | "# showing the info of the dataframe\n", 496 | "df_exams.info()" 497 | ] 498 | }, 499 | { 500 | "cell_type": "code", 501 | "execution_count": 16, 502 | "id": "7e44a628", 503 | "metadata": {}, 504 | "outputs": [ 505 | { 506 | "data": { 507 | "text/html": [ 
508 | "
\n", 509 | "\n", 522 | "\n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | "
math scorereading scorewriting score
count1000.000001000.0000001000.000000
mean66.0890069.16900068.054000
std15.1630814.60019215.195657
min0.0000017.00000010.000000
25%57.0000059.00000057.750000
50%66.0000070.00000069.000000
75%77.0000079.00000079.000000
max100.00000100.000000100.000000
\n", 582 | "
" 583 | ], 584 | "text/plain": [ 585 | " math score reading score writing score\n", 586 | "count 1000.00000 1000.000000 1000.000000\n", 587 | "mean 66.08900 69.169000 68.054000\n", 588 | "std 15.16308 14.600192 15.195657\n", 589 | "min 0.00000 17.000000 10.000000\n", 590 | "25% 57.00000 59.000000 57.750000\n", 591 | "50% 66.00000 70.000000 69.000000\n", 592 | "75% 77.00000 79.000000 79.000000\n", 593 | "max 100.00000 100.000000 100.000000" 594 | ] 595 | }, 596 | "execution_count": 16, 597 | "metadata": {}, 598 | "output_type": "execute_result" 599 | } 600 | ], 601 | "source": [ 602 | "# describing basic statistics of the dataframe\n", 603 | "df_exams.describe()" 604 | ] 605 | }, 606 | { 607 | "cell_type": "markdown", 608 | "id": "22a32e4e", 609 | "metadata": {}, 610 | "source": [ 611 | "# Functions" 612 | ] 613 | }, 614 | { 615 | "cell_type": "code", 616 | "execution_count": 17, 617 | "id": "a71af478", 618 | "metadata": {}, 619 | "outputs": [ 620 | { 621 | "data": { 622 | "text/plain": [ 623 | "1000" 624 | ] 625 | }, 626 | "execution_count": 17, 627 | "metadata": {}, 628 | "output_type": "execute_result" 629 | } 630 | ], 631 | "source": [ 632 | "# obtaining the length of the dataframe (number of rows)\n", 633 | "len(df_exams)" 634 | ] 635 | }, 636 | { 637 | "cell_type": "code", 638 | "execution_count": 22, 639 | "id": "595fcbde", 640 | "metadata": { 641 | "scrolled": true 642 | }, 643 | "outputs": [ 644 | { 645 | "data": { 646 | "text/plain": [ 647 | "999" 648 | ] 649 | }, 650 | "execution_count": 22, 651 | "metadata": {}, 652 | "output_type": "execute_result" 653 | } 654 | ], 655 | "source": [ 656 | "# obtaining the highest index of the dataframe\n", 657 | "max(df_exams.index)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 26, 663 | "id": "7aba282f", 664 | "metadata": {}, 665 | "outputs": [ 666 | { 667 | "data": { 668 | "text/plain": [ 669 | "0" 670 | ] 671 | }, 672 | "execution_count": 26, 673 | "metadata": {}, 674 | "output_type": 
"execute_result" 675 | } 676 | ], 677 | "source": [ 678 | "# obtaining the lowest index of the dataframe\n", 679 | "min(df_exams.index)" 680 | ] 681 | }, 682 | { 683 | "cell_type": "code", 684 | "execution_count": 27, 685 | "id": "8ce53edc", 686 | "metadata": {}, 687 | "outputs": [ 688 | { 689 | "data": { 690 | "text/plain": [ 691 | "pandas.core.frame.DataFrame" 692 | ] 693 | }, 694 | "execution_count": 27, 695 | "metadata": {}, 696 | "output_type": "execute_result" 697 | } 698 | ], 699 | "source": [ 700 | "# obtaining the data type\n", 701 | "type(df_exams)" 702 | ] 703 | }, 704 | { 705 | "cell_type": "code", 706 | "execution_count": 28, 707 | "id": "224775a3", 708 | "metadata": {}, 709 | "outputs": [ 710 | { 711 | "data": { 712 | "text/html": [ 713 | "
\n", 714 | "\n", 727 | "\n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | " \n", 745 | " \n", 746 | " \n", 747 | " \n", 748 | " \n", 749 | " \n", 750 | " \n", 751 | " \n", 752 | " \n", 753 | " \n", 754 | " \n", 755 | " \n", 756 | " \n", 757 | " \n", 758 | " \n", 759 | " \n", 760 | " \n", 761 | " \n", 762 | " \n", 763 | " \n", 764 | " \n", 765 | " \n", 766 | " \n", 767 | " \n", 768 | " \n", 769 | " \n", 770 | " \n", 771 | " \n", 772 | " \n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
...........................
995femalegroup Emaster's degreestandardcompleted889995
996malegroup Chigh schoolfree/reducednone625555
997femalegroup Chigh schoolfree/reducedcompleted597165
998femalegroup Dsome collegestandardcompleted687877
999femalegroup Dsome collegefree/reducednone778686
\n", 865 | "

1000 rows × 8 columns

\n", 866 | "
" 867 | ], 868 | "text/plain": [ 869 | " gender race/ethnicity parental level of education lunch \\\n", 870 | "0 female group B bachelor's degree standard \n", 871 | "1 female group C some college standard \n", 872 | "2 female group B master's degree standard \n", 873 | "3 male group A associate's degree free/reduced \n", 874 | "4 male group C some college standard \n", 875 | ".. ... ... ... ... \n", 876 | "995 female group E master's degree standard \n", 877 | "996 male group C high school free/reduced \n", 878 | "997 female group C high school free/reduced \n", 879 | "998 female group D some college standard \n", 880 | "999 female group D some college free/reduced \n", 881 | "\n", 882 | " test preparation course math score reading score writing score \n", 883 | "0 none 72 72 74 \n", 884 | "1 completed 69 90 88 \n", 885 | "2 none 90 95 93 \n", 886 | "3 none 47 57 44 \n", 887 | "4 none 76 78 75 \n", 888 | ".. ... ... ... ... \n", 889 | "995 completed 88 99 95 \n", 890 | "996 none 62 55 55 \n", 891 | "997 completed 59 71 65 \n", 892 | "998 completed 68 78 77 \n", 893 | "999 none 77 86 86 \n", 894 | "\n", 895 | "[1000 rows x 8 columns]" 896 | ] 897 | }, 898 | "execution_count": 28, 899 | "metadata": {}, 900 | "output_type": "execute_result" 901 | } 902 | ], 903 | "source": [ 904 | "# rounding the values of the dataset\n", 905 | "round(df_exams, 2)" 906 | ] 907 | } 908 | ], 909 | "metadata": { 910 | "kernelspec": { 911 | "display_name": "Python 3", 912 | "language": "python", 913 | "name": "python3" 914 | }, 915 | "language_info": { 916 | "codemirror_mode": { 917 | "name": "ipython", 918 | "version": 3 919 | }, 920 | "file_extension": ".py", 921 | "mimetype": "text/x-python", 922 | "name": "python", 923 | "nbconvert_exporter": "python", 924 | "pygments_lexer": "ipython3", 925 | "version": "3.8.8" 926 | }, 927 | "toc": { 928 | "base_numbering": 1, 929 | "nav_menu": {}, 930 | "number_sections": true, 931 | "sideBar": true, 932 | "skip_h1_title": false, 933 | 
"title_cell": "Table of Contents", 934 | "title_sidebar": "Contents", 935 | "toc_cell": false, 936 | "toc_position": {}, 937 | "toc_section_display": true, 938 | "toc_window_display": false 939 | } 940 | }, 941 | "nbformat": 4, 942 | "nbformat_minor": 5 943 | } 944 | -------------------------------------------------------------------------------- /2.Intro to Pandas/4.Selecting One Column from a Dataframe.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4e3fdade", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "9dfaf0d2", 17 | "metadata": { 18 | "scrolled": false 19 | }, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
\n", 110 | "
" 111 | ], 112 | "text/plain": [ 113 | " gender race/ethnicity parental level of education lunch \\\n", 114 | "0 female group B bachelor's degree standard \n", 115 | "1 female group C some college standard \n", 116 | "2 female group B master's degree standard \n", 117 | "3 male group A associate's degree free/reduced \n", 118 | "4 male group C some college standard \n", 119 | "\n", 120 | " test preparation course math score reading score writing score \n", 121 | "0 none 72 72 74 \n", 122 | "1 completed 69 90 88 \n", 123 | "2 none 90 95 93 \n", 124 | "3 none 47 57 44 \n", 125 | "4 none 76 78 75 " 126 | ] 127 | }, 128 | "execution_count": 2, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "# reading the csv file\n", 135 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n", 136 | "df_exams.head()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "de860322", 142 | "metadata": {}, 143 | "source": [ 144 | "# Selecting one column" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "4869f1d5", 150 | "metadata": {}, 151 | "source": [ 152 | "## Syntax 1" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 3, 158 | "id": "79ba5c16", 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "0 female\n", 165 | "1 female\n", 166 | "2 female\n", 167 | "3 male\n", 168 | "4 male\n", 169 | " ... 
\n", 170 | "995 female\n", 171 | "996 male\n", 172 | "997 female\n", 173 | "998 female\n", 174 | "999 female\n", 175 | "Name: gender, Length: 1000, dtype: object" 176 | ] 177 | }, 178 | "execution_count": 3, 179 | "metadata": {}, 180 | "output_type": "execute_result" 181 | } 182 | ], 183 | "source": [ 184 | "# select a column with [] (preferred way to select a column)\n", 185 | "df_exams['gender']" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 4, 191 | "id": "1da5d438", 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "data": { 196 | "text/plain": [ 197 | "pandas.core.series.Series" 198 | ] 199 | }, 200 | "execution_count": 4, 201 | "metadata": {}, 202 | "output_type": "execute_result" 203 | } 204 | ], 205 | "source": [ 206 | "# check out the data type of a column\n", 207 | "type(df_exams['gender'])" 208 | ] 209 | }, 210 | { 211 | "cell_type": "code", 212 | "execution_count": 5, 213 | "id": "d7041d6f", 214 | "metadata": {}, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/plain": [ 219 | "0 female\n", 220 | "1 female\n", 221 | "2 female\n", 222 | "3 male\n", 223 | "4 male\n", 224 | "Name: gender, dtype: object" 225 | ] 226 | }, 227 | "execution_count": 5, 228 | "metadata": {}, 229 | "output_type": "execute_result" 230 | } 231 | ], 232 | "source": [ 233 | "# series: attributes and methods\n", 234 | "df_exams['gender'].index\n", 235 | "df_exams['gender'].head()" 236 | ] 237 | }, 238 | { 239 | "cell_type": "markdown", 240 | "id": "025f3788", 241 | "metadata": {}, 242 | "source": [ 243 | "## Syntax 2" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": 6, 249 | "id": "5250d3b6", 250 | "metadata": {}, 251 | "outputs": [ 252 | { 253 | "data": { 254 | "text/plain": [ 255 | "0 female\n", 256 | "1 female\n", 257 | "2 female\n", 258 | "3 male\n", 259 | "4 male\n", 260 | " ... 
\n", 261 | "995 female\n", 262 | "996 male\n", 263 | "997 female\n", 264 | "998 female\n", 265 | "999 female\n", 266 | "Name: gender, Length: 1000, dtype: object" 267 | ] 268 | }, 269 | "execution_count": 6, 270 | "metadata": {}, 271 | "output_type": "execute_result" 272 | } 273 | ], 274 | "source": [ 275 | "# select a column with .\n", 276 | "df_exams.gender" 277 | ] 278 | }, 279 | { 280 | "cell_type": "code", 281 | "execution_count": 7, 282 | "id": "aa3433f7", 283 | "metadata": {}, 284 | "outputs": [ 285 | { 286 | "ename": "SyntaxError", 287 | "evalue": "invalid syntax (, line 2)", 288 | "output_type": "error", 289 | "traceback": [ 290 | "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m2\u001b[0m\n\u001b[0;31m df_exams.math score\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "# pitfall: dot notation fails when the column name contains a space (use [] instead)\n", 296 | "df_exams.math score" 297 | ] 298 | }, 299 | { 300 | "cell_type": "code", 301 | "execution_count": 8, 302 | "id": "92b4ae17", 303 | "metadata": {}, 304 | "outputs": [ 305 | { 306 | "data": { 307 | "text/plain": [ 308 | "0 72\n", 309 | "1 69\n", 310 | "2 90\n", 311 | "3 47\n", 312 | "4 76\n", 313 | " ..\n", 314 | "995 88\n", 315 | "996 62\n", 316 | "997 59\n", 317 | "998 68\n", 318 | "999 77\n", 319 | "Name: math score, Length: 1000, dtype: int64" 320 | ] 321 | }, 322 | "execution_count": 8, 323 | "metadata": {}, 324 | "output_type": "execute_result" 325 | } 326 | ], 327 | "source": [ 328 | "# select the same column using []\n", 329 | "df_exams[\"math score\"]" 330 | ] 331 | } 332 | ], 333 | "metadata": { 334 | "kernelspec": { 335 | "display_name": "Python 3", 336 | "language": "python", 337 | "name": "python3" 338 | }, 339 | "language_info": { 340 | "codemirror_mode": { 341 | "name": "ipython", 342 | "version": 3 343 | }, 344 | "file_extension": ".py", 345 | "mimetype": "text/x-python", 346 | "name": "python", 347 | 
"nbconvert_exporter": "python", 348 | "pygments_lexer": "ipython3", 349 | "version": "3.8.8" 350 | }, 351 | "toc": { 352 | "base_numbering": 1, 353 | "nav_menu": {}, 354 | "number_sections": true, 355 | "sideBar": true, 356 | "skip_h1_title": false, 357 | "title_cell": "Table of Contents", 358 | "title_sidebar": "Contents", 359 | "toc_cell": false, 360 | "toc_position": {}, 361 | "toc_section_display": true, 362 | "toc_window_display": false 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 5 367 | } 368 | -------------------------------------------------------------------------------- /2.Intro to Pandas/5.Selecting Two or More Columns from a Dataframe.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4e3fdade", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "9dfaf0d2", 17 | "metadata": { 18 | "scrolled": false 19 | }, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
\n", 110 | "
" 111 | ], 112 | "text/plain": [ 113 | " gender race/ethnicity parental level of education lunch \\\n", 114 | "0 female group B bachelor's degree standard \n", 115 | "1 female group C some college standard \n", 116 | "2 female group B master's degree standard \n", 117 | "3 male group A associate's degree free/reduced \n", 118 | "4 male group C some college standard \n", 119 | "\n", 120 | " test preparation course math score reading score writing score \n", 121 | "0 none 72 72 74 \n", 122 | "1 completed 69 90 88 \n", 123 | "2 none 90 95 93 \n", 124 | "3 none 47 57 44 \n", 125 | "4 none 76 78 75 " 126 | ] 127 | }, 128 | "execution_count": 2, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "# reading the csv file\n", 135 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n", 136 | "df_exams.head()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "de860322", 142 | "metadata": {}, 143 | "source": [ 144 | "# Selecting two or more columns" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 3, 150 | "id": "79ba5c16", 151 | "metadata": {}, 152 | "outputs": [ 153 | { 154 | "data": { 155 | "text/html": [ 156 | "
\n", 157 | "\n", 170 | "\n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | "
gendermath score
0female72
1female69
2female90
3male47
4male76
.........
995female88
996male62
997female59
998female68
999female77
\n", 236 | "

1000 rows × 2 columns

\n", 237 | "
" 238 | ], 239 | "text/plain": [ 240 | " gender math score\n", 241 | "0 female 72\n", 242 | "1 female 69\n", 243 | "2 female 90\n", 244 | "3 male 47\n", 245 | "4 male 76\n", 246 | ".. ... ...\n", 247 | "995 female 88\n", 248 | "996 male 62\n", 249 | "997 female 59\n", 250 | "998 female 68\n", 251 | "999 female 77\n", 252 | "\n", 253 | "[1000 rows x 2 columns]" 254 | ] 255 | }, 256 | "execution_count": 3, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "# select 2 columns using [[]]\n", 263 | "df_exams[['gender', 'math score']]" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 4, 269 | "id": "1da5d438", 270 | "metadata": {}, 271 | "outputs": [ 272 | { 273 | "data": { 274 | "text/plain": [ 275 | "pandas.core.frame.DataFrame" 276 | ] 277 | }, 278 | "execution_count": 4, 279 | "metadata": {}, 280 | "output_type": "execute_result" 281 | } 282 | ], 283 | "source": [ 284 | "# check out the data type of the selection\n", 285 | "type(df_exams[['gender', 'math score']])" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": 5, 291 | "id": "42b11033", 292 | "metadata": {}, 293 | "outputs": [ 294 | { 295 | "data": { 296 | "text/html": [ 297 | "
\n", 298 | "\n", 311 | "\n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | "
gendermath scorereading scorewriting score
0female727274
1female699088
2female909593
3male475744
4male767875
...............
995female889995
996male625555
997female597165
998female687877
999female778686
\n", 401 | "

1000 rows × 4 columns

\n", 402 | "
" 403 | ], 404 | "text/plain": [ 405 | " gender math score reading score writing score\n", 406 | "0 female 72 72 74\n", 407 | "1 female 69 90 88\n", 408 | "2 female 90 95 93\n", 409 | "3 male 47 57 44\n", 410 | "4 male 76 78 75\n", 411 | ".. ... ... ... ...\n", 412 | "995 female 88 99 95\n", 413 | "996 male 62 55 55\n", 414 | "997 female 59 71 65\n", 415 | "998 female 68 78 77\n", 416 | "999 female 77 86 86\n", 417 | "\n", 418 | "[1000 rows x 4 columns]" 419 | ] 420 | }, 421 | "execution_count": 5, 422 | "metadata": {}, 423 | "output_type": "execute_result" 424 | } 425 | ], 426 | "source": [ 427 | "# select 2 or more columns using [[]]\n", 428 | "df_exams[['gender', 'math score', 'reading score', 'writing score']]" 429 | ] 430 | }, 431 | { 432 | "cell_type": "code", 433 | "execution_count": 6, 434 | "id": "45b07093", 435 | "metadata": {}, 436 | "outputs": [], 437 | "source": [ 438 | "# we can't select 2 or more columns with the \".\"\n", 439 | "# df_exams.'gender', 'math score'" 440 | ] 441 | } 442 | ], 443 | "metadata": { 444 | "kernelspec": { 445 | "display_name": "Python 3", 446 | "language": "python", 447 | "name": "python3" 448 | }, 449 | "language_info": { 450 | "codemirror_mode": { 451 | "name": "ipython", 452 | "version": 3 453 | }, 454 | "file_extension": ".py", 455 | "mimetype": "text/x-python", 456 | "name": "python", 457 | "nbconvert_exporter": "python", 458 | "pygments_lexer": "ipython3", 459 | "version": "3.8.8" 460 | }, 461 | "toc": { 462 | "base_numbering": 1, 463 | "nav_menu": {}, 464 | "number_sections": true, 465 | "sideBar": true, 466 | "skip_h1_title": false, 467 | "title_cell": "Table of Contents", 468 | "title_sidebar": "Contents", 469 | "toc_cell": false, 470 | "toc_position": {}, 471 | "toc_section_display": true, 472 | "toc_window_display": false 473 | } 474 | }, 475 | "nbformat": 4, 476 | "nbformat_minor": 5 477 | } 478 | -------------------------------------------------------------------------------- /2.Intro to Pandas/7.Operations on 
Dataframes.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4e3fdade", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "9dfaf0d2", 17 | "metadata": { 18 | "scrolled": true 19 | }, 20 | "outputs": [ 21 | { 22 | "data": { 23 | "text/html": [ 24 | "
\n", 25 | "\n", 38 | "\n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
\n", 110 | "
" 111 | ], 112 | "text/plain": [ 113 | " gender race/ethnicity parental level of education lunch \\\n", 114 | "0 female group B bachelor's degree standard \n", 115 | "1 female group C some college standard \n", 116 | "2 female group B master's degree standard \n", 117 | "3 male group A associate's degree free/reduced \n", 118 | "4 male group C some college standard \n", 119 | "\n", 120 | " test preparation course math score reading score writing score \n", 121 | "0 none 72 72 74 \n", 122 | "1 completed 69 90 88 \n", 123 | "2 none 90 95 93 \n", 124 | "3 none 47 57 44 \n", 125 | "4 none 76 78 75 " 126 | ] 127 | }, 128 | "execution_count": 2, 129 | "metadata": {}, 130 | "output_type": "execute_result" 131 | } 132 | ], 133 | "source": [ 134 | "# reading the csv file\n", 135 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n", 136 | "df_exams.head()" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "65a8afc2", 142 | "metadata": {}, 143 | "source": [ 144 | "# Math Operations" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "e93c4e2b", 150 | "metadata": {}, 151 | "source": [ 152 | "## Operations in columns" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 3, 158 | "id": "63bd52ec", 159 | "metadata": { 160 | "scrolled": true 161 | }, 162 | "outputs": [ 163 | { 164 | "data": { 165 | "text/plain": [ 166 | "66089" 167 | ] 168 | }, 169 | "execution_count": 3, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "source": [ 175 | "# select a column and calculate total sum\n", 176 | "df_exams['math score'].sum()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 4, 182 | "id": "200f9c1e", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "0" 189 | ] 190 | }, 191 | "execution_count": 4, 192 | "metadata": {}, 193 | "output_type": "execute_result" 194 | } 195 | ], 196 | "source": [ 197 | "# count, mean, std, max, 
and min\n", 198 | "df_exams['math score'].count()\n", 199 | "df_exams['math score'].mean()\n", 200 | "df_exams['math score'].std()\n", 201 | "df_exams['math score'].max()\n", 202 | "df_exams['math score'].min()" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 5, 208 | "id": "a4847df7", 209 | "metadata": {}, 210 | "outputs": [ 211 | { 212 | "data": { 213 | "text/html": [ 214 | "
\n", 215 | "\n", 228 | "\n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | "
math scorereading scorewriting score
count1000.000001000.0000001000.000000
mean66.0890069.16900068.054000
std15.1630814.60019215.195657
min0.0000017.00000010.000000
25%57.0000059.00000057.750000
50%66.0000070.00000069.000000
75%77.0000079.00000079.000000
max100.00000100.000000100.000000
\n", 288 | "
" 289 | ], 290 | "text/plain": [ 291 | " math score reading score writing score\n", 292 | "count 1000.00000 1000.000000 1000.000000\n", 293 | "mean 66.08900 69.169000 68.054000\n", 294 | "std 15.16308 14.600192 15.195657\n", 295 | "min 0.00000 17.000000 10.000000\n", 296 | "25% 57.00000 59.000000 57.750000\n", 297 | "50% 66.00000 70.000000 69.000000\n", 298 | "75% 77.00000 79.000000 79.000000\n", 299 | "max 100.00000 100.000000 100.000000" 300 | ] 301 | }, 302 | "execution_count": 5, 303 | "metadata": {}, 304 | "output_type": "execute_result" 305 | } 306 | ], 307 | "source": [ 308 | "# easier calculation with .describe()\n", 309 | "df_exams.describe()" 310 | ] 311 | }, 312 | { 313 | "cell_type": "markdown", 314 | "id": "d85482c6", 315 | "metadata": {}, 316 | "source": [ 317 | "## Operations in rows" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 6, 323 | "id": "21edc88a", 324 | "metadata": {}, 325 | "outputs": [ 326 | { 327 | "data": { 328 | "text/plain": [ 329 | "0 218\n", 330 | "1 247\n", 331 | "2 278\n", 332 | "3 148\n", 333 | "4 229\n", 334 | " ... 
\n", 335 | "995 282\n", 336 | "996 172\n", 337 | "997 195\n", 338 | "998 223\n", 339 | "999 249\n", 340 | "Length: 1000, dtype: int64" 341 | ] 342 | }, 343 | "execution_count": 6, 344 | "metadata": {}, 345 | "output_type": "execute_result" 346 | } 347 | ], 348 | "source": [ 349 | "# calculating the sum in a row\n", 350 | "df_exams['math score'] + df_exams['reading score'] + df_exams['writing score']" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 7, 356 | "id": "e62eb260", 357 | "metadata": {}, 358 | "outputs": [], 359 | "source": [ 360 | "# calculating the average score and assigning the result to a new column\n", 361 | "df_exams['average'] = (df_exams['math score'] + df_exams['reading score'] + df_exams['writing score'])/3" 362 | ] 363 | }, 364 | { 365 | "cell_type": "code", 366 | "execution_count": 8, 367 | "id": "63d8f21d", 368 | "metadata": {}, 369 | "outputs": [ 370 | { 371 | "data": { 372 | "text/html": [ 373 | "
\n", 374 | "\n", 387 | "\n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | " \n", 427 | " \n", 428 | " \n", 429 | " \n", 430 | " \n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting scoreaverage
0femalegroup Bbachelor's degreestandardnone72727472.666667
1femalegroup Csome collegestandardcompleted69908882.333333
2femalegroup Bmaster's degreestandardnone90959392.666667
3malegroup Aassociate's degreefree/reducednone47574449.333333
4malegroup Csome collegestandardnone76787576.333333
..............................
995femalegroup Emaster's degreestandardcompleted88999594.000000
996malegroup Chigh schoolfree/reducednone62555557.333333
997femalegroup Chigh schoolfree/reducedcompleted59716565.000000
998femalegroup Dsome collegestandardcompleted68787774.333333
999femalegroup Dsome collegefree/reducednone77868683.000000
\n", 537 | "

1000 rows × 9 columns

\n", 538 | "
" 539 | ], 540 | "text/plain": [ 541 | " gender race/ethnicity parental level of education lunch \\\n", 542 | "0 female group B bachelor's degree standard \n", 543 | "1 female group C some college standard \n", 544 | "2 female group B master's degree standard \n", 545 | "3 male group A associate's degree free/reduced \n", 546 | "4 male group C some college standard \n", 547 | ".. ... ... ... ... \n", 548 | "995 female group E master's degree standard \n", 549 | "996 male group C high school free/reduced \n", 550 | "997 female group C high school free/reduced \n", 551 | "998 female group D some college standard \n", 552 | "999 female group D some college free/reduced \n", 553 | "\n", 554 | " test preparation course math score reading score writing score \\\n", 555 | "0 none 72 72 74 \n", 556 | "1 completed 69 90 88 \n", 557 | "2 none 90 95 93 \n", 558 | "3 none 47 57 44 \n", 559 | "4 none 76 78 75 \n", 560 | ".. ... ... ... ... \n", 561 | "995 completed 88 99 95 \n", 562 | "996 none 62 55 55 \n", 563 | "997 completed 59 71 65 \n", 564 | "998 completed 68 78 77 \n", 565 | "999 none 77 86 86 \n", 566 | "\n", 567 | " average \n", 568 | "0 72.666667 \n", 569 | "1 82.333333 \n", 570 | "2 92.666667 \n", 571 | "3 49.333333 \n", 572 | "4 76.333333 \n", 573 | ".. ... 
\n", 574 | "995 94.000000 \n", 575 | "996 57.333333 \n", 576 | "997 65.000000 \n", 577 | "998 74.333333 \n", 578 | "999 83.000000 \n", 579 | "\n", 580 | "[1000 rows x 9 columns]" 581 | ] 582 | }, 583 | "execution_count": 8, 584 | "metadata": {}, 585 | "output_type": "execute_result" 586 | } 587 | ], 588 | "source": [ 589 | "# showing the dataframe\n", 590 | "df_exams" 591 | ] 592 | } 593 | ], 594 | "metadata": { 595 | "kernelspec": { 596 | "display_name": "Python 3", 597 | "language": "python", 598 | "name": "python3" 599 | }, 600 | "language_info": { 601 | "codemirror_mode": { 602 | "name": "ipython", 603 | "version": 3 604 | }, 605 | "file_extension": ".py", 606 | "mimetype": "text/x-python", 607 | "name": "python", 608 | "nbconvert_exporter": "python", 609 | "pygments_lexer": "ipython3", 610 | "version": "3.8.8" 611 | }, 612 | "toc": { 613 | "base_numbering": 1, 614 | "nav_menu": {}, 615 | "number_sections": true, 616 | "sideBar": true, 617 | "skip_h1_title": false, 618 | "title_cell": "Table of Contents", 619 | "title_sidebar": "Contents", 620 | "toc_cell": false, 621 | "toc_position": {}, 622 | "toc_section_display": true, 623 | "toc_window_display": false 624 | } 625 | }, 626 | "nbformat": 4, 627 | "nbformat_minor": 5 628 | } 629 | -------------------------------------------------------------------------------- /2.Intro to Pandas/8.The value_counts() method.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "4e3fdade", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "9dfaf0d2", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "data": { 21 | "text/html": [ 22 | "
\n", 23 | "\n", 36 | "\n", 37 | " \n", 38 | " \n", 39 | " \n", 40 | " \n", 41 | " \n", 42 | " \n", 43 | " \n", 44 | " \n", 45 | " \n", 46 | " \n", 47 | " \n", 48 | " \n", 49 | " \n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | "
genderrace/ethnicityparental level of educationlunchtest preparation coursemath scorereading scorewriting score
0femalegroup Bbachelor's degreestandardnone727274
1femalegroup Csome collegestandardcompleted699088
2femalegroup Bmaster's degreestandardnone909593
3malegroup Aassociate's degreefree/reducednone475744
4malegroup Csome collegestandardnone767875
\n", 108 | "
" 109 | ], 110 | "text/plain": [ 111 | " gender race/ethnicity parental level of education lunch \\\n", 112 | "0 female group B bachelor's degree standard \n", 113 | "1 female group C some college standard \n", 114 | "2 female group B master's degree standard \n", 115 | "3 male group A associate's degree free/reduced \n", 116 | "4 male group C some college standard \n", 117 | "\n", 118 | " test preparation course math score reading score writing score \n", 119 | "0 none 72 72 74 \n", 120 | "1 completed 69 90 88 \n", 121 | "2 none 90 95 93 \n", 122 | "3 none 47 57 44 \n", 123 | "4 none 76 78 75 " 124 | ] 125 | }, 126 | "execution_count": 2, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "# reading the csv file\n", 133 | "df_exams = pd.read_csv('StudentsPerformance.csv')\n", 134 | "df_exams.head()" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "id": "9a47f15a", 140 | "metadata": {}, 141 | "source": [ 142 | "# Value Counts" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 3, 148 | "id": "e04ff454", 149 | "metadata": {}, 150 | "outputs": [ 151 | { 152 | "data": { 153 | "text/plain": [ 154 | "1000" 155 | ] 156 | }, 157 | "execution_count": 3, 158 | "metadata": {}, 159 | "output_type": "execute_result" 160 | } 161 | ], 162 | "source": [ 163 | "# counting gender elements\n", 164 | "\n", 165 | "# len function\n", 166 | "len(df_exams['gender'])\n", 167 | "# .count() method\n", 168 | "df_exams['gender'].count()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": 4, 174 | "id": "07ab6370", 175 | "metadata": {}, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "female 518\n", 181 | "male 482\n", 182 | "Name: gender, dtype: int64" 183 | ] 184 | }, 185 | "execution_count": 4, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "# counting gender elements by category\n", 192 | 
"df_exams['gender'].value_counts()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 5, 198 | "id": "e4f100a9", 199 | "metadata": {}, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "female 0.518\n", 205 | "male 0.482\n", 206 | "Name: gender, dtype: float64" 207 | ] 208 | }, 209 | "execution_count": 5, 210 | "metadata": {}, 211 | "output_type": "execute_result" 212 | } 213 | ], 214 | "source": [ 215 | "# return the relative frequency (divide all values by the sum of values)\n", 216 | "df_exams['gender'].value_counts(normalize=True)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 6, 222 | "id": "620d6e12", 223 | "metadata": {}, 224 | "outputs": [ 225 | { 226 | "data": { 227 | "text/plain": [ 228 | "some college 226\n", 229 | "associate's degree 222\n", 230 | "high school 196\n", 231 | "some high school 179\n", 232 | "bachelor's degree 118\n", 233 | "master's degree 59\n", 234 | "Name: parental level of education, dtype: int64" 235 | ] 236 | }, 237 | "execution_count": 6, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "# counting \"parental level of education\" elements by category\n", 244 | "df_exams['parental level of education'].value_counts()" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | "execution_count": 7, 250 | "id": "43ce51e7", 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "data": { 255 | "text/plain": [ 256 | "some college 0.23\n", 257 | "associate's degree 0.22\n", 258 | "high school 0.20\n", 259 | "some high school 0.18\n", 260 | "bachelor's degree 0.12\n", 261 | "master's degree 0.06\n", 262 | "Name: parental level of education, dtype: float64" 263 | ] 264 | }, 265 | "execution_count": 7, 266 | "metadata": {}, 267 | "output_type": "execute_result" 268 | } 269 | ], 270 | "source": [ 271 | "# return the relative frequency and round to 2 decimals\n", 272 | "df_exams['parental level of 
education'].value_counts(normalize=True).round(2)" 273 | ] 274 | } 275 | ], 276 | "metadata": { 277 | "kernelspec": { 278 | "display_name": "Python 3", 279 | "language": "python", 280 | "name": "python3" 281 | }, 282 | "language_info": { 283 | "codemirror_mode": { 284 | "name": "ipython", 285 | "version": 3 286 | }, 287 | "file_extension": ".py", 288 | "mimetype": "text/x-python", 289 | "name": "python", 290 | "nbconvert_exporter": "python", 291 | "pygments_lexer": "ipython3", 292 | "version": "3.8.8" 293 | }, 294 | "toc": { 295 | "base_numbering": 1, 296 | "nav_menu": {}, 297 | "number_sections": true, 298 | "sideBar": true, 299 | "skip_h1_title": false, 300 | "title_cell": "Table of Contents", 301 | "title_sidebar": "Contents", 302 | "toc_cell": false, 303 | "toc_position": {}, 304 | "toc_section_display": true, 305 | "toc_window_display": false 306 | } 307 | }, 308 | "nbformat": 4, 309 | "nbformat_minor": 5 310 | } 311 | -------------------------------------------------------------------------------- /3.Pivot Table/gdp.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/3.Pivot Table/gdp.csv -------------------------------------------------------------------------------- /3.Pivot Table/supermarket_sales.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/3.Pivot Table/supermarket_sales.xlsx -------------------------------------------------------------------------------- /4.Data Visualization/1.Dataset Overview and Making Pivot Table.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "270bafb9", 7 | "metadata": {}, 8 | "outputs": [], 9 | 
"source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "47fc4e0d", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# reading the csv file\n", 21 | "df_population_raw = pd.read_csv('population_total.csv')" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 3, 27 | "id": "d44cbeb9", 28 | "metadata": { 29 | "scrolled": false 30 | }, 31 | "outputs": [ 32 | { 33 | "data": { 34 | "text/html": [ 35 | "
\n", 36 | "\n", 49 | "\n", 50 | " \n", 51 | " \n", 52 | " \n", 53 | " \n", 54 | " \n", 55 | " \n", 56 | " \n", 57 | " \n", 58 | " \n", 59 | " \n", 60 | " \n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | "
countryyearpopulation
0China2020.01.439324e+09
1China2019.01.433784e+09
2China2018.01.427648e+09
3China2017.01.421022e+09
4China2016.01.414049e+09
............
4180United States1965.01.997337e+08
4181United States1960.01.867206e+08
4182United States1955.01.716853e+08
4183India1960.04.505477e+08
4184India1955.04.098806e+08
\n", 127 | "

4185 rows × 3 columns

\n", 128 | "
" 129 | ], 130 | "text/plain": [ 131 | " country year population\n", 132 | "0 China 2020.0 1.439324e+09\n", 133 | "1 China 2019.0 1.433784e+09\n", 134 | "2 China 2018.0 1.427648e+09\n", 135 | "3 China 2017.0 1.421022e+09\n", 136 | "4 China 2016.0 1.414049e+09\n", 137 | "... ... ... ...\n", 138 | "4180 United States 1965.0 1.997337e+08\n", 139 | "4181 United States 1960.0 1.867206e+08\n", 140 | "4182 United States 1955.0 1.716853e+08\n", 141 | "4183 India 1960.0 4.505477e+08\n", 142 | "4184 India 1955.0 4.098806e+08\n", 143 | "\n", 144 | "[4185 rows x 3 columns]" 145 | ] 146 | }, 147 | "execution_count": 3, 148 | "metadata": {}, 149 | "output_type": "execute_result" 150 | } 151 | ], 152 | "source": [ 153 | "# first look at the dataset\n", 154 | "df_population_raw" 155 | ] 156 | }, 157 | { 158 | "cell_type": "markdown", 159 | "id": "ec86a50f", 160 | "metadata": {}, 161 | "source": [ 162 | "# Making a Pivot Table" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "id": "1450c112", 168 | "metadata": {}, 169 | "source": [ 170 | ".pivot(): Returns reshaped DataFrame organized by given index / column values (\"pivot without aggregation\")" 171 | ] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "execution_count": 4, 176 | "id": "4d01f725", 177 | "metadata": {}, 178 | "outputs": [], 179 | "source": [ 180 | "# dropping null values\n", 181 | "df_population_raw.dropna(inplace=True)" 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "execution_count": 5, 187 | "id": "aeb60686", 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "# making a pivot table\n", 192 | "df_pivot = df_population_raw.pivot(index='year', columns='country',\n", 193 | " values='population')" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 6, 199 | "id": "918dc0db", 200 | "metadata": {}, 201 | "outputs": [], 202 | "source": [ 203 | "# selecting some countries\n", 204 | "df_pivot = df_pivot[['United States', 'India', 'China', \n", 205 | " 
'Indonesia', 'Brazil']]" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": 7, 211 | "id": "d8c76de7", 212 | "metadata": { 213 | "scrolled": false 214 | }, 215 | "outputs": [ 216 | { 217 | "data": { 218 | "text/html": [ 219 | "
\n", 220 | "\n", 233 | "\n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " 
\n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | "
countryUnited StatesIndiaChinaIndonesiaBrazil
year
1955.0171685336.04.098806e+086.122416e+0877273425.062533919.0
1960.0186720571.04.505477e+086.604081e+0887751068.072179226.0
1965.0199733676.04.991233e+087.242190e+08100267062.083373530.0
1970.0209513341.05.551898e+088.276014e+08114793178.095113265.0
1975.0219081251.06.231029e+089.262409e+08130680727.0107216205.0
1980.0229476354.06.989528e+081.000089e+09147447836.0120694009.0
1985.0240499825.07.843600e+081.075589e+09164982451.0135274080.0
1990.0252120309.08.732778e+081.176884e+09181413402.0149003223.0
1995.0265163745.09.639226e+081.240921e+09196934260.0162019896.0
2000.0281710909.01.056576e+091.290551e+09211513823.0174790340.0
2005.0294993511.01.147610e+091.330776e+09226289470.0186127103.0
2010.0309011475.01.234281e+091.368811e+09241834215.0195713635.0
2015.0320878310.01.310152e+091.406848e+09258383256.0204471769.0
2016.0323015995.01.324517e+091.414049e+09261556381.0206163053.0
2017.0325084756.01.338677e+091.421022e+09264650963.0207833823.0
2018.0327096265.01.352642e+091.427648e+09267670543.0209469323.0
2019.0329064917.01.366418e+091.433784e+09270625568.0211049527.0
2020.0331002651.01.380004e+091.439324e+09273523615.0212559417.0
\n", 399 | "
" 400 | ], 401 | "text/plain": [ 402 | "country United States India China Indonesia Brazil\n", 403 | "year \n", 404 | "1955.0 171685336.0 4.098806e+08 6.122416e+08 77273425.0 62533919.0\n", 405 | "1960.0 186720571.0 4.505477e+08 6.604081e+08 87751068.0 72179226.0\n", 406 | "1965.0 199733676.0 4.991233e+08 7.242190e+08 100267062.0 83373530.0\n", 407 | "1970.0 209513341.0 5.551898e+08 8.276014e+08 114793178.0 95113265.0\n", 408 | "1975.0 219081251.0 6.231029e+08 9.262409e+08 130680727.0 107216205.0\n", 409 | "1980.0 229476354.0 6.989528e+08 1.000089e+09 147447836.0 120694009.0\n", 410 | "1985.0 240499825.0 7.843600e+08 1.075589e+09 164982451.0 135274080.0\n", 411 | "1990.0 252120309.0 8.732778e+08 1.176884e+09 181413402.0 149003223.0\n", 412 | "1995.0 265163745.0 9.639226e+08 1.240921e+09 196934260.0 162019896.0\n", 413 | "2000.0 281710909.0 1.056576e+09 1.290551e+09 211513823.0 174790340.0\n", 414 | "2005.0 294993511.0 1.147610e+09 1.330776e+09 226289470.0 186127103.0\n", 415 | "2010.0 309011475.0 1.234281e+09 1.368811e+09 241834215.0 195713635.0\n", 416 | "2015.0 320878310.0 1.310152e+09 1.406848e+09 258383256.0 204471769.0\n", 417 | "2016.0 323015995.0 1.324517e+09 1.414049e+09 261556381.0 206163053.0\n", 418 | "2017.0 325084756.0 1.338677e+09 1.421022e+09 264650963.0 207833823.0\n", 419 | "2018.0 327096265.0 1.352642e+09 1.427648e+09 267670543.0 209469323.0\n", 420 | "2019.0 329064917.0 1.366418e+09 1.433784e+09 270625568.0 211049527.0\n", 421 | "2020.0 331002651.0 1.380004e+09 1.439324e+09 273523615.0 212559417.0" 422 | ] 423 | }, 424 | "execution_count": 7, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "# showing pivot table\n", 431 | "df_pivot" 432 | ] 433 | } 434 | ], 435 | "metadata": { 436 | "kernelspec": { 437 | "display_name": "Python 3", 438 | "language": "python", 439 | "name": "python3" 440 | }, 441 | "language_info": { 442 | "codemirror_mode": { 443 | "name": "ipython", 444 | "version": 3 445 | }, 
446 | "file_extension": ".py", 447 | "mimetype": "text/x-python", 448 | "name": "python", 449 | "nbconvert_exporter": "python", 450 | "pygments_lexer": "ipython3", 451 | "version": "3.8.8" 452 | }, 453 | "toc": { 454 | "base_numbering": 1, 455 | "nav_menu": {}, 456 | "number_sections": true, 457 | "sideBar": true, 458 | "skip_h1_title": false, 459 | "title_cell": "Table of Contents", 460 | "title_sidebar": "Contents", 461 | "toc_cell": false, 462 | "toc_position": {}, 463 | "toc_section_display": true, 464 | "toc_window_display": false 465 | } 466 | }, 467 | "nbformat": 4, 468 | "nbformat_minor": 5 469 | } 470 | -------------------------------------------------------------------------------- /Exercises/Intro to Pandas/Introduction to Pandas-Exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "2bac431d", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# import pandas\n", 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "id": "730ef2aa", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# Optional: use the pd.set_option() to display all rows in a dataframe by default\n" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "id": "05c7ec54", 27 | "metadata": {}, 28 | "source": [ 29 | "# Create a DataFrame" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "cfb39f8c", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# read the \"bestsellers with categories\" csv file (Dataset on Amazon's Top 50 bestselling books from 2009 to 2019.)\n" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "618209c8", 46 | "metadata": { 47 | "scrolled": true 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "# get access to the shape attribute\n" 52 | ] 53 | }, 54 | { 55 | 
"cell_type": "code", 56 | "execution_count": null, 57 | "id": "3acde70f", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# find the data types of each column\n" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "id": "7f15b10b", 67 | "metadata": {}, 68 | "source": [ 69 | "# Display a DataFrame" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "ef6b8d45", 76 | "metadata": { 77 | "scrolled": true 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "# show first 5 rows in a dataframe\n" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "id": "a3da1b0a", 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "# describe basic statistics of the dataframe (mean, std, min, max)\n" 92 | ] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "id": "51f4d331", 97 | "metadata": {}, 98 | "source": [ 99 | "# Add a new Column with an array" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "id": "94c526df", 106 | "metadata": {}, 107 | "outputs": [], 108 | "source": [ 109 | "# Your task is to create a column named 'Critic Rating' that should have random integer numbers between 1 and 4\n", 110 | "\n", 111 | "# 1. import numpy and create 550 random integer numbers between 1 and 4\n", 112 | "\n", 113 | "# 2. add new 'Critic Rating' column to dataframe using the random numbers created\n", 114 | "\n", 115 | "\n", 116 | "# Note the random numbers in this new 'Critic Rating' column will be different between your solution and mine, but we'll focus only on the code, in this section." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "af7cec1e", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# show first 5 rows\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "id": "822167f7", 132 | "metadata": {}, 133 | "source": [ 134 | "# Basic Attributes, Methods and Functions" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "3b8dfa8e", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# get access to the columns attribute\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "markdown", 149 | "id": "6fe2c5f5", 150 | "metadata": {}, 151 | "source": [ 152 | "# Selecting Two or More Columns from a Dataframe" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": null, 158 | "id": "1690dbfe", 159 | "metadata": {}, 160 | "outputs": [], 161 | "source": [ 162 | "# move the new 'Critic Rating' column between the columns \"User Rating\" and \"Reviews\", then update the dataframe\n", 163 | "\n", 164 | "# Tip: Copy and paste the column names obtained with the columns attribute and then rearrange elements using [[]]\n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": null, 170 | "id": "4a294c74", 171 | "metadata": {}, 172 | "outputs": [], 173 | "source": [ 174 | "# show first 5 rows\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "id": "fedaf280", 180 | "metadata": {}, 181 | "source": [ 182 | "# Operations on Dataframes" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "da1c2205", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# create a column named \"Average Rating\" by using the following formula: Average Rating = (User Rating + Critic Rating)/2\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "id": "ab6c226b", 199 | "metadata": {}, 200 | "outputs": [], 
201 | "source": [ 202 | "# use the round function to round the values of the dataframe to 1 decimal and update the dataframe\n" 203 | ] 204 | }, 205 | { 206 | "cell_type": "markdown", 207 | "id": "5142f8af", 208 | "metadata": {}, 209 | "source": [ 210 | "# Value Counts" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "a3a1e3fc", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "# count elements in \"Genre\" column by category and return the relative frequency \n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "id": "13ff41dd", 226 | "metadata": {}, 227 | "source": [ 228 | "# Rename Columns" 229 | ] 230 | }, 231 | { 232 | "cell_type": "code", 233 | "execution_count": null, 234 | "id": "f86a8194", 235 | "metadata": {}, 236 | "outputs": [], 237 | "source": [ 238 | "# rename columns \"User Rating,\" \"Critic Rating\" and \"Average Rating\" to \"UR,\" \"CR\" and \"AR\" then update the dataframe with the inplace parameter\n" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "id": "3df4c15c", 245 | "metadata": {}, 246 | "outputs": [], 247 | "source": [ 248 | "# show first 5 rows\n" 249 | ] 250 | }, 251 | { 252 | "cell_type": "code", 253 | "execution_count": null, 254 | "id": "474246ef", 255 | "metadata": {}, 256 | "outputs": [], 257 | "source": [ 258 | "# select only \"Name\", \"Author\", \"UR\", \"CR\", \"AR\" and \"Year\" columns and update dataframe\n" 259 | ] 260 | }, 261 | { 262 | "cell_type": "markdown", 263 | "id": "6c458cf9", 264 | "metadata": {}, 265 | "source": [ 266 | "# Sort a dataframe" 267 | ] 268 | }, 269 | { 270 | "cell_type": "code", 271 | "execution_count": null, 272 | "id": "50a7d2a8", 273 | "metadata": {}, 274 | "outputs": [], 275 | "source": [ 276 | "# sort the dataframe descending by \"UR\" and \"CR\"\n" 277 | ] 278 | } 279 | ], 280 | "metadata": { 281 | "kernelspec": { 282 | "display_name": "Python 3", 283 | "language": "python", 
284 | "name": "python3" 285 | }, 286 | "language_info": { 287 | "codemirror_mode": { 288 | "name": "ipython", 289 | "version": 3 290 | }, 291 | "file_extension": ".py", 292 | "mimetype": "text/x-python", 293 | "name": "python", 294 | "nbconvert_exporter": "python", 295 | "pygments_lexer": "ipython3", 296 | "version": "3.8.8" 297 | }, 298 | "toc": { 299 | "base_numbering": 1, 300 | "nav_menu": {}, 301 | "number_sections": true, 302 | "sideBar": true, 303 | "skip_h1_title": false, 304 | "title_cell": "Table of Contents", 305 | "title_sidebar": "Contents", 306 | "toc_cell": false, 307 | "toc_position": {}, 308 | "toc_section_display": true, 309 | "toc_window_display": false 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 5 314 | } 315 | -------------------------------------------------------------------------------- /Exercises/Merging and Concatenating DataFrames/IMDb movies.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/Exercises/Merging and Concatenating DataFrames/IMDb movies.csv.zip -------------------------------------------------------------------------------- /Exercises/Merging and Concatenating DataFrames/IMDb ratings.csv.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thepycoach/python-course-for-excel-users/a0c109d8f092b15cde0209a18f67de35cab45c87/Exercises/Merging and Concatenating DataFrames/IMDb ratings.csv.zip -------------------------------------------------------------------------------- /Exercises/Merging and Concatenating DataFrames/Merging and Concatenating DataFrames-Exercise.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "fad49839", 7 | "metadata": {}, 8 | "outputs": [], 9 | 
"source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "id": "09c6213f", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# read 'IMDb movies.csv' and 'IMDb ratings.csv'\n", 21 | "df_movies = pd.read_csv('IMDb movies.csv', low_memory=False)\n", 22 | "df_ratings = pd.read_csv('IMDb ratings.csv')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "id": "d5670913", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# select columns\n", 33 | "df_movies = df_movies[['imdb_title_id', 'title', 'year',\n", 34 | " 'genre', 'country']]\n", 35 | "\n", 36 | "df_ratings = df_ratings[['imdb_title_id', 'total_votes', 'mean_vote']]" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "310fc4cc", 42 | "metadata": {}, 43 | "source": [ 44 | "# merge()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "de4aa87a", 50 | "metadata": {}, 51 | "source": [ 52 | "## Inner join" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "e50bde09", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "# merge df_movies and df_ratings (inner join)\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "id": "232dc587", 68 | "metadata": {}, 69 | "source": [ 70 | "## Outer join (Full join)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "id": "eb7cbafe", 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "# merge df_movies and df_ratings (outer join)\n" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "id": "554af406", 86 | "metadata": {}, 87 | "source": [ 88 | "## Exclusive Outer join (Exclusive Full join)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "18842159", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# merge df_movies and df_ratings (Exclusive Full join)\n" 99 | ] 100 | }, 101 
| { 102 | "cell_type": "markdown", 103 | "id": "dbee2e97", 104 | "metadata": {}, 105 | "source": [ 106 | "## Left join" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "5317af4a", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# extract a 50% sample of the df_movies dataframe\n" 117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "execution_count": null, 122 | "id": "3e41bd9f", 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "# merge df_movies_sample and df_ratings (left join)\n" 127 | ] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "id": "3014d002", 132 | "metadata": {}, 133 | "source": [ 134 | "## Exclusive Left join" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "id": "06b64daa", 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "# make a copy of the df_movies dataframe\n" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "192b0988", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "# set the first 1000 values of 'imdb_title_id' column as 'tt1234567890'\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "id": "d79c5ba4", 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "# merge df_movies_2 and df_ratings (exclusive left join)\n" 165 | ] 166 | }, 167 | { 168 | "cell_type": "markdown", 169 | "id": "4a2007ee", 170 | "metadata": {}, 171 | "source": [ 172 | "## Right join" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "id": "fe292156", 179 | "metadata": {}, 180 | "outputs": [], 181 | "source": [ 182 | "# extract a 30% sample of the df_ratings dataframe\n" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "0ffdcd45", 189 | "metadata": {}, 190 | "outputs": [], 191 | "source": [ 192 | "# merge df_movies 
and df_ratings_sample (right join)\n" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "id": "c79f3005", 198 | "metadata": {}, 199 | "source": [ 200 | "## Exclusive Right join" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "id": "048c2de3", 207 | "metadata": {}, 208 | "outputs": [], 209 | "source": [ 210 | "# make a copy of the df_ratings dataframe\n" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": null, 216 | "id": "f71e384a", 217 | "metadata": {}, 218 | "outputs": [], 219 | "source": [ 220 | "# set the first 1000 values of 'imdb_title_id' column as 'tt1234567890'\n" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "id": "358048b3", 227 | "metadata": {}, 228 | "outputs": [], 229 | "source": [ 230 | "# merge df_movies and df_ratings_2 (exclusive right join)\n" 231 | ] 232 | } 233 | ], 234 | "metadata": { 235 | "kernelspec": { 236 | "display_name": "Python 3", 237 | "language": "python", 238 | "name": "python3" 239 | }, 240 | "language_info": { 241 | "codemirror_mode": { 242 | "name": "ipython", 243 | "version": 3 244 | }, 245 | "file_extension": ".py", 246 | "mimetype": "text/x-python", 247 | "name": "python", 248 | "nbconvert_exporter": "python", 249 | "pygments_lexer": "ipython3", 250 | "version": "3.8.8" 251 | }, 252 | "toc": { 253 | "base_numbering": 1, 254 | "nav_menu": {}, 255 | "number_sections": true, 256 | "sideBar": true, 257 | "skip_h1_title": false, 258 | "title_cell": "Table of Contents", 259 | "title_sidebar": "Contents", 260 | "toc_cell": false, 261 | "toc_position": {}, 262 | "toc_section_display": true, 263 | "toc_window_display": false 264 | } 265 | }, 266 | "nbformat": 4, 267 | "nbformat_minor": 5 268 | } 269 | -------------------------------------------------------------------------------- /Exercises/Merging and Concatenating DataFrames/Merging and Concatenating DataFrames-Solution.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "fad49839", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 2, 16 | "id": "09c6213f", 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "# read 'IMDb movies.csv' and 'IMDb ratings.csv'\n", 21 | "df_movies = pd.read_csv('IMDb movies.csv', low_memory=False)\n", 22 | "df_ratings = pd.read_csv('IMDb ratings.csv')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 3, 28 | "id": "d5670913", 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# select columns\n", 33 | "df_movies = df_movies[['imdb_title_id', 'title', 'year',\n", 34 | " 'genre', 'country']]\n", 35 | "\n", 36 | "df_ratings = df_ratings[['imdb_title_id', 'total_votes', 'mean_vote']]" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "id": "310fc4cc", 42 | "metadata": {}, 43 | "source": [ 44 | "# merge()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "de4aa87a", 50 | "metadata": {}, 51 | "source": [ 52 | "## Inner join" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 4, 58 | "id": "e50bde09", 59 | "metadata": {}, 60 | "outputs": [ 61 | { 62 | "data": { 63 | "text/html": [ 64 | "
\n", 65 | "\n", 78 | "\n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | "
imdb_title_idtitleyeargenrecountrytotal_votesmean_vote
0tt0000009Miss Jerry1894RomanceUSA1545.9
1tt0000574The Story of the Kelly Gang1906Biography, Crime, DramaAustralia5896.3
2tt0001892Den sorte drøm1911DramaGermany, Denmark1886.0
3tt0002101Cleopatra1912Drama, HistoryUSA4465.3
4tt0002130L'Inferno1911Adventure, Drama, FantasyItaly22376.9
........................
85850tt9908390Le lion2020ComedyFrance, Belgium3985.5
85851tt9911196De Beentjes van Sint-Hildegard2020Comedy, DramaNetherlands7247.9
85852tt9911774Padmavyuhathile Abhimanyu2019DramaIndia2657.8
85853tt9914286Sokagin Çocuklari2019Drama, FamilyTurkey1949.4
85854tt9914942La vida sense la Sara Amat2019DramaSpain1026.8
\n", 204 | "

85855 rows × 7 columns

\n", 205 | "
" 206 | ], 207 | "text/plain": [ 208 | " imdb_title_id title year \\\n", 209 | "0 tt0000009 Miss Jerry 1894 \n", 210 | "1 tt0000574 The Story of the Kelly Gang 1906 \n", 211 | "2 tt0001892 Den sorte drøm 1911 \n", 212 | "3 tt0002101 Cleopatra 1912 \n", 213 | "4 tt0002130 L'Inferno 1911 \n", 214 | "... ... ... ... \n", 215 | "85850 tt9908390 Le lion 2020 \n", 216 | "85851 tt9911196 De Beentjes van Sint-Hildegard 2020 \n", 217 | "85852 tt9911774 Padmavyuhathile Abhimanyu 2019 \n", 218 | "85853 tt9914286 Sokagin Çocuklari 2019 \n", 219 | "85854 tt9914942 La vida sense la Sara Amat 2019 \n", 220 | "\n", 221 | " genre country total_votes mean_vote \n", 222 | "0 Romance USA 154 5.9 \n", 223 | "1 Biography, Crime, Drama Australia 589 6.3 \n", 224 | "2 Drama Germany, Denmark 188 6.0 \n", 225 | "3 Drama, History USA 446 5.3 \n", 226 | "4 Adventure, Drama, Fantasy Italy 2237 6.9 \n", 227 | "... ... ... ... ... \n", 228 | "85850 Comedy France, Belgium 398 5.5 \n", 229 | "85851 Comedy, Drama Netherlands 724 7.9 \n", 230 | "85852 Drama India 265 7.8 \n", 231 | "85853 Drama, Family Turkey 194 9.4 \n", 232 | "85854 Drama Spain 102 6.8 \n", 233 | "\n", 234 | "[85855 rows x 7 columns]" 235 | ] 236 | }, 237 | "execution_count": 4, 238 | "metadata": {}, 239 | "output_type": "execute_result" 240 | } 241 | ], 242 | "source": [ 243 | "# merge df_movies and df_ratings (inner join)\n", 244 | "df_movies.merge(df_ratings, on='imdb_title_id')" 245 | ] 246 | }, 247 | { 248 | "cell_type": "markdown", 249 | "id": "232dc587", 250 | "metadata": {}, 251 | "source": [ 252 | "## Outer join (Full join)" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 5, 258 | "id": "eb7cbafe", 259 | "metadata": {}, 260 | "outputs": [ 261 | { 262 | "data": { 263 | "text/html": [ 264 | "
\n", 265 | "\n", 278 | "\n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | " \n", 376 | " \n", 377 | " \n", 378 | " \n", 379 | " \n", 380 | " \n", 381 | " \n", 382 | " \n", 383 | " \n", 384 | " \n", 385 | " \n", 386 | " \n", 387 | " \n", 388 | " \n", 389 | " \n", 390 | " \n", 391 | " \n", 392 | " \n", 393 | " \n", 394 | " \n", 395 | " \n", 396 | " \n", 397 | " \n", 398 | " \n", 399 | " \n", 400 | " \n", 401 | " \n", 402 | " \n", 403 | "
imdb_title_idtitleyeargenrecountrytotal_votesmean_vote
0tt0000009Miss Jerry1894RomanceUSA1545.9
1tt0000574The Story of the Kelly Gang1906Biography, Crime, DramaAustralia5896.3
2tt0001892Den sorte drøm1911DramaGermany, Denmark1886.0
3tt0002101Cleopatra1912Drama, HistoryUSA4465.3
4tt0002130L'Inferno1911Adventure, Drama, FantasyItaly22376.9
........................
85850tt9908390Le lion2020ComedyFrance, Belgium3985.5
85851tt9911196De Beentjes van Sint-Hildegard2020Comedy, DramaNetherlands7247.9
85852tt9911774Padmavyuhathile Abhimanyu2019DramaIndia2657.8
85853tt9914286Sokagin Çocuklari2019Drama, FamilyTurkey1949.4
85854tt9914942La vida sense la Sara Amat2019DramaSpain1026.8
\n", 404 | "

85855 rows × 7 columns

\n", 405 | "
" 406 | ], 407 | "text/plain": [ 408 | " imdb_title_id title year \\\n", 409 | "0 tt0000009 Miss Jerry 1894 \n", 410 | "1 tt0000574 The Story of the Kelly Gang 1906 \n", 411 | "2 tt0001892 Den sorte drøm 1911 \n", 412 | "3 tt0002101 Cleopatra 1912 \n", 413 | "4 tt0002130 L'Inferno 1911 \n", 414 | "... ... ... ... \n", 415 | "85850 tt9908390 Le lion 2020 \n", 416 | "85851 tt9911196 De Beentjes van Sint-Hildegard 2020 \n", 417 | "85852 tt9911774 Padmavyuhathile Abhimanyu 2019 \n", 418 | "85853 tt9914286 Sokagin Çocuklari 2019 \n", 419 | "85854 tt9914942 La vida sense la Sara Amat 2019 \n", 420 | "\n", 421 | " genre country total_votes mean_vote \n", 422 | "0 Romance USA 154 5.9 \n", 423 | "1 Biography, Crime, Drama Australia 589 6.3 \n", 424 | "2 Drama Germany, Denmark 188 6.0 \n", 425 | "3 Drama, History USA 446 5.3 \n", 426 | "4 Adventure, Drama, Fantasy Italy 2237 6.9 \n", 427 | "... ... ... ... ... \n", 428 | "85850 Comedy France, Belgium 398 5.5 \n", 429 | "85851 Comedy, Drama Netherlands 724 7.9 \n", 430 | "85852 Drama India 265 7.8 \n", 431 | "85853 Drama, Family Turkey 194 9.4 \n", 432 | "85854 Drama Spain 102 6.8 \n", 433 | "\n", 434 | "[85855 rows x 7 columns]" 435 | ] 436 | }, 437 | "execution_count": 5, 438 | "metadata": {}, 439 | "output_type": "execute_result" 440 | } 441 | ], 442 | "source": [ 443 | "# merge df_movies and df_ratings (outer join)\n", 444 | "df_movies.merge(df_ratings, on='imdb_title_id', how='outer')" 445 | ] 446 | }, 447 | { 448 | "cell_type": "markdown", 449 | "id": "554af406", 450 | "metadata": {}, 451 | "source": [ 452 | "## Exclusive Outer join (Exclusive Full join)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 6, 458 | "id": "18842159", 459 | "metadata": {}, 460 | "outputs": [ 461 | { 462 | "data": { 463 | "text/html": [ 464 | "
\n", 465 | "\n", 478 | "\n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | "
imdb_title_idtitleyeargenrecountrytotal_votesmean_vote_merge
\n", 495 | "
" 496 | ], 497 | "text/plain": [ 498 | "Empty DataFrame\n", 499 | "Columns: [imdb_title_id, title, year, genre, country, total_votes, mean_vote, _merge]\n", 500 | "Index: []" 501 | ] 502 | }, 503 | "execution_count": 6, 504 | "metadata": {}, 505 | "output_type": "execute_result" 506 | } 507 | ], 508 | "source": [ 509 | "# merge df_movies and df_ratings (Exclusive Full join)\n", 510 | "df_movies.merge(df_ratings, on='imdb_title_id', how='outer', \n", 511 | " indicator=True).query(\"_merge=='left_only' or _merge=='right_only'\")" 512 | ] 513 | }, 514 | { 515 | "cell_type": "markdown", 516 | "id": "dbee2e97", 517 | "metadata": {}, 518 | "source": [ 519 | "## Left join" 520 | ] 521 | }, 522 | { 523 | "cell_type": "code", 524 | "execution_count": 7, 525 | "id": "5317af4a", 526 | "metadata": {}, 527 | "outputs": [], 528 | "source": [ 529 | "# extract a 50% sample of the df_movies dataframe (fixed seed so reruns give the same sample)\n", 530 | "df_movies_sample = df_movies.sample(frac=0.5, random_state=42)" 531 | ] 532 | }, 533 | { 534 | "cell_type": "code", 535 | "execution_count": 8, 536 | "id": "3e41bd9f", 537 | "metadata": {}, 538 | "outputs": [], 539 | "source": [ 540 | "# merge df_movies_sample and df_ratings (left join)\n", 541 | "df_left = df_movies_sample.merge(df_ratings, on='imdb_title_id',\n", 542 | " how='left')" 543 | ] 544 | }, 545 | { 546 | "cell_type": "markdown", 547 | "id": "3014d002", 548 | "metadata": {}, 549 | "source": [ 550 | "## Exclusive Left join" 551 | ] 552 | }, 553 | { 554 | "cell_type": "code", 555 | "execution_count": 9, 556 | "id": "06b64daa", 557 | "metadata": {}, 558 | "outputs": [], 559 | "source": [ 560 | "# make a copy of the df_movies dataframe\n", 561 | "df_movies_2 = df_movies.copy()" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | "execution_count": 10, 567 | "id": "192b0988", 568 | "metadata": {}, 569 | "outputs": [], 570 | "source": [ 571 | "# set the first 1000 values of 'imdb_title_id' column as 'tt1234567890'\n", 572 | "# (rows whose index label is below 1000, picked with a boolean mask\n", 573 | "# instead of a Python-level loop over every row)\n",
574 | "df_movies_2.loc[df_movies_2.index < 1000, 'imdb_title_id'] = 'tt1234567890'" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 11, 580 | "id": "d79c5ba4", 581 | "metadata": {}, 582 | "outputs": [], 583 | "source": [ 584 | "# merge df_movies_2 and df_ratings (exclusive left join)\n", 585 | "df_exclusive_left = df_movies_2.merge(df_ratings,\n", 586 | " on='imdb_title_id',\n", 587 | " how='outer', \n", 588 | " indicator=True).query(\"_merge=='left_only'\")" 589 | ] 590 | }, 591 | { 592 | "cell_type": "markdown", 593 | "id": "4a2007ee", 594 | "metadata": {}, 595 | "source": [ 596 | "## Right join" 597 | ] 598 | }, 599 | { 600 | "cell_type": "code", 601 | "execution_count": 12, 602 | "id": "fe292156", 603 | "metadata": {}, 604 | "outputs": [], 605 | "source": [ 606 | "# extract a 30% sample of the df_ratings dataframe (fixed seed so reruns give the same sample)\n", 607 | "df_ratings_sample = df_ratings.sample(frac=0.3, random_state=42)" 608 | ] 609 | }, 610 | { 611 | "cell_type": "code", 612 | "execution_count": 13, 613 | "id": "0ffdcd45", 614 | "metadata": {}, 615 | "outputs": [], 616 | "source": [ 617 | "# merge df_movies and df_ratings_sample (right join)\n", 618 | "df_right = df_movies.merge(df_ratings_sample, on='imdb_title_id',\n", 619 | " how='right')" 620 | ] 621 | }, 622 | { 623 | "cell_type": "markdown", 624 | "id": "c79f3005", 625 | "metadata": {}, 626 | "source": [ 627 | "## Exclusive Right join" 628 | ] 629 | }, 630 | { 631 | "cell_type": "code", 632 | "execution_count": 14, 633 | "id": "048c2de3", 634 | "metadata": {}, 635 | "outputs": [], 636 | "source": [ 637 | "# make a copy of the df_ratings dataframe\n", 638 | "df_ratings_2 = df_ratings.copy()" 639 | ] 640 | }, 641 | { 642 | "cell_type": "code", 643 | "execution_count": 15, 644 | "id": "f71e384a", 645 | "metadata": {}, 646 | "outputs": [], 647 | "source": [ 648 | "# set the first 1000 values of 'imdb_title_id' column as 'tt1234567890'\n", 649 | "# (rows whose index label is below 1000, picked with a boolean mask\n", 650 | "# instead of a Python-level loop over every row)\n", 651 |
"df_ratings_2.loc[df_ratings_2.index < 1000, 'imdb_title_id'] = 'tt1234567890'" 652 | ] 653 | }, 654 | { 655 | "cell_type": "code", 656 | "execution_count": 16, 657 | "id": "358048b3", 658 | "metadata": {}, 659 | "outputs": [], 660 | "source": [ 661 | "# merge df_movies and df_ratings_2 (exclusive right join)\n", 662 | "df_exclusive_right = df_movies.merge(df_ratings_2,\n", 663 | " on='imdb_title_id',\n", 664 | " how='outer', \n", 665 | " indicator=True).query(\"_merge=='right_only'\")" 666 | ] 667 | } 668 | ], 669 | "metadata": { 670 | "kernelspec": { 671 | "display_name": "Python 3", 672 | "language": "python", 673 | "name": "python3" 674 | }, 675 | "language_info": { 676 | "codemirror_mode": { 677 | "name": "ipython", 678 | "version": 3 679 | }, 680 | "file_extension": ".py", 681 | "mimetype": "text/x-python", 682 | "name": "python", 683 | "nbconvert_exporter": "python", 684 | "pygments_lexer": "ipython3", 685 | "version": "3.8.8" 686 | }, 687 | "toc": { 688 | "base_numbering": 1, 689 | "nav_menu": {}, 690 | "number_sections": true, 691 | "sideBar": true, 692 | "skip_h1_title": false, 693 | "title_cell": "Table of Contents", 694 | "title_sidebar": "Contents", 695 | "toc_cell": false, 696 | "toc_position": {}, 697 | "toc_section_display": true, 698 | "toc_window_display": false 699 | } 700 | }, 701 | "nbformat": 4, 702 | "nbformat_minor": 5 703 | } 704 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## FREE Python Cheat Sheet 2 | 3 | 🇺🇸 Link: https://artificialcorner.com/p/redeem-my-udemy-courses-for-free 4 | 5 | ## Formulario de Python Gratis 6 | 7 | 🇪🇸 Link: https://artificialcorner.com/p/formularios-gratis-de-python 8 | -------------------------------------------------------------------------------- /Web Scraping with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": 1, 6 | "id": "9bcec48b", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import pandas as pd" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "b8784766", 16 | "metadata": {}, 17 | "source": [ 18 | "# Read a .csv from a URL with Pandas" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "id": "1e116955", 24 | "metadata": {}, 25 | "source": [ 26 | "Target website: https://www.football-data.co.uk/data.php" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "id": "62ad1716", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# reading 1 csv file from the website\n", 37 | "df_premier21 = pd.read_csv('https://www.football-data.co.uk/mmz4281/2122/E0.csv')" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "id": "e0b6be7b", 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "data": { 48 | "text/html": [ 49 | "
\n", 50 | "\n", 63 | "\n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | 
" \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | " \n", 273 | " \n", 274 | " \n", 275 | " \n", 276 | " \n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n", 282 | " \n", 283 | " \n", 284 | " \n", 285 | " \n", 286 | " \n", 287 | " \n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | " \n", 308 | " \n", 309 | " \n", 310 | " \n", 311 | " \n", 312 | " \n", 313 | " \n", 314 | " \n", 315 | " \n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | " \n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | "
DivDateTimeHomeTeamAwayTeamFTHGFTAGFTRHTHGHTAG...AvgC<2.5AHChB365CAHHB365CAHAPCAHHPCAHAMaxCAHHMaxCAHAAvgCAHHAvgCAHA
0E013/08/202120:00BrentfordArsenal20H10...1.620.501.752.051.812.132.052.171.802.09
1E014/08/202112:30Man UnitedLeeds51H10...2.25-1.002.051.752.171.772.191.932.101.79
2E014/08/202115:00BurnleyBrighton12A10...1.620.251.792.151.812.141.822.191.792.12
3E014/08/202115:00ChelseaCrystal Palace30H20...1.94-1.502.051.752.121.812.161.932.061.82
4E014/08/202115:00EvertonSouthampton31H01...1.67-0.502.051.882.051.882.081.902.031.86
..................................................................
210E023/01/202214:00ArsenalBurnley00D00...2.32-1.501.922.011.932.001.932.111.901.98
211E023/01/202214:00Crystal PalaceLiverpool13A02...2.111.001.961.971.941.972.072.021.921.96
212E023/01/202214:00LeicesterBrighton11D00...2.100.001.922.011.931.991.942.131.872.01
213E023/01/202216:30ChelseaTottenham20H00...1.84-0.751.991.941.971.952.032.071.961.92
214E005/02/202218:00BurnleyWatford00D00...1.73-0.251.902.001.892.031.932.071.872.01
\n", 357 | "

215 rows × 106 columns

\n", 358 | "
" 359 | ], 360 | "text/plain": [ 361 | " Div Date Time HomeTeam AwayTeam FTHG FTAG FTR \\\n", 362 | "0 E0 13/08/2021 20:00 Brentford Arsenal 2 0 H \n", 363 | "1 E0 14/08/2021 12:30 Man United Leeds 5 1 H \n", 364 | "2 E0 14/08/2021 15:00 Burnley Brighton 1 2 A \n", 365 | "3 E0 14/08/2021 15:00 Chelsea Crystal Palace 3 0 H \n", 366 | "4 E0 14/08/2021 15:00 Everton Southampton 3 1 H \n", 367 | ".. .. ... ... ... ... ... ... .. \n", 368 | "210 E0 23/01/2022 14:00 Arsenal Burnley 0 0 D \n", 369 | "211 E0 23/01/2022 14:00 Crystal Palace Liverpool 1 3 A \n", 370 | "212 E0 23/01/2022 14:00 Leicester Brighton 1 1 D \n", 371 | "213 E0 23/01/2022 16:30 Chelsea Tottenham 2 0 H \n", 372 | "214 E0 05/02/2022 18:00 Burnley Watford 0 0 D \n", 373 | "\n", 374 | " HTHG HTAG ... AvgC<2.5 AHCh B365CAHH B365CAHA PCAHH PCAHA \\\n", 375 | "0 1 0 ... 1.62 0.50 1.75 2.05 1.81 2.13 \n", 376 | "1 1 0 ... 2.25 -1.00 2.05 1.75 2.17 1.77 \n", 377 | "2 1 0 ... 1.62 0.25 1.79 2.15 1.81 2.14 \n", 378 | "3 2 0 ... 1.94 -1.50 2.05 1.75 2.12 1.81 \n", 379 | "4 0 1 ... 1.67 -0.50 2.05 1.88 2.05 1.88 \n", 380 | ".. ... ... ... ... ... ... ... ... ... \n", 381 | "210 0 0 ... 2.32 -1.50 1.92 2.01 1.93 2.00 \n", 382 | "211 0 2 ... 2.11 1.00 1.96 1.97 1.94 1.97 \n", 383 | "212 0 0 ... 2.10 0.00 1.92 2.01 1.93 1.99 \n", 384 | "213 0 0 ... 1.84 -0.75 1.99 1.94 1.97 1.95 \n", 385 | "214 0 0 ... 1.73 -0.25 1.90 2.00 1.89 2.03 \n", 386 | "\n", 387 | " MaxCAHH MaxCAHA AvgCAHH AvgCAHA \n", 388 | "0 2.05 2.17 1.80 2.09 \n", 389 | "1 2.19 1.93 2.10 1.79 \n", 390 | "2 1.82 2.19 1.79 2.12 \n", 391 | "3 2.16 1.93 2.06 1.82 \n", 392 | "4 2.08 1.90 2.03 1.86 \n", 393 | ".. ... ... ... ... 
\n", 394 | "210 1.93 2.11 1.90 1.98 \n", 395 | "211 2.07 2.02 1.92 1.96 \n", 396 | "212 1.94 2.13 1.87 2.01 \n", 397 | "213 2.03 2.07 1.96 1.92 \n", 398 | "214 1.93 2.07 1.87 2.01 \n", 399 | "\n", 400 | "[215 rows x 106 columns]" 401 | ] 402 | }, 403 | "execution_count": 3, 404 | "metadata": {}, 405 | "output_type": "execute_result" 406 | } 407 | ], 408 | "source": [ 409 | "# showing dataframe\n", 410 | "df_premier21" 411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": 4, 416 | "id": "f9bcd9f5", 417 | "metadata": {}, 418 | "outputs": [], 419 | "source": [ 420 | "# rename columns\n", 421 | "df_premier21 = df_premier21.rename(columns={'Date':'date',\n", 422 | " 'HomeTeam':'home_team',\n", 423 | " 'AwayTeam':'away_team',\n", 424 | " 'FTHG': 'home_goals',\n", 425 | " 'FTAG': 'away_goals'})" 426 | ] 427 | }, 428 | { 429 | "cell_type": "code", 430 | "execution_count": 5, 431 | "id": "a650ca82", 432 | "metadata": {}, 433 | "outputs": [ 434 | { 435 | "data": { 436 | "text/html": [ 437 | "
\n", 438 | "\n", 451 | "\n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 461 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " 
\n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | " \n", 724 | " \n", 725 | " \n", 726 | " \n", 727 | " \n", 728 | " \n", 729 | " \n", 730 | " \n", 731 | " \n", 732 | " \n", 733 | " \n", 734 | " \n", 735 | " \n", 736 | " \n", 737 | " \n", 738 | " \n", 739 | " \n", 740 | " \n", 741 | " \n", 742 | " \n", 743 | " \n", 744 | "
DivdateTimehome_teamaway_teamhome_goalsaway_goalsFTRHTHGHTAG...AvgC<2.5AHChB365CAHHB365CAHAPCAHHPCAHAMaxCAHHMaxCAHAAvgCAHHAvgCAHA
0E013/08/202120:00BrentfordArsenal20H10...1.620.501.752.051.812.132.052.171.802.09
1E014/08/202112:30Man UnitedLeeds51H10...2.25-1.002.051.752.171.772.191.932.101.79
2E014/08/202115:00BurnleyBrighton12A10...1.620.251.792.151.812.141.822.191.792.12
3E014/08/202115:00ChelseaCrystal Palace30H20...1.94-1.502.051.752.121.812.161.932.061.82
4E014/08/202115:00EvertonSouthampton31H01...1.67-0.502.051.882.051.882.081.902.031.86
..................................................................
210E023/01/202214:00ArsenalBurnley00D00...2.32-1.501.922.011.932.001.932.111.901.98
211E023/01/202214:00Crystal PalaceLiverpool13A02...2.111.001.961.971.941.972.072.021.921.96
212E023/01/202214:00LeicesterBrighton11D00...2.100.001.922.011.931.991.942.131.872.01
213E023/01/202216:30ChelseaTottenham20H00...1.84-0.751.991.941.971.952.032.071.961.92
214E005/02/202218:00BurnleyWatford00D00...1.73-0.251.902.001.892.031.932.071.872.01
\n", 745 | "

215 rows × 106 columns

\n", 746 | "
" 747 | ], 748 | "text/plain": [ 749 | " Div date Time home_team away_team home_goals \\\n", 750 | "0 E0 13/08/2021 20:00 Brentford Arsenal 2 \n", 751 | "1 E0 14/08/2021 12:30 Man United Leeds 5 \n", 752 | "2 E0 14/08/2021 15:00 Burnley Brighton 1 \n", 753 | "3 E0 14/08/2021 15:00 Chelsea Crystal Palace 3 \n", 754 | "4 E0 14/08/2021 15:00 Everton Southampton 3 \n", 755 | ".. .. ... ... ... ... ... \n", 756 | "210 E0 23/01/2022 14:00 Arsenal Burnley 0 \n", 757 | "211 E0 23/01/2022 14:00 Crystal Palace Liverpool 1 \n", 758 | "212 E0 23/01/2022 14:00 Leicester Brighton 1 \n", 759 | "213 E0 23/01/2022 16:30 Chelsea Tottenham 2 \n", 760 | "214 E0 05/02/2022 18:00 Burnley Watford 0 \n", 761 | "\n", 762 | " away_goals FTR HTHG HTAG ... AvgC<2.5 AHCh B365CAHH B365CAHA \\\n", 763 | "0 0 H 1 0 ... 1.62 0.50 1.75 2.05 \n", 764 | "1 1 H 1 0 ... 2.25 -1.00 2.05 1.75 \n", 765 | "2 2 A 1 0 ... 1.62 0.25 1.79 2.15 \n", 766 | "3 0 H 2 0 ... 1.94 -1.50 2.05 1.75 \n", 767 | "4 1 H 0 1 ... 1.67 -0.50 2.05 1.88 \n", 768 | ".. ... .. ... ... ... ... ... ... ... \n", 769 | "210 0 D 0 0 ... 2.32 -1.50 1.92 2.01 \n", 770 | "211 3 A 0 2 ... 2.11 1.00 1.96 1.97 \n", 771 | "212 1 D 0 0 ... 2.10 0.00 1.92 2.01 \n", 772 | "213 0 H 0 0 ... 1.84 -0.75 1.99 1.94 \n", 773 | "214 0 D 0 0 ... 1.73 -0.25 1.90 2.00 \n", 774 | "\n", 775 | " PCAHH PCAHA MaxCAHH MaxCAHA AvgCAHH AvgCAHA \n", 776 | "0 1.81 2.13 2.05 2.17 1.80 2.09 \n", 777 | "1 2.17 1.77 2.19 1.93 2.10 1.79 \n", 778 | "2 1.81 2.14 1.82 2.19 1.79 2.12 \n", 779 | "3 2.12 1.81 2.16 1.93 2.06 1.82 \n", 780 | "4 2.05 1.88 2.08 1.90 2.03 1.86 \n", 781 | ".. ... ... ... ... ... ... 
\n", 782 | "210 1.93 2.00 1.93 2.11 1.90 1.98 \n", 783 | "211 1.94 1.97 2.07 2.02 1.92 1.96 \n", 784 | "212 1.93 1.99 1.94 2.13 1.87 2.01 \n", 785 | "213 1.97 1.95 2.03 2.07 1.96 1.92 \n", 786 | "214 1.89 2.03 1.93 2.07 1.87 2.01 \n", 787 | "\n", 788 | "[215 rows x 106 columns]" 789 | ] 790 | }, 791 | "execution_count": 5, 792 | "metadata": {}, 793 | "output_type": "execute_result" 794 | } 795 | ], 796 | "source": [ 797 | "# show dataframe\n", 798 | "df_premier21" 799 | ] 800 | }, 801 | { 802 | "cell_type": "markdown", 803 | "id": "sacred-march", 804 | "metadata": {}, 805 | "source": [] 806 | }, 807 | { 808 | "cell_type": "markdown", 809 | "id": "sacred-directive", 810 | "metadata": {}, 811 | "source": [ 812 | "# Read HTML" 813 | ] 814 | }, 815 | { 816 | "cell_type": "markdown", 817 | "id": "dental-cradle", 818 | "metadata": {}, 819 | "source": [ 820 | "Target Website: https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)" 821 | ] 822 | }, 823 | { 824 | "cell_type": "code", 825 | "execution_count": 6, 826 | "id": "sacred-louisville", 827 | "metadata": {}, 828 | "outputs": [], 829 | "source": [ 830 | "simpsons = pd.read_html('https://en.wikipedia.org/wiki/List_of_The_Simpsons_episodes_(seasons_1%E2%80%9320)')" 831 | ] 832 | }, 833 | { 834 | "cell_type": "code", 835 | "execution_count": 7, 836 | "id": "pending-plaza", 837 | "metadata": {}, 838 | "outputs": [ 839 | { 840 | "data": { 841 | "text/html": [ 842 | "
\n", 843 | "\n", 856 | "\n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | "
No.overallNo. inseasonTitleDirected byWritten byOriginal air dateProd.codeU.S. viewers(millions)
011\"Simpsons Roasting on an Open Fire\"David SilvermanMimi PondDecember 17, 19897G0826.7[46]
122\"Bart the Genius\"David SilvermanJon VittiJanuary 14, 19907G0224.5[46]
233\"Homer's Odyssey\"Wesley ArcherJay Kogen & Wallace WolodarskyJanuary 21, 19907G0327.5[47]
344\"There's No Disgrace Like Home\"Gregg Vanzo & Kent ButterworthAl Jean & Mike ReissJanuary 28, 19907G0420.2[48]
455\"Bart the General\"David SilvermanJohn SwartzwelderFebruary 4, 19907G0527.1[49]
\n", 928 | "
" 929 | ], 930 | "text/plain": [ 931 | " No.overall No. inseason Title \\\n", 932 | "0 1 1 \"Simpsons Roasting on an Open Fire\" \n", 933 | "1 2 2 \"Bart the Genius\" \n", 934 | "2 3 3 \"Homer's Odyssey\" \n", 935 | "3 4 4 \"There's No Disgrace Like Home\" \n", 936 | "4 5 5 \"Bart the General\" \n", 937 | "\n", 938 | " Directed by Written by \\\n", 939 | "0 David Silverman Mimi Pond \n", 940 | "1 David Silverman Jon Vitti \n", 941 | "2 Wesley Archer Jay Kogen & Wallace Wolodarsky \n", 942 | "3 Gregg Vanzo & Kent Butterworth Al Jean & Mike Reiss \n", 943 | "4 David Silverman John Swartzwelder \n", 944 | "\n", 945 | " Original air date Prod.code U.S. viewers(millions) \n", 946 | "0 December 17, 1989 7G08 26.7[46] \n", 947 | "1 January 14, 1990 7G02 24.5[46] \n", 948 | "2 January 21, 1990 7G03 27.5[47] \n", 949 | "3 January 28, 1990 7G04 20.2[48] \n", 950 | "4 February 4, 1990 7G05 27.1[49] " 951 | ] 952 | }, 953 | "execution_count": 7, 954 | "metadata": {}, 955 | "output_type": "execute_result" 956 | } 957 | ], 958 | "source": [ 959 | "simpsons[1].head()" 960 | ] 961 | } 962 | ], 963 | "metadata": { 964 | "hide_input": false, 965 | "kernelspec": { 966 | "display_name": "Python 3", 967 | "language": "python", 968 | "name": "python3" 969 | }, 970 | "language_info": { 971 | "codemirror_mode": { 972 | "name": "ipython", 973 | "version": 3 974 | }, 975 | "file_extension": ".py", 976 | "mimetype": "text/x-python", 977 | "name": "python", 978 | "nbconvert_exporter": "python", 979 | "pygments_lexer": "ipython3", 980 | "version": "3.7.3" 981 | }, 982 | "nbTranslate": { 983 | "displayLangs": [ 984 | "es", 985 | "en" 986 | ], 987 | "hotkey": "alt-t", 988 | "langInMainMenu": true, 989 | "sourceLang": "en", 990 | "targetLang": "es", 991 | "useGoogleTranslate": true 992 | }, 993 | "toc": { 994 | "base_numbering": 1, 995 | "nav_menu": {}, 996 | "number_sections": true, 997 | "sideBar": true, 998 | "skip_h1_title": false, 999 | "title_cell": "Table of Contents", 1000 | 
"title_sidebar": "Contents", 1001 | "toc_cell": false, 1002 | "toc_position": {}, 1003 | "toc_section_display": true, 1004 | "toc_window_display": false 1005 | } 1006 | }, 1007 | "nbformat": 4, 1008 | "nbformat_minor": 5 1009 | } 1010 | -------------------------------------------------------------------------------- /loc vs iloc.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "90a2167f", 6 | "metadata": {}, 7 | "source": [ 8 | "# loc vs iloc" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": 1, 14 | "id": "bc43ac4b", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import pandas as pd" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "id": "bc70a60e", 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "names = ['L. Messi', 'Cristiano Ronaldo', 'Neymar Jr', 'J. Oblak', 'E. Hazard']\n", 29 | "age = [32, 34, 27, 26, 28]\n", 30 | "height_cm = [170, 187, 175, 188, 175]\n", 31 | "nationality = ['Argentina', 'Portugal', 'Brazil', 'Slovenia', 'Belgium']\n", 32 | "club = ['Paris Saint-Germain', 'Manchester United', 'Paris Saint-Germain', 'Atlético Madrid', 'Real Madrid']\n", 33 | "\n", 34 | "df = pd.DataFrame(index=names, data={'age':age, 'height_cm':height_cm, 'nationality':nationality, 'club':club})" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "id": "f85e1494", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "data": { 45 | "text/html": [ 46 | "
\n", 47 | "\n", 60 | "\n", 61 | " \n", 62 | " \n", 63 | " \n", 64 | " \n", 65 | " \n", 66 | " \n", 67 | " \n", 68 | " \n", 69 | " \n", 70 | " \n", 71 | " \n", 72 | " \n", 73 | " \n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | "
ageheight_cmnationalityclub
L. Messi32170ArgentinaParis Saint-Germain
Cristiano Ronaldo34187PortugalManchester United
Neymar Jr27175BrazilParis Saint-Germain
J. Oblak26188SloveniaAtlético Madrid
E. Hazard28175BelgiumReal Madrid
\n", 108 | "
" 109 | ], 110 | "text/plain": [ 111 | " age height_cm nationality club\n", 112 | "L. Messi 32 170 Argentina Paris Saint-Germain\n", 113 | "Cristiano Ronaldo 34 187 Portugal Manchester United\n", 114 | "Neymar Jr 27 175 Brazil Paris Saint-Germain\n", 115 | "J. Oblak 26 188 Slovenia Atlético Madrid\n", 116 | "E. Hazard 28 175 Belgium Real Madrid" 117 | ] 118 | }, 119 | "execution_count": 3, 120 | "metadata": {}, 121 | "output_type": "execute_result" 122 | } 123 | ], 124 | "source": [ 125 | "df" 126 | ] 127 | }, 128 | { 129 | "cell_type": "markdown", 130 | "id": "afe4e373", 131 | "metadata": {}, 132 | "source": [ 133 | "## Selecting with a single value" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": 4, 139 | "id": "e67bf7a1", 140 | "metadata": {}, 141 | "outputs": [ 142 | { 143 | "data": { 144 | "text/plain": [ 145 | "170" 146 | ] 147 | }, 148 | "execution_count": 4, 149 | "metadata": {}, 150 | "output_type": "execute_result" 151 | } 152 | ], 153 | "source": [ 154 | "# get the height of L.Messi\n", 155 | "# loc\n", 156 | "df.loc['L. Messi', 'height_cm']\n", 157 | "# iloc\n", 158 | "df.iloc[0, 1]" 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "execution_count": 5, 164 | "id": "fcebb39c", 165 | "metadata": {}, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "187" 171 | ] 172 | }, 173 | "execution_count": 5, 174 | "metadata": {}, 175 | "output_type": "execute_result" 176 | } 177 | ], 178 | "source": [ 179 | "# get the height of Cristiano Ronaldo\n", 180 | "# loc\n", 181 | "df.loc['Cristiano Ronaldo', 'height_cm']\n", 182 | "# iloc\n", 183 | "df.iloc[1, 1]" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 6, 189 | "id": "e86ba675", 190 | "metadata": {}, 191 | "outputs": [ 192 | { 193 | "data": { 194 | "text/plain": [ 195 | "age 32\n", 196 | "height_cm 170\n", 197 | "nationality Argentina\n", 198 | "club Paris Saint-Germain\n", 199 | "Name: L. 
Messi, dtype: object" 200 | ] 201 | }, 202 | "execution_count": 6, 203 | "metadata": {}, 204 | "output_type": "execute_result" 205 | } 206 | ], 207 | "source": [ 208 | "# get all the data about L.Messi\n", 209 | "# loc\n", 210 | "df.loc['L. Messi', :]\n", 211 | "# iloc\n", 212 | "df.iloc[0, :]" 213 | ] 214 | }, 215 | { 216 | "cell_type": "markdown", 217 | "id": "71d2b2b8", 218 | "metadata": {}, 219 | "source": [ 220 | "## Selecting with a list of values" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": 7, 226 | "id": "f62aa834", 227 | "metadata": {}, 228 | "outputs": [ 229 | { 230 | "data": { 231 | "text/html": [ 232 | "
\n", 233 | "\n", 246 | "\n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | " \n", 259 | " \n", 260 | " \n", 261 | " \n", 262 | " \n", 263 | " \n", 264 | " \n", 265 | " \n", 266 | " \n", 267 | " \n", 268 | " \n", 269 | " \n", 270 | " \n", 271 | " \n", 272 | "
ageheight_cmnationalityclub
L. Messi32170ArgentinaParis Saint-Germain
Cristiano Ronaldo34187PortugalManchester United
\n", 273 | "
" 274 | ], 275 | "text/plain": [ 276 | " age height_cm nationality club\n", 277 | "L. Messi 32 170 Argentina Paris Saint-Germain\n", 278 | "Cristiano Ronaldo 34 187 Portugal Manchester United" 279 | ] 280 | }, 281 | "execution_count": 7, 282 | "metadata": {}, 283 | "output_type": "execute_result" 284 | } 285 | ], 286 | "source": [ 287 | "# get all data about L.Messi and Cristiano Ronaldo\n", 288 | "# loc\n", 289 | "df.loc[['L. Messi', 'Cristiano Ronaldo']]\n", 290 | "# iloc\n", 291 | "df.iloc[[0, 1]]" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": 8, 297 | "id": "78a4fed7", 298 | "metadata": {}, 299 | "outputs": [ 300 | { 301 | "data": { 302 | "text/plain": [ 303 | "L. Messi 170\n", 304 | "Cristiano Ronaldo 187\n", 305 | "Name: height_cm, dtype: int64" 306 | ] 307 | }, 308 | "execution_count": 8, 309 | "metadata": {}, 310 | "output_type": "execute_result" 311 | } 312 | ], 313 | "source": [ 314 | "# get the height of L.Messi and Cristiano Ronaldo\n", 315 | "df.loc[['L. Messi', 'Cristiano Ronaldo'], 'height_cm']\n", 316 | "\n", 317 | "# get the height of L.Messi and Cristiano Ronaldo\n", 318 | "df.iloc[[0, 1], 1]" 319 | ] 320 | }, 321 | { 322 | "cell_type": "markdown", 323 | "id": "4501bd3d", 324 | "metadata": {}, 325 | "source": [ 326 | "## Selecting a range of data with a slice" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 9, 332 | "id": "06fba14b", 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/html": [ 338 | "
\n", 339 | "\n", 352 | "\n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | " \n", 358 | " \n", 359 | " \n", 360 | " \n", 361 | " \n", 362 | " \n", 363 | " \n", 364 | " \n", 365 | " \n", 366 | " \n", 367 | " \n", 368 | " \n", 369 | " \n", 370 | " \n", 371 | " \n", 372 | " \n", 373 | " \n", 374 | " \n", 375 | "
ageheight_cmnationality
L. Messi32170Argentina
Cristiano Ronaldo34187Portugal
\n", 376 | "
" 377 | ], 378 | "text/plain": [ 379 | " age height_cm nationality\n", 380 | "L. Messi 32 170 Argentina\n", 381 | "Cristiano Ronaldo 34 187 Portugal" 382 | ] 383 | }, 384 | "execution_count": 9, 385 | "metadata": {}, 386 | "output_type": "execute_result" 387 | } 388 | ], 389 | "source": [ 390 | "# slice column labels: from age to nationality\n", 391 | "# loc\n", 392 | "players = ['L. Messi', 'Cristiano Ronaldo']\n", 393 | "df.loc[players, 'age':'nationality']\n", 394 | "\n", 395 | "# iloc\n", 396 | "players = [0, 1]\n", 397 | "df.iloc[players, 0:3] # age:nationality+1" 398 | ] 399 | }, 400 | { 401 | "cell_type": "markdown", 402 | "id": "c17b2be7", 403 | "metadata": {}, 404 | "source": [ 405 | "## Selecting with conditions" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 10, 411 | "id": "355313ed", 412 | "metadata": {}, 413 | "outputs": [ 414 | { 415 | "data": { 416 | "text/html": [ 417 | "
\n", 418 | "\n", 431 | "\n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | "
ageheight_cmclub
Cristiano Ronaldo34187Manchester United
J. Oblak26188Atlético Madrid
\n", 455 | "
" 456 | ], 457 | "text/plain": [ 458 | " age height_cm club\n", 459 | "Cristiano Ronaldo 34 187 Manchester United\n", 460 | "J. Oblak 26 188 Atlético Madrid" 461 | ] 462 | }, 463 | "execution_count": 10, 464 | "metadata": {}, 465 | "output_type": "execute_result" 466 | } 467 | ], 468 | "source": [ 469 | "# one condition: select player with height above 180cm\n", 470 | "# loc\n", 471 | "columns = ['age', 'height_cm', 'club']\n", 472 | "df.loc[df['height_cm']>180, columns]\n", 473 | "\n", 474 | "# iloc\n", 475 | "columns = [0,1,3]\n", 476 | "df.iloc[list(df['height_cm']>180), columns]" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 11, 482 | "id": "2515c8f2", 483 | "metadata": {}, 484 | "outputs": [ 485 | { 486 | "data": { 487 | "text/html": [ 488 | "
\n", 489 | "\n", 502 | "\n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | "
ageheight_cmnationalityclub
Neymar Jr27175BrazilParis Saint-Germain
\n", 522 | "
" 523 | ], 524 | "text/plain": [ 525 | " age height_cm nationality club\n", 526 | "Neymar Jr 27 175 Brazil Paris Saint-Germain" 527 | ] 528 | }, 529 | "execution_count": 11, 530 | "metadata": {}, 531 | "output_type": "execute_result" 532 | } 533 | ], 534 | "source": [ 535 | "# multiple conditions: select player with height above 170cm that played in PSG\n", 536 | "# loc\n", 537 | "df.loc[(df['height_cm']>170) & (df['club']=='Paris Saint-Germain'), :]\n", 538 | "\n", 539 | "# iloc\n", 540 | "df.iloc[list((df['height_cm']>170) & (df['club']=='Paris Saint-Germain')), :]" 541 | ] 542 | } 543 | ], 544 | "metadata": { 545 | "kernelspec": { 546 | "display_name": "Python 3", 547 | "language": "python", 548 | "name": "python3" 549 | }, 550 | "language_info": { 551 | "codemirror_mode": { 552 | "name": "ipython", 553 | "version": 3 554 | }, 555 | "file_extension": ".py", 556 | "mimetype": "text/x-python", 557 | "name": "python", 558 | "nbconvert_exporter": "python", 559 | "pygments_lexer": "ipython3", 560 | "version": "3.8.8" 561 | }, 562 | "toc": { 563 | "base_numbering": 1, 564 | "nav_menu": {}, 565 | "number_sections": true, 566 | "sideBar": true, 567 | "skip_h1_title": false, 568 | "title_cell": "Table of Contents", 569 | "title_sidebar": "Contents", 570 | "toc_cell": false, 571 | "toc_position": {}, 572 | "toc_section_display": true, 573 | "toc_window_display": false 574 | } 575 | }, 576 | "nbformat": 4, 577 | "nbformat_minor": 5 578 | } 579 | --------------------------------------------------------------------------------