├── README.md
└── 12 Amazing Pandas & Numpy Functions.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # Medium : 12-Amazing-Pandas-NumPy-Functions
2 | 
3 | ### This Jupyter Notebook is linked to my article published on medium.com
4 | 
5 | https://towardsdatascience.com/12-amazing-pandas-numpy-functions-22e5671a45b8
6 | 


--------------------------------------------------------------------------------
/12 Amazing Pandas & Numpy Functions.ipynb:
--------------------------------------------------------------------------------
   1 | {
   2 |  "cells": [
   3 |   {
   4 |    "cell_type": "markdown",
   5 |    "metadata": {},
   6 |    "source": [
   7 |     "# NumPy Functions"
   8 |    ]
   9 |   },
  10 |   {
  11 |    "cell_type": "code",
  12 |    "execution_count": 102,
  13 |    "metadata": {},
  14 |    "outputs": [],
  15 |    "source": [
  16 |     "import numpy as np"
  17 |    ]
  18 |   },
  19 |   {
  20 |    "cell_type": "markdown",
  21 |    "metadata": {},
  22 |    "source": [
  23 |     "## Argpartition()"
  24 |    ]
  25 |   },
  26 |   {
  27 |    "cell_type": "code",
  28 |    "execution_count": 103,
  29 |    "metadata": {},
  30 |    "outputs": [],
  31 |    "source": [
  32 |     "# Random array\n",
  33 |     "x = np.array([12, 10, 12, 0, 6, 8, 9, 1, 16, 4, 6, 0])"
  34 |    ]
  35 |   },
  36 |   {
  37 |    "cell_type": "code",
  38 |    "execution_count": 104,
  39 |    "metadata": {},
  40 |    "outputs": [
  41 |     {
  42 |      "data": {
  43 |       "text/plain": [
  44 |        "array([1, 8, 2, 0], dtype=int64)"
  45 |       ]
  46 |      },
  47 |      "execution_count": 104,
  48 |      "metadata": {},
  49 |      "output_type": "execute_result"
  50 |     }
  51 |    ],
  52 |    "source": [
  53 |     "index_val = np.argpartition(x, -4)[-4:]\n",
  54 |     "index_val"
  55 |    ]
  56 |   },
  57 |   {
  58 |    "cell_type": "code",
  59 |    "execution_count": 105,
  60 |    "metadata": {},
  61 |    "outputs": [
  62 |     {
  63 |      "data": {
  64 |       "text/plain": [
  65 |        "array([10, 12, 12, 16])"
  66 |       ]
  67 |      },
  68 |      "execution_count": 105,
  69 |      "metadata": {},
  70 |      "output_type": "execute_result"
  71 |     }
  72 |    ],
  73 |    "source": [
  74 |     "np.sort(x[index_val])"
  75 |    ]
  76 |   },
  77 |   {
  78 |    "cell_type": "markdown",
  79 |    "metadata": {},
  80 |    "source": [
  81 |     "## Allclose()"
  82 |    ]
  83 |   },
  84 |   {
  85 |    "cell_type": "code",
  86 |    "execution_count": 106,
  87 |    "metadata": {},
  88 |    "outputs": [],
  89 |    "source": [
  90 |     "array1 = np.array([0.12,0.17,0.24,0.29])\n",
  91 |     "array2 = np.array([0.13,0.19,0.26,0.31])"
  92 |    ]
  93 |   },
  94 |   {
  95 |    "cell_type": "code",
  96 |    "execution_count": 107,
  97 |    "metadata": {},
  98 |    "outputs": [
  99 |     {
 100 |      "data": {
 101 |       "text/plain": [
 102 |        "False"
 103 |       ]
 104 |      },
 105 |      "execution_count": 107,
 106 |      "metadata": {},
 107 |      "output_type": "execute_result"
 108 |     }
 109 |    ],
 110 |    "source": [
  111 |     "# with a relative tolerance (rtol) of 0.1, it should return False:\n",
 112 |     "np.allclose(array1,array2,0.1)"
 113 |    ]
 114 |   },
 115 |   {
 116 |    "cell_type": "code",
 117 |    "execution_count": 108,
 118 |    "metadata": {},
 119 |    "outputs": [
 120 |     {
 121 |      "data": {
 122 |       "text/plain": [
 123 |        "True"
 124 |       ]
 125 |      },
 126 |      "execution_count": 108,
 127 |      "metadata": {},
 128 |      "output_type": "execute_result"
 129 |     }
 130 |    ],
 131 |    "source": [
  132 |     "# with a relative tolerance (rtol) of 0.2, it should return True:\n",
 133 |     "np.allclose(array1,array2,0.2)"
 134 |    ]
 135 |   },
 136 |   {
 137 |    "cell_type": "markdown",
 138 |    "metadata": {},
 139 |    "source": [
 140 |     "## Clip()"
 141 |    ]
 142 |   },
 143 |   {
 144 |    "cell_type": "code",
 145 |    "execution_count": 109,
 146 |    "metadata": {},
 147 |    "outputs": [],
 148 |    "source": [
 149 |     "x = np.array([3, 17, 14, 23, 2, 2, 6, 8, 1, 2, 16, 0])"
 150 |    ]
 151 |   },
 152 |   {
 153 |    "cell_type": "code",
 154 |    "execution_count": 110,
 155 |    "metadata": {},
 156 |    "outputs": [
 157 |     {
 158 |      "data": {
 159 |       "text/plain": [
 160 |        "array([3, 5, 5, 5, 2, 2, 5, 5, 2, 2, 5, 2])"
 161 |       ]
 162 |      },
 163 |      "execution_count": 110,
 164 |      "metadata": {},
 165 |      "output_type": "execute_result"
 166 |     }
 167 |    ],
 168 |    "source": [
 169 |     "np.clip(x,2,5)"
 170 |    ]
 171 |   },
 172 |   {
 173 |    "cell_type": "markdown",
 174 |    "metadata": {},
 175 |    "source": [
 176 |     "## Extract()"
 177 |    ]
 178 |   },
 179 |   {
 180 |    "cell_type": "code",
 181 |    "execution_count": 111,
 182 |    "metadata": {},
 183 |    "outputs": [
 184 |     {
 185 |      "data": {
 186 |       "text/plain": [
 187 |        "array([17, 14,  6, 10, 12,  4, 13,  4,  3, 11,  0, 10])"
 188 |       ]
 189 |      },
 190 |      "execution_count": 111,
 191 |      "metadata": {},
 192 |      "output_type": "execute_result"
 193 |     }
 194 |    ],
 195 |    "source": [
 196 |     "# Random integers\n",
 197 |     "array = np.random.randint(20, size=12)\n",
 198 |     "array"
 199 |    ]
 200 |   },
 201 |   {
 202 |    "cell_type": "code",
 203 |    "execution_count": 112,
 204 |    "metadata": {},
 205 |    "outputs": [
 206 |     {
 207 |      "data": {
 208 |       "text/plain": [
 209 |        "array([ True, False, False, False, False, False,  True, False,  True,\n",
 210 |        "        True, False, False])"
 211 |       ]
 212 |      },
 213 |      "execution_count": 112,
 214 |      "metadata": {},
 215 |      "output_type": "execute_result"
 216 |     }
 217 |    ],
 218 |    "source": [
 219 |     "#  Divide by 2 and check if remainder is 1\n",
 220 |     "cond = np.mod(array, 2)==1\n",
 221 |     "cond"
 222 |    ]
 223 |   },
 224 |   {
 225 |    "cell_type": "code",
 226 |    "execution_count": 113,
 227 |    "metadata": {},
 228 |    "outputs": [
 229 |     {
 230 |      "data": {
 231 |       "text/plain": [
 232 |        "array([17, 13,  3, 11])"
 233 |       ]
 234 |      },
 235 |      "execution_count": 113,
 236 |      "metadata": {},
 237 |      "output_type": "execute_result"
 238 |     }
 239 |    ],
 240 |    "source": [
 241 |     "# Use extract to get the values\n",
 242 |     "np.extract(cond, array)"
 243 |    ]
 244 |   },
 245 |   {
 246 |    "cell_type": "code",
 247 |    "execution_count": 114,
 248 |    "metadata": {},
 249 |    "outputs": [
 250 |     {
 251 |      "data": {
 252 |       "text/plain": [
 253 |        "array([17,  0])"
 254 |       ]
 255 |      },
 256 |      "execution_count": 114,
 257 |      "metadata": {},
 258 |      "output_type": "execute_result"
 259 |     }
 260 |    ],
 261 |    "source": [
 262 |     "# Apply condition on extract directly\n",
 263 |     "np.extract(((array < 3) | (array > 15)), array)"
 264 |    ]
 265 |   },
 266 |   {
 267 |    "cell_type": "markdown",
 268 |    "metadata": {},
 269 |    "source": [
 270 |     "## Where()"
 271 |    ]
 272 |   },
 273 |   {
 274 |    "cell_type": "code",
 275 |    "execution_count": 115,
 276 |    "metadata": {},
 277 |    "outputs": [],
 278 |    "source": [
 279 |     "y = np.array([1,5,6,8,1,7,3,6,9])"
 280 |    ]
 281 |   },
 282 |   {
 283 |    "cell_type": "code",
 284 |    "execution_count": 116,
 285 |    "metadata": {},
 286 |    "outputs": [
 287 |     {
 288 |      "data": {
 289 |       "text/plain": [
 290 |        "(array([2, 3, 5, 7, 8], dtype=int64),)"
 291 |       ]
 292 |      },
 293 |      "execution_count": 116,
 294 |      "metadata": {},
 295 |      "output_type": "execute_result"
 296 |     }
 297 |    ],
 298 |    "source": [
 299 |     "# Where y is greater than 5, returns index position\n",
 300 |     "np.where(y>5)"
 301 |    ]
 302 |   },
 303 |   {
 304 |    "cell_type": "code",
 305 |    "execution_count": 117,
 306 |    "metadata": {},
 307 |    "outputs": [
 308 |     {
 309 |      "data": {
 310 |       "text/plain": [
 311 |        "array(['Miss', 'Miss', 'Hit', 'Hit', 'Miss', 'Hit', 'Miss', 'Hit', 'Hit'],\n",
 312 |        "      dtype='<U4')"
 313 |       ]
 314 |      },
 315 |      "execution_count": 117,
 316 |      "metadata": {},
 317 |      "output_type": "execute_result"
 318 |     }
 319 |    ],
 320 |    "source": [
  321 |     "# The first value replaces the elements that match the condition; the second replaces the elements that do not\n",
 322 |     "np.where(y>5, \"Hit\", \"Miss\")"
 323 |    ]
 324 |   },
 325 |   {
 326 |    "cell_type": "markdown",
 327 |    "metadata": {},
 328 |    "source": [
 329 |     "## Percentile()"
 330 |    ]
 331 |   },
 332 |   {
 333 |    "cell_type": "code",
 334 |    "execution_count": 118,
 335 |    "metadata": {},
 336 |    "outputs": [],
 337 |    "source": [
 338 |     "a = np.array([1,5,6,8,1,7,3,6,9])"
 339 |    ]
 340 |   },
 341 |   {
 342 |    "cell_type": "code",
 343 |    "execution_count": 119,
 344 |    "metadata": {},
 345 |    "outputs": [
 346 |     {
 347 |      "name": "stdout",
 348 |      "output_type": "stream",
 349 |      "text": [
 350 |       "50th Percentile of arr, axis = 0 :  6.0\n"
 351 |      ]
 352 |     }
 353 |    ],
 354 |    "source": [
 355 |     "print(\"50th Percentile of arr, axis = 0 : \",  \n",
 356 |     "      np.percentile(a, 50, axis =0))"
 357 |    ]
 358 |   },
 359 |   {
 360 |    "cell_type": "code",
 361 |    "execution_count": 120,
 362 |    "metadata": {},
 363 |    "outputs": [],
 364 |    "source": [
 365 |     "b = np.array([[10, 7, 4], [3, 2, 1]])"
 366 |    ]
 367 |   },
 368 |   {
 369 |    "cell_type": "code",
 370 |    "execution_count": 121,
 371 |    "metadata": {},
 372 |    "outputs": [
 373 |     {
 374 |      "name": "stdout",
 375 |      "output_type": "stream",
 376 |      "text": [
 377 |       "30th Percentile of arr, axis = 0 :  [5.1 3.5 1.9]\n"
 378 |      ]
 379 |     }
 380 |    ],
 381 |    "source": [
 382 |     "print(\"30th Percentile of arr, axis = 0 : \",  \n",
 383 |     "      np.percentile(b, 30, axis =0))"
 384 |    ]
 385 |   },
 386 |   {
 387 |    "cell_type": "code",
 388 |    "execution_count": null,
 389 |    "metadata": {},
 390 |    "outputs": [],
 391 |    "source": []
 392 |   },
 393 |   {
 394 |    "cell_type": "markdown",
 395 |    "metadata": {},
 396 |    "source": [
 397 |     "# Pandas Functions"
 398 |    ]
 399 |   },
 400 |   {
 401 |    "cell_type": "code",
 402 |    "execution_count": 122,
 403 |    "metadata": {},
 404 |    "outputs": [],
 405 |    "source": [
 406 |     "import pandas as pd"
 407 |    ]
 408 |   },
 409 |   {
 410 |    "cell_type": "markdown",
 411 |    "metadata": {},
 412 |    "source": [
 413 |     "## read_csv(nrows=10)"
 414 |    ]
 415 |   },
 416 |   {
 417 |    "cell_type": "code",
 418 |    "execution_count": 128,
 419 |    "metadata": {},
 420 |    "outputs": [],
 421 |    "source": [
 422 |     "import io\n",
 423 |     "import requests\n",
 424 |     "\n",
 425 |     "# I am using this online data set just to make things easier for you guys\n",
 426 |     "url = \"https://raw.github.com/vincentarelbundock/Rdatasets/master/csv/datasets/AirPassengers.csv\"\n",
 427 |     "s = requests.get(url).content"
 428 |    ]
 429 |   },
 430 |   {
 431 |    "cell_type": "code",
 432 |    "execution_count": 132,
 433 |    "metadata": {},
 434 |    "outputs": [
 435 |     {
 436 |      "data": {
 437 |       "text/html": [
 438 |        "<div>\n",
 439 |        "<style scoped>\n",
 440 |        "    .dataframe tbody tr th:only-of-type {\n",
 441 |        "        vertical-align: middle;\n",
 442 |        "    }\n",
 443 |        "\n",
 444 |        "    .dataframe tbody tr th {\n",
 445 |        "        vertical-align: top;\n",
 446 |        "    }\n",
 447 |        "\n",
 448 |        "    .dataframe thead th {\n",
 449 |        "        text-align: right;\n",
 450 |        "    }\n",
 451 |        "</style>\n",
 452 |        "<table border=\"1\" class=\"dataframe\">\n",
 453 |        "  <thead>\n",
 454 |        "    <tr style=\"text-align: right;\">\n",
 455 |        "      <th></th>\n",
 456 |        "      <th>time</th>\n",
 457 |        "      <th>value</th>\n",
 458 |        "    </tr>\n",
 459 |        "  </thead>\n",
 460 |        "  <tbody>\n",
 461 |        "    <tr>\n",
 462 |        "      <th>1</th>\n",
 463 |        "      <td>1949.000000</td>\n",
 464 |        "      <td>112</td>\n",
 465 |        "    </tr>\n",
 466 |        "    <tr>\n",
 467 |        "      <th>2</th>\n",
 468 |        "      <td>1949.083333</td>\n",
 469 |        "      <td>118</td>\n",
 470 |        "    </tr>\n",
 471 |        "    <tr>\n",
 472 |        "      <th>3</th>\n",
 473 |        "      <td>1949.166667</td>\n",
 474 |        "      <td>132</td>\n",
 475 |        "    </tr>\n",
 476 |        "    <tr>\n",
 477 |        "      <th>4</th>\n",
 478 |        "      <td>1949.250000</td>\n",
 479 |        "      <td>129</td>\n",
 480 |        "    </tr>\n",
 481 |        "    <tr>\n",
 482 |        "      <th>5</th>\n",
 483 |        "      <td>1949.333333</td>\n",
 484 |        "      <td>121</td>\n",
 485 |        "    </tr>\n",
 486 |        "    <tr>\n",
 487 |        "      <th>6</th>\n",
 488 |        "      <td>1949.416667</td>\n",
 489 |        "      <td>135</td>\n",
 490 |        "    </tr>\n",
 491 |        "    <tr>\n",
 492 |        "      <th>7</th>\n",
 493 |        "      <td>1949.500000</td>\n",
 494 |        "      <td>148</td>\n",
 495 |        "    </tr>\n",
 496 |        "    <tr>\n",
 497 |        "      <th>8</th>\n",
 498 |        "      <td>1949.583333</td>\n",
 499 |        "      <td>148</td>\n",
 500 |        "    </tr>\n",
 501 |        "    <tr>\n",
 502 |        "      <th>9</th>\n",
 503 |        "      <td>1949.666667</td>\n",
 504 |        "      <td>136</td>\n",
 505 |        "    </tr>\n",
 506 |        "    <tr>\n",
 507 |        "      <th>10</th>\n",
 508 |        "      <td>1949.750000</td>\n",
 509 |        "      <td>119</td>\n",
 510 |        "    </tr>\n",
 511 |        "  </tbody>\n",
 512 |        "</table>\n",
 513 |        "</div>"
 514 |       ],
 515 |       "text/plain": [
 516 |        "           time  value\n",
 517 |        "1   1949.000000    112\n",
 518 |        "2   1949.083333    118\n",
 519 |        "3   1949.166667    132\n",
 520 |        "4   1949.250000    129\n",
 521 |        "5   1949.333333    121\n",
 522 |        "6   1949.416667    135\n",
 523 |        "7   1949.500000    148\n",
 524 |        "8   1949.583333    148\n",
 525 |        "9   1949.666667    136\n",
 526 |        "10  1949.750000    119"
 527 |       ]
 528 |      },
 529 |      "execution_count": 132,
 530 |      "metadata": {},
 531 |      "output_type": "execute_result"
 532 |     }
 533 |    ],
 534 |    "source": [
 535 |     "# read only first 10 rows\n",
 536 |     "df = pd.read_csv(io.StringIO(s.decode('utf-8')),nrows=10 , index_col=0)\n",
 537 |     "df"
 538 |    ]
 539 |   },
 540 |   {
 541 |    "cell_type": "markdown",
 542 |    "metadata": {},
 543 |    "source": [
 544 |     "## map()"
 545 |    ]
 546 |   },
 547 |   {
 548 |    "cell_type": "code",
 549 |    "execution_count": 138,
 550 |    "metadata": {},
 551 |    "outputs": [
 552 |     {
 553 |      "data": {
 554 |       "text/html": [
 555 |        "<div>\n",
 556 |        "<style scoped>\n",
 557 |        "    .dataframe tbody tr th:only-of-type {\n",
 558 |        "        vertical-align: middle;\n",
 559 |        "    }\n",
 560 |        "\n",
 561 |        "    .dataframe tbody tr th {\n",
 562 |        "        vertical-align: top;\n",
 563 |        "    }\n",
 564 |        "\n",
 565 |        "    .dataframe thead th {\n",
 566 |        "        text-align: right;\n",
 567 |        "    }\n",
 568 |        "</style>\n",
 569 |        "<table border=\"1\" class=\"dataframe\">\n",
 570 |        "  <thead>\n",
 571 |        "    <tr style=\"text-align: right;\">\n",
 572 |        "      <th></th>\n",
 573 |        "      <th>b</th>\n",
 574 |        "      <th>d</th>\n",
 575 |        "      <th>e</th>\n",
 576 |        "    </tr>\n",
 577 |        "  </thead>\n",
 578 |        "  <tbody>\n",
 579 |        "    <tr>\n",
 580 |        "      <th>India</th>\n",
 581 |        "      <td>1.302245</td>\n",
 582 |        "      <td>0.571917</td>\n",
 583 |        "      <td>0.573815</td>\n",
 584 |        "    </tr>\n",
 585 |        "    <tr>\n",
 586 |        "      <th>USA</th>\n",
 587 |        "      <td>-0.481980</td>\n",
 588 |        "      <td>-0.464518</td>\n",
 589 |        "      <td>0.528452</td>\n",
 590 |        "    </tr>\n",
 591 |        "    <tr>\n",
 592 |        "      <th>China</th>\n",
 593 |        "      <td>-0.339589</td>\n",
 594 |        "      <td>1.244659</td>\n",
 595 |        "      <td>-0.518640</td>\n",
 596 |        "    </tr>\n",
 597 |        "    <tr>\n",
 598 |        "      <th>Russia</th>\n",
 599 |        "      <td>1.590290</td>\n",
 600 |        "      <td>1.839274</td>\n",
 601 |        "      <td>-0.250006</td>\n",
 602 |        "    </tr>\n",
 603 |        "  </tbody>\n",
 604 |        "</table>\n",
 605 |        "</div>"
 606 |       ],
 607 |       "text/plain": [
 608 |        "               b         d         e\n",
 609 |        "India   1.302245  0.571917  0.573815\n",
 610 |        "USA    -0.481980 -0.464518  0.528452\n",
 611 |        "China  -0.339589  1.244659 -0.518640\n",
 612 |        "Russia  1.590290  1.839274 -0.250006"
 613 |       ]
 614 |      },
 615 |      "execution_count": 138,
 616 |      "metadata": {},
 617 |      "output_type": "execute_result"
 618 |     }
 619 |    ],
 620 |    "source": [
 621 |     "# create a dataframe\n",
 622 |     "dframe = pd.DataFrame(np.random.randn(4, 3), columns=list('bde'), index=['India', 'USA', 'China', 'Russia'])\n",
 623 |     "dframe"
 624 |    ]
 625 |   },
 626 |   {
 627 |    "cell_type": "code",
 628 |    "execution_count": 141,
 629 |    "metadata": {},
 630 |    "outputs": [],
 631 |    "source": [
  632 |     "# Compute a formatted string (two decimal places) from a floating-point value\n",
 633 |     "changefn = lambda x: '%.2f' % x"
 634 |    ]
 635 |   },
 636 |   {
 637 |    "cell_type": "code",
 638 |    "execution_count": 142,
 639 |    "metadata": {},
 640 |    "outputs": [
 641 |     {
 642 |      "data": {
 643 |       "text/plain": [
 644 |        "India      0.57\n",
 645 |        "USA       -0.46\n",
 646 |        "China      1.24\n",
 647 |        "Russia     1.84\n",
 648 |        "Name: d, dtype: object"
 649 |       ]
 650 |      },
 651 |      "execution_count": 142,
 652 |      "metadata": {},
 653 |      "output_type": "execute_result"
 654 |     }
 655 |    ],
 656 |    "source": [
 657 |     "# Make changes element-wise\n",
 658 |     "dframe['d'].map(changefn)"
 659 |    ]
 660 |   },
 661 |   {
 662 |    "cell_type": "markdown",
 663 |    "metadata": {},
 664 |    "source": [
 665 |     "## Apply()"
 666 |    ]
 667 |   },
 668 |   {
 669 |    "cell_type": "code",
 670 |    "execution_count": 143,
 671 |    "metadata": {},
 672 |    "outputs": [],
 673 |    "source": [
  674 |     "# max minus min lambda fn\n",
 675 |     "fn = lambda x: x.max() - x.min()"
 676 |    ]
 677 |   },
 678 |   {
 679 |    "cell_type": "code",
 680 |    "execution_count": 144,
 681 |    "metadata": {},
 682 |    "outputs": [
 683 |     {
 684 |      "data": {
 685 |       "text/plain": [
 686 |        "b    2.072270\n",
 687 |        "d    2.303792\n",
 688 |        "e    1.092456\n",
 689 |        "dtype: float64"
 690 |       ]
 691 |      },
 692 |      "execution_count": 144,
 693 |      "metadata": {},
 694 |      "output_type": "execute_result"
 695 |     }
 696 |    ],
 697 |    "source": [
 698 |     "# Apply this on dframe\n",
 699 |     "dframe.apply(fn)"
 700 |    ]
 701 |   },
 702 |   {
 703 |    "cell_type": "markdown",
 704 |    "metadata": {},
 705 |    "source": [
 706 |     "## isin()"
 707 |    ]
 708 |   },
 709 |   {
 710 |    "cell_type": "code",
 711 |    "execution_count": 158,
 712 |    "metadata": {},
 713 |    "outputs": [],
 714 |    "source": [
  715 |     "# Using the dataframe we created with read_csv\n",
 716 |     "filter1 = df[\"value\"].isin([112]) \n",
 717 |     "filter2 = df[\"time\"].isin([1949.000000]) "
 718 |    ]
 719 |   },
 720 |   {
 721 |    "cell_type": "code",
 722 |    "execution_count": 159,
 723 |    "metadata": {},
 724 |    "outputs": [
 725 |     {
 726 |      "data": {
 727 |       "text/html": [
 728 |        "<div>\n",
 729 |        "<style scoped>\n",
 730 |        "    .dataframe tbody tr th:only-of-type {\n",
 731 |        "        vertical-align: middle;\n",
 732 |        "    }\n",
 733 |        "\n",
 734 |        "    .dataframe tbody tr th {\n",
 735 |        "        vertical-align: top;\n",
 736 |        "    }\n",
 737 |        "\n",
 738 |        "    .dataframe thead th {\n",
 739 |        "        text-align: right;\n",
 740 |        "    }\n",
 741 |        "</style>\n",
 742 |        "<table border=\"1\" class=\"dataframe\">\n",
 743 |        "  <thead>\n",
 744 |        "    <tr style=\"text-align: right;\">\n",
 745 |        "      <th></th>\n",
 746 |        "      <th>time</th>\n",
 747 |        "      <th>value</th>\n",
 748 |        "    </tr>\n",
 749 |        "  </thead>\n",
 750 |        "  <tbody>\n",
 751 |        "    <tr>\n",
 752 |        "      <th>1</th>\n",
 753 |        "      <td>1949.0</td>\n",
 754 |        "      <td>112</td>\n",
 755 |        "    </tr>\n",
 756 |        "  </tbody>\n",
 757 |        "</table>\n",
 758 |        "</div>"
 759 |       ],
 760 |       "text/plain": [
 761 |        "     time  value\n",
 762 |        "1  1949.0    112"
 763 |       ]
 764 |      },
 765 |      "execution_count": 159,
 766 |      "metadata": {},
 767 |      "output_type": "execute_result"
 768 |     }
 769 |    ],
 770 |    "source": [
 771 |     "df [filter1 & filter2]"
 772 |    ]
 773 |   },
 774 |   {
 775 |    "cell_type": "markdown",
 776 |    "metadata": {},
 777 |    "source": [
 778 |     "## copy()"
 779 |    ]
 780 |   },
 781 |   {
 782 |    "cell_type": "code",
 783 |    "execution_count": 160,
 784 |    "metadata": {},
 785 |    "outputs": [],
 786 |    "source": [
 787 |     "# creating sample series \n",
 788 |     "data = pd.Series(['India', 'Pakistan', 'China', 'Mongolia']) "
 789 |    ]
 790 |   },
 791 |   {
 792 |    "cell_type": "code",
 793 |    "execution_count": 173,
 794 |    "metadata": {},
 795 |    "outputs": [
 796 |     {
 797 |      "data": {
 798 |       "text/plain": [
 799 |        "0         USA\n",
 800 |        "1    Pakistan\n",
 801 |        "2       China\n",
 802 |        "3    Mongolia\n",
 803 |        "dtype: object"
 804 |       ]
 805 |      },
 806 |      "execution_count": 173,
 807 |      "metadata": {},
 808 |      "output_type": "execute_result"
 809 |     }
 810 |    ],
 811 |    "source": [
  812 |     "# The issue with plain assignment: data1 refers to the same object as data (no copy is made)\n",
 813 |     "data1= data\n",
 814 |     "# Change a value\n",
 815 |     "data1[0]='USA'\n",
  816 |     "# This also changes the value in the original Series\n",
 817 |     "data"
 818 |    ]
 819 |   },
 820 |   {
 821 |    "cell_type": "code",
 822 |    "execution_count": 161,
 823 |    "metadata": {},
 824 |    "outputs": [],
 825 |    "source": [
 826 |     "# creating copy of series \n",
 827 |     "new = data.copy() "
 828 |    ]
 829 |   },
 830 |   {
 831 |    "cell_type": "code",
 832 |    "execution_count": 162,
 833 |    "metadata": {},
 834 |    "outputs": [],
 835 |    "source": [
 836 |     "# assigning new values \n",
 837 |     "new[1]='Changed value'"
 838 |    ]
 839 |   },
 840 |   {
 841 |    "cell_type": "code",
 842 |    "execution_count": 169,
 843 |    "metadata": {},
 844 |    "outputs": [
 845 |     {
 846 |      "name": "stdout",
 847 |      "output_type": "stream",
 848 |      "text": [
 849 |       "0            India\n",
 850 |       "1    Changed value\n",
 851 |       "2            China\n",
 852 |       "3         Mongolia\n",
 853 |       "dtype: object\n",
 854 |       "0       India\n",
 855 |       "1    Pakistan\n",
 856 |       "2       China\n",
 857 |       "3    Mongolia\n",
 858 |       "dtype: object\n"
 859 |      ]
 860 |     }
 861 |    ],
 862 |    "source": [
 863 |     "# printing data \n",
 864 |     "print(new) \n",
 865 |     "print(data) "
 866 |    ]
 867 |   },
 868 |   {
 869 |    "cell_type": "markdown",
 870 |    "metadata": {},
 871 |    "source": [
 872 |     "## select_dtypes()"
 873 |    ]
 874 |   },
 875 |   {
 876 |    "cell_type": "code",
 877 |    "execution_count": 176,
 878 |    "metadata": {},
 879 |    "outputs": [],
 880 |    "source": [
 881 |     "framex =  df.select_dtypes(include=\"float64\")"
 882 |    ]
 883 |   },
 884 |   {
 885 |    "cell_type": "code",
 886 |    "execution_count": 177,
 887 |    "metadata": {},
 888 |    "outputs": [
 889 |     {
 890 |      "data": {
 891 |       "text/html": [
 892 |        "<div>\n",
 893 |        "<style scoped>\n",
 894 |        "    .dataframe tbody tr th:only-of-type {\n",
 895 |        "        vertical-align: middle;\n",
 896 |        "    }\n",
 897 |        "\n",
 898 |        "    .dataframe tbody tr th {\n",
 899 |        "        vertical-align: top;\n",
 900 |        "    }\n",
 901 |        "\n",
 902 |        "    .dataframe thead th {\n",
 903 |        "        text-align: right;\n",
 904 |        "    }\n",
 905 |        "</style>\n",
 906 |        "<table border=\"1\" class=\"dataframe\">\n",
 907 |        "  <thead>\n",
 908 |        "    <tr style=\"text-align: right;\">\n",
 909 |        "      <th></th>\n",
 910 |        "      <th>time</th>\n",
 911 |        "    </tr>\n",
 912 |        "  </thead>\n",
 913 |        "  <tbody>\n",
 914 |        "    <tr>\n",
 915 |        "      <th>1</th>\n",
 916 |        "      <td>1949.000000</td>\n",
 917 |        "    </tr>\n",
 918 |        "    <tr>\n",
 919 |        "      <th>2</th>\n",
 920 |        "      <td>1949.083333</td>\n",
 921 |        "    </tr>\n",
 922 |        "    <tr>\n",
 923 |        "      <th>3</th>\n",
 924 |        "      <td>1949.166667</td>\n",
 925 |        "    </tr>\n",
 926 |        "    <tr>\n",
 927 |        "      <th>4</th>\n",
 928 |        "      <td>1949.250000</td>\n",
 929 |        "    </tr>\n",
 930 |        "    <tr>\n",
 931 |        "      <th>5</th>\n",
 932 |        "      <td>1949.333333</td>\n",
 933 |        "    </tr>\n",
 934 |        "    <tr>\n",
 935 |        "      <th>6</th>\n",
 936 |        "      <td>1949.416667</td>\n",
 937 |        "    </tr>\n",
 938 |        "    <tr>\n",
 939 |        "      <th>7</th>\n",
 940 |        "      <td>1949.500000</td>\n",
 941 |        "    </tr>\n",
 942 |        "    <tr>\n",
 943 |        "      <th>8</th>\n",
 944 |        "      <td>1949.583333</td>\n",
 945 |        "    </tr>\n",
 946 |        "    <tr>\n",
 947 |        "      <th>9</th>\n",
 948 |        "      <td>1949.666667</td>\n",
 949 |        "    </tr>\n",
 950 |        "    <tr>\n",
 951 |        "      <th>10</th>\n",
 952 |        "      <td>1949.750000</td>\n",
 953 |        "    </tr>\n",
 954 |        "  </tbody>\n",
 955 |        "</table>\n",
 956 |        "</div>"
 957 |       ],
 958 |       "text/plain": [
 959 |        "           time\n",
 960 |        "1   1949.000000\n",
 961 |        "2   1949.083333\n",
 962 |        "3   1949.166667\n",
 963 |        "4   1949.250000\n",
 964 |        "5   1949.333333\n",
 965 |        "6   1949.416667\n",
 966 |        "7   1949.500000\n",
 967 |        "8   1949.583333\n",
 968 |        "9   1949.666667\n",
 969 |        "10  1949.750000"
 970 |       ]
 971 |      },
 972 |      "execution_count": 177,
 973 |      "metadata": {},
 974 |      "output_type": "execute_result"
 975 |     }
 976 |    ],
 977 |    "source": [
 978 |     "framex"
 979 |    ]
 980 |   },
 981 |   {
 982 |    "cell_type": "markdown",
 983 |    "metadata": {},
 984 |    "source": [
 985 |     "# Bonus:"
 986 |    ]
 987 |   },
 988 |   {
 989 |    "cell_type": "markdown",
 990 |    "metadata": {},
 991 |    "source": [
 992 |     "## pivot_table()"
 993 |    ]
 994 |   },
 995 |   {
 996 |    "cell_type": "code",
 997 |    "execution_count": 182,
 998 |    "metadata": {},
 999 |    "outputs": [],
1000 |    "source": [
1001 |     "# Create a sample dataframe\n",
1002 |     "school = pd.DataFrame({'A': ['Jay', 'Usher', 'Nicky', 'Romero', 'Will'], \n",
1003 |     "      'B': ['Masters', 'Graduate', 'Graduate', 'Masters', 'Graduate'], \n",
1004 |     "      'C': [26, 22, 20, 23, 24]}) "
1005 |    ]
1006 |   },
1007 |   {
1008 |    "cell_type": "code",
1009 |    "execution_count": 181,
1010 |    "metadata": {},
1011 |    "outputs": [
1012 |     {
1013 |      "data": {
1014 |       "text/html": [
1015 |        "<div>\n",
1016 |        "<style scoped>\n",
1017 |        "    .dataframe tbody tr th:only-of-type {\n",
1018 |        "        vertical-align: middle;\n",
1019 |        "    }\n",
1020 |        "\n",
1021 |        "    .dataframe tbody tr th {\n",
1022 |        "        vertical-align: top;\n",
1023 |        "    }\n",
1024 |        "\n",
1025 |        "    .dataframe thead th {\n",
1026 |        "        text-align: right;\n",
1027 |        "    }\n",
1028 |        "</style>\n",
1029 |        "<table border=\"1\" class=\"dataframe\">\n",
1030 |        "  <thead>\n",
1031 |        "    <tr style=\"text-align: right;\">\n",
1032 |        "      <th></th>\n",
1033 |        "      <th>B</th>\n",
1034 |        "      <th>Graduate</th>\n",
1035 |        "      <th>Masters</th>\n",
1036 |        "    </tr>\n",
1037 |        "    <tr>\n",
1038 |        "      <th>B</th>\n",
1039 |        "      <th>C</th>\n",
1040 |        "      <th></th>\n",
1041 |        "      <th></th>\n",
1042 |        "    </tr>\n",
1043 |        "  </thead>\n",
1044 |        "  <tbody>\n",
1045 |        "    <tr>\n",
1046 |        "      <th rowspan=\"3\" valign=\"top\">Graduate</th>\n",
1047 |        "      <th>20</th>\n",
1048 |        "      <td>Nicky</td>\n",
1049 |        "      <td>Not Available</td>\n",
1050 |        "    </tr>\n",
1051 |        "    <tr>\n",
1052 |        "      <th>22</th>\n",
1053 |        "      <td>Usher</td>\n",
1054 |        "      <td>Not Available</td>\n",
1055 |        "    </tr>\n",
1056 |        "    <tr>\n",
1057 |        "      <th>24</th>\n",
1058 |        "      <td>Will</td>\n",
1059 |        "      <td>Not Available</td>\n",
1060 |        "    </tr>\n",
1061 |        "    <tr>\n",
1062 |        "      <th rowspan=\"2\" valign=\"top\">Masters</th>\n",
1063 |        "      <th>23</th>\n",
1064 |        "      <td>Not Available</td>\n",
1065 |        "      <td>Romero</td>\n",
1066 |        "    </tr>\n",
1067 |        "    <tr>\n",
1068 |        "      <th>26</th>\n",
1069 |        "      <td>Not Available</td>\n",
1070 |        "      <td>Jay</td>\n",
1071 |        "    </tr>\n",
1072 |        "  </tbody>\n",
1073 |        "</table>\n",
1074 |        "</div>"
1075 |       ],
1076 |       "text/plain": [
1077 |        "B                 Graduate        Masters\n",
1078 |        "B        C                               \n",
1079 |        "Graduate 20          Nicky  Not Available\n",
1080 |        "         22          Usher  Not Available\n",
1081 |        "         24           Will  Not Available\n",
1082 |        "Masters  23  Not Available         Romero\n",
1083 |        "         26  Not Available            Jay"
1084 |       ]
1085 |      },
1086 |      "execution_count": 181,
1087 |      "metadata": {},
1088 |      "output_type": "execute_result"
1089 |     }
1090 |    ],
1091 |    "source": [
 1092 |     "# Let's create a pivot table to segregate students based on age and course\n",
1093 |     "table = pd.pivot_table(school, values ='A', index =['B', 'C'], \n",
1094 |     "                         columns =['B'], aggfunc = np.sum, fill_value=\"Not Available\") \n",
1095 |     "  \n",
1096 |     "table"
1097 |    ]
1098 |   },
1099 |   {
1100 |    "cell_type": "markdown",
1101 |    "metadata": {},
1102 |    "source": [
1103 |     "# Thank yoouuuuuuuu ! "
1104 |    ]
1105 |   }
1106 |  ],
1107 |  "metadata": {
1108 |   "kernelspec": {
1109 |    "display_name": "Python 3",
1110 |    "language": "python",
1111 |    "name": "python3"
1112 |   },
1113 |   "language_info": {
1114 |    "codemirror_mode": {
1115 |     "name": "ipython",
1116 |     "version": 3
1117 |    },
1118 |    "file_extension": ".py",
1119 |    "mimetype": "text/x-python",
1120 |    "name": "python",
1121 |    "nbconvert_exporter": "python",
1122 |    "pygments_lexer": "ipython3",
1123 |    "version": "3.7.3"
1124 |   }
1125 |  },
1126 |  "nbformat": 4,
1127 |  "nbformat_minor": 2
1128 | }
1129 | 


--------------------------------------------------------------------------------