├── README.md ├── 03-datastructures-code.ipynb ├── 01-intro-code.ipynb ├── 02-install-code.ipynb ├── 04-series-code.ipynb ├── 16-dataframe-code.ipynb └── 33-exporting-code.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # effective_pandas_book 2 | Errata and code for Effective Pandas book 3 | 4 | If you are interested in this book, [consider purchasing a copy](https://store.metasnake.com/effective-pandas-book). 5 | 6 | Physical version [available on Amazon](https://www.amazon.com/Effective-Pandas-Patterns-Manipulation-Treading/dp/B09MYXXSFM/ref=sr_1_1?keywords=effective+panda&qid=1639521930&sr=8-1). 7 | -------------------------------------------------------------------------------- /03-datastructures-code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "#. If you had a spreadsheet with data, which pandas data structure would you use" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": { 13 | "lines_to_next_cell": 2 14 | }, 15 | "source": [ 16 | "#. 
If you had a database with data, which pandas data structure would you use" 17 | ] 18 | } 19 | ], 20 | "metadata": { 21 | "jupytext": { 22 | "cell_metadata_filter": "-all", 23 | "main_language": "python", 24 | "notebook_metadata_filter": "-all" 25 | }, 26 | "kernelspec": { 27 | "display_name": "Python 3", 28 | "language": "python", 29 | "name": "python3" 30 | }, 31 | "language_info": { 32 | "codemirror_mode": { 33 | "name": "ipython", 34 | "version": 3 35 | }, 36 | "file_extension": ".py", 37 | "mimetype": "text/x-python", 38 | "name": "python", 39 | "nbconvert_exporter": "python", 40 | "pygments_lexer": "ipython3", 41 | "version": "3.8.5" 42 | } 43 | }, 44 | "nbformat": 4, 45 | "nbformat_minor": 4 46 | } 47 | -------------------------------------------------------------------------------- /01-intro-code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import seaborn as sns # REMOVELINE\n", 10 | "sns.set_context('paper') # REMOVELINE\n", 11 | "sns.set_style('white') # REMOVELINE" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [] 20 | } 21 | ], 22 | "metadata": { 23 | "jupytext": { 24 | "cell_metadata_filter": "-all", 25 | "main_language": "python", 26 | "notebook_metadata_filter": "-all" 27 | }, 28 | "kernelspec": { 29 | "display_name": "Python 3", 30 | "language": "python", 31 | "name": "python3" 32 | }, 33 | "language_info": { 34 | "codemirror_mode": { 35 | "name": "ipython", 36 | "version": 3 37 | }, 38 | "file_extension": ".py", 39 | "mimetype": "text/x-python", 40 | "name": "python", 41 | "nbconvert_exporter": "python", 42 | "pygments_lexer": "ipython3", 43 | "version": "3.8.5" 44 | } 45 | }, 46 | "nbformat": 4, 47 | "nbformat_minor": 4 48 | } 49 | 
-------------------------------------------------------------------------------- /02-install-code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "lines_to_next_cell": 2 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas\n", 12 | "pandas.__version__" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": { 19 | "lines_to_next_cell": 2 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "import pandas\n", 24 | "pandas.__version__" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "lines_to_next_cell": 2 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "import pandas\n", 36 | "pandas.__version__" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "#. Install pandas on your machine (using Anaconda or pip)." 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "#. Install Jupyter on your machine." 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": { 56 | "lines_to_next_cell": 2 57 | }, 58 | "source": [ 59 | "#. 
Launch Jupyter and run the following in a cell::" 60 | ] 61 | } 62 | ], 63 | "metadata": { 64 | "jupytext": { 65 | "cell_metadata_filter": "-all", 66 | "main_language": "python", 67 | "notebook_metadata_filter": "-all" 68 | }, 69 | "kernelspec": { 70 | "display_name": "Python 3", 71 | "language": "python", 72 | "name": "python3" 73 | }, 74 | "language_info": { 75 | "codemirror_mode": { 76 | "name": "ipython", 77 | "version": 3 78 | }, 79 | "file_extension": ".py", 80 | "mimetype": "text/x-python", 81 | "name": "python", 82 | "nbconvert_exporter": "python", 83 | "pygments_lexer": "ipython3", 84 | "version": "3.8.5" 85 | } 86 | }, 87 | "nbformat": 4, 88 | "nbformat_minor": 4 89 | } 90 | -------------------------------------------------------------------------------- /04-series-code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "series = {\n", 10 | " 'index':[0, 1, 2, 3],\n", 11 | " 'data':[145, 142, 38, 13],\n", 12 | " 'name':'songs'\n", 13 | " }" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 2, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "def get(series, idx):\n", 23 | " value_idx = series['index'].index(idx)\n", 24 | " return series['data'][value_idx]" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 3, 30 | "metadata": { 31 | "lines_to_next_cell": 2 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "142" 38 | ] 39 | }, 40 | "execution_count": 3, 41 | "metadata": {}, 42 | "output_type": "execute_result" 43 | } 44 | ], 45 | "source": [ 46 | "get(series, 1)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": { 53 | "lines_to_next_cell": 2 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "songs = {\n", 58 | " 'index':['Paul', 'John', 'George', 'Ringo'],\n", 59 
| " 'data':[145, 142, 38, 13],\n", 60 | " 'name':'counts'\n", 61 | " }" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": { 68 | "lines_to_next_cell": 2 69 | }, 70 | "outputs": [ 71 | { 72 | "data": { 73 | "text/plain": [ 74 | "142" 75 | ] 76 | }, 77 | "execution_count": 5, 78 | "metadata": {}, 79 | "output_type": "execute_result" 80 | } 81 | ], 82 | "source": [ 83 | "get(songs, 'John')" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 6, 89 | "metadata": { 90 | "lines_to_next_cell": 2 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "import pandas as pd\n", 95 | "songs2 = pd.Series([145, 142, 38, 13],\n", 96 | " name='counts')" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 7, 102 | "metadata": { 103 | "lines_to_next_cell": 2 104 | }, 105 | "outputs": [ 106 | { 107 | "data": { 108 | "text/plain": [ 109 | "0 145\n", 110 | "1 142\n", 111 | "2 38\n", 112 | "3 13\n", 113 | "Name: counts, dtype: int64" 114 | ] 115 | }, 116 | "execution_count": 7, 117 | "metadata": {}, 118 | "output_type": "execute_result" 119 | } 120 | ], 121 | "source": [ 122 | "songs2" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 8, 128 | "metadata": { 129 | "lines_to_next_cell": 2 130 | }, 131 | "outputs": [ 132 | { 133 | "data": { 134 | "text/plain": [ 135 | "RangeIndex(start=0, stop=4, step=1)" 136 | ] 137 | }, 138 | "execution_count": 8, 139 | "metadata": {}, 140 | "output_type": "execute_result" 141 | } 142 | ], 143 | "source": [ 144 | "songs2.index" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 9, 150 | "metadata": { 151 | "lines_to_next_cell": 2 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "songs3 = pd.Series([145, 142, 38, 13],\n", 156 | " name='counts',\n", 157 | " index=['Paul', 'John', 'George', 'Ringo'])" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": 10, 163 | "metadata": { 164 | 
"lines_to_next_cell": 2 165 | }, 166 | "outputs": [ 167 | { 168 | "data": { 169 | "text/plain": [ 170 | "Paul 145\n", 171 | "John 142\n", 172 | "George 38\n", 173 | "Ringo 13\n", 174 | "Name: counts, dtype: int64" 175 | ] 176 | }, 177 | "execution_count": 10, 178 | "metadata": {}, 179 | "output_type": "execute_result" 180 | } 181 | ], 182 | "source": [ 183 | "songs3" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": 11, 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "Index(['Paul', 'John', 'George', 'Ringo'], dtype='object')" 195 | ] 196 | }, 197 | "execution_count": 11, 198 | "metadata": {}, 199 | "output_type": "execute_result" 200 | } 201 | ], 202 | "source": [ 203 | "songs3.index # doctest: +NORMALIZE_WHITESPACE" 204 | ] 205 | }, 206 | { 207 | "cell_type": "code", 208 | "execution_count": 12, 209 | "metadata": {}, 210 | "outputs": [], 211 | "source": [ 212 | "class Foo:\n", 213 | " pass" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 13, 219 | "metadata": { 220 | "lines_to_next_cell": 2 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "ringo = pd.Series(\n", 225 | " ['Richard', 'Starkey', 13, Foo()],\n", 226 | " name='ringo')" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 14, 232 | "metadata": { 233 | "lines_to_next_cell": 2 234 | }, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/plain": [ 239 | "0 Richard\n", 240 | "1 Starkey\n", 241 | "2 13\n", 242 | "3 <__main__.Foo object at 0x7f125dd9b190>\n", 243 | "Name: ringo, dtype: object" 244 | ] 245 | }, 246 | "execution_count": 14, 247 | "metadata": {}, 248 | "output_type": "execute_result" 249 | } 250 | ], 251 | "source": [ 252 | "ringo # doctest: +SKIP +NORMALIZE_WHITESPACE" 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "execution_count": 15, 258 | "metadata": { 259 | "lines_to_next_cell": 2 260 | }, 261 | "outputs": [ 262 | { 263 | "data": { 264 | 
"text/plain": [ 265 | "Ono 2.0\n", 266 | "Clapton NaN\n", 267 | "dtype: float64" 268 | ] 269 | }, 270 | "execution_count": 15, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "import numpy as np\n", 277 | "nan_series = pd.Series([2, np.nan],\n", 278 | " index=['Ono', 'Clapton'])\n", 279 | "nan_series" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "execution_count": 16, 285 | "metadata": { 286 | "lines_to_next_cell": 2 287 | }, 288 | "outputs": [ 289 | { 290 | "data": { 291 | "text/plain": [ 292 | "1" 293 | ] 294 | }, 295 | "execution_count": 16, 296 | "metadata": {}, 297 | "output_type": "execute_result" 298 | } 299 | ], 300 | "source": [ 301 | "nan_series.count()" 302 | ] 303 | }, 304 | { 305 | "cell_type": "code", 306 | "execution_count": 17, 307 | "metadata": { 308 | "lines_to_next_cell": 2 309 | }, 310 | "outputs": [ 311 | { 312 | "data": { 313 | "text/plain": [ 314 | "2" 315 | ] 316 | }, 317 | "execution_count": 17, 318 | "metadata": {}, 319 | "output_type": "execute_result" 320 | } 321 | ], 322 | "source": [ 323 | "nan_series.size" 324 | ] 325 | }, 326 | { 327 | "cell_type": "code", 328 | "execution_count": 18, 329 | "metadata": { 330 | "lines_to_next_cell": 2 331 | }, 332 | "outputs": [ 333 | { 334 | "data": { 335 | "text/plain": [ 336 | "Ono 2\n", 337 | "Clapton \n", 338 | "dtype: Int64" 339 | ] 340 | }, 341 | "execution_count": 18, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "nan_series2 = pd.Series([2, None],\n", 348 | " index=['Ono', 'Clapton'],\n", 349 | " dtype='Int64')\n", 350 | "nan_series2" 351 | ] 352 | }, 353 | { 354 | "cell_type": "code", 355 | "execution_count": 19, 356 | "metadata": { 357 | "lines_to_next_cell": 2 358 | }, 359 | "outputs": [ 360 | { 361 | "data": { 362 | "text/plain": [ 363 | "1" 364 | ] 365 | }, 366 | "execution_count": 19, 367 | "metadata": {}, 368 | "output_type": "execute_result" 369 | } 370 | ], 371 | 
"source": [ 372 | "nan_series2.count()" 373 | ] 374 | }, 375 | { 376 | "cell_type": "code", 377 | "execution_count": 20, 378 | "metadata": { 379 | "lines_to_next_cell": 2 380 | }, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "text/plain": [ 385 | "Ono 2\n", 386 | "Clapton \n", 387 | "dtype: Int64" 388 | ] 389 | }, 390 | "execution_count": 20, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "nan_series.astype('Int64')" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 21, 402 | "metadata": { 403 | "lines_to_next_cell": 2 404 | }, 405 | "outputs": [ 406 | { 407 | "data": { 408 | "text/plain": [ 409 | "142" 410 | ] 411 | }, 412 | "execution_count": 21, 413 | "metadata": {}, 414 | "output_type": "execute_result" 415 | } 416 | ], 417 | "source": [ 418 | "import numpy as np\n", 419 | "numpy_ser = np.array([145, 142, 38, 13])\n", 420 | "songs3[1]" 421 | ] 422 | }, 423 | { 424 | "cell_type": "code", 425 | "execution_count": 22, 426 | "metadata": { 427 | "lines_to_next_cell": 2 428 | }, 429 | "outputs": [ 430 | { 431 | "data": { 432 | "text/plain": [ 433 | "142" 434 | ] 435 | }, 436 | "execution_count": 22, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "numpy_ser[1]" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 23, 448 | "metadata": { 449 | "lines_to_next_cell": 2 450 | }, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/plain": [ 455 | "84.5" 456 | ] 457 | }, 458 | "execution_count": 23, 459 | "metadata": {}, 460 | "output_type": "execute_result" 461 | } 462 | ], 463 | "source": [ 464 | "songs3.mean()" 465 | ] 466 | }, 467 | { 468 | "cell_type": "code", 469 | "execution_count": 24, 470 | "metadata": { 471 | "lines_to_next_cell": 2 472 | }, 473 | "outputs": [ 474 | { 475 | "data": { 476 | "text/plain": [ 477 | "84.5" 478 | ] 479 | }, 480 | "execution_count": 24, 481 | "metadata": {}, 482 | 
"output_type": "execute_result" 483 | } 484 | ], 485 | "source": [ 486 | "numpy_ser.mean()" 487 | ] 488 | }, 489 | { 490 | "cell_type": "code", 491 | "execution_count": 25, 492 | "metadata": { 493 | "lines_to_next_cell": 2 494 | }, 495 | "outputs": [], 496 | "source": [ 497 | "mask = songs3 > songs3.median() # boolean array" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 26, 503 | "metadata": { 504 | "lines_to_next_cell": 2 505 | }, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "Paul True\n", 511 | "John True\n", 512 | "George False\n", 513 | "Ringo False\n", 514 | "Name: counts, dtype: bool" 515 | ] 516 | }, 517 | "execution_count": 26, 518 | "metadata": {}, 519 | "output_type": "execute_result" 520 | } 521 | ], 522 | "source": [ 523 | "mask" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 27, 529 | "metadata": { 530 | "lines_to_next_cell": 2 531 | }, 532 | "outputs": [ 533 | { 534 | "data": { 535 | "text/plain": [ 536 | "Paul 145\n", 537 | "John 142\n", 538 | "Name: counts, dtype: int64" 539 | ] 540 | }, 541 | "execution_count": 27, 542 | "metadata": {}, 543 | "output_type": "execute_result" 544 | } 545 | ], 546 | "source": [ 547 | "songs3[mask]" 548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 28, 553 | "metadata": { 554 | "lines_to_next_cell": 2 555 | }, 556 | "outputs": [ 557 | { 558 | "data": { 559 | "text/plain": [ 560 | "array([145, 142])" 561 | ] 562 | }, 563 | "execution_count": 28, 564 | "metadata": {}, 565 | "output_type": "execute_result" 566 | } 567 | ], 568 | "source": [ 569 | "numpy_ser[numpy_ser > np.median(numpy_ser)]" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": 29, 575 | "metadata": { 576 | "lines_to_next_cell": 2 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "import pandas as pd\n", 581 | "import numpy as np" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": 30, 587 | 
"metadata": { 588 | "lines_to_next_cell": 2 589 | }, 590 | "outputs": [], 591 | "source": [ 592 | "from pandas import *" 593 | ] 594 | }, 595 | { 596 | "cell_type": "code", 597 | "execution_count": 31, 598 | "metadata": { 599 | "lines_to_next_cell": 2 600 | }, 601 | "outputs": [ 602 | { 603 | "data": { 604 | "text/plain": [ 605 | "0 m\n", 606 | "1 l\n", 607 | "2 xs\n", 608 | "3 s\n", 609 | "4 xl\n", 610 | "dtype: category\n", 611 | "Categories (5, object): ['l', 'm', 's', 'xl', 'xs']" 612 | ] 613 | }, 614 | "execution_count": 31, 615 | "metadata": {}, 616 | "output_type": "execute_result" 617 | } 618 | ], 619 | "source": [ 620 | "s = pd.Series(['m', 'l', 'xs', 's', 'xl'], dtype='category')\n", 621 | "s" 622 | ] 623 | }, 624 | { 625 | "cell_type": "code", 626 | "execution_count": 32, 627 | "metadata": { 628 | "lines_to_next_cell": 2 629 | }, 630 | "outputs": [ 631 | { 632 | "data": { 633 | "text/plain": [ 634 | "False" 635 | ] 636 | }, 637 | "execution_count": 32, 638 | "metadata": {}, 639 | "output_type": "execute_result" 640 | } 641 | ], 642 | "source": [ 643 | "s.cat.ordered" 644 | ] 645 | }, 646 | { 647 | "cell_type": "code", 648 | "execution_count": 33, 649 | "metadata": { 650 | "lines_to_next_cell": 2 651 | }, 652 | "outputs": [], 653 | "source": [ 654 | "s2 = pd.Series(['m', 'l', 'xs', 's', 'xl'])\n", 655 | "size_type = pd.api.types.CategoricalDtype(\n", 656 | " categories=['s','m','l'], ordered=True)\n", 657 | "s3 = s2.astype(size_type)" 658 | ] 659 | }, 660 | { 661 | "cell_type": "code", 662 | "execution_count": 34, 663 | "metadata": { 664 | "lines_to_next_cell": 2 665 | }, 666 | "outputs": [ 667 | { 668 | "data": { 669 | "text/plain": [ 670 | "0 m\n", 671 | "1 l\n", 672 | "2 NaN\n", 673 | "3 s\n", 674 | "4 NaN\n", 675 | "dtype: category\n", 676 | "Categories (3, object): ['s' < 'm' < 'l']" 677 | ] 678 | }, 679 | "execution_count": 34, 680 | "metadata": {}, 681 | "output_type": "execute_result" 682 | } 683 | ], 684 | "source": [ 685 | "s3" 686 | ] 687 | }, 
688 | { 689 | "cell_type": "code", 690 | "execution_count": 35, 691 | "metadata": { 692 | "lines_to_next_cell": 2 693 | }, 694 | "outputs": [ 695 | { 696 | "data": { 697 | "text/plain": [ 698 | "0 True\n", 699 | "1 True\n", 700 | "2 False\n", 701 | "3 False\n", 702 | "4 False\n", 703 | "dtype: bool" 704 | ] 705 | }, 706 | "execution_count": 35, 707 | "metadata": {}, 708 | "output_type": "execute_result" 709 | } 710 | ], 711 | "source": [ 712 | "s3 > 's'" 713 | ] 714 | }, 715 | { 716 | "cell_type": "code", 717 | "execution_count": 36, 718 | "metadata": { 719 | "lines_to_next_cell": 2 720 | }, 721 | "outputs": [ 722 | { 723 | "data": { 724 | "text/plain": [ 725 | "0 m\n", 726 | "1 l\n", 727 | "2 xs\n", 728 | "3 s\n", 729 | "4 xl\n", 730 | "dtype: category\n", 731 | "Categories (5, object): ['xs' < 's' < 'm' < 'l' < 'xl']" 732 | ] 733 | }, 734 | "execution_count": 36, 735 | "metadata": {}, 736 | "output_type": "execute_result" 737 | } 738 | ], 739 | "source": [ 740 | "s.cat.reorder_categories(['xs','s','m','l', 'xl'],\n", 741 | " ordered=True)" 742 | ] 743 | }, 744 | { 745 | "cell_type": "code", 746 | "execution_count": 37, 747 | "metadata": { 748 | "lines_to_next_cell": 2, 749 | "scrolled": true 750 | }, 751 | "outputs": [ 752 | { 753 | "data": { 754 | "text/plain": [ 755 | "0 M\n", 756 | "1 L\n", 757 | "2 NaN\n", 758 | "3 S\n", 759 | "4 NaN\n", 760 | "dtype: object" 761 | ] 762 | }, 763 | "execution_count": 37, 764 | "metadata": {}, 765 | "output_type": "execute_result" 766 | } 767 | ], 768 | "source": [ 769 | "s3.str.upper()" 770 | ] 771 | } 772 | ], 773 | "metadata": { 774 | "jupytext": { 775 | "cell_metadata_filter": "-all", 776 | "main_language": "python", 777 | "notebook_metadata_filter": "-all" 778 | }, 779 | "kernelspec": { 780 | "display_name": "Python 3", 781 | "language": "python", 782 | "name": "python3" 783 | }, 784 | "language_info": { 785 | "codemirror_mode": { 786 | "name": "ipython", 787 | "version": 3 788 | }, 789 | "file_extension": ".py", 790 | 
"mimetype": "text/x-python", 791 | "name": "python", 792 | "nbconvert_exporter": "python", 793 | "pygments_lexer": "ipython3", 794 | "version": "3.8.5" 795 | } 796 | }, 797 | "nbformat": 4, 798 | "nbformat_minor": 4 799 | } 800 | -------------------------------------------------------------------------------- /16-dataframe-code.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "df = {\n", 10 | " 'index':[0,1,2],\n", 11 | " 'cols': [\n", 12 | " { 'name':'growth',\n", 13 | " 'data':[.5, .7, 1.2] },\n", 14 | " { 'name':'Name',\n", 15 | " 'data':['Paul', 'George', 'Ringo'] },\n", 16 | " ]\n", 17 | "}" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": {}, 24 | "outputs": [], 25 | "source": [ 26 | "def get_row(df, idx):\n", 27 | " results = []\n", 28 | " value_idx = df['index'].index(idx)\n", 29 | " for col in df['cols']:\n", 30 | " results.append(col['data'][value_idx])\n", 31 | " return results" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 3, 37 | "metadata": {}, 38 | "outputs": [ 39 | { 40 | "data": { 41 | "text/plain": [ 42 | "[0.7, 'George']" 43 | ] 44 | }, 45 | "execution_count": 3, 46 | "metadata": {}, 47 | "output_type": "execute_result" 48 | } 49 | ], 50 | "source": [ 51 | "get_row(df, 1)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": 4, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "def get_col(df, name):\n", 61 | " for col in df['cols']:\n", 62 | " if col['name'] == name:\n", 63 | " return col['data']" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 5, 69 | "metadata": { 70 | "lines_to_next_cell": 2 71 | }, 72 | "outputs": [ 73 | { 74 | "data": { 75 | "text/plain": [ 76 | "['Paul', 'George', 'Ringo']" 77 | ] 78 | }, 79 | "execution_count": 5, 80 | 
"metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "get_col(df, 'Name')" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": 6, 91 | "metadata": { 92 | "lines_to_next_cell": 2 93 | }, 94 | "outputs": [], 95 | "source": [ 96 | "import pandas as pd\n", 97 | "df = pd.DataFrame({\n", 98 | " 'growth':[.5, .7, 1.2],\n", 99 | " 'Name':['Paul', 'George', 'Ringo'] })" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 7, 105 | "metadata": { 106 | "lines_to_next_cell": 2 107 | }, 108 | "outputs": [ 109 | { 110 | "data": { 111 | "text/html": [ 112 | "
\n", 113 | "\n", 126 | "\n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | "
growthName
00.5Paul
10.7George
21.2Ringo
\n", 152 | "
" 153 | ], 154 | "text/plain": [ 155 | " growth Name\n", 156 | "0 0.5 Paul\n", 157 | "1 0.7 George\n", 158 | "2 1.2 Ringo" 159 | ] 160 | }, 161 | "execution_count": 7, 162 | "metadata": {}, 163 | "output_type": "execute_result" 164 | } 165 | ], 166 | "source": [ 167 | "df" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": 8, 173 | "metadata": { 174 | "lines_to_next_cell": 2 175 | }, 176 | "outputs": [ 177 | { 178 | "data": { 179 | "text/plain": [ 180 | "growth 1.2\n", 181 | "Name Ringo\n", 182 | "Name: 2, dtype: object" 183 | ] 184 | }, 185 | "execution_count": 8, 186 | "metadata": {}, 187 | "output_type": "execute_result" 188 | } 189 | ], 190 | "source": [ 191 | "df.iloc[2]" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 9, 197 | "metadata": { 198 | "lines_to_next_cell": 2 199 | }, 200 | "outputs": [ 201 | { 202 | "data": { 203 | "text/plain": [ 204 | "0 Paul\n", 205 | "1 George\n", 206 | "2 Ringo\n", 207 | "Name: Name, dtype: object" 208 | ] 209 | }, 210 | "execution_count": 9, 211 | "metadata": {}, 212 | "output_type": "execute_result" 213 | } 214 | ], 215 | "source": [ 216 | "df['Name']" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 10, 222 | "metadata": { 223 | "lines_to_next_cell": 2 224 | }, 225 | "outputs": [ 226 | { 227 | "data": { 228 | "text/plain": [ 229 | "pandas.core.series.Series" 230 | ] 231 | }, 232 | "execution_count": 10, 233 | "metadata": {}, 234 | "output_type": "execute_result" 235 | } 236 | ], 237 | "source": [ 238 | "type(df['Name'])" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": 11, 244 | "metadata": { 245 | "lines_to_next_cell": 2 246 | }, 247 | "outputs": [ 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "0 paul\n", 252 | "1 george\n", 253 | "2 ringo\n", 254 | "Name: Name, dtype: object" 255 | ] 256 | }, 257 | "execution_count": 11, 258 | "metadata": {}, 259 | "output_type": "execute_result" 260 | } 261 | ], 262 | 
"source": [ 263 | "df['Name'].str.lower()" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": 12, 269 | "metadata": { 270 | "lines_to_next_cell": 2 271 | }, 272 | "outputs": [ 273 | { 274 | "data": { 275 | "text/plain": [ 276 | "0 Paul\n", 277 | "1 George\n", 278 | "2 Ringo\n", 279 | "Name: Name, dtype: object" 280 | ] 281 | }, 282 | "execution_count": 12, 283 | "metadata": {}, 284 | "output_type": "execute_result" 285 | } 286 | ], 287 | "source": [ 288 | "df.Name" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 13, 294 | "metadata": { 295 | "lines_to_next_cell": 2 296 | }, 297 | "outputs": [ 298 | { 299 | "data": { 300 | "text/html": [ 301 | "
\n", 302 | "\n", 315 | "\n", 316 | " \n", 317 | " \n", 318 | " \n", 319 | " \n", 320 | " \n", 321 | " \n", 322 | " \n", 323 | " \n", 324 | " \n", 325 | " \n", 326 | " \n", 327 | " \n", 328 | " \n", 329 | " \n", 330 | " \n", 331 | " \n", 332 | " \n", 333 | " \n", 334 | " \n", 335 | " \n", 336 | " \n", 337 | " \n", 338 | " \n", 339 | " \n", 340 | "
growthName
00.5Paul
10.7George
21.2Ringo
\n", 341 | "
" 342 | ], 343 | "text/plain": [ 344 | " growth Name\n", 345 | "0 0.5 Paul\n", 346 | "1 0.7 George\n", 347 | "2 1.2 Ringo" 348 | ] 349 | }, 350 | "execution_count": 13, 351 | "metadata": {}, 352 | "output_type": "execute_result" 353 | } 354 | ], 355 | "source": [ 356 | "pd.DataFrame([\n", 357 | " {'growth':.5, 'Name':'Paul'},\n", 358 | " {'growth':.7, 'Name':'George'},\n", 359 | " {'growth':1.2, 'Name':'Ringo'}])" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": 14, 365 | "metadata": { 366 | "lines_to_next_cell": 2 367 | }, 368 | "outputs": [], 369 | "source": [ 370 | "from io import StringIO\n", 371 | "csv_file = StringIO(\"\"\"growth,Name\n", 372 | ".5,Paul\n", 373 | ".7,George\n", 374 | "1.2,Ringo\"\"\")" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": 15, 380 | "metadata": { 381 | "lines_to_next_cell": 2 382 | }, 383 | "outputs": [ 384 | { 385 | "data": { 386 | "text/html": [ 387 | "
\n", 388 | "\n", 401 | "\n", 402 | " \n", 403 | " \n", 404 | " \n", 405 | " \n", 406 | " \n", 407 | " \n", 408 | " \n", 409 | " \n", 410 | " \n", 411 | " \n", 412 | " \n", 413 | " \n", 414 | " \n", 415 | " \n", 416 | " \n", 417 | " \n", 418 | " \n", 419 | " \n", 420 | " \n", 421 | " \n", 422 | " \n", 423 | " \n", 424 | " \n", 425 | " \n", 426 | "
growthName
00.5Paul
10.7George
21.2Ringo
\n", 427 | "
" 428 | ], 429 | "text/plain": [ 430 | " growth Name\n", 431 | "0 0.5 Paul\n", 432 | "1 0.7 George\n", 433 | "2 1.2 Ringo" 434 | ] 435 | }, 436 | "execution_count": 15, 437 | "metadata": {}, 438 | "output_type": "execute_result" 439 | } 440 | ], 441 | "source": [ 442 | "pd.read_csv(csv_file)" 443 | ] 444 | }, 445 | { 446 | "cell_type": "code", 447 | "execution_count": 16, 448 | "metadata": { 449 | "lines_to_next_cell": 2 450 | }, 451 | "outputs": [ 452 | { 453 | "data": { 454 | "text/html": [ 455 | "
\n", 456 | "\n", 469 | "\n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | "
abc
00.496714-0.1382640.647689
11.523030-0.234153-0.234137
21.5792130.767435-0.469474
30.542560-0.463418-0.465730
40.241962-1.913280-1.724918
5-0.562288-1.0128310.314247
6-0.908024-1.4123041.465649
7-0.2257760.067528-1.424748
8-0.5443830.110923-1.150994
90.375698-0.600639-0.291694
\n", 541 | "
" 542 | ], 543 | "text/plain": [ 544 | " a b c\n", 545 | "0 0.496714 -0.138264 0.647689\n", 546 | "1 1.523030 -0.234153 -0.234137\n", 547 | "2 1.579213 0.767435 -0.469474\n", 548 | "3 0.542560 -0.463418 -0.465730\n", 549 | "4 0.241962 -1.913280 -1.724918\n", 550 | "5 -0.562288 -1.012831 0.314247\n", 551 | "6 -0.908024 -1.412304 1.465649\n", 552 | "7 -0.225776 0.067528 -1.424748\n", 553 | "8 -0.544383 0.110923 -1.150994\n", 554 | "9 0.375698 -0.600639 -0.291694" 555 | ] 556 | }, 557 | "execution_count": 16, 558 | "metadata": {}, 559 | "output_type": "execute_result" 560 | } 561 | ], 562 | "source": [ 563 | "import numpy as np\n", 564 | "np.random.seed(42)\n", 565 | "pd.DataFrame(np.random.randn(10,3),\n", 566 | " columns=['a', 'b', 'c'])" 567 | ] 568 | }, 569 | { 570 | "cell_type": "code", 571 | "execution_count": 17, 572 | "metadata": { 573 | "lines_to_next_cell": 2 574 | }, 575 | "outputs": [ 576 | { 577 | "data": { 578 | "text/plain": [ 579 | "[RangeIndex(start=0, stop=3, step=1),\n", 580 | " Index(['growth', 'Name'], dtype='object')]" 581 | ] 582 | }, 583 | "execution_count": 17, 584 | "metadata": {}, 585 | "output_type": "execute_result" 586 | } 587 | ], 588 | "source": [ 589 | "df.axes # doctest: +NORMALIZE_WHITESPACE" 590 | ] 591 | }, 592 | { 593 | "cell_type": "code", 594 | "execution_count": 18, 595 | "metadata": { 596 | "lines_to_next_cell": 2 597 | }, 598 | "outputs": [ 599 | { 600 | "data": { 601 | "text/plain": [ 602 | "growth 2.4\n", 603 | "Name PaulGeorgeRingo\n", 604 | "dtype: object" 605 | ] 606 | }, 607 | "execution_count": 18, 608 | "metadata": {}, 609 | "output_type": "execute_result" 610 | } 611 | ], 612 | "source": [ 613 | "df.sum(axis=0)" 614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": 19, 619 | "metadata": { 620 | "lines_to_next_cell": 2 621 | }, 622 | "outputs": [ 623 | { 624 | "name": "stderr", 625 | "output_type": "stream", 626 | "text": [ 627 | ":1: FutureWarning: Dropping of nuisance columns in DataFrame 
reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. Select only valid columns before calling the reduction.\n", 628 | " df.sum(axis=1)\n" 629 | ] 630 | }, 631 | { 632 | "data": { 633 | "text/plain": [ 634 | "0 0.5\n", 635 | "1 0.7\n", 636 | "2 1.2\n", 637 | "dtype: float64" 638 | ] 639 | }, 640 | "execution_count": 19, 641 | "metadata": {}, 642 | "output_type": "execute_result" 643 | } 644 | ], 645 | "source": [ 646 | "df.sum(axis=1)" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 20, 652 | "metadata": { 653 | "lines_to_next_cell": 2 654 | }, 655 | "outputs": [ 656 | { 657 | "data": { 658 | "text/plain": [ 659 | "growth 2.4\n", 660 | "Name PaulGeorgeRingo\n", 661 | "dtype: object" 662 | ] 663 | }, 664 | "execution_count": 20, 665 | "metadata": {}, 666 | "output_type": "execute_result" 667 | } 668 | ], 669 | "source": [ 670 | "df.sum(axis='index')" 671 | ] 672 | }, 673 | { 674 | "cell_type": "code", 675 | "execution_count": 21, 676 | "metadata": { 677 | "lines_to_next_cell": 2 678 | }, 679 | "outputs": [ 680 | { 681 | "name": "stderr", 682 | "output_type": "stream", 683 | "text": [ 684 | ":1: FutureWarning: Dropping of nuisance columns in DataFrame reductions (with 'numeric_only=None') is deprecated; in a future version this will raise TypeError. 
Select only valid columns before calling the reduction.\n", 685 | " df.sum(axis='columns')\n" 686 | ] 687 | }, 688 | { 689 | "data": { 690 | "text/plain": [ 691 | "0 0.5\n", 692 | "1 0.7\n", 693 | "2 1.2\n", 694 | "dtype: float64" 695 | ] 696 | }, 697 | "execution_count": 21, 698 | "metadata": {}, 699 | "output_type": "execute_result" 700 | } 701 | ], 702 | "source": [ 703 | "df.sum(axis='columns')" 704 | ] 705 | }, 706 | { 707 | "cell_type": "code", 708 | "execution_count": 22, 709 | "metadata": { 710 | "lines_to_next_cell": 2 711 | }, 712 | "outputs": [ 713 | { 714 | "data": { 715 | "text/plain": [ 716 | "RangeIndex(start=0, stop=3, step=1)" 717 | ] 718 | }, 719 | "execution_count": 22, 720 | "metadata": {}, 721 | "output_type": "execute_result" 722 | } 723 | ], 724 | "source": [ 725 | "df.axes[0]" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": 23, 731 | "metadata": { 732 | "lines_to_next_cell": 2 733 | }, 734 | "outputs": [ 735 | { 736 | "data": { 737 | "text/plain": [ 738 | "Index(['growth', 'Name'], dtype='object')" 739 | ] 740 | }, 741 | "execution_count": 23, 742 | "metadata": {}, 743 | "output_type": "execute_result" 744 | } 745 | ], 746 | "source": [ 747 | "df.axes[1]" 748 | ] 749 | }, 750 | { 751 | "cell_type": "code", 752 | "execution_count": 24, 753 | "metadata": { 754 | "lines_to_next_cell": 2 755 | }, 756 | "outputs": [ 757 | { 758 | "data": { 759 | "text/html": [ 760 | "
\n", 761 | "\n", 774 | "\n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | "
Score1Score2
0None85
1None90
\n", 795 | "
" 796 | ], 797 | "text/plain": [ 798 | " Score1 Score2\n", 799 | "0 None 85\n", 800 | "1 None 90" 801 | ] 802 | }, 803 | "execution_count": 24, 804 | "metadata": {}, 805 | "output_type": "execute_result" 806 | } 807 | ], 808 | "source": [ 809 | "df = pd.DataFrame({'Score1': [None, None],\n", 810 | " 'Score2': [85, 90]})\n", 811 | "df" 812 | ] 813 | }, 814 | { 815 | "cell_type": "code", 816 | "execution_count": 25, 817 | "metadata": { 818 | "lines_to_next_cell": 2 819 | }, 820 | "outputs": [ 821 | { 822 | "data": { 823 | "text/plain": [ 824 | "Score1 0\n", 825 | "Score2 175\n", 826 | "dtype: int64" 827 | ] 828 | }, 829 | "execution_count": 25, 830 | "metadata": {}, 831 | "output_type": "execute_result" 832 | } 833 | ], 834 | "source": [ 835 | "df.apply(np.sum, axis=0)" 836 | ] 837 | }, 838 | { 839 | "cell_type": "code", 840 | "execution_count": 26, 841 | "metadata": { 842 | "lines_to_next_cell": 2 843 | }, 844 | "outputs": [ 845 | { 846 | "data": { 847 | "text/plain": [ 848 | "0 85\n", 849 | "1 90\n", 850 | "dtype: int64" 851 | ] 852 | }, 853 | "execution_count": 26, 854 | "metadata": {}, 855 | "output_type": "execute_result" 856 | } 857 | ], 858 | "source": [ 859 | "df.apply(np.sum, axis=1)" 860 | ] 861 | } 862 | ], 863 | "metadata": { 864 | "jupytext": { 865 | "cell_metadata_filter": "-all", 866 | "main_language": "python", 867 | "notebook_metadata_filter": "-all" 868 | }, 869 | "kernelspec": { 870 | "display_name": "Python 3", 871 | "language": "python", 872 | "name": "python3" 873 | }, 874 | "language_info": { 875 | "codemirror_mode": { 876 | "name": "ipython", 877 | "version": 3 878 | }, 879 | "file_extension": ".py", 880 | "mimetype": "text/x-python", 881 | "name": "python", 882 | "nbconvert_exporter": "python", 883 | "pygments_lexer": "ipython3", 884 | "version": "3.8.5" 885 | } 886 | }, 887 | "nbformat": 4, 888 | "nbformat_minor": 4 889 | } 890 | -------------------------------------------------------------------------------- /33-exporting-code.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "/home/matt/envs/menv/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3418: DtypeWarning: Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.\n", 13 | " exec(code_obj, self.user_global_ns, self.user_ns)\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "import pandas as pd\n", 19 | "url = 'https://github.com/mattharrison/datasets/raw/master'\\\n", 20 | " '/data/dirtydevil.txt'\n", 21 | "df = pd.read_csv(url, skiprows=lambda num: num <34 or num == 35,\n", 22 | " sep='\\t')\n", 23 | "def to_denver_time(df_, time_col, tz_col):\n", 24 | " return (df_\n", 25 | " .assign(**{tz_col: df_[tz_col].replace('MDT', 'MST7MDT')})\n", 26 | " .groupby(tz_col)\n", 27 | " [time_col]\n", 28 | " .transform(lambda s: pd.to_datetime(s)\n", 29 | " .dt.tz_localize(s.name, ambiguous=True)\n", 30 | " .dt.tz_convert('America/Denver'))\n", 31 | " )" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 2, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "def tweak_river(df_):\n", 41 | " return (df_\n", 42 | " .assign(datetime=to_denver_time(df_, 'datetime', 'tz_cd'))\n", 43 | " .rename(columns={'144166_00060': 'cfs',\n", 44 | " '144167_00065': 'gage_height'})\n", 45 | " .set_index('datetime')\n", 46 | " )" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 3, 52 | "metadata": { 53 | "lines_to_next_cell": 2 54 | }, 55 | "outputs": [ 56 | { 57 | "data": { 58 | "text/html": [ 59 | "
\n", 60 | "\n", 73 | "\n", 74 | " \n", 75 | " \n", 76 | " \n", 77 | " \n", 78 | " \n", 79 | " \n", 80 | " \n", 81 | " \n", 82 | " \n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | "
agency_cdsite_notz_cdcfs144166_00060_cdgage_height144167_00065_cd
datetime
2001-05-07 01:00:00-06:00USGS9333500MDT71.00A:[91]NaNNaN
2001-05-07 01:15:00-06:00USGS9333500MDT71.00A:[91]NaNNaN
2001-05-07 01:30:00-06:00USGS9333500MDT71.00A:[91]NaNNaN
2001-05-07 01:45:00-06:00USGS9333500MDT70.00A:[91]NaNNaN
2001-05-07 02:00:00-06:00USGS9333500MDT70.00A:[91]NaNNaN
........................
2020-09-28 08:30:00-06:00USGS9333500MDT9.53P6.16P
2020-09-28 08:45:00-06:00USGS9333500MDT9.20P6.15P
2020-09-28 09:00:00-06:00USGS9333500MDT9.20P6.15P
2020-09-28 09:15:00-06:00USGS9333500MDT9.20P6.15P
2020-09-28 09:30:00-06:00USGS9333500MDT9.20P6.15P
\n", 209 | "

539305 rows × 7 columns

\n", 210 | "
" 211 | ], 212 | "text/plain": [ 213 | " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", 214 | "datetime \n", 215 | "2001-05-07 01:00:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", 216 | "2001-05-07 01:15:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", 217 | "2001-05-07 01:30:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", 218 | "2001-05-07 01:45:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", 219 | "2001-05-07 02:00:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", 220 | "... ... ... ... ... ... \n", 221 | "2020-09-28 08:30:00-06:00 USGS 9333500 MDT 9.53 P \n", 222 | "2020-09-28 08:45:00-06:00 USGS 9333500 MDT 9.20 P \n", 223 | "2020-09-28 09:00:00-06:00 USGS 9333500 MDT 9.20 P \n", 224 | "2020-09-28 09:15:00-06:00 USGS 9333500 MDT 9.20 P \n", 225 | "2020-09-28 09:30:00-06:00 USGS 9333500 MDT 9.20 P \n", 226 | "\n", 227 | " gage_height 144167_00065_cd \n", 228 | "datetime \n", 229 | "2001-05-07 01:00:00-06:00 NaN NaN \n", 230 | "2001-05-07 01:15:00-06:00 NaN NaN \n", 231 | "2001-05-07 01:30:00-06:00 NaN NaN \n", 232 | "2001-05-07 01:45:00-06:00 NaN NaN \n", 233 | "2001-05-07 02:00:00-06:00 NaN NaN \n", 234 | "... ... ... 
\n", 235 | "2020-09-28 08:30:00-06:00 6.16 P \n", 236 | "2020-09-28 08:45:00-06:00 6.15 P \n", 237 | "2020-09-28 09:00:00-06:00 6.15 P \n", 238 | "2020-09-28 09:15:00-06:00 6.15 P \n", 239 | "2020-09-28 09:30:00-06:00 6.15 P \n", 240 | "\n", 241 | "[539305 rows x 7 columns]" 242 | ] 243 | }, 244 | "execution_count": 3, 245 | "metadata": {}, 246 | "output_type": "execute_result" 247 | } 248 | ], 249 | "source": [ 250 | "dd = tweak_river(df)\n", 251 | "dd" 252 | ] 253 | }, 254 | { 255 | "cell_type": "code", 256 | "execution_count": 4, 257 | "metadata": { 258 | "lines_to_next_cell": 2 259 | }, 260 | "outputs": [], 261 | "source": [ 262 | "dd.to_csv('/tmp/dd.csv') # doctest: +SKIP" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 5, 268 | "metadata": { 269 | "lines_to_next_cell": 2 270 | }, 271 | "outputs": [ 272 | { 273 | "name": "stdout", 274 | "output_type": "stream", 275 | "text": [ 276 | "datetime,agency_cd,site_no,tz_cd,cfs,144166_00060_cd,gage_height,144167_00065_cd\n", 277 | "2001-05-07 01:00:00-06:00,USGS,9333500,MDT,71.0,A:[91],,\n", 278 | "2001-05-07 01:15:00-06:00,USGS,9333500,MDT,71.0,A:[91],,\n", 279 | "2001-05-07 01:30:00-06:00,USGS,9333500,MDT,71.0,A:[91],,\n", 280 | "2001-05-07 01:45:00-06:00,USGS,9333500,MDT,70.0,A:[91],,\n", 281 | "2001-05-07 02:00:00-06:00,USGS,9333500,MDT,70.0,A:[91],,\n", 282 | "\n" 283 | ] 284 | } 285 | ], 286 | "source": [ 287 | "print(dd.head(5).to_csv())" 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": 6, 293 | "metadata": { 294 | "lines_to_next_cell": 2 295 | }, 296 | "outputs": [ 297 | { 298 | "name": "stderr", 299 | "output_type": "stream", 300 | "text": [ 301 | "/home/matt/envs/menv/lib/python3.8/site-packages/IPython/core/interactiveshell.py:3418: DtypeWarning: Columns (7) have mixed types.Specify dtype option on import or set low_memory=False.\n", 302 | " exec(code_obj, self.user_global_ns, self.user_ns)\n" 303 | ] 304 | } 305 | ], 306 | "source": [ 307 | "dd2 = 
pd.read_csv('/tmp/dd.csv', index_col='datetime')" 308 | ] 309 | }, 310 | { 311 | "cell_type": "code", 312 | "execution_count": 7, 313 | "metadata": { 314 | "lines_to_next_cell": 2 315 | }, 316 | "outputs": [ 317 | { 318 | "ename": "ValueError", 319 | "evalue": "Excel does not support datetimes with timezones. Please ensure that datetimes are timezone unaware before writing to Excel.", 320 | "output_type": "error", 321 | "traceback": [ 322 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 323 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 324 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_excel\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/tmp/dd.xlsx'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 325 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/core/generic.py\u001b[0m in \u001b[0;36mto_excel\u001b[0;34m(self, excel_writer, sheet_name, na_rep, float_format, columns, header, index, index_label, startrow, startcol, engine, merge_cells, encoding, inf_rep, verbose, freeze_panes, storage_options)\u001b[0m\n\u001b[1;32m 2282\u001b[0m \u001b[0minf_rep\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minf_rep\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2283\u001b[0m )\n\u001b[0;32m-> 2284\u001b[0;31m formatter.write(\n\u001b[0m\u001b[1;32m 2285\u001b[0m \u001b[0mexcel_writer\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2286\u001b[0m \u001b[0msheet_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msheet_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 326 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/formats/excel.py\u001b[0m in \u001b[0;36mwrite\u001b[0;34m(self, writer, sheet_name, 
startrow, startcol, freeze_panes, engine, storage_options)\u001b[0m\n\u001b[1;32m 838\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 839\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 840\u001b[0;31m writer.write_cells(\n\u001b[0m\u001b[1;32m 841\u001b[0m \u001b[0mformatted_cells\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 842\u001b[0m \u001b[0msheet_name\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 327 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/excel/_xlsxwriter.py\u001b[0m in \u001b[0;36mwrite_cells\u001b[0;34m(self, cells, sheet_name, startrow, startcol, freeze_panes)\u001b[0m\n\u001b[1;32m 225\u001b[0m \u001b[0mwks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfreeze_panes\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfreeze_panes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 226\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 227\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mcell\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcells\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 228\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfmt\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_value_with_fmt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 328 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/formats/excel.py\u001b[0m in \u001b[0;36mget_formatted_cells\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 776\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0mget_formatted_cells\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mIterable\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mExcelCell\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 777\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mcell\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mitertools\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_header\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_body\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 778\u001b[0;31m \u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_format_value\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcell\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 779\u001b[0m \u001b[0;32myield\u001b[0m \u001b[0mcell\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 780\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", 329 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/formats/excel.py\u001b[0m in \u001b[0;36m_format_value\u001b[0;34m(self, val)\u001b[0m\n\u001b[1;32m 524\u001b[0m \u001b[0mval\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfloat_format\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0mval\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 525\u001b[0m \u001b[0;32mif\u001b[0m 
\u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mval\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"tzinfo\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 526\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 527\u001b[0m \u001b[0;34m\"Excel does not support datetimes with \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 528\u001b[0m \u001b[0;34m\"timezones. Please ensure that datetimes \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 330 | "\u001b[0;31mValueError\u001b[0m: Excel does not support datetimes with timezones. Please ensure that datetimes are timezone unaware before writing to Excel." 331 | ] 332 | } 333 | ], 334 | "source": [ 335 | "dd.to_excel('/tmp/dd.xlsx')" 336 | ] 337 | }, 338 | { 339 | "cell_type": "code", 340 | "execution_count": 8, 341 | "metadata": { 342 | "lines_to_next_cell": 2 343 | }, 344 | "outputs": [], 345 | "source": [ 346 | "(dd # doctest: +SKIP\n", 347 | " .reset_index()\n", 348 | " .assign(datetime=lambda df_: df_.datetime.dt.tz_convert(tz=None))\n", 349 | " .set_index('datetime')\n", 350 | " .to_excel('/tmp/dd.xlsx')\n", 351 | ")" 352 | ] 353 | }, 354 | { 355 | "cell_type": "code", 356 | "execution_count": 9, 357 | "metadata": { 358 | "lines_to_next_cell": 2 359 | }, 360 | "outputs": [], 361 | "source": [ 362 | "writer = pd.ExcelWriter('/tmp/dd2.xlsx')\n", 363 | "dd2 = (dd \n", 364 | " .reset_index()\n", 365 | " .assign(datetime=lambda df_: df_.datetime.dt.tz_convert(tz=None))\n", 366 | " .set_index('datetime')\n", 367 | ")\n", 368 | "(dd2 # doctest: +SKIP\n", 369 | " .loc['2010':'2010-12-31']\n", 370 | " .to_excel(writer, sheet_name='2010')\n", 371 | ")\n", 372 | "(dd2 # doctest: +SKIP\n", 373 | " .loc['2011':'2011-12-31'] \n", 374 | " .to_excel(writer, sheet_name='2011')\n", 375 | 
")\n", 376 | "writer.save() # doctest: +SKIP" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 10, 382 | "metadata": { 383 | "lines_to_next_cell": 2, 384 | "scrolled": true 385 | }, 386 | "outputs": [ 387 | { 388 | "ename": "ValueError", 389 | "evalue": "feather does not support serializing for the index; you can .reset_index() to make the index into column(s)", 390 | "output_type": "error", 391 | "traceback": [ 392 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 393 | "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", 394 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mdd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_feather\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'/tmp/dd.fea'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 395 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/util/_decorators.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mnew_arg_name\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnew_arg_value\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 207\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 208\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mF\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mwrapper\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 396 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mto_feather\u001b[0;34m(self, path, **kwargs)\u001b[0m\n\u001b[1;32m 2517\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mpandas\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mio\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfeather_format\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mto_feather\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2518\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 2519\u001b[0;31m \u001b[0mto_feather\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2520\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2521\u001b[0m @doc(\n", 397 | "\u001b[0;32m~/envs/menv/lib/python3.8/site-packages/pandas/io/feather_format.py\u001b[0m in \u001b[0;36mto_feather\u001b[0;34m(df, path, storage_options, **kwargs)\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mInt64Index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mRangeIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 60\u001b[0m \u001b[0mtyp\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdf\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mindex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 61\u001b[0;31m raise ValueError(\n\u001b[0m\u001b[1;32m 62\u001b[0m \u001b[0;34mf\"feather does not support 
serializing {typ} \"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 63\u001b[0m \u001b[0;34m\"for the index; you can .reset_index() to make the index into column(s)\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 398 | "\u001b[0;31mValueError\u001b[0m: feather does not support serializing for the index; you can .reset_index() to make the index into column(s)" 399 | ] 400 | } 401 | ], 402 | "source": [ 403 | "dd.to_feather('/tmp/dd.fea')" 404 | ] 405 | }, 406 | { 407 | "cell_type": "code", 408 | "execution_count": 11, 409 | "metadata": { 410 | "lines_to_next_cell": 2 411 | }, 412 | "outputs": [], 413 | "source": [ 414 | "(dd\n", 415 | " .reset_index()\n", 416 | " .to_feather('/tmp/dd.fea')\n", 417 | ")" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": 12, 423 | "metadata": { 424 | "lines_to_next_cell": 2 425 | }, 426 | "outputs": [ 427 | { 428 | "data": { 429 | "text/plain": [ 430 | "True" 431 | ] 432 | }, 433 | "execution_count": 12, 434 | "metadata": {}, 435 | "output_type": "execute_result" 436 | } 437 | ], 438 | "source": [ 439 | "dd2 = pd.read_feather('/tmp/dd.fea')\n", 440 | "dd2.set_index('datetime').equals(dd)" 441 | ] 442 | }, 443 | { 444 | "cell_type": "code", 445 | "execution_count": 13, 446 | "metadata": { 447 | "lines_to_next_cell": 2 448 | }, 449 | "outputs": [], 450 | "source": [ 451 | "import sqlite3\n", 452 | "con = sqlite3.connect('dd.db')\n", 453 | "dd.to_sql('dd', con, if_exists='replace') # doctest: +SKIP" 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": 14, 459 | "metadata": { 460 | "lines_to_next_cell": 2 461 | }, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "text/plain": [ 466 | "False" 467 | ] 468 | }, 469 | "execution_count": 14, 470 | "metadata": {}, 471 | "output_type": "execute_result" 472 | } 473 | ], 474 | "source": [ 475 | "import sqlalchemy as sa\n", 476 | "eng = sa.create_engine('sqlite:///dd.db')\n", 477 | "sa_con = 
eng.connect()\n", 478 | "dd2 = pd.read_sql('dd', sa_con, index_col='datetime') # doctest: +SKIP\n", 479 | "dd2.equals(dd) # doctest: +SKIP" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": 15, 485 | "metadata": { 486 | "lines_to_next_cell": 2 487 | }, 488 | "outputs": [ 489 | { 490 | "data": { 491 | "text/html": [ 492 | "
\n", 493 | "\n", 506 | "\n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | " \n", 573 | " \n", 574 | " \n", 575 | " \n", 576 | " \n", 577 | " \n", 578 | " \n", 579 | " \n", 580 | " \n", 581 | " \n", 582 | " \n", 583 | " \n", 584 | " \n", 585 | " \n", 586 | " \n", 587 | " \n", 588 | " \n", 589 | " \n", 590 | " \n", 591 | " \n", 592 | " \n", 593 | " \n", 594 | " \n", 595 | " \n", 596 | " \n", 597 | " \n", 598 | " \n", 599 | " \n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | " \n", 629 | " \n", 630 | " \n", 631 | " \n", 632 | " \n", 633 | " \n", 634 | " \n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | "
agency_cdsite_notz_cdcfs144166_00060_cdgage_height144167_00065_cd
datetime
2001-05-07 01:00:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 01:15:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 01:30:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 01:45:00USGS9333500MDT70.00A:[91]NaNNone
2001-05-07 02:00:00USGS9333500MDT70.00A:[91]NaNNone
........................
2020-09-28 08:30:00USGS9333500MDT9.53P6.16P
2020-09-28 08:45:00USGS9333500MDT9.20P6.15P
2020-09-28 09:00:00USGS9333500MDT9.20P6.15P
2020-09-28 09:15:00USGS9333500MDT9.20P6.15P
2020-09-28 09:30:00USGS9333500MDT9.20P6.15P
\n", 642 | "

539305 rows × 7 columns

\n", 643 | "
" 644 | ], 645 | "text/plain": [ 646 | " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", 647 | "datetime \n", 648 | "2001-05-07 01:00:00 USGS 9333500 MDT 71.00 A:[91] \n", 649 | "2001-05-07 01:15:00 USGS 9333500 MDT 71.00 A:[91] \n", 650 | "2001-05-07 01:30:00 USGS 9333500 MDT 71.00 A:[91] \n", 651 | "2001-05-07 01:45:00 USGS 9333500 MDT 70.00 A:[91] \n", 652 | "2001-05-07 02:00:00 USGS 9333500 MDT 70.00 A:[91] \n", 653 | "... ... ... ... ... ... \n", 654 | "2020-09-28 08:30:00 USGS 9333500 MDT 9.53 P \n", 655 | "2020-09-28 08:45:00 USGS 9333500 MDT 9.20 P \n", 656 | "2020-09-28 09:00:00 USGS 9333500 MDT 9.20 P \n", 657 | "2020-09-28 09:15:00 USGS 9333500 MDT 9.20 P \n", 658 | "2020-09-28 09:30:00 USGS 9333500 MDT 9.20 P \n", 659 | "\n", 660 | " gage_height 144167_00065_cd \n", 661 | "datetime \n", 662 | "2001-05-07 01:00:00 NaN None \n", 663 | "2001-05-07 01:15:00 NaN None \n", 664 | "2001-05-07 01:30:00 NaN None \n", 665 | "2001-05-07 01:45:00 NaN None \n", 666 | "2001-05-07 02:00:00 NaN None \n", 667 | "... ... ... 
\n", 668 | "2020-09-28 08:30:00 6.16 P \n", 669 | "2020-09-28 08:45:00 6.15 P \n", 670 | "2020-09-28 09:00:00 6.15 P \n", 671 | "2020-09-28 09:15:00 6.15 P \n", 672 | "2020-09-28 09:30:00 6.15 P \n", 673 | "\n", 674 | "[539305 rows x 7 columns]" 675 | ] 676 | }, 677 | "execution_count": 15, 678 | "metadata": {}, 679 | "output_type": "execute_result" 680 | } 681 | ], 682 | "source": [ 683 | "dd2" 684 | ] 685 | }, 686 | { 687 | "cell_type": "code", 688 | "execution_count": 16, 689 | "metadata": { 690 | "lines_to_next_cell": 2 691 | }, 692 | "outputs": [ 693 | { 694 | "data": { 695 | "text/plain": [ 696 | "False" 697 | ] 698 | }, 699 | "execution_count": 16, 700 | "metadata": {}, 701 | "output_type": "execute_result" 702 | } 703 | ], 704 | "source": [ 705 | "(dd2 # doctest: +SKIP\n", 706 | " .reset_index()\n", 707 | " .assign(datetime=lambda df_: df_.datetime\n", 708 | " .dt.tz_localize('America/Denver', ambiguous=False))\n", 709 | " .set_index('datetime')\n", 710 | " .equals(dd)\n", 711 | ")" 712 | ] 713 | }, 714 | { 715 | "cell_type": "code", 716 | "execution_count": 17, 717 | "metadata": { 718 | "lines_to_next_cell": 2 719 | }, 720 | "outputs": [], 721 | "source": [ 722 | "obj = dd.to_dict()" 723 | ] 724 | }, 725 | { 726 | "cell_type": "code", 727 | "execution_count": 18, 728 | "metadata": { 729 | "lines_to_next_cell": 2 730 | }, 731 | "outputs": [ 732 | { 733 | "data": { 734 | "text/plain": [ 735 | "True" 736 | ] 737 | }, 738 | "execution_count": 18, 739 | "metadata": {}, 740 | "output_type": "execute_result" 741 | } 742 | ], 743 | "source": [ 744 | "dd2 = pd.DataFrame.from_dict(obj)\n", 745 | "dd.equals(dd2)" 746 | ] 747 | }, 748 | { 749 | "cell_type": "code", 750 | "execution_count": 19, 751 | "metadata": { 752 | "lines_to_next_cell": 2 753 | }, 754 | "outputs": [ 755 | { 756 | "data": { 757 | "text/html": [ 758 | "
\n", 759 | "\n", 772 | "\n", 773 | " \n", 774 | " \n", 775 | " \n", 776 | " \n", 777 | " \n", 778 | " \n", 779 | " \n", 780 | " \n", 781 | " \n", 782 | " \n", 783 | " \n", 784 | " \n", 785 | " \n", 786 | " \n", 787 | " \n", 788 | " \n", 789 | " \n", 790 | " \n", 791 | " \n", 792 | " \n", 793 | " \n", 794 | " \n", 795 | " \n", 796 | " \n", 797 | " \n", 798 | " \n", 799 | " \n", 800 | " \n", 801 | " \n", 802 | " \n", 803 | " \n", 804 | " \n", 805 | " \n", 806 | " \n", 807 | " \n", 808 | " \n", 809 | " \n", 810 | " \n", 811 | " \n", 812 | " \n", 813 | " \n", 814 | " \n", 815 | " \n", 816 | " \n", 817 | " \n", 818 | " \n", 819 | " \n", 820 | " \n", 821 | " \n", 822 | " \n", 823 | " \n", 824 | " \n", 825 | " \n", 826 | " \n", 827 | " \n", 828 | " \n", 829 | " \n", 830 | " \n", 831 | " \n", 832 | " \n", 833 | " \n", 834 | " \n", 835 | " \n", 836 | " \n", 837 | " \n", 838 | " \n", 839 | " \n", 840 | " \n", 841 | " \n", 842 | " \n", 843 | " \n", 844 | " \n", 845 | " \n", 846 | " \n", 847 | " \n", 848 | " \n", 849 | " \n", 850 | " \n", 851 | " \n", 852 | " \n", 853 | " \n", 854 | " \n", 855 | " \n", 856 | " \n", 857 | " \n", 858 | " \n", 859 | " \n", 860 | " \n", 861 | " \n", 862 | " \n", 863 | " \n", 864 | " \n", 865 | " \n", 866 | " \n", 867 | " \n", 868 | " \n", 869 | " \n", 870 | " \n", 871 | " \n", 872 | " \n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | "
agency_cdsite_notz_cdcfs144166_00060_cdgage_height144167_00065_cd
2001-05-07 07:00:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 07:15:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 07:30:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 07:45:00USGS9333500MDT70.00A:[91]NaNNone
2001-05-07 08:00:00USGS9333500MDT70.00A:[91]NaNNone
........................
2020-09-28 14:30:00USGS9333500MDT9.53P6.16P
2020-09-28 14:45:00USGS9333500MDT9.20P6.15P
2020-09-28 15:00:00USGS9333500MDT9.20P6.15P
2020-09-28 15:15:00USGS9333500MDT9.20P6.15P
2020-09-28 15:30:00USGS9333500MDT9.20P6.15P
\n", 898 | "

539305 rows × 7 columns

\n", 899 | "
" 900 | ], 901 | "text/plain": [ 902 | " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", 903 | "2001-05-07 07:00:00 USGS 9333500 MDT 71.00 A:[91] \n", 904 | "2001-05-07 07:15:00 USGS 9333500 MDT 71.00 A:[91] \n", 905 | "2001-05-07 07:30:00 USGS 9333500 MDT 71.00 A:[91] \n", 906 | "2001-05-07 07:45:00 USGS 9333500 MDT 70.00 A:[91] \n", 907 | "2001-05-07 08:00:00 USGS 9333500 MDT 70.00 A:[91] \n", 908 | "... ... ... ... ... ... \n", 909 | "2020-09-28 14:30:00 USGS 9333500 MDT 9.53 P \n", 910 | "2020-09-28 14:45:00 USGS 9333500 MDT 9.20 P \n", 911 | "2020-09-28 15:00:00 USGS 9333500 MDT 9.20 P \n", 912 | "2020-09-28 15:15:00 USGS 9333500 MDT 9.20 P \n", 913 | "2020-09-28 15:30:00 USGS 9333500 MDT 9.20 P \n", 914 | "\n", 915 | " gage_height 144167_00065_cd \n", 916 | "2001-05-07 07:00:00 NaN None \n", 917 | "2001-05-07 07:15:00 NaN None \n", 918 | "2001-05-07 07:30:00 NaN None \n", 919 | "2001-05-07 07:45:00 NaN None \n", 920 | "2001-05-07 08:00:00 NaN None \n", 921 | "... ... ... \n", 922 | "2020-09-28 14:30:00 6.16 P \n", 923 | "2020-09-28 14:45:00 6.15 P \n", 924 | "2020-09-28 15:00:00 6.15 P \n", 925 | "2020-09-28 15:15:00 6.15 P \n", 926 | "2020-09-28 15:30:00 6.15 P \n", 927 | "\n", 928 | "[539305 rows x 7 columns]" 929 | ] 930 | }, 931 | "execution_count": 19, 932 | "metadata": {}, 933 | "output_type": "execute_result" 934 | } 935 | ], 936 | "source": [ 937 | "dd.to_json('/tmp/dd.json.gz')\n", 938 | "dd2 = pd.read_json('/tmp/dd.json.gz')\n", 939 | "dd2" 940 | ] 941 | }, 942 | { 943 | "cell_type": "code", 944 | "execution_count": 20, 945 | "metadata": { 946 | "lines_to_next_cell": 2 947 | }, 948 | "outputs": [ 949 | { 950 | "data": { 951 | "text/plain": [ 952 | "False" 953 | ] 954 | }, 955 | "execution_count": 20, 956 | "metadata": {}, 957 | "output_type": "execute_result" 958 | } 959 | ], 960 | "source": [ 961 | "dd2.equals(dd)" 962 | ] 963 | }, 964 | { 965 | "cell_type": "code", 966 | "execution_count": 21, 967 | "metadata": { 968 | "lines_to_next_cell": 2 969
| }, 970 | "outputs": [], 971 | "source": [ 972 | "dd3 = (dd2\n", 973 | " .reset_index()\n", 974 | " .rename(columns={'index':'datetime'})\n", 975 | " .assign(datetime=lambda df_: df_.datetime.dt.tz_localize(tz='UTC')\n", 976 | " .dt.tz_convert('America/Denver'))\n", 977 | " .set_index('datetime')\n", 978 | ")" 979 | ] 980 | }, 981 | { 982 | "cell_type": "code", 983 | "execution_count": 22, 984 | "metadata": { 985 | "lines_to_next_cell": 2 986 | }, 987 | "outputs": [ 988 | { 989 | "data": { 990 | "text/html": [ 991 | "
\n", 992 | "\n", 1005 | "\n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | " \n", 1070 | " \n", 1071 | " \n", 1072 | " \n", 1073 | " \n", 1074 | " \n", 1075 | " \n", 1076 | " \n", 1077 | " \n", 1078 | " \n", 1079 | " \n", 1080 | " \n", 1081 | " \n", 1082 | " \n", 1083 | " \n", 1084 | " \n", 1085 | " \n", 1086 | " \n", 1087 | " \n", 1088 | " \n", 1089 | " \n", 1090 | " \n", 1091 | " \n", 1092 | " \n", 1093 | " \n", 1094 | " \n", 1095 | " \n", 1096 | " \n", 1097 | " \n", 1098 | " \n", 1099 | " \n", 1100 | " \n", 1101 | " \n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | "
agency_cdsite_notz_cdcfs144166_00060_cdgage_height144167_00065_cd
datetime
2001-05-07 01:00:00-06:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 01:15:00-06:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 01:30:00-06:00USGS9333500MDT71.00A:[91]NaNNone
2001-05-07 01:45:00-06:00USGS9333500MDT70.00A:[91]NaNNone
2001-05-07 02:00:00-06:00USGS9333500MDT70.00A:[91]NaNNone
........................
2020-09-28 08:30:00-06:00USGS9333500MDT9.53P6.16P
2020-09-28 08:45:00-06:00USGS9333500MDT9.20P6.15P
2020-09-28 09:00:00-06:00USGS9333500MDT9.20P6.15P
2020-09-28 09:15:00-06:00USGS9333500MDT9.20P6.15P
2020-09-28 09:30:00-06:00USGS9333500MDT9.20P6.15P
\n", 1141 | "

539305 rows × 7 columns

\n", 1142 | "
" 1143 | ], 1144 | "text/plain": [ 1145 | " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", 1146 | "datetime \n", 1147 | "2001-05-07 01:00:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", 1148 | "2001-05-07 01:15:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", 1149 | "2001-05-07 01:30:00-06:00 USGS 9333500 MDT 71.00 A:[91] \n", 1150 | "2001-05-07 01:45:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", 1151 | "2001-05-07 02:00:00-06:00 USGS 9333500 MDT 70.00 A:[91] \n", 1152 | "... ... ... ... ... ... \n", 1153 | "2020-09-28 08:30:00-06:00 USGS 9333500 MDT 9.53 P \n", 1154 | "2020-09-28 08:45:00-06:00 USGS 9333500 MDT 9.20 P \n", 1155 | "2020-09-28 09:00:00-06:00 USGS 9333500 MDT 9.20 P \n", 1156 | "2020-09-28 09:15:00-06:00 USGS 9333500 MDT 9.20 P \n", 1157 | "2020-09-28 09:30:00-06:00 USGS 9333500 MDT 9.20 P \n", 1158 | "\n", 1159 | " gage_height 144167_00065_cd \n", 1160 | "datetime \n", 1161 | "2001-05-07 01:00:00-06:00 NaN None \n", 1162 | "2001-05-07 01:15:00-06:00 NaN None \n", 1163 | "2001-05-07 01:30:00-06:00 NaN None \n", 1164 | "2001-05-07 01:45:00-06:00 NaN None \n", 1165 | "2001-05-07 02:00:00-06:00 NaN None \n", 1166 | "... ... ... 
\n", 1167 | "2020-09-28 08:30:00-06:00 6.16 P \n", 1168 | "2020-09-28 08:45:00-06:00 6.15 P \n", 1169 | "2020-09-28 09:00:00-06:00 6.15 P \n", 1170 | "2020-09-28 09:15:00-06:00 6.15 P \n", 1171 | "2020-09-28 09:30:00-06:00 6.15 P \n", 1172 | "\n", 1173 | "[539305 rows x 7 columns]" 1174 | ] 1175 | }, 1176 | "execution_count": 22, 1177 | "metadata": {}, 1178 | "output_type": "execute_result" 1179 | } 1180 | ], 1181 | "source": [ 1182 | "dd3" 1183 | ] 1184 | }, 1185 | { 1186 | "cell_type": "code", 1187 | "execution_count": 23, 1188 | "metadata": { 1189 | "lines_to_next_cell": 2 1190 | }, 1191 | "outputs": [ 1192 | { 1193 | "data": { 1194 | "text/plain": [ 1195 | "False" 1196 | ] 1197 | }, 1198 | "execution_count": 23, 1199 | "metadata": {}, 1200 | "output_type": "execute_result" 1201 | } 1202 | ], 1203 | "source": [ 1204 | "dd3.equals(dd)" 1205 | ] 1206 | }, 1207 | { 1208 | "cell_type": "code", 1209 | "execution_count": 24, 1210 | "metadata": { 1211 | "lines_to_next_cell": 2 1212 | }, 1213 | "outputs": [ 1214 | { 1215 | "data": { 1216 | "text/plain": [ 1217 | "True" 1218 | ] 1219 | }, 1220 | "execution_count": 24, 1221 | "metadata": {}, 1222 | "output_type": "execute_result" 1223 | } 1224 | ], 1225 | "source": [ 1226 | "dd3.round(3).equals(dd)" 1227 | ] 1228 | }, 1229 | { 1230 | "cell_type": "code", 1231 | "execution_count": 25, 1232 | "metadata": { 1233 | "lines_to_next_cell": 2 1234 | }, 1235 | "outputs": [ 1236 | { 1237 | "data": { 1238 | "text/html": [ 1239 | "
\n", 1240 | "\n", 1253 | "\n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | "
agency_cdsite_notz_cdcfs144166_00060_cdgage_height144167_00065_cd
datetime
2001-05-07 01:00:00-06:00USGS9333500MDT71.0A:[91]NaNNaN
2001-05-07 01:15:00-06:00USGS9333500MDT71.0A:[91]NaNNaN
2001-05-07 01:30:00-06:00USGS9333500MDT71.0A:[91]NaNNaN
2001-05-07 01:45:00-06:00USGS9333500MDT70.0A:[91]NaNNaN
2001-05-07 02:00:00-06:00USGS9333500MDT70.0A:[91]NaNNaN
\n", 1329 | "
" 1330 | ], 1331 | "text/plain": [ 1332 | " agency_cd site_no tz_cd cfs 144166_00060_cd \\\n", 1333 | "datetime \n", 1334 | "2001-05-07 01:00:00-06:00 USGS 9333500 MDT 71.0 A:[91] \n", 1335 | "2001-05-07 01:15:00-06:00 USGS 9333500 MDT 71.0 A:[91] \n", 1336 | "2001-05-07 01:30:00-06:00 USGS 9333500 MDT 71.0 A:[91] \n", 1337 | "2001-05-07 01:45:00-06:00 USGS 9333500 MDT 70.0 A:[91] \n", 1338 | "2001-05-07 02:00:00-06:00 USGS 9333500 MDT 70.0 A:[91] \n", 1339 | "\n", 1340 | " gage_height 144167_00065_cd \n", 1341 | "datetime \n", 1342 | "2001-05-07 01:00:00-06:00 NaN NaN \n", 1343 | "2001-05-07 01:15:00-06:00 NaN NaN \n", 1344 | "2001-05-07 01:30:00-06:00 NaN NaN \n", 1345 | "2001-05-07 01:45:00-06:00 NaN NaN \n", 1346 | "2001-05-07 02:00:00-06:00 NaN NaN " 1347 | ] 1348 | }, 1349 | "execution_count": 25, 1350 | "metadata": {}, 1351 | "output_type": "execute_result" 1352 | } 1353 | ], 1354 | "source": [ 1355 | "dd.head()" 1356 | ] 1357 | }, 1358 | { 1359 | "cell_type": "code", 1360 | "execution_count": 26, 1361 | "metadata": { 1362 | "lines_to_next_cell": 2 1363 | }, 1364 | "outputs": [ 1365 | { 1366 | "data": { 1367 | "text/plain": [ 1368 | "'{\"agency_cd\":{\"989218800000\":\"USGS\",\"989219700000\":\"USGS\",\"9'" 1369 | ] 1370 | }, 1371 | "execution_count": 26, 1372 | "metadata": {}, 1373 | "output_type": "execute_result" 1374 | } 1375 | ], 1376 | "source": [ 1377 | "dd.head().to_json()[:60]" 1378 | ] 1379 | } 1380 | ], 1381 | "metadata": { 1382 | "jupytext": { 1383 | "cell_metadata_filter": "-all", 1384 | "main_language": "python", 1385 | "notebook_metadata_filter": "-all" 1386 | }, 1387 | "kernelspec": { 1388 | "display_name": "Python 3", 1389 | "language": "python", 1390 | "name": "python3" 1391 | }, 1392 | "language_info": { 1393 | "codemirror_mode": { 1394 | "name": "ipython", 1395 | "version": 3 1396 | }, 1397 | "file_extension": ".py", 1398 | "mimetype": "text/x-python", 1399 | "name": "python", 1400 | "nbconvert_exporter": "python", 1401 | 
"pygments_lexer": "ipython3", 1402 | "version": "3.8.5" 1403 | } 1404 | }, 1405 | "nbformat": 4, 1406 | "nbformat_minor": 4 1407 | } 1408 | --------------------------------------------------------------------------------