├── .gitignore ├── PDXPython2015.ipynb ├── PDXPythonMarch2015.pdf ├── README.md ├── diamonds.csv └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | .env/ 12 | .ipynb_checkpoints 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | 58 | # Bcolz 59 | diamonds/ -------------------------------------------------------------------------------- /PDXPython2015.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# stdlib!" 
8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 97, 13 | "metadata": { 14 | "collapsed": false 15 | }, 16 | "outputs": [], 17 | "source": [ 18 | "import csv\n", 19 | "\n", 20 | "conversion_map = {\n", 21 | " 'carat': float,\n", 22 | " 'depth': float,\n", 23 | " 'price': int,\n", 24 | " 'table': float,\n", 25 | " 'x': float,\n", 26 | " 'y': float,\n", 27 | " 'z': float\n", 28 | "}\n", 29 | "def converter(type_map, row):\n", 30 | " \"\"\"Yep, we need to roll our own type conversions.\"\"\"\n", 31 | " converted_row = {}\n", 32 | " for col, val in row.items():\n", 33 | " converter = type_map.get(col)\n", 34 | " if converter:\n", 35 | " converted_row[col] = converter(val)\n", 36 | " else:\n", 37 | " converted_row[col] = val\n", 38 | " return converted_row\n", 39 | "\n", 40 | "with open('diamonds.csv', 'r') as f:\n", 41 | " reader = csv.DictReader(f)\n", 42 | " diamonds = [converter(conversion_map, r) for r in reader]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 98, 48 | "metadata": { 49 | "collapsed": false 50 | }, 51 | "outputs": [ 52 | { 53 | "data": { 54 | "text/plain": [ 55 | "[{'': '1',\n", 56 | " 'carat': 0.23,\n", 57 | " 'clarity': 'SI2',\n", 58 | " 'color': 'E',\n", 59 | " 'cut': 'Ideal',\n", 60 | " 'depth': 61.5,\n", 61 | " 'price': 326,\n", 62 | " 'table': 55.0,\n", 63 | " 'x': 3.95,\n", 64 | " 'y': 3.98,\n", 65 | " 'z': 2.43},\n", 66 | " {'': '2',\n", 67 | " 'carat': 0.21,\n", 68 | " 'clarity': 'SI1',\n", 69 | " 'color': 'E',\n", 70 | " 'cut': 'Premium',\n", 71 | " 'depth': 59.8,\n", 72 | " 'price': 326,\n", 73 | " 'table': 61.0,\n", 74 | " 'x': 3.89,\n", 75 | " 'y': 3.84,\n", 76 | " 'z': 2.31}]" 77 | ] 78 | }, 79 | "execution_count": 98, 80 | "metadata": {}, 81 | "output_type": "execute_result" 82 | } 83 | ], 84 | "source": [ 85 | "diamonds[:2]" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "source": [ 94 | "Adding things up is easy enough..." 
95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 99, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [ 104 | { 105 | "data": { 106 | "text/plain": [ 107 | "43040.86999999912" 108 | ] 109 | }, 110 | "execution_count": 99, 111 | "metadata": {}, 112 | "output_type": "execute_result" 113 | } 114 | ], 115 | "source": [ 116 | "def get_total_carats():\n", 117 | " total_carats = 0\n", 118 | " for row in diamonds:\n", 119 | " total_carats += row['carat']\n", 120 | " return total_carats\n", 121 | "get_total_carats()" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 100, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "43040.86999999912" 135 | ] 136 | }, 137 | "execution_count": 100, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "# Faster/more compact: Generator expression!\n", 144 | "sum(row['carat'] for row in diamonds)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 101, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [ 154 | { 155 | "name": "stdout", 156 | "output_type": "stream", 157 | "text": [ 158 | "100 loops, best of 3: 6.46 ms per loop\n" 159 | ] 160 | } 161 | ], 162 | "source": [ 163 | "# Which is faster? \n", 164 | "%timeit get_total_carats()" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 102, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [ 174 | { 175 | "name": "stdout", 176 | "output_type": "stream", 177 | "text": [ 178 | "100 loops, best of 3: 6.76 ms per loop\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "%timeit sum(row['carat'] for row in diamonds)" 184 | ] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "metadata": {}, 189 | "source": [ 190 | "But what if we want to group, then add?" 
191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 103, 196 | "metadata": { 197 | "collapsed": true 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "# defaultdict is awesome. defaultdict is awesome.\n", 202 | "from collections import defaultdict\n", 203 | "\n", 204 | "def grouper(grouping_col, seq):\n", 205 | " \"\"\"Surely someone has written a faster version than what I'm about to write\"\"\"\n", 206 | " groups = {}\n", 207 | " for row in seq:\n", 208 | " group = groups.get(row[grouping_col])\n", 209 | " if group is not None:\n", 210 | " for k, v in row.items():\n", 211 | " if k != grouping_col:\n", 212 | " group[k].append(v)\n", 213 | " else:\n", 214 | " groups[row[grouping_col]] = defaultdict(list)\n", 215 | " return groups" 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": 104, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "groups = grouper('cut', diamonds)" 227 | ] 228 | }, 229 | { 230 | "cell_type": "code", 231 | "execution_count": 105, 232 | "metadata": { 233 | "collapsed": false 234 | }, 235 | "outputs": [ 236 | { 237 | "data": { 238 | "text/plain": [ 239 | "{'Fair', 'Good', 'Ideal', 'Premium', 'Very Good'}" 240 | ] 241 | }, 242 | "execution_count": 105, 243 | "metadata": {}, 244 | "output_type": "execute_result" 245 | } 246 | ], 247 | "source": [ 248 | "set(groups)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "What if I wanted to do something like \n", 256 | "```sql\n", 257 | "select cut, mean(price)\n", 258 | "from diamonds\n", 259 | "group by cut;\n", 260 | "```" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": 106, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [ 270 | { 271 | "data": { 272 | "text/plain": [ 273 | "{'Fair': 4361, 'Good': 3929, 'Ideal': 3457, 'Premium': 4584, 'Very Good': 3982}" 274 | ] 275 | }, 276 | "execution_count": 
106, 277 | "metadata": {}, 278 | "output_type": "execute_result" 279 | } 280 | ], 281 | "source": [ 282 | "summary = {}\n", 283 | "for group, values in groups.items():\n", 284 | " summary[group] = sum(values['price']) / len(values['price'])\n", 285 | "summary" 286 | ] 287 | }, 288 | { 289 | "cell_type": "markdown", 290 | "metadata": {}, 291 | "source": [ 292 | "or how about this one:\n", 293 | "```sql\n", 294 | "select max(price)\n", 295 | "from diamonds\n", 296 | "where carat > 1;\n", 297 | "```" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": 107, 303 | "metadata": { 304 | "collapsed": false 305 | }, 306 | "outputs": [ 307 | { 308 | "data": { 309 | "text/plain": [ 310 | "18823" 311 | ] 312 | }, 313 | "execution_count": 107, 314 | "metadata": {}, 315 | "output_type": "execute_result" 316 | } 317 | ], 318 | "source": [ 319 | "def get_max_price():\n", 320 | " max_price = 0\n", 321 | " for row in diamonds:\n", 322 | " if row['carat'] > 1 and row['price'] > max_price:\n", 323 | " max_price = row['price']\n", 324 | " return max_price\n", 325 | "get_max_price()" 326 | ] 327 | }, 328 | { 329 | "cell_type": "code", 330 | "execution_count": 108, 331 | "metadata": { 332 | "collapsed": false 333 | }, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "18823" 339 | ] 340 | }, 341 | "execution_count": 108, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "# More compact yet again: generator expression!\n", 348 | "max(row['price'] for row in diamonds if row['carat'] > 1)" 349 | ] 350 | }, 351 | { 352 | "cell_type": "code", 353 | "execution_count": 109, 354 | "metadata": { 355 | "collapsed": false 356 | }, 357 | "outputs": [ 358 | { 359 | "name": "stdout", 360 | "output_type": "stream", 361 | "text": [ 362 | "100 loops, best of 3: 8.38 ms per loop\n" 363 | ] 364 | } 365 | ], 366 | "source": [ 367 | "# Which is faster?\n", 368 | "%timeit get_max_price()" 369 | ] 370 | }, 371 | { 
372 | "cell_type": "code", 373 | "execution_count": 110, 374 | "metadata": { 375 | "collapsed": false 376 | }, 377 | "outputs": [ 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "100 loops, best of 3: 9.22 ms per loop\n" 383 | ] 384 | } 385 | ], 386 | "source": [ 387 | "%timeit max(row['price'] for row in diamonds if row['carat'] > 1)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "### itertools interlude\n", 395 | "\n", 396 | "The itertools module does lots of nice things. You should be aware of it, and reach for it wherever you would otherwise write your own bespoke counting or iteration logic." 397 | ] 398 | }, 399 | { 400 | "cell_type": "markdown", 401 | "metadata": {}, 402 | "source": [ 403 | "# Toolz!\n", 404 | "\n", 405 | "Let's see what it looks like to repeat some of this analysis using the toolz library. PSA: toolz has a pretty huge API, and it's worth reading through the docs: http://toolz.readthedocs.org/en/latest/api.html" 406 | ] 407 | }, 408 | { 409 | "cell_type": "code", 410 | "execution_count": 111, 411 | "metadata": { 412 | "collapsed": true 413 | }, 414 | "outputs": [], 415 | "source": [ 416 | "import toolz as tz" 417 | ] 418 | }, 419 | { 420 | "cell_type": "markdown", 421 | "metadata": {}, 422 | "source": [ 423 | "Some quick toolz fun things:" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": 112, 429 | "metadata": { 430 | "collapsed": false 431 | }, 432 | "outputs": [ 433 | { 434 | "data": { 435 | "text/plain": [ 436 | "[1, 2, 3, 4, 5, 6]" 437 | ] 438 | }, 439 | "execution_count": 112, 440 | "metadata": {}, 441 | "output_type": "execute_result" 442 | } 443 | ], 444 | "source": [ 445 | "list(tz.concat(([1, 2, 3], (4, 5, 6))))" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": 113, 451 | "metadata": { 452 | "collapsed": false 453 | }, 454 | "outputs": [ 455 | { 456 | "data": { 457 | "text/plain": [ 458 | "['A-foo', 'B-foo', 
'c-foo', 'd-foo', 'bar-foo', 'baz-foo']" 459 | ] 460 | }, 461 | "execution_count": 113, 462 | "metadata": {}, 463 | "output_type": "execute_result" 464 | } 465 | ], 466 | "source": [ 467 | "list(tz.mapcat(lambda r: [x + \"-foo\" for x in r],\n", 468 | " [[\"A\", \"B\"], (\"c\", \"d\"), (\"bar\", \"baz\")]))" 469 | ] 470 | }, 471 | { 472 | "cell_type": "code", 473 | "execution_count": 114, 474 | "metadata": { 475 | "collapsed": false 476 | }, 477 | "outputs": [ 478 | { 479 | "data": { 480 | "text/plain": [ 481 | "{'D': 6775, 'E': 9797, 'F': 9542, 'G': 11292, 'H': 8304, 'I': 5422, 'J': 2808}" 482 | ] 483 | }, 484 | "execution_count": 114, 485 | "metadata": {}, 486 | "output_type": "execute_result" 487 | } 488 | ], 489 | "source": [ 490 | "tz.frequencies([r['color'] for r in diamonds])" 491 | ] 492 | }, 493 | { 494 | "cell_type": "markdown", 495 | "metadata": {}, 496 | "source": [ 497 | "Remember that toolz is lazy- functions will return generator-like things:" 498 | ] 499 | }, 500 | { 501 | "cell_type": "code", 502 | "execution_count": 115, 503 | "metadata": { 504 | "collapsed": false 505 | }, 506 | "outputs": [ 507 | { 508 | "data": { 509 | "text/plain": [ 510 | "" 511 | ] 512 | }, 513 | "execution_count": 115, 514 | "metadata": {}, 515 | "output_type": "execute_result" 516 | } 517 | ], 518 | "source": [ 519 | "take_2 = tz.take(2, diamonds)\n", 520 | "take_2" 521 | ] 522 | }, 523 | { 524 | "cell_type": "code", 525 | "execution_count": 116, 526 | "metadata": { 527 | "collapsed": false 528 | }, 529 | "outputs": [ 530 | { 531 | "data": { 532 | "text/plain": [ 533 | "" 534 | ] 535 | }, 536 | "execution_count": 116, 537 | "metadata": {}, 538 | "output_type": "execute_result" 539 | } 540 | ], 541 | "source": [ 542 | "unique_clarity = tz.unique(diamonds, key=lambda x: x.get('clarity'))\n", 543 | "unique_clarity" 544 | ] 545 | }, 546 | { 547 | "cell_type": "code", 548 | "execution_count": 117, 549 | "metadata": { 550 | "collapsed": false 551 | }, 552 | "outputs": [ 553 | { 
554 | "data": { 555 | "text/plain": [ 556 | "[{'': '1',\n", 557 | " 'carat': 0.23,\n", 558 | " 'clarity': 'SI2',\n", 559 | " 'color': 'E',\n", 560 | " 'cut': 'Ideal',\n", 561 | " 'depth': 61.5,\n", 562 | " 'price': 326,\n", 563 | " 'table': 55.0,\n", 564 | " 'x': 3.95,\n", 565 | " 'y': 3.98,\n", 566 | " 'z': 2.43},\n", 567 | " {'': '2',\n", 568 | " 'carat': 0.21,\n", 569 | " 'clarity': 'SI1',\n", 570 | " 'color': 'E',\n", 571 | " 'cut': 'Premium',\n", 572 | " 'depth': 59.8,\n", 573 | " 'price': 326,\n", 574 | " 'table': 61.0,\n", 575 | " 'x': 3.89,\n", 576 | " 'y': 3.84,\n", 577 | " 'z': 2.31}]" 578 | ] 579 | }, 580 | "execution_count": 117, 581 | "metadata": {}, 582 | "output_type": "execute_result" 583 | } 584 | ], 585 | "source": [ 586 | "list(take_2)" 587 | ] 588 | }, 589 | { 590 | "cell_type": "code", 591 | "execution_count": 118, 592 | "metadata": { 593 | "collapsed": false 594 | }, 595 | "outputs": [ 596 | { 597 | "data": { 598 | "text/plain": [ 599 | "[{'': '1',\n", 600 | " 'carat': 0.23,\n", 601 | " 'clarity': 'SI2',\n", 602 | " 'color': 'E',\n", 603 | " 'cut': 'Ideal',\n", 604 | " 'depth': 61.5,\n", 605 | " 'price': 326,\n", 606 | " 'table': 55.0,\n", 607 | " 'x': 3.95,\n", 608 | " 'y': 3.98,\n", 609 | " 'z': 2.43},\n", 610 | " {'': '2',\n", 611 | " 'carat': 0.21,\n", 612 | " 'clarity': 'SI1',\n", 613 | " 'color': 'E',\n", 614 | " 'cut': 'Premium',\n", 615 | " 'depth': 59.8,\n", 616 | " 'price': 326,\n", 617 | " 'table': 61.0,\n", 618 | " 'x': 3.89,\n", 619 | " 'y': 3.84,\n", 620 | " 'z': 2.31},\n", 621 | " {'': '3',\n", 622 | " 'carat': 0.23,\n", 623 | " 'clarity': 'VS1',\n", 624 | " 'color': 'E',\n", 625 | " 'cut': 'Good',\n", 626 | " 'depth': 56.9,\n", 627 | " 'price': 327,\n", 628 | " 'table': 65.0,\n", 629 | " 'x': 4.05,\n", 630 | " 'y': 4.07,\n", 631 | " 'z': 2.31},\n", 632 | " {'': '4',\n", 633 | " 'carat': 0.29,\n", 634 | " 'clarity': 'VS2',\n", 635 | " 'color': 'I',\n", 636 | " 'cut': 'Premium',\n", 637 | " 'depth': 62.4,\n", 638 | " 'price': 
334,\n", 639 | " 'table': 58.0,\n", 640 | " 'x': 4.2,\n", 641 | " 'y': 4.23,\n", 642 | " 'z': 2.63},\n", 643 | " {'': '6',\n", 644 | " 'carat': 0.24,\n", 645 | " 'clarity': 'VVS2',\n", 646 | " 'color': 'J',\n", 647 | " 'cut': 'Very Good',\n", 648 | " 'depth': 62.8,\n", 649 | " 'price': 336,\n", 650 | " 'table': 57.0,\n", 651 | " 'x': 3.94,\n", 652 | " 'y': 3.96,\n", 653 | " 'z': 2.48},\n", 654 | " {'': '7',\n", 655 | " 'carat': 0.24,\n", 656 | " 'clarity': 'VVS1',\n", 657 | " 'color': 'I',\n", 658 | " 'cut': 'Very Good',\n", 659 | " 'depth': 62.3,\n", 660 | " 'price': 336,\n", 661 | " 'table': 57.0,\n", 662 | " 'x': 3.95,\n", 663 | " 'y': 3.98,\n", 664 | " 'z': 2.47},\n", 665 | " {'': '16',\n", 666 | " 'carat': 0.32,\n", 667 | " 'clarity': 'I1',\n", 668 | " 'color': 'E',\n", 669 | " 'cut': 'Premium',\n", 670 | " 'depth': 60.9,\n", 671 | " 'price': 345,\n", 672 | " 'table': 58.0,\n", 673 | " 'x': 4.38,\n", 674 | " 'y': 4.42,\n", 675 | " 'z': 2.68},\n", 676 | " {'': '230',\n", 677 | " 'carat': 0.52,\n", 678 | " 'clarity': 'IF',\n", 679 | " 'color': 'F',\n", 680 | " 'cut': 'Ideal',\n", 681 | " 'depth': 62.2,\n", 682 | " 'price': 2783,\n", 683 | " 'table': 55.0,\n", 684 | " 'x': 5.14,\n", 685 | " 'y': 5.18,\n", 686 | " 'z': 3.21}]" 687 | ] 688 | }, 689 | "execution_count": 118, 690 | "metadata": {}, 691 | "output_type": "execute_result" 692 | } 693 | ], 694 | "source": [ 695 | "# Note that this returns the entire object\n", 696 | "list(unique_clarity)" 697 | ] 698 | }, 699 | { 700 | "cell_type": "code", 701 | "execution_count": 119, 702 | "metadata": { 703 | "collapsed": false 704 | }, 705 | "outputs": [ 706 | { 707 | "data": { 708 | "text/plain": [ 709 | "{'I1': 741,\n", 710 | " 'IF': 1790,\n", 711 | " 'SI1': 13065,\n", 712 | " 'SI2': 9194,\n", 713 | " 'VS1': 8171,\n", 714 | " 'VS2': 12258,\n", 715 | " 'VVS1': 3655,\n", 716 | " 'VVS2': 5066}" 717 | ] 718 | }, 719 | "execution_count": 119, 720 | "metadata": {}, 721 | "output_type": "execute_result" 722 | } 723 | ], 724 
| "source": [ 725 | "# What are our clarity counts?\n", 726 | "tz.countby(lambda x: x['clarity'], diamonds)" 727 | ] 728 | }, 729 | { 730 | "cell_type": "code", 731 | "execution_count": 120, 732 | "metadata": { 733 | "collapsed": false 734 | }, 735 | "outputs": [ 736 | { 737 | "data": { 738 | "text/plain": [ 739 | "18823" 740 | ] 741 | }, 742 | "execution_count": 120, 743 | "metadata": {}, 744 | "output_type": "execute_result" 745 | } 746 | ], 747 | "source": [ 748 | "# What about our max price from above? Reduction!\n", 749 | "def comparo(accum, row):\n", 750 | " price = row['price']\n", 751 | " if price > accum:\n", 752 | " return price\n", 753 | " else:\n", 754 | " return accum\n", 755 | " \n", 756 | "tz.reduce(comparo, diamonds, 0)" 757 | ] 758 | }, 759 | { 760 | "cell_type": "code", 761 | "execution_count": 121, 762 | "metadata": { 763 | "collapsed": false 764 | }, 765 | "outputs": [ 766 | { 767 | "data": { 768 | "text/plain": [ 769 | "18823" 770 | ] 771 | }, 772 | "execution_count": 121, 773 | "metadata": {}, 774 | "output_type": "execute_result" 775 | } 776 | ], 777 | "source": [ 778 | "# We could have also threaded here\n", 779 | "tz.thread_last(diamonds, \n", 780 | " (tz.map, lambda x: x['price']), \n", 781 | " max)" 782 | ] 783 | }, 784 | { 785 | "cell_type": "code", 786 | "execution_count": 122, 787 | "metadata": { 788 | "collapsed": false 789 | }, 790 | "outputs": [ 791 | { 792 | "name": "stdout", 793 | "output_type": "stream", 794 | "text": [ 795 | "100 loops, best of 3: 12.1 ms per loop\n" 796 | ] 797 | } 798 | ], 799 | "source": [ 800 | "# Which is faster?\n", 801 | "%timeit tz.reduce(comparo, diamonds, 0)" 802 | ] 803 | }, 804 | { 805 | "cell_type": "code", 806 | "execution_count": 123, 807 | "metadata": { 808 | "collapsed": false 809 | }, 810 | "outputs": [ 811 | { 812 | "name": "stdout", 813 | "output_type": "stream", 814 | "text": [ 815 | "100 loops, best of 3: 14.6 ms per loop\n" 816 | ] 817 | } 818 | ], 819 | "source": [ 820 | "%timeit 
tz.thread_last(diamonds, (tz.map, lambda x: x['price']), max)" 821 | ] 822 | }, 823 | { 824 | "cell_type": "markdown", 825 | "metadata": {}, 826 | "source": [ 827 | "Let's look at another SQL query. Say we want the number of diamonds in each clarity grade where price > 1000:\n", 828 | "```sql\n", 829 | "select count(1)\n", 830 | "from diamonds\n", 831 | "where price > 1000\n", 832 | "group by clarity;\n", 833 | "```" 834 | ] 835 | }, 836 | { 837 | "cell_type": "code", 838 | "execution_count": 124, 839 | "metadata": { 840 | "collapsed": false 841 | }, 842 | "outputs": [ 843 | { 844 | "data": { 845 | "text/plain": [ 846 | "{'I1': 675,\n", 847 | " 'IF': 1042,\n", 848 | " 'SI1': 9978,\n", 849 | " 'SI2': 8118,\n", 850 | " 'VS1': 5702,\n", 851 | " 'VS2': 8647,\n", 852 | " 'VVS1': 2108,\n", 853 | " 'VVS2': 3146}" 854 | ] 855 | }, 856 | "execution_count": 124, 857 | "metadata": {}, 858 | "output_type": "execute_result" 859 | } 860 | ], 861 | "source": [ 862 | "# Toolz has currying!\n", 863 | "import toolz.curried as tzc\n", 864 | "tzc.pipe(diamonds, \n", 865 | " tzc.filter(lambda r: r['price'] > 1000),\n", 866 | " tzc.map(lambda r: (r['clarity'],)),\n", 867 | " tzc.countby(lambda r: r[0]),\n", 868 | " dict)" 869 | ] 870 | }, 871 | { 872 | "cell_type": "code", 873 | "execution_count": 29, 874 | "metadata": { 875 | "collapsed": false 876 | }, 877 | "outputs": [ 878 | { 879 | "data": { 880 | "text/plain": [ 881 | "{'I1': 675,\n", 882 | " 'IF': 1042,\n", 883 | " 'SI1': 9978,\n", 884 | " 'SI2': 8118,\n", 885 | " 'VS1': 5702,\n", 886 | " 'VS2': 8647,\n", 887 | " 'VVS1': 2108,\n", 888 | " 'VVS2': 3146}" 889 | ] 890 | }, 891 | "execution_count": 29, 892 | "metadata": {}, 893 | "output_type": "execute_result" 894 | } 895 | ], 896 | "source": [ 897 | "# We can go about this another way as well:\n", 898 | "def filter_and_count(kv):\n", 899 | " f_and_c = tz.thread_last(kv[1],\n", 900 | " (tz.filter, lambda r: r['price'] > 1000),\n", 901 | " tz.count)\n", 902 | " \n", 903 | " return kv[0], f_and_c\n", 904 | 
"\n", 905 | "tz.thread_last(diamonds,\n", 906 | " (tz.groupby, 'clarity'),\n", 907 | " (tz.itemmap, filter_and_count))\n" 908 | ] 909 | }, 910 | { 911 | "cell_type": "code", 912 | "execution_count": 125, 913 | "metadata": { 914 | "collapsed": false 915 | }, 916 | "outputs": [ 917 | { 918 | "data": { 919 | "text/plain": [ 920 | "{'I1': 675,\n", 921 | " 'IF': 1042,\n", 922 | " 'SI1': 9978,\n", 923 | " 'SI2': 8118,\n", 924 | " 'VS1': 5702,\n", 925 | " 'VS2': 8647,\n", 926 | " 'VVS1': 2108,\n", 927 | " 'VVS2': 3146}" 928 | ] 929 | }, 930 | "execution_count": 125, 931 | "metadata": {}, 932 | "output_type": "execute_result" 933 | } 934 | ], 935 | "source": [ 936 | "# Cleanest/best way: reduceby: Groupby + reduce\n", 937 | "def increment(accum, row):\n", 938 | " if row['price'] > 1000:\n", 939 | " return accum + 1\n", 940 | " else:\n", 941 | " return accum\n", 942 | "\n", 943 | "tz.reduceby('clarity', \n", 944 | " increment,\n", 945 | " diamonds, 0) " 946 | ] 947 | }, 948 | { 949 | "cell_type": "markdown", 950 | "metadata": { 951 | "collapsed": true 952 | }, 953 | "source": [ 954 | "# Pandas!" 955 | ] 956 | }, 957 | { 958 | "cell_type": "code", 959 | "execution_count": 31, 960 | "metadata": { 961 | "collapsed": false 962 | }, 963 | "outputs": [], 964 | "source": [ 965 | "import pandas as pd\n", 966 | "# We don't need this to use Pandas, FYI\n", 967 | "import numpy as np\n", 968 | "\n", 969 | "# CSV reader is fast!\n", 970 | "df = pd.read_csv('diamonds.csv', index_col=0)\n", 971 | "# Keep this for later, we're going to overwrite df\n", 972 | "df_diamonds = df" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 32, 978 | "metadata": { 979 | "collapsed": false 980 | }, 981 | "outputs": [ 982 | { 983 | "data": { 984 | "text/html": [ 985 | "
\n", 986 | "\n", 987 | " \n", 988 | " \n", 989 | " \n", 990 | " \n", 991 | " \n", 992 | " \n", 993 | " \n", 994 | " \n", 995 | " \n", 996 | " \n", 997 | " \n", 998 | " \n", 999 | " \n", 1000 | " \n", 1001 | " \n", 1002 | " \n", 1003 | " \n", 1004 | " \n", 1005 | " \n", 1006 | " \n", 1007 | " \n", 1008 | " \n", 1009 | " \n", 1010 | " \n", 1011 | " \n", 1012 | " \n", 1013 | " \n", 1014 | " \n", 1015 | " \n", 1016 | " \n", 1017 | " \n", 1018 | " \n", 1019 | " \n", 1020 | " \n", 1021 | " \n", 1022 | " \n", 1023 | " \n", 1024 | " \n", 1025 | " \n", 1026 | " \n", 1027 | " \n", 1028 | " \n", 1029 | " \n", 1030 | " \n", 1031 | " \n", 1032 | " \n", 1033 | " \n", 1034 | " \n", 1035 | " \n", 1036 | " \n", 1037 | " \n", 1038 | " \n", 1039 | " \n", 1040 | " \n", 1041 | " \n", 1042 | " \n", 1043 | " \n", 1044 | " \n", 1045 | " \n", 1046 | " \n", 1047 | " \n", 1048 | " \n", 1049 | " \n", 1050 | " \n", 1051 | " \n", 1052 | " \n", 1053 | " \n", 1054 | " \n", 1055 | " \n", 1056 | " \n", 1057 | " \n", 1058 | " \n", 1059 | " \n", 1060 | " \n", 1061 | " \n", 1062 | " \n", 1063 | " \n", 1064 | " \n", 1065 | " \n", 1066 | " \n", 1067 | " \n", 1068 | " \n", 1069 | "
caratcutcolorclaritydepthtablepricexyz
10.23IdealESI261.5553263.953.982.43
20.21PremiumESI159.8613263.893.842.31
30.23GoodEVS156.9653274.054.072.31
40.29PremiumIVS262.4583344.204.232.63
50.31GoodJSI263.3583354.344.352.75
\n", 1070 | "
" 1071 | ], 1072 | "text/plain": [ 1073 | " carat cut color clarity depth table price x y z\n", 1074 | "1 0.23 Ideal E SI2 61.5 55 326 3.95 3.98 2.43\n", 1075 | "2 0.21 Premium E SI1 59.8 61 326 3.89 3.84 2.31\n", 1076 | "3 0.23 Good E VS1 56.9 65 327 4.05 4.07 2.31\n", 1077 | "4 0.29 Premium I VS2 62.4 58 334 4.20 4.23 2.63\n", 1078 | "5 0.31 Good J SI2 63.3 58 335 4.34 4.35 2.75" 1079 | ] 1080 | }, 1081 | "execution_count": 32, 1082 | "metadata": {}, 1083 | "output_type": "execute_result" 1084 | } 1085 | ], 1086 | "source": [ 1087 | "df.head()" 1088 | ] 1089 | }, 1090 | { 1091 | "cell_type": "code", 1092 | "execution_count": 33, 1093 | "metadata": { 1094 | "collapsed": false 1095 | }, 1096 | "outputs": [ 1097 | { 1098 | "data": { 1099 | "text/html": [ 1100 | "
\n", 1101 | "\n", 1102 | " \n", 1103 | " \n", 1104 | " \n", 1105 | " \n", 1106 | " \n", 1107 | " \n", 1108 | " \n", 1109 | " \n", 1110 | " \n", 1111 | " \n", 1112 | " \n", 1113 | " \n", 1114 | " \n", 1115 | " \n", 1116 | " \n", 1117 | " \n", 1118 | " \n", 1119 | " \n", 1120 | " \n", 1121 | " \n", 1122 | " \n", 1123 | " \n", 1124 | " \n", 1125 | " \n", 1126 | " \n", 1127 | " \n", 1128 | " \n", 1129 | " \n", 1130 | " \n", 1131 | " \n", 1132 | " \n", 1133 | " \n", 1134 | " \n", 1135 | " \n", 1136 | " \n", 1137 | " \n", 1138 | " \n", 1139 | " \n", 1140 | " \n", 1141 | " \n", 1142 | " \n", 1143 | " \n", 1144 | " \n", 1145 | " \n", 1146 | " \n", 1147 | " \n", 1148 | " \n", 1149 | " \n", 1150 | " \n", 1151 | " \n", 1152 | " \n", 1153 | " \n", 1154 | " \n", 1155 | " \n", 1156 | " \n", 1157 | " \n", 1158 | " \n", 1159 | " \n", 1160 | " \n", 1161 | " \n", 1162 | " \n", 1163 | " \n", 1164 | " \n", 1165 | " \n", 1166 | " \n", 1167 | " \n", 1168 | " \n", 1169 | " \n", 1170 | " \n", 1171 | " \n", 1172 | " \n", 1173 | " \n", 1174 | " \n", 1175 | " \n", 1176 | " \n", 1177 | " \n", 1178 | " \n", 1179 | " \n", 1180 | " \n", 1181 | " \n", 1182 | " \n", 1183 | " \n", 1184 | " \n", 1185 | " \n", 1186 | " \n", 1187 | " \n", 1188 | " \n", 1189 | " \n", 1190 | " \n", 1191 | " \n", 1192 | " \n", 1193 | " \n", 1194 | " \n", 1195 | " \n", 1196 | "
caratdepthtablepricexyz
count53940.00000053940.00000053940.00000053940.00000053940.00000053940.00000053940.000000
mean0.79794061.74940557.4571843932.7997225.7311575.7345263.538734
std0.4740111.4326212.2344913989.4397381.1217611.1421350.705699
min0.20000043.00000043.000000326.0000000.0000000.0000000.000000
25%0.40000061.00000056.000000950.0000004.7100004.7200002.910000
50%0.70000061.80000057.0000002401.0000005.7000005.7100003.530000
75%1.04000062.50000059.0000005324.2500006.5400006.5400004.040000
max5.01000079.00000095.00000018823.00000010.74000058.90000031.800000
\n", 1197 | "
" 1198 | ], 1199 | "text/plain": [ 1200 | " carat depth table price x \\\n", 1201 | "count 53940.000000 53940.000000 53940.000000 53940.000000 53940.000000 \n", 1202 | "mean 0.797940 61.749405 57.457184 3932.799722 5.731157 \n", 1203 | "std 0.474011 1.432621 2.234491 3989.439738 1.121761 \n", 1204 | "min 0.200000 43.000000 43.000000 326.000000 0.000000 \n", 1205 | "25% 0.400000 61.000000 56.000000 950.000000 4.710000 \n", 1206 | "50% 0.700000 61.800000 57.000000 2401.000000 5.700000 \n", 1207 | "75% 1.040000 62.500000 59.000000 5324.250000 6.540000 \n", 1208 | "max 5.010000 79.000000 95.000000 18823.000000 10.740000 \n", 1209 | "\n", 1210 | " y z \n", 1211 | "count 53940.000000 53940.000000 \n", 1212 | "mean 5.734526 3.538734 \n", 1213 | "std 1.142135 0.705699 \n", 1214 | "min 0.000000 0.000000 \n", 1215 | "25% 4.720000 2.910000 \n", 1216 | "50% 5.710000 3.530000 \n", 1217 | "75% 6.540000 4.040000 \n", 1218 | "max 58.900000 31.800000 " 1219 | ] 1220 | }, 1221 | "execution_count": 33, 1222 | "metadata": {}, 1223 | "output_type": "execute_result" 1224 | } 1225 | ], 1226 | "source": [ 1227 | "df.describe()" 1228 | ] 1229 | }, 1230 | { 1231 | "cell_type": "code", 1232 | "execution_count": 34, 1233 | "metadata": { 1234 | "collapsed": false 1235 | }, 1236 | "outputs": [ 1237 | { 1238 | "data": { 1239 | "text/html": [ 1240 | "
\n", 1241 | "\n", 1242 | " \n", 1243 | " \n", 1244 | " \n", 1245 | " \n", 1246 | " \n", 1247 | " \n", 1248 | " \n", 1249 | " \n", 1250 | " \n", 1251 | " \n", 1252 | " \n", 1253 | " \n", 1254 | " \n", 1255 | " \n", 1256 | " \n", 1257 | " \n", 1258 | " \n", 1259 | " \n", 1260 | " \n", 1261 | " \n", 1262 | " \n", 1263 | " \n", 1264 | " \n", 1265 | " \n", 1266 | " \n", 1267 | " \n", 1268 | " \n", 1269 | " \n", 1270 | " \n", 1271 | " \n", 1272 | " \n", 1273 | " \n", 1274 | " \n", 1275 | " \n", 1276 | " \n", 1277 | " \n", 1278 | " \n", 1279 | " \n", 1280 | " \n", 1281 | " \n", 1282 | " \n", 1283 | " \n", 1284 | " \n", 1285 | " \n", 1286 | " \n", 1287 | " \n", 1288 | " \n", 1289 | " \n", 1290 | " \n", 1291 | " \n", 1292 | " \n", 1293 | " \n", 1294 | " \n", 1295 | " \n", 1296 | " \n", 1297 | " \n", 1298 | " \n", 1299 | " \n", 1300 | " \n", 1301 | " \n", 1302 | " \n", 1303 | " \n", 1304 | " \n", 1305 | " \n", 1306 | " \n", 1307 | " \n", 1308 | " \n", 1309 | " \n", 1310 | " \n", 1311 | " \n", 1312 | " \n", 1313 | " \n", 1314 | " \n", 1315 | " \n", 1316 | " \n", 1317 | " \n", 1318 | " \n", 1319 | " \n", 1320 | " \n", 1321 | " \n", 1322 | " \n", 1323 | " \n", 1324 | " \n", 1325 | " \n", 1326 | " \n", 1327 | " \n", 1328 | " \n", 1329 | " \n", 1330 | " \n", 1331 | " \n", 1332 | " \n", 1333 | " \n", 1334 | " \n", 1335 | " \n", 1336 | " \n", 1337 | " \n", 1338 | " \n", 1339 | " \n", 1340 | " \n", 1341 | " \n", 1342 | " \n", 1343 | " \n", 1344 | " \n", 1345 | " \n", 1346 | "
caratdepthtablepricexyz
clarity
I11.28384662.73427858.3037793924.1686916.7610936.7093794.207908
IF0.50512361.51061556.5072072864.8391064.9684024.9898273.061659
SI10.85048261.85304257.6625413996.0011485.8883835.8882563.639845
SI21.07764861.77216757.9271815063.0286066.4013706.3978263.948478
VS10.72715861.66745857.3151513839.4553915.5721785.5818283.441007
VS20.76393561.72441757.4174013924.9893955.6577095.6588593.491478
VVS10.50332161.62465156.8844602523.1146374.9603644.9750753.061294
VVS20.59620261.66377857.0249903283.7370715.2184545.2321183.221465
\n", 1347 | "
" 1348 | ], 1349 | "text/plain": [ 1350 | " carat depth table price x y \\\n", 1351 | "clarity \n", 1352 | "I1 1.283846 62.734278 58.303779 3924.168691 6.761093 6.709379 \n", 1353 | "IF 0.505123 61.510615 56.507207 2864.839106 4.968402 4.989827 \n", 1354 | "SI1 0.850482 61.853042 57.662541 3996.001148 5.888383 5.888256 \n", 1355 | "SI2 1.077648 61.772167 57.927181 5063.028606 6.401370 6.397826 \n", 1356 | "VS1 0.727158 61.667458 57.315151 3839.455391 5.572178 5.581828 \n", 1357 | "VS2 0.763935 61.724417 57.417401 3924.989395 5.657709 5.658859 \n", 1358 | "VVS1 0.503321 61.624651 56.884460 2523.114637 4.960364 4.975075 \n", 1359 | "VVS2 0.596202 61.663778 57.024990 3283.737071 5.218454 5.232118 \n", 1360 | "\n", 1361 | " z \n", 1362 | "clarity \n", 1363 | "I1 4.207908 \n", 1364 | "IF 3.061659 \n", 1365 | "SI1 3.639845 \n", 1366 | "SI2 3.948478 \n", 1367 | "VS1 3.441007 \n", 1368 | "VS2 3.491478 \n", 1369 | "VVS1 3.061294 \n", 1370 | "VVS2 3.221465 " 1371 | ] 1372 | }, 1373 | "execution_count": 34, 1374 | "metadata": {}, 1375 | "output_type": "execute_result" 1376 | } 1377 | ], 1378 | "source": [ 1379 | "df.groupby('clarity').mean()" 1380 | ] 1381 | }, 1382 | { 1383 | "cell_type": "markdown", 1384 | "metadata": {}, 1385 | "source": [ 1386 | "Our previous queries:\n", 1387 | "```sql\n", 1388 | "select cut, mean(price)\n", 1389 | "from diamonds\n", 1390 | "groupby cut;\n", 1391 | "\n", 1392 | "select count(carat)\n", 1393 | "from diamonds\n", 1394 | "where price > 1000\n", 1395 | "groupby clarity;\n", 1396 | "\n", 1397 | "select max(price)\n", 1398 | "from diamonds\n", 1399 | "where carat > 1;\n", 1400 | "\n", 1401 | "select cut, price\n", 1402 | "from diamonds\n", 1403 | "where cut in ('Ideal', 'Premium')\n", 1404 | "order by price desc\n", 1405 | "limit 10;\n", 1406 | "```\n", 1407 | "\n", 1408 | "Are pretty trivial operations in Pandas:" 1409 | ] 1410 | }, 1411 | { 1412 | "cell_type": "code", 1413 | "execution_count": 35, 1414 | "metadata": { 1415 | "collapsed": 
false 1416 | }, 1417 | "outputs": [ 1418 | { 1419 | "data": { 1420 | "text/plain": [ 1421 | "cut\n", 1422 | "Fair 4358.757764\n", 1423 | "Good 3928.864452\n", 1424 | "Ideal 3457.541970\n", 1425 | "Premium 4584.257704\n", 1426 | "Very Good 3981.759891\n", 1427 | "Name: price, dtype: float64" 1428 | ] 1429 | }, 1430 | "execution_count": 35, 1431 | "metadata": {}, 1432 | "output_type": "execute_result" 1433 | } 1434 | ], 1435 | "source": [ 1436 | "df.groupby('cut')['price'].mean()" 1437 | ] 1438 | }, 1439 | { 1440 | "cell_type": "code", 1441 | "execution_count": 36, 1442 | "metadata": { 1443 | "collapsed": false 1444 | }, 1445 | "outputs": [ 1446 | { 1447 | "data": { 1448 | "text/plain": [ 1449 | "clarity\n", 1450 | "I1 675\n", 1451 | "IF 1042\n", 1452 | "SI1 9978\n", 1453 | "SI2 8118\n", 1454 | "VS1 5702\n", 1455 | "VS2 8647\n", 1456 | "VVS1 2108\n", 1457 | "VVS2 3146\n", 1458 | "Name: carat, dtype: int64" 1459 | ] 1460 | }, 1461 | "execution_count": 36, 1462 | "metadata": {}, 1463 | "output_type": "execute_result" 1464 | } 1465 | ], 1466 | "source": [ 1467 | "df[df['price'] > 1000].groupby('clarity')['carat'].count()" 1468 | ] 1469 | }, 1470 | { 1471 | "cell_type": "code", 1472 | "execution_count": 37, 1473 | "metadata": { 1474 | "collapsed": false 1475 | }, 1476 | "outputs": [ 1477 | { 1478 | "data": { 1479 | "text/html": [ 1480 | "
\n", 1481 | "\n", 1482 | " \n", 1483 | " \n", 1484 | " \n", 1485 | " \n", 1486 | " \n", 1487 | " \n", 1488 | " \n", 1489 | " \n", 1490 | " \n", 1491 | " \n", 1492 | " \n", 1493 | " \n", 1494 | " \n", 1495 | " \n", 1496 | " \n", 1497 | " \n", 1498 | " \n", 1499 | " \n", 1500 | " \n", 1501 | " \n", 1502 | " \n", 1503 | " \n", 1504 | " \n", 1505 | " \n", 1506 | " \n", 1507 | " \n", 1508 | " \n", 1509 | " \n", 1510 | " \n", 1511 | " \n", 1512 | " \n", 1513 | " \n", 1514 | " \n", 1515 | " \n", 1516 | " \n", 1517 | " \n", 1518 | " \n", 1519 | " \n", 1520 | " \n", 1521 | " \n", 1522 | " \n", 1523 | " \n", 1524 | " \n", 1525 | " \n", 1526 | " \n", 1527 | " \n", 1528 | " \n", 1529 | " \n", 1530 | " \n", 1531 | " \n", 1532 | " \n", 1533 | " \n", 1534 | " \n", 1535 | " \n", 1536 | " \n", 1537 | " \n", 1538 | " \n", 1539 | " \n", 1540 | " \n", 1541 | " \n", 1542 | " \n", 1543 | " \n", 1544 | " \n", 1545 | " \n", 1546 | " \n", 1547 | " \n", 1548 | " \n", 1549 | " \n", 1550 | " \n", 1551 | " \n", 1552 | " \n", 1553 | " \n", 1554 | " \n", 1555 | " \n", 1556 | " \n", 1557 | " \n", 1558 | " \n", 1559 | " \n", 1560 | " \n", 1561 | " \n", 1562 | " \n", 1563 | " \n", 1564 | " \n", 1565 | " \n", 1566 | " \n", 1567 | " \n", 1568 | " \n", 1569 | " \n", 1570 | " \n", 1571 | " \n", 1572 | " \n", 1573 | " \n", 1574 | " \n", 1575 | " \n", 1576 | " \n", 1577 | " \n", 1578 | " \n", 1579 | " \n", 1580 | " \n", 1581 | " \n", 1582 | " \n", 1583 | " \n", 1584 | " \n", 1585 | " \n", 1586 | " \n", 1587 | " \n", 1588 | " \n", 1589 | " \n", 1590 | " \n", 1591 | " \n", 1592 | " \n", 1593 | " \n", 1594 | " \n", 1595 | " \n", 1596 | " \n", 1597 | " \n", 1598 | " \n", 1599 | " \n", 1600 | " \n", 1601 | " \n", 1602 | " \n", 1603 | " \n", 1604 | " \n", 1605 | " \n", 1606 | " \n", 1607 | " \n", 1608 | " \n", 1609 | " \n", 1610 | " \n", 1611 | " \n", 1612 | " \n", 1613 | " \n", 1614 | " \n", 1615 | " \n", 1616 | " \n", 1617 | " \n", 1618 | " \n", 1619 | " \n", 1620 | " \n", 1621 | " \n", 1622 | " \n", 1623 | 
" \n", 1624 | " \n", 1625 | " \n", 1626 | " \n", 1627 | " \n", 1628 | " \n", 1629 | "
caratcutcolorclaritydepthtablepricexyz
277502.29PremiumIVS260.860188238.508.475.16
277481.51IdealGIF61.755188067.377.414.56
277472.07IdealGSI262.555188048.208.135.11
277452.29PremiumISI161.859187978.528.455.24
277442.00PremiumIVS160.859187958.138.024.91
277432.04PremiumHSI158.160187958.378.284.84
277422.15IdealGSI262.654187918.298.355.21
277411.71PremiumFVS262.359187917.577.534.70
277392.05IdealGSI161.957187878.108.165.03
277382.05PremiumFSI260.259187848.288.335.00
\n", 1630 | "
" 1631 | ], 1632 | "text/plain": [ 1633 | " carat cut color clarity depth table price x y z\n", 1634 | "27750 2.29 Premium I VS2 60.8 60 18823 8.50 8.47 5.16\n", 1635 | "27748 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56\n", 1636 | "27747 2.07 Ideal G SI2 62.5 55 18804 8.20 8.13 5.11\n", 1637 | "27745 2.29 Premium I SI1 61.8 59 18797 8.52 8.45 5.24\n", 1638 | "27744 2.00 Premium I VS1 60.8 59 18795 8.13 8.02 4.91\n", 1639 | "27743 2.04 Premium H SI1 58.1 60 18795 8.37 8.28 4.84\n", 1640 | "27742 2.15 Ideal G SI2 62.6 54 18791 8.29 8.35 5.21\n", 1641 | "27741 1.71 Premium F VS2 62.3 59 18791 7.57 7.53 4.70\n", 1642 | "27739 2.05 Ideal G SI1 61.9 57 18787 8.10 8.16 5.03\n", 1643 | "27738 2.05 Premium F SI2 60.2 59 18784 8.28 8.33 5.00" 1644 | ] 1645 | }, 1646 | "execution_count": 37, 1647 | "metadata": {}, 1648 | "output_type": "execute_result" 1649 | } 1650 | ], 1651 | "source": [ 1652 | "df[df['cut'].isin(['Ideal', 'Premium'])].sort('price', ascending=False)[:10]" 1653 | ] 1654 | }, 1655 | { 1656 | "cell_type": "markdown", 1657 | "metadata": {}, 1658 | "source": [ 1659 | "In which I do a bunch of cool Pandas things without a real goal" 1660 | ] 1661 | }, 1662 | { 1663 | "cell_type": "code", 1664 | "execution_count": 38, 1665 | "metadata": { 1666 | "collapsed": false 1667 | }, 1668 | "outputs": [ 1669 | { 1670 | "data": { 1671 | "text/html": [ 1672 | "
\n", 1673 | "\n", 1674 | " \n", 1675 | " \n", 1676 | " \n", 1677 | " \n", 1678 | " \n", 1679 | " \n", 1680 | " \n", 1681 | " \n", 1682 | " \n", 1683 | " \n", 1684 | " \n", 1685 | " \n", 1686 | " \n", 1687 | " \n", 1688 | " \n", 1689 | " \n", 1690 | " \n", 1691 | " \n", 1692 | " \n", 1693 | " \n", 1694 | " \n", 1695 | " \n", 1696 | " \n", 1697 | " \n", 1698 | " \n", 1699 | " \n", 1700 | " \n", 1701 | " \n", 1702 | " \n", 1703 | " \n", 1704 | " \n", 1705 | " \n", 1706 | " \n", 1707 | " \n", 1708 | " \n", 1709 | " \n", 1710 | " \n", 1711 | " \n", 1712 | " \n", 1713 | " \n", 1714 | " \n", 1715 | " \n", 1716 | " \n", 1717 | " \n", 1718 | " \n", 1719 | " \n", 1720 | " \n", 1721 | " \n", 1722 | " \n", 1723 | " \n", 1724 | " \n", 1725 | " \n", 1726 | " \n", 1727 | " \n", 1728 | " \n", 1729 | " \n", 1730 | " \n", 1731 | " \n", 1732 | " \n", 1733 | " \n", 1734 | " \n", 1735 | " \n", 1736 | " \n", 1737 | " \n", 1738 | " \n", 1739 | " \n", 1740 | " \n", 1741 | " \n", 1742 | " \n", 1743 | " \n", 1744 | " \n", 1745 | " \n", 1746 | " \n", 1747 | " \n", 1748 | " \n", 1749 | " \n", 1750 | " \n", 1751 | " \n", 1752 | " \n", 1753 | " \n", 1754 | " \n", 1755 | " \n", 1756 | " \n", 1757 | " \n", 1758 | " \n", 1759 | " \n", 1760 | " \n", 1761 | " \n", 1762 | " \n", 1763 | " \n", 1764 | " \n", 1765 | " \n", 1766 | " \n", 1767 | " \n", 1768 | " \n", 1769 | " \n", 1770 | " \n", 1771 | " \n", 1772 | " \n", 1773 | " \n", 1774 | " \n", 1775 | " \n", 1776 | " \n", 1777 | " \n", 1778 | " \n", 1779 | " \n", 1780 | " \n", 1781 | " \n", 1782 | " \n", 1783 | " \n", 1784 | " \n", 1785 | " \n", 1786 | " \n", 1787 | " \n", 1788 | " \n", 1789 | " \n", 1790 | " \n", 1791 | " \n", 1792 | " \n", 1793 | " \n", 1794 | " \n", 1795 | " \n", 1796 | " \n", 1797 | " \n", 1798 | " \n", 1799 | " \n", 1800 | " \n", 1801 | " \n", 1802 | " \n", 1803 | " \n", 1804 | " \n", 1805 | " \n", 1806 | " \n", 1807 | " \n", 1808 | " \n", 1809 | " \n", 1810 | " \n", 1811 | " \n", 1812 | " \n", 1813 | " \n", 1814 | " \n", 1815 | 
" \n", 1816 | " \n", 1817 | " \n", 1818 | " \n", 1819 | " \n", 1820 | " \n", 1821 | " \n", 1822 | " \n", 1823 | " \n", 1824 | " \n", 1825 | " \n", 1826 | " \n", 1827 | " \n", 1828 | " \n", 1829 | " \n", 1830 | " \n", 1831 | " \n", 1832 | " \n", 1833 | " \n", 1834 | " \n", 1835 | " \n", 1836 | " \n", 1837 | " \n", 1838 | " \n", 1839 | " \n", 1840 | " \n", 1841 | " \n", 1842 | " \n", 1843 | " \n", 1844 | " \n", 1845 | " \n", 1846 | " \n", 1847 | " \n", 1848 | " \n", 1849 | " \n", 1850 | " \n", 1851 | " \n", 1852 | " \n", 1853 | " \n", 1854 | " \n", 1855 | " \n", 1856 | " \n", 1857 | " \n", 1858 | " \n", 1859 | " \n", 1860 | " \n", 1861 | " \n", 1862 | " \n", 1863 | " \n", 1864 | " \n", 1865 | " \n", 1866 | " \n", 1867 | " \n", 1868 | " \n", 1869 | " \n", 1870 | " \n", 1871 | " \n", 1872 | " \n", 1873 | " \n", 1874 | " \n", 1875 | " \n", 1876 | " \n", 1877 | " \n", 1878 | " \n", 1879 | " \n", 1880 | " \n", 1881 | " \n", 1882 | " \n", 1883 | " \n", 1884 | " \n", 1885 | " \n", 1886 | " \n", 1887 | " \n", 1888 | " \n", 1889 | " \n", 1890 | " \n", 1891 | " \n", 1892 | " \n", 1893 | " \n", 1894 | " \n", 1895 | " \n", 1896 | " \n", 1897 | " \n", 1898 | " \n", 1899 | " \n", 1900 | " \n", 1901 | " \n", 1902 | " \n", 1903 | " \n", 1904 | " \n", 1905 | " \n", 1906 | " \n", 1907 | " \n", 1908 | " \n", 1909 | " \n", 1910 | " \n", 1911 | " \n", 1912 | " \n", 1913 | " \n", 1914 | " \n", 1915 | " \n", 1916 | " \n", 1917 | " \n", 1918 | " \n", 1919 | " \n", 1920 | " \n", 1921 | " \n", 1922 | " \n", 1923 | " \n", 1924 | " \n", 1925 | " \n", 1926 | " \n", 1927 | " \n", 1928 | " \n", 1929 | " \n", 1930 | " \n", 1931 | " \n", 1932 | " \n", 1933 | " \n", 1934 | " \n", 1935 | " \n", 1936 | " \n", 1937 | " \n", 1938 | " \n", 1939 | " \n", 1940 | " \n", 1941 | " \n", 1942 | "
12345678910...53931539325393353934539355393653937539385393953940
carat0.230.210.230.290.310.240.240.260.220.23...0.710.710.70.70.720.720.720.70.860.75
cutIdealPremiumGoodPremiumGoodVery GoodVery GoodVery GoodFairVery Good...PremiumPremiumVery GoodVery GoodPremiumIdealGoodVery GoodPremiumIdeal
colorEEEIJJIHEH...EFEEDDDDHD
claritySI2SI1VS1VS2SI2VVS2VVS1SI1VS2VS1...SI1SI1VS2VS2SI1SI1SI1SI1SI2SI2
depth61.559.856.962.463.362.862.361.965.159.4...60.559.860.561.262.760.863.162.86162.2
table55616558585757556161...55625959595755605855
price326326327334335336336337337338...2756275627572757275727572757275727572757
x3.953.894.054.24.343.943.954.073.874...5.795.745.715.695.695.755.695.666.155.83
y3.983.844.074.234.353.963.984.113.784.05...5.745.735.765.725.735.765.755.686.125.87
z2.432.312.312.632.752.482.472.532.492.39...3.493.433.473.493.583.53.613.563.743.64
\n", 1943 | "

10 rows × 53940 columns

\n", 1944 | "
" 1945 | ], 1946 | "text/plain": [ 1947 | " 1 2 3 4 5 6 7 8 \\\n", 1948 | "carat 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 \n", 1949 | "cut Ideal Premium Good Premium Good Very Good Very Good Very Good \n", 1950 | "color E E E I J J I H \n", 1951 | "clarity SI2 SI1 VS1 VS2 SI2 VVS2 VVS1 SI1 \n", 1952 | "depth 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 \n", 1953 | "table 55 61 65 58 58 57 57 55 \n", 1954 | "price 326 326 327 334 335 336 336 337 \n", 1955 | "x 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 \n", 1956 | "y 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 \n", 1957 | "z 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 \n", 1958 | "\n", 1959 | " 9 10 ... 53931 53932 53933 53934 \\\n", 1960 | "carat 0.22 0.23 ... 0.71 0.71 0.7 0.7 \n", 1961 | "cut Fair Very Good ... Premium Premium Very Good Very Good \n", 1962 | "color E H ... E F E E \n", 1963 | "clarity VS2 VS1 ... SI1 SI1 VS2 VS2 \n", 1964 | "depth 65.1 59.4 ... 60.5 59.8 60.5 61.2 \n", 1965 | "table 61 61 ... 55 62 59 59 \n", 1966 | "price 337 338 ... 2756 2756 2757 2757 \n", 1967 | "x 3.87 4 ... 5.79 5.74 5.71 5.69 \n", 1968 | "y 3.78 4.05 ... 5.74 5.73 5.76 5.72 \n", 1969 | "z 2.49 2.39 ... 
3.49 3.43 3.47 3.49 \n", 1970 | "\n", 1971 | " 53935 53936 53937 53938 53939 53940 \n", 1972 | "carat 0.72 0.72 0.72 0.7 0.86 0.75 \n", 1973 | "cut Premium Ideal Good Very Good Premium Ideal \n", 1974 | "color D D D D H D \n", 1975 | "clarity SI1 SI1 SI1 SI1 SI2 SI2 \n", 1976 | "depth 62.7 60.8 63.1 62.8 61 62.2 \n", 1977 | "table 59 57 55 60 58 55 \n", 1978 | "price 2757 2757 2757 2757 2757 2757 \n", 1979 | "x 5.69 5.75 5.69 5.66 6.15 5.83 \n", 1980 | "y 5.73 5.76 5.75 5.68 6.12 5.87 \n", 1981 | "z 3.58 3.5 3.61 3.56 3.74 3.64 \n", 1982 | "\n", 1983 | "[10 rows x 53940 columns]" 1984 | ] 1985 | }, 1986 | "execution_count": 38, 1987 | "metadata": {}, 1988 | "output_type": "execute_result" 1989 | } 1990 | ], 1991 | "source": [ 1992 | "# I can Transpose things!\n", 1993 | "df.T" 1994 | ] 1995 | }, 1996 | { 1997 | "cell_type": "code", 1998 | "execution_count": 39, 1999 | "metadata": { 2000 | "collapsed": false 2001 | }, 2002 | "outputs": [ 2003 | { 2004 | "data": { 2005 | "text/html": [ 2006 | "
\n", 2007 | "\n", 2008 | " \n", 2009 | " \n", 2010 | " \n", 2011 | " \n", 2012 | " \n", 2013 | " \n", 2014 | " \n", 2015 | " \n", 2016 | " \n", 2017 | " \n", 2018 | " \n", 2019 | " \n", 2020 | " \n", 2021 | " \n", 2022 | " \n", 2023 | " \n", 2024 | " \n", 2025 | " \n", 2026 | " \n", 2027 | " \n", 2028 | " \n", 2029 | " \n", 2030 | " \n", 2031 | " \n", 2032 | " \n", 2033 | " \n", 2034 | " \n", 2035 | " \n", 2036 | " \n", 2037 | " \n", 2038 | " \n", 2039 | " \n", 2040 | " \n", 2041 | " \n", 2042 | " \n", 2043 | " \n", 2044 | " \n", 2045 | " \n", 2046 | " \n", 2047 | " \n", 2048 | " \n", 2049 | " \n", 2050 | " \n", 2051 | " \n", 2052 | " \n", 2053 | " \n", 2054 | " \n", 2055 | " \n", 2056 | " \n", 2057 | " \n", 2058 | " \n", 2059 | " \n", 2060 | " \n", 2061 | " \n", 2062 | " \n", 2063 | " \n", 2064 | " \n", 2065 | " \n", 2066 | " \n", 2067 | " \n", 2068 | " \n", 2069 | " \n", 2070 | " \n", 2071 | " \n", 2072 | " \n", 2073 | " \n", 2074 | " \n", 2075 | " \n", 2076 | " \n", 2077 | " \n", 2078 | " \n", 2079 | " \n", 2080 | " \n", 2081 | " \n", 2082 | " \n", 2083 | " \n", 2084 | " \n", 2085 | " \n", 2086 | " \n", 2087 | " \n", 2088 | " \n", 2089 | " \n", 2090 | "
caratcutcolorclaritydepthtablepricexyz
277502.29PremiumIVS260.860188238.508.475.16
277492.00Very GoodGSI163.556188187.907.975.04
277481.51IdealGIF61.755188067.377.414.56
277472.07IdealGSI262.555188048.208.135.11
277462.00Very GoodHSI162.857188037.958.005.01
\n", 2091 | "
" 2092 | ], 2093 | "text/plain": [ 2094 | " carat cut color clarity depth table price x y z\n", 2095 | "27750 2.29 Premium I VS2 60.8 60 18823 8.50 8.47 5.16\n", 2096 | "27749 2.00 Very Good G SI1 63.5 56 18818 7.90 7.97 5.04\n", 2097 | "27748 1.51 Ideal G IF 61.7 55 18806 7.37 7.41 4.56\n", 2098 | "27747 2.07 Ideal G SI2 62.5 55 18804 8.20 8.13 5.11\n", 2099 | "27746 2.00 Very Good H SI1 62.8 57 18803 7.95 8.00 5.01" 2100 | ] 2101 | }, 2102 | "execution_count": 39, 2103 | "metadata": {}, 2104 | "output_type": "execute_result" 2105 | } 2106 | ], 2107 | "source": [ 2108 | "# SORT ALL THE THINGS!\n", 2109 | "df.sort(['price', 'carat'], ascending=False).head()" 2110 | ] 2111 | }, 2112 | { 2113 | "cell_type": "code", 2114 | "execution_count": 40, 2115 | "metadata": { 2116 | "collapsed": false 2117 | }, 2118 | "outputs": [ 2119 | { 2120 | "data": { 2121 | "text/html": [ 2122 | "
\n", 2123 | "\n", 2124 | " \n", 2125 | " \n", 2126 | " \n", 2127 | " \n", 2128 | " \n", 2129 | " \n", 2130 | " \n", 2131 | " \n", 2132 | " \n", 2133 | " \n", 2134 | " \n", 2135 | " \n", 2136 | " \n", 2137 | " \n", 2138 | " \n", 2139 | " \n", 2140 | " \n", 2141 | " \n", 2142 | " \n", 2143 | " \n", 2144 | " \n", 2145 | " \n", 2146 | " \n", 2147 | " \n", 2148 | " \n", 2149 | " \n", 2150 | " \n", 2151 | " \n", 2152 | " \n", 2153 | " \n", 2154 | " \n", 2155 | " \n", 2156 | " \n", 2157 | " \n", 2158 | " \n", 2159 | " \n", 2160 | " \n", 2161 | " \n", 2162 | " \n", 2163 | " \n", 2164 | " \n", 2165 | " \n", 2166 | " \n", 2167 | " \n", 2168 | " \n", 2169 | " \n", 2170 | " \n", 2171 | " \n", 2172 | " \n", 2173 | " \n", 2174 | " \n", 2175 | " \n", 2176 | " \n", 2177 | " \n", 2178 | " \n", 2179 | " \n", 2180 | " \n", 2181 | " \n", 2182 | " \n", 2183 | " \n", 2184 | " \n", 2185 | " \n", 2186 | " \n", 2187 | " \n", 2188 | " \n", 2189 | " \n", 2190 | " \n", 2191 | " \n", 2192 | " \n", 2193 | " \n", 2194 | " \n", 2195 | " \n", 2196 | " \n", 2197 | " \n", 2198 | " \n", 2199 | " \n", 2200 | " \n", 2201 | " \n", 2202 | " \n", 2203 | " \n", 2204 | " \n", 2205 | "
float_colint_colstr_coltime_col
a1.51a2015-01-01
b2.52b2015-01-02
c3.53c2015-01-03
a4.54d2015-01-04
b5.55e2015-01-05
c6.56f2015-01-06
a7.57g2015-01-07
b8.58h2015-01-08
c9.59i2015-01-09
a10.510j2015-01-10
\n", 2206 | "
" 2207 | ], 2208 | "text/plain": [ 2209 | " float_col int_col str_col time_col\n", 2210 | "a 1.5 1 a 2015-01-01\n", 2211 | "b 2.5 2 b 2015-01-02\n", 2212 | "c 3.5 3 c 2015-01-03\n", 2213 | "a 4.5 4 d 2015-01-04\n", 2214 | "b 5.5 5 e 2015-01-05\n", 2215 | "c 6.5 6 f 2015-01-06\n", 2216 | "a 7.5 7 g 2015-01-07\n", 2217 | "b 8.5 8 h 2015-01-08\n", 2218 | "c 9.5 9 i 2015-01-09\n", 2219 | "a 10.5 10 j 2015-01-10" 2220 | ] 2221 | }, 2222 | "execution_count": 40, 2223 | "metadata": {}, 2224 | "output_type": "execute_result" 2225 | } 2226 | ], 2227 | "source": [ 2228 | "# Let's use some fake data to show off some stuff:\n", 2229 | "simple_data_1 = {\"int_col\": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n", 2230 | " \"str_col\": [\"a\", \"b\", \"c\", \"d\", \"e\", \"f\", \"g\", \"h\", \"i\", \"j\"],\n", 2231 | " \"float_col\": [1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5, 10.5],\n", 2232 | " \"time_col\": [\"2015-01-01\", \"2015-01-02\", \"2015-01-03\", \"2015-01-04\", \"2015-01-05\",\n", 2233 | " \"2015-01-06\", \"2015-01-07\", \"2015-01-08\", \"2015-01-09\", \"2015-01-10\"]}\n", 2234 | "my_index = [\"a\", \"b\", \"c\", \"a\", \"b\", \"c\", \"a\", \"b\", \"c\", \"a\"]\n", 2235 | "df = pd.DataFrame(simple_data_1, index=my_index)\n", 2236 | "df" 2237 | ] 2238 | }, 2239 | { 2240 | "cell_type": "code", 2241 | "execution_count": 41, 2242 | "metadata": { 2243 | "collapsed": false 2244 | }, 2245 | "outputs": [ 2246 | { 2247 | "data": { 2248 | "text/html": [ 2249 | "
\n", 2250 | "\n", 2251 | " \n", 2252 | " \n", 2253 | " \n", 2254 | " \n", 2255 | " \n", 2256 | " \n", 2257 | " \n", 2258 | " \n", 2259 | " \n", 2260 | " \n", 2261 | " \n", 2262 | " \n", 2263 | " \n", 2264 | " \n", 2265 | " \n", 2266 | " \n", 2267 | " \n", 2268 | " \n", 2269 | " \n", 2270 | " \n", 2271 | " \n", 2272 | " \n", 2273 | " \n", 2274 | " \n", 2275 | " \n", 2276 | " \n", 2277 | " \n", 2278 | " \n", 2279 | " \n", 2280 | " \n", 2281 | " \n", 2282 | " \n", 2283 | " \n", 2284 | " \n", 2285 | " \n", 2286 | " \n", 2287 | " \n", 2288 | " \n", 2289 | " \n", 2290 | "
float_colint_colstr_coltime_col
a1.51a2015-01-01
a4.54d2015-01-04
a7.57g2015-01-07
a10.510j2015-01-10
\n", 2291 | "
" 2292 | ], 2293 | "text/plain": [ 2294 | " float_col int_col str_col time_col\n", 2295 | "a 1.5 1 a 2015-01-01\n", 2296 | "a 4.5 4 d 2015-01-04\n", 2297 | "a 7.5 7 g 2015-01-07\n", 2298 | "a 10.5 10 j 2015-01-10" 2299 | ] 2300 | }, 2301 | "execution_count": 41, 2302 | "metadata": {}, 2303 | "output_type": "execute_result" 2304 | } 2305 | ], 2306 | "source": [ 2307 | "# DataFrames have indices that can be gotten by label or position\n", 2308 | "df.loc['a']" 2309 | ] 2310 | }, 2311 | { 2312 | "cell_type": "code", 2313 | "execution_count": 42, 2314 | "metadata": { 2315 | "collapsed": false 2316 | }, 2317 | "outputs": [ 2318 | { 2319 | "data": { 2320 | "text/plain": [ 2321 | "float_col 6.5\n", 2322 | "int_col 6\n", 2323 | "str_col f\n", 2324 | "time_col 2015-01-06\n", 2325 | "Name: c, dtype: object" 2326 | ] 2327 | }, 2328 | "execution_count": 42, 2329 | "metadata": {}, 2330 | "output_type": "execute_result" 2331 | } 2332 | ], 2333 | "source": [ 2334 | "df.iloc[5]" 2335 | ] 2336 | }, 2337 | { 2338 | "cell_type": "code", 2339 | "execution_count": 43, 2340 | "metadata": { 2341 | "collapsed": false 2342 | }, 2343 | "outputs": [ 2344 | { 2345 | "data": { 2346 | "text/html": [ 2347 | "
\n", 2348 | "\n", 2349 | " \n", 2350 | " \n", 2351 | " \n", 2352 | " \n", 2353 | " \n", 2354 | " \n", 2355 | " \n", 2356 | " \n", 2357 | " \n", 2358 | " \n", 2359 | " \n", 2360 | " \n", 2361 | " \n", 2362 | " \n", 2363 | " \n", 2364 | " \n", 2365 | " \n", 2366 | " \n", 2367 | " \n", 2368 | " \n", 2369 | " \n", 2370 | " \n", 2371 | " \n", 2372 | " \n", 2373 | " \n", 2374 | "
float_colint_colstr_coltime_col
c3.53c2015-01-03
a4.54d2015-01-04
\n", 2375 | "
" 2376 | ], 2377 | "text/plain": [ 2378 | " float_col int_col str_col time_col\n", 2379 | "c 3.5 3 c 2015-01-03\n", 2380 | "a 4.5 4 d 2015-01-04" 2381 | ] 2382 | }, 2383 | "execution_count": 43, 2384 | "metadata": {}, 2385 | "output_type": "execute_result" 2386 | } 2387 | ], 2388 | "source": [ 2389 | "df[2:4]" 2390 | ] 2391 | }, 2392 | { 2393 | "cell_type": "code", 2394 | "execution_count": 44, 2395 | "metadata": { 2396 | "collapsed": false 2397 | }, 2398 | "outputs": [ 2399 | { 2400 | "data": { 2401 | "text/html": [ 2402 | "
\n", 2403 | "\n", 2404 | " \n", 2405 | " \n", 2406 | " \n", 2407 | " \n", 2408 | " \n", 2409 | " \n", 2410 | " \n", 2411 | " \n", 2412 | " \n", 2413 | " \n", 2414 | " \n", 2415 | " \n", 2416 | " \n", 2417 | " \n", 2418 | " \n", 2419 | " \n", 2420 | " \n", 2421 | " \n", 2422 | " \n", 2423 | " \n", 2424 | " \n", 2425 | " \n", 2426 | " \n", 2427 | " \n", 2428 | " \n", 2429 | " \n", 2430 | " \n", 2431 | " \n", 2432 | " \n", 2433 | " \n", 2434 | " \n", 2435 | " \n", 2436 | " \n", 2437 | " \n", 2438 | " \n", 2439 | " \n", 2440 | " \n", 2441 | " \n", 2442 | " \n", 2443 | " \n", 2444 | " \n", 2445 | " \n", 2446 | " \n", 2447 | " \n", 2448 | " \n", 2449 | " \n", 2450 | " \n", 2451 | " \n", 2452 | " \n", 2453 | " \n", 2454 | " \n", 2455 | " \n", 2456 | " \n", 2457 | " \n", 2458 | " \n", 2459 | " \n", 2460 | " \n", 2461 | " \n", 2462 | " \n", 2463 | " \n", 2464 | " \n", 2465 | " \n", 2466 | " \n", 2467 | " \n", 2468 | " \n", 2469 | " \n", 2470 | " \n", 2471 | " \n", 2472 | " \n", 2473 | " \n", 2474 | " \n", 2475 | " \n", 2476 | " \n", 2477 | " \n", 2478 | " \n", 2479 | " \n", 2480 | " \n", 2481 | " \n", 2482 | " \n", 2483 | " \n", 2484 | " \n", 2485 | " \n", 2486 | " \n", 2487 | " \n", 2488 | " \n", 2489 | " \n", 2490 | " \n", 2491 | " \n", 2492 | " \n", 2493 | " \n", 2494 | " \n", 2495 | " \n", 2496 | "
float_colint_colstr_coltime_colnew_col
a1.51a2015-01-01NaN
b2.52b2015-01-02NaN
c3.53c2015-01-031
a4.54d2015-01-042
b5.55e2015-01-05NaN
c6.56f2015-01-064
a7.57g2015-01-075
b8.58h2015-01-089
c9.59i2015-01-09NaN
a10.510j2015-01-1010
\n", 2497 | "
" 2498 | ], 2499 | "text/plain": [ 2500 | " float_col int_col str_col time_col new_col\n", 2501 | "a 1.5 1 a 2015-01-01 NaN\n", 2502 | "b 2.5 2 b 2015-01-02 NaN\n", 2503 | "c 3.5 3 c 2015-01-03 1\n", 2504 | "a 4.5 4 d 2015-01-04 2\n", 2505 | "b 5.5 5 e 2015-01-05 NaN\n", 2506 | "c 6.5 6 f 2015-01-06 4\n", 2507 | "a 7.5 7 g 2015-01-07 5\n", 2508 | "b 8.5 8 h 2015-01-08 9\n", 2509 | "c 9.5 9 i 2015-01-09 NaN\n", 2510 | "a 10.5 10 j 2015-01-10 10" 2511 | ] 2512 | }, 2513 | "execution_count": 44, 2514 | "metadata": {}, 2515 | "output_type": "execute_result" 2516 | } 2517 | ], 2518 | "source": [ 2519 | "# New Column! With Missing Data!\n", 2520 | "df['new_col'] = [np.nan, np.nan, 1.0, 2.0, np.nan, 4.0, 5.0, 9.0, np.nan, 10.0]\n", 2521 | "df" 2522 | ] 2523 | }, 2524 | { 2525 | "cell_type": "code", 2526 | "execution_count": 45, 2527 | "metadata": { 2528 | "collapsed": false 2529 | }, 2530 | "outputs": [ 2531 | { 2532 | "data": { 2533 | "text/html": [ 2534 | "
\n", 2535 | "\n", 2536 | " \n", 2537 | " \n", 2538 | " \n", 2539 | " \n", 2540 | " \n", 2541 | " \n", 2542 | " \n", 2543 | " \n", 2544 | " \n", 2545 | " \n", 2546 | " \n", 2547 | " \n", 2548 | " \n", 2549 | " \n", 2550 | " \n", 2551 | " \n", 2552 | " \n", 2553 | " \n", 2554 | " \n", 2555 | " \n", 2556 | " \n", 2557 | " \n", 2558 | " \n", 2559 | " \n", 2560 | " \n", 2561 | " \n", 2562 | " \n", 2563 | " \n", 2564 | " \n", 2565 | " \n", 2566 | " \n", 2567 | " \n", 2568 | " \n", 2569 | " \n", 2570 | " \n", 2571 | " \n", 2572 | " \n", 2573 | " \n", 2574 | " \n", 2575 | " \n", 2576 | " \n", 2577 | " \n", 2578 | " \n", 2579 | " \n", 2580 | " \n", 2581 | " \n", 2582 | " \n", 2583 | " \n", 2584 | " \n", 2585 | " \n", 2586 | " \n", 2587 | " \n", 2588 | " \n", 2589 | " \n", 2590 | " \n", 2591 | " \n", 2592 | " \n", 2593 | " \n", 2594 | " \n", 2595 | " \n", 2596 | "
float_colint_colstr_coltime_colnew_col
c3.53c2015-01-031
a4.54d2015-01-042
c6.56f2015-01-064
a7.57g2015-01-075
b8.58h2015-01-089
a10.510j2015-01-1010
\n", 2597 | "
" 2598 | ], 2599 | "text/plain": [ 2600 | " float_col int_col str_col time_col new_col\n", 2601 | "c 3.5 3 c 2015-01-03 1\n", 2602 | "a 4.5 4 d 2015-01-04 2\n", 2603 | "c 6.5 6 f 2015-01-06 4\n", 2604 | "a 7.5 7 g 2015-01-07 5\n", 2605 | "b 8.5 8 h 2015-01-08 9\n", 2606 | "a 10.5 10 j 2015-01-10 10" 2607 | ] 2608 | }, 2609 | "execution_count": 45, 2610 | "metadata": {}, 2611 | "output_type": "execute_result" 2612 | } 2613 | ], 2614 | "source": [ 2615 | "# Removing missing data!\n", 2616 | "df.dropna()" 2617 | ] 2618 | }, 2619 | { 2620 | "cell_type": "code", 2621 | "execution_count": 46, 2622 | "metadata": { 2623 | "collapsed": false 2624 | }, 2625 | "outputs": [ 2626 | { 2627 | "data": { 2628 | "text/html": [ 2629 | "
\n", 2630 | "\n", 2631 | " \n", 2632 | " \n", 2633 | " \n", 2634 | " \n", 2635 | " \n", 2636 | " \n", 2637 | " \n", 2638 | " \n", 2639 | " \n", 2640 | " \n", 2641 | " \n", 2642 | " \n", 2643 | " \n", 2644 | " \n", 2645 | " \n", 2646 | " \n", 2647 | " \n", 2648 | " \n", 2649 | " \n", 2650 | " \n", 2651 | " \n", 2652 | " \n", 2653 | " \n", 2654 | " \n", 2655 | " \n", 2656 | " \n", 2657 | " \n", 2658 | " \n", 2659 | " \n", 2660 | " \n", 2661 | " \n", 2662 | " \n", 2663 | " \n", 2664 | " \n", 2665 | " \n", 2666 | " \n", 2667 | " \n", 2668 | " \n", 2669 | " \n", 2670 | " \n", 2671 | " \n", 2672 | " \n", 2673 | " \n", 2674 | " \n", 2675 | " \n", 2676 | " \n", 2677 | " \n", 2678 | " \n", 2679 | " \n", 2680 | " \n", 2681 | " \n", 2682 | " \n", 2683 | " \n", 2684 | " \n", 2685 | " \n", 2686 | " \n", 2687 | " \n", 2688 | " \n", 2689 | " \n", 2690 | " \n", 2691 | " \n", 2692 | " \n", 2693 | " \n", 2694 | " \n", 2695 | " \n", 2696 | " \n", 2697 | " \n", 2698 | " \n", 2699 | " \n", 2700 | " \n", 2701 | " \n", 2702 | " \n", 2703 | " \n", 2704 | " \n", 2705 | " \n", 2706 | " \n", 2707 | " \n", 2708 | " \n", 2709 | " \n", 2710 | " \n", 2711 | " \n", 2712 | " \n", 2713 | " \n", 2714 | " \n", 2715 | " \n", 2716 | " \n", 2717 | " \n", 2718 | " \n", 2719 | " \n", 2720 | " \n", 2721 | " \n", 2722 | " \n", 2723 | "
float_colint_colstr_coltime_colnew_col
a1.51a2015-01-01FOO!
b2.52b2015-01-02FOO!
c3.53c2015-01-031
a4.54d2015-01-042
b5.55e2015-01-05FOO!
c6.56f2015-01-064
a7.57g2015-01-075
b8.58h2015-01-089
c9.59i2015-01-09FOO!
a10.510j2015-01-1010
\n", 2724 | "
" 2725 | ], 2726 | "text/plain": [ 2727 | " float_col int_col str_col time_col new_col\n", 2728 | "a 1.5 1 a 2015-01-01 FOO!\n", 2729 | "b 2.5 2 b 2015-01-02 FOO!\n", 2730 | "c 3.5 3 c 2015-01-03 1\n", 2731 | "a 4.5 4 d 2015-01-04 2\n", 2732 | "b 5.5 5 e 2015-01-05 FOO!\n", 2733 | "c 6.5 6 f 2015-01-06 4\n", 2734 | "a 7.5 7 g 2015-01-07 5\n", 2735 | "b 8.5 8 h 2015-01-08 9\n", 2736 | "c 9.5 9 i 2015-01-09 FOO!\n", 2737 | "a 10.5 10 j 2015-01-10 10" 2738 | ] 2739 | }, 2740 | "execution_count": 46, 2741 | "metadata": {}, 2742 | "output_type": "execute_result" 2743 | } 2744 | ], 2745 | "source": [ 2746 | "# Fill missing data!\n", 2747 | "df.fillna(\"FOO!\")" 2748 | ] 2749 | }, 2750 | { 2751 | "cell_type": "code", 2752 | "execution_count": 47, 2753 | "metadata": { 2754 | "collapsed": false 2755 | }, 2756 | "outputs": [ 2757 | { 2758 | "data": { 2759 | "text/html": [ 2760 | "
\n", 2761 | "\n", 2762 | " \n", 2763 | " \n", 2764 | " \n", 2765 | " \n", 2766 | " \n", 2767 | " \n", 2768 | " \n", 2769 | " \n", 2770 | " \n", 2771 | " \n", 2772 | " \n", 2773 | " \n", 2774 | " \n", 2775 | " \n", 2776 | " \n", 2777 | " \n", 2778 | " \n", 2779 | " \n", 2780 | " \n", 2781 | " \n", 2782 | " \n", 2783 | " \n", 2784 | " \n", 2785 | " \n", 2786 | " \n", 2787 | " \n", 2788 | " \n", 2789 | " \n", 2790 | " \n", 2791 | " \n", 2792 | " \n", 2793 | " \n", 2794 | " \n", 2795 | " \n", 2796 | " \n", 2797 | " \n", 2798 | " \n", 2799 | " \n", 2800 | " \n", 2801 | " \n", 2802 | " \n", 2803 | " \n", 2804 | " \n", 2805 | " \n", 2806 | " \n", 2807 | " \n", 2808 | " \n", 2809 | " \n", 2810 | " \n", 2811 | " \n", 2812 | " \n", 2813 | " \n", 2814 | " \n", 2815 | " \n", 2816 | " \n", 2817 | " \n", 2818 | " \n", 2819 | " \n", 2820 | " \n", 2821 | " \n", 2822 | " \n", 2823 | " \n", 2824 | " \n", 2825 | " \n", 2826 | " \n", 2827 | " \n", 2828 | " \n", 2829 | " \n", 2830 | " \n", 2831 | " \n", 2832 | " \n", 2833 | " \n", 2834 | " \n", 2835 | " \n", 2836 | " \n", 2837 | " \n", 2838 | " \n", 2839 | " \n", 2840 | " \n", 2841 | " \n", 2842 | " \n", 2843 | " \n", 2844 | " \n", 2845 | " \n", 2846 | " \n", 2847 | " \n", 2848 | " \n", 2849 | " \n", 2850 | " \n", 2851 | " \n", 2852 | " \n", 2853 | " \n", 2854 | "
float_colint_colstr_coltime_colnew_col
a1.51a2015-01-011
b2.52b2015-01-021
c3.53c2015-01-031
a4.54d2015-01-042
b5.55e2015-01-054
c6.56f2015-01-064
a7.57g2015-01-075
b8.58h2015-01-089
c9.59i2015-01-0910
a10.510j2015-01-1010
\n", 2855 | "
" 2856 | ], 2857 | "text/plain": [ 2858 | " float_col int_col str_col time_col new_col\n", 2859 | "a 1.5 1 a 2015-01-01 1\n", 2860 | "b 2.5 2 b 2015-01-02 1\n", 2861 | "c 3.5 3 c 2015-01-03 1\n", 2862 | "a 4.5 4 d 2015-01-04 2\n", 2863 | "b 5.5 5 e 2015-01-05 4\n", 2864 | "c 6.5 6 f 2015-01-06 4\n", 2865 | "a 7.5 7 g 2015-01-07 5\n", 2866 | "b 8.5 8 h 2015-01-08 9\n", 2867 | "c 9.5 9 i 2015-01-09 10\n", 2868 | "a 10.5 10 j 2015-01-10 10" 2869 | ] 2870 | }, 2871 | "execution_count": 47, 2872 | "metadata": {}, 2873 | "output_type": "execute_result" 2874 | } 2875 | ], 2876 | "source": [ 2877 | "# Backfill missing data!\n", 2878 | "df.fillna(method='bfill')" 2879 | ] 2880 | }, 2881 | { 2882 | "cell_type": "code", 2883 | "execution_count": 48, 2884 | "metadata": { 2885 | "collapsed": false 2886 | }, 2887 | "outputs": [ 2888 | { 2889 | "data": { 2890 | "text/html": [ 2891 | "
\n", 2892 | "\n", 2893 | " \n", 2894 | " \n", 2895 | " \n", 2896 | " \n", 2897 | " \n", 2898 | " \n", 2899 | " \n", 2900 | " \n", 2901 | " \n", 2902 | " \n", 2903 | " \n", 2904 | " \n", 2905 | " \n", 2906 | " \n", 2907 | " \n", 2908 | " \n", 2909 | " \n", 2910 | " \n", 2911 | " \n", 2912 | " \n", 2913 | " \n", 2914 | " \n", 2915 | " \n", 2916 | " \n", 2917 | " \n", 2918 | " \n", 2919 | " \n", 2920 | " \n", 2921 | " \n", 2922 | " \n", 2923 | " \n", 2924 | " \n", 2925 | " \n", 2926 | " \n", 2927 | " \n", 2928 | " \n", 2929 | " \n", 2930 | " \n", 2931 | " \n", 2932 | " \n", 2933 | " \n", 2934 | " \n", 2935 | " \n", 2936 | " \n", 2937 | " \n", 2938 | " \n", 2939 | " \n", 2940 | " \n", 2941 | " \n", 2942 | " \n", 2943 | " \n", 2944 | " \n", 2945 | " \n", 2946 | " \n", 2947 | " \n", 2948 | " \n", 2949 | " \n", 2950 | " \n", 2951 | " \n", 2952 | " \n", 2953 | " \n", 2954 | " \n", 2955 | " \n", 2956 | " \n", 2957 | " \n", 2958 | " \n", 2959 | " \n", 2960 | " \n", 2961 | " \n", 2962 | " \n", 2963 | " \n", 2964 | " \n", 2965 | " \n", 2966 | " \n", 2967 | " \n", 2968 | " \n", 2969 | " \n", 2970 | " \n", 2971 | " \n", 2972 | " \n", 2973 | " \n", 2974 | " \n", 2975 | " \n", 2976 | " \n", 2977 | " \n", 2978 | " \n", 2979 | " \n", 2980 | " \n", 2981 | " \n", 2982 | " \n", 2983 | " \n", 2984 | " \n", 2985 | "
float_colint_colstr_coltime_colnew_col
a1.51FOO2015-01-01NaN
b2.52FoO2015-01-02NaN
c3.53Foo2015-01-031
a4.54Fo o2015-01-042
b5.55FOO2015-01-05NaN
c6.56fOO2015-01-064
a7.57fOo2015-01-075
b8.58FoO2015-01-089
c9.59foO2015-01-09NaN
a10.510Foo2015-01-1010
\n", 2986 | "
" 2987 | ], 2988 | "text/plain": [ 2989 | " float_col int_col str_col time_col new_col\n", 2990 | "a 1.5 1 FOO 2015-01-01 NaN\n", 2991 | "b 2.5 2 FoO 2015-01-02 NaN\n", 2992 | "c 3.5 3 Foo 2015-01-03 1\n", 2993 | "a 4.5 4 Fo o 2015-01-04 2\n", 2994 | "b 5.5 5 FOO 2015-01-05 NaN\n", 2995 | "c 6.5 6 fOO 2015-01-06 4\n", 2996 | "a 7.5 7 fOo 2015-01-07 5\n", 2997 | "b 8.5 8 FoO 2015-01-08 9\n", 2998 | "c 9.5 9 foO 2015-01-09 NaN\n", 2999 | "a 10.5 10 Foo 2015-01-10 10" 3000 | ] 3001 | }, 3002 | "execution_count": 48, 3003 | "metadata": {}, 3004 | "output_type": "execute_result" 3005 | } 3006 | ], 3007 | "source": [ 3008 | "# Vectorized string methods!\n", 3009 | "df['str_col'] = [\"FOO\", \"FoO\", \"Foo\", \"Fo o\", \"FOO\", \"fOO\", \"fOo\", \"FoO\", \"foO\", \"Foo \"]\n", 3010 | "df" 3011 | ] 3012 | }, 3013 | { 3014 | "cell_type": "code", 3015 | "execution_count": 49, 3016 | "metadata": { 3017 | "collapsed": false 3018 | }, 3019 | "outputs": [ 3020 | { 3021 | "data": { 3022 | "text/plain": [ 3023 | "a foo\n", 3024 | "b foo\n", 3025 | "c foo\n", 3026 | "a foo\n", 3027 | "b foo\n", 3028 | "c foo\n", 3029 | "a foo\n", 3030 | "b foo\n", 3031 | "c foo\n", 3032 | "a foo\n", 3033 | "Name: str_col, dtype: object" 3034 | ] 3035 | }, 3036 | "execution_count": 49, 3037 | "metadata": {}, 3038 | "output_type": "execute_result" 3039 | } 3040 | ], 3041 | "source": [ 3042 | "df['str_col'].str.lower().str.replace(' ', '')" 3043 | ] 3044 | }, 3045 | { 3046 | "cell_type": "code", 3047 | "execution_count": 50, 3048 | "metadata": { 3049 | "collapsed": false 3050 | }, 3051 | "outputs": [ 3052 | { 3053 | "data": { 3054 | "text/html": [ 3055 | "
\n", 3056 | "\n", 3057 | " \n", 3058 | " \n", 3059 | " \n", 3060 | " \n", 3061 | " \n", 3062 | " \n", 3063 | " \n", 3064 | " \n", 3065 | " \n", 3066 | " \n", 3067 | " \n", 3068 | " \n", 3069 | " \n", 3070 | " \n", 3071 | " \n", 3072 | " \n", 3073 | " \n", 3074 | " \n", 3075 | " \n", 3076 | " \n", 3077 | " \n", 3078 | " \n", 3079 | "
keylvalrval
0foo14
1bar25
\n", 3080 | "
" 3081 | ], 3082 | "text/plain": [ 3083 | " key lval rval\n", 3084 | "0 foo 1 4\n", 3085 | "1 bar 2 5" 3086 | ] 3087 | }, 3088 | "execution_count": 50, 3089 | "metadata": {}, 3090 | "output_type": "execute_result" 3091 | } 3092 | ], 3093 | "source": [ 3094 | "# Database style joins!!\n", 3095 | "left = pd.DataFrame({'key': ['foo', 'bar', 'fizz'], 'lval': [1, 2, 3]})\n", 3096 | "right = pd.DataFrame({'key': ['foo', 'bar', 'baz', 'qux'], \n", 3097 | " 'rval': [4, 5, 6, 7]})\n", 3098 | "left.merge(right, how='inner')" 3099 | ] 3100 | }, 3101 | { 3102 | "cell_type": "code", 3103 | "execution_count": 51, 3104 | "metadata": { 3105 | "collapsed": false 3106 | }, 3107 | "outputs": [ 3108 | { 3109 | "data": { 3110 | "text/html": [ 3111 | "
\n", 3112 | "\n", 3113 | " \n", 3114 | " \n", 3115 | " \n", 3116 | " \n", 3117 | " \n", 3118 | " \n", 3119 | " \n", 3120 | " \n", 3121 | " \n", 3122 | " \n", 3123 | " \n", 3124 | " \n", 3125 | " \n", 3126 | " \n", 3127 | " \n", 3128 | " \n", 3129 | " \n", 3130 | " \n", 3131 | " \n", 3132 | " \n", 3133 | " \n", 3134 | " \n", 3135 | " \n", 3136 | " \n", 3137 | " \n", 3138 | " \n", 3139 | " \n", 3140 | " \n", 3141 | " \n", 3142 | " \n", 3143 | " \n", 3144 | " \n", 3145 | " \n", 3146 | " \n", 3147 | " \n", 3148 | " \n", 3149 | " \n", 3150 | " \n", 3151 | " \n", 3152 | " \n", 3153 | "
keyrvallval
0foo41
1bar52
2baz6NaN
3qux7NaN
4fizzNaN3
\n", 3154 | "
" 3155 | ], 3156 | "text/plain": [ 3157 | " key rval lval\n", 3158 | "0 foo 4 1\n", 3159 | "1 bar 5 2\n", 3160 | "2 baz 6 NaN\n", 3161 | "3 qux 7 NaN\n", 3162 | "4 fizz NaN 3" 3163 | ] 3164 | }, 3165 | "execution_count": 51, 3166 | "metadata": {}, 3167 | "output_type": "execute_result" 3168 | } 3169 | ], 3170 | "source": [ 3171 | "right.merge(left, how='outer')" 3172 | ] 3173 | }, 3174 | { 3175 | "cell_type": "code", 3176 | "execution_count": 52, 3177 | "metadata": { 3178 | "collapsed": false 3179 | }, 3180 | "outputs": [ 3181 | { 3182 | "data": { 3183 | "text/html": [ 3184 | "
\n", 3185 | "\n", 3186 | " \n", 3187 | " \n", 3188 | " \n", 3189 | " \n", 3190 | " \n", 3191 | " \n", 3192 | " \n", 3193 | " \n", 3194 | " \n", 3195 | " \n", 3196 | " \n", 3197 | " \n", 3198 | " \n", 3199 | " \n", 3200 | " \n", 3201 | " \n", 3202 | " \n", 3203 | " \n", 3204 | " \n", 3205 | " \n", 3206 | " \n", 3207 | " \n", 3208 | " \n", 3209 | " \n", 3210 | " \n", 3211 | " \n", 3212 | " \n", 3213 | " \n", 3214 | "
keylvalrval
0foo14
1bar25
2fizz3NaN
\n", 3215 | "
" 3216 | ], 3217 | "text/plain": [ 3218 | " key lval rval\n", 3219 | "0 foo 1 4\n", 3220 | "1 bar 2 5\n", 3221 | "2 fizz 3 NaN" 3222 | ] 3223 | }, 3224 | "execution_count": 52, 3225 | "metadata": {}, 3226 | "output_type": "execute_result" 3227 | } 3228 | ], 3229 | "source": [ 3230 | "left.merge(right, how='left')" 3231 | ] 3232 | }, 3233 | { 3234 | "cell_type": "code", 3235 | "execution_count": 53, 3236 | "metadata": { 3237 | "collapsed": false 3238 | }, 3239 | "outputs": [ 3240 | { 3241 | "data": { 3242 | "text/plain": [ 3243 | "\n", 3244 | "[2015-03-25 00:00:00, ..., 2015-03-31 05:00:00]\n", 3245 | "Length: 150, Freq: H, Timezone: None" 3246 | ] 3247 | }, 3248 | "execution_count": 53, 3249 | "metadata": {}, 3250 | "output_type": "execute_result" 3251 | } 3252 | ], 3253 | "source": [ 3254 | "# Let's make some random timeseries data\n", 3255 | "dates = pd.date_range('2015-03-25', periods=150, freq='H')\n", 3256 | "dates" 3257 | ] 3258 | }, 3259 | { 3260 | "cell_type": "code", 3261 | "execution_count": 54, 3262 | "metadata": { 3263 | "collapsed": false 3264 | }, 3265 | "outputs": [ 3266 | { 3267 | "data": { 3268 | "text/html": [ 3269 | "
\n", 3270 | "\n", 3271 | " \n", 3272 | " \n", 3273 | " \n", 3274 | " \n", 3275 | " \n", 3276 | " \n", 3277 | " \n", 3278 | " \n", 3279 | " \n", 3280 | " \n", 3281 | " \n", 3282 | " \n", 3283 | " \n", 3284 | " \n", 3285 | " \n", 3286 | " \n", 3287 | " \n", 3288 | " \n", 3289 | " \n", 3290 | " \n", 3291 | " \n", 3292 | " \n", 3293 | " \n", 3294 | " \n", 3295 | " \n", 3296 | " \n", 3297 | " \n", 3298 | " \n", 3299 | "
Numeric
2015-03-25 00:00:00224
2015-03-25 01:00:00124
2015-03-25 02:00:006
2015-03-25 03:00:00140
2015-03-25 04:00:00130
\n", 3300 | "
" 3301 | ], 3302 | "text/plain": [ 3303 | " Numeric\n", 3304 | "2015-03-25 00:00:00 224\n", 3305 | "2015-03-25 01:00:00 124\n", 3306 | "2015-03-25 02:00:00 6\n", 3307 | "2015-03-25 03:00:00 140\n", 3308 | "2015-03-25 04:00:00 130" 3309 | ] 3310 | }, 3311 | "execution_count": 54, 3312 | "metadata": {}, 3313 | "output_type": "execute_result" 3314 | } 3315 | ], 3316 | "source": [ 3317 | "time_df = pd.DataFrame(np.random.randint(0, 500, 150), index=dates, columns=[\"Numeric\"])\n", 3318 | "time_df.head()" 3319 | ] 3320 | }, 3321 | { 3322 | "cell_type": "code", 3323 | "execution_count": 55, 3324 | "metadata": { 3325 | "collapsed": false 3326 | }, 3327 | "outputs": [ 3328 | { 3329 | "data": { 3330 | "text/html": [ 3331 | "
\n", 3332 | "\n", 3333 | " \n", 3334 | " \n", 3335 | " \n", 3336 | " \n", 3337 | " \n", 3338 | " \n", 3339 | " \n", 3340 | " \n", 3341 | " \n", 3342 | " \n", 3343 | " \n", 3344 | " \n", 3345 | " \n", 3346 | " \n", 3347 | " \n", 3348 | " \n", 3349 | " \n", 3350 | " \n", 3351 | " \n", 3352 | " \n", 3353 | " \n", 3354 | " \n", 3355 | " \n", 3356 | " \n", 3357 | " \n", 3358 | " \n", 3359 | " \n", 3360 | " \n", 3361 | " \n", 3362 | " \n", 3363 | " \n", 3364 | " \n", 3365 | " \n", 3366 | " \n", 3367 | " \n", 3368 | " \n", 3369 | "
Numeric
2015-03-25239.208333
2015-03-26295.541667
2015-03-27246.791667
2015-03-28222.250000
2015-03-29262.458333
2015-03-30246.125000
2015-03-31265.500000
\n", 3370 | "
" 3371 | ], 3372 | "text/plain": [ 3373 | " Numeric\n", 3374 | "2015-03-25 239.208333\n", 3375 | "2015-03-26 295.541667\n", 3376 | "2015-03-27 246.791667\n", 3377 | "2015-03-28 222.250000\n", 3378 | "2015-03-29 262.458333\n", 3379 | "2015-03-30 246.125000\n", 3380 | "2015-03-31 265.500000" 3381 | ] 3382 | }, 3383 | "execution_count": 55, 3384 | "metadata": {}, 3385 | "output_type": "execute_result" 3386 | } 3387 | ], 3388 | "source": [ 3389 | "# RESAMPLE!\n", 3390 | "time_df.resample('D', how='mean')" 3391 | ] 3392 | }, 3393 | { 3394 | "cell_type": "code", 3395 | "execution_count": 56, 3396 | "metadata": { 3397 | "collapsed": false 3398 | }, 3399 | "outputs": [ 3400 | { 3401 | "data": { 3402 | "text/plain": [ 3403 | "\n", 3404 | "[2014-03-25 23:02:00, ..., 2015-03-25 00:00:00]\n", 3405 | "Length: 3, Freq: None, Timezone: None" 3406 | ] 3407 | }, 3408 | "execution_count": 56, 3409 | "metadata": {}, 3410 | "output_type": "execute_result" 3411 | } 3412 | ], 3413 | "source": [ 3414 | "# Convert weird date formats!\n", 3415 | "the_worst = ['3/25/2014 23:02:00', '2014-03-25 23:01:00', 'March 25 2015']\n", 3416 | "pd.to_datetime(the_worst)" 3417 | ] 3418 | }, 3419 | { 3420 | "cell_type": "code", 3421 | "execution_count": 57, 3422 | "metadata": { 3423 | "collapsed": false 3424 | }, 3425 | "outputs": [ 3426 | { 3427 | "data": { 3428 | "text/html": [ 3429 | "
\n", 3430 | "\n", 3431 | " \n", 3432 | " \n", 3433 | " \n", 3434 | " \n", 3435 | " \n", 3436 | " \n", 3437 | " \n", 3438 | " \n", 3439 | " \n", 3440 | " \n", 3441 | " \n", 3442 | " \n", 3443 | " \n", 3444 | " \n", 3445 | " \n", 3446 | " \n", 3447 | " \n", 3448 | " \n", 3449 | " \n", 3450 | " \n", 3451 | " \n", 3452 | " \n", 3453 | " \n", 3454 | " \n", 3455 | " \n", 3456 | " \n", 3457 | " \n", 3458 | " \n", 3459 | "
Numeric
2015-03-26 00:00:00224
2015-03-26 01:00:00124
2015-03-26 02:00:006
2015-03-26 03:00:00140
2015-03-26 04:00:00130
\n", 3460 | "
" 3461 | ], 3462 | "text/plain": [ 3463 | " Numeric\n", 3464 | "2015-03-26 00:00:00 224\n", 3465 | "2015-03-26 01:00:00 124\n", 3466 | "2015-03-26 02:00:00 6\n", 3467 | "2015-03-26 03:00:00 140\n", 3468 | "2015-03-26 04:00:00 130" 3469 | ] 3470 | }, 3471 | "execution_count": 57, 3472 | "metadata": {}, 3473 | "output_type": "execute_result" 3474 | } 3475 | ], 3476 | "source": [ 3477 | "# Shift dates!\n", 3478 | "time_df.shift(1, freq='D').head()" 3479 | ] 3480 | }, 3481 | { 3482 | "cell_type": "code", 3483 | "execution_count": 58, 3484 | "metadata": { 3485 | "collapsed": false 3486 | }, 3487 | "outputs": [ 3488 | { 3489 | "data": { 3490 | "text/html": [ 3491 | "
\n", 3492 | "\n", 3493 | " \n", 3494 | " \n", 3495 | " \n", 3496 | " \n", 3497 | " \n", 3498 | " \n", 3499 | " \n", 3500 | " \n", 3501 | " \n", 3502 | " \n", 3503 | " \n", 3504 | " \n", 3505 | " \n", 3506 | " \n", 3507 | " \n", 3508 | " \n", 3509 | " \n", 3510 | " \n", 3511 | " \n", 3512 | " \n", 3513 | "
Numeric
2015-03-250
2015-03-307
2015-04-059
\n", 3514 | "
" 3515 | ], 3516 | "text/plain": [ 3517 | " Numeric\n", 3518 | "2015-03-25 0\n", 3519 | "2015-03-30 7\n", 3520 | "2015-04-05 9" 3521 | ] 3522 | }, 3523 | "execution_count": 58, 3524 | "metadata": {}, 3525 | "output_type": "execute_result" 3526 | } 3527 | ], 3528 | "source": [ 3529 | "# What if I have missing dates?\n", 3530 | "missing = pd.to_datetime(['2015-03-25', '2015-03-30', '2015-04-05'])\n", 3531 | "missing_df = pd.DataFrame(np.random.randint(0, 10, 3), index=missing, columns=[\"Numeric\"])\n", 3532 | "missing_df" 3533 | ] 3534 | }, 3535 | { 3536 | "cell_type": "code", 3537 | "execution_count": 59, 3538 | "metadata": { 3539 | "collapsed": false 3540 | }, 3541 | "outputs": [ 3542 | { 3543 | "data": { 3544 | "text/html": [ 3545 | "
\n", 3546 | "\n", 3547 | " \n", 3548 | " \n", 3549 | " \n", 3550 | " \n", 3551 | " \n", 3552 | " \n", 3553 | " \n", 3554 | " \n", 3555 | " \n", 3556 | " \n", 3557 | " \n", 3558 | " \n", 3559 | " \n", 3560 | " \n", 3561 | " \n", 3562 | " \n", 3563 | " \n", 3564 | " \n", 3565 | " \n", 3566 | " \n", 3567 | " \n", 3568 | " \n", 3569 | " \n", 3570 | " \n", 3571 | " \n", 3572 | " \n", 3573 | " \n", 3574 | " \n", 3575 | " \n", 3576 | " \n", 3577 | " \n", 3578 | " \n", 3579 | " \n", 3580 | " \n", 3581 | " \n", 3582 | " \n", 3583 | " \n", 3584 | " \n", 3585 | " \n", 3586 | " \n", 3587 | " \n", 3588 | " \n", 3589 | " \n", 3590 | " \n", 3591 | " \n", 3592 | " \n", 3593 | " \n", 3594 | " \n", 3595 | " \n", 3596 | " \n", 3597 | " \n", 3598 | " \n", 3599 | " \n", 3600 | " \n", 3601 | " \n", 3602 | " \n", 3603 | "
Numeric
2015-03-250
2015-03-26NaN
2015-03-27NaN
2015-03-28NaN
2015-03-29NaN
2015-03-307
2015-03-31NaN
2015-04-01NaN
2015-04-02NaN
2015-04-03NaN
2015-04-04NaN
2015-04-059
\n", 3604 | "
" 3605 | ], 3606 | "text/plain": [ 3607 | " Numeric\n", 3608 | "2015-03-25 0\n", 3609 | "2015-03-26 NaN\n", 3610 | "2015-03-27 NaN\n", 3611 | "2015-03-28 NaN\n", 3612 | "2015-03-29 NaN\n", 3613 | "2015-03-30 7\n", 3614 | "2015-03-31 NaN\n", 3615 | "2015-04-01 NaN\n", 3616 | "2015-04-02 NaN\n", 3617 | "2015-04-03 NaN\n", 3618 | "2015-04-04 NaN\n", 3619 | "2015-04-05 9" 3620 | ] 3621 | }, 3622 | "execution_count": 59, 3623 | "metadata": {}, 3624 | "output_type": "execute_result" 3625 | } 3626 | ], 3627 | "source": [ 3628 | "missing_df.asfreq('D')" 3629 | ] 3630 | }, 3631 | { 3632 | "cell_type": "code", 3633 | "execution_count": 60, 3634 | "metadata": { 3635 | "collapsed": false 3636 | }, 3637 | "outputs": [ 3638 | { 3639 | "data": { 3640 | "text/html": [ 3641 | "
\n", 3642 | "\n", 3643 | " \n", 3644 | " \n", 3645 | " \n", 3646 | " \n", 3647 | " \n", 3648 | " \n", 3649 | " \n", 3650 | " \n", 3651 | " \n", 3652 | " \n", 3653 | " \n", 3654 | " \n", 3655 | " \n", 3656 | " \n", 3657 | " \n", 3658 | " \n", 3659 | " \n", 3660 | " \n", 3661 | " \n", 3662 | " \n", 3663 | " \n", 3664 | " \n", 3665 | " \n", 3666 | " \n", 3667 | " \n", 3668 | " \n", 3669 | " \n", 3670 | " \n", 3671 | " \n", 3672 | " \n", 3673 | " \n", 3674 | " \n", 3675 | " \n", 3676 | " \n", 3677 | " \n", 3678 | " \n", 3679 | " \n", 3680 | " \n", 3681 | " \n", 3682 | " \n", 3683 | " \n", 3684 | " \n", 3685 | " \n", 3686 | " \n", 3687 | " \n", 3688 | " \n", 3689 | " \n", 3690 | " \n", 3691 | " \n", 3692 | " \n", 3693 | " \n", 3694 | " \n", 3695 | " \n", 3696 | " \n", 3697 | " \n", 3698 | " \n", 3699 | "
Numeric
2015-03-250
2015-03-260
2015-03-270
2015-03-280
2015-03-290
2015-03-307
2015-03-317
2015-04-017
2015-04-027
2015-04-037
2015-04-047
2015-04-059
\n", 3700 | "
" 3701 | ], 3702 | "text/plain": [ 3703 | " Numeric\n", 3704 | "2015-03-25 0\n", 3705 | "2015-03-26 0\n", 3706 | "2015-03-27 0\n", 3707 | "2015-03-28 0\n", 3708 | "2015-03-29 0\n", 3709 | "2015-03-30 7\n", 3710 | "2015-03-31 7\n", 3711 | "2015-04-01 7\n", 3712 | "2015-04-02 7\n", 3713 | "2015-04-03 7\n", 3714 | "2015-04-04 7\n", 3715 | "2015-04-05 9" 3716 | ] 3717 | }, 3718 | "execution_count": 60, 3719 | "metadata": {}, 3720 | "output_type": "execute_result" 3721 | } 3722 | ], 3723 | "source": [ 3724 | "missing_df.asfreq('D', method='pad')" 3725 | ] 3726 | }, 3727 | { 3728 | "cell_type": "markdown", 3729 | "metadata": {}, 3730 | "source": [ 3731 | "#XRAY!" 3732 | ] 3733 | }, 3734 | { 3735 | "cell_type": "code", 3736 | "execution_count": 61, 3737 | "metadata": { 3738 | "collapsed": true 3739 | }, 3740 | "outputs": [], 3741 | "source": [ 3742 | "import xray" 3743 | ] 3744 | }, 3745 | { 3746 | "cell_type": "code", 3747 | "execution_count": 62, 3748 | "metadata": { 3749 | "collapsed": false 3750 | }, 3751 | "outputs": [ 3752 | { 3753 | "data": { 3754 | "text/plain": [ 3755 | "\n", 3756 | "array([[ 1, 2, 3, 4],\n", 3757 | " [ 10, 20, 30, 40],\n", 3758 | " [100, 200, 300, 400]])\n", 3759 | "Coordinates:\n", 3760 | " * x (x) |S1 'a' 'b' 'c'\n", 3761 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 3762 | ] 3763 | }, 3764 | "execution_count": 62, 3765 | "metadata": {}, 3766 | "output_type": "execute_result" 3767 | } 3768 | ], 3769 | "source": [ 3770 | "arr = np.array([[1, 2, 3, 4], \n", 3771 | " [10, 20, 30, 40],\n", 3772 | " [100, 200, 300, 400]])\n", 3773 | "dim0_coords = ['a', 'b', 'c']\n", 3774 | "dim1_coords = ['foo', 'bar', 'baz', 'qux']\n", 3775 | "da = xray.DataArray(arr, [('x', dim0_coords), ('y', dim1_coords)])\n", 3776 | "da" 3777 | ] 3778 | }, 3779 | { 3780 | "cell_type": "code", 3781 | "execution_count": 63, 3782 | "metadata": { 3783 | "collapsed": false 3784 | }, 3785 | "outputs": [ 3786 | { 3787 | "data": { 3788 | "text/plain": [ 3789 | "array([[ 1, 2, 3, 4],\n", 
3790 | " [ 10, 20, 30, 40],\n", 3791 | " [100, 200, 300, 400]])" 3792 | ] 3793 | }, 3794 | "execution_count": 63, 3795 | "metadata": {}, 3796 | "output_type": "execute_result" 3797 | } 3798 | ], 3799 | "source": [ 3800 | "# Arrays!\n", 3801 | "da.values" 3802 | ] 3803 | }, 3804 | { 3805 | "cell_type": "code", 3806 | "execution_count": 64, 3807 | "metadata": { 3808 | "collapsed": false 3809 | }, 3810 | "outputs": [ 3811 | { 3812 | "data": { 3813 | "text/plain": [ 3814 | "('x', 'y')" 3815 | ] 3816 | }, 3817 | "execution_count": 64, 3818 | "metadata": {}, 3819 | "output_type": "execute_result" 3820 | } 3821 | ], 3822 | "source": [ 3823 | "da.dims" 3824 | ] 3825 | }, 3826 | { 3827 | "cell_type": "code", 3828 | "execution_count": 65, 3829 | "metadata": { 3830 | "collapsed": false 3831 | }, 3832 | "outputs": [ 3833 | { 3834 | "data": { 3835 | "text/plain": [ 3836 | "Coordinates:\n", 3837 | " * x (x) |S1 'a' 'b' 'c'\n", 3838 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 3839 | ] 3840 | }, 3841 | "execution_count": 65, 3842 | "metadata": {}, 3843 | "output_type": "execute_result" 3844 | } 3845 | ], 3846 | "source": [ 3847 | "da.coords" 3848 | ] 3849 | }, 3850 | { 3851 | "cell_type": "code", 3852 | "execution_count": 66, 3853 | "metadata": { 3854 | "collapsed": false 3855 | }, 3856 | "outputs": [ 3857 | { 3858 | "data": { 3859 | "text/plain": [ 3860 | "\n", 3861 | "array([[ 1, 2, 3, 4],\n", 3862 | " [10, 20, 30, 40]])\n", 3863 | "Coordinates:\n", 3864 | " * x (x) |S1 'a' 'b'\n", 3865 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 3866 | ] 3867 | }, 3868 | "execution_count": 66, 3869 | "metadata": {}, 3870 | "output_type": "execute_result" 3871 | } 3872 | ], 3873 | "source": [ 3874 | "# But with some Pandas-like powers!\n", 3875 | "\n", 3876 | "# Index by slice\n", 3877 | "da[0:2]" 3878 | ] 3879 | }, 3880 | { 3881 | "cell_type": "code", 3882 | "execution_count": 67, 3883 | "metadata": { 3884 | "collapsed": false 3885 | }, 3886 | "outputs": [ 3887 | { 3888 | "data": { 3889 | 
"text/plain": [ 3890 | "\n", 3891 | "array([1, 2, 3, 4])\n", 3892 | "Coordinates:\n", 3893 | " x |S1 'a'\n", 3894 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 3895 | ] 3896 | }, 3897 | "execution_count": 67, 3898 | "metadata": {}, 3899 | "output_type": "execute_result" 3900 | } 3901 | ], 3902 | "source": [ 3903 | "# Indexing!\n", 3904 | "da.loc['a']" 3905 | ] 3906 | }, 3907 | { 3908 | "cell_type": "code", 3909 | "execution_count": 68, 3910 | "metadata": { 3911 | "collapsed": false 3912 | }, 3913 | "outputs": [ 3914 | { 3915 | "data": { 3916 | "text/plain": [ 3917 | "\n", 3918 | "array([[ 1, 2, 3, 4],\n", 3919 | " [ 10, 20, 30, 40],\n", 3920 | " [100, 200, 300, 400]])\n", 3921 | "Coordinates:\n", 3922 | " * x (x) |S1 'a' 'b' 'c'\n", 3923 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 3924 | ] 3925 | }, 3926 | "execution_count": 68, 3927 | "metadata": {}, 3928 | "output_type": "execute_result" 3929 | } 3930 | ], 3931 | "source": [ 3932 | "da.loc['a':'c']" 3933 | ] 3934 | }, 3935 | { 3936 | "cell_type": "code", 3937 | "execution_count": 69, 3938 | "metadata": { 3939 | "collapsed": false 3940 | }, 3941 | "outputs": [ 3942 | { 3943 | "data": { 3944 | "text/plain": [ 3945 | "\n", 3946 | "array([[ 1, 2, 3, 4],\n", 3947 | " [100, 200, 300, 400]])\n", 3948 | "Coordinates:\n", 3949 | " * x (x) |S1 'a' 'c'\n", 3950 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 3951 | ] 3952 | }, 3953 | "execution_count": 69, 3954 | "metadata": {}, 3955 | "output_type": "execute_result" 3956 | } 3957 | ], 3958 | "source": [ 3959 | "da.sel(x=['a', 'c'])" 3960 | ] 3961 | }, 3962 | { 3963 | "cell_type": "code", 3964 | "execution_count": 70, 3965 | "metadata": { 3966 | "collapsed": false 3967 | }, 3968 | "outputs": [ 3969 | { 3970 | "data": { 3971 | "text/plain": [ 3972 | "\n", 3973 | "array([ 1, 10, 100])\n", 3974 | "Coordinates:\n", 3975 | " * x (x) |S1 'a' 'b' 'c'\n", 3976 | " y |S3 'foo'" 3977 | ] 3978 | }, 3979 | "execution_count": 70, 3980 | "metadata": {}, 3981 | "output_type": "execute_result" 3982 | } 
3983 | ], 3984 | "source": [ 3985 | "da.sel(y='foo')" 3986 | ] 3987 | }, 3988 | { 3989 | "cell_type": "code", 3990 | "execution_count": 71, 3991 | "metadata": { 3992 | "collapsed": false 3993 | }, 3994 | "outputs": [ 3995 | { 3996 | "data": { 3997 | "text/plain": [ 3998 | "\n", 3999 | "array([1, 2, 3, 4])\n", 4000 | "Coordinates:\n", 4001 | " x |S1 'a'\n", 4002 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 4003 | ] 4004 | }, 4005 | "execution_count": 71, 4006 | "metadata": {}, 4007 | "output_type": "execute_result" 4008 | } 4009 | ], 4010 | "source": [ 4011 | "da.isel(x=0)" 4012 | ] 4013 | }, 4014 | { 4015 | "cell_type": "code", 4016 | "execution_count": 72, 4017 | "metadata": { 4018 | "collapsed": false 4019 | }, 4020 | "outputs": [ 4021 | { 4022 | "data": { 4023 | "text/plain": [ 4024 | "(\n", 4025 | " array(92.5), \n", 4026 | " array(1110))" 4027 | ] 4028 | }, 4029 | "execution_count": 72, 4030 | "metadata": {}, 4031 | "output_type": "execute_result" 4032 | } 4033 | ], 4034 | "source": [ 4035 | "# Do numpy stuff\n", 4036 | "np.mean(da), np.sum(da)" 4037 | ] 4038 | }, 4039 | { 4040 | "cell_type": "code", 4041 | "execution_count": 73, 4042 | "metadata": { 4043 | "collapsed": false 4044 | }, 4045 | "outputs": [ 4046 | { 4047 | "data": { 4048 | "text/plain": [ 4049 | "\n", 4050 | "array([[ 0.84147098, 0.90929743, 0.14112001, -0.7568025 ],\n", 4051 | " [-0.54402111, 0.91294525, -0.98803162, 0.74511316],\n", 4052 | " [-0.50636564, -0.8732973 , -0.99975584, -0.85091936]])\n", 4053 | "Coordinates:\n", 4054 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'\n", 4055 | " * x (x) |S1 'a' 'b' 'c'" 4056 | ] 4057 | }, 4058 | "execution_count": 73, 4059 | "metadata": {}, 4060 | "output_type": "execute_result" 4061 | } 4062 | ], 4063 | "source": [ 4064 | "np.sin(da)" 4065 | ] 4066 | }, 4067 | { 4068 | "cell_type": "code", 4069 | "execution_count": 74, 4070 | "metadata": { 4071 | "collapsed": false 4072 | }, 4073 | "outputs": [ 4074 | { 4075 | "data": { 4076 | "text/plain": [ 4077 | "\n", 4078 
| "array([[101, 102, 103, 104],\n", 4079 | " [110, 120, 130, 140],\n", 4080 | " [200, 300, 400, 500]])\n", 4081 | "Coordinates:\n", 4082 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'\n", 4083 | " * x (x) |S1 'a' 'b' 'c'" 4084 | ] 4085 | }, 4086 | "execution_count": 74, 4087 | "metadata": {}, 4088 | "output_type": "execute_result" 4089 | } 4090 | ], 4091 | "source": [ 4092 | "# Broadcast!\n", 4093 | "da + 100" 4094 | ] 4095 | }, 4096 | { 4097 | "cell_type": "code", 4098 | "execution_count": 75, 4099 | "metadata": { 4100 | "collapsed": false 4101 | }, 4102 | "outputs": [ 4103 | { 4104 | "data": { 4105 | "text/plain": [ 4106 | "\n", 4107 | "array([ 37., 74., 111., 148.])\n", 4108 | "Coordinates:\n", 4109 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 4110 | ] 4111 | }, 4112 | "execution_count": 75, 4113 | "metadata": {}, 4114 | "output_type": "execute_result" 4115 | } 4116 | ], 4117 | "source": [ 4118 | "# Do all of the above by label\n", 4119 | "da.mean(dim='x')" 4120 | ] 4121 | }, 4122 | { 4123 | "cell_type": "code", 4124 | "execution_count": 76, 4125 | "metadata": { 4126 | "collapsed": false 4127 | }, 4128 | "outputs": [ 4129 | { 4130 | "data": { 4131 | "text/plain": [ 4132 | "\n", 4133 | "array([ 10, 100, 1000])\n", 4134 | "Coordinates:\n", 4135 | " * x (x) |S1 'a' 'b' 'c'" 4136 | ] 4137 | }, 4138 | "execution_count": 76, 4139 | "metadata": {}, 4140 | "output_type": "execute_result" 4141 | } 4142 | ], 4143 | "source": [ 4144 | "da.sum(dim='y')" 4145 | ] 4146 | }, 4147 | { 4148 | "cell_type": "code", 4149 | "execution_count": 77, 4150 | "metadata": { 4151 | "collapsed": false 4152 | }, 4153 | "outputs": [ 4154 | { 4155 | "data": { 4156 | "text/plain": [ 4157 | "\n", 4158 | "array([[ 1, 2, 3, 4],\n", 4159 | " [ 10, 20, 30, 40],\n", 4160 | " [100, 200, 300, 400]])\n", 4161 | "Coordinates:\n", 4162 | " * x (x) |S1 'a' 'b' 'c'\n", 4163 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 4164 | ] 4165 | }, 4166 | "execution_count": 77, 4167 | "metadata": {}, 4168 | "output_type": 
"execute_result" 4169 | } 4170 | ], 4171 | "source": [ 4172 | "da" 4173 | ] 4174 | }, 4175 | { 4176 | "cell_type": "code", 4177 | "execution_count": 78, 4178 | "metadata": { 4179 | "collapsed": false 4180 | }, 4181 | "outputs": [ 4182 | { 4183 | "data": { 4184 | "text/plain": [ 4185 | "\n", 4186 | "array([ 2.5, 25. , 250. ])\n", 4187 | "Coordinates:\n", 4188 | " * x (x) |S1 'a' 'b' 'c'" 4189 | ] 4190 | }, 4191 | "execution_count": 78, 4192 | "metadata": {}, 4193 | "output_type": "execute_result" 4194 | } 4195 | ], 4196 | "source": [ 4197 | "# Group stuff\n", 4198 | "da.groupby('x').mean()" 4199 | ] 4200 | }, 4201 | { 4202 | "cell_type": "code", 4203 | "execution_count": 79, 4204 | "metadata": { 4205 | "collapsed": false 4206 | }, 4207 | "outputs": [ 4208 | { 4209 | "data": { 4210 | "text/plain": [ 4211 | "\n", 4212 | "array([111, 222, 333, 444])\n", 4213 | "Coordinates:\n", 4214 | " * y (y) |S3 'foo' 'bar' 'baz' 'qux'" 4215 | ] 4216 | }, 4217 | "execution_count": 79, 4218 | "metadata": {}, 4219 | "output_type": "execute_result" 4220 | } 4221 | ], 4222 | "source": [ 4223 | "da.groupby('y').sum()" 4224 | ] 4225 | }, 4226 | { 4227 | "cell_type": "markdown", 4228 | "metadata": {}, 4229 | "source": [ 4230 | "#Blaze\n", 4231 | "\n", 4232 | "For this demo, we're going to focus on a couple queries we used in the Pandas demo:\n", 4233 | "```\n", 4234 | "select cut, mean(price)\n", 4235 | "from diamonds\n", 4236 | "groupby cut;\n", 4237 | "\n", 4238 | "select count(carat)\n", 4239 | "from diamonds\n", 4240 | "where price > 1000\n", 4241 | "group by clarity;\n", 4242 | "```" 4243 | ] 4244 | }, 4245 | { 4246 | "cell_type": "code", 4247 | "execution_count": 80, 4248 | "metadata": { 4249 | "collapsed": false 4250 | }, 4251 | "outputs": [], 4252 | "source": [ 4253 | "import blaze as bz\n", 4254 | "\n", 4255 | "diamonds = bz.symbol('diamonds', bz.discover(df_diamonds))" 4256 | ] 4257 | }, 4258 | { 4259 | "cell_type": "code", 4260 | "execution_count": 81, 4261 | "metadata": { 4262 
| "collapsed": false 4263 | }, 4264 | "outputs": [], 4265 | "source": [ 4266 | "mean_price = bz.by(diamonds.cut, price=diamonds.price.mean())\n", 4267 | "carat_count = bz.by(diamonds[diamonds.price > 1000].clarity,\n", 4268 | " carat=diamonds.carat.count())" 4269 | ] 4270 | }, 4271 | { 4272 | "cell_type": "code", 4273 | "execution_count": 82, 4274 | "metadata": { 4275 | "collapsed": false 4276 | }, 4277 | "outputs": [ 4278 | { 4279 | "data": { 4280 | "text/html": [ 4281 | "
\n", 4282 | "\n", 4283 | " \n", 4284 | " \n", 4285 | " \n", 4286 | " \n", 4287 | " \n", 4288 | " \n", 4289 | " \n", 4290 | " \n", 4291 | " \n", 4292 | " \n", 4293 | " \n", 4294 | " \n", 4295 | " \n", 4296 | " \n", 4297 | " \n", 4298 | " \n", 4299 | " \n", 4300 | " \n", 4301 | " \n", 4302 | " \n", 4303 | " \n", 4304 | " \n", 4305 | " \n", 4306 | " \n", 4307 | " \n", 4308 | " \n", 4309 | " \n", 4310 | " \n", 4311 | " \n", 4312 | " \n", 4313 | " \n", 4314 | " \n", 4315 | " \n", 4316 | " \n", 4317 | "
cutprice
0Fair4358.757764
1Good3928.864452
2Ideal3457.541970
3Premium4584.257704
4Very Good3981.759891
\n", 4318 | "
" 4319 | ], 4320 | "text/plain": [ 4321 | " cut price\n", 4322 | "0 Fair 4358.757764\n", 4323 | "1 Good 3928.864452\n", 4324 | "2 Ideal 3457.541970\n", 4325 | "3 Premium 4584.257704\n", 4326 | "4 Very Good 3981.759891" 4327 | ] 4328 | }, 4329 | "execution_count": 82, 4330 | "metadata": {}, 4331 | "output_type": "execute_result" 4332 | } 4333 | ], 4334 | "source": [ 4335 | "# We haven't actually computed anything yet!\n", 4336 | "# Let's make Pandas compute it. \n", 4337 | "bz.compute(mean_price, df_diamonds)" 4338 | ] 4339 | }, 4340 | { 4341 | "cell_type": "code", 4342 | "execution_count": 83, 4343 | "metadata": { 4344 | "collapsed": false 4345 | }, 4346 | "outputs": [ 4347 | { 4348 | "data": { 4349 | "text/html": [ 4350 | "
\n", 4351 | "\n", 4352 | " \n", 4353 | " \n", 4354 | " \n", 4355 | " \n", 4356 | " \n", 4357 | " \n", 4358 | " \n", 4359 | " \n", 4360 | " \n", 4361 | " \n", 4362 | " \n", 4363 | " \n", 4364 | " \n", 4365 | " \n", 4366 | " \n", 4367 | " \n", 4368 | " \n", 4369 | " \n", 4370 | " \n", 4371 | " \n", 4372 | " \n", 4373 | " \n", 4374 | " \n", 4375 | " \n", 4376 | " \n", 4377 | " \n", 4378 | " \n", 4379 | " \n", 4380 | " \n", 4381 | " \n", 4382 | " \n", 4383 | " \n", 4384 | " \n", 4385 | " \n", 4386 | " \n", 4387 | " \n", 4388 | " \n", 4389 | " \n", 4390 | " \n", 4391 | " \n", 4392 | " \n", 4393 | " \n", 4394 | " \n", 4395 | " \n", 4396 | " \n", 4397 | " \n", 4398 | " \n", 4399 | " \n", 4400 | " \n", 4401 | "
claritycarat
0I1741
1IF1790
2SI113065
3SI29194
4VS18171
5VS212258
6VVS13655
7VVS25066
\n", 4402 | "
" 4403 | ], 4404 | "text/plain": [ 4405 | " clarity carat\n", 4406 | "0 I1 741\n", 4407 | "1 IF 1790\n", 4408 | "2 SI1 13065\n", 4409 | "3 SI2 9194\n", 4410 | "4 VS1 8171\n", 4411 | "5 VS2 12258\n", 4412 | "6 VVS1 3655\n", 4413 | "7 VVS2 5066" 4414 | ] 4415 | }, 4416 | "execution_count": 83, 4417 | "metadata": {}, 4418 | "output_type": "execute_result" 4419 | } 4420 | ], 4421 | "source": [ 4422 | "bz.compute(carat_count, df_diamonds)" 4423 | ] 4424 | }, 4425 | { 4426 | "cell_type": "markdown", 4427 | "metadata": {}, 4428 | "source": [ 4429 | "Ok, so what? You made Pandas do a thing we already did. \n", 4430 | "\n", 4431 | "### Oh Yeah, what if we want Postgres to compute it? \n", 4432 | "\n", 4433 | "## WAT" 4434 | ] 4435 | }, 4436 | { 4437 | "cell_type": "code", 4438 | "execution_count": 84, 4439 | "metadata": { 4440 | "collapsed": false 4441 | }, 4442 | "outputs": [], 4443 | "source": [ 4444 | "# Blaze/Odo make it easy to move data between containers\n", 4445 | "# Note that we have an empty table already created\n", 4446 | "pg_datasource = bz.odo(df_diamonds, \"postgresql://postgres:postgres@localhost/pydata::diamonds\")" 4447 | ] 4448 | }, 4449 | { 4450 | "cell_type": "code", 4451 | "execution_count": 85, 4452 | "metadata": { 4453 | "collapsed": false 4454 | }, 4455 | "outputs": [ 4456 | { 4457 | "data": { 4458 | "text/html": [ 4459 | "
\n", 4460 | "\n", 4461 | " \n", 4462 | " \n", 4463 | " \n", 4464 | " \n", 4465 | " \n", 4466 | " \n", 4467 | " \n", 4468 | " \n", 4469 | " \n", 4470 | " \n", 4471 | " \n", 4472 | " \n", 4473 | " \n", 4474 | " \n", 4475 | " \n", 4476 | " \n", 4477 | " \n", 4478 | " \n", 4479 | " \n", 4480 | " \n", 4481 | " \n", 4482 | " \n", 4483 | " \n", 4484 | " \n", 4485 | " \n", 4486 | " \n", 4487 | " \n", 4488 | " \n", 4489 | " \n", 4490 | " \n", 4491 | " \n", 4492 | " \n", 4493 | " \n", 4494 | " \n", 4495 | " \n", 4496 | " \n", 4497 | " \n", 4498 | " \n", 4499 | " \n", 4500 | " \n", 4501 | " \n", 4502 | " \n", 4503 | " \n", 4504 | " \n", 4505 | " \n", 4506 | " \n", 4507 | " \n", 4508 | " \n", 4509 | " \n", 4510 | "
f0f1
0IF1790
1I1741
2VVS13655
3VS212258
4VS18171
5VVS25066
6SI29194
7SI113065
\n", 4511 | "
" 4512 | ], 4513 | "text/plain": [ 4514 | " f0 f1\n", 4515 | "0 IF 1790\n", 4516 | "1 I1 741\n", 4517 | "2 VVS1 3655\n", 4518 | "3 VS2 12258\n", 4519 | "4 VS1 8171\n", 4520 | "5 VVS2 5066\n", 4521 | "6 SI2 9194\n", 4522 | "7 SI1 13065" 4523 | ] 4524 | }, 4525 | "execution_count": 85, 4526 | "metadata": {}, 4527 | "output_type": "execute_result" 4528 | } 4529 | ], 4530 | "source": [ 4531 | "# Now we're going to use Postgres as our computation engine\n", 4532 | "bz.odo(bz.compute(carat_count, pg_datasource), pd.DataFrame)" 4533 | ] 4534 | }, 4535 | { 4536 | "cell_type": "markdown", 4537 | "metadata": {}, 4538 | "source": [ 4539 | "You can use any SQL supported by SQLAlchemy as your computation. It also supports MongoDB. \n", 4540 | "\n", 4541 | "So what about Out-of-Core?" 4542 | ] 4543 | }, 4544 | { 4545 | "cell_type": "code", 4546 | "execution_count": 86, 4547 | "metadata": { 4548 | "collapsed": false 4549 | }, 4550 | "outputs": [ 4551 | { 4552 | "data": { 4553 | "text/plain": [ 4554 | "((chunk, by(chunk[chunk.price > 1000].clarity, carat=count(chunk.carat))),\n", 4555 | " (aggregate, by(aggregate.clarity, carat=sum(aggregate.carat))))" 4556 | ] 4557 | }, 4558 | "execution_count": 86, 4559 | "metadata": {}, 4560 | "output_type": "execute_result" 4561 | } 4562 | ], 4563 | "source": [ 4564 | "# Blaze is doing it for you!\n", 4565 | "from blaze.expr.split import split\n", 4566 | "split(diamonds, carat_count)" 4567 | ] 4568 | }, 4569 | { 4570 | "cell_type": "markdown", 4571 | "metadata": {}, 4572 | "source": [ 4573 | "This diamonds dataset is not quite big enough to trigger it, but if you really want, you can provide your own Multiprocessing pool: " 4574 | ] 4575 | }, 4576 | { 4577 | "cell_type": "code", 4578 | "execution_count": 87, 4579 | "metadata": { 4580 | "collapsed": false 4581 | }, 4582 | "outputs": [ 4583 | { 4584 | "data": { 4585 | "text/html": [ 4586 | "
\n", 4587 | "\n", 4588 | " \n", 4589 | " \n", 4590 | " \n", 4591 | " \n", 4592 | " \n", 4593 | " \n", 4594 | " \n", 4595 | " \n", 4596 | " \n", 4597 | " \n", 4598 | " \n", 4599 | " \n", 4600 | " \n", 4601 | " \n", 4602 | " \n", 4603 | " \n", 4604 | " \n", 4605 | " \n", 4606 | " \n", 4607 | " \n", 4608 | " \n", 4609 | " \n", 4610 | " \n", 4611 | " \n", 4612 | " \n", 4613 | " \n", 4614 | " \n", 4615 | " \n", 4616 | " \n", 4617 | " \n", 4618 | " \n", 4619 | " \n", 4620 | " \n", 4621 | " \n", 4622 | " \n", 4623 | " \n", 4624 | " \n", 4625 | " \n", 4626 | " \n", 4627 | " \n", 4628 | " \n", 4629 | " \n", 4630 | " \n", 4631 | " \n", 4632 | " \n", 4633 | " \n", 4634 | " \n", 4635 | " \n", 4636 | " \n", 4637 | "
claritycarat
0I1741
1IF1790
2SI113065
3SI29194
4VS18171
5VS212258
6VVS13655
7VVS25066
\n", 4638 | "
" 4639 | ], 4640 | "text/plain": [ 4641 | " clarity carat\n", 4642 | "0 I1 741\n", 4643 | "1 IF 1790\n", 4644 | "2 SI1 13065\n", 4645 | "3 SI2 9194\n", 4646 | "4 VS1 8171\n", 4647 | "5 VS2 12258\n", 4648 | "6 VVS1 3655\n", 4649 | "7 VVS2 5066" 4650 | ] 4651 | }, 4652 | "execution_count": 87, 4653 | "metadata": {}, 4654 | "output_type": "execute_result" 4655 | } 4656 | ], 4657 | "source": [ 4658 | "import multiprocessing \n", 4659 | "pool = multiprocessing.Pool(4)\n", 4660 | "bz.compute(carat_count, df_diamonds, map=pool)" 4661 | ] 4662 | }, 4663 | { 4664 | "cell_type": "markdown", 4665 | "metadata": {}, 4666 | "source": [ 4667 | "# bcolz!" 4668 | ] 4669 | }, 4670 | { 4671 | "cell_type": "code", 4672 | "execution_count": 88, 4673 | "metadata": { 4674 | "collapsed": false 4675 | }, 4676 | "outputs": [ 4677 | { 4678 | "data": { 4679 | "text/plain": [ 4680 | "ctable((53940,), [('carat', ' 1000)\"]" 4841 | ] 4842 | }, 4843 | { 4844 | "cell_type": "code", 4845 | "execution_count": 93, 4846 | "metadata": { 4847 | "collapsed": false 4848 | }, 4849 | "outputs": [], 4850 | "source": [ 4851 | "# We can do the same thing with the ctable on disk!\n", 4852 | "diskdc = dc.copy(rootdir='diamonds')" 4853 | ] 4854 | }, 4855 | { 4856 | "cell_type": "code", 4857 | "execution_count": 94, 4858 | "metadata": { 4859 | "collapsed": false 4860 | }, 4861 | "outputs": [ 4862 | { 4863 | "data": { 4864 | "text/plain": [ 4865 | "array([(0.7, 'Ideal', 'E', 'SI1', 62.5, 57.0, 2757, 5.7, 5.72, 3.57),\n", 4866 | " (0.7, 'Ideal', 'G', 'VS2', 61.6, 56.0, 2757, 5.7, 5.67, 3.5),\n", 4867 | " (0.74, 'Ideal', 'G', 'SI1', 61.6, 55.0, 2760, 5.8, 5.85, 3.59), ...,\n", 4868 | " (0.71, 'Ideal', 'G', 'VS1', 61.4, 56.0, 2756, 5.76, 5.73, 3.53),\n", 4869 | " (0.72, 'Ideal', 'D', 'SI1', 60.8, 57.0, 2757, 5.75, 5.76, 3.5),\n", 4870 | " (0.75, 'Ideal', 'D', 'SI2', 62.2, 55.0, 2757, 5.83, 5.87, 3.64)], \n", 4871 | " dtype=[('carat', ' 1000)\"]" 4881 | ] 4882 | }, 4883 | { 4884 | "cell_type": "code", 4885 | 
"execution_count": 95, 4886 | "metadata": { 4887 | "collapsed": false 4888 | }, 4889 | "outputs": [ 4890 | { 4891 | "name": "stdout", 4892 | "output_type": "stream", 4893 | "text": [ 4894 | "diamonds/\n", 4895 | " __attrs__\n", 4896 | " __rootdirs__\n", 4897 | " carat/\n", 4898 | " __attrs__\n", 4899 | " data/\n", 4900 | " __0.blp\n", 4901 | " __1.blp\n", 4902 | " meta/\n", 4903 | " sizes\n", 4904 | " storage\n", 4905 | " clarity/\n", 4906 | " __attrs__\n", 4907 | " data/\n", 4908 | " __0.blp\n", 4909 | " meta/\n", 4910 | " sizes\n", 4911 | " storage\n", 4912 | " color/\n", 4913 | " __attrs__\n", 4914 | " data/\n", 4915 | " __0.blp\n", 4916 | " meta/\n", 4917 | " sizes\n", 4918 | " storage\n", 4919 | " cut/\n", 4920 | " __attrs__\n", 4921 | " data/\n", 4922 | " __0.blp\n", 4923 | " __1.blp\n", 4924 | " meta/\n", 4925 | " sizes\n", 4926 | " storage\n", 4927 | " depth/\n", 4928 | " __attrs__\n", 4929 | " data/\n", 4930 | " __0.blp\n", 4931 | " __1.blp\n", 4932 | " meta/\n", 4933 | " sizes\n", 4934 | " storage\n", 4935 | " price/\n", 4936 | " __attrs__\n", 4937 | " data/\n", 4938 | " __0.blp\n", 4939 | " __1.blp\n", 4940 | " meta/\n", 4941 | " sizes\n", 4942 | " storage\n", 4943 | " table/\n", 4944 | " __attrs__\n", 4945 | " data/\n", 4946 | " __0.blp\n", 4947 | " __1.blp\n", 4948 | " meta/\n", 4949 | " sizes\n", 4950 | " storage\n", 4951 | " x/\n", 4952 | " __attrs__\n", 4953 | " data/\n", 4954 | " __0.blp\n", 4955 | " __1.blp\n", 4956 | " meta/\n", 4957 | " sizes\n", 4958 | " storage\n", 4959 | " y/\n", 4960 | " __attrs__\n", 4961 | " data/\n", 4962 | " __0.blp\n", 4963 | " __1.blp\n", 4964 | " meta/\n", 4965 | " sizes\n", 4966 | " storage\n", 4967 | " z/\n", 4968 | " __attrs__\n", 4969 | " data/\n", 4970 | " __0.blp\n", 4971 | " __1.blp\n", 4972 | " meta/\n", 4973 | " sizes\n", 4974 | " storage\n" 4975 | ] 4976 | } 4977 | ], 4978 | "source": [ 4979 | "import os\n", 4980 | "\n", 4981 | "for root, dirs, files in os.walk('diamonds'):\n", 4982 | " level = 
root.replace('diamonds', '').count(os.sep)\n", 4983 | " indent = ' ' * 4 * (level)\n", 4984 | " print('{}{}/'.format(indent, os.path.basename(root)))\n", 4985 | " subindent = ' ' * 4 * (level + 1)\n", 4986 | " for f in files:\n", 4987 | " print('{}{}'.format(subindent, f))" 4988 | ] 4989 | } 4990 | ], 4991 | "metadata": { 4992 | "kernelspec": { 4993 | "display_name": "Python 2", 4994 | "language": "python", 4995 | "name": "python2" 4996 | }, 4997 | "language_info": { 4998 | "codemirror_mode": { 4999 | "name": "ipython", 5000 | "version": 2 5001 | }, 5002 | "file_extension": ".py", 5003 | "mimetype": "text/x-python", 5004 | "name": "python", 5005 | "nbconvert_exporter": "python", 5006 | "pygments_lexer": "ipython2", 5007 | "version": "2.7.9" 5008 | } 5009 | }, 5010 | "nbformat": 4, 5011 | "nbformat_minor": 0 5012 | } 5013 | -------------------------------------------------------------------------------- /PDXPythonMarch2015.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wrobstory/pdxpython2015/97ef83b02c899356af0aa7d8d1cbbb252b9e1813/PDXPythonMarch2015.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pdxpython2015 2 | Portland Python Meetup March 2015 3 | 4 | [See the IPython/Jupyter Notebook on nbviewer!](http://nbviewer.ipython.org/github/wrobstory/pdxpython2015/blob/master/PDXPython2015.ipynb) 5 | 6 | [Read the slides!](https://github.com/wrobstory/pdxpython2015/blob/master/PDXPythonMarch2015.pdf) 7 | 8 | [Install the dependencies!](https://github.com/wrobstory/pdxpython2015/blob/master/requirements.txt) 9 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Babel==1.3 2 | Cython==0.22 3 | DataShape==0.4.4 4 | Jinja2==2.7.3 5 | 
MarkupSafe==0.23 6 | Pygments==2.0.2 7 | SQLAlchemy==0.9.9 8 | Sphinx==1.3.1 9 | alabaster==0.7.3 10 | backports.ssl-match-hostname==3.4.0.2 11 | bcolz==0.8.1 12 | blaze==0.7.3 13 | certifi==14.05.14 14 | cytoolz==0.7.2 15 | decorator==3.4.2 16 | docutils==0.12 17 | gnureadline==6.3.3 18 | ipython==3.0.0 19 | jsonschema==2.4.0 20 | mistune==0.5.1 21 | multipledispatch==0.4.7 22 | networkx==1.9.1 23 | nose==1.3.4 24 | numpy==1.9.2 25 | numpydoc==0.5 26 | odo==0.3.1 27 | pandas==0.16.0 28 | psutil==2.2.1 29 | psycopg2==2.6 30 | ptyprocess==0.4 31 | python-dateutil==2.4.1 32 | pytz==2015.2 33 | pyzmq==14.5.0 34 | requests==2.6.0 35 | scipy==0.15.1 36 | six==1.9.0 37 | snowballstemmer==1.2.0 38 | sphinx-rtd-theme==0.1.7 39 | terminado==0.5 40 | toolz==0.7.1 41 | tornado==4.1 42 | wsgiref==0.1.2 43 | xray==0.4.1 44 | --------------------------------------------------------------------------------