├── .gitignore
├── README.md
├── books
│   ├── intro-to-python
│   │   ├── 01 - Basic Data Types.ipynb
│   │   ├── 02 - Python Complex Types.ipynb
│   │   └── 03 - Python Logic, Loops and Functions.ipynb
│   ├── other-notebooks
│   │   ├── 2016 Election FEC Data.ipynb
│   │   ├── Fun with Dask Distributed.ipynb
│   │   ├── Fun with Dask.ipynb
│   │   └── SF Salaries.ipynb
│   ├── pycon-2015-examples
│   │   ├── Lesson 01 - csv and tsv.ipynb
│   │   ├── Lesson 02 - XLSX.ipynb
│   │   ├── Lesson 03 - databasing.ipynb
│   │   ├── Lesson 04 - API.ipynb
│   │   ├── Lesson 05 - Regex.ipynb
│   │   ├── Lesson 06 - More String Analysis.ipynb
│   │   ├── Lesson 07 - Calculate.ipynb
│   │   ├── Lesson 08 - Journalism Library.ipynb
│   │   ├── Lesson 09 - Matplotlib.ipynb
│   │   ├── Lesson 10 - PyGal.ipynb
│   │   ├── Lesson 11 - Bokeh.ipynb
│   │   ├── Lesson 12 - Pandas Import and Inspection.ipynb
│   │   ├── Lesson 13 - Pandas Filtering and Joins.ipynb
│   │   └── Lesson 14 - Pandas Data Analysis.ipynb
│   └── pydata-examples
│       ├── 01 - Simple formats with Pandas.ipynb
│       ├── 02 - More complex formats with Pandas.ipynb
│       ├── 03 - Pandas Data Reader.ipynb
│       ├── 04 - APIs.ipynb
│       ├── 05 - Messy Data.ipynb
│       ├── 06 - Data Analysis with Pandas.ipynb
│       ├── 07 - Split Apply Combine.ipynb
│       ├── 08 - Bokeh for Data Visualization.ipynb
│       ├── Basic Data Visualization with Pandas.ipynb
│       ├── Introduction to Joins.ipynb
│       ├── Introduction to Regex.ipynb
│       ├── Update HN data.ipynb
│       └── solutions
│           ├── data_analysis_solution.py
│           ├── regex_solution.py
│           ├── sac_solution.py
│           ├── stocks_solution.py
│           ├── visualization_solution.py
│           ├── weather_solution_fix_stations.py
│           └── weather_solution_rainyday.py
├── conda_requirements.txt
├── data
│   ├── WEF_GlobalCompetitivenessReport_2014-15.pdf
│   ├── berlin_weather_oldest.csv
│   ├── comments.json
│   ├── departments.csv
│   ├── employees.csv
│   ├── hn.json
│   ├── imf_indicators.tsv
│   ├── iso-2.csv
│   ├── titles.csv
│   ├── topstories.json
│   └── wb
│       ├── Broad Money (M2) to foreign reserves, ratio.xlsx
│       ├── Broad Money (M2) to monetary base, ratio (M2 multiplier).xlsx
│       ├── CPI Price, % y-o-y, median weighted, seas. adj..xlsx
│       ├── CPI Price, % y-o-y, nominal, seas. adj..xlsx
│       ├── CPI Price, nominal, not seas. adj..xlsx
│       ├── CPI Price, nominal, seas. adj..xlsx
│       ├── Commodity Prices.xlsx
│       ├── Core CPI, not seas. adj..xlsx
│       ├── Core CPI, seas. adj..xlsx
│       ├── Emerging Market Bond Index (JPM Total Return Index).xlsx
│       ├── Exchange rate, new LCU per USD extended backward, period average.xlsx
│       ├── Exchange rate, old LCU per USD extended forward, period average.xlsx
│       ├── Exports Merchandise, Customs, Price, US$, seas. adj..xlsx
│       ├── Exports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx
│       ├── Exports Merchandise, Customs, current US$, millions, seas. adj..xlsx
│       ├── Foreign Reserves, Months Import Cover, Goods.xlsx
│       ├── GDP Deflator at Market Prices, LCU.xlsx
│       ├── GDP at market prices, constant 2010 LCU, millions, seas. adj..xlsx
│       ├── GDP at market prices, constant 2010 US$, millions, seas. adj..xlsx
│       ├── GDP at market prices, current LCU, millions, seas. adj..xlsx
│       ├── GDP at market prices, current US$, millions, seas. adj..xlsx
│       ├── GDP_Current_Dollars.xlsx
│       ├── Imports Merchandise, Customs, Price, US$, seas. adj..xlsx
│       ├── Imports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx
│       ├── Imports Merchandise, Customs, current US$, millions, seas. adj..xlsx
│       ├── Industrial Production, constant 2010 US$, not seasonally adjusted.xlsx
│       ├── Industrial Production, constant 2010 US$, seas. adj..xlsx
│       ├── Nominal Effecive Exchange Rate.xlsx
│       ├── Official exchange rate, LCU per USD, period average.xlsx
│       ├── Real Effective Exchange Rate.xlsx
│       ├── Retail Sales Volume Index, seas. adj..xlsx
│       ├── Sovereign Bond Interest Rate Spreads, basis points over US Treasuries.xlsx
│       ├── Stock Markets, LCU.xlsx
│       ├── Stock Markets, US$.xlsx
│       ├── Terms of Trade.xlsx
│       ├── Total Reserves.xlsx
│       ├── stock_market.xlsx
│       ├── stock_metadata.xlsx
│       └── unemployment.xlsx
├── data_wrangling_3.yml
├── py3_requirements.txt
├── py3_server_requirements.txt
├── pycon_2015_requirements.txt
├── requirements.txt
└── scripts
    └── pycon-2015
        ├── __init__.py
        ├── lesson01_csv.py
        ├── lesson02_xlsx.py
        ├── lesson03_databases.py
        ├── lesson04_05_api_regex.py
        ├── lesson06_string_processing.py
        ├── lesson07_calculate.py
        ├── lesson08_journalism.py
        ├── lesson09_matplotlib.py
        ├── lesson10_pygal.py
        ├── lesson11_bokeh.py
        ├── lesson12_pandas.py
        ├── lesson13_pandas_join.py
        └── lesson14_pandas_compute.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | openbooks/*
4 | *.db
5 | .*
6 | !/.gitignore
7 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Data Wrangling Introduction
2 | =======================
3 | 
4 | Welcome to Data Wrangling! Here you'll find some useful scripts and data to get started with Data Wrangling with Python.
5 | 
6 | To Install requirements if you are using Python 2.7+
7 | -----------------------
8 | $ pip install -r requirements.txt
9 | 
10 | To Install requirements if you are using Python 3.4+
11 | -----------------------
12 | $ pip install -r py3_requirements.txt
13 | 
14 | To Install requirements using Conda Environments and default Python
15 | --------------------------------------------------
16 | $ conda create --name conda_dw --file conda_requirements.txt
17 | $ source activate conda_dw
18 | $ conda install openpyxl
19 | 
20 | To Install requirements using Conda Environments and Python3
21 | --------------------------------------------------
22 | $ conda env create -f data_wrangling_3.yml
23 | $ source activate conda_dw3
24 | 
25 | To Access the IPython Notebook
26 | ----------------------
27 | 
28 | [PY3 Notebook](https://class.kjamistan.com)
29 | 
30 | 
31 | Questions?
32 | ----------
33 | 
34 | kjam on twitter / freenode
35 | katharine at kjamistan dot com
36 | 
37 | 
38 | Contributions
39 | --------------
40 | 
41 | Many thanks to [Steven Van den Berghe](https://be.linkedin.com/in/svdberghe) for his help debugging conda environments and providing the `data_wrangling_3.yml` file.
42 | 
43 | Massive Kudos to [Viacheslav Naydenov](https://github.com/vnaydionov/) for porting pdftables to Py3!
44 | 
45 | 
46 | Fair Use
47 | ----------
48 | 
49 | If you would like to use this content for your own course, please do so with attribution and without modification. If you'd like to modify this course and use it, please reach out regarding modifications made. Thank you!
:) 50 | 51 | -------------------------------------------------------------------------------- /books/intro-to-python/03 - Python Logic, Loops and Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Python Logic, Loops and Functions\n", 8 | "\n", 9 | "We will explore how to perform logic trees (if this, do that), loops (do this for every item) and functions (do this repeated code for me any time I type it's name)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import print_function" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### Logic" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "if 0:\n", 39 | " print(1 + 1)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "if 1:\n", 51 | " print(1 + 1)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "if True:\n", 63 | " print('!!!')" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "if False:\n", 75 | " print('???')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "if True and False:\n", 87 | " print('?!?')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "if True or False:\n", 99 | " print('...')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "if not False:\n", 111 | " print('...')" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "if not True:\n", 123 | " print('???')" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "if False:\n", 135 | " print('what?')\n", 136 | "else:\n", 137 | " print('thats better')" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "if False:\n", 149 | " pass\n", 150 | "elif True:\n", 151 | " print('yep!')\n", 152 | "else:\n", 153 | " print('down here!')" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "if 4 > 5:\n", 165 | " print('true!')\n", 166 | "else:\n", 167 | " print('false!')" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | 
}, 176 | "outputs": [], 177 | "source": [ 178 | "if 89 < 122:\n", 179 | " print('math!')" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "my_age = 33" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "if my_age < 22:\n", 202 | " print('too young!')\n", 203 | "elif my_age > 30:\n", 204 | " print('thats ok')\n", 205 | "elif my_age >= 100:\n", 206 | " print('too old!')" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "if True == 1:\n", 218 | " print('true')" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "if []:\n", 230 | " print('false')" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "#### For loops" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "for x in [1, 2, 3]:\n", 249 | " print(x)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": false 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "for y in 'my_long_string':\n", 261 | " print(y)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "my_pets = ['ber', 'lil bunny', 'birdie']" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "for pet in my_pets:\n", 284 | " print('I love my pet %s' % pet)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": true 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "my_integers = [3, 6, 7, 2, 8]" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "new_list = []" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "collapsed": false 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "for integer in my_integers:\n", 318 | " new_list.append(integer + 2)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "collapsed": false 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "my_integers" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "new_list" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": false 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "for letter in 'my name':\n", 352 | " print(letter)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": { 359 | "collapsed": true 360 | }, 361 
| "outputs": [], 362 | "source": [ 363 | "my_dict = {'foo': 1, 'bar': 2, 'baz': 3}" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "collapsed": false 371 | }, 372 | "outputs": [], 373 | "source": [ 374 | "my_dict.items()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "collapsed": true 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "for my_dict_key, my_dict_value in my_dict.items():\n", 386 | " my_dict[x] += 1" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": { 393 | "collapsed": false 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "x" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "collapsed": false 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "my_dict" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": { 415 | "collapsed": true 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "my_dict_with_lists = {'first_list': [1, 2, 3], 'second_list': [5, 6, 7]}" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": { 426 | "collapsed": false 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "for key, value in my_dict_with_lists.items():\n", 431 | " for val in value:\n", 432 | " print(val)\n" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": { 439 | "collapsed": true 440 | }, 441 | "outputs": [], 442 | "source": [ 443 | "def my_addition_function(x, y):\n", 444 | " \"\"\"This returns the sum of the two inputs.\"\"\"\n", 445 | " return x + y" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": { 452 | "collapsed": false 453 | }, 454 | "outputs": [], 455 | "source": [ 456 | "my_addition_function(3, 4)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "collapsed": false 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "my_addition_funciton('test', 'foo')" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": { 474 | "collapsed": false 475 | }, 476 | "outputs": [], 477 | "source": [ 478 | "my_addition_funciton([12, 23], 'bar')" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": { 485 | "collapsed": true 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "def say_hello(input_string):\n", 490 | " \"\"\"This will print hello to the input_string and return None\"\"\"\n", 491 | " print('hello {}!'.format(input_string))" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": { 498 | "collapsed": false 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "say_hello('test')" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": { 509 | "collapsed": false 510 | }, 511 | "outputs": [], 512 | "source": [ 513 | "my_return = say_hello('test')" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": { 520 | "collapsed": true 521 | }, 522 | "outputs": [], 523 | "source": [ 524 | "my_return" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": { 531 | "collapsed": false 532 | }, 533 | "outputs": [], 534 | "source": [ 
535 | "my_return is None" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "collapsed": true 543 | }, 544 | "outputs": [], 545 | "source": [ 546 | "def choose_math_with_kwargs(x, y, math_to_run='addition'):\n", 547 | " \"\"\" This will perform math based on the math_to_run keyword argument.\n", 548 | " params:\n", 549 | " x: unknown type\n", 550 | " y: unknown type\n", 551 | " kwargs:\n", 552 | " math_to_run: string (default: addition)\n", 553 | " returns math_to_run using x and y\n", 554 | " \"\"\"\n", 555 | " if math_to_run == 'addition':\n", 556 | " return x + y\n", 557 | " elif math_to_run == 'subtraction':\n", 558 | " return x - y" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "collapsed": false 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "choose_math_with_kwargs(2, 3)" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": { 576 | "collapsed": false 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "choose_math_with_kwargs(3, 4, math_to_run='addition')" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": null, 586 | "metadata": { 587 | "collapsed": false 588 | }, 589 | "outputs": [], 590 | "source": [ 591 | "choose_math_with_kwargs(3, 4, math_to_run='subtraction')" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "collapsed": true 599 | }, 600 | "outputs": [], 601 | "source": [ 602 | "choose_math_with_kwargs(3, 4, math_to_run='division')" 603 | ] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "#### Your turn!\n", 610 | "\n", 611 | "* Implement the other basic math operators\n", 612 | "* Add a default return\n", 613 | "* BONUS: can you send an error message if people input anything other than integers?" 
614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": { 620 | "collapsed": true 621 | }, 622 | "outputs": [], 623 | "source": [] 624 | } 625 | ], 626 | "metadata": { 627 | "anaconda-cloud": {}, 628 | "kernelspec": { 629 | "display_name": "Python [default]", 630 | "language": "python", 631 | "name": "python3" 632 | }, 633 | "language_info": { 634 | "codemirror_mode": { 635 | "name": "ipython", 636 | "version": 3 637 | }, 638 | "file_extension": ".py", 639 | "mimetype": "text/x-python", 640 | "name": "python", 641 | "nbconvert_exporter": "python", 642 | "pygments_lexer": "ipython3", 643 | "version": "3.5.2" 644 | } 645 | }, 646 | "nbformat": 4, 647 | "nbformat_minor": 0 648 | } 649 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 01 - csv and tsv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "['Country/Series-specific Notes', 'Scale', 'Estimates Start After', '2015', 'Country', 'Subject Descriptor', '2020', '2019', '2018', 'Units', '2014', '2017', '2016', '2013']\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from csv import DictReader\n", 20 | "\n", 21 | "\n", 22 | "rdr = DictReader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 23 | "\n", 24 | "all_lines = [r for r in rdr]\n", 25 | "\n", 26 | "print all_lines[0].keys()\n", 27 | "\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "Australia: 1,136.565 (2015 Billions)\n", 42 | "Australia: 47,607.697 (2015 Units)\n", 43 | "Austria: 402.420 (2015 Billions)\n", 44 | "Austria: 47,031.004 (2015 Units)\n", 45 | "Belgium: 492.267 (2015 Billions)\n", 46 | "Belgium: 43,800.208 (2015 Units)\n", 47 | "Canada: 1,640.370 (2015 Billions)\n", 48 | "Canada: 45,722.971 (2015 Units)\n", 49 | "Cyprus: 27.700 (2015 Billions)\n", 50 | "Cyprus: 30,769.578 (2015 Units)\n", 51 | "Czech Republic: 325.285 (2015 Billions)\n", 52 | "Czech Republic: 30,895.365 (2015 Units)\n", 53 | "Denmark: 255.866 (2015 Billions)\n", 54 | "Denmark: 45,451.273 (2015 Units)\n", 55 | "Estonia: 36.845 (2015 Billions)\n", 56 | "Estonia: 27,994.860 (2015 Units)\n", 57 | "Finland: 224.846 (2015 Billions)\n", 58 | "Finland: 40,838.367 (2015 Units)\n", 59 | "France: 2,633.896 (2015 Billions)\n", 60 | "France: 41,018.205 (2015 Units)\n", 61 | "Germany: 3,815.462 (2015 Billions)\n", 62 | "Germany: 46,895.970 (2015 Units)\n", 63 | "Greece: 294.014 (2015 Billions)\n", 64 | "Greece: 26,773.369 (2015 Units)\n", 65 | "Hong Kong SAR: 412.300 (2015 Billions)\n", 66 | "Hong Kong SAR: 56,428.069 (2015 Units)\n", 67 | "Iceland: 14.837 (2015 Billions)\n", 68 | "Iceland: 45,268.941 (2015 Units)\n", 69 | "Ireland: 237.629 (2015 Billions)\n", 70 | "Ireland: 51,118.997 (2015 Units)\n", 71 | "Israel: 280.390 (2015 Billions)\n", 72 | "Israel: 33,495.170 (2015 Units)\n", 73 | "Italy: 2,157.123 (2015 Billions)\n", 74 | "Italy: 35,811.443 (2015 Units)\n", 75 | "Japan: 4,843.066 (2015 Billions)\n", 76 | "Japan: 38,215.917 (2015 Units)\n", 77 | "Korea: 1,853.515 (2015 Billions)\n", 78 | "Korea: 36,601.073 (2015 Units)\n", 79 | "Latvia: 49.731 (2015 
Billions)\n", 80 | "Latvia: 24,540.593 (2015 Units)\n", 81 | "Lithuania: 82.622 (2015 Billions)\n", 82 | "Lithuania: 28,210.268 (2015 Units)\n", 83 | "Luxembourg: 53.174 (2015 Billions)\n", 84 | "Luxembourg: 93,173.614 (2015 Units)\n", 85 | "Malta: 14.716 (2015 Billions)\n", 86 | "Malta: 34,544.387 (2015 Units)\n", 87 | "Netherlands: 818.249 (2015 Billions)\n", 88 | "Netherlands: 48,317.050 (2015 Units)\n", 89 | "New Zealand: 164.965 (2015 Billions)\n", 90 | "New Zealand: 36,151.808 (2015 Units)\n", 91 | "Norway: 351.603 (2015 Billions)\n", 92 | "Norway: 67,445.198 (2015 Units)\n", 93 | "Portugal: 287.388 (2015 Billions)\n", 94 | "Portugal: 27,624.227 (2015 Units)\n", 95 | "San Marino: 1.942 (2015 Billions)\n", 96 | "San Marino: 61,836.317 (2015 Units)\n", 97 | "Singapore: 470.551 (2015 Billions)\n", 98 | "Singapore: 85,198.159 (2015 Units)\n", 99 | "Slovak Republic: 158.428 (2015 Billions)\n", 100 | "Slovak Republic: 29,209.564 (2015 Units)\n", 101 | "Slovenia: 62.949 (2015 Billions)\n", 102 | "Slovenia: 30,508.288 (2015 Units)\n", 103 | "Spain: 1,619.093 (2015 Billions)\n", 104 | "Spain: 34,899.403 (2015 Units)\n", 105 | "Sweden: 464.264 (2015 Billions)\n", 106 | "Sweden: 47,228.981 (2015 Units)\n", 107 | "Switzerland: 480.938 (2015 Billions)\n", 108 | "Switzerland: 58,730.924 (2015 Units)\n", 109 | "Taiwan Province of China: 1,125.338 (2015 Billions)\n", 110 | "Taiwan Province of China: 47,898.660 (2015 Units)\n", 111 | "United Kingdom: 2,641.432 (2015 Billions)\n", 112 | "United Kingdom: 40,676.475 (2015 Units)\n", 113 | "United States: 18,124.731 (2015 Billions)\n", 114 | "United States: 56,421.393 (2015 Units)\n", 115 | "ERROR: {'Country/Series-specific Notes': None, 'Scale': None, 'Estimates Start After': None, '2015': None, 'Country': 'International Monetary Fund, World Economic Outlook Database, April 2015', 'Subject Descriptor': None, '2020': None, '2019': None, '2018': None, 'Units': None, '2014': None, '2017': None, '2016': None, '2013': None}\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "for line in all_lines:\n", 121 | " try:\n", 122 | " if 'Gross domestic product' in line.get('Subject Descriptor') and \\\n", 123 | " 'international dollar' in line.get('Units'):\n", 124 | " print '{}: {} ({} {})'.format(\n", 125 | " line.get('Country'), line.get('2015'), '2015', line.get('Scale'))\n", 126 | " except:\n", 127 | " print \"ERROR: \", line\n" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "Python 2", 143 | "language": "python", 144 | "name": "python2" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 2 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython2", 156 | "version": "2.7.6" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 0 161 | } 162 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 02 - XLSX.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Data\n", 15 | 
"Definition and Source\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import xlrd\n", 21 | "\n", 22 | "\n", 23 | "notebook = xlrd.open_workbook('../../data/wb/GDP_Current_Dollars.xlsx')\n", 24 | "\n", 25 | "for sheet in notebook.sheets():\n", 26 | " print sheet.name" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "[u'Series Name', u'Series Code', u'Country Name', u'Country Code', u'1990 [YR1990]', u'2000 [YR2000]', u'2005 [YR2005]', u'2006 [YR2006]', u'2007 [YR2007]', u'2008 [YR2008]', u'2009 [YR2009]', u'2010 [YR2010]', u'2011 [YR2011]', u'2012 [YR2012]', u'2013 [YR2013]', u'2014 [YR2014]']\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "sheet = notebook.sheet_by_name('Data')\n", 46 | "\n", 47 | "titles = sheet.row_values(0)\n", 48 | "print titles" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Afghanistan 20841951232.0\n", 63 | "Albania 13370191506.0\n", 64 | "Algeria 2.14063173188e+11\n", 65 | "American Samoa ..\n", 66 | "Andorra ..\n", 67 | "Angola 1.31400635026e+11\n", 68 | "Antigua and Barbuda 1269117037.04\n", 69 | "Argentina 5.40197457444e+11\n", 70 | "Armenia 10881605059.4\n", 71 | "Aruba ..\n", 72 | "Australia 1.45377021067e+12\n", 73 | "Austria 4.36343622435e+11\n", 74 | "Azerbaijan 75198010965.2\n", 75 | "Bahamas, The 8510500000.0\n", 76 | "Bahrain 33868989361.7\n", 77 | "Bangladesh 1.73818932216e+11\n", 78 | "Barbados 4348000000.0\n", 79 | "Belarus 76139250364.5\n", 80 | "Belgium 5.33382785676e+11\n", 81 | "Belize ..\n", 82 | "Benin 8746992733.5\n", 83 | "Bermuda ..\n", 84 | "Bhutan 1821412872.84\n", 85 | "Bolivia 34175832127.4\n", 86 | "Bosnia and Herzegovina 18344278252.6\n", 87 | "Botswana 15813371063.2\n", 88 | "Brazil 2.34611817519e+12\n", 89 | "Brunei Darussalam 17256754269.2\n", 90 | "Bulgaria 55734676434.7\n", 91 | "Burkina Faso 12542969275.2\n", 92 | "Burundi 3093647293.48\n", 93 | "Cabo Verde 1871187333.5\n", 94 | "Cambodia 16709432402.7\n", 95 | "Cameroon 32548591285.9\n", 96 | "Canada 1.78665506451e+12\n", 97 | "Cayman Islands ..\n", 98 | "Central African Republic 1782927902.67\n", 99 | "Chad 13922224560.8\n", 100 | "Channel Islands ..\n", 101 | "Chile 2.58061522887e+11\n", 102 | "China 1.03601052479e+13\n", 103 | "Colombia 3.77739622866e+11\n", 104 | "Comoros 647720102.468\n", 105 | "Congo, Dem. Rep. 32962261155.7\n", 106 | "Congo, Rep. 14135462555.8\n", 107 | "Costa Rica 49552580683.1\n", 108 | "Cote d'Ivoire 34253611098.3\n", 109 | "Croatia 57222574023.2\n", 110 | "Cuba ..\n", 111 | "Curacao ..\n", 112 | "Cyprus 23226158986.2\n", 113 | "Czech Republic 2.05522871251e+11\n", 114 | "Denmark 3.4195160773e+11\n", 115 | "Djibouti 1581519705.53\n", 116 | "Dominica 537777777.778\n", 117 | "Dominican Republic 63968961563.1\n", 118 | "Ecuador 1.00543173e+11\n", 119 | "Egypt, Arab Rep. 
2.86538047766e+11\n", 120 | "El Salvador 25220000000.0\n", 121 | "Equatorial Guinea 14308094224.7\n", 122 | "Eritrea 3857821138.21\n", 123 | "Estonia 25904874312.3\n", 124 | "Ethiopia 54797679657.5\n", 125 | "Faeroe Islands ..\n", 126 | "Fiji 4029989728.83\n", 127 | "Finland 2.70673584162e+11\n", 128 | "France 2.82919203917e+12\n", 129 | "French Polynesia ..\n", 130 | "Gabon 17228443336.4\n", 131 | "Gambia, The 807069488.192\n", 132 | "Georgia 16529963187.4\n", 133 | "Germany 3.85255616966e+12\n", 134 | "Ghana 38648154100.4\n", 135 | "Greece 2.37592274371e+11\n", 136 | "Greenland ..\n", 137 | "Grenada 882222222.222\n", 138 | "Guam ..\n", 139 | "Guatemala 58728232327.2\n", 140 | "Guinea 6624068036.99\n", 141 | "Guinea-Bissau 1022371991.53\n", 142 | "Guyana 3228372887.86\n", 143 | "Haiti 8713031260.19\n", 144 | "Honduras 19385309985.8\n", 145 | "Hong Kong SAR, China 2.90896409544e+11\n", 146 | "Hungary 1.37103927313e+11\n", 147 | "Iceland 17071004499.2\n", 148 | "India 2.06690239733e+12\n", 149 | "Indonesia 8.88538201025e+11\n", 150 | "Iran, Islamic Rep. 4.15338504536e+11\n", 151 | "Iraq 2.20505682865e+11\n", 152 | "Ireland 2.45920712756e+11\n", 153 | "Isle of Man ..\n", 154 | "Israel 3.0422633627e+11\n", 155 | "Italy 2.14433818506e+12\n", 156 | "Jamaica ..\n", 157 | "Japan 4.60146120689e+12\n", 158 | "Jordan 35826925774.6\n", 159 | "Kazakhstan 2.12247913268e+11\n", 160 | "Kenya 60936509778.0\n", 161 | "Kiribati 166762323.639\n", 162 | "Korea, Dem. Rep. ..\n", 163 | "Korea, Rep. 1.41038294397e+12\n", 164 | "Kosovo 7273849011.54\n", 165 | "Kuwait ..\n", 166 | "Kyrgyz Republic 7404412710.31\n", 167 | "Lao PDR 11771725797.6\n", 168 | "Latvia 31920815648.3\n", 169 | "Lebanon 45730945273.6\n", 170 | "Lesotho 2088021624.11\n", 171 | "Liberia 2026939595.32\n", 172 | "Libya 41119144923.0\n", 173 | "Liechtenstein ..\n", 174 | "Lithuania 48172242517.3\n", 175 | "Luxembourg ..\n", 176 | "Macao SAR, China 55501532528.1\n", 177 | "Macedonia, FYR 11323761623.5\n", 178 | "Madagascar 10593147526.9\n", 179 | "Malawi 4258033615.3\n", 180 | "Malaysia 3.26933043801e+11\n", 181 | "Maldives 3032239478.12\n", 182 | "Mali 12074473001.8\n", 183 | "Malta ..\n", 184 | "Marshall Islands ..\n", 185 | "Mauritania 5061180371.05\n", 186 | "Mauritius 12616421088.4\n", 187 | "Mexico 1.28271995486e+12\n", 188 | "Micronesia, Fed. Sts. 
..\n", 189 | "Moldova 7944184929.75\n", 190 | "Monaco ..\n", 191 | "Mongolia 12015944336.5\n", 192 | "Montenegro 4583198885.5\n", 193 | "Morocco 1.07004984357e+11\n", 194 | "Mozambique 16385584919.0\n", 195 | "Myanmar 64330038664.7\n", 196 | "Namibia 13429503284.9\n", 197 | "Nepal 19636186469.3\n", 198 | "Netherlands 8.6950812548e+11\n", 199 | "New Caledonia ..\n", 200 | "New Zealand ..\n", 201 | "Nicaragua 11805641286.8\n", 202 | "Niger 8168695869.87\n", 203 | "Nigeria 5.68508262378e+11\n", 204 | "Northern Mariana Islands ..\n", 205 | "Norway 5.00103094419e+11\n", 206 | "Oman 81796618985.7\n", 207 | "Pakistan 2.46876324189e+11\n", 208 | "Palau 250625562.794\n", 209 | "Panama 46212600000.0\n", 210 | "Papua New Guinea ..\n", 211 | "Paraguay 30984747863.3\n", 212 | "Peru 2.02902760293e+11\n", 213 | "Philippines 2.84582023121e+11\n", 214 | "Poland 5.48003360279e+11\n", 215 | "Portugal 2.2958371149e+11\n", 216 | "Puerto Rico ..\n", 217 | "Qatar 2.11816758242e+11\n", 218 | "Romania 1.99043652215e+11\n", 219 | "Russian Federation 1.86059792276e+12\n", 220 | "Rwanda 7890190336.75\n", 221 | "Samoa 800586671.241\n", 222 | "San Marino ..\n", 223 | "Sao Tome and Principe 334902362.057\n", 224 | "Saudi Arabia 7.46248533333e+11\n", 225 | "Senegal 15578916865.4\n", 226 | "Serbia 43866423166.9\n", 227 | "Seychelles 1405764157.88\n", 228 | "Sierra Leone 4892363979.23\n", 229 | "Singapore 3.07871907186e+11\n", 230 | "Sint Maarten (Dutch part) ..\n", 231 | "Slovak Republic 99790145652.8\n", 232 | "Slovenia 49416055609.2\n", 233 | "Solomon Islands 1158183053.76\n", 234 | "Somalia ..\n", 235 | "South Africa 3.49817096206e+11\n", 236 | "South Sudan 13069991258.3\n", 237 | "Spain 1.40430653606e+12\n", 238 | "Sri Lanka 74941183242.0\n", 239 | "St. Kitts and Nevis 833333333.333\n", 240 | "St. Lucia 1365426555.56\n", 241 | "St. Martin (French part) ..\n", 242 | "St. Vincent and the Grenadines 728696703.704\n", 243 | "Sudan 73815376184.6\n", 244 | "Suriname ..\n", 245 | "Swaziland 3400422936.23\n", 246 | "Sweden 5.7059126616e+11\n", 247 | "Switzerland ..\n", 248 | "Syrian Arab Republic ..\n", 249 | "Taiwan, China ..\n", 250 | "Tajikistan 9241627840.61\n", 251 | "Tanzania 49183884817.5\n", 252 | "Thailand 3.73804134912e+11\n", 253 | "Timor-Leste 1552000000.0\n", 254 | "Togo 4518443907.45\n", 255 | "Tonga 434386307.693\n", 256 | "Trinidad and Tobago ..\n", 257 | "Tunisia ..\n", 258 | "Turkey 7.99534963354e+11\n", 259 | "Turkmenistan 47931929824.6\n", 260 | "Turks and Caicos Islands ..\n", 261 | "Tuvalu ..\n", 262 | "Uganda 26312399301.4\n", 263 | "Ukraine 1.31805126738e+11\n", 264 | "United Arab Emirates 4.01646583173e+11\n", 265 | "United Kingdom 2.94188553746e+12\n", 266 | "United States 1.7419e+13\n", 267 | "Uruguay 57471277325.1\n", 268 | "Uzbekistan 62643953021.8\n", 269 | "Vanuatu ..\n", 270 | "Venezuela, RB 5.09964084931e+11\n", 271 | "Vietnam 1.86204652922e+11\n", 272 | "Virgin Islands (U.S.) ..\n", 273 | "West Bank and Gaza 12737613125.0\n", 274 | "Yemen, Rep. 
..\n", 275 | "Zambia 27066230009.1\n", 276 | "Zimbabwe 13663314279.7\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "def build_array(sheet, titles, start_row=1):\n", 287 | " new_arr = []\n", 288 | " while start_row < sheet.nrows:\n", 289 | " new_arr.append(\n", 290 | " dict(zip(titles, sheet.row_values(start_row)))\n", 291 | " )\n", 292 | " start_row += 1\n", 293 | " return new_arr\n", 294 | "\n", 295 | "\n", 296 | "arr = build_array(sheet, titles)\n", 297 | "\n", 298 | "for line in arr:\n", 299 | " print line.get('Country Name'), line.get('2014 [YR2014]')\n" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": true 307 | }, 308 | "outputs": [], 309 | "source": [] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 2", 315 | "language": "python", 316 | "name": "python2" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 2 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython2", 328 | "version": "2.7.6" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 0 333 | } 334 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 03 - databasing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "2" 14 | ] 15 | }, 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "output_type": "execute_result" 19 | } 20 | ], 21 | "source": [ 22 | "import dataset\n", 23 | "\n", 24 | "db = dataset.connect('sqlite:///../../data/data_analysis.db')\n", 25 | "\n", 26 | "my_sources = db['sources']\n", 27 | "\n", 28 | "my_sources.insert({'organization': 'IMF',\n", 29 | " 'file_name': 'imf_indicators.tsv',\n", 30 | " 'url': 'http://www.imf.org/external/pubs/ft/weo/2015/01/weodata/index.aspx',\n", 31 | " 'description': 'IMF World Economic Outlook Dataset',\n", 32 | " })\n", 33 | "\n", 34 | "my_sources.insert({'organization': 'World Bank',\n", 35 | " 'file_name': 'wb/GDP_Current_Dollars.xlsx',\n", 36 | " 'url': 'http://databank.worldbank.org/data/reports.aspx?source=2&series=NY.GDP.MKTP.CD#',\n", 37 | " 'description': 'World Bank GDP Dataset',\n", 38 | " })" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[u'sources']\n", 53 | "IMF World Economic Outlook Dataset\n", 54 | "World Bank GDP Dataset\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "print db.tables\n", 60 | "\n", 61 | "for row in db['sources']:\n", 62 | " print row['description']\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 2", 78 | "language": "python", 79 | "name": "python2" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 2 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | 
"name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython2", 91 | "version": "2.7.6" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 0 96 | } 97 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 04 - API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "def get_story(story_id):\n", 23 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 24 | " resp = requests.get(url)\n", 25 | " return resp.json()\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 6, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def get_top_stories():\n", 37 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 38 | " resp = requests.get(url)\n", 39 | " all_stories = [get_story(sid) for sid in resp.json()[:10]]\n", 40 | " return all_stories\n", 41 | " " 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 7, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "[{u'by': u'jmduke',\n", 55 | " u'descendants': 38,\n", 56 | " u'id': 9920121,\n", 57 | " u'kids': [9920697,\n", 58 | " 9920460,\n", 59 | " 9920607,\n", 60 | " 9920308,\n", 61 | " 9920772,\n", 62 | " 9920837,\n", 63 | " 9920324,\n", 64 | " 9920681,\n", 65 | " 9920736,\n", 66 | " 9920671,\n", 67 | " 9920342,\n", 68 | " 9920413,\n", 69 | " 9920211,\n", 70 | " 9920796,\n", 71 | " 9920417,\n", 72 | " 9920371,\n", 73 | " 9920541],\n", 74 | " u'score': 228,\n", 75 | " u'text': u'',\n", 76 | " u'time': 1437441778,\n", 77 | " u'title': u'Web Design: The First 100 Years (2014)',\n", 78 | " u'type': u'story',\n", 79 | " u'url': u'http://idlewords.com/talks/web_design_first_100_years.htm'},\n", 80 | " {u'by': u'mattkrisiloff',\n", 81 | " u'descendants': 328,\n", 82 | " u'id': 9917334,\n", 83 | " u'kids': [9919609,\n", 84 | " 9919370,\n", 85 | " 9917792,\n", 86 | " 9919301,\n", 87 | " 9918094,\n", 88 | " 9917359,\n", 89 | " 9918659,\n", 90 | " 9917458,\n", 91 | " 9918755,\n", 92 | " 9920544,\n", 93 | " 9918719,\n", 94 | " 9917583,\n", 95 | " 9918554,\n", 96 | " 9917593,\n", 97 | " 9920819,\n", 98 | " 9920656,\n", 99 | " 9917437,\n", 100 | " 9920427,\n", 101 | " 9917400,\n", 102 | " 9920266,\n", 103 | " 9917880,\n", 104 | " 9919417,\n", 105 | " 9920224,\n", 106 | " 9920647,\n", 107 | " 9920752,\n", 108 | " 9917781,\n", 109 | " 9920151,\n", 110 | " 9920655,\n", 111 | " 9918291,\n", 112 | " 9920614,\n", 113 | " 9920200,\n", 114 | " 9917372,\n", 115 | " 9917422,\n", 116 | " 9920168,\n", 117 | " 9917384,\n", 118 | " 9917689,\n", 119 | " 9920454,\n", 120 | " 9917830,\n", 121 | " 9920517,\n", 122 | " 9919318,\n", 123 | " 9918802,\n", 124 | " 9919573,\n", 125 | " 9920192,\n", 126 | " 9919447,\n", 127 | " 9918062,\n", 128 | " 9918891,\n", 129 | " 9918944,\n", 130 | " 9919559,\n", 131 | " 9919709,\n", 132 | " 9918183,\n", 133 | " 9918189,\n", 134 | " 9917945,\n", 135 | " 9917587,\n", 136 | " 9918200,\n", 137 | " 9918137,\n", 138 | " 9918176,\n", 139 | " 9919437,\n", 140 | " 9918344,\n", 141 | " 9919350,\n", 142 
| " 9918233,\n", 143 | " 9918086,\n", 144 | " 9917799,\n", 145 | " 9919928,\n", 146 | " 9917373,\n", 147 | " 9918758,\n", 148 | " 9919230,\n", 149 | " 9919890,\n", 150 | " 9918834,\n", 151 | " 9918595,\n", 152 | " 9917551,\n", 153 | " 9917466,\n", 154 | " 9919406,\n", 155 | " 9917760,\n", 156 | " 9917849,\n", 157 | " 9917467,\n", 158 | " 9917399,\n", 159 | " 9917889,\n", 160 | " 9917561,\n", 161 | " 9918942,\n", 162 | " 9917440,\n", 163 | " 9917416],\n", 164 | " u'score': 847,\n", 165 | " u'text': u'',\n", 166 | " u'time': 1437411626,\n", 167 | " u'title': u'YC Fellowship',\n", 168 | " u'type': u'story',\n", 169 | " u'url': u'http://blog.ycombinator.com/yc-fellowship'},\n", 170 | " {u'by': u'cnnrjcbsn',\n", 171 | " u'descendants': 124,\n", 172 | " u'id': 9917442,\n", 173 | " u'kids': [9918674,\n", 174 | " 9917665,\n", 175 | " 9917794,\n", 176 | " 9919619,\n", 177 | " 9917729,\n", 178 | " 9917730,\n", 179 | " 9917897,\n", 180 | " 9918958,\n", 181 | " 9920829,\n", 182 | " 9918130,\n", 183 | " 9918009,\n", 184 | " 9918243,\n", 185 | " 9918382,\n", 186 | " 9917660,\n", 187 | " 9917734,\n", 188 | " 9918734,\n", 189 | " 9918271,\n", 190 | " 9920838,\n", 191 | " 9918612,\n", 192 | " 9918204,\n", 193 | " 9919142,\n", 194 | " 9919320,\n", 195 | " 9918146,\n", 196 | " 9918359,\n", 197 | " 9918417,\n", 198 | " 9917611,\n", 199 | " 9919686,\n", 200 | " 9918051,\n", 201 | " 9918492,\n", 202 | " 9919432,\n", 203 | " 9917647,\n", 204 | " 9917726,\n", 205 | " 9919511,\n", 206 | " 9918617,\n", 207 | " 9917837,\n", 208 | " 9918552,\n", 209 | " 9917702,\n", 210 | " 9917670,\n", 211 | " 9917631,\n", 212 | " 9920323,\n", 213 | " 9919171,\n", 214 | " 9918493,\n", 215 | " 9920355,\n", 216 | " 9918174,\n", 217 | " 9917650,\n", 218 | " 9918266],\n", 219 | " u'score': 774,\n", 220 | " u'text': u'',\n", 221 | " u'time': 1437412422,\n", 222 | " u'title': u'Grooveshark co-founder, 28, found dead in home',\n", 223 | " u'type': u'story',\n", 224 | " u'url': u'http://www.gainesville.com/article/20150720/ARTICLES/150729990'},\n", 225 | " {u'by': u'ivank',\n", 226 | " u'descendants': 47,\n", 227 | " u'id': 9919641,\n", 228 | " u'kids': [9919937,\n", 229 | " 9919764,\n", 230 | " 9919923,\n", 231 | " 9919994,\n", 232 | " 9919797,\n", 233 | " 9920002,\n", 234 | " 9920592,\n", 235 | " 9919703,\n", 236 | " 9919964,\n", 237 | " 9919986],\n", 238 | " u'score': 107,\n", 239 | " u'text': u'',\n", 240 | " u'time': 1437432872,\n", 241 | " u'title': u'SpaceX CRS-7 Failure Investigation Teleconference Thread',\n", 242 | " u'type': u'story',\n", 243 | " u'url': u'https://www.reddit.com/r/spacex/comments/3dyvta/rspacex_crs7_failure_investigation_teleconference/'},\n", 244 | " {u'by': u'dluan',\n", 245 | " u'descendants': 26,\n", 246 | " u'id': 9919852,\n", 247 | " u'kids': [9920313,\n", 248 | " 9920021,\n", 249 | " 9920368,\n", 250 | " 9920222,\n", 251 | " 9920009,\n", 252 | " 9920172,\n", 253 | " 9920030,\n", 254 | " 9920574,\n", 255 | " 9920113,\n", 256 | " 9920060,\n", 257 | " 9920019,\n", 258 | " 9920015,\n", 259 | " 9920011],\n", 260 | " u'score': 94,\n", 261 | " u'text': u'',\n", 262 | " u'time': 1437436325,\n", 263 | " u'title': u'Cindy Wu and Experiment.com (YC W13)',\n", 264 | " u'type': u'story',\n", 265 | " u'url': u'http://nytlive.nytimes.com/womenintheworld/2015/07/08/woman-raised-1-2-million-with-a-spirited-3-minute-speech/'},\n", 266 | " {u'by': u'aaronbrethorst',\n", 267 | " u'descendants': 141,\n", 268 | " u'id': 9916974,\n", 269 | " u'kids': [9918485,\n", 270 | " 9919264,\n", 271 | " 9918591,\n", 272 | " 9920201,\n", 
273 | " 9919838,\n", 274 | " 9918105,\n", 275 | " 9919096,\n", 276 | " 9919121,\n", 277 | " 9919004,\n", 278 | " 9918240,\n", 279 | " 9919462,\n", 280 | " 9919676,\n", 281 | " 9919382,\n", 282 | " 9918181,\n", 283 | " 9917957,\n", 284 | " 9919157,\n", 285 | " 9919517,\n", 286 | " 9917939,\n", 287 | " 9918727,\n", 288 | " 9920566,\n", 289 | " 9918202],\n", 290 | " u'score': 244,\n", 291 | " u'text': u'',\n", 292 | " u'time': 1437408433,\n", 293 | " u'title': u'A Man Who Flies Around the World for Free',\n", 294 | " u'type': u'story',\n", 295 | " u'url': u'http://www.rollingstone.com/culture/features/ben-schlappig-airlines-fly-free-20150720'},\n", 296 | " {u'by': u'monkeypod',\n", 297 | " u'descendants': 86,\n", 298 | " u'id': 9918273,\n", 299 | " u'kids': [9918498,\n", 300 | " 9919239,\n", 301 | " 9919056,\n", 302 | " 9918603,\n", 303 | " 9918897,\n", 304 | " 9918632,\n", 305 | " 9919476,\n", 306 | " 9918479,\n", 307 | " 9918826,\n", 308 | " 9919160,\n", 309 | " 9918695,\n", 310 | " 9918751,\n", 311 | " 9918657,\n", 312 | " 9919303,\n", 313 | " 9920643,\n", 314 | " 9919506,\n", 315 | " 9918761],\n", 316 | " u'score': 154,\n", 317 | " u'text': u'',\n", 318 | " u'time': 1437419219,\n", 319 | " u'title': u'Coin Card Teardown',\n", 320 | " u'type': u'story',\n", 321 | " u'url': u'http://www.bitsofcents.com/post/124593977646/coin-card-teardown'},\n", 322 | " {u'by': u'_jomo',\n", 323 | " u'descendants': 37,\n", 324 | " u'id': 9918862,\n", 325 | " u'kids': [9920870,\n", 326 | " 9919229,\n", 327 | " 9919270,\n", 328 | " 9919307,\n", 329 | " 9919272,\n", 330 | " 9920765,\n", 331 | " 9920038,\n", 332 | " 9920522,\n", 333 | " 9919992,\n", 334 | " 9919210,\n", 335 | " 9919162,\n", 336 | " 9920325,\n", 337 | " 9920337,\n", 338 | " 9920350],\n", 339 | " u'score': 116,\n", 340 | " u'text': u'',\n", 341 | " u'time': 1437424405,\n", 342 | " u'title': u'Google, the Wassenaar Arrangement, and vulnerability research',\n", 343 | " u'type': u'story',\n", 344 | " u'url': u'https://googleonlinesecurity.blogspot.com/2015/07/google-wassenaar-arrangement-and.html'},\n", 345 | " {u'by': u'TheRedBarron',\n", 346 | " u'descendants': 1,\n", 347 | " u'id': 9920756,\n", 348 | " u'kids': [9920813],\n", 349 | " u'score': 5,\n", 350 | " u'text': u'',\n", 351 | " u'time': 1437458633,\n", 352 | " u'title': u'Remember the Tetris Lamp? 
Here is proof that two sets can create a rectangle',\n", 353 | " u'type': u'story',\n", 354 | " u'url': u'https://barronwasteland.wordpress.com/2015/07/21/527/'},\n", 355 | " {u'by': u'moyayo',\n", 356 | " u'id': 9920815,\n", 357 | " u'score': 1,\n", 358 | " u'text': u'',\n", 359 | " u'time': 1437460741,\n", 360 | " u'title': u'SmartSpot (YC W15) Is Hiring Full Time iOS Dev Passionate about Fitness',\n", 361 | " u'type': u'job',\n", 362 | " u'url': u'https://smartspot.wufoo.com/forms/smartspot-hiring/'}]" 363 | ] 364 | }, 365 | "execution_count": 7, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "get_top_stories()" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "collapsed": true 379 | }, 380 | "outputs": [], 381 | "source": [] 382 | } 383 | ], 384 | "metadata": { 385 | "kernelspec": { 386 | "display_name": "Python 2", 387 | "language": "python", 388 | "name": "python2" 389 | }, 390 | "language_info": { 391 | "codemirror_mode": { 392 | "name": "ipython", 393 | "version": 2 394 | }, 395 | "file_extension": ".py", 396 | "mimetype": "text/x-python", 397 | "name": "python", 398 | "nbconvert_exporter": "python", 399 | "pygments_lexer": "ipython2", 400 | "version": "2.7.6" 401 | } 402 | }, 403 | "nbformat": 4, 404 | "nbformat_minor": 0 405 | } 406 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 05 - Regex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "import re\n", 13 | "from multiprocessing import Process, Manager\n", 14 | "\n", 15 | "MATCHING = (\n", 16 | " ('Python', '(p|P)ython'),\n", 17 | " ('Ruby', '(r|R)uby'),\n", 18 | " ('JavaScript', 'js|(J|j)ava(s|S)cript'),\n", 19 | " ('NodeJS', 'node(\\.?)(?:\\js|JS)'),\n", 20 | " ('Java', '(j|J)ava[^(S|s)cript]'),\n", 21 | " ('Objective-C', 'Obj(ective?)(?:\\ |-)(C|c)'),\n", 22 | ")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def get_story(story_id, stories):\n", 34 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 35 | " resp = requests.get(url)\n", 36 | " stories.append(resp.json())" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def get_top_stories():\n", 48 | " manager = Manager()\n", 49 | " stories = manager.list()\n", 50 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 51 | " ids = requests.get(url)\n", 52 | " processes = [Process(target=get_story, args=(sid, stories))\n", 53 | " for sid in ids.json()[:40]]\n", 54 | " for p in processes:\n", 55 | " p.start()\n", 56 | " for p in processes:\n", 57 | " p.join()\n", 58 | " return stories" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "def count_languages():\n", 70 | " stories = get_top_stories()\n", 71 | " final_tallies = {}\n", 72 | " for s in stories:\n", 73 | " long_string = u'{} {}'.format(s.get('title'), s.get('url'))\n", 74 | " for language, regex in 
dict(MATCHING).items():\n", 75 | " if re.search(regex, long_string):\n", 76 | " if language not in final_tallies.keys():\n", 77 | " final_tallies[language] = {\n", 78 | " 'score': s.get('score'),\n", 79 | " 'descendants': s.get('descendants')}\n", 80 | " else:\n", 81 | " final_tallies[language]['score'] += s.get('score')\n", 82 | " final_tallies[language][\n", 83 | " 'descendants'] += s.get('descendants')\n", 84 | " return final_tallies" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "{'JavaScript': {'descendants': 54, 'score': 88},\n", 98 | " 'Python': {'descendants': 1, 'score': 28},\n", 99 | " 'Ruby': {'descendants': 11, 'score': 59}}" 100 | ] 101 | }, 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "count_languages()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 2", 124 | "language": "python", 125 | "name": "python2" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 2 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython2", 137 | "version": "2.7.6" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 0 142 | } 143 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 07 - Calculate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import calculate\n", 12 | "import requests\n", 13 | "from multiprocessing import Process, Manager\n", 14 | "from decimal import Decimal" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "def get_story(story_id, stories):\n", 26 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 27 | " resp = requests.get(url)\n", 28 | " story_data = resp.json()\n", 29 | " user_data = get_user(story_data.get('by'))\n", 30 | " story_data['user_karma'] = user_data.get('karma') or 0\n", 31 | " stories.append(story_data)\n", 32 | " return stories" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "def get_user(user_id):\n", 44 | " url = 'https://hacker-news.firebaseio.com/v0/user/%s.json' % user_id\n", 45 | " resp = requests.get(url)\n", 46 | " return resp.json()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def get_top_stories_with_user_karma():\n", 58 | " manager = Manager()\n", 59 | " stories = manager.list()\n", 60 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 61 | " ids = requests.get(url)\n", 62 | " processes = [Process(target=get_story, args=(sid, stories))\n", 63 | " for sid in 
ids.json()[:40]]\n", 64 | " for p in processes:\n", 65 | " p.start()\n", 66 | " for p in processes:\n", 67 | " p.join()\n", 68 | " return stories" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "def calculate_summary_karma():\n", 80 | " stories = get_top_stories_with_user_karma()\n", 81 | " return calculate.summary_stats([\n", 82 | " Decimal(s.get('score')) for s in stories])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "def pearsons_karma():\n", 94 | " stories = get_top_stories_with_user_karma()\n", 95 | " user_karma = [Decimal(s.get('user_karma')) for s in stories]\n", 96 | " story_karma = [Decimal(s.get('score')) for s in stories]\n", 97 | " return calculate.pearson(user_karma, story_karma)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 7, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "-0.005659751973886795" 111 | ] 112 | }, 113 | "execution_count": 7, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "pearsons_karma()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "| Statistic | Value |\n", 134 | "-----------------------------------------\n", 135 | "| n | 40 |\n", 136 | "| mean | 110.3 |\n", 137 | "| median | 32.0 |\n", 138 | "| mode | None |\n", 139 | "| maximum | 934 |\n", 140 | "| minimum | 1 |\n", 141 | "| range | 933.0 |\n", 142 | "| standard deviation | 211.448244258 |\n", 143 | "| variation coefficient | 1.91702850642 |\n", 144 | "\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "calculate_summary_karma()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 2", 165 | "language": "python", 166 | "name": "python2" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 2 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython2", 178 | "version": "2.7.6" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 0 183 | } 184 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 08 - Journalism Library.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import journalism\n", 12 | "import logging\n", 13 | "from csv import reader\n", 14 | "\n", 15 | "text_type = journalism.TextType()\n", 16 | "number_type = journalism.NumberType()\n", 17 | "date_type = journalism.DateType()" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 16, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "def get_table(datarows, 
types, titles):\n", 29 | " try:\n", 30 | " table = journalism.Table(datarows, types, titles)\n", 31 | " return table\n", 32 | " except:\n", 33 | " logging.exception('problem loading table')\n", 34 | " return None" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 17, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def clean_text(row):\n", 46 | " new_row = []\n", 47 | " for item in row:\n", 48 | " if isinstance(item, (str, unicode)):\n", 49 | " item = item.decode('utf-8', 'replace')\n", 50 | " if item in [u'--', u'n/a']:\n", 51 | " item = None\n", 52 | " new_row.append(item)\n", 53 | "\n", 54 | " return new_row" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 18, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "def clean_rows(all_rows):\n", 66 | " new_data = []\n", 67 | " for row in all_rows:\n", 68 | " new_data.append(clean_text(row))\n", 69 | " return new_data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 22, 75 | "metadata": { 76 | "collapsed": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "def load_imf_data():\n", 81 | " rdr = reader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 82 | " all_rows = [r for r in rdr if len(r) > 1]\n", 83 | " titles = all_rows.pop(0)\n", 84 | " cleaned_rows = clean_rows(all_rows)\n", 85 | " types = [text_type, text_type, text_type, text_type, text_type,\n", 86 | " number_type, number_type, number_type, number_type,\n", 87 | " number_type, number_type, number_type, number_type,\n", 88 | " date_type]\n", 89 | " return get_table(cleaned_rows, types, titles)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 23, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "def add_last_percent_change():\n", 101 | " table = load_imf_data()\n", 102 | " table = table.where(lambda r: r.get('2015') is not\n", 103 | " None and r.get('2014') is not None)\n", 104 | " table = table.where(lambda r: 'Unemployment' in\n", 105 | " r.get('Subject Descriptor'))\n", 106 | " table = table.percent_change('2014', '2015', 'last_change')\n", 107 | " return table" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 24, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "table = add_last_percent_change()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 25, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "Decimal('-2.595430263422718959419383707')" 132 | ] 133 | }, 134 | "execution_count": 25, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "table.columns['last_change'].mean()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 26, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "Decimal('8.016756756756756756756756757')" 154 | ] 155 | }, 156 | "execution_count": 26, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "table.columns['2015'].mean()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | 
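# Aside: a rough pandas sketch of the same percent-change calculation done with the
# journalism library above. It assumes the imf_indicators.tsv column names used in this
# lesson ('Subject Descriptor', '2014', '2015'); percent change means (new - old) / old * 100.
import pandas as pd

imf = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\t', thousands=',',
                  na_values=['--', 'n/a'], encoding='ISO-8859-1')
unemployment = imf[imf['Subject Descriptor'].str.contains('Unemployment', na=False)]
unemployment = unemployment.dropna(subset=['2014', '2015'])
last_change = (unemployment['2015'] - unemployment['2014']) / unemployment['2014'] * 100
print(last_change.mean())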
"kernelspec": { 177 | "display_name": "Python 2", 178 | "language": "python", 179 | "name": "python2" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 2 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython2", 191 | "version": "2.7.6" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 09 - Matplotlib.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "from csv import DictReader\n", 23 | "from decimal import Decimal\n", 24 | "import calculate\n", 25 | "import pylab" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 6, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def load_imf_unemployment():\n", 37 | " rdr = DictReader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 38 | " return [r for r in rdr if r.get('Subject Descriptor') and\n", 39 | " 'Unemployment' in r.get('Subject Descriptor')]" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "def get_avg_unemployment(data, start_year=2013, end_year=2015):\n", 51 | " avgs = {}\n", 52 | " while start_year <= end_year:\n", 53 | " avg = calculate.mean([\n", 54 | " Decimal(rate.get(str(start_year))) for\n", 55 | " rate in data if rate.get(str(start_year))])\n", 56 | " avgs[str(start_year)] = avg\n", 57 | " start_year += 1\n", 58 | " return avgs" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "def chart_unemployment():\n", 70 | " imf_data = load_imf_unemployment()\n", 71 | " averages = get_avg_unemployment(imf_data)\n", 72 | " pylab.plot(averages.keys(), averages.values())\n", 73 | " pylab.ylabel('Average Unemployment')\n", 74 | " pylab.xlabel('Years')\n", 75 | " pylab.title('Average Unemployment Over Time')\n", 76 | " pylab.ylim([0, sorted(averages.values(), reverse=True)[0] + 1])\n", 77 | " pylab.show()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 9, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEZCAYAAACU3p4jAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAG6RJREFUeJzt3XucJFV58PHf3mBv7BKBAMttdQUVkYsXQIXYSEJAN+AF\nuQgBwah5o2/QIIkkqGMSfeU1iqIxiiA3AQGRICoIKIMXWFQuu8hNxF2XOy6aZRd2F9iZ/PGcpmt6\ne3qqZrqmZ6Z+38+nP9PVXdXndE31c049daoKJEmSJEmSJEmSJEmSJEmSJGnYeoDzu12Jivkv4JRu\nV0Lqpl7gD8BGXa5HJ5wD/FvTa/OBPmDyaFdmCB9n/AX8PuBFQ8yzLXABsAJYDdwMvLnketVdBaxK\nj2eAdZnpL49SHdTGWPsRVs18YE/gceDgEj5/agmf2U5/eowHk7pdgWFqV+8XAD8F1gI7A5sBpwEX\nAm8voS5TmqYPAjZJjwuAUzPTf1dC+SrIgN9dxwDXET3NY9NrGwP/A7w8M98WwNPA5ml6IXA78Efg\nZ8ArMvMuA/4RWEL0rKYAHwF+AzwJ3Am8JTP/ZOCzwO+B3wIfYGCPfC5wFvAw8CDRg2+33QwVSJcB\nJwKL0/f8ZvrOdUN9tw9nvttZwJZEz3IlcC2waZp3fvoe7wEeSvU/sU29DibWzR+B64GXptdPAr7V\nNO/pwOfT815infws1ek7xP/pglSnnwM7ZJZ9aarnE8A9wDsy750D/CfwXeJ/tYhGj/7H6e/iVE52\nuboPpeXeTXQi1hHr95PE/xgirfKZpuWuSMsCzAMuS8v/Fvi/mfl6iHVxfvpux9Je87ZwDo09wBqx\nPZ2UynqY2C7fBPyaWD8fafqs+na8ArgY+JMhypfGlN8ARwE7ErvAW6TXzwL+PTPf+4Hvp+d7AI8B\nryF+BMcAS4Fp6f1lwK3ANjQC6aHAVun5YcSu/pZp+m+JQDePCJbXAetpBPXLiSAxI9XvZuC9g3yf\nsxk6pbOUCGRbET/Yu4D35fxuS4EbUz3mpXlvBXZL3/WHwMeayr0g1X0XIrDsn97voZHS2Smtk/2J\nBvIk4D5iD2nr9N7cNO/UVO4eabqXCFAvBOYQ6/I+4I3ps84Fvp7mnQU8QATKycDuREP7svT+OUQw\ne3Va9hvARZl1OVRKZxGRqmr2wrTsjsC+wPLMe39CdCa2SnW6hcizT03L3Q8ckObtIbbT+t7o9DZ1\nabUtnA38a3peA55NZU0B/ob47hcQ62nnVK96Y3kC8b+fR2wPXyH2XKRxYR9gDbG7C9Gr/WB6vj/R\nGNT9DDg6Pf8vGj+aunuIHzJEUHzXEGXfBvxVev4johdctz+NAL0lkR7I/rCPTMu0kjfgvzPz/qnE\nd4J83+3IzHvfInrEdR8gGqhsuTs1lXVmet5DI+B/lOgJ100iep9/lqavIgISxB7InZl5rwdOzkz/\nB/C9zPRCYn0DHE6jp173VRqN1DnAGZn3DgLuzkwPFfDvo3VjPD0t+1riu/2Oxjp9D9HIA+yV3ss6\nmUaD1UM0cHkMFvCzPfynaewFbJLq+JrM/L+k0bjcTTSidVsTjY9ZigJcWd1zLHANsXsOcCmNXeRe\nYCaR359P9GDrgWwHIjXxx8xjW6LnU/dAU1nHEEGnPv8uNNJDWzfN/2Dm+Q5Eb+qRzLJfobEn0uw5\nGr3xumnED7kv89qjmedrgNkFvttjTctmp9dmPqsu+92WN31W3TwG9nr703LbpOlzaTS4RwPnNS3f\nXIfHB6nTDkRQzX6/d9LY2+pnw+/X/H3aWUHr77d15v1+onGrN5zvJHrV9frNa6rfycCfZj4ru32M\n1BM0jvmsSX8H+/47EL+Ber3uIra3LVFuo31QT2EGkVqZTARTiJTEpsCuRI76EuJH+ThwJfBUmm85\nkZP9VJvPzx443YHoNb4RuCm9dxuNntUjwHaZ+bPPHyDywJsxMGAPZjkDjz1ApAWaG6DB6pvnuzUb\n6pjB9sC9mecPtZjnIQYeK5hErIf6vFcQo0x2IUa8fLhNee0OWi8HbqCRIum064C3AZ9oqsdhqez7\n0vRFRGfjVKJTcUimfksZuFeU1YmD8sNdfjlwHLENa5js4XfHW4jeycuI3vtu6flPiN44RH7yCKIH\nls1Vfo3Iu+9JBKZZRBAarCc4i/iRrSD+38cRgavuEiI/Ws/h/xONH+UjRGD4HLHLPRlYQCPV0eyy\nVJe/IPKy84gc7UWDzF9XD9pFv1sepxAN7MuJVNfFLea5NJXzRmKP5ESiZ35jen8N8d0uJI5hNPdy\nJw3yvNn3iGB6dCpnGpHCqB8gHqrxeoxY/4M5jcZB9i2JVM6RwD8TxyXqbie2hzOBq4kDvRAHmFcR\nB/1nEP/DXYhjCnnql9Vq3kkFPyPrK0RHYPs0vQXljGyb0Az43XEMkRd9kOjBP078mL9EBPjJxI9v\nNbE7flVm2VuIvOuXiPH796XPG6zndBcxQuMmIpWyCzF0r+5rRFBfkj77e8RB23qP/hjiHIG7UnmX\n0jgA3KqsI4H/R+yu35jK/cSga2Jgr7Hod6PpvVY90BuI4yHXEaNTrmsx771EEP4icRD1zcQxjucy\nn3Muse5ajd0fqg716VVE7/4IYu/hEWJdbZRjWYgc+rlESuPQFvX4A3FsaDrxv1hBHBc6mvi/ZV1I\nNHDZzkQfccxhd2KEzu+JvcM5beo3mMG+S/O6an5/MF8gRkBdQzRQNxEdA0kjcBAx0me8m09nT/ja\njkirjWRvQ5K6ajox9nkqcZByEZHCGe/m07mAP5kYd3/mUDNK0lg2g0gfPUmklc5iYvRi5zPwfILh\nmkWk1u6gMWpHkiRJkqQu6+oFpHbbbbf+xYsXd7MKkjQeLSZGUxXS7SsG9vf3j5eLK459PT099PT0\ndLsaE4brs7Ncn50zadIkGEb8dhy+JFWEAV+SKsKAP4HUarVuV2FCcX12luuz+8zhS9I4Yw5fktSW\nAV+SKsKAL0kVYcCXpIow4EtSRRjwJakiDPiSVBEGfEmqCAO+JFWEAV+SKsKAL0kVYcCXpIow4EtS\nRRjwJakiDPiSVBEGfEmqiKndrsCsWTB3LsyZE3/rjyLT06fDpG7fykWSxrhuh8n+Vav6WbkSVq6E\nJ5+k5fOhpvv6RtZgzJ0Ls2fDZPd3JI0Dw73jVdcDfiducbhuXbEGotX0009H0M/TQLR7b9q0DqwV\nSWqj0gG/E9avh1Wr8jUQ7d7baKOR723MmGGKStLgDPhjQH9/7CmMdG/juefy71EMNj1njikqaaIy\n4E8gzzwz+B5F3sZj9erOHBDfaKNurw1JzQz4GqCvL1JUI93bmDp15CmqmTNNUUmdZMBXx/X3w9q1\n+RuIwd5bt64zKaopU7q9RqSxwYCvMevZZyP4j2RvY9Wq2FMo2mA0v7fxxt1eG9LIGfA1ofX1wVNP\njSw9tXJlpJbyNhazZ8dxkMEeM2d6YFzdYcCXcqinqPI0
DqtXRyMz2GPNmjjLu12jMJLH1K6fB6+x\nyoAvjbK+vgj67RqF5sfTT+efd8qU9nsXI2lMNt7YA+njmQFfmkD6+2N4bpHGpMhj/fqRNxqDPWbM\nMNVVtrEa8E8Gjgb6gDuA44B1mfcN+FIXPPfc8BuLofZS1q4dmOrqdMNiqmtsBvz5wI+AlxFB/mLg\n+8C5mXkM+NIEM5xUV5HH1Knl7JnMnDl+Ul3DDfhltpVPAs8CM4H16e9DJZYnaQyYPLkRRDutvz/O\n6yjSQDz2WP69lIme6ioz4P8B+CywHFgD/AC4rsTyJE1wkyZFumj6dNhss85//rPPFjuwvnIlPPxw\nvnnXro2g34nGY7jKDPgLgA8SqZ2VwKXAUcAFJZYpScM2bVrjPIxO6+sr1pg89RSsWNH69eEqM+C/\nGrgReCJNfxt4HU0Bv6en5/nntVqNWq1WYpUkqTsmT46T+WbPLr5sb28vvb29z08vWjS8OpR5eGI3\nIri/BlgLnAP8HPjPzDwetJWkgoZ70LbMQwiLgfOAXwJL0mtnlFieJKmNbg9AsocvSQWNxR6+JGkM\nMeBLUkUY8CWpIgz4klQRBnxJqggDviRVhAFfkirCgC9JFZEn4L8j52uSpDEsz5latwF75HhtODzT\nVpIKKuMGKAcBbwK2AU7PfPgmxI1NJEnjSLuA/zBwC3BI+lsP+E8CHyq5XpKkDsuzSzCN8nr0pnQk\nqaAy72m7F/Bx4s5V9fn7gRcVLUyS1D15Woh7iVsV3krcjLxuRQfKt4cvSQWV2cP/H+Cqoh8sSRpb\n8rQQnwamEPekXZd5/dYOlG8PX5IKGm4PP88CvUTOvtl+RQtrwYAvSQWVGfDLZMCXpILKvMXhVsBZ\nwNVpemfg3UULkiR1V56Afw5wDTAvTd+HJ15J0riTJ+BvDlxMY0jms8BzpdVIklSKPAF/NbBZZnpv\nYGU51ZEklSXPOPwTgSuJM2tvBLYADi2zUpKkzst7lHcasFOa/146d20dR+lIUkFlDsucCryZDa+l\n87mihbVgwJekgsq8tMKVwBrgDqCvaAGSpLEhT8DfBti17IpIksqVZ5TONcBfll0RSVK58vTwbwQu\nJxqH+sHafmBOWZWSJHVenqT/MuBg4Fd0PofvQVtJKqjMa+ksB+7EA7aSNK7lSeksBa4nboLyTHqt\nU8MyJUmjJG/AXwpslB6SpHEoTw5oV2BJSeWbw5ekgso80/anwMbA2cAFdPbCaQZ8SSqozIO2+wBH\nAdsT97G9CDigaEGSpO4q0kJMBd4CnE708icD/wxc1maZTYEzgZcTB3qPBxZl3reHL0kFlZnS2Q14\nF7AQuJYI4LcSd8BaRPT8B3MucAPwdaLBmMXAlJABX5IKKjPg30Dc0/ZS4iJqWccA5w2y3FzgNuI6\n+oMx4EtSQWUGfIiDtjum53mvh7878FXgLmIv4RbgBODpzDwGfEkqqMzLI9eI1Mzv0vT2wLFEz3+o\nz34l8AHgF8DngY8AH8vO1NPT0yioVqNWq+WokiRVR29vL729vSP+nDwtxK3AkUTPHuLOV98kgnk7\nWwE3AS9M0/sQAX9hZh57+JJUUJnDMqfSCPYAvybfnsGjwANEAwHw58Q1eSRJXZCnhTgbWA98I81/\nFNFQHJ9j2d2IUT0bAfcDx+EoHUkakTIP2k4H3g+8Pk3/BPgysK5oYS0Y8CWpoLJH6ZTFgC9JBZUx\nSueONu/1431uJWlcaddCzB9i2WUdKN8eviQVVHZKZytgL+KuV78gRuB0ggFfkgoqc1jm3wA/B94G\nHArcDLy7aEGSpO7K00L8Gngt8ESa3ow4oWqnQZfIzx6+JBVUZg9/BbA6M706vSZJGkfytBDnA7sA\nV6TpQ4hbHi5h5Dczt4cvSQWVefG0+9OjHpmvSM9nFy1MktQ9nnglSeNMmT381xC3Mpyfmd8TryRp\nnMk7SufDwK+Icfh1yzpQvj18SSqozB7+74HvFP1gSdLYkqeFOAA4HLgOeCa91g98uwPl28OXpILK\n7OEfC7wkzZtN6XQi4EuSRkmeFuJe4KU0hmV2kj18SSqozDNtbwR2LvrBkqSxJU8LcQ+wAFhK4y5X\nnRqWaQ9fkgoq8/LI8wd5fVnRwlow4EtSQWWmdJYB2wH7pedPDacgSVJ35QncPcCriJE6OwHbAJfQ\nuKn5SNjDl6SCyuzhv5W4QuZTafohYJOiBUmSuitPwF/HwPH3s0qqiySpRHkC/qXAV4FNgfcCPwTO\nLLNSkqTOy5sDOiA9AH4AXNuh8s3hS1JBZQ7LLJMBX5IKKvOg7duB+4AngVXp8WTRgiRJ3ZWnhbgf\nWAjcXUL59vAlqaAye/iPUk6wlySNojyXR/4lcDHw33T+eviSpFGSJ+DPBdbQGKVTZ8CXpHHEUTqS\nNM6UccerLzZN9xP3t70e+GnRgiRJ3dUu4N/Chne52gz4DHHxtNPKqpQkqfOGk9KZAdwE7N6B8k3p\nSFJBZQ7LbLaGcu5vK0kqUZ5ROlnTgKOBB0uoiySpRO0C/mo27MmvAW4A3legjCnEWP4Hgb8qVDtJ\nUse0C/izO1TGCcBdeNMUSeqq4eTwi9gWeBNx/fxuj/mXpEorO+CfBpzEwDtmSZK6oOhB2yIWAo8D\ntwG1wWbq6el5/nmtVqNWG3RWSaqk3t5eent7R/w5edMs+wIvBs4GtiDy+0uHWOZTwF8DzwHTgTnA\nZcAxmXkchy9JBZV5x6se4FXAS4CdgG2IM21fX6CcNwAfZsNROgZ8SSqozBOv3gocAjyVph9ieCNu\njOyS1EV5cvjrGHjQddYwyrkhPSRJXZKnh38p8FVgU+C9wA+JYZaSpHEkbw7oABo3QPkBcG2HyjeH\nL0kFlXnQtkwGfEkqqMyDtqtaPB4ELgdeVLRASVJ35Dlo+wXgAeCiNH0EsIA4oerrtDmpSpI0duTZ\nJVgC7Nr02u3EDVAWA7uNoHxTOpJUUJkpnaeBw9O8k4HDgLXpPaO1JI0TeVqIBURaZ+80vQj4IHEC\n1qsY2Q3N7eFLUkGO0pGkihhuwM9z0HYG8G5gZ+IiaHXHFy1MktQ9eXL45wNbAgcSl0fYjrj9oSRp\nHMmzS1AfkVMfrTONyNvv1YHyTelIUkFljtJ5Jv1dCbyCuKbOFkULkiR1V54c/hnAC4BTgO8QNz/5\naJmVkiR13lABfzJxKYU/EPn7F5ZeI0lSKfLkgG4hxtuXwRy+JBVU5jj8TwMrgItp3PUKotc/UgZ8\nSSqozIC/jNaXUOhEeseAL0kFeaatJFVEmcMyZxGjcr6WpncEFhYtSJLUXXkC/tnEWPzXpemHgU+W\nViNJUinyBPwFwKk0TsB6qs28kqQxKk/AX0dcQK1uQXpNkjSO5DnTtge4GtgWuBB4PfCu8qokSSpD\n3qO8m9O4AcrNwO87VL6jdCSpoDKvh38lcQPzKzB/L0njVp4c/meBfYG7gG8BhzLwRiiSpHGgyC7B\nVGA/4D3EzVD
mdKB8UzqSVFCZKR2IUToHA4cBrwTOLVqQJKm78rQQlxB3t7oa+CZxmeS+DpVvD1+S\nCirzWjoHAtcC69P0vsARwPuLFtaCAV+SCiozpXM1kcY5kkjpLAUuK1qQJKm72gX8lxBB/nBi3P2l\nRItSK79akqROa7dL0Ad8F/gAsDy9tpTO3ubQlI4kFVTG5ZHfBqwBfgx8Bdh/OAVIksaGPAF8NnAI\nkd7ZDzgPuBy4pgPl28OXpIJG645XLyDOtD0CeGPRwlow4EtSQWP1FofbEXsEf0rcF/cM4PTM+wZ8\nSSporAb8rdLjdiI1dAvwFuDu9L4BX5IKKvOetiPxKBHsAVYTgX5eyWVKklooO+BnzQf2IK6nL0ka\nZXkvnjZSs4lLK59A9PSf19PT8/zzWq1GrVYbpSpJ0vjQ29tLb2/viD9nNMbVTyNO4LoK+HzTe+bw\nJamgsXrQdhJxKeUngA+1eN+AL0kFjdWAvw9xpu4SYlgmwMnEBdnAgC9JhY3VgD8UA74kFTRWh2VK\nksYIA74kVYQBX5IqwoAvSRVhwJekijDgS1JFGPAlqSIM+JJUEQZ8SaoIA74kVYQBX5IqwoAvSRVh\nwJekijDgS1JFGPAlqSIM+JJUEQZ8SaoIA74kVYQBX5IqwoAvSRVhwJekijDgS1JFGPAlqSIM+JJU\nEQZ8SaoIA74kVYQBX5IqwoAvSRVhwJekijDgS1JFGPAlqSIM+JJUEQZ8SaoIA74kVYQBX5IqwoAv\nSRVRdsA/ELgHuA/4p5LLkiS1UWbAnwJ8iQj6OwNHAi8rsbzK6+3t7XYVJhTXZ2e5PruvzIC/J/Ab\nYBnwLPBN4JASy6s8f1Cd5frsLNdn95UZ8LcBHshMP5hekyR1QZkBv7/Ez5YkFTSpxM/eG+ghcvgA\nJwN9wKmZeX4DLCixDpI0Ed0PvLjblciaSlRqPrARcDsetJWkCesg4F6iJ39yl+siSZIkqVPynIB1\nenp/MbDHKNVrvBpqfdaAlcBt6XHKqNVs/Pk68BhwR5t53DbzG2p91nDbzGs74HrgTuBXwN8PMt+Y\n2j6nECmd+cA0Wufy3wR8Pz3fC1g0WpUbh/KszxrwnVGt1fi1L/EjGSxAuW0WM9T6rOG2mddWwO7p\n+WwiPT6i2Dka19LJcwLWwcC56fnNwKbAlqNQt/Eo7wltZY7Amkh+Avyxzftum8UMtT7BbTOvR4kO\nHcBq4G5gXtM8hbbP0Qj4eU7AajXPtiXXa7zKsz77gdcRu3jfJy5toeFx2+wst83hmU/sOd3c9Hqh\n7XNqx6u1obwnYDW3+p641Vqe9XIrkf97mhgp9d/ATmVWaoJz2+wct83iZgPfAk4gevrNcm+fo9HD\nf4j4B9dtR7RC7ebZNr2mDeVZn6uIHxTAVUSu/wXlV21CctvsLLfNYqYBlwHfIBrHZmNu+8xzAlb2\nwMPeeGCsnTzrc0sarf6eRL5fg5tPvoO2bpv5zGfw9em2md8k4DzgtDbzjMnts9UJWO9Lj7ovpfcX\nA68c1dqNP0Otz/cTw7huB24kNgS1dhHwMPAMkQs9HrfNkRhqfbpt5rcPcTma22kMYz0It09JkiRJ\nkiRJkiRJkiRNFJ8hLmWwGPg2MHeQ+Qa7kOE7iIuerWfgyJk9aYy4WQIcnqMuZxEjdZYAl7epiyRp\nCDXg7KbX/oLGiamfTo9m7S5k+FLiDOLrGRjwZ2Q+dytgRfqcdjbJPP8sI7jC6GicaSuNBZOIC3sd\nmHntHcTZnqq2VpciuJYYAw9x/ZpW16dpdyHDe4Bft1hmTeZzZxCXil6fpg8gzk24BbgEmJVeX5X+\nTkrLrBji+wzKgK+q6Af+FvgcsDFxfZJPAn83zM8bjetQaXQMdfXO42mczZqV50KGrexJpHvuBP4h\nvbY58C/A/sCriKD/D5llzgYeAXYFzsxRRksGfFXJncCVwEeAjxHXJzmF6MHdSlxqFmIX/cfEj+4W\n4LXp9Rqxl3AFcbboTOB7xK78HcBh5X8FddAiIpf+NeJ/X8+tH5CZ51+Is4YvbLH8cC+i93Pg5USq\n5wtETn5v4sqhN6Y6HANsn1nmOOLSyEtSnSTlMJPY3V4CfAo4Kr2+KXG5ipnEbvPG6fUdgV+k5zXi\naoU7pOm3A2dkPntOWZVWqd7Ahjl8gHcBPwOmD7Lc3sDVmemT2fAOdM05/GY/BF4NLKR1o9Lsz4Dv\n5phPUvIJ4CQikN9Bo2e3DHgJ0eM6n2gUbqNxSdoa8KPM5+wILCUO6O1TfrVVkhobBvwDiT3Czdss\nl+dChtcTKZq6+TTSgTsAy4mOwhbA74AF6b1ZxPYF8OL0dxLwH8C/tf02kgb4OHAi8EsaP6qsHuD/\np+dTiANyEIHhyqZ5NyX2EnqBj3a2mholbyDuxZt1HxGA652BL6fX5xFpvLpWFzIEeCuR319D3Lmq\nPjjgr4l04G1Eaic7iGC/9Nri9FhIBPmfEp2PJameM4b1LaWKqgf8TwJfzLxevwH052gcMDuOxqiK\nGgMD/tY0dvcXEmOkpTHLkQaqqn5i1/jzRM9pMvBb4uDdl4mbThxD5GhXNy1X9wriBJ0+4sDe/ym9\n1pIkSZIkSZIkSZIkSZIkSZIkSZIkqXP+F5G96ken973AAAAAAElFTkSuQmCC\n", 90 | "text/plain": [ 91 | "" 92 | ] 93 | }, 94 | "metadata": {}, 95 | "output_type": "display_data" 96 | } 97 | ], 98 | "source": [ 99 | "chart_unemployment()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 2", 115 | "language": "python", 116 | "name": "python2" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 2 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython2", 128 | "version": "2.7.6" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 0 
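# Aside: chart_unemployment() above plots averages.keys() against averages.values(),
# and under Python 2.7 plain-dict ordering is arbitrary, so the years can come out of
# order. A minimal variant that sorts the years first (same labels and title as above):
def chart_unemployment_sorted():
    imf_data = load_imf_unemployment()
    averages = get_avg_unemployment(imf_data)
    years = sorted(int(y) for y in averages)       # e.g. [2013, 2014, 2015]
    values = [averages[str(y)] for y in years]
    pylab.plot(years, values)
    pylab.ylabel('Average Unemployment')
    pylab.xlabel('Years')
    pylab.title('Average Unemployment Over Time')
    pylab.show()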
133 | } 134 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 10 - PyGal.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 25, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pygal\n", 12 | "from csv import DictReader" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 54, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%matplotlib inline\n", 24 | "#%config InlineBackend.figure_format = 'svg'\n", 25 | "from IPython.display import set_matplotlib_formats\n", 26 | "set_matplotlib_formats('pdf', 'svg')\n", 27 | "\n", 28 | "from IPython.display import SVG, HTML, Image" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 71, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "html_pygal = u\"\"\"\n", 40 | "
\n", 41 | " \n", 42 | "
\n", 43 | "\"\"\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 28, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "def load_imf_unemployment():\n", 55 | " rdr = DictReader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 56 | " return [r for r in rdr if r.get('Subject Descriptor') and\n", 57 | " 'Unemployment' in r.get('Subject Descriptor')]" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 29, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "def load_iso_codes():\n", 69 | " iso_dict = {}\n", 70 | " for row in DictReader(open('../../data/iso-2.csv', 'rb')):\n", 71 | " iso_dict[row.get('Name')] = row.get('Code')\n", 72 | " return iso_dict" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 30, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "def load_and_merge_data():\n", 84 | " iso_dict = load_iso_codes()\n", 85 | " imf_data = load_imf_unemployment()\n", 86 | " for d in imf_data:\n", 87 | " d['iso'] = iso_dict[d.get('Country')]\n", 88 | " return imf_data" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 78, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "def draw_unemployment():\n", 100 | " imf_data = load_and_merge_data()\n", 101 | " worldmap_data = {}\n", 102 | " for row in imf_data:\n", 103 | " worldmap_data[row.get('iso').lower()] = float(row.get('2015'))\n", 104 | " worldmap_chart = pygal.Worldmap()\n", 105 | " worldmap_chart.title = '2015 Unemployment'\n", 106 | " worldmap_chart.add('Total Unemployment (%)', worldmap_data)\n", 107 | " worldmap_chart.render_to_png('../../../static/test.png')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 79, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "draw_unemployment()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 2", 134 | "language": "python", 135 | "name": "python2" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 2 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython2", 147 | "version": "2.7.6" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 0 152 | } 153 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 14 - Pandas Data Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "def get_wb_unemployment_data():\n", 23 | " return pd.read_excel('../../data/wb/unemployment.xlsx',\n", 24 | " index_col=0, header=0, skiprows=[1])" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 
31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "def get_wb_market_data():\n", 36 | " return pd.read_excel('../../data/wb/stock_market.xlsx',\n", 37 | " index_col=0, header=0, skiprows=[1])" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "def get_metadata():\n", 49 | " return pd.read_excel('../../data/wb/stock_metadata.xlsx',\n", 50 | " sheetname=1, index_col=0, header=0)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "def get_gdp():\n", 62 | " return pd.read_excel('../../data/wb/GDP_Current_Dollars.xlsx',\n", 63 | " index_col=3, header=0)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "def clean_market_columns():\n", 75 | " market_data = get_wb_market_data()\n", 76 | " market_data.columns = market_data.columns.map(lambda x: x[:3])\n", 77 | " market_data.index = market_data.index.map(lambda x: '{} SM'.format(x))\n", 78 | " return market_data.transpose()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "def update_gdp_cols(colname):\n", 90 | " if colname[:4].isdigit():\n", 91 | " return '{} GDP'.format(colname[:4])\n", 92 | " return colname" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "def join_market_and_gdp():\n", 104 | " market_data = clean_market_columns()\n", 105 | " gdp_data = get_gdp()\n", 106 | " gdp_data.columns = gdp_data.columns.map(update_gdp_cols)\n", 107 | " return market_data.join(gdp_data)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def just_spain():\n", 119 | " joined = join_market_and_gdp()\n", 120 | " spain = joined.loc['ESP'].copy()\n", 121 | " spain_gdp = spain[spain.index.map(lambda x: 'GDP' in x)]\n", 122 | " spain_stock = spain[spain.index.map(lambda x: 'SM' in x)]\n", 123 | " spain_gdp.index = spain_gdp.index.map(lambda x: x.rstrip(' GDP'))\n", 124 | " spain_stock.index = spain_stock.index.map(lambda x: x.rstrip(' SM'))\n", 125 | " spain_stock.name = 'Stocks'\n", 126 | " spain_gdp.name = 'GDP'\n", 127 | " return spain_stock, spain_gdp" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "def merge_back():\n", 139 | " spain_stock, spain_gdp = just_spain()\n", 140 | " return pd.concat([spain_stock, spain_gdp], axis=1)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 22, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "spain = merge_back()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 23, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "\n", 166 | "Index: 21 entries, 1990 to 2015\n", 167 | "Data columns (total 2 columns):\n", 168 | "Stocks 20 non-null 
object\n", 169 | "GDP 12 non-null object\n", 170 | "dtypes: object(2)\n", 171 | "memory usage: 504.0+ bytes\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "spain.info()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 24, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | "Empty DataFrame\n", 203 | "Columns: []\n", 204 | "Index: []" 205 | ] 206 | }, 207 | "execution_count": 24, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "spain.corr()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 26, 219 | "metadata": { 220 | "collapsed": false 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "spain = spain.convert_objects(convert_dates=True, convert_numeric=True)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 27, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/html": [ 237 | "
\n", 238 | "\n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | "
StocksGDP
Stocks1.0000000.566494
GDP0.5664941.000000
\n", 259 | "
" 260 | ], 261 | "text/plain": [ 262 | " Stocks GDP\n", 263 | "Stocks 1.000000 0.566494\n", 264 | "GDP 0.566494 1.000000" 265 | ] 266 | }, 267 | "execution_count": 27, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "spain.corr()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 28, 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/html": [ 286 | "
\n", 287 | "\n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | "
StocksGDP
Stocks6.478334e+023.424133e+12
GDP3.424133e+121.231342e+23
\n", 308 | "
" 309 | ], 310 | "text/plain": [ 311 | " Stocks GDP\n", 312 | "Stocks 6.478334e+02 3.424133e+12\n", 313 | "GDP 3.424133e+12 1.231342e+23" 314 | ] 315 | }, 316 | "execution_count": 28, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "spain.cov()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 29, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "spain.to_csv('../../data/spain_export.csv')" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "collapsed": true 341 | }, 342 | "outputs": [], 343 | "source": [] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 2", 349 | "language": "python", 350 | "name": "python2" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 2 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython2", 362 | "version": "2.7.6" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /books/pydata-examples/01 - Simple formats with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', encoding='ISO-8859-1')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "df.head()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "df.dtypes" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', thousands=',', encoding='ISO-8859-1')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "df.dtypes" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "df['2017'].value_counts()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', thousands=',', na_values=['n/a'], \n", 89 | " encoding='ISO-8859-1')" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "df.dtypes" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 
| }, 109 | "outputs": [], 110 | "source": [ 111 | "df['Country'].value_counts()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "df[df['Country'] == 'Germany']" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Show values and single out a particular subject descriptor or units or use both: & (and join) | (or join)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "df[(df['Country'] == 'Germany') & (df['Units'].isin(['National currency']))]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [] 151 | } 152 | ], 153 | "metadata": { 154 | "kernelspec": { 155 | "display_name": "Python 3", 156 | "language": "python", 157 | "name": "python3" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.4.3" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 0 174 | } 175 | -------------------------------------------------------------------------------- /books/pydata-examples/02 - More complex formats with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pdftables" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "pdftables.get_tables?" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "pdftables.page_to_tables?" 
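# Aside: the trailing '?' in the two cells above is IPython/Jupyter syntax for showing a
# docstring; in plain Python the equivalent would be:
help(pdftables.get_tables)
help(pdftables.page_to_tables)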
45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "my_pdf = open('../../data/WEF_GlobalCompetitivenessReport_2014-15.pdf', 'rb')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "chart_page = pdftables.get_pdf_page(my_pdf, 29)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "chart_page" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "table = pdftables.page_to_tables(chart_page)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "table" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "table[0]" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "titles = zip(table[0][0], table[0][1])[:5]" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "titles" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "titles = [''.join([title[0], title[1]]) for title in titles]\n", 144 | "print(titles)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "all_rows = []\n", 156 | "for row_data in table[0][2:]:\n", 157 | " all_rows.extend([row_data[:5], row_data[5:]])" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "all_rows" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "df = pd.DataFrame(all_rows, columns=titles)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "df.head()" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### Your turn: Try with page 30, 31 or 32 " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "new_chart_page = pdftables.get_pdf_page(my_pdf, 30)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "table = pdftables.page_to_tables(new_chart_page)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | 
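# Aside: this notebook runs on a Python 3 kernel, where zip() returns an iterator, so the
# titles cell above (zip(...)[:5]) raises "TypeError: 'zip' object is not subscriptable".
# Wrapping the zip in list() restores the Python 2 behaviour:
titles = list(zip(table[0][0], table[0][1]))[:5]
titles = [''.join([title[0], title[1]]) for title in titles]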
"metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "table[0]" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": true 238 | }, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "Python 3", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.4.3" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 0 264 | } 265 | -------------------------------------------------------------------------------- /books/pydata-examples/04 - APIs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "def get_story(story_id):\n", 23 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 24 | " resp = requests.get(url)\n", 25 | " return resp.json()" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def get_top_stories():\n", 37 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 38 | " resp = requests.get(url)\n", 39 | " all_stories = [get_story(sid) for sid in resp.json()[:10]]\n", 40 | " return all_stories" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import urllib3.contrib.pyopenssl\n", 52 | "urllib3.contrib.pyopenssl.inject_into_urllib3()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "top_stories = get_top_stories()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "top_stories[:5]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "MATCHING = (\n", 86 | " ('Python', '(p|P)ython'),\n", 87 | " ('Ruby', '(r|R)uby'),\n", 88 | " ('JavaScript', 'js|(J|j)ava(s|S)cript'),\n", 89 | " ('NodeJS', 'node(\\.?)(?:\\js|JS)'),\n", 90 | " ('Java', '(j|J)ava[^(S|s)cript]'),\n", 91 | " ('Objective-C', 'Obj(ective?)(?:\\ |-)(C|c)'),\n", 92 | " ('Go', '(g|G)o'),\n", 93 | " ('C++', '(c|C)(\\+)+')\n", 94 | ")" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "collapsed": true 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "import re" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "def count_languages():\n", 117 | " stories = 
get_top_stories()\n", 118 | " final_tallies = {}\n", 119 | " for s in stories:\n", 120 | " long_string = u'{} {}'.format(s.get('title'), s.get('url'))\n", 121 | " for language, regex in dict(MATCHING).items():\n", 122 | " if re.search(regex, long_string):\n", 123 | " if language not in final_tallies.keys():\n", 124 | " final_tallies[language] = {\n", 125 | " 'score': s.get('score'),\n", 126 | " 'descendants': s.get('descendants')}\n", 127 | " else:\n", 128 | " final_tallies[language]['score'] += s.get('score')\n", 129 | " final_tallies[language][\n", 130 | " 'descendants'] += s.get('descendants')\n", 131 | " return final_tallies" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "count_languages()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Can you add the URL to this? What about a good Regex for matching Julia?\n", 150 | "\n", 151 | "Again, try it yourself first! :)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "%load solutions/regex_solution.py" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.4.3" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /books/pydata-examples/05 - Messy Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from datetime import datetime" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "weather = pd.read_csv('../../data/berlin_weather_oldest.csv')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "weather.dtypes" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "weather.head()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "weather = weather.applymap(lambda x: np.nan if x == -9999 else x)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "weather.head()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | 
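# Aside: the applymap() call above turns the sentinel value -9999 into NaN cell by cell;
# pandas' replace() does the same thing in one vectorised call:
weather = weather.replace(-9999, np.nan)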
"execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "weather['DATE'] = weather['DATE'].map(lambda x: datetime.strptime(str(x), '%Y%m%d').date())" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "weather['DATE']" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "weather.notnull().head()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "weather.dropna()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "weather.dropna(how='all', axis=1)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "weather.shape" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "weather.dropna(thresh=weather.shape[0] * .1, axis=1)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "weather = weather.set_index(pd.DatetimeIndex(weather['DATE']))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "weather.head()" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "weather.index.duplicated()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "weather['STATION_NAME'].value_counts()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "weather.index.drop_duplicates().sort_values()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "weather.groupby('STATION_NAME').resample('D').mean().head()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": { 218 | "collapsed": true 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "rainy = weather[weather.PRCP >= weather.PRCP.std() * 3 + weather.PRCP.mean()]" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "rainy['month'] = rainy.index.month" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "%pylab inline" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | 
"execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "rainy.groupby('month')['PRCP'].sum().plot()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### Find the rainiest day in the past century\n", 263 | "\n", 264 | "Try it yourself first!" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "%load solutions/weather_solution_rainyday.py" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "collapsed": true 283 | }, 284 | "outputs": [], 285 | "source": [] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### Bonus: Fix station names so they are properly matching & chart precipitation over time with one of them" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "%load solutions/weather_solution_fix_stations.py" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.4.3" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 0 336 | } 337 | -------------------------------------------------------------------------------- /books/pydata-examples/06 - Data Analysis with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import requests" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def get_story(story_id):\n", 34 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 35 | " resp = requests.get(url)\n", 36 | " return resp.json()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def get_top_stories():\n", 48 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 49 | " resp = requests.get(url)\n", 50 | " all_stories = [get_story(sid) for sid in resp.json()[:50]]\n", 51 | " return all_stories" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "df = pd.read_json('../../data/hn.json')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | 
"metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# df = pd.DataFrame(get_top_stories())" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "df.head()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "df = df.set_index('id')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "df.head()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "df.by.value_counts()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "df.type.value_counts()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "df.corr()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "df.cov()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "df.score.min()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "df.score.max()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "%pylab inline" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "df.plot(x='time', y='score', marker='.')" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "df.sort_values('time').plot(x='time', y='score', marker='.')" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "df['time'] = pd.to_datetime(df['time'],unit='s')" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "df.time" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "df['hour'] = df['time'].map(lambda x: x.hour)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "df['hour'].value_counts()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": false 
257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "df.corr()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "df.plot(x='time', y='score')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "df.sort_values('hour').plot(x='hour', y='score')" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## We'll be focusing on groupby next, but here's a teaser!" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "df['hourly_mean'] = df.groupby('hour')['score'].transform(mean)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "df.sort_values('hour').plot(x='hour', y='hourly_mean')" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### Can you chart num of descendants across hours? How about length of kids? \n", 319 | "(Solution uses groups using max and median, again, we will return to those next!)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "collapsed": false 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "%load solutions/data_analysis_solution.py" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [] 350 | } 351 | ], 352 | "metadata": { 353 | "kernelspec": { 354 | "display_name": "Python 3", 355 | "language": "python", 356 | "name": "python3" 357 | }, 358 | "language_info": { 359 | "codemirror_mode": { 360 | "name": "ipython", 361 | "version": 3 362 | }, 363 | "file_extension": ".py", 364 | "mimetype": "text/x-python", 365 | "name": "python", 366 | "nbconvert_exporter": "python", 367 | "pygments_lexer": "ipython3", 368 | "version": "3.4.3" 369 | } 370 | }, 371 | "nbformat": 4, 372 | "nbformat_minor": 0 373 | } 374 | -------------------------------------------------------------------------------- /books/pydata-examples/07 - Split Apply Combine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import requests" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def get_story(story_id):\n", 34 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 35 | " resp = requests.get(url)\n", 36 | " return resp.json()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 
| "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def get_top_stories():\n", 48 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 49 | " resp = requests.get(url)\n", 50 | " all_stories = [get_story(sid) for sid in resp.json()[:50]]\n", 51 | " return all_stories" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "df = pd.read_json('../../data/hn.json')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# df = pd.DataFrame(get_top_stories())" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "df['time'] = pd.to_datetime(df['time'],unit='s')" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "df['hour'] = df['time'].map(lambda x: x.hour)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "df['day_of_week'] = df['time'].map(lambda x: x.weekday())" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "df.head()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "df.groupby('hour')" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "for group, items in df.groupby('hour'):\n", 140 | " print(group, items)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "df.groupby('hour').sum()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "df.groupby('hour')['score'].sum()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "%pylab inline" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "df.groupby('hour')['score'].sum().plot()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "df['median_hourly_score'] = df.groupby('hour')['score'].transform('median')" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "df.head()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Can you find most popular users or days of the week? 
Can you add those to the DF using transform?" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "collapsed": false 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "%load solutions/sac_solution.py" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "collapsed": true 232 | }, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "kernelspec": { 239 | "display_name": "Python 3", 240 | "language": "python", 241 | "name": "python3" 242 | }, 243 | "language_info": { 244 | "codemirror_mode": { 245 | "name": "ipython", 246 | "version": 3 247 | }, 248 | "file_extension": ".py", 249 | "mimetype": "text/x-python", 250 | "name": "python", 251 | "nbconvert_exporter": "python", 252 | "pygments_lexer": "ipython3", 253 | "version": "3.4.3" 254 | } 255 | }, 256 | "nbformat": 4, 257 | "nbformat_minor": 0 258 | } 259 | -------------------------------------------------------------------------------- /books/pydata-examples/08 - Bokeh for Data Visualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "from bokeh.plotting import figure, show\n", 13 | "from bokeh.io import output_notebook\n", 14 | "from bokeh.palettes import Spectral6" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', thousands=',', na_values=['n/a'],\n", 26 | " encoding='ISO-8859-1')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "df.head()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "df['Subject Descriptor'].value_counts()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "def mscatter(chart, x, y, typestr):\n", 60 | " chart.scatter(x, y, marker=typestr, line_color=\"#6666ee\",\n", 61 | " fill_color=\"#ee6666\", fill_alpha=0.5, size=12)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "def draw_scatter(df):\n", 73 | " chart = figure(title=\"IMF Unemployment\")\n", 74 | " output_notebook()\n", 75 | " for year in ['2013', '2014', '2015', '2016']:\n", 76 | " df[year].map(lambda x: mscatter(chart, int(year), x, 'circle'))\n", 77 | " return chart" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "chart = draw_scatter(df[df['Subject Descriptor'] == 'Unemployment rate'])\n", 89 | "show(chart)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "grouped_mean_df = df[df['Subject Descriptor'] == 'Unemployment rate'].groupby(\n", 101 | " 'Country')[['2013', 
'2014', '2015', '2016']].mean()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "grouped_mean_df.head()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": true 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def draw_line(df):\n", 124 | " chart = figure(title=\"IMF Unemployment\")\n", 125 | " output_notebook()\n", 126 | " color_count = 0\n", 127 | " for country, data in df.iterrows():\n", 128 | " chart.line(data.index.values, data.values, legend=country, line_color=Spectral6[color_count])\n", 129 | " if color_count+1 != len(Spectral6):\n", 130 | " color_count += 1\n", 131 | " else:\n", 132 | " color_count = 0\n", 133 | " return chart" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "chart = draw_line(grouped_mean_df)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "show(chart)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Can you rewrite this to show a reasonable number of countries (focus on between four to seven)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "%load solutions/visualization_solution.py" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.4.3" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 0 207 | } 208 | -------------------------------------------------------------------------------- /books/pydata-examples/Introduction to Joins.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "employees = pd.read_csv('../../data/employees.csv')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "employees" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "employees = employees.set_index('id')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 
53 | "outputs": [], 54 | "source": [ 55 | "employees" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "titles = pd.read_csv('../../data/titles.csv', index_col=0)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "titles" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "employees.join(titles, rsuffix='_title')" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "employees.join(titles, on=['title_id'], rsuffix='_title')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "empl_with_title = employees.join(titles, on=['title_id'], rsuffix='_title')" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "departments = pd.read_csv('../../data/departments.csv', index_col=0)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "departments" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "empl_with_title.join(departments, on=['department_id'], rsuffix='_dept')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "empl_with_title.join(departments, on=['department_id'], how='inner', rsuffix='_dept')" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "empl_with_title.join(departments, on=['department_id'], how='right', rsuffix='_dept')" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "Python 3", 172 | "language": "python", 173 | "name": "python3" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 3 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython3", 185 | "version": "3.4.3" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 0 190 | } 191 | -------------------------------------------------------------------------------- /books/pydata-examples/Introduction to Regex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import re" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "word = r'\\w+'" 23 | ] 24 | }, 25 | { 
26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "sentence = 'I am testing with Regex.'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "search_result = re.search(word, sentence)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "search_result" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "search_result.group()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "match_result = re.match(word, sentence)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "match_result" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "match_result.group()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "re.findall(word, sentence)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "capitalized_word = r'[A-Z]\\w+'" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "search_result = re.search(capitalized_word, sentence)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "search_result.group()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "match_result = re.match(capitalized_word, sentence)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "match_result" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "sentence_with_digits = 'The airport is 4,300 meters away, but I still hear 10 planes at night.'" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "numbers = r'\\d+'" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "re.findall(numbers, sentence_with_digits)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | 
"source": [ 209 | "thousands_numbers = '(\\d+,\\d+|\\d+)'" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "re.findall(thousands_numbers, sentence_with_digits)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "city_state = '(?P[\\w\\s]+), (?P[A-Z]{2})'" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "address = 'My House, 123 Main Street, Los Angeles, CA 90013'" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "match = re.finditer(city_state, address)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "for city in match:\n", 265 | " print(city.group('city'))" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": { 271 | "collapsed": true 272 | }, 273 | "source": [ 274 | "### Write the regex to match the street name and number" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "collapsed": true 282 | }, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.4.3" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 0 308 | } 309 | -------------------------------------------------------------------------------- /books/pydata-examples/Update HN data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "from multiprocessing import Process, Manager\n", 13 | "import json\n", 14 | "import pandas as pd" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "def get_story(story_id, stories):\n", 26 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 27 | " resp = requests.get(url)\n", 28 | " stories.append(resp.json())" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "def get_top_stories():\n", 40 | " manager = Manager()\n", 41 | " stories = manager.list()\n", 42 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 43 | " ids = requests.get(url)\n", 44 | " processes = [Process(target=get_story, args=(sid, stories))\n", 45 | " for sid in ids.json()]\n", 46 | " for p in processes:\n", 47 | " p.start()\n", 48 | " for p in processes:\n", 49 | " 
p.join()\n", 50 | " return stories" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": { 57 | "collapsed": false 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "df = pd.read_json('../../data/hn.json')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": { 68 | "collapsed": false 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "df['id'] = df.index.astype(int)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "top_stories = get_top_stories()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "new_df = pd.DataFrame([t for t in top_stories])" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "joined = df.append(new_df)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 9, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "Int64Index([11693122, 11693168, 11693169, 11693184, 11693185, 11693193,\n", 119 | " 11693217, 11693232, 11693240, 11693296,\n", 120 | " ...\n", 121 | " 11722573, 11707449, 11711640, 11713423, 11708769, 11718494,\n", 122 | " 11717710, 11717907, 11720912, 11707805],\n", 123 | " dtype='int64', name='id', length=1174)" 124 | ] 125 | }, 126 | "execution_count": 9, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "joined.set_index('id').index" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "df = joined.drop_duplicates(subset='id', keep='last')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 11, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "df.set_index('id').to_json('../../data/hn.json')" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.4.3" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 0 188 | } 189 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/data_analysis_solution.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | def get_list_len(line): 4 | if not line: 5 | return 0 6 | elif isinstance(line, list): 7 | return len(line) 8 | return len(ast.literal_eval(line)) 9 | 10 | 11 | df['hourly_desc_max'] = df.groupby('hour')['descendants'].transform(max) 12 | 13 | df.sort_values('hour').plot(x='hour', y='hourly_desc_max') 14 | 15 | df['num_kids'] = df['kids'].map(get_list_len) 16 | 17 | 
df['hourly_kids_median'] = df.groupby('hour')['num_kids'].transform(median) 18 | 19 | df.sort_values('hour').plot(x='hour', y='hourly_kids_median') 20 | 21 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/regex_solution.py: -------------------------------------------------------------------------------- 1 | 2 | MATCHING += (('Julia', '(J|j)ulia'), ) 3 | 4 | def count_languages(): 5 | stories = get_top_stories() 6 | final_tallies = {} 7 | for s in stories: 8 | long_string = u'{} {}'.format(s.get('title'), s.get('url')) 9 | for language, regex in dict(MATCHING).items(): 10 | if re.search(regex, long_string): 11 | if language not in final_tallies.keys(): 12 | final_tallies[language] = { 13 | 'score': s.get('score'), 14 | 'descendants': s.get('descendants'), 15 | 'urls': [s.get('url')]} 16 | else: 17 | final_tallies[language]['score'] += s.get('score') 18 | final_tallies[language][ 19 | 'descendants'] += s.get('descendants') 20 | final_tallies[language]['urls'].append(s.get('url')) 21 | return final_tallies 22 | 23 | count_languages() 24 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/sac_solution.py: -------------------------------------------------------------------------------- 1 | df['score_sum_dow'] = df.groupby('day_of_week')['score'].transform(sum) 2 | 3 | df.sort_values('day_of_week').plot(x='day_of_week', y='score_sum_dow') 4 | 5 | 6 | df['score_sum_user'] = df.groupby('by')['score'].transform(sum) 7 | 8 | df.sort_values('score_sum_user', ascending=False).groupby('by')[['by', 'score_sum_user']].head(1) 9 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/stocks_solution.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | ibm = data.DataReader('IBM', 'yahoo', datetime(2007,1, 1), datetime(2016, 1, 1)) 4 | ibm['Stock'] = 'IBM' 5 | 6 | merged = merged.append(ibm) 7 | 8 | 9 | lowest_ibm = merged[merged['Stock'] == 'IBM'].sort_values('Close').head(1) 10 | lowest_fb = merged[merged['Stock'] == 'FB'].sort_values('Close').iloc[0] 11 | lowest_goog = merged[merged['Stock'] == 'GOOGL'].sort_values('Close').ix[0] 12 | 13 | (lowest_ibm.index, lowest_fb.name, lowest_goog.name) 14 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/visualization_solution.py: -------------------------------------------------------------------------------- 1 | print(grouped_mean_df.index) 2 | 3 | subset = grouped_mean_df.loc[['United Kingdom', 'Germany', 'Greece', 'United States', 'Czech Republic']] 4 | 5 | chart = draw_line(subset) 6 | 7 | show(chart) 8 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/weather_solution_fix_stations.py: -------------------------------------------------------------------------------- 1 | weather['STATION_NAME'] = weather['STATION_NAME'].map(lambda x: x.replace('BERLIN ', '')) 2 | 3 | weather[weather['STATION_NAME'] == 'TEMPELHOF GM']['PRCP'].plot() 4 | 5 | weather[weather['STATION_NAME'] == 'TEMPELHOF GM'].reset_index().plot('DATE', 'PRCP', style='g--') 6 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/weather_solution_rainyday.py: -------------------------------------------------------------------------------- 1 | 
rainy[rainy['PRCP'] == rainy['PRCP'].max()] 2 | 3 | # Check: 4 | # (DE) https://meteocb.wordpress.com/2014/09/19/08-08-1978-als-das-wasser-kam-und-der-osten-baden-ging/ 5 | # or (ENG) http://dailyiowan.lib.uiowa.edu/DI/1948/di1948-08-14.pdf to read about 6 | # that day! 7 | -------------------------------------------------------------------------------- /conda_requirements.txt: -------------------------------------------------------------------------------- 1 | bokeh==0.11.1 2 | certifi==2016.2.28 3 | cffi==1.6.0 4 | cryptography==1.3.2 5 | cycler==0.10.0 6 | decorator==4.0.9 7 | enum34==1.1.6 8 | futures==3.0.5 9 | idna==2.1 10 | ipaddress==1.0.16 11 | ipykernel==4.3.1 12 | ipython==4.2.0 13 | ipython_genutils==0.1.0 14 | ipywidgets 15 | jdcal==1.2 16 | Jinja2==2.8 17 | jsonschema==2.5.1 18 | jupyter==1.0.0 19 | jupyter_client==4.2.2 20 | jupyter_console==4.1.1 21 | lxml==3.6.0 22 | MarkupSafe==0.23 23 | matplotlib==1.5.1 24 | mistune==0.7.2 25 | nbconvert==4.2.0 26 | nbformat==4.0.1 27 | ndg_httpsclient 28 | notebook==4.2.0 29 | numpy==1.11.0 30 | pandas==0.18.1 31 | pandas-datareader==0.2.1 32 | pathlib2==2.1.0 33 | pexpect==4.0.1 34 | pickleshare==0.7.2 35 | ptyprocess==0.5.1 36 | pyasn1==0.1.9 37 | pycparser==2.14 38 | Pygments==2.1.3 39 | pyparsing==2.1.4 40 | python-dateutil==2.5.3 41 | pytz==2016.4 42 | PyYAML==3.11 43 | pyzmq==15.2.0 44 | qtconsole==4.2.1 45 | requests==2.10.0 46 | requests-file==1.4 47 | simplegeneric==0.8.1 48 | singledispatch==3.4.0.3 49 | six==1.10.0 50 | terminado==0.6 51 | tornado==4.3 52 | traitlets==4.2.1 53 | xlrd==0.9.4 54 | -------------------------------------------------------------------------------- /data/WEF_GlobalCompetitivenessReport_2014-15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/WEF_GlobalCompetitivenessReport_2014-15.pdf -------------------------------------------------------------------------------- /data/comments.json: -------------------------------------------------------------------------------- 1 | [{"parent": 9933918, "text": "If you want an excellent book about Commodore:

http://www.amazon.com/On-Edge-Spectacular-Rise-Commodore/dp/...", "id": 9934512, "time": 1437638136, "type": "comment", "by": "walkingolof"}, {"kids": [9934328], "parent": 9933918, "text": "In 4th grade a friend got an Amiga. He boasted that it had 4096 colors, and we ridiculed him mercilessly. There aren't that many colors! Can you name them?

We had our Apple IIs and TI-99/4As with their 16 colors or less and couldn't imagine anything greater.

Then we went to his house and saw Defender of the Crown, and we were humbled.", "id": 9934310, "time": 1437633136, "type": "comment", "by": "nsxwolf"}, {"parent": 9933918, "text": "I am so going to this! My 13yo son is going to be so excited to attend.", "id": 9934097, "time": 1437628297, "type": "comment", "by": "shawndumas"}, {"parent": 9933918, "text": "One can read "The Future Was Here", to learn more about the platform and its contributors. https://mitpress.mit.edu/books/future-was-here - there's also an accompanying website: http://amiga.filfre.net

Note that the Platform Studies series (http://www.platformstudies.com) has exciting upcoming stuff.", "id": 9934191, "time": 1437630223, "type": "comment", "by": "foobarge"}, {"parent": 9934113, "text": "I was a power plant operator on a nuclear submarine. We were shown a classified film that about the SL-1 incident as part of our training. Recalling that film still gives me chills.

A lot was learned about reactor safety as result of that disaster.", "id": 9934316, "time": 1437633252, "type": "comment", "by": "MurMan"}, {"kids": [9934364, 9934367], "parent": 9934113, "text": "Does the Demon Core count as a reactor?", "id": 9934317, "time": 1437633253, "type": "comment", "by": "anabis"}, {"kids": [9934377, 9934390, 9934304, 9934355, 9934334], "parent": 9934113, "text": ""...killed by the only fatal reactor accident in US history."

Is that really true?

'\u201cNobody died at Three Mile Island\u201d \u2014 unless you count babies.'

http://www.counterpunch.org/2015/03/27/cancer-and-infant-mor...", "id": 9934251, "time": 1437631496, "type": "comment", "by": "retrogradeorbit"}, {"parent": 9934113, "text": "That part of Idaho is interesting to drive through. It's near craters of the moon national park. The Idaho Nuclear Laboratory is pretty big. I remember it being nearly 50 miles along one side of the road with all barbed wire fences. It's pretty lonely and creepy out there. There was one cliff in Atomic City that was covered with graffiti of large random numbers. It turned out that the numbers were from high school graduating classes but the numbers weren't in order so it seemed like some random code.", "id": 9934273, "time": 1437632179, "type": "comment", "by": "abecode"}, {"parent": 9934113, "text": "The SL-1 story is well known in the industry. The Atomic Energy Commission has a video.[1] SL-1 was an experimental reactor at the National Reactor Testing Station in Idaho, an 8000 square mile AEC reservation. It's an isolated area, and the test reactors were many miles apart, just in case.

[1] https://www.youtube.com/watch?v=qOt7xDKxmCM", "id": 9934314, "time": 1437633227, "type": "comment", "by": "Animats"}, {"parent": 9934113, "text": "I find the term "accident" a somewhat meaningless distinction. Millions of people have gotten cancer (statistically) from the radiation in the atmosphere as a result of nuclear tests. Does it really matter that the tests were intentional or not?", "id": 9934417, "time": 1437636062, "type": "comment", "by": "JohnGB"}, {"parent": 9934113, "text": "For what it's worth, I like the original title on the article better.", "id": 9934444, "time": 1437636442, "type": "comment", "by": "hudibras"}, {"parent": 9933757, "text": "I'd feel smug about my choice of cell phone but Samsung sponsored the Olympics.", "id": 9934491, "time": 1437637613, "type": "comment", "by": "billpg"}, {"kids": [9934464, 9934447], "parent": 9933757, "text": "This is why large companies should be more regulated. They should not be able to put pressure on labels or producers and get all the benefits. \nWhat Apple did by killing this startup is not ethical. Plus, by killing it, they tried to get a monopole. Microsoft got so many fines at the time because they had the monopole on PCs. If a large company tries to destroy startups just because they feel threatened, they should be penalised the same way Microsoft did (With huge fines) or I think of another way: forcing large companies to work with startups instead of shutting them down.", "id": 9934360, "time": 1437634765, "type": "comment", "by": "JohnyLy"}, {"parent": 9933757, "text": "Hint: when you start a service solving a very generic problem, you have a big chance of being outmaneuvered by a competitor.

Don't cry if you fail.", "id": 9934494, "time": 1437637717, "type": "comment", "by": "amelius"}, {"kids": [9934384], "parent": 9933757, "text": "I kept waiting for the payoff, but the article never gave proof that Steve Jobs was behind it. Sounds to me like what happened is that Steve when negotiating with Warner Brothers said something like, "How can you charge us those rates for your music when I have a whole list of places online that have it for free?" At which point Warner said, "We'll take care of those. So the rates can stand."", "id": 9934365, "time": 1437634880, "type": "comment", "by": "dangero"}, {"parent": 9933757, "text": "This article reminds me of a lot of what Dalton Caldwell spoke about in his talk about why not to start a music startup at a Startup School in the last several years.

This business seems to have been run and have proceeded horribly, for the exact reasons that Caldwell and everyone else with experience in that area seems to be extremely familiar with.

Is there anything legitimately interesting to the "Apple" and "Steve Jobs" parts of this story other than the usual clickbait?", "id": 9934394, "time": 1437635446, "type": "comment", "by": "npp"}, {"kids": [9934403, 9934402], "parent": 9933757, "text": "I read this half asleep and a little inebriated but it seemed to evolve into some kind of bizarre self-aggrandizing pitch on how this Kasian Franks is a super-human with deep hard-to-duplicate knowledge and insights spanning multiple important and lucrative industries. Almost some kind of cred piece bolstered by his proxy encounter with Steve Jobs. Weird.", "id": 9934371, "time": 1437634947, "type": "comment", "by": "dxhdr"}, {"parent": 9933757, "text": "I actually like the second part of the article a lot more intriguing where he talks about his and the company's personal background. It's these stories that inspire to start a company.", "id": 9934385, "time": 1437635102, "type": "comment", "by": "Libermentix"}, {"parent": 9933757, "text": "TL;DR Music technology startups suck.", "dead": true, "id": 9934414, "time": 1437635958, "type": "comment", "by": "brianmcconnell"}, {"parent": 9933757, "text": "Whoever uses "analyzation" publicly deserves to have a failed start up.", "id": 9934435, "time": 1437636366, "type": "comment", "by": "jevgeni"}, {"kids": [9934471], "parent": 9933757, "text": "TL;DR Music technology startups suck.", "id": 9934413, "time": 1437635956, "type": "comment", "by": "brianmcconnell"}] -------------------------------------------------------------------------------- /data/departments.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,sales 3 | 2,product 4 | 3,technical support 5 | 4,human resources 6 | -------------------------------------------------------------------------------- /data/employees.csv: -------------------------------------------------------------------------------- 1 | id,name,title_id,years,department_id 2 | 2,chris,1,15,2 3 | 6,sandy,2,5,7 4 | 7,christine,1,5,4 5 | 12,aaron,3,3,3 6 | -------------------------------------------------------------------------------- /data/imf_indicators.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/imf_indicators.tsv -------------------------------------------------------------------------------- /data/iso-2.csv: -------------------------------------------------------------------------------- 1 | Name,Code 2 | Afghanistan,AF 3 | Åland Islands,AX 4 | Albania,AL 5 | Algeria,DZ 6 | American Samoa,AS 7 | Andorra,AD 8 | Angola,AO 9 | Anguilla,AI 10 | Antarctica,AQ 11 | Antigua and Barbuda,AG 12 | Argentina,AR 13 | Armenia,AM 14 | Aruba,AW 15 | Australia,AU 16 | Austria,AT 17 | Azerbaijan,AZ 18 | Bahamas,BS 19 | Bahrain,BH 20 | Bangladesh,BD 21 | Barbados,BB 22 | Belarus,BY 23 | Belgium,BE 24 | Belize,BZ 25 | Benin,BJ 26 | Bermuda,BM 27 | Bhutan,BT 28 | "Bolivia, Plurinational State of",BO 29 | "Bonaire, Sint Eustatius and Saba",BQ 30 | Bosnia and Herzegovina,BA 31 | Botswana,BW 32 | Bouvet Island,BV 33 | Brazil,BR 34 | British Indian Ocean Territory,IO 35 | Brunei Darussalam,BN 36 | Bulgaria,BG 37 | Burkina Faso,BF 38 | Burundi,BI 39 | Cambodia,KH 40 | Cameroon,CM 41 | Canada,CA 42 | Cape Verde,CV 43 | Cayman Islands,KY 44 | Central African Republic,CF 45 | Chad,TD 46 | Chile,CL 47 | China,CN 48 | Christmas Island,CX 49 | Cocos (Keeling) Islands,CC 50 | Colombia,CO 51 | Comoros,KM 
52 | Congo,CG 53 | "Congo, the Democratic Republic of the",CD 54 | Cook Islands,CK 55 | Costa Rica,CR 56 | Côte d'Ivoire,CI 57 | Croatia,HR 58 | Cuba,CU 59 | Curaçao,CW 60 | Cyprus,CY 61 | Czech Republic,CZ 62 | Denmark,DK 63 | Djibouti,DJ 64 | Dominica,DM 65 | Dominican Republic,DO 66 | Ecuador,EC 67 | Egypt,EG 68 | El Salvador,SV 69 | Equatorial Guinea,GQ 70 | Eritrea,ER 71 | Estonia,EE 72 | Ethiopia,ET 73 | Falkland Islands (Malvinas),FK 74 | Faroe Islands,FO 75 | Fiji,FJ 76 | Finland,FI 77 | France,FR 78 | French Guiana,GF 79 | French Polynesia,PF 80 | French Southern Territories,TF 81 | Gabon,GA 82 | Gambia,GM 83 | Georgia,GE 84 | Germany,DE 85 | Ghana,GH 86 | Gibraltar,GI 87 | Greece,GR 88 | Greenland,GL 89 | Grenada,GD 90 | Guadeloupe,GP 91 | Guam,GU 92 | Guatemala,GT 93 | Guernsey,GG 94 | Guinea,GN 95 | Guinea-Bissau,GW 96 | Guyana,GY 97 | Haiti,HT 98 | Heard Island and McDonald Islands,HM 99 | Holy See (Vatican City State),VA 100 | Honduras,HN 101 | Hong Kong SAR,HK 102 | Hungary,HU 103 | Iceland,IS 104 | India,IN 105 | Indonesia,ID 106 | "Iran, Islamic Republic of",IR 107 | Iraq,IQ 108 | Ireland,IE 109 | Isle of Man,IM 110 | Israel,IL 111 | Italy,IT 112 | Jamaica,JM 113 | Japan,JP 114 | Jersey,JE 115 | Jordan,JO 116 | Kazakhstan,KZ 117 | Kenya,KE 118 | Kiribati,KI 119 | "Korea, Democratic People's Republic of",KP 120 | Korea,KR 121 | Kuwait,KW 122 | Kyrgyzstan,KG 123 | Lao People's Democratic Republic,LA 124 | Latvia,LV 125 | Lebanon,LB 126 | Lesotho,LS 127 | Liberia,LR 128 | Libya,LY 129 | Liechtenstein,LI 130 | Lithuania,LT 131 | Luxembourg,LU 132 | Macao,MO 133 | "Macedonia, the Former Yugoslav Republic of",MK 134 | Madagascar,MG 135 | Malawi,MW 136 | Malaysia,MY 137 | Maldives,MV 138 | Mali,ML 139 | Malta,MT 140 | Marshall Islands,MH 141 | Martinique,MQ 142 | Mauritania,MR 143 | Mauritius,MU 144 | Mayotte,YT 145 | Mexico,MX 146 | "Micronesia, Federated States of",FM 147 | "Moldova, Republic of",MD 148 | Monaco,MC 149 | Mongolia,MN 150 | Montenegro,ME 151 | Montserrat,MS 152 | Morocco,MA 153 | Mozambique,MZ 154 | Myanmar,MM 155 | Namibia,NA 156 | Nauru,NR 157 | Nepal,NP 158 | Netherlands,NL 159 | New Caledonia,NC 160 | New Zealand,NZ 161 | Nicaragua,NI 162 | Niger,NE 163 | Nigeria,NG 164 | Niue,NU 165 | Norfolk Island,NF 166 | Northern Mariana Islands,MP 167 | Norway,NO 168 | Oman,OM 169 | Pakistan,PK 170 | Palau,PW 171 | "Palestine, State of",PS 172 | Panama,PA 173 | Papua New Guinea,PG 174 | Paraguay,PY 175 | Peru,PE 176 | Philippines,PH 177 | Pitcairn,PN 178 | Poland,PL 179 | Portugal,PT 180 | Puerto Rico,PR 181 | Qatar,QA 182 | Réunion,RE 183 | Romania,RO 184 | Russian Federation,RU 185 | Rwanda,RW 186 | Saint Barthélemy,BL 187 | "Saint Helena, Ascension and Tristan da Cunha",SH 188 | Saint Kitts and Nevis,KN 189 | Saint Lucia,LC 190 | Saint Martin (French part),MF 191 | Saint Pierre and Miquelon,PM 192 | Saint Vincent and the Grenadines,VC 193 | Samoa,WS 194 | San Marino,SM 195 | Sao Tome and Principe,ST 196 | Saudi Arabia,SA 197 | Senegal,SN 198 | Serbia,RS 199 | Seychelles,SC 200 | Sierra Leone,SL 201 | Singapore,SG 202 | Sint Maarten (Dutch part),SX 203 | Slovak Republic,SK 204 | Slovenia,SI 205 | Solomon Islands,SB 206 | Somalia,SO 207 | South Africa,ZA 208 | South Georgia and the South Sandwich Islands,GS 209 | South Sudan,SS 210 | Spain,ES 211 | Sri Lanka,LK 212 | Sudan,SD 213 | Suriname,SR 214 | Svalbard and Jan Mayen,SJ 215 | Swaziland,SZ 216 | Sweden,SE 217 | Switzerland,CH 218 | Syrian Arab Republic,SY 219 | "Taiwan Province of China",TW 220 | Tajikistan,TJ 221 
| "Tanzania, United Republic of",TZ 222 | Thailand,TH 223 | Timor-Leste,TL 224 | Togo,TG 225 | Tokelau,TK 226 | Tonga,TO 227 | Trinidad and Tobago,TT 228 | Tunisia,TN 229 | Turkey,TR 230 | Turkmenistan,TM 231 | Turks and Caicos Islands,TC 232 | Tuvalu,TV 233 | Uganda,UG 234 | Ukraine,UA 235 | United Arab Emirates,AE 236 | United Kingdom,GB 237 | United States,US 238 | United States Minor Outlying Islands,UM 239 | Uruguay,UY 240 | Uzbekistan,UZ 241 | Vanuatu,VU 242 | "Venezuela, Bolivarian Republic of",VE 243 | Viet Nam,VN 244 | "Virgin Islands, British",VG 245 | "Virgin Islands, U.S.",VI 246 | Wallis and Futuna,WF 247 | Western Sahara,EH 248 | Yemen,YE 249 | Zambia,ZM 250 | Zimbabwe,ZW 251 | -------------------------------------------------------------------------------- /data/titles.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,President 3 | 2,Lead Engineer 4 | 3,Intern 5 | -------------------------------------------------------------------------------- /data/topstories.json: -------------------------------------------------------------------------------- 1 | [{"kids": [9934443], "descendants": 1, "url": "http://www.nytimes.com/2015/07/22/books/dr-seuss-book-a-discovery-in-a-box-and-then-a-reconstruction.html", "text": "", "title": "Dr. Seuss Book: Yes, They Found It in a Box", "by": "pepys", "score": 27, "time": 1437609293, "type": "story", "id": 9933147}, {"kids": [9934496, 9934507], "descendants": 3, "url": "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=1&f=G&l=50&co1=AND&d=PTXT&s1=9087098.PN.&OS=PN/9087098&RS=PN/9087098", "text": "", "title": "Time-Series Database patented by GE", "by": "rodionos", "score": 14, "time": 1437633378, "type": "story", "id": 9934323}, {"title": "Bosun \u2013 open-source monitoring and alerting system by Stack Exchange", "url": "http://bosun.org/", "descendants": 0, "by": "aps-sids", "score": 15, "time": 1437627794, "type": "story", "id": 9934074}, {"kids": [9934493, 9934489, 9934500], "descendants": 3, "url": "https://en.wikipedia.org/wiki/ILoo", "text": "", "title": "iLoo", "by": "vezzy-fnord", "score": 28, "time": 1437614726, "type": "story", "id": 9933435}, {"kids": [9934039, 9933888, 9933800, 9933971, 9933779, 9934061, 9933880, 9934267, 9933916, 9933697, 9933968, 9934225, 9934064, 9933849, 9934058], "descendants": 75, "url": "https://arenavc.com/2015/07/airbnb-my-1-billion-lesson/", "text": "", "title": "Airbnb, My $1B Lesson", "by": "rvcamo", "score": 254, "time": 1437617429, "type": "story", "id": 9933600}, {"kids": [9933518, 9933645, 9933495, 9933461, 9933731, 9933507, 9933664, 9933470], "descendants": 43, "url": "https://lwn.net/SubscriberLink/651645/f0f5d5e6460edc60/", "text": "", "title": "rm -r fs/ext3", "by": "JoshTriplett", "score": 136, "time": 1437613222, "type": "story", "id": 9933354}, {"descendants": 0, "url": "https://ca.news.yahoo.com/blogs/dailybrew/surrey-bc-rolls-out-siri-for-cities-app-powered-185104504.html", "text": "", "title": "Surrey rolls out 'Siri for cities' app powered by IBM's Watson", "by": "hack4supper", "score": 5, "time": 1437623020, "type": "story", "id": 9933859}, {"kids": [9931667, 9933925, 9931668, 9932775, 9932646, 9932309, 9933995, 9932933, 9933433, 9931851, 9931734, 9931335], "descendants": 43, "url": "http://hapgood.us/2015/07/21/beyond-conversation/", "title": "Links as originally imagined were a separate layer of annotation on documents", "by": "jeremya", "score": 104, "time": 1437568493, 
"type": "story", "id": 9929187}, {"kids": [9931518, 9932569, 9932060, 9931620, 9931610, 9934139, 9933646, 9933530, 9931446], "descendants": 41, "url": "http://www.spiegel.de/international/europe/traveling-from-romania-to-portugal-in-an-illicit-van-a-1043797.html", "text": "", "title": "The Last European: Romanian Driver Navigates the Soul of the EU", "by": "lermontov", "score": 86, "time": 1437583450, "type": "story", "id": 9930713}, {"kids": [9933352, 9933368, 9933024, 9934400, 9932715, 9934214, 9933661, 9932630, 9933941, 9932754, 9933072, 9933074, 9932150], "descendants": 33, "url": "http://blog.comealive.io/Alive-Beta/", "title": "Alive Beta \u2013 Live Coding in Visual Studio", "by": "Permit", "score": 84, "time": 1437597115, "type": "story", "id": 9932118}, {"kids": [9932107, 9932832, 9931741, 9932928, 9931888, 9932172], "descendants": 24, "url": "http://www.coreboot.org/pipermail/coreboot/2015-July/080120.html", "text": "", "title": "Announcing coreboot 4.1", "by": "conductor", "score": 112, "time": 1437591932, "type": "story", "id": 9931636}, {"kids": [9932959, 9933275, 9933290, 9932069, 9933515], "descendants": 38, "url": "http://www.brainpickings.org/2014/08/21/leo-tolstoy-gandhi-letter-to-a-hindu/", "title": "Why We Hurt Each Other: Tolstoy\u2019s Letters to Gandhi", "by": "atmosx", "score": 98, "time": 1437586669, "type": "story", "id": 9931072}, {"kids": [9934430, 9931647, 9931095, 9934296, 9934283, 9931333, 9931452, 9931673, 9932394, 9931196, 9931106, 9931235], "descendants": 97, "url": "http://blog.jruby.org/2015/07/jruby_9000/", "text": "", "title": "JRuby 9000 released", "by": "headius", "score": 244, "time": 1437580868, "type": "story", "id": 9930399}, {"kids": [9934149, 9934445, 9933854, 9934179], "descendants": 7, "url": "http://makezine.com/2015/07/22/with-linux-and-creative-commons-the-9-chip-computer-reveals-its-open-source-details/", "text": "", "title": "The $9 CHIP Computer Reveals Its Open Source Details", "by": "dcschelt", "score": 58, "time": 1437610368, "type": "story", "id": 9933226}, {"descendants": 0, "url": "http://code.dblock.org/2010/11/04/corporate-change-contributing-to-open-source.html", "text": "", "title": "Corporate Change: Contributing to Open Source (2010)", "by": "walterbell", "score": 5, "time": 1437626626, "type": "story", "id": 9934026}, {"kids": [9931836, 9933063, 9931601, 9931616, 9932469, 9931598, 9932260, 9932298, 9931957, 9931871, 9932485, 9933636, 9933002], "descendants": 64, "url": "http://www.fastcompany.com/3047428/how-two-bored-1970s-housewives-helped-create-the-pc-industry", "text": "", "title": "How two bored 1970s housewives helped create the PC industry", "by": "technologizer", "score": 142, "time": 1437570293, "type": "story", "id": 9929333}, {"title": "Flaws in trials of deworming pills show the importance of sharing data", "url": "http://www.buzzfeed.com/bengoldacre/deworming-trials", "descendants": 0, "by": "bootload", "score": 11, "time": 1437614417, "type": "story", "id": 9933418}, {"descendants": 0, "url": "http://adage.com/article/news/boar-s-head-a-deli-meat-beast/236406/", "text": "", "title": "How Boar's Head Became a Deli Meat Beast", "by": "kelvintran", "score": 6, "time": 1437629197, "type": "story", "id": 9934131}, {"kids": [9932267, 9933725, 9931970, 9931848, 9934120, 9933106, 9932284, 9931877, 9931883, 9933192, 9931951, 9932294, 9932491, 9932163, 9932517, 9931923, 9932183, 9932705, 9932745, 9933504, 9932072, 9933906, 9933325, 9931802, 9933707, 9931921, 9933490, 9931952, 9932039, 9932051, 9932607, 9931900, 
9931950, 9932471, 9932126, 9932301, 9931956, 9931907, 9932002, 9931723, 9933042, 9932280, 9933050, 9931776, 9931858], "descendants": 167, "url": "http://techcrunch.com/2015/07/22/uber-for-developers/", "title": "Gigster (YC S15) Does The Dev Work To Turn Your Idea Into An App", "by": "rogerdickey", "score": 105, "time": 1437591552, "type": "story", "id": 9931596}, {"kids": [9934478, 9933611, 9932885, 9933817, 9933385], "descendants": 11, "url": "https://izbicki.me/blog/fast-nearest-neighbor-queries-in-haskell.html", "text": "", "title": "Fast Nearest Neighbor Queries in Haskell", "by": "andrus", "score": 71, "time": 1437598751, "type": "story", "id": 9932266}, {"kids": [9934516], "descendants": 1, "url": "https://github.com/cymen/show-me-the-react", "title": "Chrome React extension that highlights components on the page", "by": "obilgic", "score": 19, "time": 1437616029, "type": "story", "id": 9933511}, {"kids": [9931258, 9931310, 9931291, 9931170, 9932514, 9931325, 9931453, 9931812, 9934193, 9931187, 9931401, 9932368, 9933892, 9931239, 9931780, 9932862, 9931540, 9931797, 9931173, 9931937, 9931748, 9931144, 9931218, 9931270, 9931162, 9931663, 9931206], "descendants": 97, "url": "http://blog.ycombinator.com/pro-rata", "title": "Pro Rata", "by": "craigkerstiens", "score": 328, "time": 1437587125, "type": "story", "id": 9931121}, {"kids": [9934314, 9934251, 9934316, 9934273, 9934417, 9934444, 9934376, 9934317], "descendants": 15, "url": "https://passingstrangeness.wordpress.com/2015/07/20/sl-1-murder-by-nuclear-reactor/", "title": "SL-1: The only fatal nuclear reactor accident in US history", "by": "herendin", "score": 31, "time": 1437628703, "type": "story", "id": 9934113}, {"kids": [9934512, 9934310, 9934191, 9934097], "descendants": 8, "url": "http://amiga30.com/", "text": "", "title": "Today, 30 years ago, Commodore introduced the Amiga", "by": "pdknsk", "score": 47, "time": 1437624046, "type": "story", "id": 9933918}, {"kids": [9933901, 9933890, 9933848], "descendants": 9, "url": "http://www.febo.com/hamdocs/intronos.html?", "text": "", "title": "Getting Started with TCP/IP on Packet Radio (1992)", "by": "taf2", "score": 32, "time": 1437618559, "type": "story", "id": 9933648}, {"kids": [9930872, 9930809, 9933711, 9933474], "descendants": 69, "url": "https://www.mnot.net/blog/2015/07/20/snowden_meets_the_ietf", "title": "Snowden Meets the IETF", "by": "kazuho", "score": 187, "time": 1437563664, "type": "story", "id": 9928879}, {"title": "Experiment (YC W13) is hiring Rails hackers", "url": "https://experiment.com/jobs/engineer", "text": "", "id": 9934275, "score": 1, "time": 1437632231, "type": "job", "by": "dluan"}, {"kids": [9933376, 9933203, 9933271, 9933080, 9933619, 9933339, 9933184, 9933499, 9933860, 9934215, 9933522], "descendants": 27, "url": "http://techcrunch.com/2015/07/22/yc-backed-fonticons-is-a-subscription-icon-service-from-the-creator-of-font-awesome/", "title": "Fonticons (YC S15) Is A Subscription Icon Service From The Maker Of Font Awesome", "by": "katm", "score": 81, "time": 1437608079, "type": "story", "id": 9933067}, {"descendants": 0, "url": "http://www.lrb.co.uk/v37/n15/julian-barnes/selfie-with-sunflowers", "text": "", "title": "Selfie with \u2018Sunflowers\u2019", "by": "prismatic", "score": 7, "time": 1437626737, "type": "story", "id": 9934032}, {"kids": [9934321, 9934288, 9934244, 9934484], "descendants": 7, "url": "http://www.ap.org/content/press-release/2015/ap-makes-one-million-minutes-of-history-available-on-youtube", "title": "AP makes one million minutes 
of historical footage available on YouTube", "by": "mxfh", "score": 54, "time": 1437606964, "type": "story", "id": 9932996}, {"kids": [9934514, 9934426, 9931735, 9934102, 9931992, 9932149, 9933277, 9933260, 9932603, 9932434, 9932327, 9932177], "descendants": 64, "url": "http://www.cs.toronto.edu/~graves/handwriting.html", "title": "Handwriting Generation with Recurrent Neural Networks", "by": "cjdulberger", "score": 216, "time": 1437586331, "type": "story", "id": 9931041}, {"kids": [9933165, 9933281, 9933242, 9933234, 9933346, 9933353, 9933109, 9933400, 9933428, 9933158, 9933099, 9933288, 9934068, 9933675, 9933557, 9933110, 9933104, 9933168, 9933457, 9933423], "descendants": 180, "url": "http://www.vox.com/2015/7/22/9015443/bill-de-blasio-uber", "title": "Uber has defeated Bill de Blasio\u2019s plan to rein them\u00a0in", "by": "jseliger", "score": 132, "time": 1437606977, "type": "story", "id": 9932997}, {"kids": [9933326, 9933248, 9933701, 9933525, 9934300, 9934021, 9933292, 9934172, 9933375, 9933472, 9933253, 9933407, 9933693, 9933788, 9933576], "descendants": 34, "url": "http://well.blogs.nytimes.com/2015/07/22/how-nature-changes-the-brain/", "text": "", "title": "How Walking in Nature Changes the Brain", "by": "joshrotenberg", "score": 91, "time": 1437604126, "type": "story", "id": 9932793}, {"kids": [9932352, 9931954, 9933268, 9933614, 9933318, 9931775], "descendants": 20, "url": "http://arxiv.org/abs/1507.05724v1", "title": "HORNET: High-speed Onion Routing at the Network Layer", "by": "sp332", "score": 105, "time": 1437585355, "type": "story", "id": 9930929}, {"kids": [9934349, 9933566], "descendants": 2, "url": "http://adrianchadd.blogspot.com/2015/07/freebsd-now-has-numa-whyd-it-take-so.html", "title": "NUMA support in FreeBSD", "by": "adamnemecek", "score": 61, "time": 1437601951, "type": "story", "id": 9932591}, {"kids": [9930299, 9930930, 9931004, 9930904, 9930361, 9930489, 9930941, 9930379, 9930355, 9930323, 9934145, 9933006, 9931844, 9930575, 9930503, 9931561, 9931316, 9931134, 9933868, 9930378, 9931584, 9932026, 9930692, 9931165, 9931337, 9931508, 9931738, 9930441], "descendants": 63, "url": "http://f21threadscreen.com/", "title": "F21 Thread Screen", "by": "s0rce", "score": 276, "time": 1437578273, "type": "story", "id": 9930097}, {"kids": [9934164, 9934222, 9934277, 9934170, 9934186, 9934175, 9934219, 9934185, 9934261, 9934200, 9934187], "descendants": 13, "url": "https://medium.com/tweet-stormed/on-full-stack-startups-c6436f445cc8", "text": "", "title": "On Full-Stack Startups", "by": "peter123", "score": 21, "time": 1437605131, "type": "story", "id": 9932856}, {"kids": [9934256, 9934276, 9934398, 9934154], "descendants": 9, "url": "http://www.csmonitor.com/Science/2015/0722/Can-we-colonize-the-moon", "title": "Can we colonize the moon?", "by": "cpeterso", "score": 23, "time": 1437622687, "type": "story", "id": 9933839}, {"kids": [9934371, 9934365, 9934494, 9934435, 9934413, 9934491, 9934394, 9934385, 9934414, 9934360], "descendants": 20, "url": "https://medium.com/@492727ZED/steve-jobs-made-warner-music-sue-my-startup-9a81c5a21d68", "title": "How Apple Influenced The Labels To Shut Down My Music Streaming Startup", "by": "meeper16", "score": 77, "time": 1437620972, "type": "story", "id": 9933757}, {"kids": [9931350, 9931035, 9931408, 9931152, 9931114, 9931570, 9930834, 9930806, 9932886], "descendants": 34, "url": "http://blog.algorithmia.com/post/124542129914/mining-product-hunt-detecting-vote-rings", "text": "", "title": "Mining Product Hunt, Part 1: Detecting 
Vote-Rings", "by": "ANaimi", "score": 125, "time": 1437582290, "type": "story", "id": 9930582}] -------------------------------------------------------------------------------- /data/wb/Broad Money (M2) to foreign reserves, ratio.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Broad Money (M2) to foreign reserves, ratio.xlsx -------------------------------------------------------------------------------- /data/wb/Broad Money (M2) to monetary base, ratio (M2 multiplier).xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Broad Money (M2) to monetary base, ratio (M2 multiplier).xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, % y-o-y, median weighted, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, % y-o-y, median weighted, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, % y-o-y, nominal, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, % y-o-y, nominal, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, nominal, not seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, nominal, not seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, nominal, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, nominal, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Commodity Prices.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Commodity Prices.xlsx -------------------------------------------------------------------------------- /data/wb/Core CPI, not seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Core CPI, not seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Core CPI, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Core CPI, seas. 
adj..xlsx -------------------------------------------------------------------------------- /data/wb/Emerging Market Bond Index (JPM Total Return Index).xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Emerging Market Bond Index (JPM Total Return Index).xlsx -------------------------------------------------------------------------------- /data/wb/Exchange rate, new LCU per USD extended backward, period average.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exchange rate, new LCU per USD extended backward, period average.xlsx -------------------------------------------------------------------------------- /data/wb/Exchange rate, old LCU per USD extended forward, period average.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exchange rate, old LCU per USD extended forward, period average.xlsx -------------------------------------------------------------------------------- /data/wb/Exports Merchandise, Customs, Price, US$, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exports Merchandise, Customs, Price, US$, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Exports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Exports Merchandise, Customs, current US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exports Merchandise, Customs, current US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Foreign Reserves, Months Import Cover, Goods.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Foreign Reserves, Months Import Cover, Goods.xlsx -------------------------------------------------------------------------------- /data/wb/GDP Deflator at Market Prices, LCU.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP Deflator at Market Prices, LCU.xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, constant 2010 LCU, millions, seas. 
adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, constant 2010 LCU, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, constant 2010 US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, constant 2010 US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, current LCU, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, current LCU, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, current US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, current US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP_Current_Dollars.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP_Current_Dollars.xlsx -------------------------------------------------------------------------------- /data/wb/Imports Merchandise, Customs, Price, US$, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Imports Merchandise, Customs, Price, US$, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Imports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Imports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Imports Merchandise, Customs, current US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Imports Merchandise, Customs, current US$, millions, seas. 
adj..xlsx -------------------------------------------------------------------------------- /data/wb/Industrial Production, constant 2010 US$, not seasonally adjusted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Industrial Production, constant 2010 US$, not seasonally adjusted.xlsx -------------------------------------------------------------------------------- /data/wb/Industrial Production, constant 2010 US$, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Industrial Production, constant 2010 US$, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Nominal Effecive Exchange Rate.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Nominal Effecive Exchange Rate.xlsx -------------------------------------------------------------------------------- /data/wb/Official exchange rate, LCU per USD, period average.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Official exchange rate, LCU per USD, period average.xlsx -------------------------------------------------------------------------------- /data/wb/Real Effective Exchange Rate.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Real Effective Exchange Rate.xlsx -------------------------------------------------------------------------------- /data/wb/Retail Sales Volume Index, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Retail Sales Volume Index, seas. 
adj..xlsx -------------------------------------------------------------------------------- /data/wb/Sovereign Bond Interest Rate Spreads, basis points over US Treasuries.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Sovereign Bond Interest Rate Spreads, basis points over US Treasuries.xlsx -------------------------------------------------------------------------------- /data/wb/Stock Markets, LCU.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Stock Markets, LCU.xlsx -------------------------------------------------------------------------------- /data/wb/Stock Markets, US$.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Stock Markets, US$.xlsx -------------------------------------------------------------------------------- /data/wb/Terms of Trade.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Terms of Trade.xlsx -------------------------------------------------------------------------------- /data/wb/Total Reserves.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Total Reserves.xlsx -------------------------------------------------------------------------------- /data/wb/stock_market.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/stock_market.xlsx -------------------------------------------------------------------------------- /data/wb/stock_metadata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/stock_metadata.xlsx -------------------------------------------------------------------------------- /data/wb/unemployment.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/unemployment.xlsx -------------------------------------------------------------------------------- /data_wrangling_3.yml: -------------------------------------------------------------------------------- 1 | name: conda_dw34 2 | dependencies: 3 | - backports=1.0=py34_0 4 | - backports_abc=0.4=py34_0 5 | - bokeh=0.12.0=py34_0 6 | - cairo=1.12.18=6 7 | - certifi=2016.2.28=py34_0 8 | - cffi=1.6.0=py34_0 9 | - cryptography=1.4=py34_0 10 | - cycler=0.10.0=py34_0 11 | - decorator=4.0.10=py34_0 12 | - entrypoints=0.2.2=py34_0 13 | - et_xmlfile=1.0.1=py34_0 14 | - fontconfig=2.11.1=6 15 | - freetype=2.5.5=1 16 | - futures=3.0.3=py34_0 17 | - get_terminal_size=1.0.0=py34_0 18 | - glib=2.43.0=1 19 | - harfbuzz=0.9.39=1 20 | - idna=2.1=py34_0 21 | - ipykernel=4.3.1=py34_0 22 | - ipython=5.0.0=py34_0 23 | - ipython_genutils=0.1.0=py34_0 24 | - ipywidgets=4.1.1=py34_0 25 | - 
jdcal=1.2=py34_1 26 | - jinja2=2.8=py34_1 27 | - jsonschema=2.5.1=py34_0 28 | - jupyter=1.0.0=py34_3 29 | - jupyter_client=4.3.0=py34_0 30 | - jupyter_console=5.0.0=py34_0 31 | - jupyter_core=4.1.0=py34_0 32 | - libffi=3.2.1=0 33 | - libpng=1.6.22=0 34 | - libsodium=1.0.10=0 35 | - libxml2=2.9.2=0 36 | - libxslt=1.1.28=0 37 | - lxml=3.6.0=py34_0 38 | - markupsafe=0.23=py34_2 39 | - matplotlib=1.5.1=np111py34_0 40 | - mistune=0.7.2=py34_0 41 | - mkl=11.3.3=0 42 | - nbconvert=4.2.0=py34_0 43 | - nbformat=4.0.1=py34_0 44 | - notebook=4.2.1=py34_0 45 | - numpy=1.11.1=py34_0 46 | - openpyxl=2.3.2=py34_0 47 | - openssl=1.0.2h=1 48 | - pandas=0.18.1=np111py34_0 49 | - pandas-datareader=0.2.1=py34_0 50 | - pango=1.39.0=1 51 | - path.py=8.2.1=py34_0 52 | - pathlib2=2.1.0=py34_0 53 | - pdfminer.six==20160614 54 | - pdftables.six==0.0.5 55 | - pexpect=4.0.1=py34_0 56 | - pickleshare=0.7.2=py34_0 57 | - pip=8.1.2=py34_0 58 | - pixman=0.32.6=0 59 | - prompt_toolkit=1.0.3=py34_0 60 | - ptyprocess=0.5.1=py34_0 61 | - pyasn1=0.1.9=py34_0 62 | - pycparser=2.14=py34_1 63 | - pygments=2.1.3=py34_0 64 | - pyparsing=2.1.4=py34_0 65 | - pyqt=4.11.4=py34_4 66 | - python=3.4.5=0 67 | - python-dateutil=2.5.3=py34_0 68 | - pytz=2016.6.1=py34_0 69 | - pyyaml=3.11=py34_4 70 | - pyzmq=15.3.0=py34_0 71 | - qt=4.8.7=4 72 | - qtconsole=4.2.1=py34_0 73 | - readline=6.2=2 74 | - requests=2.10.0=py34_0 75 | - requests-file=1.4=py34_0 76 | - setuptools=23.0.0=py34_0 77 | - simplegeneric=0.8.1=py34_1 78 | - singledispatch=3.4.0.3=py34_0 79 | - sip=4.18=py34_0 80 | - six=1.10.0=py34_0 81 | - sqlite=3.13.0=0 82 | - ssl_match_hostname=3.4.0.2=py34_0 83 | - terminado=0.6=py34_0 84 | - tk=8.5.18=0 85 | - tornado=4.3=py34_1 86 | - traitlets=4.2.2=py34_0 87 | - wcwidth=0.1.7=py34_0 88 | - wheel=0.29.0=py34_0 89 | - xlrd=1.0.0=py34_0 90 | - xz=5.2.2=0 91 | - yaml=0.1.6=0 92 | - zeromq=4.1.4=0 93 | - zlib=1.2.8=3 94 | - pip: 95 | - backports-abc==0.4 96 | - backports.shutil-get-terminal-size==1.0.0 97 | - backports.ssl-match-hostname==3.4.0.2 98 | - configparser==3.5.0 99 | - enum34==1.1.6 100 | - et-xmlfile==1.0.1 101 | - ipaddress==1.0.16 102 | - ipython-genutils==0.1.0 103 | - jupyter-client==4.3.0 104 | - jupyter-console==5.0.0 105 | - jupyter-core==4.1.0 106 | - ndg-httpsclient==0.4.2 107 | - prompt-toolkit==1.0.3 108 | - pyopenssl==16.0.0 109 | - urllib3==1.16 110 | 111 | 112 | -------------------------------------------------------------------------------- /py3_requirements.txt: -------------------------------------------------------------------------------- 1 | backports-abc==0.4 2 | backports.shutil-get-terminal-size==1.0.0 3 | backports.ssl-match-hostname==3.5.0.1 4 | bokeh==0.11.1 5 | certifi==2016.2.28 6 | cffi==1.6.0 7 | cryptography==1.3.2 8 | cycler==0.10.0 9 | decorator==4.0.9 10 | entrypoints==0.2.1 11 | enum34==1.1.6 12 | et-xmlfile==1.0.1 13 | futures==3.0.5 14 | idna==2.1 15 | ipaddress==1.0.16 16 | ipykernel==4.3.1 17 | ipython==4.2.0 18 | ipython-genutils==0.1.0 19 | ipywidgets==5.1.3 20 | jdcal==1.2 21 | Jinja2==2.8 22 | jsonschema==2.5.1 23 | jupyter==1.0.0 24 | jupyter-client==4.2.2 25 | jupyter-console==4.1.1 26 | jupyter-core==4.1.0 27 | lxml==3.6.0 28 | MarkupSafe==0.23 29 | matplotlib==1.5.1 30 | mistune==0.7.2 31 | nbconvert==4.2.0 32 | nbformat==4.0.1 33 | ndg-httpsclient==0.4.0 34 | notebook==4.2.0 35 | numpy==1.11.0 36 | oauthlib==1.1.1 37 | openpyxl==2.3.5 38 | pandas==0.18.1 39 | pandas-datareader==0.2.1 40 | pathlib2==2.1.0 41 | pdfminer.six==20160614 42 | pdftables.six==0.0.5 43 | pexpect==4.0.1 44 
| pickleshare==0.7.2 45 | ptyprocess==0.5.1 46 | pyasn1==0.1.9 47 | pycparser==2.14 48 | Pygments==2.1.3 49 | pyOpenSSL==16.0.0 50 | pyparsing==2.1.4 51 | python-dateutil==2.5.3 52 | pytz==2016.4 53 | PyYAML==3.11 54 | pyzmq==15.2.0 55 | qtconsole==4.2.1 56 | requests==2.10.0 57 | requests-file==1.4 58 | requests-oauthlib==0.6.1 59 | simplegeneric==0.8.1 60 | singledispatch==3.4.0.3 61 | six==1.10.0 62 | terminado==0.6 63 | tornado==4.3 64 | traitlets==4.2.1 65 | tweepy==3.5.0 66 | urllib3==1.15.1 67 | widgetsnbextension==1.2.2 68 | xlrd==0.9.4 69 | -------------------------------------------------------------------------------- /py3_server_requirements.txt: -------------------------------------------------------------------------------- 1 | backports-abc==0.4 2 | backports.shutil-get-terminal-size==1.0.0 3 | backports.ssl-match-hostname==3.5.0.1 4 | bokeh==0.11.1 5 | certifi==2016.2.28 6 | cffi==1.6.0 7 | chardet==2.3.0 8 | cryptography==1.3.2 9 | cssselect==0.9.1 10 | cycler==0.10.0 11 | Cython==0.24.1 12 | decorator==4.0.9 13 | dill==0.2.5 14 | entrypoints==0.2.1 15 | enum34==1.1.6 16 | et-xmlfile==1.0.1 17 | flake8==2.6.2 18 | funcsigs==1.0.2 19 | futures==3.0.5 20 | idna==2.1 21 | ipaddress==1.0.16 22 | ipykernel==4.3.1 23 | ipyparallel==5.1.1 24 | ipython==4.2.0 25 | ipython-genutils==0.1.0 26 | ipywidgets==5.1.3 27 | jdcal==1.2 28 | Jinja2==2.8 29 | jsonschema==2.5.1 30 | jupyter==1.0.0 31 | jupyter-client==4.2.2 32 | jupyter-console==4.1.1 33 | jupyter-core==4.1.0 34 | jupyterhub==0.6.1 35 | line-profiler==1.0 36 | lxml==3.6.0 37 | MarkupSafe==0.23 38 | matplotlib==1.5.1 39 | mccabe==0.5.0 40 | multiprocess==0.70.4 41 | nbconvert==4.2.0 42 | nbformat==4.0.1 43 | ndg-httpsclient==0.4.0 44 | nose==1.3.7 45 | notebook==4.2.0 46 | numpy==1.11.0 47 | oauthenticator==0.5.0 48 | oauthlib==1.1.1 49 | openpyxl==2.3.5 50 | pamela==0.2.1 51 | pandas==0.18.1 52 | pandas-datareader==0.2.1 53 | paramiko==2.0.1 54 | pathlib2==2.1.0 55 | pathos==0.2.0 56 | pdfminer.six==20160614 57 | pdftables.six==0.0.5 58 | pep8==1.7.0 59 | pexpect==4.0.1 60 | pickleshare==0.7.2 61 | pox==0.2.2 62 | ppft==1.6.4.6 63 | ptyprocess==0.5.1 64 | pyasn1==0.1.9 65 | pycodestyle==2.0.0 66 | pycparser==2.14 67 | pyflakes==1.2.3 68 | Pygments==2.1.3 69 | pyOpenSSL==16.0.0 70 | pyparsing==2.1.4 71 | python-dateutil==2.5.3 72 | pytz==2016.4 73 | PyYAML==3.11 74 | pyzmq==15.2.0 75 | qtconsole==4.2.1 76 | requests==2.10.0 77 | requests-file==1.4 78 | requests-oauthlib==0.6.1 79 | simplegeneric==0.8.1 80 | singledispatch==3.4.0.3 81 | six==1.10.0 82 | SQLAlchemy==1.0.15 83 | terminado==0.6 84 | tornado==4.3 85 | traitlets==4.2.1 86 | tweepy==3.5.0 87 | urllib3==1.15.1 88 | widgetsnbextension==1.2.2 89 | xlrd==0.9.4 90 | -------------------------------------------------------------------------------- /pycon_2015_requirements.txt: -------------------------------------------------------------------------------- 1 | xlrd 2 | requests 3 | fuzzywuzzy 4 | textblob 5 | python-Levenshtein 6 | journalism 7 | latimes-calculate 8 | bokeh 9 | pygal 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backports-abc==0.4 2 | backports.shutil-get-terminal-size==1.0.0 3 | backports.ssl-match-hostname==3.5.0.1 4 | bokeh==0.11.1 5 | certifi==2016.2.28 6 | cffi==1.6.0 7 | configparser==3.3.0.post2 8 | cryptography==1.3.2 9 | cycler==0.10.0 10 | decorator==4.0.9 11 | entrypoints==0.2.1 12 | enum34==1.1.6 13 | 
et-xmlfile==1.0.1 14 | functools32==3.2.3.post2 15 | futures==3.0.5 16 | idna==2.1 17 | ipaddress==1.0.16 18 | ipykernel==4.3.1 19 | ipython==4.2.0 20 | ipython-genutils==0.1.0 21 | ipywidgets==5.1.3 22 | jdcal==1.2 23 | Jinja2==2.8 24 | jsonschema==2.5.1 25 | jupyter==1.0.0 26 | jupyter-client==4.2.2 27 | jupyter-console==4.1.1 28 | jupyter-core==4.1.0 29 | lxml==3.6.0 30 | MarkupSafe==0.23 31 | matplotlib==1.5.1 32 | mistune==0.7.2 33 | nbconvert==4.2.0 34 | nbformat==4.0.1 35 | ndg-httpsclient==0.4.0 36 | notebook==4.2.0 37 | numpy==1.11.0 38 | oauthlib==1.1.1 39 | openpyxl==2.3.5 40 | pandas==0.18.1 41 | pandas-datareader==0.2.1 42 | pathlib2==2.1.0 43 | pdfminer==20110515 44 | pdftables==0.0.4 45 | pexpect==4.0.1 46 | pickleshare==0.7.2 47 | ptyprocess==0.5.1 48 | pyasn1==0.1.9 49 | pycparser==2.14 50 | Pygments==2.1.3 51 | pyOpenSSL==16.0.0 52 | pyparsing==2.1.4 53 | python-dateutil==2.5.3 54 | pytz==2016.4 55 | PyYAML==3.11 56 | pyzmq==15.2.0 57 | qtconsole==4.2.1 58 | requests==2.10.0 59 | requests-file==1.4 60 | requests-oauthlib==0.6.1 61 | simplegeneric==0.8.1 62 | singledispatch==3.4.0.3 63 | six==1.10.0 64 | terminado==0.6 65 | tornado==4.3 66 | traitlets==4.2.1 67 | tweepy==3.5.0 68 | urllib3==1.15.1 69 | widgetsnbextension==1.2.2 70 | xlrd==0.9.4 71 | -------------------------------------------------------------------------------- /scripts/pycon-2015/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/scripts/pycon-2015/__init__.py -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson01_csv.py: -------------------------------------------------------------------------------- 1 | from csv import DictReader 2 | 3 | 4 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 5 | 6 | all_lines = [r for r in rdr] 7 | 8 | print all_lines[0].keys() 9 | 10 | for line in all_lines: 11 | try: 12 | if 'Gross domestic product' in line.get('Subject Descriptor') and \ 13 | 'international dollar' in line.get('Units'): 14 | print '{}: {} ({} {})'.format( 15 | line.get('Country'), line.get('2015'), '2015', line.get('Scale')) 16 | except: 17 | print "ERROR: ", line 18 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson02_xlsx.py: -------------------------------------------------------------------------------- 1 | import xlrd 2 | 3 | 4 | notebook = xlrd.open_workbook('../data/wb/GDP_Current_Dollars.xlsx') 5 | 6 | for sheet in notebook.sheets(): 7 | print sheet.name 8 | 9 | sheet = notebook.sheet_by_name('Data') 10 | 11 | titles = sheet.row_values(0) 12 | print titles 13 | 14 | 15 | def build_array(sheet, titles, start_row=1): 16 | new_arr = [] 17 | while start_row < sheet.nrows: 18 | new_arr.append( 19 | dict(zip(titles, sheet.row_values(start_row))) 20 | ) 21 | start_row += 1 22 | return new_arr 23 | 24 | 25 | arr = build_array(sheet, titles) 26 | 27 | for line in arr: 28 | print line.get('Country Name'), line.get('2014 [YR2014]') 29 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson03_databases.py: -------------------------------------------------------------------------------- 1 | import dataset 2 | 3 | db = dataset.connect('sqlite:///../data/data_analysis.db') 4 | 5 | my_sources = db['sources'] 6 | 7 | my_sources.insert({'organization': 'IMF', 8 | 'file_name': 
'imf_indicators.tsv', 9 | 'url': 'http://www.imf.org/external/pubs/ft/weo/2015/01/weodata/index.aspx', 10 | 'description': 'IMF World Economic Outlook Dataset', 11 | }) 12 | 13 | my_sources.insert({'organization': 'World Bank', 14 | 'file_name': 'wb/GDP_Current_Dollars.xlsx', 15 | 'url': 'http://databank.worldbank.org/data/reports.aspx?source=2&series=NY.GDP.MKTP.CD#', 16 | 'description': 'World Bank GDP Dataset', 17 | }) 18 | 19 | print db.tables 20 | 21 | for row in db['sources']: 22 | print row['description'] 23 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson04_05_api_regex.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import json 4 | from multiprocessing import Process, Manager 5 | 6 | MATCHING = ( 7 | ('Python', '(p|P)ython'), 8 | ('Ruby', '(r|R)uby'), 9 | ('JavaScript', 'js|(J|j)ava(s|S)cript'), 10 | ('NodeJS', 'node(\.?)(?:\js|JS)'), 11 | ('Java', '(j|J)ava[^(S|s)cript]'), 12 | ('Objective-C', 'Obj(ective?)(?:\ |-)(C|c)'), 13 | ) 14 | 15 | 16 | def get_story(story_id, stories): 17 | url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id 18 | resp = requests.get(url) 19 | stories.append(resp.json()) 20 | 21 | 22 | def get_top_stories(): 23 | manager = Manager() 24 | stories = manager.list() 25 | url = 'https://hacker-news.firebaseio.com/v0/topstories.json' 26 | ids = requests.get(url) 27 | processes = [Process(target=get_story, args=(sid, stories)) 28 | for sid in ids.json()[:40]] 29 | for p in processes: 30 | p.start() 31 | for p in processes: 32 | p.join() 33 | return stories 34 | 35 | 36 | def get_json_stories(): 37 | return json.load(open('../data/topstories.json', 'rb')) 38 | 39 | 40 | def count_languages(): 41 | stories = get_top_stories() 42 | final_tallies = {} 43 | for s in stories: 44 | long_string = u'{} {}'.format(s.get('title'), s.get('url')) 45 | for language, regex in dict(MATCHING).items(): 46 | if re.search(regex, long_string): 47 | if language not in final_tallies.keys(): 48 | final_tallies[language] = { 49 | 'score': s.get('score'), 50 | 'descendants': s.get('descendants')} 51 | else: 52 | final_tallies[language]['score'] += s.get('score') 53 | final_tallies[language][ 54 | 'descendants'] += s.get('descendants') 55 | return final_tallies 56 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson06_string_processing.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from multiprocessing import Process, Manager 3 | from fuzzywuzzy import fuzz 4 | from textblob import TextBlob 5 | import re 6 | import json 7 | 8 | 9 | def get_story(story_id, stories): 10 | url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id 11 | resp = requests.get(url) 12 | stories.append(resp.json()) 13 | return stories 14 | 15 | 16 | def get_top_stories(): 17 | manager = Manager() 18 | stories = manager.list() 19 | url = 'https://hacker-news.firebaseio.com/v0/topstories.json' 20 | ids = requests.get(url) 21 | processes = [Process(target=get_story, args=(sid, stories)) 22 | for sid in ids.json()[:40]] 23 | for p in processes: 24 | p.start() 25 | for p in processes: 26 | p.join() 27 | return stories 28 | 29 | 30 | def get_json_stories(): 31 | return json.load(open('../data/topstories.json', 'rb')) 32 | 33 | 34 | def get_json_comments(): 35 | return json.load(open('../data/comments.json', 'rb')) 36 | 37 | 38 | def 
get_all_comments(sid): 39 | manager = Manager() 40 | comments = manager.list() 41 | story = get_story(sid, []) 42 | if not story[0].get('kids'): 43 | return [] 44 | processes = [Process(target=get_story, args=(cid, comments)) 45 | for cid in story[0].get('kids')] 46 | for p in processes: 47 | p.start() 48 | for p in processes: 49 | p.join() 50 | return [c for c in comments if c and not c.get('deleted')] 51 | 52 | 53 | def remove_html(text): 54 | try: 55 | return re.sub('<[^<]+?>', '', text) 56 | except: 57 | print text 58 | return text 59 | 60 | 61 | def is_match(first, second): 62 | ratio = fuzz.token_sort_ratio(first, second) 63 | if ratio > 50: 64 | return True 65 | return False 66 | 67 | 68 | def find_matching_comments(): 69 | stories = get_top_stories() 70 | comments = [] 71 | while len(comments) < 1: 72 | for s in stories: 73 | comments.extend(get_all_comments(s.get('id'))) 74 | matches = [] 75 | comment_text = ['%s - %s' % (c.get('by'), 76 | remove_html(c.get('text'))) for c in comments] 77 | for c in comments: 78 | ctext = remove_html(c.get('text')) 79 | comment_text.remove('%s - %s' % (c.get('by'), ctext)) 80 | for txt in comment_text: 81 | if is_match(ctext, txt): 82 | matches.append((c, txt)) 83 | return matches 84 | 85 | 86 | def comment_sentiment(): 87 | stories = get_top_stories() 88 | comments = get_all_comments(stories[0].get('id')) 89 | for comm in comments: 90 | comm['sentiment'] = TextBlob(comm.get( 91 | 'text')).sentiment.polarity 92 | comments.sort(key=lambda x: x.get('sentiment')) 93 | return comments 94 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson07_calculate.py: -------------------------------------------------------------------------------- 1 | import calculate 2 | import requests 3 | from multiprocessing import Process, Manager 4 | from decimal import Decimal 5 | 6 | 7 | def get_story(story_id, stories): 8 | url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id 9 | resp = requests.get(url) 10 | story_data = resp.json() 11 | user_data = get_user(story_data.get('by')) 12 | story_data['user_karma'] = user_data.get('karma') or 0 13 | stories.append(story_data) 14 | return stories 15 | 16 | 17 | def get_user(user_id): 18 | url = 'https://hacker-news.firebaseio.com/v0/user/%s.json' % user_id 19 | resp = requests.get(url) 20 | return resp.json() 21 | 22 | 23 | def get_top_stories_with_user_karma(): 24 | manager = Manager() 25 | stories = manager.list() 26 | url = 'https://hacker-news.firebaseio.com/v0/topstories.json' 27 | ids = requests.get(url) 28 | processes = [Process(target=get_story, args=(sid, stories)) 29 | for sid in ids.json()[:40]] 30 | for p in processes: 31 | p.start() 32 | for p in processes: 33 | p.join() 34 | return stories 35 | 36 | 37 | def calculate_summary_karma(): 38 | stories = get_top_stories_with_user_karma() 39 | return calculate.summary_stats([ 40 | Decimal(s.get('score')) for s in stories]) 41 | 42 | 43 | def pearsons_karma(): 44 | stories = get_top_stories_with_user_karma() 45 | user_karma = [Decimal(s.get('user_karma')) for s in stories] 46 | story_karma = [Decimal(s.get('score')) for s in stories] 47 | return calculate.pearson(user_karma, story_karma) 48 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson08_journalism.py: -------------------------------------------------------------------------------- 1 | import journalism 2 | import logging 3 | from csv import reader 4 | 5 | text_type = 
journalism.TextType() 6 | number_type = journalism.NumberType() 7 | date_type = journalism.DateType() 8 | 9 | 10 | def get_table(datarows, types, titles): 11 | try: 12 | table = journalism.Table(datarows, types, titles) 13 | return table 14 | except: 15 | logging.exception('problem loading table') 16 | return None 17 | 18 | 19 | def clean_text(row): 20 | new_row = [] 21 | for item in row: 22 | if isinstance(item, (str, unicode)): 23 | item = item.decode('utf-8', 'replace') 24 | if item in [u'--', u'n/a']: 25 | item = None 26 | new_row.append(item) 27 | 28 | return new_row 29 | 30 | 31 | def clean_rows(all_rows): 32 | new_data = [] 33 | for row in all_rows: 34 | new_data.append(clean_text(row)) 35 | return new_data 36 | 37 | 38 | def load_imf_data(): 39 | rdr = reader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 40 | all_rows = [r for r in rdr if len(r) > 1] 41 | titles = all_rows.pop(0) 42 | cleaned_rows = clean_rows(all_rows) 43 | types = [text_type, text_type, text_type, text_type, text_type, 44 | number_type, number_type, number_type, number_type, 45 | number_type, number_type, number_type, number_type, 46 | date_type] 47 | return get_table(cleaned_rows, types, titles) 48 | 49 | 50 | def add_last_percent_change(): 51 | table = load_imf_data() 52 | table = table.where(lambda r: r.get('2015') is not 53 | None and r.get('2014') is not None) 54 | table = table.where(lambda r: 'Unemployment' in 55 | r.get('Subject Descriptor')) 56 | table = table.percent_change('2014', '2015', 'last_change') 57 | return table 58 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson09_matplotlib.py: -------------------------------------------------------------------------------- 1 | from csv import DictReader 2 | from decimal import Decimal 3 | import calculate 4 | import pylab 5 | 6 | 7 | def load_imf_unemployment(): 8 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 9 | return [r for r in rdr if r.get('Subject Descriptor') and 10 | 'Unemployment' in r.get('Subject Descriptor')] 11 | 12 | 13 | def get_avg_unemployment(data, start_year=2013, end_year=2015): 14 | avgs = {} 15 | while start_year <= end_year: 16 | avg = calculate.mean([ 17 | Decimal(rate.get(str(start_year))) for 18 | rate in data if rate.get(str(start_year))]) 19 | avgs[str(start_year)] = avg 20 | start_year += 1 21 | return avgs 22 | 23 | 24 | def chart_unemployment(): 25 | imf_data = load_imf_unemployment() 26 | averages = get_avg_unemployment(imf_data) 27 | pylab.plot(averages.keys(), averages.values()) 28 | pylab.ylabel('Average Unemployment') 29 | pylab.xlabel('Years') 30 | pylab.title('Average Unemployment Over Time') 31 | pylab.ylim([0, sorted(averages.values(), reverse=True)[0] + 1]) 32 | pylab.show() 33 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson10_pygal.py: -------------------------------------------------------------------------------- 1 | import pygal 2 | from csv import DictReader 3 | 4 | 5 | def load_imf_unemployment(): 6 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 7 | return [r for r in rdr if r.get('Subject Descriptor') and 8 | 'Unemployment' in r.get('Subject Descriptor')] 9 | 10 | 11 | def load_iso_codes(): 12 | iso_dict = {} 13 | for row in DictReader(open('../data/iso-2.csv', 'rb')): 14 | iso_dict[row.get('Name')] = row.get('Code') 15 | return iso_dict 16 | 17 | 18 | def load_and_merge_data(): 19 | iso_dict = load_iso_codes() 20 | 
imf_data = load_imf_unemployment() 21 | for d in imf_data: 22 | d['iso'] = iso_dict[d.get('Country')] 23 | return imf_data 24 | 25 | 26 | def draw_unemployment(): 27 | imf_data = load_and_merge_data() 28 | worldmap_data = {} 29 | for row in imf_data: 30 | worldmap_data[row.get('iso').lower()] = float(row.get('2015')) 31 | worldmap_chart = pygal.Worldmap() 32 | worldmap_chart.title = '2015 Unemployment' 33 | worldmap_chart.add('Total Unemployment (%)', worldmap_data) 34 | worldmap_chart.render() 35 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson11_bokeh.py: -------------------------------------------------------------------------------- 1 | from bokeh.plotting import figure, show, output_file 2 | from csv import DictReader 3 | 4 | 5 | def load_imf_unemployment(): 6 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 7 | return [r for r in rdr if r.get('Subject Descriptor') and 8 | 'Unemployment' in r.get('Subject Descriptor')] 9 | 10 | 11 | def mscatter(chart, x, y, typestr): 12 | chart.scatter(x, y, marker=typestr, line_color="#6666ee", 13 | fill_color="#ee6666", fill_alpha=0.5, size=12) 14 | 15 | 16 | def draw_scatter(): 17 | chart = figure(title="IMF Unemployment") 18 | output_file("../../static/unemployment.html") 19 | imf_data = load_imf_unemployment() 20 | for line in imf_data: 21 | for year in ['2013', '2014', '2015']: 22 | mscatter(chart, int(year), float(line.get(year)), 'circle') 23 | show(chart) 24 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson12_pandas.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_wb_unemployment_data(): 5 | return pd.read_excel('../data/wb/unemployment.xlsx', 6 | index_col=0, header=0, skiprows=[1]) 7 | 8 | 9 | def get_wb_market_data(): 10 | return pd.read_excel('../data/wb/stock_market.xlsx', 11 | index_col=0, header=0, skiprows=[1]) 12 | 13 | 14 | def get_metadata(): 15 | return pd.read_excel('../data/wb/stock_metadata.xlsx', 16 | sheetname=1, index_col=0, header=0) 17 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson13_pandas_join.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_wb_unemployment_data(): 5 | return pd.read_excel('../data/wb/unemployment.xlsx', 6 | index_col=0, header=0, skiprows=[1]) 7 | 8 | 9 | def get_wb_market_data(): 10 | return pd.read_excel('../data/wb/stock_market.xlsx', 11 | index_col=0, header=0, skiprows=[1]) 12 | 13 | 14 | def get_metadata(): 15 | return pd.read_excel('../data/wb/stock_metadata.xlsx', 16 | sheetname=1, index_col=0, header=0) 17 | 18 | 19 | def get_gdp(): 20 | return pd.read_excel('../data/wb/GDP_Current_Dollars.xlsx', 21 | index_col=3, header=0) 22 | 23 | 24 | def clean_market_columns(): 25 | market_data = get_wb_market_data() 26 | market_data.columns = market_data.columns.map(lambda x: x[:3]) 27 | market_data.index = market_data.index.map(lambda x: '{} SM'.format(x)) 28 | return market_data.transpose() 29 | 30 | 31 | def update_gdp_cols(colname): 32 | if colname[:4].isdigit(): 33 | return '{} GDP'.format(colname[:4]) 34 | return colname 35 | 36 | 37 | def join_market_and_gdp(): 38 | market_data = clean_market_columns() 39 | gdp_data = get_gdp() 40 | gdp_data.columns = gdp_data.columns.map(update_gdp_cols) 41 | return market_data.join(gdp_data) 42 | 
-------------------------------------------------------------------------------- /scripts/pycon-2015/lesson14_pandas_compute.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_wb_unemployment_data(): 5 | return pd.read_excel('../data/wb/unemployment.xlsx', 6 | index_col=0, header=0, skiprows=[1]) 7 | 8 | 9 | def get_wb_market_data(): 10 | return pd.read_excel('../data/wb/stock_market.xlsx', 11 | index_col=0, header=0, skiprows=[1]) 12 | 13 | 14 | def get_metadata(): 15 | return pd.read_excel('../data/wb/stock_metadata.xlsx', 16 | sheetname=1, index_col=0, header=0) 17 | 18 | 19 | def get_gdp(): 20 | return pd.read_excel('../data/wb/GDP_Current_Dollars.xlsx', 21 | index_col=3, header=0) 22 | 23 | 24 | def clean_market_columns(): 25 | market_data = get_wb_market_data() 26 | market_data.columns = market_data.columns.map(lambda x: x[:3]) 27 | market_data.index = market_data.index.map(lambda x: '{} SM'.format(x)) 28 | return market_data.transpose() 29 | 30 | 31 | def update_gdp_cols(colname): 32 | if colname[:4].isdigit(): 33 | return '{} GDP'.format(colname[:4]) 34 | return colname 35 | 36 | 37 | def join_market_and_gdp(): 38 | market_data = clean_market_columns() 39 | gdp_data = get_gdp() 40 | gdp_data.columns = gdp_data.columns.map(update_gdp_cols) 41 | return market_data.join(gdp_data) 42 | 43 | 44 | def just_spain(): 45 | joined = join_market_and_gdp() 46 | spain = joined.loc['ESP'].copy() 47 | spain_gdp = spain[spain.index.map(lambda x: 'GDP' in x)] 48 | spain_stock = spain[spain.index.map(lambda x: 'SM' in x)] 49 | spain_gdp.index = spain_gdp.index.map(lambda x: x.rstrip(' GDP')) 50 | spain_stock.index = spain_stock.index.map(lambda x: x.rstrip(' SM')) 51 | spain_stock.name = 'Stocks' 52 | spain_gdp.name = 'GDP' 53 | return spain_stock, spain_gdp 54 | 55 | 56 | def merge_back(): 57 | spain_stock, spain_gdp = just_spain() 58 | return pd.concat([spain_stock, spain_gdp], axis=1) 59 | --------------------------------------------------------------------------------