├── .gitignore
├── README.md
├── books
│   ├── intro-to-python
│   │   ├── 01 - Basic Data Types.ipynb
│   │   ├── 02 - Python Complex Types.ipynb
│   │   └── 03 - Python Logic, Loops and Functions.ipynb
│   ├── other-notebooks
│   │   ├── 2016 Election FEC Data.ipynb
│   │   ├── Fun with Dask Distributed.ipynb
│   │   ├── Fun with Dask.ipynb
│   │   └── SF Salaries.ipynb
│   ├── pycon-2015-examples
│   │   ├── Lesson 01 - csv and tsv.ipynb
│   │   ├── Lesson 02 - XLSX.ipynb
│   │   ├── Lesson 03 - databasing.ipynb
│   │   ├── Lesson 04 - API.ipynb
│   │   ├── Lesson 05 - Regex.ipynb
│   │   ├── Lesson 06 - More String Analysis.ipynb
│   │   ├── Lesson 07 - Calculate.ipynb
│   │   ├── Lesson 08 - Journalism Library.ipynb
│   │   ├── Lesson 09 - Matplotlib.ipynb
│   │   ├── Lesson 10 - PyGal.ipynb
│   │   ├── Lesson 11 - Bokeh.ipynb
│   │   ├── Lesson 12 - Pandas Import and Inspection.ipynb
│   │   ├── Lesson 13 - Pandas Filtering and Joins.ipynb
│   │   └── Lesson 14 - Pandas Data Analysis.ipynb
│   └── pydata-examples
│       ├── 01 - Simple formats with Pandas.ipynb
│       ├── 02 - More complex formats with Pandas.ipynb
│       ├── 03 - Pandas Data Reader.ipynb
│       ├── 04 - APIs.ipynb
│       ├── 05 - Messy Data.ipynb
│       ├── 06 - Data Analysis with Pandas.ipynb
│       ├── 07 - Split Apply Combine.ipynb
│       ├── 08 - Bokeh for Data Visualization.ipynb
│       ├── Basic Data Visualization with Pandas.ipynb
│       ├── Introduction to Joins.ipynb
│       ├── Introduction to Regex.ipynb
│       ├── Update HN data.ipynb
│       └── solutions
│           ├── data_analysis_solution.py
│           ├── regex_solution.py
│           ├── sac_solution.py
│           ├── stocks_solution.py
│           ├── visualization_solution.py
│           ├── weather_solution_fix_stations.py
│           └── weather_solution_rainyday.py
├── conda_requirements.txt
├── data
│   ├── WEF_GlobalCompetitivenessReport_2014-15.pdf
│   ├── berlin_weather_oldest.csv
│   ├── comments.json
│   ├── departments.csv
│   ├── employees.csv
│   ├── hn.json
│   ├── imf_indicators.tsv
│   ├── iso-2.csv
│   ├── titles.csv
│   ├── topstories.json
│   └── wb
│       ├── Broad Money (M2) to foreign reserves, ratio.xlsx
│       ├── Broad Money (M2) to monetary base, ratio (M2 multiplier).xlsx
│       ├── CPI Price, % y-o-y, median weighted, seas. adj..xlsx
│       ├── CPI Price, % y-o-y, nominal, seas. adj..xlsx
│       ├── CPI Price, nominal, not seas. adj..xlsx
│       ├── CPI Price, nominal, seas. adj..xlsx
│       ├── Commodity Prices.xlsx
│       ├── Core CPI, not seas. adj..xlsx
│       ├── Core CPI, seas. adj..xlsx
│       ├── Emerging Market Bond Index (JPM Total Return Index).xlsx
│       ├── Exchange rate, new LCU per USD extended backward, period average.xlsx
│       ├── Exchange rate, old LCU per USD extended forward, period average.xlsx
│       ├── Exports Merchandise, Customs, Price, US$, seas. adj..xlsx
│       ├── Exports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx
│       ├── Exports Merchandise, Customs, current US$, millions, seas. adj..xlsx
│       ├── Foreign Reserves, Months Import Cover, Goods.xlsx
│       ├── GDP Deflator at Market Prices, LCU.xlsx
│       ├── GDP at market prices, constant 2010 LCU, millions, seas. adj..xlsx
│       ├── GDP at market prices, constant 2010 US$, millions, seas. adj..xlsx
│       ├── GDP at market prices, current LCU, millions, seas. adj..xlsx
│       ├── GDP at market prices, current US$, millions, seas. adj..xlsx
│       ├── GDP_Current_Dollars.xlsx
│       ├── Imports Merchandise, Customs, Price, US$, seas. adj..xlsx
│       ├── Imports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx
│       ├── Imports Merchandise, Customs, current US$, millions, seas. adj..xlsx
│       ├── Industrial Production, constant 2010 US$, not seasonally adjusted.xlsx
│       ├── Industrial Production, constant 2010 US$, seas. adj..xlsx
│       ├── Nominal Effecive Exchange Rate.xlsx
│       ├── Official exchange rate, LCU per USD, period average.xlsx
│       ├── Real Effective Exchange Rate.xlsx
│       ├── Retail Sales Volume Index, seas. adj..xlsx
│       ├── Sovereign Bond Interest Rate Spreads, basis points over US Treasuries.xlsx
│       ├── Stock Markets, LCU.xlsx
│       ├── Stock Markets, US$.xlsx
│       ├── Terms of Trade.xlsx
│       ├── Total Reserves.xlsx
│       ├── stock_market.xlsx
│       ├── stock_metadata.xlsx
│       └── unemployment.xlsx
├── data_wrangling_3.yml
├── py3_requirements.txt
├── py3_server_requirements.txt
├── pycon_2015_requirements.txt
├── requirements.txt
└── scripts
    └── pycon-2015
        ├── __init__.py
        ├── lesson01_csv.py
        ├── lesson02_xlsx.py
        ├── lesson03_databases.py
        ├── lesson04_05_api_regex.py
        ├── lesson06_string_processing.py
        ├── lesson07_calculate.py
        ├── lesson08_journalism.py
        ├── lesson09_matplotlib.py
        ├── lesson10_pygal.py
        ├── lesson11_bokeh.py
        ├── lesson12_pandas.py
        ├── lesson13_pandas_join.py
        └── lesson14_pandas_compute.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *~
2 | *.pyc
3 | openbooks/*
4 | *.db
5 | .*
6 | !/.gitignore
7 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Data Wrangling Introduction
2 | =======================
3 | 
4 | Welcome to Data Wrangling! Here you'll find some useful scripts and data to get started with Data Wrangling with Python.
5 | 
6 | To Install requirements if you are using Python 2.7+
7 | -----------------------
8 | $ pip install -r requirements.txt
9 | 
10 | To Install requirements if you are using Python 3.4+
11 | -----------------------
12 | $ pip install -r py3_requirements.txt
13 | 
14 | To Install requirements using Conda Environments and default Python
15 | --------------------------------------------------
16 | $ conda create --name conda_dw --file conda_requirements.txt
17 | $ source activate conda_dw
18 | $ conda install openpyxl
19 | 
20 | To Install requirements using Conda Environments and Python3
21 | --------------------------------------------------
22 | $ conda env create -f data_wrangling_3.yml
23 | $ source activate conda_dw3
24 | 
25 | To Access the IPython Notebook
26 | ----------------------
27 | 
28 | [PY3 Notebook](https://class.kjamistan.com)
29 | 
30 | 
31 | Questions?
32 | ----------
33 | 
34 | kjam on twitter / freenode
35 | katharine at kjamistan dot com
36 | 
37 | 
38 | Contributions
39 | --------------
40 | 
41 | Many thanks to [Steven Van den Berghe](https://be.linkedin.com/in/svdberghe) for his help debugging conda environments and providing the `data_wrangling_3.yml` file.
42 | 
43 | Massive Kudos to [Viacheslav Naydenov](https://github.com/vnaydionov/) for porting pdftables to Py3!
44 | 
45 | 
46 | Fair Use
47 | ----------
48 | 
49 | If you would like to use this content for your own course, please do so with attribution and without modification. If you'd like to modify this course and use it, please reach out regarding modifications made. Thank you!
:) 50 | 51 | -------------------------------------------------------------------------------- /books/intro-to-python/03 - Python Logic, Loops and Functions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "### Python Logic, Loops and Functions\n", 8 | "\n", 9 | "We will explore how to perform logic trees (if this, do that), loops (do this for every item) and functions (do this repeated code for me any time I type it's name)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": { 16 | "collapsed": true 17 | }, 18 | "outputs": [], 19 | "source": [ 20 | "from __future__ import print_function" 21 | ] 22 | }, 23 | { 24 | "cell_type": "markdown", 25 | "metadata": {}, 26 | "source": [ 27 | "#### Logic" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": { 34 | "collapsed": true 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "if 0:\n", 39 | " print(1 + 1)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": { 46 | "collapsed": false 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "if 1:\n", 51 | " print(1 + 1)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "if True:\n", 63 | " print('!!!')" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "if False:\n", 75 | " print('???')" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": true 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "if True and False:\n", 87 | " print('?!?')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": { 94 | "collapsed": false 95 | }, 96 | "outputs": [], 97 | "source": [ 98 | "if True or False:\n", 99 | " print('...')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "if not False:\n", 111 | " print('...')" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "if not True:\n", 123 | " print('???')" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "if False:\n", 135 | " print('what?')\n", 136 | "else:\n", 137 | " print('thats better')" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": null, 143 | "metadata": { 144 | "collapsed": false 145 | }, 146 | "outputs": [], 147 | "source": [ 148 | "if False:\n", 149 | " pass\n", 150 | "elif True:\n", 151 | " print('yep!')\n", 152 | "else:\n", 153 | " print('down here!')" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": null, 159 | "metadata": { 160 | "collapsed": false 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "if 4 > 5:\n", 165 | " print('true!')\n", 166 | "else:\n", 167 | " print('false!')" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | 
}, 176 | "outputs": [], 177 | "source": [ 178 | "if 89 < 122:\n", 179 | " print('math!')" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": true 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "my_age = 33" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": null, 196 | "metadata": { 197 | "collapsed": false 198 | }, 199 | "outputs": [], 200 | "source": [ 201 | "if my_age < 22:\n", 202 | " print('too young!')\n", 203 | "elif my_age > 30:\n", 204 | " print('thats ok')\n", 205 | "elif my_age >= 100:\n", 206 | " print('too old!')" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": null, 212 | "metadata": { 213 | "collapsed": false 214 | }, 215 | "outputs": [], 216 | "source": [ 217 | "if True == 1:\n", 218 | " print('true')" 219 | ] 220 | }, 221 | { 222 | "cell_type": "code", 223 | "execution_count": null, 224 | "metadata": { 225 | "collapsed": false 226 | }, 227 | "outputs": [], 228 | "source": [ 229 | "if []:\n", 230 | " print('false')" 231 | ] 232 | }, 233 | { 234 | "cell_type": "markdown", 235 | "metadata": {}, 236 | "source": [ 237 | "#### For loops" 238 | ] 239 | }, 240 | { 241 | "cell_type": "code", 242 | "execution_count": null, 243 | "metadata": { 244 | "collapsed": false 245 | }, 246 | "outputs": [], 247 | "source": [ 248 | "for x in [1, 2, 3]:\n", 249 | " print(x)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": false 257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "for y in 'my_long_string':\n", 261 | " print(y)" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": true 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "my_pets = ['ber', 'lil bunny', 'birdie']" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "metadata": { 279 | "collapsed": false 280 | }, 281 | "outputs": [], 282 | "source": [ 283 | "for pet in my_pets:\n", 284 | " print('I love my pet %s' % pet)" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": true 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "my_integers = [3, 6, 7, 2, 8]" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": null, 301 | "metadata": { 302 | "collapsed": true 303 | }, 304 | "outputs": [], 305 | "source": [ 306 | "new_list = []" 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": { 313 | "collapsed": false 314 | }, 315 | "outputs": [], 316 | "source": [ 317 | "for integer in my_integers:\n", 318 | " new_list.append(integer + 2)" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "metadata": { 325 | "collapsed": false 326 | }, 327 | "outputs": [], 328 | "source": [ 329 | "my_integers" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "new_list" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": false 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "for letter in 'my name':\n", 352 | " print(letter)" 353 | ] 354 | }, 355 | { 356 | "cell_type": "code", 357 | "execution_count": null, 358 | "metadata": { 359 | "collapsed": true 360 | }, 361 
| "outputs": [], 362 | "source": [ 363 | "my_dict = {'foo': 1, 'bar': 2, 'baz': 3}" 364 | ] 365 | }, 366 | { 367 | "cell_type": "code", 368 | "execution_count": null, 369 | "metadata": { 370 | "collapsed": false 371 | }, 372 | "outputs": [], 373 | "source": [ 374 | "my_dict.items()" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "collapsed": true 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "for my_dict_key, my_dict_value in my_dict.items():\n", 386 | " my_dict[x] += 1" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": { 393 | "collapsed": false 394 | }, 395 | "outputs": [], 396 | "source": [ 397 | "x" 398 | ] 399 | }, 400 | { 401 | "cell_type": "code", 402 | "execution_count": null, 403 | "metadata": { 404 | "collapsed": false 405 | }, 406 | "outputs": [], 407 | "source": [ 408 | "my_dict" 409 | ] 410 | }, 411 | { 412 | "cell_type": "code", 413 | "execution_count": null, 414 | "metadata": { 415 | "collapsed": true 416 | }, 417 | "outputs": [], 418 | "source": [ 419 | "my_dict_with_lists = {'first_list': [1, 2, 3], 'second_list': [5, 6, 7]}" 420 | ] 421 | }, 422 | { 423 | "cell_type": "code", 424 | "execution_count": null, 425 | "metadata": { 426 | "collapsed": false 427 | }, 428 | "outputs": [], 429 | "source": [ 430 | "for key, value in my_dict_with_lists.items():\n", 431 | " for val in value:\n", 432 | " print(val)\n" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": null, 438 | "metadata": { 439 | "collapsed": true 440 | }, 441 | "outputs": [], 442 | "source": [ 443 | "def my_addition_function(x, y):\n", 444 | " \"\"\"This returns the sum of the two inputs.\"\"\"\n", 445 | " return x + y" 446 | ] 447 | }, 448 | { 449 | "cell_type": "code", 450 | "execution_count": null, 451 | "metadata": { 452 | "collapsed": false 453 | }, 454 | "outputs": [], 455 | "source": [ 456 | "my_addition_function(3, 4)" 457 | ] 458 | }, 459 | { 460 | "cell_type": "code", 461 | "execution_count": null, 462 | "metadata": { 463 | "collapsed": false 464 | }, 465 | "outputs": [], 466 | "source": [ 467 | "my_addition_funciton('test', 'foo')" 468 | ] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": { 474 | "collapsed": false 475 | }, 476 | "outputs": [], 477 | "source": [ 478 | "my_addition_funciton([12, 23], 'bar')" 479 | ] 480 | }, 481 | { 482 | "cell_type": "code", 483 | "execution_count": null, 484 | "metadata": { 485 | "collapsed": true 486 | }, 487 | "outputs": [], 488 | "source": [ 489 | "def say_hello(input_string):\n", 490 | " \"\"\"This will print hello to the input_string and return None\"\"\"\n", 491 | " print('hello {}!'.format(input_string))" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": null, 497 | "metadata": { 498 | "collapsed": false 499 | }, 500 | "outputs": [], 501 | "source": [ 502 | "say_hello('test')" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "metadata": { 509 | "collapsed": false 510 | }, 511 | "outputs": [], 512 | "source": [ 513 | "my_return = say_hello('test')" 514 | ] 515 | }, 516 | { 517 | "cell_type": "code", 518 | "execution_count": null, 519 | "metadata": { 520 | "collapsed": true 521 | }, 522 | "outputs": [], 523 | "source": [ 524 | "my_return" 525 | ] 526 | }, 527 | { 528 | "cell_type": "code", 529 | "execution_count": null, 530 | "metadata": { 531 | "collapsed": false 532 | }, 533 | "outputs": [], 534 | "source": [ 
535 | "my_return is None" 536 | ] 537 | }, 538 | { 539 | "cell_type": "code", 540 | "execution_count": null, 541 | "metadata": { 542 | "collapsed": true 543 | }, 544 | "outputs": [], 545 | "source": [ 546 | "def choose_math_with_kwargs(x, y, math_to_run='addition'):\n", 547 | " \"\"\" This will perform math based on the math_to_run keyword argument.\n", 548 | " params:\n", 549 | " x: unknown type\n", 550 | " y: unknown type\n", 551 | " kwargs:\n", 552 | " math_to_run: string (default: addition)\n", 553 | " returns math_to_run using x and y\n", 554 | " \"\"\"\n", 555 | " if math_to_run == 'addition':\n", 556 | " return x + y\n", 557 | " elif math_to_run == 'subtraction':\n", 558 | " return x - y" 559 | ] 560 | }, 561 | { 562 | "cell_type": "code", 563 | "execution_count": null, 564 | "metadata": { 565 | "collapsed": false 566 | }, 567 | "outputs": [], 568 | "source": [ 569 | "choose_math_with_kwargs(2, 3)" 570 | ] 571 | }, 572 | { 573 | "cell_type": "code", 574 | "execution_count": null, 575 | "metadata": { 576 | "collapsed": false 577 | }, 578 | "outputs": [], 579 | "source": [ 580 | "choose_math_with_kwargs(3, 4, math_to_run='addition')" 581 | ] 582 | }, 583 | { 584 | "cell_type": "code", 585 | "execution_count": null, 586 | "metadata": { 587 | "collapsed": false 588 | }, 589 | "outputs": [], 590 | "source": [ 591 | "choose_math_with_kwargs(3, 4, math_to_run='subtraction')" 592 | ] 593 | }, 594 | { 595 | "cell_type": "code", 596 | "execution_count": null, 597 | "metadata": { 598 | "collapsed": true 599 | }, 600 | "outputs": [], 601 | "source": [ 602 | "choose_math_with_kwargs(3, 4, math_to_run='division')" 603 | ] 604 | }, 605 | { 606 | "cell_type": "markdown", 607 | "metadata": {}, 608 | "source": [ 609 | "#### Your turn!\n", 610 | "\n", 611 | "* Implement the other basic math operators\n", 612 | "* Add a default return\n", 613 | "* BONUS: can you send an error message if people input anything other than integers?" 
614 | ] 615 | }, 616 | { 617 | "cell_type": "code", 618 | "execution_count": null, 619 | "metadata": { 620 | "collapsed": true 621 | }, 622 | "outputs": [], 623 | "source": [] 624 | } 625 | ], 626 | "metadata": { 627 | "anaconda-cloud": {}, 628 | "kernelspec": { 629 | "display_name": "Python [default]", 630 | "language": "python", 631 | "name": "python3" 632 | }, 633 | "language_info": { 634 | "codemirror_mode": { 635 | "name": "ipython", 636 | "version": 3 637 | }, 638 | "file_extension": ".py", 639 | "mimetype": "text/x-python", 640 | "name": "python", 641 | "nbconvert_exporter": "python", 642 | "pygments_lexer": "ipython3", 643 | "version": "3.5.2" 644 | } 645 | }, 646 | "nbformat": 4, 647 | "nbformat_minor": 0 648 | } 649 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 01 - csv and tsv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "['Country/Series-specific Notes', 'Scale', 'Estimates Start After', '2015', 'Country', 'Subject Descriptor', '2020', '2019', '2018', 'Units', '2014', '2017', '2016', '2013']\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from csv import DictReader\n", 20 | "\n", 21 | "\n", 22 | "rdr = DictReader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 23 | "\n", 24 | "all_lines = [r for r in rdr]\n", 25 | "\n", 26 | "print all_lines[0].keys()\n", 27 | "\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 2, 33 | "metadata": { 34 | "collapsed": false 35 | }, 36 | "outputs": [ 37 | { 38 | "name": "stdout", 39 | "output_type": "stream", 40 | "text": [ 41 | "Australia: 1,136.565 (2015 Billions)\n", 42 | "Australia: 47,607.697 (2015 Units)\n", 43 | "Austria: 402.420 (2015 Billions)\n", 44 | "Austria: 47,031.004 (2015 Units)\n", 45 | "Belgium: 492.267 (2015 Billions)\n", 46 | "Belgium: 43,800.208 (2015 Units)\n", 47 | "Canada: 1,640.370 (2015 Billions)\n", 48 | "Canada: 45,722.971 (2015 Units)\n", 49 | "Cyprus: 27.700 (2015 Billions)\n", 50 | "Cyprus: 30,769.578 (2015 Units)\n", 51 | "Czech Republic: 325.285 (2015 Billions)\n", 52 | "Czech Republic: 30,895.365 (2015 Units)\n", 53 | "Denmark: 255.866 (2015 Billions)\n", 54 | "Denmark: 45,451.273 (2015 Units)\n", 55 | "Estonia: 36.845 (2015 Billions)\n", 56 | "Estonia: 27,994.860 (2015 Units)\n", 57 | "Finland: 224.846 (2015 Billions)\n", 58 | "Finland: 40,838.367 (2015 Units)\n", 59 | "France: 2,633.896 (2015 Billions)\n", 60 | "France: 41,018.205 (2015 Units)\n", 61 | "Germany: 3,815.462 (2015 Billions)\n", 62 | "Germany: 46,895.970 (2015 Units)\n", 63 | "Greece: 294.014 (2015 Billions)\n", 64 | "Greece: 26,773.369 (2015 Units)\n", 65 | "Hong Kong SAR: 412.300 (2015 Billions)\n", 66 | "Hong Kong SAR: 56,428.069 (2015 Units)\n", 67 | "Iceland: 14.837 (2015 Billions)\n", 68 | "Iceland: 45,268.941 (2015 Units)\n", 69 | "Ireland: 237.629 (2015 Billions)\n", 70 | "Ireland: 51,118.997 (2015 Units)\n", 71 | "Israel: 280.390 (2015 Billions)\n", 72 | "Israel: 33,495.170 (2015 Units)\n", 73 | "Italy: 2,157.123 (2015 Billions)\n", 74 | "Italy: 35,811.443 (2015 Units)\n", 75 | "Japan: 4,843.066 (2015 Billions)\n", 76 | "Japan: 38,215.917 (2015 Units)\n", 77 | "Korea: 1,853.515 (2015 Billions)\n", 78 | "Korea: 36,601.073 (2015 Units)\n", 79 | "Latvia: 49.731 (2015 
Billions)\n", 80 | "Latvia: 24,540.593 (2015 Units)\n", 81 | "Lithuania: 82.622 (2015 Billions)\n", 82 | "Lithuania: 28,210.268 (2015 Units)\n", 83 | "Luxembourg: 53.174 (2015 Billions)\n", 84 | "Luxembourg: 93,173.614 (2015 Units)\n", 85 | "Malta: 14.716 (2015 Billions)\n", 86 | "Malta: 34,544.387 (2015 Units)\n", 87 | "Netherlands: 818.249 (2015 Billions)\n", 88 | "Netherlands: 48,317.050 (2015 Units)\n", 89 | "New Zealand: 164.965 (2015 Billions)\n", 90 | "New Zealand: 36,151.808 (2015 Units)\n", 91 | "Norway: 351.603 (2015 Billions)\n", 92 | "Norway: 67,445.198 (2015 Units)\n", 93 | "Portugal: 287.388 (2015 Billions)\n", 94 | "Portugal: 27,624.227 (2015 Units)\n", 95 | "San Marino: 1.942 (2015 Billions)\n", 96 | "San Marino: 61,836.317 (2015 Units)\n", 97 | "Singapore: 470.551 (2015 Billions)\n", 98 | "Singapore: 85,198.159 (2015 Units)\n", 99 | "Slovak Republic: 158.428 (2015 Billions)\n", 100 | "Slovak Republic: 29,209.564 (2015 Units)\n", 101 | "Slovenia: 62.949 (2015 Billions)\n", 102 | "Slovenia: 30,508.288 (2015 Units)\n", 103 | "Spain: 1,619.093 (2015 Billions)\n", 104 | "Spain: 34,899.403 (2015 Units)\n", 105 | "Sweden: 464.264 (2015 Billions)\n", 106 | "Sweden: 47,228.981 (2015 Units)\n", 107 | "Switzerland: 480.938 (2015 Billions)\n", 108 | "Switzerland: 58,730.924 (2015 Units)\n", 109 | "Taiwan Province of China: 1,125.338 (2015 Billions)\n", 110 | "Taiwan Province of China: 47,898.660 (2015 Units)\n", 111 | "United Kingdom: 2,641.432 (2015 Billions)\n", 112 | "United Kingdom: 40,676.475 (2015 Units)\n", 113 | "United States: 18,124.731 (2015 Billions)\n", 114 | "United States: 56,421.393 (2015 Units)\n", 115 | "ERROR: {'Country/Series-specific Notes': None, 'Scale': None, 'Estimates Start After': None, '2015': None, 'Country': 'International Monetary Fund, World Economic Outlook Database, April 2015', 'Subject Descriptor': None, '2020': None, '2019': None, '2018': None, 'Units': None, '2014': None, '2017': None, '2016': None, '2013': None}\n" 116 | ] 117 | } 118 | ], 119 | "source": [ 120 | "for line in all_lines:\n", 121 | " try:\n", 122 | " if 'Gross domestic product' in line.get('Subject Descriptor') and \\\n", 123 | " 'international dollar' in line.get('Units'):\n", 124 | " print '{}: {} ({} {})'.format(\n", 125 | " line.get('Country'), line.get('2015'), '2015', line.get('Scale'))\n", 126 | " except:\n", 127 | " print \"ERROR: \", line\n" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [] 138 | } 139 | ], 140 | "metadata": { 141 | "kernelspec": { 142 | "display_name": "Python 2", 143 | "language": "python", 144 | "name": "python2" 145 | }, 146 | "language_info": { 147 | "codemirror_mode": { 148 | "name": "ipython", 149 | "version": 2 150 | }, 151 | "file_extension": ".py", 152 | "mimetype": "text/x-python", 153 | "name": "python", 154 | "nbconvert_exporter": "python", 155 | "pygments_lexer": "ipython2", 156 | "version": "2.7.6" 157 | } 158 | }, 159 | "nbformat": 4, 160 | "nbformat_minor": 0 161 | } 162 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 02 - XLSX.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stdout", 12 | "output_type": "stream", 13 | "text": [ 14 | "Data\n", 15 | 
"Definition and Source\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import xlrd\n", 21 | "\n", 22 | "\n", 23 | "notebook = xlrd.open_workbook('../../data/wb/GDP_Current_Dollars.xlsx')\n", 24 | "\n", 25 | "for sheet in notebook.sheets():\n", 26 | " print sheet.name" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 2, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [ 36 | { 37 | "name": "stdout", 38 | "output_type": "stream", 39 | "text": [ 40 | "[u'Series Name', u'Series Code', u'Country Name', u'Country Code', u'1990 [YR1990]', u'2000 [YR2000]', u'2005 [YR2005]', u'2006 [YR2006]', u'2007 [YR2007]', u'2008 [YR2008]', u'2009 [YR2009]', u'2010 [YR2010]', u'2011 [YR2011]', u'2012 [YR2012]', u'2013 [YR2013]', u'2014 [YR2014]']\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "sheet = notebook.sheet_by_name('Data')\n", 46 | "\n", 47 | "titles = sheet.row_values(0)\n", 48 | "print titles" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": { 55 | "collapsed": false 56 | }, 57 | "outputs": [ 58 | { 59 | "name": "stdout", 60 | "output_type": "stream", 61 | "text": [ 62 | "Afghanistan 20841951232.0\n", 63 | "Albania 13370191506.0\n", 64 | "Algeria 2.14063173188e+11\n", 65 | "American Samoa ..\n", 66 | "Andorra ..\n", 67 | "Angola 1.31400635026e+11\n", 68 | "Antigua and Barbuda 1269117037.04\n", 69 | "Argentina 5.40197457444e+11\n", 70 | "Armenia 10881605059.4\n", 71 | "Aruba ..\n", 72 | "Australia 1.45377021067e+12\n", 73 | "Austria 4.36343622435e+11\n", 74 | "Azerbaijan 75198010965.2\n", 75 | "Bahamas, The 8510500000.0\n", 76 | "Bahrain 33868989361.7\n", 77 | "Bangladesh 1.73818932216e+11\n", 78 | "Barbados 4348000000.0\n", 79 | "Belarus 76139250364.5\n", 80 | "Belgium 5.33382785676e+11\n", 81 | "Belize ..\n", 82 | "Benin 8746992733.5\n", 83 | "Bermuda ..\n", 84 | "Bhutan 1821412872.84\n", 85 | "Bolivia 34175832127.4\n", 86 | "Bosnia and Herzegovina 18344278252.6\n", 87 | "Botswana 15813371063.2\n", 88 | "Brazil 2.34611817519e+12\n", 89 | "Brunei Darussalam 17256754269.2\n", 90 | "Bulgaria 55734676434.7\n", 91 | "Burkina Faso 12542969275.2\n", 92 | "Burundi 3093647293.48\n", 93 | "Cabo Verde 1871187333.5\n", 94 | "Cambodia 16709432402.7\n", 95 | "Cameroon 32548591285.9\n", 96 | "Canada 1.78665506451e+12\n", 97 | "Cayman Islands ..\n", 98 | "Central African Republic 1782927902.67\n", 99 | "Chad 13922224560.8\n", 100 | "Channel Islands ..\n", 101 | "Chile 2.58061522887e+11\n", 102 | "China 1.03601052479e+13\n", 103 | "Colombia 3.77739622866e+11\n", 104 | "Comoros 647720102.468\n", 105 | "Congo, Dem. Rep. 32962261155.7\n", 106 | "Congo, Rep. 14135462555.8\n", 107 | "Costa Rica 49552580683.1\n", 108 | "Cote d'Ivoire 34253611098.3\n", 109 | "Croatia 57222574023.2\n", 110 | "Cuba ..\n", 111 | "Curacao ..\n", 112 | "Cyprus 23226158986.2\n", 113 | "Czech Republic 2.05522871251e+11\n", 114 | "Denmark 3.4195160773e+11\n", 115 | "Djibouti 1581519705.53\n", 116 | "Dominica 537777777.778\n", 117 | "Dominican Republic 63968961563.1\n", 118 | "Ecuador 1.00543173e+11\n", 119 | "Egypt, Arab Rep. 
2.86538047766e+11\n", 120 | "El Salvador 25220000000.0\n", 121 | "Equatorial Guinea 14308094224.7\n", 122 | "Eritrea 3857821138.21\n", 123 | "Estonia 25904874312.3\n", 124 | "Ethiopia 54797679657.5\n", 125 | "Faeroe Islands ..\n", 126 | "Fiji 4029989728.83\n", 127 | "Finland 2.70673584162e+11\n", 128 | "France 2.82919203917e+12\n", 129 | "French Polynesia ..\n", 130 | "Gabon 17228443336.4\n", 131 | "Gambia, The 807069488.192\n", 132 | "Georgia 16529963187.4\n", 133 | "Germany 3.85255616966e+12\n", 134 | "Ghana 38648154100.4\n", 135 | "Greece 2.37592274371e+11\n", 136 | "Greenland ..\n", 137 | "Grenada 882222222.222\n", 138 | "Guam ..\n", 139 | "Guatemala 58728232327.2\n", 140 | "Guinea 6624068036.99\n", 141 | "Guinea-Bissau 1022371991.53\n", 142 | "Guyana 3228372887.86\n", 143 | "Haiti 8713031260.19\n", 144 | "Honduras 19385309985.8\n", 145 | "Hong Kong SAR, China 2.90896409544e+11\n", 146 | "Hungary 1.37103927313e+11\n", 147 | "Iceland 17071004499.2\n", 148 | "India 2.06690239733e+12\n", 149 | "Indonesia 8.88538201025e+11\n", 150 | "Iran, Islamic Rep. 4.15338504536e+11\n", 151 | "Iraq 2.20505682865e+11\n", 152 | "Ireland 2.45920712756e+11\n", 153 | "Isle of Man ..\n", 154 | "Israel 3.0422633627e+11\n", 155 | "Italy 2.14433818506e+12\n", 156 | "Jamaica ..\n", 157 | "Japan 4.60146120689e+12\n", 158 | "Jordan 35826925774.6\n", 159 | "Kazakhstan 2.12247913268e+11\n", 160 | "Kenya 60936509778.0\n", 161 | "Kiribati 166762323.639\n", 162 | "Korea, Dem. Rep. ..\n", 163 | "Korea, Rep. 1.41038294397e+12\n", 164 | "Kosovo 7273849011.54\n", 165 | "Kuwait ..\n", 166 | "Kyrgyz Republic 7404412710.31\n", 167 | "Lao PDR 11771725797.6\n", 168 | "Latvia 31920815648.3\n", 169 | "Lebanon 45730945273.6\n", 170 | "Lesotho 2088021624.11\n", 171 | "Liberia 2026939595.32\n", 172 | "Libya 41119144923.0\n", 173 | "Liechtenstein ..\n", 174 | "Lithuania 48172242517.3\n", 175 | "Luxembourg ..\n", 176 | "Macao SAR, China 55501532528.1\n", 177 | "Macedonia, FYR 11323761623.5\n", 178 | "Madagascar 10593147526.9\n", 179 | "Malawi 4258033615.3\n", 180 | "Malaysia 3.26933043801e+11\n", 181 | "Maldives 3032239478.12\n", 182 | "Mali 12074473001.8\n", 183 | "Malta ..\n", 184 | "Marshall Islands ..\n", 185 | "Mauritania 5061180371.05\n", 186 | "Mauritius 12616421088.4\n", 187 | "Mexico 1.28271995486e+12\n", 188 | "Micronesia, Fed. Sts. 
..\n", 189 | "Moldova 7944184929.75\n", 190 | "Monaco ..\n", 191 | "Mongolia 12015944336.5\n", 192 | "Montenegro 4583198885.5\n", 193 | "Morocco 1.07004984357e+11\n", 194 | "Mozambique 16385584919.0\n", 195 | "Myanmar 64330038664.7\n", 196 | "Namibia 13429503284.9\n", 197 | "Nepal 19636186469.3\n", 198 | "Netherlands 8.6950812548e+11\n", 199 | "New Caledonia ..\n", 200 | "New Zealand ..\n", 201 | "Nicaragua 11805641286.8\n", 202 | "Niger 8168695869.87\n", 203 | "Nigeria 5.68508262378e+11\n", 204 | "Northern Mariana Islands ..\n", 205 | "Norway 5.00103094419e+11\n", 206 | "Oman 81796618985.7\n", 207 | "Pakistan 2.46876324189e+11\n", 208 | "Palau 250625562.794\n", 209 | "Panama 46212600000.0\n", 210 | "Papua New Guinea ..\n", 211 | "Paraguay 30984747863.3\n", 212 | "Peru 2.02902760293e+11\n", 213 | "Philippines 2.84582023121e+11\n", 214 | "Poland 5.48003360279e+11\n", 215 | "Portugal 2.2958371149e+11\n", 216 | "Puerto Rico ..\n", 217 | "Qatar 2.11816758242e+11\n", 218 | "Romania 1.99043652215e+11\n", 219 | "Russian Federation 1.86059792276e+12\n", 220 | "Rwanda 7890190336.75\n", 221 | "Samoa 800586671.241\n", 222 | "San Marino ..\n", 223 | "Sao Tome and Principe 334902362.057\n", 224 | "Saudi Arabia 7.46248533333e+11\n", 225 | "Senegal 15578916865.4\n", 226 | "Serbia 43866423166.9\n", 227 | "Seychelles 1405764157.88\n", 228 | "Sierra Leone 4892363979.23\n", 229 | "Singapore 3.07871907186e+11\n", 230 | "Sint Maarten (Dutch part) ..\n", 231 | "Slovak Republic 99790145652.8\n", 232 | "Slovenia 49416055609.2\n", 233 | "Solomon Islands 1158183053.76\n", 234 | "Somalia ..\n", 235 | "South Africa 3.49817096206e+11\n", 236 | "South Sudan 13069991258.3\n", 237 | "Spain 1.40430653606e+12\n", 238 | "Sri Lanka 74941183242.0\n", 239 | "St. Kitts and Nevis 833333333.333\n", 240 | "St. Lucia 1365426555.56\n", 241 | "St. Martin (French part) ..\n", 242 | "St. Vincent and the Grenadines 728696703.704\n", 243 | "Sudan 73815376184.6\n", 244 | "Suriname ..\n", 245 | "Swaziland 3400422936.23\n", 246 | "Sweden 5.7059126616e+11\n", 247 | "Switzerland ..\n", 248 | "Syrian Arab Republic ..\n", 249 | "Taiwan, China ..\n", 250 | "Tajikistan 9241627840.61\n", 251 | "Tanzania 49183884817.5\n", 252 | "Thailand 3.73804134912e+11\n", 253 | "Timor-Leste 1552000000.0\n", 254 | "Togo 4518443907.45\n", 255 | "Tonga 434386307.693\n", 256 | "Trinidad and Tobago ..\n", 257 | "Tunisia ..\n", 258 | "Turkey 7.99534963354e+11\n", 259 | "Turkmenistan 47931929824.6\n", 260 | "Turks and Caicos Islands ..\n", 261 | "Tuvalu ..\n", 262 | "Uganda 26312399301.4\n", 263 | "Ukraine 1.31805126738e+11\n", 264 | "United Arab Emirates 4.01646583173e+11\n", 265 | "United Kingdom 2.94188553746e+12\n", 266 | "United States 1.7419e+13\n", 267 | "Uruguay 57471277325.1\n", 268 | "Uzbekistan 62643953021.8\n", 269 | "Vanuatu ..\n", 270 | "Venezuela, RB 5.09964084931e+11\n", 271 | "Vietnam 1.86204652922e+11\n", 272 | "Virgin Islands (U.S.) ..\n", 273 | "West Bank and Gaza 12737613125.0\n", 274 | "Yemen, Rep. 
..\n", 275 | "Zambia 27066230009.1\n", 276 | "Zimbabwe 13663314279.7\n", 277 | " \n", 278 | " \n", 279 | " \n", 280 | " \n", 281 | " \n" 282 | ] 283 | } 284 | ], 285 | "source": [ 286 | "def build_array(sheet, titles, start_row=1):\n", 287 | " new_arr = []\n", 288 | " while start_row < sheet.nrows:\n", 289 | " new_arr.append(\n", 290 | " dict(zip(titles, sheet.row_values(start_row)))\n", 291 | " )\n", 292 | " start_row += 1\n", 293 | " return new_arr\n", 294 | "\n", 295 | "\n", 296 | "arr = build_array(sheet, titles)\n", 297 | "\n", 298 | "for line in arr:\n", 299 | " print line.get('Country Name'), line.get('2014 [YR2014]')\n" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": true 307 | }, 308 | "outputs": [], 309 | "source": [] 310 | } 311 | ], 312 | "metadata": { 313 | "kernelspec": { 314 | "display_name": "Python 2", 315 | "language": "python", 316 | "name": "python2" 317 | }, 318 | "language_info": { 319 | "codemirror_mode": { 320 | "name": "ipython", 321 | "version": 2 322 | }, 323 | "file_extension": ".py", 324 | "mimetype": "text/x-python", 325 | "name": "python", 326 | "nbconvert_exporter": "python", 327 | "pygments_lexer": "ipython2", 328 | "version": "2.7.6" 329 | } 330 | }, 331 | "nbformat": 4, 332 | "nbformat_minor": 0 333 | } 334 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 03 - databasing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "data": { 12 | "text/plain": [ 13 | "2" 14 | ] 15 | }, 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "output_type": "execute_result" 19 | } 20 | ], 21 | "source": [ 22 | "import dataset\n", 23 | "\n", 24 | "db = dataset.connect('sqlite:///../../data/data_analysis.db')\n", 25 | "\n", 26 | "my_sources = db['sources']\n", 27 | "\n", 28 | "my_sources.insert({'organization': 'IMF',\n", 29 | " 'file_name': 'imf_indicators.tsv',\n", 30 | " 'url': 'http://www.imf.org/external/pubs/ft/weo/2015/01/weodata/index.aspx',\n", 31 | " 'description': 'IMF World Economic Outlook Dataset',\n", 32 | " })\n", 33 | "\n", 34 | "my_sources.insert({'organization': 'World Bank',\n", 35 | " 'file_name': 'wb/GDP_Current_Dollars.xlsx',\n", 36 | " 'url': 'http://databank.worldbank.org/data/reports.aspx?source=2&series=NY.GDP.MKTP.CD#',\n", 37 | " 'description': 'World Bank GDP Dataset',\n", 38 | " })" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 3, 44 | "metadata": { 45 | "collapsed": false 46 | }, 47 | "outputs": [ 48 | { 49 | "name": "stdout", 50 | "output_type": "stream", 51 | "text": [ 52 | "[u'sources']\n", 53 | "IMF World Economic Outlook Dataset\n", 54 | "World Bank GDP Dataset\n" 55 | ] 56 | } 57 | ], 58 | "source": [ 59 | "print db.tables\n", 60 | "\n", 61 | "for row in db['sources']:\n", 62 | " print row['description']\n" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [] 73 | } 74 | ], 75 | "metadata": { 76 | "kernelspec": { 77 | "display_name": "Python 2", 78 | "language": "python", 79 | "name": "python2" 80 | }, 81 | "language_info": { 82 | "codemirror_mode": { 83 | "name": "ipython", 84 | "version": 2 85 | }, 86 | "file_extension": ".py", 87 | "mimetype": "text/x-python", 88 | 
"name": "python", 89 | "nbconvert_exporter": "python", 90 | "pygments_lexer": "ipython2", 91 | "version": "2.7.6" 92 | } 93 | }, 94 | "nbformat": 4, 95 | "nbformat_minor": 0 96 | } 97 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 04 - API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 5, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "def get_story(story_id):\n", 23 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 24 | " resp = requests.get(url)\n", 25 | " return resp.json()\n" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 6, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def get_top_stories():\n", 37 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 38 | " resp = requests.get(url)\n", 39 | " all_stories = [get_story(sid) for sid in resp.json()[:10]]\n", 40 | " return all_stories\n", 41 | " " 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 7, 47 | "metadata": { 48 | "collapsed": false 49 | }, 50 | "outputs": [ 51 | { 52 | "data": { 53 | "text/plain": [ 54 | "[{u'by': u'jmduke',\n", 55 | " u'descendants': 38,\n", 56 | " u'id': 9920121,\n", 57 | " u'kids': [9920697,\n", 58 | " 9920460,\n", 59 | " 9920607,\n", 60 | " 9920308,\n", 61 | " 9920772,\n", 62 | " 9920837,\n", 63 | " 9920324,\n", 64 | " 9920681,\n", 65 | " 9920736,\n", 66 | " 9920671,\n", 67 | " 9920342,\n", 68 | " 9920413,\n", 69 | " 9920211,\n", 70 | " 9920796,\n", 71 | " 9920417,\n", 72 | " 9920371,\n", 73 | " 9920541],\n", 74 | " u'score': 228,\n", 75 | " u'text': u'',\n", 76 | " u'time': 1437441778,\n", 77 | " u'title': u'Web Design: The First 100 Years (2014)',\n", 78 | " u'type': u'story',\n", 79 | " u'url': u'http://idlewords.com/talks/web_design_first_100_years.htm'},\n", 80 | " {u'by': u'mattkrisiloff',\n", 81 | " u'descendants': 328,\n", 82 | " u'id': 9917334,\n", 83 | " u'kids': [9919609,\n", 84 | " 9919370,\n", 85 | " 9917792,\n", 86 | " 9919301,\n", 87 | " 9918094,\n", 88 | " 9917359,\n", 89 | " 9918659,\n", 90 | " 9917458,\n", 91 | " 9918755,\n", 92 | " 9920544,\n", 93 | " 9918719,\n", 94 | " 9917583,\n", 95 | " 9918554,\n", 96 | " 9917593,\n", 97 | " 9920819,\n", 98 | " 9920656,\n", 99 | " 9917437,\n", 100 | " 9920427,\n", 101 | " 9917400,\n", 102 | " 9920266,\n", 103 | " 9917880,\n", 104 | " 9919417,\n", 105 | " 9920224,\n", 106 | " 9920647,\n", 107 | " 9920752,\n", 108 | " 9917781,\n", 109 | " 9920151,\n", 110 | " 9920655,\n", 111 | " 9918291,\n", 112 | " 9920614,\n", 113 | " 9920200,\n", 114 | " 9917372,\n", 115 | " 9917422,\n", 116 | " 9920168,\n", 117 | " 9917384,\n", 118 | " 9917689,\n", 119 | " 9920454,\n", 120 | " 9917830,\n", 121 | " 9920517,\n", 122 | " 9919318,\n", 123 | " 9918802,\n", 124 | " 9919573,\n", 125 | " 9920192,\n", 126 | " 9919447,\n", 127 | " 9918062,\n", 128 | " 9918891,\n", 129 | " 9918944,\n", 130 | " 9919559,\n", 131 | " 9919709,\n", 132 | " 9918183,\n", 133 | " 9918189,\n", 134 | " 9917945,\n", 135 | " 9917587,\n", 136 | " 9918200,\n", 137 | " 9918137,\n", 138 | " 9918176,\n", 139 | " 9919437,\n", 140 | " 9918344,\n", 141 | " 9919350,\n", 142 
| " 9918233,\n", 143 | " 9918086,\n", 144 | " 9917799,\n", 145 | " 9919928,\n", 146 | " 9917373,\n", 147 | " 9918758,\n", 148 | " 9919230,\n", 149 | " 9919890,\n", 150 | " 9918834,\n", 151 | " 9918595,\n", 152 | " 9917551,\n", 153 | " 9917466,\n", 154 | " 9919406,\n", 155 | " 9917760,\n", 156 | " 9917849,\n", 157 | " 9917467,\n", 158 | " 9917399,\n", 159 | " 9917889,\n", 160 | " 9917561,\n", 161 | " 9918942,\n", 162 | " 9917440,\n", 163 | " 9917416],\n", 164 | " u'score': 847,\n", 165 | " u'text': u'',\n", 166 | " u'time': 1437411626,\n", 167 | " u'title': u'YC Fellowship',\n", 168 | " u'type': u'story',\n", 169 | " u'url': u'http://blog.ycombinator.com/yc-fellowship'},\n", 170 | " {u'by': u'cnnrjcbsn',\n", 171 | " u'descendants': 124,\n", 172 | " u'id': 9917442,\n", 173 | " u'kids': [9918674,\n", 174 | " 9917665,\n", 175 | " 9917794,\n", 176 | " 9919619,\n", 177 | " 9917729,\n", 178 | " 9917730,\n", 179 | " 9917897,\n", 180 | " 9918958,\n", 181 | " 9920829,\n", 182 | " 9918130,\n", 183 | " 9918009,\n", 184 | " 9918243,\n", 185 | " 9918382,\n", 186 | " 9917660,\n", 187 | " 9917734,\n", 188 | " 9918734,\n", 189 | " 9918271,\n", 190 | " 9920838,\n", 191 | " 9918612,\n", 192 | " 9918204,\n", 193 | " 9919142,\n", 194 | " 9919320,\n", 195 | " 9918146,\n", 196 | " 9918359,\n", 197 | " 9918417,\n", 198 | " 9917611,\n", 199 | " 9919686,\n", 200 | " 9918051,\n", 201 | " 9918492,\n", 202 | " 9919432,\n", 203 | " 9917647,\n", 204 | " 9917726,\n", 205 | " 9919511,\n", 206 | " 9918617,\n", 207 | " 9917837,\n", 208 | " 9918552,\n", 209 | " 9917702,\n", 210 | " 9917670,\n", 211 | " 9917631,\n", 212 | " 9920323,\n", 213 | " 9919171,\n", 214 | " 9918493,\n", 215 | " 9920355,\n", 216 | " 9918174,\n", 217 | " 9917650,\n", 218 | " 9918266],\n", 219 | " u'score': 774,\n", 220 | " u'text': u'',\n", 221 | " u'time': 1437412422,\n", 222 | " u'title': u'Grooveshark co-founder, 28, found dead in home',\n", 223 | " u'type': u'story',\n", 224 | " u'url': u'http://www.gainesville.com/article/20150720/ARTICLES/150729990'},\n", 225 | " {u'by': u'ivank',\n", 226 | " u'descendants': 47,\n", 227 | " u'id': 9919641,\n", 228 | " u'kids': [9919937,\n", 229 | " 9919764,\n", 230 | " 9919923,\n", 231 | " 9919994,\n", 232 | " 9919797,\n", 233 | " 9920002,\n", 234 | " 9920592,\n", 235 | " 9919703,\n", 236 | " 9919964,\n", 237 | " 9919986],\n", 238 | " u'score': 107,\n", 239 | " u'text': u'',\n", 240 | " u'time': 1437432872,\n", 241 | " u'title': u'SpaceX CRS-7 Failure Investigation Teleconference Thread',\n", 242 | " u'type': u'story',\n", 243 | " u'url': u'https://www.reddit.com/r/spacex/comments/3dyvta/rspacex_crs7_failure_investigation_teleconference/'},\n", 244 | " {u'by': u'dluan',\n", 245 | " u'descendants': 26,\n", 246 | " u'id': 9919852,\n", 247 | " u'kids': [9920313,\n", 248 | " 9920021,\n", 249 | " 9920368,\n", 250 | " 9920222,\n", 251 | " 9920009,\n", 252 | " 9920172,\n", 253 | " 9920030,\n", 254 | " 9920574,\n", 255 | " 9920113,\n", 256 | " 9920060,\n", 257 | " 9920019,\n", 258 | " 9920015,\n", 259 | " 9920011],\n", 260 | " u'score': 94,\n", 261 | " u'text': u'',\n", 262 | " u'time': 1437436325,\n", 263 | " u'title': u'Cindy Wu and Experiment.com (YC W13)',\n", 264 | " u'type': u'story',\n", 265 | " u'url': u'http://nytlive.nytimes.com/womenintheworld/2015/07/08/woman-raised-1-2-million-with-a-spirited-3-minute-speech/'},\n", 266 | " {u'by': u'aaronbrethorst',\n", 267 | " u'descendants': 141,\n", 268 | " u'id': 9916974,\n", 269 | " u'kids': [9918485,\n", 270 | " 9919264,\n", 271 | " 9918591,\n", 272 | " 9920201,\n", 
273 | " 9919838,\n", 274 | " 9918105,\n", 275 | " 9919096,\n", 276 | " 9919121,\n", 277 | " 9919004,\n", 278 | " 9918240,\n", 279 | " 9919462,\n", 280 | " 9919676,\n", 281 | " 9919382,\n", 282 | " 9918181,\n", 283 | " 9917957,\n", 284 | " 9919157,\n", 285 | " 9919517,\n", 286 | " 9917939,\n", 287 | " 9918727,\n", 288 | " 9920566,\n", 289 | " 9918202],\n", 290 | " u'score': 244,\n", 291 | " u'text': u'',\n", 292 | " u'time': 1437408433,\n", 293 | " u'title': u'A Man Who Flies Around the World for Free',\n", 294 | " u'type': u'story',\n", 295 | " u'url': u'http://www.rollingstone.com/culture/features/ben-schlappig-airlines-fly-free-20150720'},\n", 296 | " {u'by': u'monkeypod',\n", 297 | " u'descendants': 86,\n", 298 | " u'id': 9918273,\n", 299 | " u'kids': [9918498,\n", 300 | " 9919239,\n", 301 | " 9919056,\n", 302 | " 9918603,\n", 303 | " 9918897,\n", 304 | " 9918632,\n", 305 | " 9919476,\n", 306 | " 9918479,\n", 307 | " 9918826,\n", 308 | " 9919160,\n", 309 | " 9918695,\n", 310 | " 9918751,\n", 311 | " 9918657,\n", 312 | " 9919303,\n", 313 | " 9920643,\n", 314 | " 9919506,\n", 315 | " 9918761],\n", 316 | " u'score': 154,\n", 317 | " u'text': u'',\n", 318 | " u'time': 1437419219,\n", 319 | " u'title': u'Coin Card Teardown',\n", 320 | " u'type': u'story',\n", 321 | " u'url': u'http://www.bitsofcents.com/post/124593977646/coin-card-teardown'},\n", 322 | " {u'by': u'_jomo',\n", 323 | " u'descendants': 37,\n", 324 | " u'id': 9918862,\n", 325 | " u'kids': [9920870,\n", 326 | " 9919229,\n", 327 | " 9919270,\n", 328 | " 9919307,\n", 329 | " 9919272,\n", 330 | " 9920765,\n", 331 | " 9920038,\n", 332 | " 9920522,\n", 333 | " 9919992,\n", 334 | " 9919210,\n", 335 | " 9919162,\n", 336 | " 9920325,\n", 337 | " 9920337,\n", 338 | " 9920350],\n", 339 | " u'score': 116,\n", 340 | " u'text': u'',\n", 341 | " u'time': 1437424405,\n", 342 | " u'title': u'Google, the Wassenaar Arrangement, and vulnerability research',\n", 343 | " u'type': u'story',\n", 344 | " u'url': u'https://googleonlinesecurity.blogspot.com/2015/07/google-wassenaar-arrangement-and.html'},\n", 345 | " {u'by': u'TheRedBarron',\n", 346 | " u'descendants': 1,\n", 347 | " u'id': 9920756,\n", 348 | " u'kids': [9920813],\n", 349 | " u'score': 5,\n", 350 | " u'text': u'',\n", 351 | " u'time': 1437458633,\n", 352 | " u'title': u'Remember the Tetris Lamp? 
Here is proof that two sets can create a rectangle',\n", 353 | " u'type': u'story',\n", 354 | " u'url': u'https://barronwasteland.wordpress.com/2015/07/21/527/'},\n", 355 | " {u'by': u'moyayo',\n", 356 | " u'id': 9920815,\n", 357 | " u'score': 1,\n", 358 | " u'text': u'',\n", 359 | " u'time': 1437460741,\n", 360 | " u'title': u'SmartSpot (YC W15) Is Hiring Full Time iOS Dev Passionate about Fitness',\n", 361 | " u'type': u'job',\n", 362 | " u'url': u'https://smartspot.wufoo.com/forms/smartspot-hiring/'}]" 363 | ] 364 | }, 365 | "execution_count": 7, 366 | "metadata": {}, 367 | "output_type": "execute_result" 368 | } 369 | ], 370 | "source": [ 371 | "get_top_stories()" 372 | ] 373 | }, 374 | { 375 | "cell_type": "code", 376 | "execution_count": null, 377 | "metadata": { 378 | "collapsed": true 379 | }, 380 | "outputs": [], 381 | "source": [] 382 | } 383 | ], 384 | "metadata": { 385 | "kernelspec": { 386 | "display_name": "Python 2", 387 | "language": "python", 388 | "name": "python2" 389 | }, 390 | "language_info": { 391 | "codemirror_mode": { 392 | "name": "ipython", 393 | "version": 2 394 | }, 395 | "file_extension": ".py", 396 | "mimetype": "text/x-python", 397 | "name": "python", 398 | "nbconvert_exporter": "python", 399 | "pygments_lexer": "ipython2", 400 | "version": "2.7.6" 401 | } 402 | }, 403 | "nbformat": 4, 404 | "nbformat_minor": 0 405 | } 406 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 05 - Regex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "import re\n", 13 | "from multiprocessing import Process, Manager\n", 14 | "\n", 15 | "MATCHING = (\n", 16 | " ('Python', '(p|P)ython'),\n", 17 | " ('Ruby', '(r|R)uby'),\n", 18 | " ('JavaScript', 'js|(J|j)ava(s|S)cript'),\n", 19 | " ('NodeJS', 'node(\\.?)(?:\\js|JS)'),\n", 20 | " ('Java', '(j|J)ava[^(S|s)cript]'),\n", 21 | " ('Objective-C', 'Obj(ective?)(?:\\ |-)(C|c)'),\n", 22 | ")" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def get_story(story_id, stories):\n", 34 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 35 | " resp = requests.get(url)\n", 36 | " stories.append(resp.json())" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def get_top_stories():\n", 48 | " manager = Manager()\n", 49 | " stories = manager.list()\n", 50 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 51 | " ids = requests.get(url)\n", 52 | " processes = [Process(target=get_story, args=(sid, stories))\n", 53 | " for sid in ids.json()[:40]]\n", 54 | " for p in processes:\n", 55 | " p.start()\n", 56 | " for p in processes:\n", 57 | " p.join()\n", 58 | " return stories" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "def count_languages():\n", 70 | " stories = get_top_stories()\n", 71 | " final_tallies = {}\n", 72 | " for s in stories:\n", 73 | " long_string = u'{} {}'.format(s.get('title'), s.get('url'))\n", 74 | " for language, regex in 
dict(MATCHING).items():\n", 75 | " if re.search(regex, long_string):\n", 76 | " if language not in final_tallies.keys():\n", 77 | " final_tallies[language] = {\n", 78 | " 'score': s.get('score'),\n", 79 | " 'descendants': s.get('descendants')}\n", 80 | " else:\n", 81 | " final_tallies[language]['score'] += s.get('score')\n", 82 | " final_tallies[language][\n", 83 | " 'descendants'] += s.get('descendants')\n", 84 | " return final_tallies" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": 5, 90 | "metadata": { 91 | "collapsed": false 92 | }, 93 | "outputs": [ 94 | { 95 | "data": { 96 | "text/plain": [ 97 | "{'JavaScript': {'descendants': 54, 'score': 88},\n", 98 | " 'Python': {'descendants': 1, 'score': 28},\n", 99 | " 'Ruby': {'descendants': 11, 'score': 59}}" 100 | ] 101 | }, 102 | "execution_count": 5, 103 | "metadata": {}, 104 | "output_type": "execute_result" 105 | } 106 | ], 107 | "source": [ 108 | "count_languages()" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": { 115 | "collapsed": true 116 | }, 117 | "outputs": [], 118 | "source": [] 119 | } 120 | ], 121 | "metadata": { 122 | "kernelspec": { 123 | "display_name": "Python 2", 124 | "language": "python", 125 | "name": "python2" 126 | }, 127 | "language_info": { 128 | "codemirror_mode": { 129 | "name": "ipython", 130 | "version": 2 131 | }, 132 | "file_extension": ".py", 133 | "mimetype": "text/x-python", 134 | "name": "python", 135 | "nbconvert_exporter": "python", 136 | "pygments_lexer": "ipython2", 137 | "version": "2.7.6" 138 | } 139 | }, 140 | "nbformat": 4, 141 | "nbformat_minor": 0 142 | } 143 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 07 - Calculate.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import calculate\n", 12 | "import requests\n", 13 | "from multiprocessing import Process, Manager\n", 14 | "from decimal import Decimal" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "def get_story(story_id, stories):\n", 26 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 27 | " resp = requests.get(url)\n", 28 | " story_data = resp.json()\n", 29 | " user_data = get_user(story_data.get('by'))\n", 30 | " story_data['user_karma'] = user_data.get('karma') or 0\n", 31 | " stories.append(story_data)\n", 32 | " return stories" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "def get_user(user_id):\n", 44 | " url = 'https://hacker-news.firebaseio.com/v0/user/%s.json' % user_id\n", 45 | " resp = requests.get(url)\n", 46 | " return resp.json()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 4, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "def get_top_stories_with_user_karma():\n", 58 | " manager = Manager()\n", 59 | " stories = manager.list()\n", 60 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 61 | " ids = requests.get(url)\n", 62 | " processes = [Process(target=get_story, args=(sid, stories))\n", 63 | " for sid in 
ids.json()[:40]]\n", 64 | " for p in processes:\n", 65 | " p.start()\n", 66 | " for p in processes:\n", 67 | " p.join()\n", 68 | " return stories" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "def calculate_summary_karma():\n", 80 | " stories = get_top_stories_with_user_karma()\n", 81 | " return calculate.summary_stats([\n", 82 | " Decimal(s.get('score')) for s in stories])" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "metadata": { 89 | "collapsed": true 90 | }, 91 | "outputs": [], 92 | "source": [ 93 | "def pearsons_karma():\n", 94 | " stories = get_top_stories_with_user_karma()\n", 95 | " user_karma = [Decimal(s.get('user_karma')) for s in stories]\n", 96 | " story_karma = [Decimal(s.get('score')) for s in stories]\n", 97 | " return calculate.pearson(user_karma, story_karma)" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": 7, 103 | "metadata": { 104 | "collapsed": false 105 | }, 106 | "outputs": [ 107 | { 108 | "data": { 109 | "text/plain": [ 110 | "-0.005659751973886795" 111 | ] 112 | }, 113 | "execution_count": 7, 114 | "metadata": {}, 115 | "output_type": "execute_result" 116 | } 117 | ], 118 | "source": [ 119 | "pearsons_karma()" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 8, 125 | "metadata": { 126 | "collapsed": false 127 | }, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "| Statistic | Value |\n", 134 | "-----------------------------------------\n", 135 | "| n | 40 |\n", 136 | "| mean | 110.3 |\n", 137 | "| median | 32.0 |\n", 138 | "| mode | None |\n", 139 | "| maximum | 934 |\n", 140 | "| minimum | 1 |\n", 141 | "| range | 933.0 |\n", 142 | "| standard deviation | 211.448244258 |\n", 143 | "| variation coefficient | 1.91702850642 |\n", 144 | "\n" 145 | ] 146 | } 147 | ], 148 | "source": [ 149 | "calculate_summary_karma()" 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": null, 155 | "metadata": { 156 | "collapsed": true 157 | }, 158 | "outputs": [], 159 | "source": [] 160 | } 161 | ], 162 | "metadata": { 163 | "kernelspec": { 164 | "display_name": "Python 2", 165 | "language": "python", 166 | "name": "python2" 167 | }, 168 | "language_info": { 169 | "codemirror_mode": { 170 | "name": "ipython", 171 | "version": 2 172 | }, 173 | "file_extension": ".py", 174 | "mimetype": "text/x-python", 175 | "name": "python", 176 | "nbconvert_exporter": "python", 177 | "pygments_lexer": "ipython2", 178 | "version": "2.7.6" 179 | } 180 | }, 181 | "nbformat": 4, 182 | "nbformat_minor": 0 183 | } 184 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 08 - Journalism Library.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 15, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import journalism\n", 12 | "import logging\n", 13 | "from csv import reader\n", 14 | "\n", 15 | "text_type = journalism.TextType()\n", 16 | "number_type = journalism.NumberType()\n", 17 | "date_type = journalism.DateType()" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 16, 23 | "metadata": { 24 | "collapsed": true 25 | }, 26 | "outputs": [], 27 | "source": [ 28 | "def get_table(datarows, 
types, titles):\n", 29 | " try:\n", 30 | " table = journalism.Table(datarows, types, titles)\n", 31 | " return table\n", 32 | " except:\n", 33 | " logging.exception('problem loading table')\n", 34 | " return None" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 17, 40 | "metadata": { 41 | "collapsed": true 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "def clean_text(row):\n", 46 | " new_row = []\n", 47 | " for item in row:\n", 48 | " if isinstance(item, (str, unicode)):\n", 49 | " item = item.decode('utf-8', 'replace')\n", 50 | " if item in [u'--', u'n/a']:\n", 51 | " item = None\n", 52 | " new_row.append(item)\n", 53 | "\n", 54 | " return new_row" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 18, 60 | "metadata": { 61 | "collapsed": true 62 | }, 63 | "outputs": [], 64 | "source": [ 65 | "def clean_rows(all_rows):\n", 66 | " new_data = []\n", 67 | " for row in all_rows:\n", 68 | " new_data.append(clean_text(row))\n", 69 | " return new_data" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 22, 75 | "metadata": { 76 | "collapsed": true 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "def load_imf_data():\n", 81 | " rdr = reader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 82 | " all_rows = [r for r in rdr if len(r) > 1]\n", 83 | " titles = all_rows.pop(0)\n", 84 | " cleaned_rows = clean_rows(all_rows)\n", 85 | " types = [text_type, text_type, text_type, text_type, text_type,\n", 86 | " number_type, number_type, number_type, number_type,\n", 87 | " number_type, number_type, number_type, number_type,\n", 88 | " date_type]\n", 89 | " return get_table(cleaned_rows, types, titles)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": 23, 95 | "metadata": { 96 | "collapsed": true 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "def add_last_percent_change():\n", 101 | " table = load_imf_data()\n", 102 | " table = table.where(lambda r: r.get('2015') is not\n", 103 | " None and r.get('2014') is not None)\n", 104 | " table = table.where(lambda r: 'Unemployment' in\n", 105 | " r.get('Subject Descriptor'))\n", 106 | " table = table.percent_change('2014', '2015', 'last_change')\n", 107 | " return table" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 24, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "table = add_last_percent_change()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 25, 124 | "metadata": { 125 | "collapsed": false 126 | }, 127 | "outputs": [ 128 | { 129 | "data": { 130 | "text/plain": [ 131 | "Decimal('-2.595430263422718959419383707')" 132 | ] 133 | }, 134 | "execution_count": 25, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "table.columns['last_change'].mean()" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 26, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [ 150 | { 151 | "data": { 152 | "text/plain": [ 153 | "Decimal('8.016756756756756756756756757')" 154 | ] 155 | }, 156 | "execution_count": 26, 157 | "metadata": {}, 158 | "output_type": "execute_result" 159 | } 160 | ], 161 | "source": [ 162 | "table.columns['2015'].mean()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | 
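# Aside: a rough pandas sketch of the same percent-change calculation done with the
# journalism library above. It assumes the imf_indicators.tsv column names used in this
# lesson ('Subject Descriptor', '2014', '2015'); percent change means (new - old) / old * 100.
import pandas as pd

imf = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\t', thousands=',',
                  na_values=['--', 'n/a'], encoding='ISO-8859-1')
unemployment = imf[imf['Subject Descriptor'].str.contains('Unemployment', na=False)]
unemployment = unemployment.dropna(subset=['2014', '2015'])
last_change = (unemployment['2015'] - unemployment['2014']) / unemployment['2014'] * 100
print(last_change.mean())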
"kernelspec": { 177 | "display_name": "Python 2", 178 | "language": "python", 179 | "name": "python2" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 2 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython2", 191 | "version": "2.7.6" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 09 - Matplotlib.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "%matplotlib inline" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "from csv import DictReader\n", 23 | "from decimal import Decimal\n", 24 | "import calculate\n", 25 | "import pylab" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 6, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def load_imf_unemployment():\n", 37 | " rdr = DictReader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 38 | " return [r for r in rdr if r.get('Subject Descriptor') and\n", 39 | " 'Unemployment' in r.get('Subject Descriptor')]" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": { 46 | "collapsed": true 47 | }, 48 | "outputs": [], 49 | "source": [ 50 | "def get_avg_unemployment(data, start_year=2013, end_year=2015):\n", 51 | " avgs = {}\n", 52 | " while start_year <= end_year:\n", 53 | " avg = calculate.mean([\n", 54 | " Decimal(rate.get(str(start_year))) for\n", 55 | " rate in data if rate.get(str(start_year))])\n", 56 | " avgs[str(start_year)] = avg\n", 57 | " start_year += 1\n", 58 | " return avgs" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": { 65 | "collapsed": true 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "def chart_unemployment():\n", 70 | " imf_data = load_imf_unemployment()\n", 71 | " averages = get_avg_unemployment(imf_data)\n", 72 | " pylab.plot(averages.keys(), averages.values())\n", 73 | " pylab.ylabel('Average Unemployment')\n", 74 | " pylab.xlabel('Years')\n", 75 | " pylab.title('Average Unemployment Over Time')\n", 76 | " pylab.ylim([0, sorted(averages.values(), reverse=True)[0] + 1])\n", 77 | " pylab.show()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 9, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [ 87 | { 88 | "data": { 89 | "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXwAAAEZCAYAAACU3p4jAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAG6RJREFUeJzt3XucJFV58PHf3mBv7BKBAMttdQUVkYsXQIXYSEJAN+AF\nuQgBwah5o2/QIIkkqGMSfeU1iqIxiiA3AQGRICoIKIMXWFQuu8hNxF2XOy6aZRd2F9iZ/PGcpmt6\ne3qqZrqmZ6Z+38+nP9PVXdXndE31c049daoKJEmSJEmSJEmSJEmSJEmSJGnYeoDzu12Jivkv4JRu\nV0Lqpl7gD8BGXa5HJ5wD/FvTa/OBPmDyaFdmCB9n/AX8PuBFQ8yzLXABsAJYDdwMvLnketVdBaxK\nj2eAdZnpL49SHdTGWPsRVs18YE/gceDgEj5/agmf2U5/eowHk7pdgWFqV+8XAD8F1gI7A5sBpwEX\nAm8voS5TmqYPAjZJjwuAUzPTf1dC+SrIgN9dxwDXET3NY9NrGwP/A7w8M98WwNPA5ml6IXA78Efg\nZ8ArMvMuA/4RWEL0rKYAHwF+AzwJ3Am8JTP/ZOCzwO+B3wIfYGCPfC5wFvAw8CDRg2+33QwVSJcB\nJwKL0/f8ZvrOdUN9tw9nvttZwJZEz3IlcC2waZp3fvoe7wEeSvU/sU29DibWzR+B64GXptdPAr7V\nNO/pwOfT815infws1ek7xP/pglSnnwM7ZJZ9aarnE8A9wDsy750D/CfwXeJ/tYhGj/7H6e/iVE52\nuboPpeXeTXQi1hHr95PE/xgirfKZpuWuSMsCzAMuS8v/Fvi/mfl6iHVxfvpux9Je87ZwDo09wBqx\nPZ2UynqY2C7fBPyaWD8fafqs+na8ArgY+JMhypfGlN8ARwE7ErvAW6TXzwL+PTPf+4Hvp+d7AI8B\nryF+BMcAS4Fp6f1lwK3ANjQC6aHAVun5YcSu/pZp+m+JQDePCJbXAetpBPXLiSAxI9XvZuC9g3yf\nsxk6pbOUCGRbET/Yu4D35fxuS4EbUz3mpXlvBXZL3/WHwMeayr0g1X0XIrDsn97voZHS2Smtk/2J\nBvIk4D5iD2nr9N7cNO/UVO4eabqXCFAvBOYQ6/I+4I3ps84Fvp7mnQU8QATKycDuREP7svT+OUQw\ne3Va9hvARZl1OVRKZxGRqmr2wrTsjsC+wPLMe39CdCa2SnW6hcizT03L3Q8ckObtIbbT+t7o9DZ1\nabUtnA38a3peA55NZU0B/ob47hcQ62nnVK96Y3kC8b+fR2wPXyH2XKRxYR9gDbG7C9Gr/WB6vj/R\nGNT9DDg6Pf8vGj+aunuIHzJEUHzXEGXfBvxVev4johdctz+NAL0lkR7I/rCPTMu0kjfgvzPz/qnE\nd4J83+3IzHvfInrEdR8gGqhsuTs1lXVmet5DI+B/lOgJ100iep9/lqavIgISxB7InZl5rwdOzkz/\nB/C9zPRCYn0DHE6jp173VRqN1DnAGZn3DgLuzkwPFfDvo3VjPD0t+1riu/2Oxjp9D9HIA+yV3ss6\nmUaD1UM0cHkMFvCzPfynaewFbJLq+JrM/L+k0bjcTTSidVsTjY9ZigJcWd1zLHANsXsOcCmNXeRe\nYCaR359P9GDrgWwHIjXxx8xjW6LnU/dAU1nHEEGnPv8uNNJDWzfN/2Dm+Q5Eb+qRzLJfobEn0uw5\nGr3xumnED7kv89qjmedrgNkFvttjTctmp9dmPqsu+92WN31W3TwG9nr703LbpOlzaTS4RwPnNS3f\nXIfHB6nTDkRQzX6/d9LY2+pnw+/X/H3aWUHr77d15v1+onGrN5zvJHrV9frNa6rfycCfZj4ru32M\n1BM0jvmsSX8H+/47EL+Ber3uIra3LVFuo31QT2EGkVqZTARTiJTEpsCuRI76EuJH+ThwJfBUmm85\nkZP9VJvPzx443YHoNb4RuCm9dxuNntUjwHaZ+bPPHyDywJsxMGAPZjkDjz1ApAWaG6DB6pvnuzUb\n6pjB9sC9mecPtZjnIQYeK5hErIf6vFcQo0x2IUa8fLhNee0OWi8HbqCRIum064C3AZ9oqsdhqez7\n0vRFRGfjVKJTcUimfksZuFeU1YmD8sNdfjlwHLENa5js4XfHW4jeycuI3vtu6flPiN44RH7yCKIH\nls1Vfo3Iu+9JBKZZRBAarCc4i/iRrSD+38cRgavuEiI/Ws/h/xONH+UjRGD4HLHLPRlYQCPV0eyy\nVJe/IPKy84gc7UWDzF9XD9pFv1sepxAN7MuJVNfFLea5NJXzRmKP5ESiZ35jen8N8d0uJI5hNPdy\nJw3yvNn3iGB6dCpnGpHCqB8gHqrxeoxY/4M5jcZB9i2JVM6RwD8TxyXqbie2hzOBq4kDvRAHmFcR\nB/1nEP/DXYhjCnnql9Vq3kkFPyPrK0RHYPs0vQXljGyb0Az43XEMkRd9kOjBP078mL9EBPjJxI9v\nNbE7flVm2VuIvOuXiPH796XPG6zndBcxQuMmIpWyCzF0r+5rRFBfkj77e8RB23qP/hjiHIG7UnmX\n0jgA3KqsI4H/R+yu35jK/cSga2Jgr7Hod6PpvVY90BuI4yHXEaNTrmsx771EEP4icRD1zcQxjucy\nn3Muse5ajd0fqg716VVE7/4IYu/hEWJdbZRjWYgc+rlESuPQFvX4A3FsaDrxv1hBHBc6mvi/ZV1I\nNHDZzkQfccxhd2KEzu+JvcM5beo3mMG+S/O6an5/MF8gRkBdQzRQNxEdA0kjcBAx0me8m09nT/ja\njkirjWRvQ5K6ajox9nkqcZByEZHCGe/m07mAP5kYd3/mUDNK0lg2g0gfPUmklc5iYvRi5zPwfILh\nmkWk1u6gMWpHkiRJkqQu6+oFpHbbbbf+xYsXd7MKkjQeLSZGUxXS7SsG9vf3j5eLK459PT099PT0\ndLsaE4brs7Ncn50zadIkGEb8dhy+JFWEAV+SKsKAP4HUarVuV2FCcX12luuz+8zhS9I4Yw5fktSW\nAV+SKsKAL0kVYcCXpIow4EtSRRjwJakiDPiSVBEGfEmqCAO+JFWEAV+SKsKAL0kVYcCXpIow4EtS\nRRjwJakiDPiSVBEGfEmqiKndrsCsWTB3LsyZE3/rjyLT06fDpG7fykWSxrhuh8n+Vav6WbkSVq6E\nJ5+k5fOhpvv6RtZgzJ0Ls2fDZPd3JI0Dw73jVdcDfiducbhuXbEGotX0009H0M/TQLR7b9q0DqwV\nSWqj0gG/E9avh1Wr8jUQ7d7baKOR723MmGGKStLgDPhjQH9/7CmMdG/juefy71EMNj1njikqaaIy\n4E8gzzwz+B5F3sZj9erOHBDfaKNurw1JzQz4GqCvL1JUI93bmDp15CmqmTNNUUmdZMBXx/X3w9q1\n+RuIwd5bt64zKaopU7q9RqSxwYCvMevZZyP4j2RvY9Wq2FMo2mA0v7fxxt1eG9LIGfA1ofX1wVNP\njSw9tXJlpJbyNhazZ8dxkMEeM2d6YFzdYcCXcqinqPI0
DqtXRyMz2GPNmjjLu12jMJLH1K6fB6+x\nyoAvjbK+vgj67RqF5sfTT+efd8qU9nsXI2lMNt7YA+njmQFfmkD6+2N4bpHGpMhj/fqRNxqDPWbM\nMNVVtrEa8E8Gjgb6gDuA44B1mfcN+FIXPPfc8BuLofZS1q4dmOrqdMNiqmtsBvz5wI+AlxFB/mLg\n+8C5mXkM+NIEM5xUV5HH1Knl7JnMnDl+Ul3DDfhltpVPAs8CM4H16e9DJZYnaQyYPLkRRDutvz/O\n6yjSQDz2WP69lIme6ioz4P8B+CywHFgD/AC4rsTyJE1wkyZFumj6dNhss85//rPPFjuwvnIlPPxw\nvnnXro2g34nGY7jKDPgLgA8SqZ2VwKXAUcAFJZYpScM2bVrjPIxO6+sr1pg89RSsWNH69eEqM+C/\nGrgReCJNfxt4HU0Bv6en5/nntVqNWq1WYpUkqTsmT46T+WbPLr5sb28vvb29z08vWjS8OpR5eGI3\nIri/BlgLnAP8HPjPzDwetJWkgoZ70LbMQwiLgfOAXwJL0mtnlFieJKmNbg9AsocvSQWNxR6+JGkM\nMeBLUkUY8CWpIgz4klQRBnxJqggDviRVhAFfkirCgC9JFZEn4L8j52uSpDEsz5latwF75HhtODzT\nVpIKKuMGKAcBbwK2AU7PfPgmxI1NJEnjSLuA/zBwC3BI+lsP+E8CHyq5XpKkDsuzSzCN8nr0pnQk\nqaAy72m7F/Bx4s5V9fn7gRcVLUyS1D15Woh7iVsV3krcjLxuRQfKt4cvSQWV2cP/H+Cqoh8sSRpb\n8rQQnwamEPekXZd5/dYOlG8PX5IKGm4PP88CvUTOvtl+RQtrwYAvSQWVGfDLZMCXpILKvMXhVsBZ\nwNVpemfg3UULkiR1V56Afw5wDTAvTd+HJ15J0riTJ+BvDlxMY0jms8BzpdVIklSKPAF/NbBZZnpv\nYGU51ZEklSXPOPwTgSuJM2tvBLYADi2zUpKkzst7lHcasFOa/146d20dR+lIUkFlDsucCryZDa+l\n87mihbVgwJekgsq8tMKVwBrgDqCvaAGSpLEhT8DfBti17IpIksqVZ5TONcBfll0RSVK58vTwbwQu\nJxqH+sHafmBOWZWSJHVenqT/MuBg4Fd0PofvQVtJKqjMa+ksB+7EA7aSNK7lSeksBa4nboLyTHqt\nU8MyJUmjJG/AXwpslB6SpHEoTw5oV2BJSeWbw5ekgso80/anwMbA2cAFdPbCaQZ8SSqozIO2+wBH\nAdsT97G9CDigaEGSpO4q0kJMBd4CnE708icD/wxc1maZTYEzgZcTB3qPBxZl3reHL0kFlZnS2Q14\nF7AQuJYI4LcSd8BaRPT8B3MucAPwdaLBmMXAlJABX5IKKjPg30Dc0/ZS4iJqWccA5w2y3FzgNuI6\n+oMx4EtSQWUGfIiDtjum53mvh7878FXgLmIv4RbgBODpzDwGfEkqqMzLI9eI1Mzv0vT2wLFEz3+o\nz34l8AHgF8DngY8AH8vO1NPT0yioVqNWq+WokiRVR29vL729vSP+nDwtxK3AkUTPHuLOV98kgnk7\nWwE3AS9M0/sQAX9hZh57+JJUUJnDMqfSCPYAvybfnsGjwANEAwHw58Q1eSRJXZCnhTgbWA98I81/\nFNFQHJ9j2d2IUT0bAfcDx+EoHUkakTIP2k4H3g+8Pk3/BPgysK5oYS0Y8CWpoLJH6ZTFgC9JBZUx\nSueONu/1431uJWlcaddCzB9i2WUdKN8eviQVVHZKZytgL+KuV78gRuB0ggFfkgoqc1jm3wA/B94G\nHArcDLy7aEGSpO7K00L8Gngt8ESa3ow4oWqnQZfIzx6+JBVUZg9/BbA6M706vSZJGkfytBDnA7sA\nV6TpQ4hbHi5h5Dczt4cvSQWVefG0+9OjHpmvSM9nFy1MktQ9nnglSeNMmT381xC3Mpyfmd8TryRp\nnMk7SufDwK+Icfh1yzpQvj18SSqozB7+74HvFP1gSdLYkqeFOAA4HLgOeCa91g98uwPl28OXpILK\n7OEfC7wkzZtN6XQi4EuSRkmeFuJe4KU0hmV2kj18SSqozDNtbwR2LvrBkqSxJU8LcQ+wAFhK4y5X\nnRqWaQ9fkgoq8/LI8wd5fVnRwlow4EtSQWWmdJYB2wH7pedPDacgSVJ35QncPcCriJE6OwHbAJfQ\nuKn5SNjDl6SCyuzhv5W4QuZTafohYJOiBUmSuitPwF/HwPH3s0qqiySpRHkC/qXAV4FNgfcCPwTO\nLLNSkqTOy5sDOiA9AH4AXNuh8s3hS1JBZQ7LLJMBX5IKKvOg7duB+4AngVXp8WTRgiRJ3ZWnhbgf\nWAjcXUL59vAlqaAye/iPUk6wlySNojyXR/4lcDHw33T+eviSpFGSJ+DPBdbQGKVTZ8CXpHHEUTqS\nNM6UccerLzZN9xP3t70e+GnRgiRJ3dUu4N/Chne52gz4DHHxtNPKqpQkqfOGk9KZAdwE7N6B8k3p\nSFJBZQ7LbLaGcu5vK0kqUZ5ROlnTgKOBB0uoiySpRO0C/mo27MmvAW4A3legjCnEWP4Hgb8qVDtJ\nUse0C/izO1TGCcBdeNMUSeqq4eTwi9gWeBNx/fxuj/mXpEorO+CfBpzEwDtmSZK6oOhB2yIWAo8D\ntwG1wWbq6el5/nmtVqNWG3RWSaqk3t5eent7R/w5edMs+wIvBs4GtiDy+0uHWOZTwF8DzwHTgTnA\nZcAxmXkchy9JBZV5x6se4FXAS4CdgG2IM21fX6CcNwAfZsNROgZ8SSqozBOv3gocAjyVph9ieCNu\njOyS1EV5cvjrGHjQddYwyrkhPSRJXZKnh38p8FVgU+C9wA+JYZaSpHEkbw7oABo3QPkBcG2HyjeH\nL0kFlXnQtkwGfEkqqMyDtqtaPB4ELgdeVLRASVJ35Dlo+wXgAeCiNH0EsIA4oerrtDmpSpI0duTZ\nJVgC7Nr02u3EDVAWA7uNoHxTOpJUUJkpnaeBw9O8k4HDgLXpPaO1JI0TeVqIBURaZ+80vQj4IHEC\n1qsY2Q3N7eFLUkGO0pGkihhuwM9z0HYG8G5gZ+IiaHXHFy1MktQ9eXL45wNbAgcSl0fYjrj9oSRp\nHMmzS1AfkVMfrTONyNvv1YHyTelIUkFljtJ5Jv1dCbyCuKbOFkULkiR1V54c/hnAC4BTgO8QNz/5\naJmVkiR13lABfzJxKYU/EPn7F5ZeI0lSKfLkgG4hxtuXwRy+JBVU5jj8TwMrgItp3PUKotc/UgZ8\nSSqozIC/jNaXUOhEeseAL0kFeaatJFVEmcMyZxGjcr6WpncEFhYtSJLUXXkC/tnEWPzXpemHgU+W\nViNJUinyBPwFwKk0TsB6qs28kqQxKk/AX0dcQK1uQXpNkjSO5DnTtge4GtgWuBB4PfCu8qokSSpD\n3qO8m9O4AcrNwO87VL6jdCSpoDKvh38lcQPzKzB/L0njVp4c/meBfYG7gG8BhzLwRiiSpHGgyC7B\nVGA/4D3EzVD
mdKB8UzqSVFCZKR2IUToHA4cBrwTOLVqQJKm78rQQlxB3t7oa+CZxmeS+DpVvD1+S\nCirzWjoHAtcC69P0vsARwPuLFtaCAV+SCiozpXM1kcY5kkjpLAUuK1qQJKm72gX8lxBB/nBi3P2l\nRItSK79akqROa7dL0Ad8F/gAsDy9tpTO3ubQlI4kFVTG5ZHfBqwBfgx8Bdh/OAVIksaGPAF8NnAI\nkd7ZDzgPuBy4pgPl28OXpIJG645XLyDOtD0CeGPRwlow4EtSQWP1FofbEXsEf0rcF/cM4PTM+wZ8\nSSporAb8rdLjdiI1dAvwFuDu9L4BX5IKKvOetiPxKBHsAVYTgX5eyWVKklooO+BnzQf2IK6nL0ka\nZXkvnjZSs4lLK59A9PSf19PT8/zzWq1GrVYbpSpJ0vjQ29tLb2/viD9nNMbVTyNO4LoK+HzTe+bw\nJamgsXrQdhJxKeUngA+1eN+AL0kFjdWAvw9xpu4SYlgmwMnEBdnAgC9JhY3VgD8UA74kFTRWh2VK\nksYIA74kVYQBX5IqwoAvSRVhwJekijDgS1JFGPAlqSIM+JJUEQZ8SaoIA74kVYQBX5IqwoAvSRVh\nwJekijDgS1JFGPAlqSIM+JJUEQZ8SaoIA74kVYQBX5IqwoAvSRVhwJekijDgS1JFGPAlqSIM+JJU\nEQZ8SaoIA74kVYQBX5IqwoAvSRVhwJekijDgS1JFGPAlqSIM+JJUEQZ8SaoIA74kVYQBX5IqwoAv\nSRVRdsA/ELgHuA/4p5LLkiS1UWbAnwJ8iQj6OwNHAi8rsbzK6+3t7XYVJhTXZ2e5PruvzIC/J/Ab\nYBnwLPBN4JASy6s8f1Cd5frsLNdn95UZ8LcBHshMP5hekyR1QZkBv7/Ez5YkFTSpxM/eG+ghcvgA\nJwN9wKmZeX4DLCixDpI0Ed0PvLjblciaSlRqPrARcDsetJWkCesg4F6iJ39yl+siSZIkqVPynIB1\nenp/MbDHKNVrvBpqfdaAlcBt6XHKqNVs/Pk68BhwR5t53DbzG2p91nDbzGs74HrgTuBXwN8PMt+Y\n2j6nECmd+cA0Wufy3wR8Pz3fC1g0WpUbh/KszxrwnVGt1fi1L/EjGSxAuW0WM9T6rOG2mddWwO7p\n+WwiPT6i2Dka19LJcwLWwcC56fnNwKbAlqNQt/Eo7wltZY7Amkh+Avyxzftum8UMtT7BbTOvR4kO\nHcBq4G5gXtM8hbbP0Qj4eU7AajXPtiXXa7zKsz77gdcRu3jfJy5toeFx2+wst83hmU/sOd3c9Hqh\n7XNqx6u1obwnYDW3+p641Vqe9XIrkf97mhgp9d/ATmVWaoJz2+wct83iZgPfAk4gevrNcm+fo9HD\nf4j4B9dtR7RC7ebZNr2mDeVZn6uIHxTAVUSu/wXlV21CctvsLLfNYqYBlwHfIBrHZmNu+8xzAlb2\nwMPeeGCsnTzrc0sarf6eRL5fg5tPvoO2bpv5zGfw9em2md8k4DzgtDbzjMnts9UJWO9Lj7ovpfcX\nA68c1dqNP0Otz/cTw7huB24kNgS1dhHwMPAMkQs9HrfNkRhqfbpt5rcPcTma22kMYz0It09JkiRJ\nkiRJkiRJkiRNFJ8hLmWwGPg2MHeQ+Qa7kOE7iIuerWfgyJk9aYy4WQIcnqMuZxEjdZYAl7epiyRp\nCDXg7KbX/oLGiamfTo9m7S5k+FLiDOLrGRjwZ2Q+dytgRfqcdjbJPP8sI7jC6GicaSuNBZOIC3sd\nmHntHcTZnqq2VpciuJYYAw9x/ZpW16dpdyHDe4Bft1hmTeZzZxCXil6fpg8gzk24BbgEmJVeX5X+\nTkrLrBji+wzKgK+q6Af+FvgcsDFxfZJPAn83zM8bjetQaXQMdfXO42mczZqV50KGrexJpHvuBP4h\nvbY58C/A/sCriKD/D5llzgYeAXYFzsxRRksGfFXJncCVwEeAjxHXJzmF6MHdSlxqFmIX/cfEj+4W\n4LXp9Rqxl3AFcbboTOB7xK78HcBh5X8FddAiIpf+NeJ/X8+tH5CZ51+Is4YvbLH8cC+i93Pg5USq\n5wtETn5v4sqhN6Y6HANsn1nmOOLSyEtSnSTlMJPY3V4CfAo4Kr2+KXG5ipnEbvPG6fUdgV+k5zXi\naoU7pOm3A2dkPntOWZVWqd7Ahjl8gHcBPwOmD7Lc3sDVmemT2fAOdM05/GY/BF4NLKR1o9Lsz4Dv\n5phPUvIJ4CQikN9Bo2e3DHgJ0eM6n2gUbqNxSdoa8KPM5+wILCUO6O1TfrVVkhobBvwDiT3Czdss\nl+dChtcTKZq6+TTSgTsAy4mOwhbA74AF6b1ZxPYF8OL0dxLwH8C/tf02kgb4OHAi8EsaP6qsHuD/\np+dTiANyEIHhyqZ5NyX2EnqBj3a2mholbyDuxZt1HxGA652BL6fX5xFpvLpWFzIEeCuR319D3Lmq\nPjjgr4l04G1Eaic7iGC/9Nri9FhIBPmfEp2PJameM4b1LaWKqgf8TwJfzLxevwH052gcMDuOxqiK\nGgMD/tY0dvcXEmOkpTHLkQaqqn5i1/jzRM9pMvBb4uDdl4mbThxD5GhXNy1X9wriBJ0+4sDe/ym9\n1pIkSZIkSZIkSZIkSZIkSZIkSZIkqXP+F5G96ken973AAAAAAElFTkSuQmCC\n", 90 | "text/plain": [ 91 | "" 92 | ] 93 | }, 94 | "metadata": {}, 95 | "output_type": "display_data" 96 | } 97 | ], 98 | "source": [ 99 | "chart_unemployment()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": true 107 | }, 108 | "outputs": [], 109 | "source": [] 110 | } 111 | ], 112 | "metadata": { 113 | "kernelspec": { 114 | "display_name": "Python 2", 115 | "language": "python", 116 | "name": "python2" 117 | }, 118 | "language_info": { 119 | "codemirror_mode": { 120 | "name": "ipython", 121 | "version": 2 122 | }, 123 | "file_extension": ".py", 124 | "mimetype": "text/x-python", 125 | "name": "python", 126 | "nbconvert_exporter": "python", 127 | "pygments_lexer": "ipython2", 128 | "version": "2.7.6" 129 | } 130 | }, 131 | "nbformat": 4, 132 | "nbformat_minor": 0 
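# Aside: chart_unemployment() above plots averages.keys() against averages.values(),
# and under Python 2.7 plain-dict ordering is arbitrary, so the years can come out of
# order. A minimal variant that sorts the years first (same labels and title as above):
def chart_unemployment_sorted():
    imf_data = load_imf_unemployment()
    averages = get_avg_unemployment(imf_data)
    years = sorted(int(y) for y in averages)       # e.g. [2013, 2014, 2015]
    values = [averages[str(y)] for y in years]
    pylab.plot(years, values)
    pylab.ylabel('Average Unemployment')
    pylab.xlabel('Years')
    pylab.title('Average Unemployment Over Time')
    pylab.show()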
133 | } 134 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 10 - PyGal.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 25, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pygal\n", 12 | "from csv import DictReader" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 54, 18 | "metadata": { 19 | "collapsed": true 20 | }, 21 | "outputs": [], 22 | "source": [ 23 | "%matplotlib inline\n", 24 | "#%config InlineBackend.figure_format = 'svg'\n", 25 | "from IPython.display import set_matplotlib_formats\n", 26 | "set_matplotlib_formats('pdf', 'svg')\n", 27 | "\n", 28 | "from IPython.display import SVG, HTML, Image" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 71, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "html_pygal = u\"\"\"\n", 40 | "
\n", 41 | " \n", 42 | "
\n", 43 | "\"\"\"" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 28, 49 | "metadata": { 50 | "collapsed": true 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "def load_imf_unemployment():\n", 55 | " rdr = DictReader(open('../../data/imf_indicators.tsv', 'rb'), delimiter='\\t')\n", 56 | " return [r for r in rdr if r.get('Subject Descriptor') and\n", 57 | " 'Unemployment' in r.get('Subject Descriptor')]" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 29, 63 | "metadata": { 64 | "collapsed": true 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "def load_iso_codes():\n", 69 | " iso_dict = {}\n", 70 | " for row in DictReader(open('../../data/iso-2.csv', 'rb')):\n", 71 | " iso_dict[row.get('Name')] = row.get('Code')\n", 72 | " return iso_dict" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 30, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "def load_and_merge_data():\n", 84 | " iso_dict = load_iso_codes()\n", 85 | " imf_data = load_imf_unemployment()\n", 86 | " for d in imf_data:\n", 87 | " d['iso'] = iso_dict[d.get('Country')]\n", 88 | " return imf_data" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 78, 94 | "metadata": { 95 | "collapsed": true 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "def draw_unemployment():\n", 100 | " imf_data = load_and_merge_data()\n", 101 | " worldmap_data = {}\n", 102 | " for row in imf_data:\n", 103 | " worldmap_data[row.get('iso').lower()] = float(row.get('2015'))\n", 104 | " worldmap_chart = pygal.Worldmap()\n", 105 | " worldmap_chart.title = '2015 Unemployment'\n", 106 | " worldmap_chart.add('Total Unemployment (%)', worldmap_data)\n", 107 | " worldmap_chart.render_to_png('../../../static/test.png')" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 79, 113 | "metadata": { 114 | "collapsed": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "draw_unemployment()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "metadata": { 125 | "collapsed": true 126 | }, 127 | "outputs": [], 128 | "source": [] 129 | } 130 | ], 131 | "metadata": { 132 | "kernelspec": { 133 | "display_name": "Python 2", 134 | "language": "python", 135 | "name": "python2" 136 | }, 137 | "language_info": { 138 | "codemirror_mode": { 139 | "name": "ipython", 140 | "version": 2 141 | }, 142 | "file_extension": ".py", 143 | "mimetype": "text/x-python", 144 | "name": "python", 145 | "nbconvert_exporter": "python", 146 | "pygments_lexer": "ipython2", 147 | "version": "2.7.6" 148 | } 149 | }, 150 | "nbformat": 4, 151 | "nbformat_minor": 0 152 | } 153 | -------------------------------------------------------------------------------- /books/pycon-2015-examples/Lesson 14 - Pandas Data Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "def get_wb_unemployment_data():\n", 23 | " return pd.read_excel('../../data/wb/unemployment.xlsx',\n", 24 | " index_col=0, header=0, skiprows=[1])" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 
31 | "collapsed": true 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "def get_wb_market_data():\n", 36 | " return pd.read_excel('../../data/wb/stock_market.xlsx',\n", 37 | " index_col=0, header=0, skiprows=[1])" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": true 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "def get_metadata():\n", 49 | " return pd.read_excel('../../data/wb/stock_metadata.xlsx',\n", 50 | " sheetname=1, index_col=0, header=0)" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": { 57 | "collapsed": true 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "def get_gdp():\n", 62 | " return pd.read_excel('../../data/wb/GDP_Current_Dollars.xlsx',\n", 63 | " index_col=3, header=0)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": true 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "def clean_market_columns():\n", 75 | " market_data = get_wb_market_data()\n", 76 | " market_data.columns = market_data.columns.map(lambda x: x[:3])\n", 77 | " market_data.index = market_data.index.map(lambda x: '{} SM'.format(x))\n", 78 | " return market_data.transpose()" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": true 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "def update_gdp_cols(colname):\n", 90 | " if colname[:4].isdigit():\n", 91 | " return '{} GDP'.format(colname[:4])\n", 92 | " return colname" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": true 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "def join_market_and_gdp():\n", 104 | " market_data = clean_market_columns()\n", 105 | " gdp_data = get_gdp()\n", 106 | " gdp_data.columns = gdp_data.columns.map(update_gdp_cols)\n", 107 | " return market_data.join(gdp_data)" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": { 114 | "collapsed": true 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "def just_spain():\n", 119 | " joined = join_market_and_gdp()\n", 120 | " spain = joined.loc['ESP'].copy()\n", 121 | " spain_gdp = spain[spain.index.map(lambda x: 'GDP' in x)]\n", 122 | " spain_stock = spain[spain.index.map(lambda x: 'SM' in x)]\n", 123 | " spain_gdp.index = spain_gdp.index.map(lambda x: x.rstrip(' GDP'))\n", 124 | " spain_stock.index = spain_stock.index.map(lambda x: x.rstrip(' SM'))\n", 125 | " spain_stock.name = 'Stocks'\n", 126 | " spain_gdp.name = 'GDP'\n", 127 | " return spain_stock, spain_gdp" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": { 134 | "collapsed": true 135 | }, 136 | "outputs": [], 137 | "source": [ 138 | "def merge_back():\n", 139 | " spain_stock, spain_gdp = just_spain()\n", 140 | " return pd.concat([spain_stock, spain_gdp], axis=1)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": 22, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "spain = merge_back()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": 23, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "\n", 166 | "Index: 21 entries, 1990 to 2015\n", 167 | "Data columns (total 2 columns):\n", 168 | "Stocks 20 non-null 
object\n", 169 | "GDP 12 non-null object\n", 170 | "dtypes: object(2)\n", 171 | "memory usage: 504.0+ bytes\n" 172 | ] 173 | } 174 | ], 175 | "source": [ 176 | "spain.info()" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 24, 182 | "metadata": { 183 | "collapsed": false 184 | }, 185 | "outputs": [ 186 | { 187 | "data": { 188 | "text/html": [ 189 | "
\n", 190 | "\n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | "
\n", 199 | "
" 200 | ], 201 | "text/plain": [ 202 | "Empty DataFrame\n", 203 | "Columns: []\n", 204 | "Index: []" 205 | ] 206 | }, 207 | "execution_count": 24, 208 | "metadata": {}, 209 | "output_type": "execute_result" 210 | } 211 | ], 212 | "source": [ 213 | "spain.corr()" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 26, 219 | "metadata": { 220 | "collapsed": false 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "spain = spain.convert_objects(convert_dates=True, convert_numeric=True)" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": 27, 230 | "metadata": { 231 | "collapsed": false 232 | }, 233 | "outputs": [ 234 | { 235 | "data": { 236 | "text/html": [ 237 | "
\n", 238 | "\n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | " \n", 250 | " \n", 251 | " \n", 252 | " \n", 253 | " \n", 254 | " \n", 255 | " \n", 256 | " \n", 257 | " \n", 258 | "
StocksGDP
Stocks1.0000000.566494
GDP0.5664941.000000
\n", 259 | "
" 260 | ], 261 | "text/plain": [ 262 | " Stocks GDP\n", 263 | "Stocks 1.000000 0.566494\n", 264 | "GDP 0.566494 1.000000" 265 | ] 266 | }, 267 | "execution_count": 27, 268 | "metadata": {}, 269 | "output_type": "execute_result" 270 | } 271 | ], 272 | "source": [ 273 | "spain.corr()" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": 28, 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "outputs": [ 283 | { 284 | "data": { 285 | "text/html": [ 286 | "
\n", 287 | "\n", 288 | " \n", 289 | " \n", 290 | " \n", 291 | " \n", 292 | " \n", 293 | " \n", 294 | " \n", 295 | " \n", 296 | " \n", 297 | " \n", 298 | " \n", 299 | " \n", 300 | " \n", 301 | " \n", 302 | " \n", 303 | " \n", 304 | " \n", 305 | " \n", 306 | " \n", 307 | "
StocksGDP
Stocks6.478334e+023.424133e+12
GDP3.424133e+121.231342e+23
\n", 308 | "
" 309 | ], 310 | "text/plain": [ 311 | " Stocks GDP\n", 312 | "Stocks 6.478334e+02 3.424133e+12\n", 313 | "GDP 3.424133e+12 1.231342e+23" 314 | ] 315 | }, 316 | "execution_count": 28, 317 | "metadata": {}, 318 | "output_type": "execute_result" 319 | } 320 | ], 321 | "source": [ 322 | "spain.cov()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": 29, 328 | "metadata": { 329 | "collapsed": true 330 | }, 331 | "outputs": [], 332 | "source": [ 333 | "spain.to_csv('../../data/spain_export.csv')" 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "metadata": { 340 | "collapsed": true 341 | }, 342 | "outputs": [], 343 | "source": [] 344 | } 345 | ], 346 | "metadata": { 347 | "kernelspec": { 348 | "display_name": "Python 2", 349 | "language": "python", 350 | "name": "python2" 351 | }, 352 | "language_info": { 353 | "codemirror_mode": { 354 | "name": "ipython", 355 | "version": 2 356 | }, 357 | "file_extension": ".py", 358 | "mimetype": "text/x-python", 359 | "name": "python", 360 | "nbconvert_exporter": "python", 361 | "pygments_lexer": "ipython2", 362 | "version": "2.7.6" 363 | } 364 | }, 365 | "nbformat": 4, 366 | "nbformat_minor": 0 367 | } 368 | -------------------------------------------------------------------------------- /books/pydata-examples/01 - Simple formats with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', encoding='ISO-8859-1')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "df.head()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "df.dtypes" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', thousands=',', encoding='ISO-8859-1')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "df.dtypes" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "df['2017'].value_counts()" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', thousands=',', na_values=['n/a'], \n", 89 | " encoding='ISO-8859-1')" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "df.dtypes" 101 | ] 102 | }, 103 | { 104 | "cell_type": "code", 105 | "execution_count": null, 106 | "metadata": { 107 | "collapsed": false 108 
| }, 109 | "outputs": [], 110 | "source": [ 111 | "df['Country'].value_counts()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "df[df['Country'] == 'Germany']" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "### Show values and single out a particular subject descriptor or units or use both: & (and join) | (or join)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": null, 135 | "metadata": { 136 | "collapsed": false 137 | }, 138 | "outputs": [], 139 | "source": [ 140 | "df[(df['Country'] == 'Germany') & (df['Units'].isin(['National currency']))]" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": true 148 | }, 149 | "outputs": [], 150 | "source": [] 151 | } 152 | ], 153 | "metadata": { 154 | "kernelspec": { 155 | "display_name": "Python 3", 156 | "language": "python", 157 | "name": "python3" 158 | }, 159 | "language_info": { 160 | "codemirror_mode": { 161 | "name": "ipython", 162 | "version": 3 163 | }, 164 | "file_extension": ".py", 165 | "mimetype": "text/x-python", 166 | "name": "python", 167 | "nbconvert_exporter": "python", 168 | "pygments_lexer": "ipython3", 169 | "version": "3.4.3" 170 | } 171 | }, 172 | "nbformat": 4, 173 | "nbformat_minor": 0 174 | } 175 | -------------------------------------------------------------------------------- /books/pydata-examples/02 - More complex formats with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": false 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import pdftables" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "pdftables.get_tables?" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "pdftables.page_to_tables?" 
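# Aside: the trailing '?' in the two cells above is IPython/Jupyter syntax for showing a
# docstring; in plain Python the equivalent would be:
help(pdftables.get_tables)
help(pdftables.page_to_tables)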
45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "my_pdf = open('../../data/WEF_GlobalCompetitivenessReport_2014-15.pdf', 'rb')" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "chart_page = pdftables.get_pdf_page(my_pdf, 29)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "chart_page" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": true 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "table = pdftables.page_to_tables(chart_page)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "table" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "table[0]" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "titles = zip(table[0][0], table[0][1])[:5]" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "titles" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "titles = [''.join([title[0], title[1]]) for title in titles]\n", 144 | "print(titles)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": true 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "all_rows = []\n", 156 | "for row_data in table[0][2:]:\n", 157 | " all_rows.extend([row_data[:5], row_data[5:]])" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "all_rows" 169 | ] 170 | }, 171 | { 172 | "cell_type": "code", 173 | "execution_count": null, 174 | "metadata": { 175 | "collapsed": false 176 | }, 177 | "outputs": [], 178 | "source": [ 179 | "df = pd.DataFrame(all_rows, columns=titles)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": null, 185 | "metadata": { 186 | "collapsed": false 187 | }, 188 | "outputs": [], 189 | "source": [ 190 | "df.head()" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "### Your turn: Try with page 30, 31 or 32 " 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": true 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "new_chart_page = pdftables.get_pdf_page(my_pdf, 30)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "code", 213 | "execution_count": null, 214 | "metadata": { 215 | "collapsed": true 216 | }, 217 | "outputs": [], 218 | "source": [ 219 | "table = pdftables.page_to_tables(new_chart_page)" 220 | ] 221 | }, 222 | { 223 | "cell_type": "code", 224 | "execution_count": null, 225 | 
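# Aside: this notebook runs on a Python 3 kernel, where zip() returns an iterator, so the
# titles cell above (zip(...)[:5]) raises "TypeError: 'zip' object is not subscriptable".
# Wrapping the zip in list() restores the Python 2 behaviour:
titles = list(zip(table[0][0], table[0][1]))[:5]
titles = [''.join([title[0], title[1]]) for title in titles]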
"metadata": { 226 | "collapsed": false 227 | }, 228 | "outputs": [], 229 | "source": [ 230 | "table[0]" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": true 238 | }, 239 | "outputs": [], 240 | "source": [] 241 | } 242 | ], 243 | "metadata": { 244 | "kernelspec": { 245 | "display_name": "Python 3", 246 | "language": "python", 247 | "name": "python3" 248 | }, 249 | "language_info": { 250 | "codemirror_mode": { 251 | "name": "ipython", 252 | "version": 3 253 | }, 254 | "file_extension": ".py", 255 | "mimetype": "text/x-python", 256 | "name": "python", 257 | "nbconvert_exporter": "python", 258 | "pygments_lexer": "ipython3", 259 | "version": "3.4.3" 260 | } 261 | }, 262 | "nbformat": 4, 263 | "nbformat_minor": 0 264 | } 265 | -------------------------------------------------------------------------------- /books/pydata-examples/04 - APIs.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "def get_story(story_id):\n", 23 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 24 | " resp = requests.get(url)\n", 25 | " return resp.json()" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "collapsed": true 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "def get_top_stories():\n", 37 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 38 | " resp = requests.get(url)\n", 39 | " all_stories = [get_story(sid) for sid in resp.json()[:10]]\n", 40 | " return all_stories" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": { 47 | "collapsed": false 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import urllib3.contrib.pyopenssl\n", 52 | "urllib3.contrib.pyopenssl.inject_into_urllib3()" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": { 59 | "collapsed": false 60 | }, 61 | "outputs": [], 62 | "source": [ 63 | "top_stories = get_top_stories()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": { 70 | "collapsed": false 71 | }, 72 | "outputs": [], 73 | "source": [ 74 | "top_stories[:5]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": true 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "MATCHING = (\n", 86 | " ('Python', '(p|P)ython'),\n", 87 | " ('Ruby', '(r|R)uby'),\n", 88 | " ('JavaScript', 'js|(J|j)ava(s|S)cript'),\n", 89 | " ('NodeJS', 'node(\\.?)(?:\\js|JS)'),\n", 90 | " ('Java', '(j|J)ava[^(S|s)cript]'),\n", 91 | " ('Objective-C', 'Obj(ective?)(?:\\ |-)(C|c)'),\n", 92 | " ('Go', '(g|G)o'),\n", 93 | " ('C++', '(c|C)(\\+)+')\n", 94 | ")" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": { 101 | "collapsed": true 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "import re" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": { 112 | "collapsed": true 113 | }, 114 | "outputs": [], 115 | "source": [ 116 | "def count_languages():\n", 117 | " stories = 
get_top_stories()\n", 118 | " final_tallies = {}\n", 119 | " for s in stories:\n", 120 | " long_string = u'{} {}'.format(s.get('title'), s.get('url'))\n", 121 | " for language, regex in dict(MATCHING).items():\n", 122 | " if re.search(regex, long_string):\n", 123 | " if language not in final_tallies.keys():\n", 124 | " final_tallies[language] = {\n", 125 | " 'score': s.get('score'),\n", 126 | " 'descendants': s.get('descendants')}\n", 127 | " else:\n", 128 | " final_tallies[language]['score'] += s.get('score')\n", 129 | " final_tallies[language][\n", 130 | " 'descendants'] += s.get('descendants')\n", 131 | " return final_tallies" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "count_languages()" 143 | ] 144 | }, 145 | { 146 | "cell_type": "markdown", 147 | "metadata": {}, 148 | "source": [ 149 | "### Can you add the URL to this? What about a good Regex for matching Julia?\n", 150 | "\n", 151 | "Again, try it yourself first! :)" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "%load solutions/regex_solution.py" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": true 170 | }, 171 | "outputs": [], 172 | "source": [] 173 | } 174 | ], 175 | "metadata": { 176 | "kernelspec": { 177 | "display_name": "Python 3", 178 | "language": "python", 179 | "name": "python3" 180 | }, 181 | "language_info": { 182 | "codemirror_mode": { 183 | "name": "ipython", 184 | "version": 3 185 | }, 186 | "file_extension": ".py", 187 | "mimetype": "text/x-python", 188 | "name": "python", 189 | "nbconvert_exporter": "python", 190 | "pygments_lexer": "ipython3", 191 | "version": "3.4.3" 192 | } 193 | }, 194 | "nbformat": 4, 195 | "nbformat_minor": 0 196 | } 197 | -------------------------------------------------------------------------------- /books/pydata-examples/05 - Messy Data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "import numpy as np\n", 13 | "from datetime import datetime" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "weather = pd.read_csv('../../data/berlin_weather_oldest.csv')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "weather.dtypes" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": { 42 | "collapsed": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "weather.head()" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": true 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "weather = weather.applymap(lambda x: np.nan if x == -9999 else x)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": { 64 | "collapsed": false 65 | }, 66 | "outputs": [], 67 | "source": [ 68 | "weather.head()" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | 
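# Aside: the applymap() call above turns the sentinel value -9999 into NaN cell by cell;
# pandas' replace() does the same thing in one vectorised call:
weather = weather.replace(-9999, np.nan)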
"execution_count": null, 74 | "metadata": { 75 | "collapsed": true 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "weather['DATE'] = weather['DATE'].map(lambda x: datetime.strptime(str(x), '%Y%m%d').date())" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "weather['DATE']" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "weather.notnull().head()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "weather.dropna()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": false 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "weather.dropna(how='all', axis=1)" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": null, 129 | "metadata": { 130 | "collapsed": false 131 | }, 132 | "outputs": [], 133 | "source": [ 134 | "weather.shape" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": { 141 | "collapsed": false 142 | }, 143 | "outputs": [], 144 | "source": [ 145 | "weather.dropna(thresh=weather.shape[0] * .1, axis=1)" 146 | ] 147 | }, 148 | { 149 | "cell_type": "code", 150 | "execution_count": null, 151 | "metadata": { 152 | "collapsed": true 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "weather = weather.set_index(pd.DatetimeIndex(weather['DATE']))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "weather.head()" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "metadata": { 174 | "collapsed": false 175 | }, 176 | "outputs": [], 177 | "source": [ 178 | "weather.index.duplicated()" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "metadata": { 185 | "collapsed": false 186 | }, 187 | "outputs": [], 188 | "source": [ 189 | "weather['STATION_NAME'].value_counts()" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "metadata": { 196 | "collapsed": false 197 | }, 198 | "outputs": [], 199 | "source": [ 200 | "weather.index.drop_duplicates().sort_values()" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": { 207 | "collapsed": false 208 | }, 209 | "outputs": [], 210 | "source": [ 211 | "weather.groupby('STATION_NAME').resample('D').mean().head()" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": null, 217 | "metadata": { 218 | "collapsed": true 219 | }, 220 | "outputs": [], 221 | "source": [ 222 | "rainy = weather[weather.PRCP >= weather.PRCP.std() * 3 + weather.PRCP.mean()]" 223 | ] 224 | }, 225 | { 226 | "cell_type": "code", 227 | "execution_count": null, 228 | "metadata": { 229 | "collapsed": false 230 | }, 231 | "outputs": [], 232 | "source": [ 233 | "rainy['month'] = rainy.index.month" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": null, 239 | "metadata": { 240 | "collapsed": false 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "%pylab inline" 245 | ] 246 | }, 247 | { 248 | "cell_type": "code", 249 | 
"execution_count": null, 250 | "metadata": { 251 | "collapsed": false 252 | }, 253 | "outputs": [], 254 | "source": [ 255 | "rainy.groupby('month')['PRCP'].sum().plot()" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "### Find the rainiest day in the past century\n", 263 | "\n", 264 | "Try it yourself first!" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": { 271 | "collapsed": false 272 | }, 273 | "outputs": [], 274 | "source": [ 275 | "%load solutions/weather_solution_rainyday.py" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": null, 281 | "metadata": { 282 | "collapsed": true 283 | }, 284 | "outputs": [], 285 | "source": [] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "metadata": {}, 290 | "source": [ 291 | "### Bonus: Fix station names so they are properly matching & chart precipitation over time with one of them" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": { 298 | "collapsed": false 299 | }, 300 | "outputs": [], 301 | "source": [ 302 | "%load solutions/weather_solution_fix_stations.py" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": true 310 | }, 311 | "outputs": [], 312 | "source": [] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "Python 3", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.4.3" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 0 336 | } 337 | -------------------------------------------------------------------------------- /books/pydata-examples/06 - Data Analysis with Pandas.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import requests" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def get_story(story_id):\n", 34 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 35 | " resp = requests.get(url)\n", 36 | " return resp.json()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def get_top_stories():\n", 48 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 49 | " resp = requests.get(url)\n", 50 | " all_stories = [get_story(sid) for sid in resp.json()[:50]]\n", 51 | " return all_stories" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": true 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "df = pd.read_json('../../data/hn.json')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | 
"metadata": { 69 | "collapsed": false 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# df = pd.DataFrame(get_top_stories())" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": false 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "df.head()" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "df = df.set_index('id')" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "df.head()" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "df.by.value_counts()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "df.type.value_counts()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "df.corr()" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": { 146 | "collapsed": false 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "df.cov()" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "df.score.min()" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "df.score.max()" 173 | ] 174 | }, 175 | { 176 | "cell_type": "code", 177 | "execution_count": null, 178 | "metadata": { 179 | "collapsed": false 180 | }, 181 | "outputs": [], 182 | "source": [ 183 | "%pylab inline" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "metadata": { 190 | "collapsed": false 191 | }, 192 | "outputs": [], 193 | "source": [ 194 | "df.plot(x='time', y='score', marker='.')" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": null, 200 | "metadata": { 201 | "collapsed": false 202 | }, 203 | "outputs": [], 204 | "source": [ 205 | "df.sort_values('time').plot(x='time', y='score', marker='.')" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": true 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "df['time'] = pd.to_datetime(df['time'],unit='s')" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "df.time" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": { 234 | "collapsed": false 235 | }, 236 | "outputs": [], 237 | "source": [ 238 | "df['hour'] = df['time'].map(lambda x: x.hour)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "df['hour'].value_counts()" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "metadata": { 256 | "collapsed": false 
257 | }, 258 | "outputs": [], 259 | "source": [ 260 | "df.corr()" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "df.plot(x='time', y='score')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": null, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "df.sort_values('hour').plot(x='hour', y='score')" 283 | ] 284 | }, 285 | { 286 | "cell_type": "markdown", 287 | "metadata": {}, 288 | "source": [ 289 | "## We'll be focusing on groupby next, but here's a teaser!" 290 | ] 291 | }, 292 | { 293 | "cell_type": "code", 294 | "execution_count": null, 295 | "metadata": { 296 | "collapsed": false 297 | }, 298 | "outputs": [], 299 | "source": [ 300 | "df['hourly_mean'] = df.groupby('hour')['score'].transform(mean)" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "df.sort_values('hour').plot(x='hour', y='hourly_mean')" 312 | ] 313 | }, 314 | { 315 | "cell_type": "markdown", 316 | "metadata": {}, 317 | "source": [ 318 | "### Can you chart num of descendants across hours? How about length of kids? \n", 319 | "(Solution uses groups using max and median, again, we will return to those next!)" 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "execution_count": null, 325 | "metadata": { 326 | "collapsed": false 327 | }, 328 | "outputs": [], 329 | "source": [ 330 | "%load solutions/data_analysis_solution.py" 331 | ] 332 | }, 333 | { 334 | "cell_type": "code", 335 | "execution_count": null, 336 | "metadata": { 337 | "collapsed": true 338 | }, 339 | "outputs": [], 340 | "source": [] 341 | }, 342 | { 343 | "cell_type": "code", 344 | "execution_count": null, 345 | "metadata": { 346 | "collapsed": true 347 | }, 348 | "outputs": [], 349 | "source": [] 350 | } 351 | ], 352 | "metadata": { 353 | "kernelspec": { 354 | "display_name": "Python 3", 355 | "language": "python", 356 | "name": "python3" 357 | }, 358 | "language_info": { 359 | "codemirror_mode": { 360 | "name": "ipython", 361 | "version": 3 362 | }, 363 | "file_extension": ".py", 364 | "mimetype": "text/x-python", 365 | "name": "python", 366 | "nbconvert_exporter": "python", 367 | "pygments_lexer": "ipython3", 368 | "version": "3.4.3" 369 | } 370 | }, 371 | "nbformat": 4, 372 | "nbformat_minor": 0 373 | } 374 | -------------------------------------------------------------------------------- /books/pydata-examples/07 - Split Apply Combine.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "import requests" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "def get_story(story_id):\n", 34 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 35 | " resp = requests.get(url)\n", 36 | " return resp.json()" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 
| "metadata": { 43 | "collapsed": true 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "def get_top_stories():\n", 48 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 49 | " resp = requests.get(url)\n", 50 | " all_stories = [get_story(sid) for sid in resp.json()[:50]]\n", 51 | " return all_stories" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "collapsed": false 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "df = pd.read_json('../../data/hn.json')" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": { 69 | "collapsed": true 70 | }, 71 | "outputs": [], 72 | "source": [ 73 | "# df = pd.DataFrame(get_top_stories())" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": { 80 | "collapsed": true 81 | }, 82 | "outputs": [], 83 | "source": [ 84 | "df['time'] = pd.to_datetime(df['time'],unit='s')" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": { 91 | "collapsed": true 92 | }, 93 | "outputs": [], 94 | "source": [ 95 | "df['hour'] = df['time'].map(lambda x: x.hour)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": null, 101 | "metadata": { 102 | "collapsed": false 103 | }, 104 | "outputs": [], 105 | "source": [ 106 | "df['day_of_week'] = df['time'].map(lambda x: x.weekday())" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false 114 | }, 115 | "outputs": [], 116 | "source": [ 117 | "df.head()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": false 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "df.groupby('hour')" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": false 136 | }, 137 | "outputs": [], 138 | "source": [ 139 | "for group, items in df.groupby('hour'):\n", 140 | " print(group, items)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "code", 145 | "execution_count": null, 146 | "metadata": { 147 | "collapsed": false 148 | }, 149 | "outputs": [], 150 | "source": [ 151 | "df.groupby('hour').sum()" 152 | ] 153 | }, 154 | { 155 | "cell_type": "code", 156 | "execution_count": null, 157 | "metadata": { 158 | "collapsed": false 159 | }, 160 | "outputs": [], 161 | "source": [ 162 | "df.groupby('hour')['score'].sum()" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "%pylab inline" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "df.groupby('hour')['score'].sum().plot()" 185 | ] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "execution_count": null, 190 | "metadata": { 191 | "collapsed": true 192 | }, 193 | "outputs": [], 194 | "source": [ 195 | "df['median_hourly_score'] = df.groupby('hour')['score'].transform('median')" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": { 202 | "collapsed": false 203 | }, 204 | "outputs": [], 205 | "source": [ 206 | "df.head()" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "### Can you find most popular users or days of the week? 
Can you add those to the DF using transform?" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "collapsed": false 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "%load solutions/sac_solution.py" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "metadata": { 231 | "collapsed": true 232 | }, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "kernelspec": { 239 | "display_name": "Python 3", 240 | "language": "python", 241 | "name": "python3" 242 | }, 243 | "language_info": { 244 | "codemirror_mode": { 245 | "name": "ipython", 246 | "version": 3 247 | }, 248 | "file_extension": ".py", 249 | "mimetype": "text/x-python", 250 | "name": "python", 251 | "nbconvert_exporter": "python", 252 | "pygments_lexer": "ipython3", 253 | "version": "3.4.3" 254 | } 255 | }, 256 | "nbformat": 4, 257 | "nbformat_minor": 0 258 | } 259 | -------------------------------------------------------------------------------- /books/pydata-examples/08 - Bokeh for Data Visualization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd\n", 12 | "from bokeh.plotting import figure, show\n", 13 | "from bokeh.io import output_notebook\n", 14 | "from bokeh.palettes import Spectral6" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "df = pd.read_csv('../../data/imf_indicators.tsv', delimiter='\\t', thousands=',', na_values=['n/a'],\n", 26 | " encoding='ISO-8859-1')" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "df.head()" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "df['Subject Descriptor'].value_counts()" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "def mscatter(chart, x, y, typestr):\n", 60 | " chart.scatter(x, y, marker=typestr, line_color=\"#6666ee\",\n", 61 | " fill_color=\"#ee6666\", fill_alpha=0.5, size=12)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": { 68 | "collapsed": true 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "def draw_scatter(df):\n", 73 | " chart = figure(title=\"IMF Unemployment\")\n", 74 | " output_notebook()\n", 75 | " for year in ['2013', '2014', '2015', '2016']:\n", 76 | " df[year].map(lambda x: mscatter(chart, int(year), x, 'circle'))\n", 77 | " return chart" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "chart = draw_scatter(df[df['Subject Descriptor'] == 'Unemployment rate'])\n", 89 | "show(chart)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": { 96 | "collapsed": false 97 | }, 98 | "outputs": [], 99 | "source": [ 100 | "grouped_mean_df = df[df['Subject Descriptor'] == 'Unemployment rate'].groupby(\n", 101 | " 'Country')[['2013', 
'2014', '2015', '2016']].mean()" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "metadata": { 108 | "collapsed": false 109 | }, 110 | "outputs": [], 111 | "source": [ 112 | "grouped_mean_df.head()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": { 119 | "collapsed": true 120 | }, 121 | "outputs": [], 122 | "source": [ 123 | "def draw_line(df):\n", 124 | " chart = figure(title=\"IMF Unemployment\")\n", 125 | " output_notebook()\n", 126 | " color_count = 0\n", 127 | " for country, data in df.iterrows():\n", 128 | " chart.line(data.index.values, data.values, legend=country, line_color=Spectral6[color_count])\n", 129 | " if color_count+1 != len(Spectral6):\n", 130 | " color_count += 1\n", 131 | " else:\n", 132 | " color_count = 0\n", 133 | " return chart" 134 | ] 135 | }, 136 | { 137 | "cell_type": "code", 138 | "execution_count": null, 139 | "metadata": { 140 | "collapsed": false 141 | }, 142 | "outputs": [], 143 | "source": [ 144 | "chart = draw_line(grouped_mean_df)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "metadata": { 151 | "collapsed": false 152 | }, 153 | "outputs": [], 154 | "source": [ 155 | "show(chart)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "### Can you rewrite this to show a reasonable number of countries (focus on between four to seven)" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": { 169 | "collapsed": false 170 | }, 171 | "outputs": [], 172 | "source": [ 173 | "%load solutions/visualization_solution.py" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": true 181 | }, 182 | "outputs": [], 183 | "source": [] 184 | } 185 | ], 186 | "metadata": { 187 | "kernelspec": { 188 | "display_name": "Python 3", 189 | "language": "python", 190 | "name": "python3" 191 | }, 192 | "language_info": { 193 | "codemirror_mode": { 194 | "name": "ipython", 195 | "version": 3 196 | }, 197 | "file_extension": ".py", 198 | "mimetype": "text/x-python", 199 | "name": "python", 200 | "nbconvert_exporter": "python", 201 | "pygments_lexer": "ipython3", 202 | "version": "3.4.3" 203 | } 204 | }, 205 | "nbformat": 4, 206 | "nbformat_minor": 0 207 | } 208 | -------------------------------------------------------------------------------- /books/pydata-examples/Introduction to Joins.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import pandas as pd" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "employees = pd.read_csv('../../data/employees.csv')" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": false 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "employees" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": true 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "employees = employees.set_index('id')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 
53 | "outputs": [], 54 | "source": [ 55 | "employees" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": true 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "titles = pd.read_csv('../../data/titles.csv', index_col=0)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "titles" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "employees.join(titles, rsuffix='_title')" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "employees.join(titles, on=['title_id'], rsuffix='_title')" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "empl_with_title = employees.join(titles, on=['title_id'], rsuffix='_title')" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": false 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "departments = pd.read_csv('../../data/departments.csv', index_col=0)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": false 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "departments" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "empl_with_title.join(departments, on=['department_id'], rsuffix='_dept')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "empl_with_title.join(departments, on=['department_id'], how='inner', rsuffix='_dept')" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "empl_with_title.join(departments, on=['department_id'], how='right', rsuffix='_dept')" 166 | ] 167 | } 168 | ], 169 | "metadata": { 170 | "kernelspec": { 171 | "display_name": "Python 3", 172 | "language": "python", 173 | "name": "python3" 174 | }, 175 | "language_info": { 176 | "codemirror_mode": { 177 | "name": "ipython", 178 | "version": 3 179 | }, 180 | "file_extension": ".py", 181 | "mimetype": "text/x-python", 182 | "name": "python", 183 | "nbconvert_exporter": "python", 184 | "pygments_lexer": "ipython3", 185 | "version": "3.4.3" 186 | } 187 | }, 188 | "nbformat": 4, 189 | "nbformat_minor": 0 190 | } 191 | -------------------------------------------------------------------------------- /books/pydata-examples/Introduction to Regex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import re" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": { 18 | "collapsed": true 19 | }, 20 | "outputs": [], 21 | "source": [ 22 | "word = r'\\w+'" 23 | ] 24 | }, 25 | { 
26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": { 29 | "collapsed": true 30 | }, 31 | "outputs": [], 32 | "source": [ 33 | "sentence = 'I am testing with Regex.'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "collapsed": false 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "search_result = re.search(word, sentence)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": { 51 | "collapsed": false 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "search_result" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": { 62 | "collapsed": false 63 | }, 64 | "outputs": [], 65 | "source": [ 66 | "search_result.group()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": { 73 | "collapsed": false 74 | }, 75 | "outputs": [], 76 | "source": [ 77 | "match_result = re.match(word, sentence)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": null, 83 | "metadata": { 84 | "collapsed": false 85 | }, 86 | "outputs": [], 87 | "source": [ 88 | "match_result" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "match_result.group()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "re.findall(word, sentence)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "metadata": { 117 | "collapsed": true 118 | }, 119 | "outputs": [], 120 | "source": [ 121 | "capitalized_word = r'[A-Z]\\w+'" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "collapsed": true 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "search_result = re.search(capitalized_word, sentence)" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": null, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "search_result.group()" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": true 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "match_result = re.match(capitalized_word, sentence)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "match_result" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": true 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "sentence_with_digits = 'The airport is 4,300 meters away, but I still hear 10 planes at night.'" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": null, 182 | "metadata": { 183 | "collapsed": true 184 | }, 185 | "outputs": [], 186 | "source": [ 187 | "numbers = r'\\d+'" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": null, 193 | "metadata": { 194 | "collapsed": false 195 | }, 196 | "outputs": [], 197 | "source": [ 198 | "re.findall(numbers, sentence_with_digits)" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | 
"source": [ 209 | "thousands_numbers = '(\\d+,\\d+|\\d+)'" 210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "re.findall(thousands_numbers, sentence_with_digits)" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": true 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "city_state = '(?P[\\w\\s]+), (?P[A-Z]{2})'" 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": null, 237 | "metadata": { 238 | "collapsed": true 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "address = 'My House, 123 Main Street, Los Angeles, CA 90013'" 243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": null, 248 | "metadata": { 249 | "collapsed": false 250 | }, 251 | "outputs": [], 252 | "source": [ 253 | "match = re.finditer(city_state, address)" 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": null, 259 | "metadata": { 260 | "collapsed": false 261 | }, 262 | "outputs": [], 263 | "source": [ 264 | "for city in match:\n", 265 | " print(city.group('city'))" 266 | ] 267 | }, 268 | { 269 | "cell_type": "markdown", 270 | "metadata": { 271 | "collapsed": true 272 | }, 273 | "source": [ 274 | "### Write the regex to match the street name and number" 275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": null, 280 | "metadata": { 281 | "collapsed": true 282 | }, 283 | "outputs": [], 284 | "source": [] 285 | } 286 | ], 287 | "metadata": { 288 | "kernelspec": { 289 | "display_name": "Python 3", 290 | "language": "python", 291 | "name": "python3" 292 | }, 293 | "language_info": { 294 | "codemirror_mode": { 295 | "name": "ipython", 296 | "version": 3 297 | }, 298 | "file_extension": ".py", 299 | "mimetype": "text/x-python", 300 | "name": "python", 301 | "nbconvert_exporter": "python", 302 | "pygments_lexer": "ipython3", 303 | "version": "3.4.3" 304 | } 305 | }, 306 | "nbformat": 4, 307 | "nbformat_minor": 0 308 | } 309 | -------------------------------------------------------------------------------- /books/pydata-examples/Update HN data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "collapsed": true 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import requests\n", 12 | "from multiprocessing import Process, Manager\n", 13 | "import json\n", 14 | "import pandas as pd" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": { 21 | "collapsed": true 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "def get_story(story_id, stories):\n", 26 | " url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id\n", 27 | " resp = requests.get(url)\n", 28 | " stories.append(resp.json())" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": { 35 | "collapsed": true 36 | }, 37 | "outputs": [], 38 | "source": [ 39 | "def get_top_stories():\n", 40 | " manager = Manager()\n", 41 | " stories = manager.list()\n", 42 | " url = 'https://hacker-news.firebaseio.com/v0/topstories.json'\n", 43 | " ids = requests.get(url)\n", 44 | " processes = [Process(target=get_story, args=(sid, stories))\n", 45 | " for sid in ids.json()]\n", 46 | " for p in processes:\n", 47 | " p.start()\n", 48 | " for p in processes:\n", 49 | " 
p.join()\n", 50 | " return stories" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 4, 56 | "metadata": { 57 | "collapsed": false 58 | }, 59 | "outputs": [], 60 | "source": [ 61 | "df = pd.read_json('../../data/hn.json')" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": 5, 67 | "metadata": { 68 | "collapsed": false 69 | }, 70 | "outputs": [], 71 | "source": [ 72 | "df['id'] = df.index.astype(int)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 6, 78 | "metadata": { 79 | "collapsed": false 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "top_stories = get_top_stories()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 7, 89 | "metadata": { 90 | "collapsed": false 91 | }, 92 | "outputs": [], 93 | "source": [ 94 | "new_df = pd.DataFrame([t for t in top_stories])" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": 8, 100 | "metadata": { 101 | "collapsed": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "joined = df.append(new_df)" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 9, 111 | "metadata": { 112 | "collapsed": false 113 | }, 114 | "outputs": [ 115 | { 116 | "data": { 117 | "text/plain": [ 118 | "Int64Index([11693122, 11693168, 11693169, 11693184, 11693185, 11693193,\n", 119 | " 11693217, 11693232, 11693240, 11693296,\n", 120 | " ...\n", 121 | " 11722573, 11707449, 11711640, 11713423, 11708769, 11718494,\n", 122 | " 11717710, 11717907, 11720912, 11707805],\n", 123 | " dtype='int64', name='id', length=1174)" 124 | ] 125 | }, 126 | "execution_count": 9, 127 | "metadata": {}, 128 | "output_type": "execute_result" 129 | } 130 | ], 131 | "source": [ 132 | "joined.set_index('id').index" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 10, 138 | "metadata": { 139 | "collapsed": false 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "df = joined.drop_duplicates(subset='id', keep='last')" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 11, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "df.set_index('id').to_json('../../data/hn.json')" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": true 162 | }, 163 | "outputs": [], 164 | "source": [] 165 | } 166 | ], 167 | "metadata": { 168 | "kernelspec": { 169 | "display_name": "Python 3", 170 | "language": "python", 171 | "name": "python3" 172 | }, 173 | "language_info": { 174 | "codemirror_mode": { 175 | "name": "ipython", 176 | "version": 3 177 | }, 178 | "file_extension": ".py", 179 | "mimetype": "text/x-python", 180 | "name": "python", 181 | "nbconvert_exporter": "python", 182 | "pygments_lexer": "ipython3", 183 | "version": "3.4.3" 184 | } 185 | }, 186 | "nbformat": 4, 187 | "nbformat_minor": 0 188 | } 189 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/data_analysis_solution.py: -------------------------------------------------------------------------------- 1 | import ast 2 | 3 | def get_list_len(line): 4 | if not line: 5 | return 0 6 | elif isinstance(line, list): 7 | return len(line) 8 | return len(ast.literal_eval(line)) 9 | 10 | 11 | df['hourly_desc_max'] = df.groupby('hour')['descendants'].transform(max) 12 | 13 | df.sort_values('hour').plot(x='hour', y='hourly_desc_max') 14 | 15 | df['num_kids'] = df['kids'].map(get_list_len) 16 | 17 | 
df['hourly_kids_median'] = df.groupby('hour')['num_kids'].transform(median) 18 | 19 | df.sort_values('hour').plot(x='hour', y='hourly_kids_median') 20 | 21 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/regex_solution.py: -------------------------------------------------------------------------------- 1 | 2 | MATCHING += (('Julia', '(J|j)ulia'), ) 3 | 4 | def count_languages(): 5 | stories = get_top_stories() 6 | final_tallies = {} 7 | for s in stories: 8 | long_string = u'{} {}'.format(s.get('title'), s.get('url')) 9 | for language, regex in dict(MATCHING).items(): 10 | if re.search(regex, long_string): 11 | if language not in final_tallies.keys(): 12 | final_tallies[language] = { 13 | 'score': s.get('score'), 14 | 'descendants': s.get('descendants'), 15 | 'urls': [s.get('url')]} 16 | else: 17 | final_tallies[language]['score'] += s.get('score') 18 | final_tallies[language][ 19 | 'descendants'] += s.get('descendants') 20 | final_tallies[language]['urls'].append(s.get('url')) 21 | return final_tallies 22 | 23 | count_languages() 24 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/sac_solution.py: -------------------------------------------------------------------------------- 1 | df['score_sum_dow'] = df.groupby('day_of_week')['score'].transform(sum) 2 | 3 | df.sort_values('day_of_week').plot(x='day_of_week', y='score_sum_dow') 4 | 5 | 6 | df['score_sum_user'] = df.groupby('by')['score'].transform(sum) 7 | 8 | df.sort_values('score_sum_user', ascending=False).groupby('by')[['by', 'score_sum_user']].head(1) 9 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/stocks_solution.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | ibm = data.DataReader('IBM', 'yahoo', datetime(2007,1, 1), datetime(2016, 1, 1)) 4 | ibm['Stock'] = 'IBM' 5 | 6 | merged = merged.append(ibm) 7 | 8 | 9 | lowest_ibm = merged[merged['Stock'] == 'IBM'].sort_values('Close').head(1) 10 | lowest_fb = merged[merged['Stock'] == 'FB'].sort_values('Close').iloc[0] 11 | lowest_goog = merged[merged['Stock'] == 'GOOGL'].sort_values('Close').ix[0] 12 | 13 | (lowest_ibm.index, lowest_fb.name, lowest_goog.name) 14 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/visualization_solution.py: -------------------------------------------------------------------------------- 1 | print(grouped_mean_df.index) 2 | 3 | subset = grouped_mean_df.loc[['United Kingdom', 'Germany', 'Greece', 'United States', 'Czech Republic']] 4 | 5 | chart = draw_line(subset) 6 | 7 | show(chart) 8 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/weather_solution_fix_stations.py: -------------------------------------------------------------------------------- 1 | weather['STATION_NAME'] = weather['STATION_NAME'].map(lambda x: x.replace('BERLIN ', '')) 2 | 3 | weather[weather['STATION_NAME'] == 'TEMPELHOF GM']['PRCP'].plot() 4 | 5 | weather[weather['STATION_NAME'] == 'TEMPELHOF GM'].reset_index().plot('DATE', 'PRCP', style='g--') 6 | -------------------------------------------------------------------------------- /books/pydata-examples/solutions/weather_solution_rainyday.py: -------------------------------------------------------------------------------- 1 | 
rainy[rainy['PRCP'] == rainy['PRCP'].max()] 2 | 3 | # Check: 4 | # (DE) https://meteocb.wordpress.com/2014/09/19/08-08-1978-als-das-wasser-kam-und-der-osten-baden-ging/ 5 | # or (ENG) http://dailyiowan.lib.uiowa.edu/DI/1948/di1948-08-14.pdf to read about 6 | # that day! 7 | -------------------------------------------------------------------------------- /conda_requirements.txt: -------------------------------------------------------------------------------- 1 | bokeh==0.11.1 2 | certifi==2016.2.28 3 | cffi==1.6.0 4 | cryptography==1.3.2 5 | cycler==0.10.0 6 | decorator==4.0.9 7 | enum34==1.1.6 8 | futures==3.0.5 9 | idna==2.1 10 | ipaddress==1.0.16 11 | ipykernel==4.3.1 12 | ipython==4.2.0 13 | ipython_genutils==0.1.0 14 | ipywidgets 15 | jdcal==1.2 16 | Jinja2==2.8 17 | jsonschema==2.5.1 18 | jupyter==1.0.0 19 | jupyter_client==4.2.2 20 | jupyter_console==4.1.1 21 | lxml==3.6.0 22 | MarkupSafe==0.23 23 | matplotlib==1.5.1 24 | mistune==0.7.2 25 | nbconvert==4.2.0 26 | nbformat==4.0.1 27 | ndg_httpsclient 28 | notebook==4.2.0 29 | numpy==1.11.0 30 | pandas==0.18.1 31 | pandas-datareader==0.2.1 32 | pathlib2==2.1.0 33 | pexpect==4.0.1 34 | pickleshare==0.7.2 35 | ptyprocess==0.5.1 36 | pyasn1==0.1.9 37 | pycparser==2.14 38 | Pygments==2.1.3 39 | pyparsing==2.1.4 40 | python-dateutil==2.5.3 41 | pytz==2016.4 42 | PyYAML==3.11 43 | pyzmq==15.2.0 44 | qtconsole==4.2.1 45 | requests==2.10.0 46 | requests-file==1.4 47 | simplegeneric==0.8.1 48 | singledispatch==3.4.0.3 49 | six==1.10.0 50 | terminado==0.6 51 | tornado==4.3 52 | traitlets==4.2.1 53 | xlrd==0.9.4 54 | -------------------------------------------------------------------------------- /data/WEF_GlobalCompetitivenessReport_2014-15.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/WEF_GlobalCompetitivenessReport_2014-15.pdf -------------------------------------------------------------------------------- /data/comments.json: -------------------------------------------------------------------------------- 1 | [{"parent": 9933918, "text": "If you want an excellent book about Commodore:

http://www.amazon.com/On-Edge-Spectacular-Rise-Commodore/dp/...", "id": 9934512, "time": 1437638136, "type": "comment", "by": "walkingolof"}, {"kids": [9934328], "parent": 9933918, "text": "In 4th grade a friend got an Amiga. He boasted that it had 4096 colors, and we ridiculed him mercilessly. There aren't that many colors! Can you name them?

We had our Apple IIs and TI-99/4As with their 16 colors or less and couldn't imagine anything greater.

Then we went to his house and saw Defender of the Crown, and we were humbled.", "id": 9934310, "time": 1437633136, "type": "comment", "by": "nsxwolf"}, {"parent": 9933918, "text": "I am so going to this! My 13yo son is going to be so excited to attend.", "id": 9934097, "time": 1437628297, "type": "comment", "by": "shawndumas"}, {"parent": 9933918, "text": "One can read "The Future Was Here", to learn more about the platform and its contributors. https://mitpress.mit.edu/books/future-was-here - there's also an accompanying website: http://amiga.filfre.net

Note that the Platform Studies series (http://www.platformstudies.com) has exciting upcoming stuff.", "id": 9934191, "time": 1437630223, "type": "comment", "by": "foobarge"}, {"parent": 9934113, "text": "I was a power plant operator on a nuclear submarine. We were shown a classified film that about the SL-1 incident as part of our training. Recalling that film still gives me chills.

A lot was learned about reactor safety as result of that disaster.", "id": 9934316, "time": 1437633252, "type": "comment", "by": "MurMan"}, {"kids": [9934364, 9934367], "parent": 9934113, "text": "Does the Demon Core count as a reactor?", "id": 9934317, "time": 1437633253, "type": "comment", "by": "anabis"}, {"kids": [9934377, 9934390, 9934304, 9934355, 9934334], "parent": 9934113, "text": ""...killed by the only fatal reactor accident in US history."

Is that really true?

'\u201cNobody died at Three Mile Island\u201d \u2014 unless you count babies.'

http://www.counterpunch.org/2015/03/27/cancer-and-infant-mor...", "id": 9934251, "time": 1437631496, "type": "comment", "by": "retrogradeorbit"}, {"parent": 9934113, "text": "That part of Idaho is interesting to drive through. It's near craters of the moon national park. The Idaho Nuclear Laboratory is pretty big. I remember it being nearly 50 miles along one side of the road with all barbed wire fences. It's pretty lonely and creepy out there. There was one cliff in Atomic City that was covered with graffiti of large random numbers. It turned out that the numbers were from high school graduating classes but the numbers weren't in order so it seemed like some random code.", "id": 9934273, "time": 1437632179, "type": "comment", "by": "abecode"}, {"parent": 9934113, "text": "The SL-1 story is well known in the industry. The Atomic Energy Commission has a video.[1] SL-1 was an experimental reactor at the National Reactor Testing Station in Idaho, an 8000 square mile AEC reservation. It's an isolated area, and the test reactors were many miles apart, just in case.

[1] https://www.youtube.com/watch?v=qOt7xDKxmCM", "id": 9934314, "time": 1437633227, "type": "comment", "by": "Animats"}, {"parent": 9934113, "text": "I find the term "accident" a somewhat meaningless distinction. Millions of people have gotten cancer (statistically) from the radiation in the atmosphere as a result of nuclear tests. Does it really matter that the tests were intentional or not?", "id": 9934417, "time": 1437636062, "type": "comment", "by": "JohnGB"}, {"parent": 9934113, "text": "For what it's worth, I like the original title on the article better.", "id": 9934444, "time": 1437636442, "type": "comment", "by": "hudibras"}, {"parent": 9933757, "text": "I'd feel smug about my choice of cell phone but Samsung sponsored the Olympics.", "id": 9934491, "time": 1437637613, "type": "comment", "by": "billpg"}, {"kids": [9934464, 9934447], "parent": 9933757, "text": "This is why large companies should be more regulated. They should not be able to put pressure on labels or producers and get all the benefits. \nWhat Apple did by killing this startup is not ethical. Plus, by killing it, they tried to get a monopole. Microsoft got so many fines at the time because they had the monopole on PCs. If a large company tries to destroy startups just because they feel threatened, they should be penalised the same way Microsoft did (With huge fines) or I think of another way: forcing large companies to work with startups instead of shutting them down.", "id": 9934360, "time": 1437634765, "type": "comment", "by": "JohnyLy"}, {"parent": 9933757, "text": "Hint: when you start a service solving a very generic problem, you have a big chance of being outmaneuvered by a competitor.

Don't cry if you fail.", "id": 9934494, "time": 1437637717, "type": "comment", "by": "amelius"}, {"kids": [9934384], "parent": 9933757, "text": "I kept waiting for the payoff, but the article never gave proof that Steve Jobs was behind it. Sounds to me like what happened is that Steve when negotiating with Warner Brothers said something like, "How can you charge us those rates for your music when I have a whole list of places online that have it for free?" At which point Warner said, "We'll take care of those. So the rates can stand."", "id": 9934365, "time": 1437634880, "type": "comment", "by": "dangero"}, {"parent": 9933757, "text": "This article reminds me of a lot of what Dalton Caldwell spoke about in his talk about why not to start a music startup at a Startup School in the last several years.

This business seems to have been run and have proceeded horribly, for the exact reasons that Caldwell and everyone else with experience in that area seems to be extremely familiar with.

Is there anything legitimately interesting to the "Apple" and "Steve Jobs" parts of this story other than the usual clickbait?", "id": 9934394, "time": 1437635446, "type": "comment", "by": "npp"}, {"kids": [9934403, 9934402], "parent": 9933757, "text": "I read this half asleep and a little inebriated but it seemed to evolve into some kind of bizarre self-aggrandizing pitch on how this Kasian Franks is a super-human with deep hard-to-duplicate knowledge and insights spanning multiple important and lucrative industries. Almost some kind of cred piece bolstered by his proxy encounter with Steve Jobs. Weird.", "id": 9934371, "time": 1437634947, "type": "comment", "by": "dxhdr"}, {"parent": 9933757, "text": "I actually like the second part of the article a lot more intriguing where he talks about his and the company's personal background. It's these stories that inspire to start a company.", "id": 9934385, "time": 1437635102, "type": "comment", "by": "Libermentix"}, {"parent": 9933757, "text": "TL;DR Music technology startups suck.", "dead": true, "id": 9934414, "time": 1437635958, "type": "comment", "by": "brianmcconnell"}, {"parent": 9933757, "text": "Whoever uses "analyzation" publicly deserves to have a failed start up.", "id": 9934435, "time": 1437636366, "type": "comment", "by": "jevgeni"}, {"kids": [9934471], "parent": 9933757, "text": "TL;DR Music technology startups suck.", "id": 9934413, "time": 1437635956, "type": "comment", "by": "brianmcconnell"}] -------------------------------------------------------------------------------- /data/departments.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,sales 3 | 2,product 4 | 3,technical support 5 | 4,human resources 6 | -------------------------------------------------------------------------------- /data/employees.csv: -------------------------------------------------------------------------------- 1 | id,name,title_id,years,department_id 2 | 2,chris,1,15,2 3 | 6,sandy,2,5,7 4 | 7,christine,1,5,4 5 | 12,aaron,3,3,3 6 | -------------------------------------------------------------------------------- /data/imf_indicators.tsv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/imf_indicators.tsv -------------------------------------------------------------------------------- /data/iso-2.csv: -------------------------------------------------------------------------------- 1 | Name,Code 2 | Afghanistan,AF 3 | Åland Islands,AX 4 | Albania,AL 5 | Algeria,DZ 6 | American Samoa,AS 7 | Andorra,AD 8 | Angola,AO 9 | Anguilla,AI 10 | Antarctica,AQ 11 | Antigua and Barbuda,AG 12 | Argentina,AR 13 | Armenia,AM 14 | Aruba,AW 15 | Australia,AU 16 | Austria,AT 17 | Azerbaijan,AZ 18 | Bahamas,BS 19 | Bahrain,BH 20 | Bangladesh,BD 21 | Barbados,BB 22 | Belarus,BY 23 | Belgium,BE 24 | Belize,BZ 25 | Benin,BJ 26 | Bermuda,BM 27 | Bhutan,BT 28 | "Bolivia, Plurinational State of",BO 29 | "Bonaire, Sint Eustatius and Saba",BQ 30 | Bosnia and Herzegovina,BA 31 | Botswana,BW 32 | Bouvet Island,BV 33 | Brazil,BR 34 | British Indian Ocean Territory,IO 35 | Brunei Darussalam,BN 36 | Bulgaria,BG 37 | Burkina Faso,BF 38 | Burundi,BI 39 | Cambodia,KH 40 | Cameroon,CM 41 | Canada,CA 42 | Cape Verde,CV 43 | Cayman Islands,KY 44 | Central African Republic,CF 45 | Chad,TD 46 | Chile,CL 47 | China,CN 48 | Christmas Island,CX 49 | Cocos (Keeling) Islands,CC 50 | Colombia,CO 51 | Comoros,KM 
52 | Congo,CG 53 | "Congo, the Democratic Republic of the",CD 54 | Cook Islands,CK 55 | Costa Rica,CR 56 | Côte d'Ivoire,CI 57 | Croatia,HR 58 | Cuba,CU 59 | Curaçao,CW 60 | Cyprus,CY 61 | Czech Republic,CZ 62 | Denmark,DK 63 | Djibouti,DJ 64 | Dominica,DM 65 | Dominican Republic,DO 66 | Ecuador,EC 67 | Egypt,EG 68 | El Salvador,SV 69 | Equatorial Guinea,GQ 70 | Eritrea,ER 71 | Estonia,EE 72 | Ethiopia,ET 73 | Falkland Islands (Malvinas),FK 74 | Faroe Islands,FO 75 | Fiji,FJ 76 | Finland,FI 77 | France,FR 78 | French Guiana,GF 79 | French Polynesia,PF 80 | French Southern Territories,TF 81 | Gabon,GA 82 | Gambia,GM 83 | Georgia,GE 84 | Germany,DE 85 | Ghana,GH 86 | Gibraltar,GI 87 | Greece,GR 88 | Greenland,GL 89 | Grenada,GD 90 | Guadeloupe,GP 91 | Guam,GU 92 | Guatemala,GT 93 | Guernsey,GG 94 | Guinea,GN 95 | Guinea-Bissau,GW 96 | Guyana,GY 97 | Haiti,HT 98 | Heard Island and McDonald Islands,HM 99 | Holy See (Vatican City State),VA 100 | Honduras,HN 101 | Hong Kong SAR,HK 102 | Hungary,HU 103 | Iceland,IS 104 | India,IN 105 | Indonesia,ID 106 | "Iran, Islamic Republic of",IR 107 | Iraq,IQ 108 | Ireland,IE 109 | Isle of Man,IM 110 | Israel,IL 111 | Italy,IT 112 | Jamaica,JM 113 | Japan,JP 114 | Jersey,JE 115 | Jordan,JO 116 | Kazakhstan,KZ 117 | Kenya,KE 118 | Kiribati,KI 119 | "Korea, Democratic People's Republic of",KP 120 | Korea,KR 121 | Kuwait,KW 122 | Kyrgyzstan,KG 123 | Lao People's Democratic Republic,LA 124 | Latvia,LV 125 | Lebanon,LB 126 | Lesotho,LS 127 | Liberia,LR 128 | Libya,LY 129 | Liechtenstein,LI 130 | Lithuania,LT 131 | Luxembourg,LU 132 | Macao,MO 133 | "Macedonia, the Former Yugoslav Republic of",MK 134 | Madagascar,MG 135 | Malawi,MW 136 | Malaysia,MY 137 | Maldives,MV 138 | Mali,ML 139 | Malta,MT 140 | Marshall Islands,MH 141 | Martinique,MQ 142 | Mauritania,MR 143 | Mauritius,MU 144 | Mayotte,YT 145 | Mexico,MX 146 | "Micronesia, Federated States of",FM 147 | "Moldova, Republic of",MD 148 | Monaco,MC 149 | Mongolia,MN 150 | Montenegro,ME 151 | Montserrat,MS 152 | Morocco,MA 153 | Mozambique,MZ 154 | Myanmar,MM 155 | Namibia,NA 156 | Nauru,NR 157 | Nepal,NP 158 | Netherlands,NL 159 | New Caledonia,NC 160 | New Zealand,NZ 161 | Nicaragua,NI 162 | Niger,NE 163 | Nigeria,NG 164 | Niue,NU 165 | Norfolk Island,NF 166 | Northern Mariana Islands,MP 167 | Norway,NO 168 | Oman,OM 169 | Pakistan,PK 170 | Palau,PW 171 | "Palestine, State of",PS 172 | Panama,PA 173 | Papua New Guinea,PG 174 | Paraguay,PY 175 | Peru,PE 176 | Philippines,PH 177 | Pitcairn,PN 178 | Poland,PL 179 | Portugal,PT 180 | Puerto Rico,PR 181 | Qatar,QA 182 | Réunion,RE 183 | Romania,RO 184 | Russian Federation,RU 185 | Rwanda,RW 186 | Saint Barthélemy,BL 187 | "Saint Helena, Ascension and Tristan da Cunha",SH 188 | Saint Kitts and Nevis,KN 189 | Saint Lucia,LC 190 | Saint Martin (French part),MF 191 | Saint Pierre and Miquelon,PM 192 | Saint Vincent and the Grenadines,VC 193 | Samoa,WS 194 | San Marino,SM 195 | Sao Tome and Principe,ST 196 | Saudi Arabia,SA 197 | Senegal,SN 198 | Serbia,RS 199 | Seychelles,SC 200 | Sierra Leone,SL 201 | Singapore,SG 202 | Sint Maarten (Dutch part),SX 203 | Slovak Republic,SK 204 | Slovenia,SI 205 | Solomon Islands,SB 206 | Somalia,SO 207 | South Africa,ZA 208 | South Georgia and the South Sandwich Islands,GS 209 | South Sudan,SS 210 | Spain,ES 211 | Sri Lanka,LK 212 | Sudan,SD 213 | Suriname,SR 214 | Svalbard and Jan Mayen,SJ 215 | Swaziland,SZ 216 | Sweden,SE 217 | Switzerland,CH 218 | Syrian Arab Republic,SY 219 | "Taiwan Province of China",TW 220 | Tajikistan,TJ 221 
| "Tanzania, United Republic of",TZ 222 | Thailand,TH 223 | Timor-Leste,TL 224 | Togo,TG 225 | Tokelau,TK 226 | Tonga,TO 227 | Trinidad and Tobago,TT 228 | Tunisia,TN 229 | Turkey,TR 230 | Turkmenistan,TM 231 | Turks and Caicos Islands,TC 232 | Tuvalu,TV 233 | Uganda,UG 234 | Ukraine,UA 235 | United Arab Emirates,AE 236 | United Kingdom,GB 237 | United States,US 238 | United States Minor Outlying Islands,UM 239 | Uruguay,UY 240 | Uzbekistan,UZ 241 | Vanuatu,VU 242 | "Venezuela, Bolivarian Republic of",VE 243 | Viet Nam,VN 244 | "Virgin Islands, British",VG 245 | "Virgin Islands, U.S.",VI 246 | Wallis and Futuna,WF 247 | Western Sahara,EH 248 | Yemen,YE 249 | Zambia,ZM 250 | Zimbabwe,ZW 251 | -------------------------------------------------------------------------------- /data/titles.csv: -------------------------------------------------------------------------------- 1 | id,name 2 | 1,President 3 | 2,Lead Engineer 4 | 3,Intern 5 | -------------------------------------------------------------------------------- /data/topstories.json: -------------------------------------------------------------------------------- 1 | [{"kids": [9934443], "descendants": 1, "url": "http://www.nytimes.com/2015/07/22/books/dr-seuss-book-a-discovery-in-a-box-and-then-a-reconstruction.html", "text": "", "title": "Dr. Seuss Book: Yes, They Found It in a Box", "by": "pepys", "score": 27, "time": 1437609293, "type": "story", "id": 9933147}, {"kids": [9934496, 9934507], "descendants": 3, "url": "http://patft.uspto.gov/netacgi/nph-Parser?Sect1=PTO2&Sect2=HITOFF&p=1&u=%2Fnetahtml%2FPTO%2Fsearch-bool.html&r=1&f=G&l=50&co1=AND&d=PTXT&s1=9087098.PN.&OS=PN/9087098&RS=PN/9087098", "text": "", "title": "Time-Series Database patented by GE", "by": "rodionos", "score": 14, "time": 1437633378, "type": "story", "id": 9934323}, {"title": "Bosun \u2013 open-source monitoring and alerting system by Stack Exchange", "url": "http://bosun.org/", "descendants": 0, "by": "aps-sids", "score": 15, "time": 1437627794, "type": "story", "id": 9934074}, {"kids": [9934493, 9934489, 9934500], "descendants": 3, "url": "https://en.wikipedia.org/wiki/ILoo", "text": "", "title": "iLoo", "by": "vezzy-fnord", "score": 28, "time": 1437614726, "type": "story", "id": 9933435}, {"kids": [9934039, 9933888, 9933800, 9933971, 9933779, 9934061, 9933880, 9934267, 9933916, 9933697, 9933968, 9934225, 9934064, 9933849, 9934058], "descendants": 75, "url": "https://arenavc.com/2015/07/airbnb-my-1-billion-lesson/", "text": "", "title": "Airbnb, My $1B Lesson", "by": "rvcamo", "score": 254, "time": 1437617429, "type": "story", "id": 9933600}, {"kids": [9933518, 9933645, 9933495, 9933461, 9933731, 9933507, 9933664, 9933470], "descendants": 43, "url": "https://lwn.net/SubscriberLink/651645/f0f5d5e6460edc60/", "text": "", "title": "rm -r fs/ext3", "by": "JoshTriplett", "score": 136, "time": 1437613222, "type": "story", "id": 9933354}, {"descendants": 0, "url": "https://ca.news.yahoo.com/blogs/dailybrew/surrey-bc-rolls-out-siri-for-cities-app-powered-185104504.html", "text": "", "title": "Surrey rolls out 'Siri for cities' app powered by IBM's Watson", "by": "hack4supper", "score": 5, "time": 1437623020, "type": "story", "id": 9933859}, {"kids": [9931667, 9933925, 9931668, 9932775, 9932646, 9932309, 9933995, 9932933, 9933433, 9931851, 9931734, 9931335], "descendants": 43, "url": "http://hapgood.us/2015/07/21/beyond-conversation/", "title": "Links as originally imagined were a separate layer of annotation on documents", "by": "jeremya", "score": 104, "time": 1437568493, 
"type": "story", "id": 9929187}, {"kids": [9931518, 9932569, 9932060, 9931620, 9931610, 9934139, 9933646, 9933530, 9931446], "descendants": 41, "url": "http://www.spiegel.de/international/europe/traveling-from-romania-to-portugal-in-an-illicit-van-a-1043797.html", "text": "", "title": "The Last European: Romanian Driver Navigates the Soul of the EU", "by": "lermontov", "score": 86, "time": 1437583450, "type": "story", "id": 9930713}, {"kids": [9933352, 9933368, 9933024, 9934400, 9932715, 9934214, 9933661, 9932630, 9933941, 9932754, 9933072, 9933074, 9932150], "descendants": 33, "url": "http://blog.comealive.io/Alive-Beta/", "title": "Alive Beta \u2013 Live Coding in Visual Studio", "by": "Permit", "score": 84, "time": 1437597115, "type": "story", "id": 9932118}, {"kids": [9932107, 9932832, 9931741, 9932928, 9931888, 9932172], "descendants": 24, "url": "http://www.coreboot.org/pipermail/coreboot/2015-July/080120.html", "text": "", "title": "Announcing coreboot 4.1", "by": "conductor", "score": 112, "time": 1437591932, "type": "story", "id": 9931636}, {"kids": [9932959, 9933275, 9933290, 9932069, 9933515], "descendants": 38, "url": "http://www.brainpickings.org/2014/08/21/leo-tolstoy-gandhi-letter-to-a-hindu/", "title": "Why We Hurt Each Other: Tolstoy\u2019s Letters to Gandhi", "by": "atmosx", "score": 98, "time": 1437586669, "type": "story", "id": 9931072}, {"kids": [9934430, 9931647, 9931095, 9934296, 9934283, 9931333, 9931452, 9931673, 9932394, 9931196, 9931106, 9931235], "descendants": 97, "url": "http://blog.jruby.org/2015/07/jruby_9000/", "text": "", "title": "JRuby 9000 released", "by": "headius", "score": 244, "time": 1437580868, "type": "story", "id": 9930399}, {"kids": [9934149, 9934445, 9933854, 9934179], "descendants": 7, "url": "http://makezine.com/2015/07/22/with-linux-and-creative-commons-the-9-chip-computer-reveals-its-open-source-details/", "text": "", "title": "The $9 CHIP Computer Reveals Its Open Source Details", "by": "dcschelt", "score": 58, "time": 1437610368, "type": "story", "id": 9933226}, {"descendants": 0, "url": "http://code.dblock.org/2010/11/04/corporate-change-contributing-to-open-source.html", "text": "", "title": "Corporate Change: Contributing to Open Source (2010)", "by": "walterbell", "score": 5, "time": 1437626626, "type": "story", "id": 9934026}, {"kids": [9931836, 9933063, 9931601, 9931616, 9932469, 9931598, 9932260, 9932298, 9931957, 9931871, 9932485, 9933636, 9933002], "descendants": 64, "url": "http://www.fastcompany.com/3047428/how-two-bored-1970s-housewives-helped-create-the-pc-industry", "text": "", "title": "How two bored 1970s housewives helped create the PC industry", "by": "technologizer", "score": 142, "time": 1437570293, "type": "story", "id": 9929333}, {"title": "Flaws in trials of deworming pills show the importance of sharing data", "url": "http://www.buzzfeed.com/bengoldacre/deworming-trials", "descendants": 0, "by": "bootload", "score": 11, "time": 1437614417, "type": "story", "id": 9933418}, {"descendants": 0, "url": "http://adage.com/article/news/boar-s-head-a-deli-meat-beast/236406/", "text": "", "title": "How Boar's Head Became a Deli Meat Beast", "by": "kelvintran", "score": 6, "time": 1437629197, "type": "story", "id": 9934131}, {"kids": [9932267, 9933725, 9931970, 9931848, 9934120, 9933106, 9932284, 9931877, 9931883, 9933192, 9931951, 9932294, 9932491, 9932163, 9932517, 9931923, 9932183, 9932705, 9932745, 9933504, 9932072, 9933906, 9933325, 9931802, 9933707, 9931921, 9933490, 9931952, 9932039, 9932051, 9932607, 9931900, 
9931950, 9932471, 9932126, 9932301, 9931956, 9931907, 9932002, 9931723, 9933042, 9932280, 9933050, 9931776, 9931858], "descendants": 167, "url": "http://techcrunch.com/2015/07/22/uber-for-developers/", "title": "Gigster (YC S15) Does The Dev Work To Turn Your Idea Into An App", "by": "rogerdickey", "score": 105, "time": 1437591552, "type": "story", "id": 9931596}, {"kids": [9934478, 9933611, 9932885, 9933817, 9933385], "descendants": 11, "url": "https://izbicki.me/blog/fast-nearest-neighbor-queries-in-haskell.html", "text": "", "title": "Fast Nearest Neighbor Queries in Haskell", "by": "andrus", "score": 71, "time": 1437598751, "type": "story", "id": 9932266}, {"kids": [9934516], "descendants": 1, "url": "https://github.com/cymen/show-me-the-react", "title": "Chrome React extension that highlights components on the page", "by": "obilgic", "score": 19, "time": 1437616029, "type": "story", "id": 9933511}, {"kids": [9931258, 9931310, 9931291, 9931170, 9932514, 9931325, 9931453, 9931812, 9934193, 9931187, 9931401, 9932368, 9933892, 9931239, 9931780, 9932862, 9931540, 9931797, 9931173, 9931937, 9931748, 9931144, 9931218, 9931270, 9931162, 9931663, 9931206], "descendants": 97, "url": "http://blog.ycombinator.com/pro-rata", "title": "Pro Rata", "by": "craigkerstiens", "score": 328, "time": 1437587125, "type": "story", "id": 9931121}, {"kids": [9934314, 9934251, 9934316, 9934273, 9934417, 9934444, 9934376, 9934317], "descendants": 15, "url": "https://passingstrangeness.wordpress.com/2015/07/20/sl-1-murder-by-nuclear-reactor/", "title": "SL-1: The only fatal nuclear reactor accident in US history", "by": "herendin", "score": 31, "time": 1437628703, "type": "story", "id": 9934113}, {"kids": [9934512, 9934310, 9934191, 9934097], "descendants": 8, "url": "http://amiga30.com/", "text": "", "title": "Today, 30 years ago, Commodore introduced the Amiga", "by": "pdknsk", "score": 47, "time": 1437624046, "type": "story", "id": 9933918}, {"kids": [9933901, 9933890, 9933848], "descendants": 9, "url": "http://www.febo.com/hamdocs/intronos.html?", "text": "", "title": "Getting Started with TCP/IP on Packet Radio (1992)", "by": "taf2", "score": 32, "time": 1437618559, "type": "story", "id": 9933648}, {"kids": [9930872, 9930809, 9933711, 9933474], "descendants": 69, "url": "https://www.mnot.net/blog/2015/07/20/snowden_meets_the_ietf", "title": "Snowden Meets the IETF", "by": "kazuho", "score": 187, "time": 1437563664, "type": "story", "id": 9928879}, {"title": "Experiment (YC W13) is hiring Rails hackers", "url": "https://experiment.com/jobs/engineer", "text": "", "id": 9934275, "score": 1, "time": 1437632231, "type": "job", "by": "dluan"}, {"kids": [9933376, 9933203, 9933271, 9933080, 9933619, 9933339, 9933184, 9933499, 9933860, 9934215, 9933522], "descendants": 27, "url": "http://techcrunch.com/2015/07/22/yc-backed-fonticons-is-a-subscription-icon-service-from-the-creator-of-font-awesome/", "title": "Fonticons (YC S15) Is A Subscription Icon Service From The Maker Of Font Awesome", "by": "katm", "score": 81, "time": 1437608079, "type": "story", "id": 9933067}, {"descendants": 0, "url": "http://www.lrb.co.uk/v37/n15/julian-barnes/selfie-with-sunflowers", "text": "", "title": "Selfie with \u2018Sunflowers\u2019", "by": "prismatic", "score": 7, "time": 1437626737, "type": "story", "id": 9934032}, {"kids": [9934321, 9934288, 9934244, 9934484], "descendants": 7, "url": "http://www.ap.org/content/press-release/2015/ap-makes-one-million-minutes-of-history-available-on-youtube", "title": "AP makes one million minutes 
of historical footage available on YouTube", "by": "mxfh", "score": 54, "time": 1437606964, "type": "story", "id": 9932996}, {"kids": [9934514, 9934426, 9931735, 9934102, 9931992, 9932149, 9933277, 9933260, 9932603, 9932434, 9932327, 9932177], "descendants": 64, "url": "http://www.cs.toronto.edu/~graves/handwriting.html", "title": "Handwriting Generation with Recurrent Neural Networks", "by": "cjdulberger", "score": 216, "time": 1437586331, "type": "story", "id": 9931041}, {"kids": [9933165, 9933281, 9933242, 9933234, 9933346, 9933353, 9933109, 9933400, 9933428, 9933158, 9933099, 9933288, 9934068, 9933675, 9933557, 9933110, 9933104, 9933168, 9933457, 9933423], "descendants": 180, "url": "http://www.vox.com/2015/7/22/9015443/bill-de-blasio-uber", "title": "Uber has defeated Bill de Blasio\u2019s plan to rein them\u00a0in", "by": "jseliger", "score": 132, "time": 1437606977, "type": "story", "id": 9932997}, {"kids": [9933326, 9933248, 9933701, 9933525, 9934300, 9934021, 9933292, 9934172, 9933375, 9933472, 9933253, 9933407, 9933693, 9933788, 9933576], "descendants": 34, "url": "http://well.blogs.nytimes.com/2015/07/22/how-nature-changes-the-brain/", "text": "", "title": "How Walking in Nature Changes the Brain", "by": "joshrotenberg", "score": 91, "time": 1437604126, "type": "story", "id": 9932793}, {"kids": [9932352, 9931954, 9933268, 9933614, 9933318, 9931775], "descendants": 20, "url": "http://arxiv.org/abs/1507.05724v1", "title": "HORNET: High-speed Onion Routing at the Network Layer", "by": "sp332", "score": 105, "time": 1437585355, "type": "story", "id": 9930929}, {"kids": [9934349, 9933566], "descendants": 2, "url": "http://adrianchadd.blogspot.com/2015/07/freebsd-now-has-numa-whyd-it-take-so.html", "title": "NUMA support in FreeBSD", "by": "adamnemecek", "score": 61, "time": 1437601951, "type": "story", "id": 9932591}, {"kids": [9930299, 9930930, 9931004, 9930904, 9930361, 9930489, 9930941, 9930379, 9930355, 9930323, 9934145, 9933006, 9931844, 9930575, 9930503, 9931561, 9931316, 9931134, 9933868, 9930378, 9931584, 9932026, 9930692, 9931165, 9931337, 9931508, 9931738, 9930441], "descendants": 63, "url": "http://f21threadscreen.com/", "title": "F21 Thread Screen", "by": "s0rce", "score": 276, "time": 1437578273, "type": "story", "id": 9930097}, {"kids": [9934164, 9934222, 9934277, 9934170, 9934186, 9934175, 9934219, 9934185, 9934261, 9934200, 9934187], "descendants": 13, "url": "https://medium.com/tweet-stormed/on-full-stack-startups-c6436f445cc8", "text": "", "title": "On Full-Stack Startups", "by": "peter123", "score": 21, "time": 1437605131, "type": "story", "id": 9932856}, {"kids": [9934256, 9934276, 9934398, 9934154], "descendants": 9, "url": "http://www.csmonitor.com/Science/2015/0722/Can-we-colonize-the-moon", "title": "Can we colonize the moon?", "by": "cpeterso", "score": 23, "time": 1437622687, "type": "story", "id": 9933839}, {"kids": [9934371, 9934365, 9934494, 9934435, 9934413, 9934491, 9934394, 9934385, 9934414, 9934360], "descendants": 20, "url": "https://medium.com/@492727ZED/steve-jobs-made-warner-music-sue-my-startup-9a81c5a21d68", "title": "How Apple Influenced The Labels To Shut Down My Music Streaming Startup", "by": "meeper16", "score": 77, "time": 1437620972, "type": "story", "id": 9933757}, {"kids": [9931350, 9931035, 9931408, 9931152, 9931114, 9931570, 9930834, 9930806, 9932886], "descendants": 34, "url": "http://blog.algorithmia.com/post/124542129914/mining-product-hunt-detecting-vote-rings", "text": "", "title": "Mining Product Hunt, Part 1: Detecting 
Vote-Rings", "by": "ANaimi", "score": 125, "time": 1437582290, "type": "story", "id": 9930582}] -------------------------------------------------------------------------------- /data/wb/Broad Money (M2) to foreign reserves, ratio.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Broad Money (M2) to foreign reserves, ratio.xlsx -------------------------------------------------------------------------------- /data/wb/Broad Money (M2) to monetary base, ratio (M2 multiplier).xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Broad Money (M2) to monetary base, ratio (M2 multiplier).xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, % y-o-y, median weighted, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, % y-o-y, median weighted, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, % y-o-y, nominal, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, % y-o-y, nominal, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, nominal, not seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, nominal, not seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/CPI Price, nominal, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/CPI Price, nominal, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Commodity Prices.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Commodity Prices.xlsx -------------------------------------------------------------------------------- /data/wb/Core CPI, not seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Core CPI, not seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Core CPI, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Core CPI, seas. 
adj..xlsx -------------------------------------------------------------------------------- /data/wb/Emerging Market Bond Index (JPM Total Return Index).xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Emerging Market Bond Index (JPM Total Return Index).xlsx -------------------------------------------------------------------------------- /data/wb/Exchange rate, new LCU per USD extended backward, period average.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exchange rate, new LCU per USD extended backward, period average.xlsx -------------------------------------------------------------------------------- /data/wb/Exchange rate, old LCU per USD extended forward, period average.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exchange rate, old LCU per USD extended forward, period average.xlsx -------------------------------------------------------------------------------- /data/wb/Exports Merchandise, Customs, Price, US$, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exports Merchandise, Customs, Price, US$, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Exports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Exports Merchandise, Customs, current US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Exports Merchandise, Customs, current US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Foreign Reserves, Months Import Cover, Goods.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Foreign Reserves, Months Import Cover, Goods.xlsx -------------------------------------------------------------------------------- /data/wb/GDP Deflator at Market Prices, LCU.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP Deflator at Market Prices, LCU.xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, constant 2010 LCU, millions, seas. 
adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, constant 2010 LCU, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, constant 2010 US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, constant 2010 US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, current LCU, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, current LCU, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP at market prices, current US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP at market prices, current US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/GDP_Current_Dollars.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/GDP_Current_Dollars.xlsx -------------------------------------------------------------------------------- /data/wb/Imports Merchandise, Customs, Price, US$, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Imports Merchandise, Customs, Price, US$, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Imports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Imports Merchandise, Customs, constant 2010 US$, millions, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Imports Merchandise, Customs, current US$, millions, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Imports Merchandise, Customs, current US$, millions, seas. 
adj..xlsx -------------------------------------------------------------------------------- /data/wb/Industrial Production, constant 2010 US$, not seasonally adjusted.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Industrial Production, constant 2010 US$, not seasonally adjusted.xlsx -------------------------------------------------------------------------------- /data/wb/Industrial Production, constant 2010 US$, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Industrial Production, constant 2010 US$, seas. adj..xlsx -------------------------------------------------------------------------------- /data/wb/Nominal Effecive Exchange Rate.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Nominal Effecive Exchange Rate.xlsx -------------------------------------------------------------------------------- /data/wb/Official exchange rate, LCU per USD, period average.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Official exchange rate, LCU per USD, period average.xlsx -------------------------------------------------------------------------------- /data/wb/Real Effective Exchange Rate.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Real Effective Exchange Rate.xlsx -------------------------------------------------------------------------------- /data/wb/Retail Sales Volume Index, seas. adj..xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Retail Sales Volume Index, seas. 
adj..xlsx -------------------------------------------------------------------------------- /data/wb/Sovereign Bond Interest Rate Spreads, basis points over US Treasuries.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Sovereign Bond Interest Rate Spreads, basis points over US Treasuries.xlsx -------------------------------------------------------------------------------- /data/wb/Stock Markets, LCU.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Stock Markets, LCU.xlsx -------------------------------------------------------------------------------- /data/wb/Stock Markets, US$.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Stock Markets, US$.xlsx -------------------------------------------------------------------------------- /data/wb/Terms of Trade.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Terms of Trade.xlsx -------------------------------------------------------------------------------- /data/wb/Total Reserves.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/Total Reserves.xlsx -------------------------------------------------------------------------------- /data/wb/stock_market.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/stock_market.xlsx -------------------------------------------------------------------------------- /data/wb/stock_metadata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/stock_metadata.xlsx -------------------------------------------------------------------------------- /data/wb/unemployment.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/data/wb/unemployment.xlsx -------------------------------------------------------------------------------- /data_wrangling_3.yml: -------------------------------------------------------------------------------- 1 | name: conda_dw34 2 | dependencies: 3 | - backports=1.0=py34_0 4 | - backports_abc=0.4=py34_0 5 | - bokeh=0.12.0=py34_0 6 | - cairo=1.12.18=6 7 | - certifi=2016.2.28=py34_0 8 | - cffi=1.6.0=py34_0 9 | - cryptography=1.4=py34_0 10 | - cycler=0.10.0=py34_0 11 | - decorator=4.0.10=py34_0 12 | - entrypoints=0.2.2=py34_0 13 | - et_xmlfile=1.0.1=py34_0 14 | - fontconfig=2.11.1=6 15 | - freetype=2.5.5=1 16 | - futures=3.0.3=py34_0 17 | - get_terminal_size=1.0.0=py34_0 18 | - glib=2.43.0=1 19 | - harfbuzz=0.9.39=1 20 | - idna=2.1=py34_0 21 | - ipykernel=4.3.1=py34_0 22 | - ipython=5.0.0=py34_0 23 | - ipython_genutils=0.1.0=py34_0 24 | - ipywidgets=4.1.1=py34_0 25 | - 
jdcal=1.2=py34_1 26 | - jinja2=2.8=py34_1 27 | - jsonschema=2.5.1=py34_0 28 | - jupyter=1.0.0=py34_3 29 | - jupyter_client=4.3.0=py34_0 30 | - jupyter_console=5.0.0=py34_0 31 | - jupyter_core=4.1.0=py34_0 32 | - libffi=3.2.1=0 33 | - libpng=1.6.22=0 34 | - libsodium=1.0.10=0 35 | - libxml2=2.9.2=0 36 | - libxslt=1.1.28=0 37 | - lxml=3.6.0=py34_0 38 | - markupsafe=0.23=py34_2 39 | - matplotlib=1.5.1=np111py34_0 40 | - mistune=0.7.2=py34_0 41 | - mkl=11.3.3=0 42 | - nbconvert=4.2.0=py34_0 43 | - nbformat=4.0.1=py34_0 44 | - notebook=4.2.1=py34_0 45 | - numpy=1.11.1=py34_0 46 | - openpyxl=2.3.2=py34_0 47 | - openssl=1.0.2h=1 48 | - pandas=0.18.1=np111py34_0 49 | - pandas-datareader=0.2.1=py34_0 50 | - pango=1.39.0=1 51 | - path.py=8.2.1=py34_0 52 | - pathlib2=2.1.0=py34_0 53 | - pdfminer.six==20160614 54 | - pdftables.six==0.0.5 55 | - pexpect=4.0.1=py34_0 56 | - pickleshare=0.7.2=py34_0 57 | - pip=8.1.2=py34_0 58 | - pixman=0.32.6=0 59 | - prompt_toolkit=1.0.3=py34_0 60 | - ptyprocess=0.5.1=py34_0 61 | - pyasn1=0.1.9=py34_0 62 | - pycparser=2.14=py34_1 63 | - pygments=2.1.3=py34_0 64 | - pyparsing=2.1.4=py34_0 65 | - pyqt=4.11.4=py34_4 66 | - python=3.4.5=0 67 | - python-dateutil=2.5.3=py34_0 68 | - pytz=2016.6.1=py34_0 69 | - pyyaml=3.11=py34_4 70 | - pyzmq=15.3.0=py34_0 71 | - qt=4.8.7=4 72 | - qtconsole=4.2.1=py34_0 73 | - readline=6.2=2 74 | - requests=2.10.0=py34_0 75 | - requests-file=1.4=py34_0 76 | - setuptools=23.0.0=py34_0 77 | - simplegeneric=0.8.1=py34_1 78 | - singledispatch=3.4.0.3=py34_0 79 | - sip=4.18=py34_0 80 | - six=1.10.0=py34_0 81 | - sqlite=3.13.0=0 82 | - ssl_match_hostname=3.4.0.2=py34_0 83 | - terminado=0.6=py34_0 84 | - tk=8.5.18=0 85 | - tornado=4.3=py34_1 86 | - traitlets=4.2.2=py34_0 87 | - wcwidth=0.1.7=py34_0 88 | - wheel=0.29.0=py34_0 89 | - xlrd=1.0.0=py34_0 90 | - xz=5.2.2=0 91 | - yaml=0.1.6=0 92 | - zeromq=4.1.4=0 93 | - zlib=1.2.8=3 94 | - pip: 95 | - backports-abc==0.4 96 | - backports.shutil-get-terminal-size==1.0.0 97 | - backports.ssl-match-hostname==3.4.0.2 98 | - configparser==3.5.0 99 | - enum34==1.1.6 100 | - et-xmlfile==1.0.1 101 | - ipaddress==1.0.16 102 | - ipython-genutils==0.1.0 103 | - jupyter-client==4.3.0 104 | - jupyter-console==5.0.0 105 | - jupyter-core==4.1.0 106 | - ndg-httpsclient==0.4.2 107 | - prompt-toolkit==1.0.3 108 | - pyopenssl==16.0.0 109 | - urllib3==1.16 110 | 111 | 112 | -------------------------------------------------------------------------------- /py3_requirements.txt: -------------------------------------------------------------------------------- 1 | backports-abc==0.4 2 | backports.shutil-get-terminal-size==1.0.0 3 | backports.ssl-match-hostname==3.5.0.1 4 | bokeh==0.11.1 5 | certifi==2016.2.28 6 | cffi==1.6.0 7 | cryptography==1.3.2 8 | cycler==0.10.0 9 | decorator==4.0.9 10 | entrypoints==0.2.1 11 | enum34==1.1.6 12 | et-xmlfile==1.0.1 13 | futures==3.0.5 14 | idna==2.1 15 | ipaddress==1.0.16 16 | ipykernel==4.3.1 17 | ipython==4.2.0 18 | ipython-genutils==0.1.0 19 | ipywidgets==5.1.3 20 | jdcal==1.2 21 | Jinja2==2.8 22 | jsonschema==2.5.1 23 | jupyter==1.0.0 24 | jupyter-client==4.2.2 25 | jupyter-console==4.1.1 26 | jupyter-core==4.1.0 27 | lxml==3.6.0 28 | MarkupSafe==0.23 29 | matplotlib==1.5.1 30 | mistune==0.7.2 31 | nbconvert==4.2.0 32 | nbformat==4.0.1 33 | ndg-httpsclient==0.4.0 34 | notebook==4.2.0 35 | numpy==1.11.0 36 | oauthlib==1.1.1 37 | openpyxl==2.3.5 38 | pandas==0.18.1 39 | pandas-datareader==0.2.1 40 | pathlib2==2.1.0 41 | pdfminer.six==20160614 42 | pdftables.six==0.0.5 43 | pexpect==4.0.1 44 
| pickleshare==0.7.2 45 | ptyprocess==0.5.1 46 | pyasn1==0.1.9 47 | pycparser==2.14 48 | Pygments==2.1.3 49 | pyOpenSSL==16.0.0 50 | pyparsing==2.1.4 51 | python-dateutil==2.5.3 52 | pytz==2016.4 53 | PyYAML==3.11 54 | pyzmq==15.2.0 55 | qtconsole==4.2.1 56 | requests==2.10.0 57 | requests-file==1.4 58 | requests-oauthlib==0.6.1 59 | simplegeneric==0.8.1 60 | singledispatch==3.4.0.3 61 | six==1.10.0 62 | terminado==0.6 63 | tornado==4.3 64 | traitlets==4.2.1 65 | tweepy==3.5.0 66 | urllib3==1.15.1 67 | widgetsnbextension==1.2.2 68 | xlrd==0.9.4 69 | -------------------------------------------------------------------------------- /py3_server_requirements.txt: -------------------------------------------------------------------------------- 1 | backports-abc==0.4 2 | backports.shutil-get-terminal-size==1.0.0 3 | backports.ssl-match-hostname==3.5.0.1 4 | bokeh==0.11.1 5 | certifi==2016.2.28 6 | cffi==1.6.0 7 | chardet==2.3.0 8 | cryptography==1.3.2 9 | cssselect==0.9.1 10 | cycler==0.10.0 11 | Cython==0.24.1 12 | decorator==4.0.9 13 | dill==0.2.5 14 | entrypoints==0.2.1 15 | enum34==1.1.6 16 | et-xmlfile==1.0.1 17 | flake8==2.6.2 18 | funcsigs==1.0.2 19 | futures==3.0.5 20 | idna==2.1 21 | ipaddress==1.0.16 22 | ipykernel==4.3.1 23 | ipyparallel==5.1.1 24 | ipython==4.2.0 25 | ipython-genutils==0.1.0 26 | ipywidgets==5.1.3 27 | jdcal==1.2 28 | Jinja2==2.8 29 | jsonschema==2.5.1 30 | jupyter==1.0.0 31 | jupyter-client==4.2.2 32 | jupyter-console==4.1.1 33 | jupyter-core==4.1.0 34 | jupyterhub==0.6.1 35 | line-profiler==1.0 36 | lxml==3.6.0 37 | MarkupSafe==0.23 38 | matplotlib==1.5.1 39 | mccabe==0.5.0 40 | multiprocess==0.70.4 41 | nbconvert==4.2.0 42 | nbformat==4.0.1 43 | ndg-httpsclient==0.4.0 44 | nose==1.3.7 45 | notebook==4.2.0 46 | numpy==1.11.0 47 | oauthenticator==0.5.0 48 | oauthlib==1.1.1 49 | openpyxl==2.3.5 50 | pamela==0.2.1 51 | pandas==0.18.1 52 | pandas-datareader==0.2.1 53 | paramiko==2.0.1 54 | pathlib2==2.1.0 55 | pathos==0.2.0 56 | pdfminer.six==20160614 57 | pdftables.six==0.0.5 58 | pep8==1.7.0 59 | pexpect==4.0.1 60 | pickleshare==0.7.2 61 | pox==0.2.2 62 | ppft==1.6.4.6 63 | ptyprocess==0.5.1 64 | pyasn1==0.1.9 65 | pycodestyle==2.0.0 66 | pycparser==2.14 67 | pyflakes==1.2.3 68 | Pygments==2.1.3 69 | pyOpenSSL==16.0.0 70 | pyparsing==2.1.4 71 | python-dateutil==2.5.3 72 | pytz==2016.4 73 | PyYAML==3.11 74 | pyzmq==15.2.0 75 | qtconsole==4.2.1 76 | requests==2.10.0 77 | requests-file==1.4 78 | requests-oauthlib==0.6.1 79 | simplegeneric==0.8.1 80 | singledispatch==3.4.0.3 81 | six==1.10.0 82 | SQLAlchemy==1.0.15 83 | terminado==0.6 84 | tornado==4.3 85 | traitlets==4.2.1 86 | tweepy==3.5.0 87 | urllib3==1.15.1 88 | widgetsnbextension==1.2.2 89 | xlrd==0.9.4 90 | -------------------------------------------------------------------------------- /pycon_2015_requirements.txt: -------------------------------------------------------------------------------- 1 | xlrd 2 | requests 3 | fuzzywuzzy 4 | textblob 5 | python-Levenshtein 6 | journalism 7 | latimes-calculate 8 | bokeh 9 | pygal 10 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | backports-abc==0.4 2 | backports.shutil-get-terminal-size==1.0.0 3 | backports.ssl-match-hostname==3.5.0.1 4 | bokeh==0.11.1 5 | certifi==2016.2.28 6 | cffi==1.6.0 7 | configparser==3.3.0.post2 8 | cryptography==1.3.2 9 | cycler==0.10.0 10 | decorator==4.0.9 11 | entrypoints==0.2.1 12 | enum34==1.1.6 13 | 
et-xmlfile==1.0.1 14 | functools32==3.2.3.post2 15 | futures==3.0.5 16 | idna==2.1 17 | ipaddress==1.0.16 18 | ipykernel==4.3.1 19 | ipython==4.2.0 20 | ipython-genutils==0.1.0 21 | ipywidgets==5.1.3 22 | jdcal==1.2 23 | Jinja2==2.8 24 | jsonschema==2.5.1 25 | jupyter==1.0.0 26 | jupyter-client==4.2.2 27 | jupyter-console==4.1.1 28 | jupyter-core==4.1.0 29 | lxml==3.6.0 30 | MarkupSafe==0.23 31 | matplotlib==1.5.1 32 | mistune==0.7.2 33 | nbconvert==4.2.0 34 | nbformat==4.0.1 35 | ndg-httpsclient==0.4.0 36 | notebook==4.2.0 37 | numpy==1.11.0 38 | oauthlib==1.1.1 39 | openpyxl==2.3.5 40 | pandas==0.18.1 41 | pandas-datareader==0.2.1 42 | pathlib2==2.1.0 43 | pdfminer==20110515 44 | pdftables==0.0.4 45 | pexpect==4.0.1 46 | pickleshare==0.7.2 47 | ptyprocess==0.5.1 48 | pyasn1==0.1.9 49 | pycparser==2.14 50 | Pygments==2.1.3 51 | pyOpenSSL==16.0.0 52 | pyparsing==2.1.4 53 | python-dateutil==2.5.3 54 | pytz==2016.4 55 | PyYAML==3.11 56 | pyzmq==15.2.0 57 | qtconsole==4.2.1 58 | requests==2.10.0 59 | requests-file==1.4 60 | requests-oauthlib==0.6.1 61 | simplegeneric==0.8.1 62 | singledispatch==3.4.0.3 63 | six==1.10.0 64 | terminado==0.6 65 | tornado==4.3 66 | traitlets==4.2.1 67 | tweepy==3.5.0 68 | urllib3==1.15.1 69 | widgetsnbextension==1.2.2 70 | xlrd==0.9.4 71 | -------------------------------------------------------------------------------- /scripts/pycon-2015/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kjam/data-wrangling-pycon/1744299260031d37730a70cc39a85d9bc62a5231/scripts/pycon-2015/__init__.py -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson01_csv.py: -------------------------------------------------------------------------------- 1 | from csv import DictReader 2 | 3 | 4 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 5 | 6 | all_lines = [r for r in rdr] 7 | 8 | print all_lines[0].keys() 9 | 10 | for line in all_lines: 11 | try: 12 | if 'Gross domestic product' in line.get('Subject Descriptor') and \ 13 | 'international dollar' in line.get('Units'): 14 | print '{}: {} ({} {})'.format( 15 | line.get('Country'), line.get('2015'), '2015', line.get('Scale')) 16 | except: 17 | print "ERROR: ", line 18 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson02_xlsx.py: -------------------------------------------------------------------------------- 1 | import xlrd 2 | 3 | 4 | notebook = xlrd.open_workbook('../data/wb/GDP_Current_Dollars.xlsx') 5 | 6 | for sheet in notebook.sheets(): 7 | print sheet.name 8 | 9 | sheet = notebook.sheet_by_name('Data') 10 | 11 | titles = sheet.row_values(0) 12 | print titles 13 | 14 | 15 | def build_array(sheet, titles, start_row=1): 16 | new_arr = [] 17 | while start_row < sheet.nrows: 18 | new_arr.append( 19 | dict(zip(titles, sheet.row_values(start_row))) 20 | ) 21 | start_row += 1 22 | return new_arr 23 | 24 | 25 | arr = build_array(sheet, titles) 26 | 27 | for line in arr: 28 | print line.get('Country Name'), line.get('2014 [YR2014]') 29 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson03_databases.py: -------------------------------------------------------------------------------- 1 | import dataset 2 | 3 | db = dataset.connect('sqlite:///../data/data_analysis.db') 4 | 5 | my_sources = db['sources'] 6 | 7 | my_sources.insert({'organization': 'IMF', 8 | 'file_name': 
'imf_indicators.tsv', 9 | 'url': 'http://www.imf.org/external/pubs/ft/weo/2015/01/weodata/index.aspx', 10 | 'description': 'IMF World Economic Outlook Dataset', 11 | }) 12 | 13 | my_sources.insert({'organization': 'World Bank', 14 | 'file_name': 'wb/GDP_Current_Dollars.xlsx', 15 | 'url': 'http://databank.worldbank.org/data/reports.aspx?source=2&series=NY.GDP.MKTP.CD#', 16 | 'description': 'World Bank GDP Dataset', 17 | }) 18 | 19 | print db.tables 20 | 21 | for row in db['sources']: 22 | print row['description'] 23 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson04_05_api_regex.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import re 3 | import json 4 | from multiprocessing import Process, Manager 5 | 6 | MATCHING = ( 7 | ('Python', '(p|P)ython'), 8 | ('Ruby', '(r|R)uby'), 9 | ('JavaScript', 'js|(J|j)ava(s|S)cript'), 10 | ('NodeJS', 'node(\.?)(?:\js|JS)'), 11 | ('Java', '(j|J)ava[^(S|s)cript]'), 12 | ('Objective-C', 'Obj(ective?)(?:\ |-)(C|c)'), 13 | ) 14 | 15 | 16 | def get_story(story_id, stories): 17 | url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id 18 | resp = requests.get(url) 19 | stories.append(resp.json()) 20 | 21 | 22 | def get_top_stories(): 23 | manager = Manager() 24 | stories = manager.list() 25 | url = 'https://hacker-news.firebaseio.com/v0/topstories.json' 26 | ids = requests.get(url) 27 | processes = [Process(target=get_story, args=(sid, stories)) 28 | for sid in ids.json()[:40]] 29 | for p in processes: 30 | p.start() 31 | for p in processes: 32 | p.join() 33 | return stories 34 | 35 | 36 | def get_json_stories(): 37 | return json.load(open('../data/topstories.json', 'rb')) 38 | 39 | 40 | def count_languages(): 41 | stories = get_top_stories() 42 | final_tallies = {} 43 | for s in stories: 44 | long_string = u'{} {}'.format(s.get('title'), s.get('url')) 45 | for language, regex in dict(MATCHING).items(): 46 | if re.search(regex, long_string): 47 | if language not in final_tallies.keys(): 48 | final_tallies[language] = { 49 | 'score': s.get('score'), 50 | 'descendants': s.get('descendants')} 51 | else: 52 | final_tallies[language]['score'] += s.get('score') 53 | final_tallies[language][ 54 | 'descendants'] += s.get('descendants') 55 | return final_tallies 56 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson06_string_processing.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from multiprocessing import Process, Manager 3 | from fuzzywuzzy import fuzz 4 | from textblob import TextBlob 5 | import re 6 | import json 7 | 8 | 9 | def get_story(story_id, stories): 10 | url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id 11 | resp = requests.get(url) 12 | stories.append(resp.json()) 13 | return stories 14 | 15 | 16 | def get_top_stories(): 17 | manager = Manager() 18 | stories = manager.list() 19 | url = 'https://hacker-news.firebaseio.com/v0/topstories.json' 20 | ids = requests.get(url) 21 | processes = [Process(target=get_story, args=(sid, stories)) 22 | for sid in ids.json()[:40]] 23 | for p in processes: 24 | p.start() 25 | for p in processes: 26 | p.join() 27 | return stories 28 | 29 | 30 | def get_json_stories(): 31 | return json.load(open('../data/topstories.json', 'rb')) 32 | 33 | 34 | def get_json_comments(): 35 | return json.load(open('../data/comments.json', 'rb')) 36 | 37 | 38 | def 
get_all_comments(sid): 39 | manager = Manager() 40 | comments = manager.list() 41 | story = get_story(sid, []) 42 | if not story[0].get('kids'): 43 | return [] 44 | processes = [Process(target=get_story, args=(cid, comments)) 45 | for cid in story[0].get('kids')] 46 | for p in processes: 47 | p.start() 48 | for p in processes: 49 | p.join() 50 | return [c for c in comments if c and not c.get('deleted')] 51 | 52 | 53 | def remove_html(text): 54 | try: 55 | return re.sub('<[^<]+?>', '', text) 56 | except: 57 | print text 58 | return text 59 | 60 | 61 | def is_match(first, second): 62 | ratio = fuzz.token_sort_ratio(first, second) 63 | if ratio > 50: 64 | return True 65 | return False 66 | 67 | 68 | def find_matching_comments(): 69 | stories = get_top_stories() 70 | comments = [] 71 | while len(comments) < 1: 72 | for s in stories: 73 | comments.extend(get_all_comments(s.get('id'))) 74 | matches = [] 75 | comment_text = ['%s - %s' % (c.get('by'), 76 | remove_html(c.get('text'))) for c in comments] 77 | for c in comments: 78 | ctext = remove_html(c.get('text')) 79 | comment_text.remove('%s - %s' % (c.get('by'), ctext)) 80 | for txt in comment_text: 81 | if is_match(ctext, txt): 82 | matches.append((c, txt)) 83 | return matches 84 | 85 | 86 | def comment_sentiment(): 87 | stories = get_top_stories() 88 | comments = get_all_comments(stories[0].get('id')) 89 | for comm in comments: 90 | comm['sentiment'] = TextBlob(comm.get( 91 | 'text')).sentiment.polarity 92 | comments.sort(key=lambda x: x.get('sentiment')) 93 | return comments 94 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson07_calculate.py: -------------------------------------------------------------------------------- 1 | import calculate 2 | import requests 3 | from multiprocessing import Process, Manager 4 | from decimal import Decimal 5 | 6 | 7 | def get_story(story_id, stories): 8 | url = 'https://hacker-news.firebaseio.com/v0/item/%d.json' % story_id 9 | resp = requests.get(url) 10 | story_data = resp.json() 11 | user_data = get_user(story_data.get('by')) 12 | story_data['user_karma'] = user_data.get('karma') or 0 13 | stories.append(story_data) 14 | return stories 15 | 16 | 17 | def get_user(user_id): 18 | url = 'https://hacker-news.firebaseio.com/v0/user/%s.json' % user_id 19 | resp = requests.get(url) 20 | return resp.json() 21 | 22 | 23 | def get_top_stories_with_user_karma(): 24 | manager = Manager() 25 | stories = manager.list() 26 | url = 'https://hacker-news.firebaseio.com/v0/topstories.json' 27 | ids = requests.get(url) 28 | processes = [Process(target=get_story, args=(sid, stories)) 29 | for sid in ids.json()[:40]] 30 | for p in processes: 31 | p.start() 32 | for p in processes: 33 | p.join() 34 | return stories 35 | 36 | 37 | def calculate_summary_karma(): 38 | stories = get_top_stories_with_user_karma() 39 | return calculate.summary_stats([ 40 | Decimal(s.get('score')) for s in stories]) 41 | 42 | 43 | def pearsons_karma(): 44 | stories = get_top_stories_with_user_karma() 45 | user_karma = [Decimal(s.get('user_karma')) for s in stories] 46 | story_karma = [Decimal(s.get('score')) for s in stories] 47 | return calculate.pearson(user_karma, story_karma) 48 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson08_journalism.py: -------------------------------------------------------------------------------- 1 | import journalism 2 | import logging 3 | from csv import reader 4 | 5 | text_type = 
journalism.TextType() 6 | number_type = journalism.NumberType() 7 | date_type = journalism.DateType() 8 | 9 | 10 | def get_table(datarows, types, titles): 11 | try: 12 | table = journalism.Table(datarows, types, titles) 13 | return table 14 | except: 15 | logging.exception('problem loading table') 16 | return None 17 | 18 | 19 | def clean_text(row): 20 | new_row = [] 21 | for item in row: 22 | if isinstance(item, (str, unicode)): 23 | item = item.decode('utf-8', 'replace') 24 | if item in [u'--', u'n/a']: 25 | item = None 26 | new_row.append(item) 27 | 28 | return new_row 29 | 30 | 31 | def clean_rows(all_rows): 32 | new_data = [] 33 | for row in all_rows: 34 | new_data.append(clean_text(row)) 35 | return new_data 36 | 37 | 38 | def load_imf_data(): 39 | rdr = reader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 40 | all_rows = [r for r in rdr if len(r) > 1] 41 | titles = all_rows.pop(0) 42 | cleaned_rows = clean_rows(all_rows) 43 | types = [text_type, text_type, text_type, text_type, text_type, 44 | number_type, number_type, number_type, number_type, 45 | number_type, number_type, number_type, number_type, 46 | date_type] 47 | return get_table(cleaned_rows, types, titles) 48 | 49 | 50 | def add_last_percent_change(): 51 | table = load_imf_data() 52 | table = table.where(lambda r: r.get('2015') is not 53 | None and r.get('2014') is not None) 54 | table = table.where(lambda r: 'Unemployment' in 55 | r.get('Subject Descriptor')) 56 | table = table.percent_change('2014', '2015', 'last_change') 57 | return table 58 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson09_matplotlib.py: -------------------------------------------------------------------------------- 1 | from csv import DictReader 2 | from decimal import Decimal 3 | import calculate 4 | import pylab 5 | 6 | 7 | def load_imf_unemployment(): 8 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 9 | return [r for r in rdr if r.get('Subject Descriptor') and 10 | 'Unemployment' in r.get('Subject Descriptor')] 11 | 12 | 13 | def get_avg_unemployment(data, start_year=2013, end_year=2015): 14 | avgs = {} 15 | while start_year <= end_year: 16 | avg = calculate.mean([ 17 | Decimal(rate.get(str(start_year))) for 18 | rate in data if rate.get(str(start_year))]) 19 | avgs[str(start_year)] = avg 20 | start_year += 1 21 | return avgs 22 | 23 | 24 | def chart_unemployment(): 25 | imf_data = load_imf_unemployment() 26 | averages = get_avg_unemployment(imf_data) 27 | pylab.plot(averages.keys(), averages.values()) 28 | pylab.ylabel('Average Unemployment') 29 | pylab.xlabel('Years') 30 | pylab.title('Average Unemployment Over Time') 31 | pylab.ylim([0, sorted(averages.values(), reverse=True)[0] + 1]) 32 | pylab.show() 33 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson10_pygal.py: -------------------------------------------------------------------------------- 1 | import pygal 2 | from csv import DictReader 3 | 4 | 5 | def load_imf_unemployment(): 6 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 7 | return [r for r in rdr if r.get('Subject Descriptor') and 8 | 'Unemployment' in r.get('Subject Descriptor')] 9 | 10 | 11 | def load_iso_codes(): 12 | iso_dict = {} 13 | for row in DictReader(open('../data/iso-2.csv', 'rb')): 14 | iso_dict[row.get('Name')] = row.get('Code') 15 | return iso_dict 16 | 17 | 18 | def load_and_merge_data(): 19 | iso_dict = load_iso_codes() 20 | 
imf_data = load_imf_unemployment() 21 | for d in imf_data: 22 | d['iso'] = iso_dict[d.get('Country')] 23 | return imf_data 24 | 25 | 26 | def draw_unemployment(): 27 | imf_data = load_and_merge_data() 28 | worldmap_data = {} 29 | for row in imf_data: 30 | worldmap_data[row.get('iso').lower()] = float(row.get('2015')) 31 | worldmap_chart = pygal.Worldmap() 32 | worldmap_chart.title = '2015 Unemployment' 33 | worldmap_chart.add('Total Unemployment (%)', worldmap_data) 34 | worldmap_chart.render() 35 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson11_bokeh.py: -------------------------------------------------------------------------------- 1 | from bokeh.plotting import figure, show, output_file 2 | from csv import DictReader 3 | 4 | 5 | def load_imf_unemployment(): 6 | rdr = DictReader(open('../data/imf_indicators.tsv', 'rb'), delimiter='\t') 7 | return [r for r in rdr if r.get('Subject Descriptor') and 8 | 'Unemployment' in r.get('Subject Descriptor')] 9 | 10 | 11 | def mscatter(chart, x, y, typestr): 12 | chart.scatter(x, y, marker=typestr, line_color="#6666ee", 13 | fill_color="#ee6666", fill_alpha=0.5, size=12) 14 | 15 | 16 | def draw_scatter(): 17 | chart = figure(title="IMF Unemployment") 18 | output_file("../../static/unemployment.html") 19 | imf_data = load_imf_unemployment() 20 | for line in imf_data: 21 | for year in ['2013', '2014', '2015']: 22 | mscatter(chart, int(year), float(line.get(year)), 'circle') 23 | show(chart) 24 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson12_pandas.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_wb_unemployment_data(): 5 | return pd.read_excel('../data/wb/unemployment.xlsx', 6 | index_col=0, header=0, skiprows=[1]) 7 | 8 | 9 | def get_wb_market_data(): 10 | return pd.read_excel('../data/wb/stock_market.xlsx', 11 | index_col=0, header=0, skiprows=[1]) 12 | 13 | 14 | def get_metadata(): 15 | return pd.read_excel('../data/wb/stock_metadata.xlsx', 16 | sheetname=1, index_col=0, header=0) 17 | -------------------------------------------------------------------------------- /scripts/pycon-2015/lesson13_pandas_join.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_wb_unemployment_data(): 5 | return pd.read_excel('../data/wb/unemployment.xlsx', 6 | index_col=0, header=0, skiprows=[1]) 7 | 8 | 9 | def get_wb_market_data(): 10 | return pd.read_excel('../data/wb/stock_market.xlsx', 11 | index_col=0, header=0, skiprows=[1]) 12 | 13 | 14 | def get_metadata(): 15 | return pd.read_excel('../data/wb/stock_metadata.xlsx', 16 | sheetname=1, index_col=0, header=0) 17 | 18 | 19 | def get_gdp(): 20 | return pd.read_excel('../data/wb/GDP_Current_Dollars.xlsx', 21 | index_col=3, header=0) 22 | 23 | 24 | def clean_market_columns(): 25 | market_data = get_wb_market_data() 26 | market_data.columns = market_data.columns.map(lambda x: x[:3]) 27 | market_data.index = market_data.index.map(lambda x: '{} SM'.format(x)) 28 | return market_data.transpose() 29 | 30 | 31 | def update_gdp_cols(colname): 32 | if colname[:4].isdigit(): 33 | return '{} GDP'.format(colname[:4]) 34 | return colname 35 | 36 | 37 | def join_market_and_gdp(): 38 | market_data = clean_market_columns() 39 | gdp_data = get_gdp() 40 | gdp_data.columns = gdp_data.columns.map(update_gdp_cols) 41 | return market_data.join(gdp_data) 42 | 
-------------------------------------------------------------------------------- /scripts/pycon-2015/lesson14_pandas_compute.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def get_wb_unemployment_data(): 5 | return pd.read_excel('../data/wb/unemployment.xlsx', 6 | index_col=0, header=0, skiprows=[1]) 7 | 8 | 9 | def get_wb_market_data(): 10 | return pd.read_excel('../data/wb/stock_market.xlsx', 11 | index_col=0, header=0, skiprows=[1]) 12 | 13 | 14 | def get_metadata(): 15 | return pd.read_excel('../data/wb/stock_metadata.xlsx', 16 | sheetname=1, index_col=0, header=0) 17 | 18 | 19 | def get_gdp(): 20 | return pd.read_excel('../data/wb/GDP_Current_Dollars.xlsx', 21 | index_col=3, header=0) 22 | 23 | 24 | def clean_market_columns(): 25 | market_data = get_wb_market_data() 26 | market_data.columns = market_data.columns.map(lambda x: x[:3]) 27 | market_data.index = market_data.index.map(lambda x: '{} SM'.format(x)) 28 | return market_data.transpose() 29 | 30 | 31 | def update_gdp_cols(colname): 32 | if colname[:4].isdigit(): 33 | return '{} GDP'.format(colname[:4]) 34 | return colname 35 | 36 | 37 | def join_market_and_gdp(): 38 | market_data = clean_market_columns() 39 | gdp_data = get_gdp() 40 | gdp_data.columns = gdp_data.columns.map(update_gdp_cols) 41 | return market_data.join(gdp_data) 42 | 43 | 44 | def just_spain(): 45 | joined = join_market_and_gdp() 46 | spain = joined.loc['ESP'].copy() 47 | spain_gdp = spain[spain.index.map(lambda x: 'GDP' in x)] 48 | spain_stock = spain[spain.index.map(lambda x: 'SM' in x)] 49 | spain_gdp.index = spain_gdp.index.map(lambda x: x.rstrip(' GDP')) 50 | spain_stock.index = spain_stock.index.map(lambda x: x.rstrip(' SM')) 51 | spain_stock.name = 'Stocks' 52 | spain_gdp.name = 'GDP' 53 | return spain_stock, spain_gdp 54 | 55 | 56 | def merge_back(): 57 | spain_stock, spain_gdp = just_spain() 58 | return pd.concat([spain_stock, spain_gdp], axis=1) 59 | --------------------------------------------------------------------------------