├── map.png ├── animation.m4v ├── edit_mode.png ├── map_overlay.png ├── command_mode.png ├── edit_shortcuts.png ├── ipython_logo.png ├── menubar_toolbar.png ├── command_shortcuts.png ├── dashboard_running_tab.png ├── dashboard_notebooks_tab.png ├── map_info.json ├── soln ├── displaying.py ├── colored.py ├── data_explorer.py ├── on_trait_change.py ├── param_plot_1.py ├── on_submit.py ├── string_sorting.py ├── selection.py ├── param_plot_2.py ├── link.py └── sliders.py ├── README.md ├── .gitignore ├── LICENSE ├── resources ├── Scipy Exercises.ipynb ├── vizarray.py ├── Scipy.ipynb ├── What is the IPython Notebook.ipynb ├── Numpy Exercises.ipynb ├── Working With Markdown Cells.ipynb ├── Notebook Basics.ipynb ├── Running the Notebook Server.ipynb └── Vis1Exercises.ipynb ├── 03 - Data 1.ipynb ├── 04 - Data 2.ipynb ├── 01 - Multimedia.ipynb ├── 00 - Introduction.ipynb └── 02 - Interactivity.ipynb /map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/map.png -------------------------------------------------------------------------------- /animation.m4v: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/animation.m4v -------------------------------------------------------------------------------- /edit_mode.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/edit_mode.png -------------------------------------------------------------------------------- /map_overlay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/map_overlay.png -------------------------------------------------------------------------------- /command_mode.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/command_mode.png -------------------------------------------------------------------------------- /edit_shortcuts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/edit_shortcuts.png -------------------------------------------------------------------------------- /ipython_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/ipython_logo.png -------------------------------------------------------------------------------- /menubar_toolbar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/menubar_toolbar.png -------------------------------------------------------------------------------- /command_shortcuts.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/command_shortcuts.png -------------------------------------------------------------------------------- /dashboard_running_tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/dashboard_running_tab.png -------------------------------------------------------------------------------- /dashboard_notebooks_tab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jupyter/strata-sv-2015-tutorial/HEAD/dashboard_notebooks_tab.png -------------------------------------------------------------------------------- /map_info.json: 
-------------------------------------------------------------------------------- 1 | {"width": 630, "westLong": -122.572044, "eastLong": -121.709617, "northLat": 37.906558, "southLat": 37.354062, "height": 510} -------------------------------------------------------------------------------- /soln/displaying.py: -------------------------------------------------------------------------------- 1 | from IPython.html.widgets import * 2 | from IPython.display import display 3 | w = TextWidget(value="test") 4 | display(w) 5 | w.keys 6 | -------------------------------------------------------------------------------- /soln/colored.py: -------------------------------------------------------------------------------- 1 | from IPython.html.widgets import * 2 | w = HTMLWidget(value="Hello world!") 3 | w.set_css({ 4 | 'background': 'red', 5 | 'color': 'yellow', 6 | }) 7 | w -------------------------------------------------------------------------------- /soln/data_explorer.py: -------------------------------------------------------------------------------- 1 | def plot_iris(a=None, col1=0, col2=0): 2 | plt.scatter(a[:,col1], a[:,col2]) 3 | 4 | interact(plot_iris, a=fixed(iris_data.data), col1=(0,3), col2=(0,3)); -------------------------------------------------------------------------------- /soln/on_trait_change.py: -------------------------------------------------------------------------------- 1 | from IPython.html.widgets import * 2 | w = TextWidget() 3 | def handle_submit(name, new): 4 | print(new) 5 | w.on_trait_change(handle_submit, 'value') 6 | w -------------------------------------------------------------------------------- /soln/param_plot_1.py: -------------------------------------------------------------------------------- 1 | def plot_sin(a, b): 2 | x = np.linspace(0,4*np.pi, 100) 3 | y = np.sin(a*x+b) 4 | plt.plot(x,y) 5 | 6 | interact(plot_sin, a=(0.0,5.0,0.1), b=(-5.0,5.0,0.1)); -------------------------------------------------------------------------------- 
# /soln/on_submit.py:
# --------------------------------------------------------------------------------
# Text box whose submit handler echoes the entered text and then clears the box.
from IPython.html.widgets import *
w = TextWidget()
def handle_submit(sender):
    """Print the submitting widget's current value, then reset it to ''."""
    print(sender.value)
    sender.value = ''
w.on_submit(handle_submit)
# Bare expression: presumably relies on the notebook rendering a cell's last
# expression to show the widget -- confirm when running outside a notebook.
w

# --------------------------------------------------------------------------------
# /soln/string_sorting.py:
# --------------------------------------------------------------------------------
def sort_string(s, reverse=False):
    """Print the items of *s* joined in sorted order (descending if *reverse*)."""
    # sorted() takes the direction directly; no need to rebind `s` to either a
    # list or a reversed-iterator depending on the flag.
    print(''.join(sorted(s, reverse=reverse)))

# NOTE(review): `interact` is not imported in this snippet -- presumably it is
# already defined in the notebook namespace the snippet is loaded into.
interact(sort_string, s='Hi', reverse=False);

# --------------------------------------------------------------------------------
# /soln/selection.py:
# --------------------------------------------------------------------------------
# Radio-button selector built from a label -> value mapping.
from IPython.html.widgets import *
from IPython.display import display
w = RadioButtonsWidget(values={"Left": 0, "Center": 1, "Right": 2}, description="Alignment:")
display(w)

# Show the current value, then set it programmatically to 1 (the value mapped
# from "Center" in the dict above).
print(w.value)
w.value = 1

# --------------------------------------------------------------------------------
# /soln/param_plot_2.py:
# --------------------------------------------------------------------------------
# NOTE(review): `interact`, `np` and `plt` are not imported in this snippet --
# presumably provided by the surrounding notebook; verify before reuse.
@interact(a=(0.0,5.0,0.1), b=(-5.0,5.0,0.1),
          style={'dotted red': 'r.', 'dashed black': 'k--'})
def plot_sin2(a, b, style='r.'):
    """Plot sin(a*x + b) on 100 points over [0, 4*pi] using format string *style*."""
    x = np.linspace(0,4*np.pi, 100)
    y = np.sin(a*x+b)
    plt.plot(x, y, style)

# --------------------------------------------------------------------------------
# /soln/link.py:
# --------------------------------------------------------------------------------
# Text area plus a LaTeX preview, with their `value` traits passed to link().
from IPython.html.widgets import *
from IPython.display import display
from IPython.utils.traitlets import link
code = TextareaWidget(description="Source:", value="Cool math: $\\frac{F}{m}=a$")
preview = LatexWidget()
display(code, preview)
# The returned link object is kept in `mylink`; presumably so it can be removed
# later -- confirm against the traitlets `link` API.
mylink = link((code, 'value'), (preview, 'value'))
-------------------------------------------------------------------------------- /soln/sliders.py: -------------------------------------------------------------------------------- 1 | from IPython.html.widgets import * 2 | from IPython.display import display 3 | sliders = [FloatSliderWidget(description=str(i), orientation="vertical", value=50.) for i in range(10)] 4 | container = ContainerWidget(children=sliders) 5 | display(container) 6 | container.remove_class('vbox') 7 | container.add_class('hbox') -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Strata Silicon Valley 2015 Tutorial 2 | 3 | ## Installation and setup 4 | 5 | [Grab the Anaconda Python Distribution](http://continuum.io/downloads). 6 | 7 | Clone this repo or grab one of the (nonexistent) releases. 8 | 9 | ### "What if I don't use Anaconda?" 10 | 11 | You'll need these dependencies: 12 | 13 | * numpy 14 | * pandas 15 | * matplotlib 16 | * scipy 17 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | nosetests.xml 41 | coverage.xml 42 | 43 | # Translations 44 | *.mo 45 | *.pot 46 | 47 | # Django stuff: 48 | *.log 49 | 50 | # Sphinx documentation 51 | docs/_build/ 52 | 53 | # PyBuilder 54 | target/ 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, Project Jupyter 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of strata-sv-2015-tutorial nor the names of its 15 | contributors may be used to endorse or promote products derived from 16 | this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /resources/Scipy Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 2 7 | }, 8 | "display_name": "IPython (Python 2)", 9 | "language": "python", 10 | "name": "python2" 11 | }, 12 | "name": "", 13 | "signature": "sha256:baea49e6eb5472f6760ab9ae128a9f184fcf709d2a1921e3101b322cc582ab28" 14 | }, 15 | "nbformat": 3, 16 | "nbformat_minor": 0, 17 | "worksheets": [ 18 | { 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Scipy Exercises" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "%matplotlib inline\n", 32 | "import matplotlib.pyplot as plt\n", 33 | "\n", 34 | "import numpy as np\n", 35 | "import scipy as sp" 36 | ], 37 | "language": "python", 38 | "metadata": {}, 39 | "outputs": [], 40 | "prompt_number": 1 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "## 1d integration" 47 | ] 48 | }, 49 | { 50 | "cell_type": "markdown", 51 | "metadata": {}, 52 | "source": [ 53 | "Use `scipy.integrate.quad` to integrate the function $f(x)=x$ over the range $[0,1]$." 
54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "collapsed": false, 59 | "input": [], 60 | "language": "python", 61 | "metadata": {}, 62 | "outputs": [] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Check your answer by doing the integral analytically." 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "## Simple optimization" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "Define a Python function $f(x)$ that computes the value of the function:\n", 83 | "\n", 84 | "$$\n", 85 | "f(x) = x^2 + 2 x - 4\n", 86 | "$$" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "collapsed": false, 92 | "input": [], 93 | "language": "python", 94 | "metadata": {}, 95 | "outputs": [] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "Use `np.linspace` and `plt.plot` to plot the function over a reasonable range that shows the minimum of the function:" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "collapsed": false, 107 | "input": [], 108 | "language": "python", 109 | "metadata": {}, 110 | "outputs": [] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Use an appropriate function from `scipy.optimize` to find the minimum of this function numerically. Make sure that the numerical answer makes sense." 
117 | ] 118 | }, 119 | { 120 | "cell_type": "code", 121 | "collapsed": false, 122 | "input": [], 123 | "language": "python", 124 | "metadata": {}, 125 | "outputs": [] 126 | }, 127 | { 128 | "cell_type": "markdown", 129 | "metadata": {}, 130 | "source": [ 131 | "## Multiple minima" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "The following function has multiple minima:\n", 139 | "\n", 140 | "$$\n", 141 | "f(x) = 4x^3 + (x-2)^2 + x^4\n", 142 | "$$\n", 143 | "\n", 144 | "Plot this function over the range $[-4,2]$ and find all of its minima using an appropriate function from `scipy.optimize`." 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "collapsed": false, 150 | "input": [], 151 | "language": "python", 152 | "metadata": {}, 153 | "outputs": [] 154 | } 155 | ], 156 | "metadata": {} 157 | } 158 | ] 159 | } -------------------------------------------------------------------------------- /resources/vizarray.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """Vizualize NumPy arrays using ipythonblocks. 
# Usage notes (continuation of the module documentation):
#
# To enable the automatic visualization of arrays::
#
#     import vizarray
#     vizarray.enable()
#
# To disable this::
#
#     vizarray.disable()
#
# To set the colormap (to any valid matplotlib colormap name)::
#
#     vizarray.set_cmap('jet')
#
# To set the block_size in px (default is 30px)::
#
#     vizarray.set_block_size(10)
#
# To turn off autoscaling of arrays::
#
#     vizarray.set_scale(False)

import ipythonblocks as ipb
from ipythonblocks import BlockGrid
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors

# Module-wide defaults. vizarray() falls back to these whenever the matching
# keyword argument is left as None; change them through the set_* functions.
_cmap = 'jet'
_scale = True
_vmin = None
_vmax = None
_block_size = 30


def set_cmap(cmap_name):
    """Set the default colormap name used by vizarray()."""
    global _cmap
    _cmap = cmap_name


def set_scale(s):
    """Set whether vizarray() normalizes values by default."""
    global _scale
    _scale = s


def set_vmin(vm):
    """Set the default lower normalization limit (None leaves it unset)."""
    global _vmin
    _vmin = vm


def set_vmax(vm):
    """Set the default upper normalization limit (None leaves it unset)."""
    global _vmax
    _vmax = vm


def set_block_size(bs):
    """Set the default block size passed to BlockGrid."""
    global _block_size
    _block_size = bs


def list_colormaps():
    """Return the sorted names in ``plt.cm.datad`` that do not end in ``_r``."""
    return sorted(m for m in plt.cm.datad if not m.endswith("_r"))


def _value_to_color(value, cmap):
    """Convert a value in the range [0,1] to an ``[r, g, b]`` list of 0-255 ints.

    ``cmap`` is resolved with ``plt.get_cmap``, so it may be a colormap name
    or an already-resolved colormap object.
    """
    rgba = plt.get_cmap(cmap)(value)
    # Drop the alpha channel and rescale the remaining three to 0-255.
    return [int(round(255 * channel)) for channel in rgba[0:3]]


def vizarray(x, cmap=None, scale=None, vmin=None, vmax=None, block_size=None):
    """Visualize a 1-D or 2-D NumPy array as an ipythonblocks ``BlockGrid``.

    Each argument left as ``None`` falls back to the module default set via
    the corresponding ``set_*`` function.

    When scaling is on, values are normalized with
    ``matplotlib.colors.Normalize``. If ``x`` is a view of another ndarray,
    any limit that was *not* given explicitly is taken from that parent
    array; explicit ``vmin``/``vmax`` are always honoured.

    Returns the populated ``BlockGrid`` (one row for 1-D input).
    Raises ``TypeError`` if ``x.ndim`` is not 1 or 2.
    """
    if x.ndim not in (1, 2):
        raise TypeError('This function only works with 1 or 2 dimensional arrays')

    cmap = _cmap if cmap is None else cmap
    scale = _scale if scale is None else scale
    vmin = _vmin if vmin is None else vmin
    vmax = _vmax if vmax is None else vmax
    block_size = _block_size if block_size is None else block_size

    data = x.copy()
    if scale:
        norm = colors.Normalize(vmin=vmin, vmax=vmax)
        parent = x.base
        # Only an ndarray parent has a meaningful value range; `base` can
        # also be a raw buffer owner, which must not be fed to the scaler.
        if isinstance(parent, np.ndarray):
            # autoscale_None fills in only the limits still unset, so
            # caller-supplied vmin/vmax are no longer overwritten for views.
            norm.autoscale_None(parent)
        data = norm(data)

    # Treat 1-D input as a single-row grid so one fill loop serves both cases.
    cells = data.reshape(1, -1) if data.ndim == 1 else data
    rows, cols = cells.shape
    bg = BlockGrid(cols, rows, block_size=block_size)

    # Resolve the colormap once instead of once per cell.
    colormap = plt.get_cmap(cmap)
    for row in range(rows):
        for col in range(cols):
            bg[row, col] = _value_to_color(cells[row, col], colormap)
    return bg


def _array_to_html(a):
    """Return the HTML representation of ``vizarray(a)``."""
    return vizarray(a)._repr_html_()


def _html_formatter():
    """Return the ``text/html`` display formatter of the running IPython shell."""
    try:
        from IPython.core.getipython import get_ipython
    except ImportError:
        raise ImportError('This feature requires IPython 1.0+')
    ip = get_ipython()
    if ip is None:
        # get_ipython() yields None when no interactive shell is registered;
        # fail with a clear message instead of an AttributeError on None.
        raise RuntimeError('This feature requires a running IPython session')
    return ip.display_formatter.formatters['text/html']


def enable():
    """Enable automatic visualization of NumPy arrays in the IPython Notebook."""
    _html_formatter().for_type(np.ndarray, _array_to_html)


def disable():
    """Disable automatic visualization of NumPy arrays in the IPython Notebook."""
    _html_formatter().type_printers.pop(np.ndarray, None)


# --------------------------------------------------------------------------------
# /03 - Data 1.ipynb:
-------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Python 2", 5 | "language": "python", 6 | "name": "python2" 7 | }, 8 | "language_info": { 9 | "codemirror_mode": { 10 | "name": "ipython", 11 | "version": 2 12 | }, 13 | "file_extension": ".py", 14 | "mimetype": "text/x-python", 15 | "name": "python", 16 | "nbconvert_exporter": "python", 17 | "pygments_lexer": "ipython2", 18 | "version": "2.7.6" 19 | }, 20 | "name": "", 21 | "signature": "sha256:ed9ffa3dbf1e48bbe8b2834405c295cee4f6be9fd639b60454489ac345511916" 22 | }, 23 | "nbformat": 3, 24 | "nbformat_minor": 0, 25 | "worksheets": [ 26 | { 27 | "cells": [ 28 | { 29 | "cell_type": "heading", 30 | "level": 1, 31 | "metadata": {}, 32 | "source": [ 33 | "SF Purchases Example" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "In this example, interact is used to build a UI for exploring [San Francisco department purchases by city agency](https://data.sfgov.org/Economy-and-Community/Delegated-Departmental-Purchases-by-City-Agency-Pr/4q92-gm9f) data." 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "collapsed": false, 46 | "input": [ 47 | "# Import Pandas and then load the data.\n", 48 | "from pandas import read_csv\n", 49 | "df = read_csv('SFDeptPurchases.csv')" 50 | ], 51 | "language": "python", 52 | "metadata": {}, 53 | "outputs": [] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "You can take a quick look at the first 5 rows of the data set using a slice. Pandas knows how to display this as a table in IPython." 
60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "collapsed": false, 65 | "input": [ 66 | "df[:5]" 67 | ], 68 | "language": "python", 69 | "metadata": {}, 70 | "outputs": [] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "Notice that the totals are of type object (strings) instead of numbers." 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "collapsed": false, 82 | "input": [ 83 | "df[:5]['Total']" 84 | ], 85 | "language": "python", 86 | "metadata": {}, 87 | "outputs": [] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "Remove the dollar sign from the strings and cast them to numbers." 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "collapsed": false, 99 | "input": [ 100 | "df['Total'] = df['Total'].str.replace(r'[$,]', '').convert_objects(convert_numeric=True)" 101 | ], 102 | "language": "python", 103 | "metadata": {}, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "collapsed": false, 109 | "input": [ 110 | "df[:5]['Total']" 111 | ], 112 | "language": "python", 113 | "metadata": { 114 | "scrolled": true 115 | }, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "Now the data can be explored using matplotlib and interact. The following function plots the costs of the selected parameter type." 
123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "collapsed": false, 128 | "input": [ 129 | "%matplotlib inline\n", 130 | "from matplotlib import pyplot as plt\n", 131 | "from pandas import DataFrame\n", 132 | "\n", 133 | "def plot_by(df, column='Dept Name', count=10, ascending=False):\n", 134 | " \n", 135 | " # Group the data by the column specified and sum the totals.\n", 136 | " data = df.groupby(column)['Total'].sum().dropna()\n", 137 | " \n", 138 | " # Sort the data.\n", 139 | " data = DataFrame(data, columns=['Total']).sort('Total', ascending=ascending)\n", 140 | " \n", 141 | " # Plot the subset of the sorted data that the user is interested in.\n", 142 | " data = data[:count].plot(kind='bar')\n", 143 | " \n", 144 | " # Plot settings.\n", 145 | " plt.title('%s Costs' % column)\n", 146 | " plt.ylabel('Cost ($)')" 147 | ], 148 | "language": "python", 149 | "metadata": {}, 150 | "outputs": [] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "collapsed": false, 155 | "input": [ 156 | "from IPython.html.widgets import interact, fixed\n", 157 | "interact(plot_by, df=fixed(df), column=df.columns.tolist(), count=(5,15));" 158 | ], 159 | "language": "python", 160 | "metadata": {}, 161 | "outputs": [] 162 | } 163 | ], 164 | "metadata": {} 165 | } 166 | ] 167 | } -------------------------------------------------------------------------------- /resources/Scipy.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 2 7 | }, 8 | "display_name": "IPython (Python 2)", 9 | "language": "python", 10 | "name": "python2" 11 | }, 12 | "name": "", 13 | "signature": "sha256:28f1559f391867ac5916abfd3fd21a44b117afd469263418eaeb145d2ce501bb" 14 | }, 15 | "nbformat": 3, 16 | "nbformat_minor": 0, 17 | "worksheets": [ 18 | { 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# SciPy: Numerical 
Algorithms for Python" 25 | ] 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "metadata": {}, 30 | "source": [ 31 | "**Learning Objective:** Learn how to find and use numerical algorithms in the SciPy package." 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "collapsed": false, 37 | "input": [ 38 | "%matplotlib inline\n", 39 | "from matplotlib import pyplot as plt\n", 40 | "import numpy as np" 41 | ], 42 | "language": "python", 43 | "metadata": {}, 44 | "outputs": [], 45 | "prompt_number": 1 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Overview" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "The SciPy framework builds on top of NumPy and provides a large number of numerical algorithms for working with data. Some of the topics that SciPy covers are:\n", 59 | "\n", 60 | "* Special functions ([scipy.special](http://docs.scipy.org/doc/scipy/reference/special.html))\n", 61 | "* Integration/ODEs ([scipy.integrate](http://docs.scipy.org/doc/scipy/reference/integrate.html))\n", 62 | "* Optimization ([scipy.optimize](http://docs.scipy.org/doc/scipy/reference/optimize.html))\n", 63 | "* Interpolation ([scipy.interpolate](http://docs.scipy.org/doc/scipy/reference/interpolate.html))\n", 64 | "* Fourier Transforms ([scipy.fftpack](http://docs.scipy.org/doc/scipy/reference/fftpack.html))\n", 65 | "* Signal Processing ([scipy.signal](http://docs.scipy.org/doc/scipy/reference/signal.html))\n", 66 | "* Linear Algebra ([scipy.linalg](http://docs.scipy.org/doc/scipy/reference/linalg.html))\n", 67 | "* Sparse Eigenvalue Problems ([scipy.sparse](http://docs.scipy.org/doc/scipy/reference/sparse.html))\n", 68 | "* Statistics ([scipy.stats](http://docs.scipy.org/doc/scipy/reference/stats.html))\n", 69 | "* Multi-dimensional image processing ([scipy.ndimage](http://docs.scipy.org/doc/scipy/reference/ndimage.html))\n", 70 | "* File IO 
([scipy.io](http://docs.scipy.org/doc/scipy/reference/io.html))\n", 71 | "\n", 72 | "This notebook is not a complete tour of SciPy. Rather it focuses on the most important parts of the package for processing data.\n", 73 | "\n", 74 | "In many cases, you will want to import specific names from `scipy` subpackages. However, as a start, it is helpful to do the following import:" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "collapsed": false, 80 | "input": [ 81 | "import scipy as sp" 82 | ], 83 | "language": "python", 84 | "metadata": {}, 85 | "outputs": [], 86 | "prompt_number": 2 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "## Approach" 93 | ] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "metadata": {}, 98 | "source": [ 99 | "One of the most important skills in data science is to be able to find Python functions and classes in a module and learn how to use them yourself. Here are some recommended steps on how to go about this:\n", 100 | "\n", 101 | "* Find the online documentation for the package you are using.\n", 102 | "* Try to find the subpackage or even the function that looks like it will do the job.\n", 103 | "* Import the module, function or class and use tab completion and `?` to explore it.\n", 104 | "* Try using the function or class for an extremely simple case where you know the answer.\n", 105 | "* Then try using it for your real problem." 
106 | ] 107 | }, 108 | { 109 | "cell_type": "markdown", 110 | "metadata": {}, 111 | "source": [ 112 | "## Resources" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": {}, 118 | "source": [ 119 | "* [SciPy Website](http://www.scipy.org)\n", 120 | "* [SciPy Reference Documentation](http://docs.scipy.org/doc/scipy/reference/)\n", 121 | "* [Python Scientific Lecture Notes](http://scipy-lectures.github.io/index.html), Edited by Valentin Haenel,\n", 122 | "Emmanuelle Gouillart and Ga\u00ebl Varoquaux.\n", 123 | "* [Lectures on Scientific Computing with Python](https://github.com/jrjohansson/scientific-python-lectures), J.R. Johansson.\n", 124 | "* [Introduction to Scientific Computing in Python](http://nbviewer.ipython.org/github/jakevdp/2014_fall_ASTR599/tree/master/), Jake Vanderplas." 125 | ] 126 | } 127 | ], 128 | "metadata": {} 129 | } 130 | ] 131 | } -------------------------------------------------------------------------------- /resources/What is the IPython Notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "celltoolbar": "Slideshow", 4 | "name": "", 5 | "signature": "sha256:ad9cb95b14212dbf9fc8ecafa475d0e368e4cb7c7306b89628b500e7e40068b6" 6 | }, 7 | "nbformat": 3, 8 | "nbformat_minor": 0, 9 | "worksheets": [ 10 | { 11 | "cells": [ 12 | { 13 | "cell_type": "heading", 14 | "level": 1, 15 | "metadata": { 16 | "slideshow": { 17 | "slide_type": "slide" 18 | } 19 | }, 20 | "source": [ 21 | "What is the IPython Notebook?" 
22 | ] 23 | }, 24 | { 25 | "cell_type": "heading", 26 | "level": 2, 27 | "metadata": {}, 28 | "source": [ 29 | "Introduction" 30 | ] 31 | }, 32 | { 33 | "cell_type": "markdown", 34 | "metadata": {}, 35 | "source": [ 36 | "The IPython Notebook is an **interactive computing environment** that enables users to author notebook documents that include: \n", 37 | "- Live code\n", 38 | "- Interactive widgets\n", 39 | "- Plots\n", 40 | "- Narrative text\n", 41 | "- Equations\n", 42 | "- Images\n", 43 | "- Video\n", 44 | "\n", 45 | "These documents provide a **complete and self-contained record of a computation** that can be converted to various formats and shared with others using email, [Dropbox](http://dropbox.com), version control systems (like git/[GitHub](http://github.com)) or [nbviewer.ipython.org](http://nbviewer.ipython.org)." 46 | ] 47 | }, 48 | { 49 | "cell_type": "heading", 50 | "level": 3, 51 | "metadata": { 52 | "slideshow": { 53 | "slide_type": "slide" 54 | } 55 | }, 56 | "source": [ 57 | "Components" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "The IPython Notebook combines three components:\n", 65 | "\n", 66 | "* **The notebook web application**: An interactive web application for writing and running code interactively and authoring notebook documents.\n", 67 | "* **Kernels**: Separate processes started by the notebook web application that runs users' code in a given language and returns output back to the notebook web application. The kernel also handles things like computations for interactive widgets, tab completion and introspection. \n", 68 | "* **Notebook documents**: Self-contained documents that contain a representation of all content visible in the notebook web application, including inputs and outputs of the computations, narrative\n", 69 | "text, equations, images, and rich media representations of objects. Each notebook document has its own kernel." 
70 | ] 71 | }, 72 | { 73 | "cell_type": "heading", 74 | "level": 2, 75 | "metadata": { 76 | "slideshow": { 77 | "slide_type": "slide" 78 | } 79 | }, 80 | "source": [ 81 | "Notebook web application" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "The notebook web application enables users to:\n", 89 | "\n", 90 | "* **Edit code in the browser**, with automatic syntax highlighting, indentation, and tab completion/introspection.\n", 91 | "* **Run code from the browser**, with the results of computations attached to the code which generated them.\n", 92 | "* See the results of computations with **rich media representations**, such as HTML, LaTeX, PNG, SVG, PDF, etc.\n", 93 | "* Create and use **interactive JavaScript widgets**, which bind interactive user interface controls and visualizations to reactive kernel side computations.\n", 94 | "* Author **narrative text** using the [Markdown](https://daringfireball.net/projects/markdown/) markup language.\n", 95 | "* Build **hierarchical documents** that are organized into sections with different levels of headings.\n", 96 | "* Include mathematical equations using **LaTeX syntax in Markdown**, which are rendered in-browser by [MathJax](http://www.mathjax.org/).\n", 97 | "* Start **parallel computing** clusters that work with IPython's interactive parallel computing libraries `IPython.parallel`." 98 | ] 99 | }, 100 | { 101 | "cell_type": "heading", 102 | "level": 2, 103 | "metadata": { 104 | "slideshow": { 105 | "slide_type": "slide" 106 | } 107 | }, 108 | "source": [ 109 | "Kernels" 110 | ] 111 | }, 112 | { 113 | "cell_type": "markdown", 114 | "metadata": {}, 115 | "source": [ 116 | "Through IPython's kernel and messaging architecture, the Notebook allows code to be run in a range of different programming languages. For each notebook document that a user opens, the web application starts a kernel that runs the code for that notebook. 
Each kernel is capable of running code in a single programming language and there are kernels available in the following languages:\n", 117 | "\n", 118 | "* Python (https://github.com/ipython/ipython)\n", 119 | "* Julia (https://github.com/JuliaLang/IJulia.jl)\n", 120 | "* R (https://github.com/takluyver/IRkernel)\n", 121 | "* Ruby (https://github.com/minrk/iruby)\n", 122 | "* Haskell (https://github.com/gibiansky/IHaskell)\n", 123 | "* Scala (https://github.com/Bridgewater/scala-notebook)\n", 124 | "* node.js (https://gist.github.com/Carreau/4279371)\n", 125 | "* Go (https://github.com/takluyver/igo)\n", 126 | "\n", 127 | "The default kernel runs Python code. When it is released in the Summer/Fall of 2014, IPython 3.0 will provide a simple way for users to pick which of these kernels is used for a given notebook. \n", 128 | "\n", 129 | "Each of these kernels communicates with the notebook web application and web browser using a JSON over ZeroMQ/WebSockets message protocol that is described [here](http://ipython.org/ipython-doc/dev/development/messaging.html). Most users don't need to know about these details, but it helps to understand that \"kernels run code.\"" 130 | ] 131 | }, 132 | { 133 | "cell_type": "heading", 134 | "level": 2, 135 | "metadata": { 136 | "slideshow": { 137 | "slide_type": "slide" 138 | } 139 | }, 140 | "source": [ 141 | "Notebook documents" 142 | ] 143 | }, 144 | { 145 | "cell_type": "markdown", 146 | "metadata": {}, 147 | "source": [ 148 | "Notebook documents contain the **inputs and outputs** of an interactive session as well as **narrative text** that accompanies the code but is not meant for execution. **Rich output** generated by running code, including HTML, images, video, and plots, is embedded in the notebook, which makes it a complete and self-contained record of a computation. 
" 149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "metadata": {}, 154 | "source": [ 155 | "When you run the notebook web application on your computer, notebook documents are just **files on your local filesystem with a `.ipynb` extension**. This allows you to use familiar workflows for organizing your notebooks into folders and sharing them with others using email, Dropbox and version control systems." 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Notebooks consist of a **linear sequence of cells**. There are four basic cell types:\n", 163 | "\n", 164 | "* **Code cells:** Input and output of live code that is run in the kernel\n", 165 | "* **Markdown cells:** Narrative text with embedded LaTeX equations\n", 166 | "* **Heading cells:** 6 levels of hierarchical organization and formatting\n", 167 | "* **Raw cells:** Unformatted text that is included, without modification, when notebooks are converted to different formats using nbconvert\n", 168 | "\n", 169 | "Internally, notebook documents are **[JSON](http://en.wikipedia.org/wiki/JSON) data** with **binary values [base64](http://en.wikipedia.org/wiki/Base64) encoded**. This allows them to be **read and manipulated programmatically** by any programming language. Because JSON is a text format, notebook documents are version control friendly.\n", 170 | "\n", 171 | "**Notebooks can be exported** to different static formats including HTML, reStructuredText, LaTeX, PDF, and slide shows ([reveal.js](http://lab.hakim.se/reveal-js/#/)) using IPython's `nbconvert` utility.\n", 172 | "\n", 173 | "Furthermore, any notebook document available from a **public URL or on GitHub can be shared** via http://nbviewer.ipython.org. This service loads the notebook document from the URL and renders it as a static web page. The resulting web page may thus be shared with others **without their needing to install IPython**." 
174 | ] 175 | } 176 | ], 177 | "metadata": {} 178 | } 179 | ] 180 | } -------------------------------------------------------------------------------- /resources/Numpy Exercises.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "codemirror_mode": { 5 | "name": "ipython", 6 | "version": 2 7 | }, 8 | "display_name": "IPython (Python 2)", 9 | "language": "python", 10 | "name": "python2" 11 | }, 12 | "name": "", 13 | "signature": "sha256:8b47ed8903b0024854b7e833c39ca7ae4107b1daa531f81d7e95ca7384eb036c" 14 | }, 15 | "nbformat": 3, 16 | "nbformat_minor": 0, 17 | "worksheets": [ 18 | { 19 | "cells": [ 20 | { 21 | "cell_type": "markdown", 22 | "metadata": {}, 23 | "source": [ 24 | "# Numpy Exercises" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "collapsed": false, 30 | "input": [ 31 | "import numpy as np\n", 32 | "import vizarray as vz\n", 33 | "%matplotlib inline\n", 34 | "import matplotlib.pyplot as plt\n", 35 | "plt.style.use('ggplot')" 36 | ], 37 | "language": "python", 38 | "metadata": {}, 39 | "outputs": [] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "metadata": {}, 44 | "source": [ 45 | "## Checkerboard" 46 | ] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "metadata": {}, 51 | "source": [ 52 | "Write a Python function that creates a square `(size,size)` 2d Numpy array with the values `0.0` and `1.0` in a checkerboard pattern." 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "collapsed": false, 58 | "input": [ 59 | "def checkerboard(size):\n", 60 | " \"\"\"Return a 2d checkboard of 0s and 1s as a NumPy array.\"\"\"\n" 61 | ], 62 | "language": "python", 63 | "metadata": {}, 64 | "outputs": [] 65 | }, 66 | { 67 | "cell_type": "markdown", 68 | "metadata": {}, 69 | "source": [ 70 | "Use `vizarray` to visualize the checkerboard. 
" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "collapsed": false, 76 | "input": [], 77 | "language": "python", 78 | "metadata": {}, 79 | "outputs": [], 80 | "prompt_number": 24 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "Using tab completion and `?` figure out how to list and change the colormap used by `vizarray`." 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "collapsed": false, 92 | "input": [], 93 | "language": "python", 94 | "metadata": {}, 95 | "outputs": [] 96 | }, 97 | { 98 | "cell_type": "markdown", 99 | "metadata": {}, 100 | "source": [ 101 | "## Stochastic Process" 102 | ] 103 | }, 104 | { 105 | "cell_type": "markdown", 106 | "metadata": {}, 107 | "source": [ 108 | "Here is a function that produces standard Brownian motion using NumPy." 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "collapsed": false, 114 | "input": [ 115 | "def brownian(maxt, n):\n", 116 | " \"\"\"Return one realization of a Brownian (Wiener) process with n steps and a max time of t.\"\"\"\n", 117 | " t = np.linspace(0.0,maxt,n)\n", 118 | " h = t[1]-t[0]\n", 119 | " Z = np.random.normal(0.0,1.0,n-1)\n", 120 | " dW = np.sqrt(h)*Z\n", 121 | " W = np.zeros(n)\n", 122 | " W[1:] = dW.cumsum()\n", 123 | " return t, W" 124 | ], 125 | "language": "python", 126 | "metadata": {}, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "collapsed": false, 132 | "input": [ 133 | "t, W = brownian(1.0, 1000)" 134 | ], 135 | "language": "python", 136 | "metadata": {}, 137 | "outputs": [] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "Visualize the process using `plt.plot` with `t` on the x-axis and `W(t)` on the y-axis:" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "collapsed": false, 149 | "input": [], 150 | "language": "python", 151 | "metadata": {}, 152 | "outputs": [] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": 
[ 158 | "Use `np.diff` to compute the changes at each step of the motion and then use `plt.hist` to visualize the distribution of those changes with 30 bins." 159 | ] 160 | }, 161 | { 162 | "cell_type": "code", 163 | "collapsed": false, 164 | "input": [], 165 | "language": "python", 166 | "metadata": {}, 167 | "outputs": [] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "Write a function that takes $W(t)$ and converts it to geometric Brownian motion using the equation:\n", 174 | "\n", 175 | "$$\n", 176 | "X(t) = X_0 e^{((\\mu - \\sigma^2/2)t + \\sigma W(t))}\n", 177 | "$$\n", 178 | "\n", 179 | "Use Numpy ufuncs in your function." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "collapsed": false, 185 | "input": [ 186 | "def geo_brownian(t, W, X0, mu, sigma):\n", 187 | " \"\"\"Return X(t) for geometric brownian motion with drift mu, volatility sigma.\"\"\"" 188 | ], 189 | "language": "python", 190 | "metadata": {}, 191 | "outputs": [] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "Use your function to simulate geometric brownian motion for $\\mu=0.5$ and $\\sigma=0.3$ and visualize it using `plt.plot`." 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "collapsed": false, 203 | "input": [], 204 | "language": "python", 205 | "metadata": {}, 206 | "outputs": [] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "collapsed": false, 211 | "input": [], 212 | "language": "python", 213 | "metadata": {}, 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "metadata": {}, 219 | "source": [ 220 | "## Factorial" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "Write a Python function that computes the factorial of small numbers using `np.arange` and `np.cumprod`." 
228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "collapsed": false, 233 | "input": [ 234 | "def my_fact(n):\n", 235 | " \"\"\"Compute n! = n*(n-1)*...*1 using Numpy.\"\"\"" 236 | ], 237 | "language": "python", 238 | "metadata": {}, 239 | "outputs": [] 240 | }, 241 | { 242 | "cell_type": "markdown", 243 | "metadata": {}, 244 | "source": [ 245 | "## Gathering data" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "metadata": {}, 251 | "source": [ 252 | "Go to http://www.wunderground.com/ and find today's hourly temperature predictions for some location on the planet. Enter that data into a text files named `temps.txt` using IPython's `%%writefile` magic command." 253 | ] 254 | }, 255 | { 256 | "cell_type": "code", 257 | "collapsed": false, 258 | "input": [ 259 | "%%writefile temps.txt\n", 260 | "## Enter your data below, one value per line" 261 | ], 262 | "language": "python", 263 | "metadata": {}, 264 | "outputs": [ 265 | { 266 | "output_type": "stream", 267 | "stream": "stdout", 268 | "text": [ 269 | "Overwriting temps.txt\n" 270 | ] 271 | } 272 | ], 273 | "prompt_number": 30 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "Load that data as a Numpy array using `np.loadtxt`:" 280 | ] 281 | }, 282 | { 283 | "cell_type": "code", 284 | "collapsed": false, 285 | "input": [], 286 | "language": "python", 287 | "metadata": {}, 288 | "outputs": [ 289 | { 290 | "metadata": {}, 291 | "output_type": "pyout", 292 | "prompt_number": 31, 293 | "text": [ 294 | "array([ 1., 2., 3.])" 295 | ] 296 | } 297 | ], 298 | "prompt_number": 31 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "Plot the temperature using `plt.plot`. For this, you will also need to create a Numpy array of the hours of the day. See if you can figure out how to use `plt.title`, `plt.xlabel` and `plt.ylabel` to label your plot." 
305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "collapsed": false, 310 | "input": [], 311 | "language": "python", 312 | "metadata": {}, 313 | "outputs": [] 314 | }, 315 | { 316 | "cell_type": "markdown", 317 | "metadata": {}, 318 | "source": [ 319 | "Compute the min, max, mean and variance of the temperature." 320 | ] 321 | }, 322 | { 323 | "cell_type": "code", 324 | "collapsed": false, 325 | "input": [], 326 | "language": "python", 327 | "metadata": {}, 328 | "outputs": [] 329 | } 330 | ], 331 | "metadata": {} 332 | } 333 | ] 334 | } -------------------------------------------------------------------------------- /04 - Data 2.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "kernelspec": { 4 | "display_name": "Python 2", 5 | "language": "python", 6 | "name": "python2" 7 | }, 8 | "language_info": { 9 | "codemirror_mode": { 10 | "name": "ipython", 11 | "version": 2 12 | }, 13 | "file_extension": ".py", 14 | "mimetype": "text/x-python", 15 | "name": "python", 16 | "nbconvert_exporter": "python", 17 | "pygments_lexer": "ipython2", 18 | "version": "2.7.6" 19 | }, 20 | "name": "", 21 | "signature": "sha256:205dbc55f98cb3040ab4ce49ee6ceed1a8b74d71fc636b7f196bd6d477a7d2b1" 22 | }, 23 | "nbformat": 3, 24 | "nbformat_minor": 0, 25 | "worksheets": [ 26 | { 27 | "cells": [ 28 | { 29 | "cell_type": "heading", 30 | "level": 1, 31 | "metadata": {}, 32 | "source": [ 33 | "Bay Area 2bd Rental Example" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "In this example, rental prices of 2 bedroom apartments in the bay area are used to generate a price heat map overlayed on top of a Google map. The first step is to load the image's meta data which is stored externally in a json file. Python's `json` module is used. 
" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "collapsed": true, 46 | "input": [ 47 | "import json\n", 48 | "with open('./map_info.json', 'r') as f:\n", 49 | " map_info = json.loads(f.read())\n", 50 | "coords = map_info\n", 51 | "image_size = [coords.pop('width'), coords.pop('height')]" 52 | ], 53 | "language": "python", 54 | "metadata": {}, 55 | "outputs": [] 56 | }, 57 | { 58 | "cell_type": "heading", 59 | "level": 2, 60 | "metadata": {}, 61 | "source": [ 62 | "Parsing data using Pandas" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "Pandas has a built in module for loading `json` files directly. Here it's used to load the markers data." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "collapsed": false, 75 | "input": [ 76 | "import pandas as pd\n", 77 | "markers = pd.io.json.read_json('./markers.json')" 78 | ], 79 | "language": "python", 80 | "metadata": {}, 81 | "outputs": [] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "collapsed": false, 86 | "input": [ 87 | "markers[:5]" 88 | ], 89 | "language": "python", 90 | "metadata": {}, 91 | "outputs": [] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "Each marker has details associated with it. Those details are called a \"listing\". The same Pandas loading method is used for the listings data." 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "collapsed": false, 103 | "input": [ 104 | "listings = pd.io.json.read_json('./listings.json')" 105 | ], 106 | "language": "python", 107 | "metadata": {}, 108 | "outputs": [] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "collapsed": false, 113 | "input": [ 114 | "listings[:5]" 115 | ], 116 | "language": "python", 117 | "metadata": {}, 118 | "outputs": [] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "As separate data frames, the data isn\u2019t as useful to us. 
Here the data is merged into one data frame by listing id." 125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "collapsed": false, 130 | "input": [ 131 | "df = markers.merge(listings, 'outer', 'id')\n", 132 | "df[:5]" 133 | ], 134 | "language": "python", 135 | "metadata": {}, 136 | "outputs": [] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "metadata": {}, 141 | "source": [ 142 | "Take a quick look at all of the columns to see what data is available." 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "collapsed": false, 148 | "input": [ 149 | "df.columns.tolist()" 150 | ], 151 | "language": "python", 152 | "metadata": {}, 153 | "outputs": [] 154 | }, 155 | { 156 | "cell_type": "markdown", 157 | "metadata": {}, 158 | "source": [ 159 | "Make a smaller frame containing the X and Y coordinates and the price." 160 | ] 161 | }, 162 | { 163 | "cell_type": "code", 164 | "collapsed": false, 165 | "input": [ 166 | "bd2_prices = df[['lat_x', 'lng_x', 'price']]\n", 167 | "bd2_prices[:5]" 168 | ], 169 | "language": "python", 170 | "metadata": { 171 | "scrolled": true 172 | }, 173 | "outputs": [] 174 | }, 175 | { 176 | "cell_type": "heading", 177 | "level": 2, 178 | "metadata": {}, 179 | "source": [ 180 | "Importing into numpy" 181 | ] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "metadata": {}, 186 | "source": [ 187 | "When working with numerical data, I tend to use numpy. Converting from a Pandas data frame to a numpy array is easy." 
188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "collapsed": false, 193 | "input": [ 194 | "array = bd2_prices.as_matrix()\n", 195 | "array" 196 | ], 197 | "language": "python", 198 | "metadata": {}, 199 | "outputs": [] 200 | }, 201 | { 202 | "cell_type": "markdown", 203 | "metadata": {}, 204 | "source": [ 205 | "Since the map image size and the longitude and latitude window are known, the longitudes and latitudes can be converted to pixel coordinates, which is necessary for plotting on top of the map image." 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "collapsed": false, 211 | "input": [ 212 | "long2px = lambda x: (x - coords['westLong']) / (coords['eastLong'] - coords['westLong']) * image_size[0]\n", 213 | "lat2px = lambda y: (1 - (y - coords['southLat']) / (coords['northLat'] - coords['southLat'])) * image_size[1]\n", 214 | "array[:, 0] = lat2px(array[:, 0])\n", 215 | "array[:, 1] = long2px(array[:, 1])\n", 216 | "array" 217 | ], 218 | "language": "python", 219 | "metadata": {}, 220 | "outputs": [] 221 | }, 222 | { 223 | "cell_type": "heading", 224 | "level": 2, 225 | "metadata": {}, 226 | "source": [ 227 | "Plotting with Matplotlib" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "Import matplotlib and set the necessary config options." 235 | ] 236 | }, 237 | { 238 | "cell_type": "code", 239 | "collapsed": true, 240 | "input": [ 241 | "import matplotlib.pyplot as plt\n", 242 | "%matplotlib inline\n", 243 | "%config InlineBackend.figure_format = 'retina'" 244 | ], 245 | "language": "python", 246 | "metadata": {}, 247 | "outputs": [] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "metadata": {}, 252 | "source": [ 253 | "Load the map image and plot it." 
254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "collapsed": false, 259 | "input": [ 260 | "cropped_map = plt.imread('./map.png')\n", 261 | "plt.imshow(cropped_map)" 262 | ], 263 | "language": "python", 264 | "metadata": {}, 265 | "outputs": [] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "Plot the markers above the map image." 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "collapsed": false, 277 | "input": [ 278 | "plt.imshow(cropped_map)\n", 279 | "plt.scatter(array[:, 1], array[:, 0], s=4., c='r', linewidths=0)\n", 280 | "plt.show()" 281 | ], 282 | "language": "python", 283 | "metadata": {}, 284 | "outputs": [] 285 | }, 286 | { 287 | "cell_type": "markdown", 288 | "metadata": {}, 289 | "source": [ 290 | "Use scipy's griddata function to interpolate the scattered markers into a heat map image. Plot that heat map above the Google map." 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "collapsed": false, 296 | "input": [ 297 | "import numpy as np\n", 298 | "from scipy.interpolate import griddata\n", 299 | "# define grid.\n", 300 | "xi = np.arange(0,image_size[0])\n", 301 | "yi = np.arange(0,image_size[1]-1)\n", 302 | "# grid the data.\n", 303 | "zi = griddata((array[:, 1], array[:, 0]), \n", 304 | " array[:, 2], (xi[None,:], yi[:,None]), \n", 305 | " method='linear', fill_value=array[:, 2].min())\n", 306 | "overlay = plt.imread('./map_overlay.png')\n", 307 | "zi = np.ma.masked_where(overlay[:,:,2]>0,zi)\n", 308 | "\n", 309 | "plt.figure(figsize = (8,8))\n", 310 | "plt.imshow(cropped_map)\n", 311 | "plt.imshow(zi, alpha=0.5, cmap='nipy_spectral')\n", 312 | "plt.colorbar()\n", 313 | "\n", 314 | "plt.title('2bd Apartment Monthly Rent')" 315 | ], 316 | "language": "python", 317 | "metadata": {}, 318 | "outputs": [] 319 | } 320 | ], 321 | "metadata": {} 322 | } 323 | ] 324 | } -------------------------------------------------------------------------------- /resources/Working With Markdown 
Cells.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "metadata": { 3 | "name": "", 4 | "signature": "sha256:3b7cae0c0936f25e6ccb7acafe310c08a4162a1a7fd66fa9874a52cffa0f64f9" 5 | }, 6 | "nbformat": 3, 7 | "nbformat_minor": 0, 8 | "worksheets": [ 9 | { 10 | "cells": [ 11 | { 12 | "cell_type": "heading", 13 | "level": 1, 14 | "metadata": {}, 15 | "source": [ 16 | "Markdown Cells" 17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "Text can be added to IPython Notebooks using Markdown cells. Markdown is a popular markup language that is a superset of HTML. Its specification can be found here:\n", 24 | "\n", 25 | "" 26 | ] 27 | }, 28 | { 29 | "cell_type": "heading", 30 | "level": 2, 31 | "metadata": {}, 32 | "source": [ 33 | "Markdown basics" 34 | ] 35 | }, 36 | { 37 | "cell_type": "markdown", 38 | "metadata": {}, 39 | "source": [ 40 | "You can make text *italic* or **bold**." 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "metadata": {}, 46 | "source": [ 47 | "You can build nested itemized or enumerated lists:\n", 48 | "\n", 49 | "* One\n", 50 | " - Sublist\n", 51 | " - This\n", 52 | " - Sublist\n", 53 | " - That\n", 54 | " - The other thing\n", 55 | "* Two\n", 56 | " - Sublist\n", 57 | "* Three\n", 58 | " - Sublist\n", 59 | "\n", 60 | "Now another list:\n", 61 | "\n", 62 | "1. Here we go\n", 63 | " 1. Sublist\n", 64 | " 2. Sublist\n", 65 | "2. There we go\n", 66 | "3. 
Now this" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "metadata": {}, 72 | "source": [ 73 | "You can add horizontal rules:\n", 74 | "\n", 75 | "---" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "metadata": {}, 81 | "source": [ 82 | "Here is a blockquote:\n", 83 | "\n", 84 | "> Beautiful is better than ugly.\n", 85 | "> Explicit is better than implicit.\n", 86 | "> Simple is better than complex.\n", 87 | "> Complex is better than complicated.\n", 88 | "> Flat is better than nested.\n", 89 | "> Sparse is better than dense.\n", 90 | "> Readability counts.\n", 91 | "> Special cases aren't special enough to break the rules.\n", 92 | "> Although practicality beats purity.\n", 93 | "> Errors should never pass silently.\n", 94 | "> Unless explicitly silenced.\n", 95 | "> In the face of ambiguity, refuse the temptation to guess.\n", 96 | "> There should be one-- and preferably only one --obvious way to do it.\n", 97 | "> Although that way may not be obvious at first unless you're Dutch.\n", 98 | "> Now is better than never.\n", 99 | "> Although never is often better than *right* now.\n", 100 | "> If the implementation is hard to explain, it's a bad idea.\n", 101 | "> If the implementation is easy to explain, it may be a good idea.\n", 102 | "> Namespaces are one honking great idea -- let's do more of those!" 
103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "And shorthand for links:\n", 110 | "\n", 111 | "[IPython's website](http://ipython.org)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "heading", 116 | "level": 2, 117 | "metadata": {}, 118 | "source": [ 119 | "Headings" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "metadata": {}, 125 | "source": [ 126 | "If you want, you can add headings using Markdown's syntax:\n", 127 | "\n", 128 | "# Heading 1\n", 129 | "# Heading 2\n", 130 | "## Heading 2.1\n", 131 | "## Heading 2.2" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "**BUT most of the time you should use the Notebook's Heading Cells to organize your Notebook content**, as they provide meaningful structure that can be interpreted by other tools, not just large bold fonts." 139 | ] 140 | }, 141 | { 142 | "cell_type": "heading", 143 | "level": 2, 144 | "metadata": {}, 145 | "source": [ 146 | "Embedded code" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "You can embed code meant for illustration instead of execution in Python:\n", 154 | "\n", 155 | " def f(x):\n", 156 | " \"\"\"a docstring\"\"\"\n", 157 | " return x**2\n", 158 | "\n", 159 | "or other languages:\n", 160 | "\n", 161 | " if (i=0; i\n", 199 | "```python\n", 200 | "print \"Hello World\"\n", 201 | "```\n", 202 | "\n", 203 | "```javascript\n", 204 | "console.log(\"Hello World\")\n", 205 | "```\n", 206 | "\n", 207 | "\n", 208 | "Gives \n", 209 | "```python\n", 210 | "print \"Hello World\"\n", 211 | "```\n", 212 | "\n", 213 | "```javascript\n", 214 | "console.log(\"Hello World\")\n", 215 | "```\n", 216 | "\n", 217 | "And a table like this : \n", 218 | "\n", 219 | "
\n",
220 |       "| This | is   |\n",
221 |       "|------|------|\n",
222 |       "|   a  | table| \n",
223 |       "
\n", 224 | "\n", 225 | "A nice Html Table\n", 226 | "\n", 227 | "| This | is |\n", 228 | "|------|------|\n", 229 | "| a | table| " 230 | ] 231 | }, 232 | { 233 | "cell_type": "heading", 234 | "level": 2, 235 | "metadata": {}, 236 | "source": [ 237 | "General HTML" 238 | ] 239 | }, 240 | { 241 | "cell_type": "markdown", 242 | "metadata": {}, 243 | "source": [ 244 | "Because Markdown is a superset of HTML you can even add things like HTML tables:\n", 245 | "\n", 246 | "\n", 247 | "\n", 248 | "\n", 249 | "\n", 250 | "\n", 251 | "\n", 252 | "\n", 253 | "\n", 254 | "\n", 255 | "\n", 256 | "\n", 257 | "\n", 258 | "\n", 259 | "
Header 1Header 2
row 1, cell 1row 1, cell 2
row 2, cell 1row 2, cell 2
" 260 | ] 261 | }, 262 | { 263 | "cell_type": "heading", 264 | "level": 2, 265 | "metadata": {}, 266 | "source": [ 267 | "Local files" 268 | ] 269 | }, 270 | { 271 | "cell_type": "markdown", 272 | "metadata": {}, 273 | "source": [ 274 | "If you have local files in your Notebook directory, you can refer to these files in Markdown cells directly:\n", 275 | "\n", 276 | " [subdirectory/]\n", 277 | "\n", 278 | "For example, in the images folder, we have the Python logo:\n", 279 | "\n", 280 | " \n", 281 | "\n", 282 | "\n", 283 | "\n", 284 | "and a video with the HTML5 video tag:\n", 285 | "\n", 286 | "