├── favicon.ico ├── 00_financial_data_science.pdf ├── .gitignore ├── README.md ├── server.py ├── custom.css ├── 05_financial_data_science_viz_stream.ipynb ├── 02_financial_data_science_sql_db.ipynb ├── 04_financial_data_science_viz_d3js.ipynb └── 03_financial_data_science_viz_basic.ipynb /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhilpisch/dnber15/master/favicon.ico -------------------------------------------------------------------------------- /00_financial_data_science.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/yhilpisch/dnber15/master/00_financial_data_science.pdf -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | _store 2 | plotly_creds 3 | *.h5 4 | *.csv 5 | *.bcolz 6 | *.hdf5 7 | *.sql 8 | *.png 9 | *.jpg 10 | *.jpeg 11 | .ipynb* 12 | *.html 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DataNatives Berlin 2015 Workshop 2 | 3 | Python for Financial Data Science. 4 | 5 | This repo contains the Jupyter Notebooks used for my workshop at DataNatives Conference Berlin 2015 on 20.11.2015. 6 | 7 | (c) Dr. Yves J. 
Hilpisch 8 | 9 | The Python Quants GmbH 10 | 11 | http://tpq.io | http://pqp.io | http://twitter.com/dyjh 12 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | #!flask/bin/python 2 | import numpy as np 3 | import datetime as dt 4 | from flask import Flask, jsonify 5 | 6 | 7 | app = Flask(__name__) 8 | 9 | tick = { 10 | "instrument": "EUR_USD", 11 | "time": "2014-03-07T20:58:07.461445Z", 12 | "bid": 1.3500, 13 | "ask": 1.3501 14 | } 15 | 16 | @app.route('/prices', methods=['GET']) 17 | def get_tasks(): 18 | tick['ask'] = np.round(np.random.normal(1.35, 0.1), 5) 19 | tick['bid'] = tick['ask'] + 0.0001 20 | tick['time'] = str(dt.datetime.now()).replace(' ', 'T') + 'Z' 21 | return jsonify({'tick': tick}) 22 | 23 | if __name__ == '__main__': 24 | app.run(debug=True) -------------------------------------------------------------------------------- /custom.css: -------------------------------------------------------------------------------- 1 | /* 2 | Placeholder for custom user CSS 3 | 4 | mainly to be overridden in profile/static/custom/custom.css 5 | 6 | This will always be an empty file in IPython 7 | */ 8 | 9 | 10 | @font-face { 11 | font-family: 'Open Sans'; 12 | font-style: normal; 13 | font-weight: 400; 14 | src: url(OpenSans-Regular.ttf) format('truetype'); 15 | } 16 | 17 | body { 18 | font-family: 'Open Sans', sans-serif; 19 | color: #0B79BD 20 | } 21 | 22 | .text_cell_render h1 {color: #0B79BD} 23 | .text_cell_render h2 {color: #0B79BD} 24 | .text_cell_render h3 {color: #0B79BD} 25 | .text_cell_render h4 {color: #0B79BD} 26 | .text_cell_render h5 {color: #0B79BD} 27 | .text_cell_render h6 {color: #0B79BD} 28 | 29 | div.text_cell_render 30 | {color: #6D6E71} 31 | 32 | div.cell.border-box-sizing.code_cell.running { 33 | border: 3px solid #111; 34 | } 35 | 36 | .CodeMirror { 37 | font-family: 'Courier'; 38 | font-style: normal; 39 | 
font-weight: 500; 40 | font-size: 16px; 41 | } 42 | 43 | div.prompt { 44 | font-family: 'Courier'; 45 | font-size: 16px; 46 | } 47 | 48 | div.output_area pre { 49 | font-family: 'Courier'; 50 | font-size: 16px; 51 | } 52 | 53 | #new_notebook { 54 | display: none; 55 | } 56 | 57 | #copy_notebook { 58 | display: none; 59 | } 60 | 61 | #open_notebook { 62 | display: none; 63 | } 64 | 65 | -------------------------------------------------------------------------------- /05_financial_data_science_viz_stream.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "slideshow": { 7 | "slide_type": "slide" 8 | } 9 | }, 10 | "source": [ 11 | "\"The
" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Python for Financial Data Science — Viz Streaming" 19 | ] 20 | }, 21 | { 22 | "cell_type": "markdown", 23 | "metadata": {}, 24 | "source": [ 25 | "**DataNatives Berlin 2015**" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "Dr Yves J Hilpisch\n", 33 | "\n", 34 | "team@tpq.io | http://tpq.io\n", 35 | "\n", 36 | "The Python Quants GmbH" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "metadata": {}, 42 | "source": [ 43 | "## Prerequisites" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": { 50 | "collapsed": false 51 | }, 52 | "outputs": [], 53 | "source": [ 54 | "import json\n", 55 | "import time\n", 56 | "import pandas as pd\n", 57 | "import requests\n", 58 | "import plotly.plotly as py\n", 59 | "import plotly.tools as tls \n", 60 | "from plotly.graph_objs import *\n", 61 | "import cufflinks" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "Loading **Plotly credentials** from file." 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": { 75 | "collapsed": false 76 | }, 77 | "outputs": [], 78 | "source": [ 79 | "pcreds = json.load(open('plotly_creds'))" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": { 86 | "collapsed": false 87 | }, 88 | "outputs": [], 89 | "source": [ 90 | "py.sign_in('yves', pcreds['api_key'])" 91 | ] 92 | }, 93 | { 94 | "cell_type": "markdown", 95 | "metadata": {}, 96 | "source": [ 97 | "## Dummy Tick Data Source" 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "metadata": {}, 103 | "source": [ 104 | "For illustration purposes, we use this data source which provides **dummy tick data**." 
105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "metadata": { 111 | "collapsed": false 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "tick_url = 'http://cloud.datapark.io:12500/prices'" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": null, 121 | "metadata": { 122 | "collapsed": false 123 | }, 124 | "outputs": [], 125 | "source": [ 126 | "r = requests.get(tick_url)\n", 127 | "r.json()" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "## Collecting Data from Source" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": {}, 140 | "source": [ 141 | "Collecting **data via a loop**." 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "metadata": { 148 | "collapsed": false 149 | }, 150 | "outputs": [], 151 | "source": [ 152 | "df = pd.DataFrame()\n", 153 | "for _ in xrange(100):\n", 154 | " r = requests.get(tick_url)\n", 155 | " data = r.json()\n", 156 | " df = df.append(pd.DataFrame(data['tick'],\n", 157 | " index=(data['tick']['time'],)))" 158 | ] 159 | }, 160 | { 161 | "cell_type": "code", 162 | "execution_count": null, 163 | "metadata": { 164 | "collapsed": false 165 | }, 166 | "outputs": [], 167 | "source": [ 168 | "df.tail()" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "## Plotting with plotly" 176 | ] 177 | }, 178 | { 179 | "cell_type": "markdown", 180 | "metadata": {}, 181 | "source": [ 182 | "Converting the `DataFrame` data into **Plotly compatible format**." 
183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "metadata": { 189 | "collapsed": false 190 | }, 191 | "outputs": [], 192 | "source": [ 193 | "def df_to_iplot(df):\n", 194 | " '''\n", 195 | " Converting a pandas DataFrame to Plotly interface.\n", 196 | " '''\n", 197 | " x = df.index.values\n", 198 | " lines = {}\n", 199 | " for key in df:\n", 200 | " lines[key] = {}\n", 201 | " lines[key][\"x\"] = x\n", 202 | " lines[key][\"y\"] = df[key].values\n", 203 | " lines[key][\"name\"] = key\n", 204 | " # Appending all lines\n", 205 | " lines_plotly=[lines[key] for key in df]\n", 206 | " return lines_plotly" 207 | ] 208 | }, 209 | { 210 | "cell_type": "markdown", 211 | "metadata": {}, 212 | "source": [ 213 | "And **iplotting** it." 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": null, 219 | "metadata": { 220 | "collapsed": false 221 | }, 222 | "outputs": [], 223 | "source": [ 224 | "py.iplot(df_to_iplot(df[['ask']]))" 225 | ] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "metadata": {}, 230 | "source": [ 231 | "## Simpler with Cufflinks" 232 | ] 233 | }, 234 | { 235 | "cell_type": "markdown", 236 | "metadata": {}, 237 | "source": [ 238 | "Cufflinks wraps the Plotly API for the `DataFrame` class." 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "df['ask'].iplot(world_readable=True)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "## Streaming Plot" 257 | ] 258 | }, 259 | { 260 | "cell_type": "markdown", 261 | "metadata": {}, 262 | "source": [ 263 | "Plotting **data streams** is a bit more involved. We need a `Stream` object ..." 
264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": { 270 | "collapsed": false 271 | }, 272 | "outputs": [], 273 | "source": [ 274 | "# Get stream id from stream id list \n", 275 | "stream_ids = pcreds['stream_ids']\n", 276 | "\n", 277 | "# Make instance of stream id object \n", 278 | "stream_0 = Stream(\n", 279 | " token=stream_ids[0],\n", 280 | " maxpoints=150)" 281 | ] 282 | }, 283 | { 284 | "cell_type": "markdown", 285 | "metadata": {}, 286 | "source": [ 287 | "... and a `Scatter` object which gets passed to a `Data` object." 288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": { 294 | "collapsed": false 295 | }, 296 | "outputs": [], 297 | "source": [ 298 | "trace0 = Scatter(\n", 299 | " x=[], y=[],\n", 300 | " mode='lines+markers',\n", 301 | " stream=stream_0,\n", 302 | " name='price')\n", 303 | "\n", 304 | "dats = Data([trace0])" 305 | ] 306 | }, 307 | { 308 | "cell_type": "markdown", 309 | "metadata": {}, 310 | "source": [ 311 | "The rest then is again straightforward." 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": { 318 | "collapsed": false 319 | }, 320 | "outputs": [], 321 | "source": [ 322 | "layout = Layout(title='Streaming Plot')\n", 323 | "fig = Figure(data=dats, layout=layout)\n", 324 | "unique_url = py.plot(fig, filename='stream_plot', auto_open=False)" 325 | ] 326 | }, 327 | { 328 | "cell_type": "markdown", 329 | "metadata": {}, 330 | "source": [ 331 | "**Embedding** the plot first ..." 332 | ] 333 | }, 334 | { 335 | "cell_type": "code", 336 | "execution_count": null, 337 | "metadata": { 338 | "collapsed": false 339 | }, 340 | "outputs": [], 341 | "source": [ 342 | "tls.embed(unique_url)" 343 | ] 344 | }, 345 | { 346 | "cell_type": "markdown", 347 | "metadata": {}, 348 | "source": [ 349 | "... **streaming** the data second." 
350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": null, 355 | "metadata": { 356 | "collapsed": false 357 | }, 358 | "outputs": [], 359 | "source": [ 360 | "s0 = py.Stream(stream_ids[0])\n", 361 | "s0.open()\n", 362 | "for i in xrange(250):\n", 363 | " r = requests.get(tick_url)\n", 364 | " data = r.json()\n", 365 | " s0.write({'x': data['tick']['time'][11:-3], 'y': data['tick']['ask']})\n", 366 | " time.sleep(0.25)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "metadata": {}, 372 | "source": [ 373 | "\"The
\n", 374 | "\n", 375 | "http://tpq.io | @dyjh | team@tpq.io\n", 376 | "\n", 377 | "**Quant Platform** |\n", 378 | "http://quant-platform.com\n", 379 | "\n", 380 | "**datapark.io** |\n", 381 | "http://datapark.io\n", 382 | "\n", 383 | "**Python for Finance** |\n", 384 | "Python for Finance @ O'Reilly\n", 385 | "\n", 386 | "**Derivatives Analytics with Python** |\n", 387 | "Derivatives Analytics @ Wiley Finance" 388 | ] 389 | } 390 | ], 391 | "metadata": { 392 | "kernelspec": { 393 | "display_name": "Python 2", 394 | "language": "python2", 395 | "name": "python2" 396 | }, 397 | "language_info": { 398 | "codemirror_mode": { 399 | "name": "ipython", 400 | "version": 2 401 | }, 402 | "file_extension": ".py", 403 | "mimetype": "text/x-python", 404 | "name": "python", 405 | "nbconvert_exporter": "python", 406 | "pygments_lexer": "ipython2", 407 | "version": "2.7.10" 408 | } 409 | }, 410 | "nbformat": 4, 411 | "nbformat_minor": 0 412 | } 413 | -------------------------------------------------------------------------------- /02_financial_data_science_sql_db.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\"The
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Python for Financial Data Science — SQL DBs" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "**DataNatives Berlin 2015**" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Dr Yves J Hilpisch\n", 29 | "\n", 30 | "team@tpq.io | http://tpq.io\n", 31 | "\n", 32 | "The Python Quants GmbH" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Importing & Database Connection" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "This notebook is about the `ibis` project. See the tutorial under [http://blog.ibis-project.org/sqlite-crunchbase-quickstart/](http://blog.ibis-project.org/sqlite-crunchbase-quickstart/)." 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": { 53 | "collapsed": false 54 | }, 55 | "outputs": [], 56 | "source": [ 57 | "# getting the database to work with\n", 58 | "!wget https://ibis-resources.s3.amazonaws.com/data/crunchbase/crunchbase.db" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "import ibis\n", 70 | "ibis.options.interactive = True" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "con = ibis.sqlite.connect('crunchbase.db')" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "## Basic Operations and Lookups" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": { 95 | "collapsed": false 96 | }, 97 | "outputs": [], 98 | "source": [ 99 | "con.list_tables()" 100 | ] 101 | }, 102 | { 103 | "cell_type": 
"code", 104 | "execution_count": null, 105 | "metadata": { 106 | "collapsed": false 107 | }, 108 | "outputs": [], 109 | "source": [ 110 | "rounds = con.table('rounds')\n", 111 | "rounds.info()" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "rounds.funding_round_type.value_counts()" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": false 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "acquisitions = con.table('acquisitions')\n", 134 | "expr = (acquisitions.price_amount\n", 135 | " .isnull()\n", 136 | " .name('has_price')\n", 137 | " .value_counts())\n", 138 | "expr" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": { 145 | "collapsed": false 146 | }, 147 | "outputs": [], 148 | "source": [ 149 | "df = expr.execute()\n", 150 | "df" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": { 157 | "collapsed": false 158 | }, 159 | "outputs": [], 160 | "source": [ 161 | "type(expr)" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "metadata": { 168 | "collapsed": false 169 | }, 170 | "outputs": [], 171 | "source": [ 172 | "companies = con.table('companies')\n", 173 | "\n", 174 | "expr = companies.funding_total_usd.mean()\n", 175 | "type(expr)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": null, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "expr.execute()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "## Funding Metrics" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": null, 199 | "metadata": { 200 | "collapsed": false 201 | }, 202 | "outputs": [], 203 | 
"source": [ 204 | "funded_at = rounds.funded_at.cast('timestamp')\n", 205 | "funded_at.year().value_counts()" 206 | ] 207 | }, 208 | { 209 | "cell_type": "code", 210 | "execution_count": null, 211 | "metadata": { 212 | "collapsed": false 213 | }, 214 | "outputs": [], 215 | "source": [ 216 | "rounds.funding_round_code.value_counts()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": null, 222 | "metadata": { 223 | "collapsed": false 224 | }, 225 | "outputs": [], 226 | "source": [ 227 | "year = funded_at.year().name('year')\n", 228 | "\n", 229 | "expr = (rounds[(rounds.funding_round_type == 'venture') &\n", 230 | " year.between(2000, 2015) &\n", 231 | " rounds.funding_round_code.notnull()]\n", 232 | " .group_by([year, 'funding_round_code'])\n", 233 | " .size())\n", 234 | "\n", 235 | "results = expr.execute()\n", 236 | "results[:10]" 237 | ] 238 | }, 239 | { 240 | "cell_type": "code", 241 | "execution_count": null, 242 | "metadata": { 243 | "collapsed": false 244 | }, 245 | "outputs": [], 246 | "source": [ 247 | "pivoted = (results.set_index(['year', 'funding_round_code'])\n", 248 | " .unstack('funding_round_code')\n", 249 | " .fillna(0))\n", 250 | "pivoted" 251 | ] 252 | }, 253 | { 254 | "cell_type": "code", 255 | "execution_count": null, 256 | "metadata": { 257 | "collapsed": false 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "funding_buckets = [0, 1000000, 10000000, 50000000, 100000000, \n", 262 | " 500000000, 1000000000]\n", 263 | "\n", 264 | "bucket = (companies\n", 265 | " .funding_total_usd\n", 266 | " .bucket(funding_buckets, include_over=True))\n", 267 | "bucket.value_counts()" 268 | ] 269 | }, 270 | { 271 | "cell_type": "code", 272 | "execution_count": null, 273 | "metadata": { 274 | "collapsed": false 275 | }, 276 | "outputs": [], 277 | "source": [ 278 | "bucket_names = ['0 to 1m', '1m to 10m', '10m to 50m', \n", 279 | " '50m to 100m', '100m to 500m',\n", 280 | " '500m to 1b', 'Over 1b']\n", 281 | "\n", 282 | "counts = 
bucket.name('bucket').value_counts()\n", 283 | "labeled = counts.bucket.label(bucket_names)\n", 284 | "with_names = counts.mutate(bucket_name=labeled)\n", 285 | "with_names" 286 | ] 287 | }, 288 | { 289 | "cell_type": "code", 290 | "execution_count": null, 291 | "metadata": { 292 | "collapsed": false 293 | }, 294 | "outputs": [], 295 | "source": [ 296 | "metrics = (companies.group_by(bucket.name('bucket'))\n", 297 | " .aggregate(count=companies.count(),\n", 298 | " total_funding=companies.funding_total_usd.sum())\n", 299 | " .mutate(bucket_name=lambda x: x.bucket.label(bucket_names)))\n", 300 | "metrics" 301 | ] 302 | }, 303 | { 304 | "cell_type": "code", 305 | "execution_count": null, 306 | "metadata": { 307 | "collapsed": false 308 | }, 309 | "outputs": [], 310 | "source": [ 311 | "joined = (companies.mutate(bucket=bucket,\n", 312 | " status=companies.status.fillna('Unknown'))\n", 313 | " [(companies.founded_at > '2010-01-01') |\n", 314 | " companies.founded_at.isnull()]\n", 315 | " .group_by(['bucket', 'status'])\n", 316 | " .size()\n", 317 | " .mutate(bucket_name=lambda x: (x.bucket.label(bucket_names)\n", 318 | " .fillna('Unknown'))))\n", 319 | "\n", 320 | "table = joined.execute()\n", 321 | "\n", 322 | "table.set_index(['status', 'bucket', 'bucket_name'])['count'].unstack('status')" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "metadata": {}, 328 | "source": [ 329 | "## Generated SQL Queries" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": null, 335 | "metadata": { 336 | "collapsed": false 337 | }, 338 | "outputs": [], 339 | "source": [ 340 | "print(ibis.impala.compile(joined))" 341 | ] 342 | }, 343 | { 344 | "cell_type": "code", 345 | "execution_count": null, 346 | "metadata": { 347 | "collapsed": true 348 | }, 349 | "outputs": [], 350 | "source": [ 351 | "# remove the database\n", 352 | "!rm crunchbase.db" 353 | ] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "metadata": {}, 358 | "source": [ 359 | "\"The
\n", 360 | "\n", 361 | "http://tpq.io | @dyjh | team@tpq.io\n", 362 | "\n", 363 | "**Quant Platform** |\n", 364 | "http://quant-platform.com\n", 365 | "\n", 366 | "**datapark.io** |\n", 367 | "http://datapark.io\n", 368 | "\n", 369 | "**Python for Finance** |\n", 370 | "Python for Finance @ O'Reilly\n", 371 | "\n", 372 | "**Derivatives Analytics with Python** |\n", 373 | "Derivatives Analytics @ Wiley Finance" 374 | ] 375 | } 376 | ], 377 | "metadata": { 378 | "kernelspec": { 379 | "display_name": "Python 2", 380 | "language": "python2", 381 | "name": "python2" 382 | }, 383 | "language_info": { 384 | "codemirror_mode": { 385 | "name": "ipython", 386 | "version": 2 387 | }, 388 | "file_extension": ".py", 389 | "mimetype": "text/x-python", 390 | "name": "python", 391 | "nbconvert_exporter": "python", 392 | "pygments_lexer": "ipython2", 393 | "version": "2.7.10" 394 | } 395 | }, 396 | "nbformat": 4, 397 | "nbformat_minor": 0 398 | } 399 | -------------------------------------------------------------------------------- /04_financial_data_science_viz_d3js.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\"The
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Python for Financial Data Science — Viz D3.js" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "**DataNatives Berlin 2015**" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Dr Yves J Hilpisch\n", 29 | "\n", 30 | "team@tpq.io | http://tpq.io\n", 31 | "\n", 32 | "The Python Quants GmbH" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "## Cufflinks" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "This library binds the power of [plotly](http://www.plot.ly) with the flexibility of [pandas](http://pandas.pydata.org/) for easy plotting. This library is available on https://github.com/santosjorge/cufflinks. You can easily `pip install` it. The code of this Jupyter Notebook is mainly from Jorge Santos.\n", 47 | "\n", 48 | "The following assumes that the plotly user credentials have already been configured as stated on the [getting started](https://plot.ly/python/getting-started/) guide. Alternatively, the demo account credentials can be used (see below)." 
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "import warnings; warnings.simplefilter('ignore')" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "import plotly.plotly as py\n", 71 | "import pandas as pd\n", 72 | "import pandas.io.data as web\n", 73 | "import cufflinks as cf\n", 74 | "import numpy as np" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": { 81 | "collapsed": false 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "py.sign_in('Python-Demo-Account', 'gwt101uhh0')" 86 | ] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "metadata": {}, 91 | "source": [ 92 | "We retrieve adjusted historical closing prices for a number of symbols." 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "symbols = ['IBM', 'MSFT', 'AAPL', ]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [], 113 | "source": [ 114 | "df = pd.DataFrame()\n", 115 | "for sym in symbols:\n", 116 | " data = web.DataReader(sym, data_source='yahoo')\n", 117 | " df[sym] = data['Adj Close']" 118 | ] 119 | }, 120 | { 121 | "cell_type": "markdown", 122 | "metadata": {}, 123 | "source": [ 124 | "`iplot` can be used on any DataFrame to plot on a plotly chart. If no filename is specified then a generic *Plotly Playground* file is created. All the charts are created as private by default. To make them public you can use `world_readable=True`. Let's look at the available parameters." 
125 | ] 126 | }, 127 | { 128 | "cell_type": "code", 129 | "execution_count": null, 130 | "metadata": { 131 | "collapsed": false 132 | }, 133 | "outputs": [], 134 | "source": [ 135 | "help(df.iplot)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": null, 141 | "metadata": { 142 | "collapsed": false 143 | }, 144 | "outputs": [], 145 | "source": [ 146 | "df.iplot(filename='fin_time_series_1', world_readable=True)" 147 | ] 148 | }, 149 | { 150 | "cell_type": "markdown", 151 | "metadata": {}, 152 | "source": [ 153 | "### Pretty Printing Figures\n", 154 | "\n", 155 | "**iplot** can return a static *Plotly Figure* if we state **asFigure=True**. " 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "metadata": { 162 | "collapsed": false 163 | }, 164 | "outputs": [], 165 | "source": [ 166 | "fig=df.iplot(filename='fin_time_series_1', world_readable=True, asFigure=True)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "metadata": {}, 172 | "source": [ 173 | "Cufflinks also provides a pretty-print function, **pp**, that makes any dictionary-like object (figures, layouts) easier to read." 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": null, 179 | "metadata": { 180 | "collapsed": false 181 | }, 182 | "outputs": [], 183 | "source": [ 184 | "cf.pp(fig['layout'])" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "metadata": {}, 190 | "source": [ 191 | "### Customizing Themes\n", 192 | "\n", 193 | "We can pass a **theme** to the **iplot** function. 
\n", 194 | "3 themes are available, but you can create your own\n", 195 | "* Solar\n", 196 | "* Pearl (Default)\n", 197 | "* White" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": null, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "df.iplot(theme='white', filename='fin_time_series_2', world_readable=True)" 209 | ] 210 | }, 211 | { 212 | "cell_type": "markdown", 213 | "metadata": {}, 214 | "source": [ 215 | "We can also pass common metadata for the chart, like title." 216 | ] 217 | }, 218 | { 219 | "cell_type": "code", 220 | "execution_count": null, 221 | "metadata": { 222 | "collapsed": false 223 | }, 224 | "outputs": [], 225 | "source": [ 226 | "df.iplot(theme='pearl', filename='fin_time_series_3', title='Stock Returns',\n", 227 | " xTitle='Return', yTitle='Dates', world_readable=True)" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "### Bestfit Lines\n", 235 | "\n", 236 | "We can easily add a bestfit line to any Series\n", 237 | "\n", 238 | "This will automatically add a best fit approximation and the equation as the legend." 
239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "df['IBM'].iplot(filename='fin_time_series_4', bestfit=True, world_readable=True)" 250 | ] 251 | }, 252 | { 253 | "cell_type": "markdown", 254 | "metadata": {}, 255 | "source": [ 256 | "### Customizing Colors\n", 257 | "\n", 258 | "We can pass any color (either by Hex, RGB or Text *) \n", 259 | "\n", 260 | "*Text values are specified in the cufflinks.colors modules" 261 | ] 262 | }, 263 | { 264 | "cell_type": "code", 265 | "execution_count": null, 266 | "metadata": { 267 | "collapsed": false 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "df['IBM'].iplot(filename='fin_time_series_5', bestfit=True, colors=['pink'],\n", 272 | " bestfit_colors=['blue'], world_readable=True)" 273 | ] 274 | }, 275 | { 276 | "cell_type": "markdown", 277 | "metadata": {}, 278 | "source": [ 279 | "### Filled Traces\n", 280 | "\n", 281 | "We can add a fill to a trace with **fill=True**" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "metadata": { 288 | "collapsed": false 289 | }, 290 | "outputs": [], 291 | "source": [ 292 | "df['MSFT'].iplot(filename='fin_time_series_7',\n", 293 | " fill=True,colors=['green'], world_readable=True)" 294 | ] 295 | }, 296 | { 297 | "cell_type": "markdown", 298 | "metadata": {}, 299 | "source": [ 300 | "### Bar Charts\n", 301 | "\n", 302 | "We can easily create a bar chart with the parameter **kind**" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": null, 308 | "metadata": { 309 | "collapsed": false 310 | }, 311 | "outputs": [], 312 | "source": [ 313 | "df.sum().iplot(kind='bar', filename='fin_time_series_9', world_readable=True)" 314 | ] 315 | }, 316 | { 317 | "cell_type": "markdown", 318 | "metadata": {}, 319 | "source": [ 320 | "Bars can also be stacked by a given dimension" 321 | ] 322 | }, 323 | { 324 
| "cell_type": "code", 325 | "execution_count": null, 326 | "metadata": { 327 | "collapsed": false 328 | }, 329 | "outputs": [], 330 | "source": [ 331 | "df.resample('M').iplot(kind='bar', barmode='stacked', world_readable=True,\n", 332 | " filename='fin_time_series_10')" 333 | ] 334 | }, 335 | { 336 | "cell_type": "markdown", 337 | "metadata": {}, 338 | "source": [ 339 | "### Spread and Ratio charts\n", 340 | "\n", 341 | "We can also create spread and ratio charts on the fly with **kind='spread'** and **kind='ratio'**." 342 | ] 343 | }, 344 | { 345 | "cell_type": "code", 346 | "execution_count": null, 347 | "metadata": { 348 | "collapsed": false 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "df[['IBM', 'MSFT']].iplot(filename='fin_time_series_11', kind='spread',\n", 353 | " world_readable=True)" 354 | ] 355 | }, 356 | { 357 | "cell_type": "code", 358 | "execution_count": null, 359 | "metadata": { 360 | "collapsed": false 361 | }, 362 | "outputs": [], 363 | "source": [ 364 | "df[['IBM', 'MSFT']].iplot(filename='fin_time_series_12', kind='ratio',\n", 365 | " colors=['green','red'], world_readable=True)" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "metadata": {}, 371 | "source": [ 372 | "### Annotations\n", 373 | "\n", 374 | "Annotations can be added to the chart and these are automatically positioned correctly. **Annotations** should be specified in dictionary form." 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "metadata": { 381 | "collapsed": false 382 | }, 383 | "outputs": [], 384 | "source": [ 385 | "annotations={'2013-01-15':'Dividends', '2014-03-31':'Split Announced'}\n", 386 | "df['MSFT'].iplot(filename='fin_time_series_13', annotations=annotations,\n", 387 | " world_readable=True)" 388 | ] 389 | }, 390 | { 391 | "cell_type": "markdown", 392 | "metadata": {}, 393 | "source": [ 394 | "### Output as Image\n", 395 | "\n", 396 | "The output of a chart can be in an image mode as well. 
For this we can use `asImage=True`. We can also set the dimensions (optional) with `dimensions=(width,height)`." 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": null, 402 | "metadata": { 403 | "collapsed": false 404 | }, 405 | "outputs": [], 406 | "source": [ 407 | "df[['MSFT', 'AAPL']].iplot(filename='fin_time_series_14', theme='white', \n", 408 | " colors=['pink','blue'], asImage=True, \n", 409 | " dimensions=(800, 500), world_readable=True)" 410 | ] 411 | }, 412 | { 413 | "cell_type": "markdown", 414 | "metadata": {}, 415 | "source": [ 416 | "### Advanced Use\n", 417 | "\n", 418 | "It is also possible to get the Plotly Figure as an output to tweak it manually. We can achieve this with `asFigure=True`." 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": { 425 | "collapsed": false 426 | }, 427 | "outputs": [], 428 | "source": [ 429 | "df['MSFT'].iplot(asFigure=True)" 430 | ] 431 | }, 432 | { 433 | "cell_type": "markdown", 434 | "metadata": {}, 435 | "source": [ 436 | "We can also get the **Data** object directly" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "metadata": { 443 | "collapsed": false 444 | }, 445 | "outputs": [], 446 | "source": [ 447 | "data = df.to_iplot()" 448 | ] 449 | }, 450 | { 451 | "cell_type": "code", 452 | "execution_count": null, 453 | "metadata": { 454 | "collapsed": false 455 | }, 456 | "outputs": [], 457 | "source": [ 458 | "data[0]['name']='My Custom Name'" 459 | ] 460 | }, 461 | { 462 | "cell_type": "markdown", 463 | "metadata": {}, 464 | "source": [ 465 | "And pass this directly to **iplot**" 466 | ] 467 | }, 468 | { 469 | "cell_type": "code", 470 | "execution_count": null, 471 | "metadata": { 472 | "collapsed": false 473 | }, 474 | "outputs": [], 475 | "source": [ 476 | "df.iplot(data=data, filename='fin_time_series_15', world_readable=True)" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | 
"metadata": {}, 482 | "source": [ 483 | "\"The
\n", 484 | "\n", 485 | "http://tpq.io | @dyjh | team@tpq.io\n", 486 | "\n", 487 | "**Quant Platform** |\n", 488 | "http://quant-platform.com\n", 489 | "\n", 490 | "**datapark.io** |\n", 491 | "http://datapark.io\n", 492 | "\n", 493 | "**Python for Finance** |\n", 494 | "Python for Finance @ O'Reilly\n", 495 | "\n", 496 | "**Derivatives Analytics with Python** |\n", 497 | "Derivatives Analytics @ Wiley Finance" 498 | ] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 2", 504 | "language": "python2", 505 | "name": "python2" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 2 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython2", 517 | "version": "2.7.10" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 0 522 | } 523 | -------------------------------------------------------------------------------- /03_financial_data_science_viz_basic.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "\"The
" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "# Python for Financial Data Science — Viz Basics" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "**DataNatives Berlin 2015**" 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "Dr Yves J Hilpisch\n", 29 | "\n", 30 | "team@tpq.io | http://tpq.io\n", 31 | "\n", 32 | "The Python Quants GmbH" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": { 39 | "collapsed": false 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import warnings\n", 44 | "warnings.simplefilter('ignore')" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "metadata": {}, 50 | "source": [ 51 | "## Example Data Retrieval" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "Retrieval and visualization of financial data." 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": { 65 | "collapsed": false 66 | }, 67 | "outputs": [], 68 | "source": [ 69 | "import pandas.io.data as web" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": { 76 | "collapsed": false 77 | }, 78 | "outputs": [], 79 | "source": [ 80 | "AAPL = web.DataReader('AAPL', data_source='yahoo')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": { 87 | "collapsed": false 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "AAPL.info()" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "AAPL.tail()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "metadata": {}, 108 | "source": [ 109 | "## Basic matplotlib plotting" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": null, 115 | 
"metadata": { 116 | "collapsed": false 117 | }, 118 | "outputs": [], 119 | "source": [ 120 | "%matplotlib inline" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": { 127 | "collapsed": false 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "AAPL['Adj Close'].plot()" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "metadata": { 138 | "collapsed": false 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "import numpy as np\n", 143 | "log_rets = np.log(AAPL['Adj Close'] / AAPL['Adj Close'].shift(1))" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "metadata": { 150 | "collapsed": false 151 | }, 152 | "outputs": [], 153 | "source": [ 154 | "log_rets.hist(bins=30)" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "collapsed": false 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "import matplotlib.pyplot as plt" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": null, 171 | "metadata": { 172 | "collapsed": false 173 | }, 174 | "outputs": [], 175 | "source": [ 176 | "log_rets.hist(figsize=(10, 6), bins=30)\n", 177 | "plt.xlabel('log returns')\n", 178 | "plt.ylabel('frequency')" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "## Financial Plots" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": null, 191 | "metadata": { 192 | "collapsed": false 193 | }, 194 | "outputs": [], 195 | "source": [ 196 | "import matplotlib.finance as mpf" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "metadata": { 203 | "collapsed": false 204 | }, 205 | "outputs": [], 206 | "source": [ 207 | "start = (2015, 4, 1)\n", 208 | "end = (2015, 5, 30)\n", 209 | "quotes = mpf.quotes_historical_yahoo_ochl('^GDAXI', start, end)" 210 | ] 211 | }, 212 | { 
213 | "cell_type": "code", 214 | "execution_count": null, 215 | "metadata": { 216 | "collapsed": false 217 | }, 218 | "outputs": [], 219 | "source": [ 220 | "quotes[:2]" 221 | ] 222 | }, 223 | { 224 | "cell_type": "code", 225 | "execution_count": null, 226 | "metadata": { 227 | "collapsed": false 228 | }, 229 | "outputs": [], 230 | "source": [ 231 | "fig, ax = plt.subplots(figsize=(10, 6))\n", 232 | "fig.subplots_adjust(bottom=0.2)\n", 233 | "mpf.candlestick_ochl(ax, quotes, width=0.6, colorup='b', colordown='r')\n", 234 | "plt.grid(True)\n", 235 | "ax.xaxis_date()\n", 236 | "# dates on the x-axis\n", 237 | "ax.autoscale_view()\n", 238 | "plt.setp(plt.gca().get_xticklabels(), rotation=30)" 239 | ] 240 | }, 241 | { 242 | "cell_type": "code", 243 | "execution_count": null, 244 | "metadata": { 245 | "collapsed": false 246 | }, 247 | "outputs": [], 248 | "source": [ 249 | "fig, ax = plt.subplots(figsize=(8, 5))\n", 250 | "mpf.plot_day_summary(ax, quotes, colorup='b', colordown='r')\n", 251 | "plt.grid(True)\n", 252 | "ax.xaxis_date()\n", 253 | "plt.title('DAX Index')\n", 254 | "plt.ylabel('index level')\n", 255 | "plt.setp(plt.gca().get_xticklabels(), rotation=30)" 256 | ] 257 | }, 258 | { 259 | "cell_type": "markdown", 260 | "metadata": {}, 261 | "source": [ 262 | "## ggplot Style Plotting" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "metadata": { 269 | "collapsed": false 270 | }, 271 | "outputs": [], 272 | "source": [ 273 | "plt.style.use('ggplot')" 274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "metadata": { 280 | "collapsed": false 281 | }, 282 | "outputs": [], 283 | "source": [ 284 | "AAPL['Adj Close'].plot()" 285 | ] 286 | }, 287 | { 288 | "cell_type": "code", 289 | "execution_count": null, 290 | "metadata": { 291 | "collapsed": false 292 | }, 293 | "outputs": [], 294 | "source": [ 295 | "log_rets.hist(figsize=(10, 6), bins=30)\n", 296 | "plt.xlabel('log returns')\n", 297 | 
"plt.ylabel('frequency')" 298 | ] 299 | }, 300 | { 301 | "cell_type": "markdown", 302 | "metadata": {}, 303 | "source": [ 304 | "### seaborn for Statistical Plotting" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "metadata": { 311 | "collapsed": false 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "import seaborn as sns\n", 316 | "sns.set()" 317 | ] 318 | }, 319 | { 320 | "cell_type": "code", 321 | "execution_count": null, 322 | "metadata": { 323 | "collapsed": false 324 | }, 325 | "outputs": [], 326 | "source": [ 327 | "AAPL['Adj Close'].plot()" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": null, 333 | "metadata": { 334 | "collapsed": false 335 | }, 336 | "outputs": [], 337 | "source": [ 338 | "fig, ax = plt.subplots(figsize=(10, 6))\n", 339 | "fig.subplots_adjust(bottom=0.2)\n", 340 | "mpf.candlestick_ochl(ax, quotes, width=0.6, colorup='b', colordown='r')\n", 341 | "plt.grid(True)\n", 342 | "ax.xaxis_date()\n", 343 | "# dates on the x-axis\n", 344 | "ax.autoscale_view()\n", 345 | "plt.setp(plt.gca().get_xticklabels(), rotation=30)" 346 | ] 347 | }, 348 | { 349 | "cell_type": "code", 350 | "execution_count": null, 351 | "metadata": { 352 | "collapsed": false 353 | }, 354 | "outputs": [], 355 | "source": [ 356 | "import pandas as pd\n", 357 | "AAPL['42d'] = pd.rolling_mean(AAPL['Adj Close'], 42)\n", 358 | "AAPL['252d'] = pd.rolling_mean(AAPL['Adj Close'], 200)\n", 359 | " # 42 & 252 days trends (moving averages)" 360 | ] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "execution_count": null, 365 | "metadata": { 366 | "collapsed": false 367 | }, 368 | "outputs": [], 369 | "source": [ 370 | "AAPL[['Adj Close', '42d', '252d']].plot(figsize=(12, 6))" 371 | ] 372 | }, 373 | { 374 | "cell_type": "code", 375 | "execution_count": null, 376 | "metadata": { 377 | "collapsed": false 378 | }, 379 | "outputs": [], 380 | "source": [ 381 | "MSFT = web.DataReader('MSFT', data_source='yahoo')" 382 
| ] 383 | }, 384 | { 385 | "cell_type": "code", 386 | "execution_count": null, 387 | "metadata": { 388 | "collapsed": false 389 | }, 390 | "outputs": [], 391 | "source": [ 392 | "data = pd.DataFrame({'AAPL': AAPL['Adj Close'], 'MSFT': MSFT['Adj Close']})\n", 393 | "data.tail() " 394 | ] 395 | }, 396 | { 397 | "cell_type": "code", 398 | "execution_count": null, 399 | "metadata": { 400 | "collapsed": false 401 | }, 402 | "outputs": [], 403 | "source": [ 404 | "import numpy as np\n", 405 | "rets = np.log(data / data.shift(1))\n", 406 | "rets.tail()" 407 | ] 408 | }, 409 | { 410 | "cell_type": "code", 411 | "execution_count": null, 412 | "metadata": { 413 | "collapsed": false 414 | }, 415 | "outputs": [], 416 | "source": [ 417 | "sns.jointplot(rets['AAPL'], rets['MSFT'], size=8)" 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "metadata": { 424 | "collapsed": false 425 | }, 426 | "outputs": [], 427 | "source": [ 428 | "sns.jointplot(rets['AAPL'], rets['MSFT'], size=8, kind='reg')" 429 | ] 430 | }, 431 | { 432 | "cell_type": "markdown", 433 | "metadata": {}, 434 | "source": [ 435 | "### Interactive Plotting with plotly" 436 | ] 437 | }, 438 | { 439 | "cell_type": "code", 440 | "execution_count": null, 441 | "metadata": { 442 | "collapsed": false 443 | }, 444 | "outputs": [], 445 | "source": [ 446 | "import plotly.plotly as py\n", 447 | "import cufflinks\n", 448 | "py.sign_in('Python-Demo-Account', 'gwt101uhh0')" 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": { 455 | "collapsed": false 456 | }, 457 | "outputs": [], 458 | "source": [ 459 | "AAPL[['Adj Close', '42d', '252d']].iplot(filename='plotly',\n", 460 | " world_readable=True)" 461 | ] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": { 467 | "collapsed": false 468 | }, 469 | "outputs": [], 470 | "source": [ 471 | "AAPL['Adj Close'].iplot(filename='AAPL_fit', bestfit=True, 
colors=['pink'],\n", 472 | " bestfit_colors=['blue'], world_readable=True)" 473 | ] 474 | }, 475 | { 476 | "cell_type": "markdown", 477 | "metadata": {}, 478 | "source": [ 479 | "## Interacting & Plotting with R & ggplot" 480 | ] 481 | }, 482 | { 483 | "cell_type": "code", 484 | "execution_count": null, 485 | "metadata": { 486 | "collapsed": false 487 | }, 488 | "outputs": [], 489 | "source": [ 490 | "%load_ext rpy2.ipython" 491 | ] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": { 497 | "collapsed": false 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "aapl = AAPL['Adj Close'].values" 502 | ] 503 | }, 504 | { 505 | "cell_type": "code", 506 | "execution_count": null, 507 | "metadata": { 508 | "collapsed": false 509 | }, 510 | "outputs": [], 511 | "source": [ 512 | "%Rpush aapl" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": null, 518 | "metadata": { 519 | "collapsed": false 520 | }, 521 | "outputs": [], 522 | "source": [ 523 | "%R plot(aapl, pch=20, col='blue'); grid(); title(\"Apple Stock Prices\")" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": null, 529 | "metadata": { 530 | "collapsed": false 531 | }, 532 | "outputs": [], 533 | "source": [ 534 | "%R x = seq(1, length(aapl), by=1)" 535 | ] 536 | }, 537 | { 538 | "cell_type": "code", 539 | "execution_count": null, 540 | "metadata": { 541 | "collapsed": false 542 | }, 543 | "outputs": [], 544 | "source": [ 545 | "%R c = coef(lm(aapl ~ x))" 546 | ] 547 | }, 548 | { 549 | "cell_type": "code", 550 | "execution_count": null, 551 | "metadata": { 552 | "collapsed": false 553 | }, 554 | "outputs": [], 555 | "source": [ 556 | "%%R\n", 557 | "plot(aapl, pch=19, col='blue')\n", 558 | "grid(); abline(c, col='red', lwd=5)\n", 559 | "title('Stock Prices with Regression Line')" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": null, 565 | "metadata": { 566 | "collapsed": false 567 | }, 568 | 
"outputs": [], 569 | "source": [ 570 | "%R m = mean(aapl)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": null, 576 | "metadata": { 577 | "collapsed": false 578 | }, 579 | "outputs": [], 580 | "source": [ 581 | "%Rpull m" 582 | ] 583 | }, 584 | { 585 | "cell_type": "code", 586 | "execution_count": null, 587 | "metadata": { 588 | "collapsed": false 589 | }, 590 | "outputs": [], 591 | "source": [ 592 | "print(\"Average stock price is %5.2f\" % m[0])" 593 | ] 594 | }, 595 | { 596 | "cell_type": "markdown", 597 | "metadata": {}, 598 | "source": [ 599 | "\"The
\n", 600 | "\n", 601 | "http://tpq.io | @dyjh | team@tpq.io\n", 602 | "\n", 603 | "**Quant Platform** |\n", 604 | "http://quant-platform.com\n", 605 | "\n", 606 | "**datapark.io** |\n", 607 | "http://datapark.io\n", 608 | "\n", 609 | "**Python for Finance** |\n", 610 | "Python for Finance @ O'Reilly\n", 611 | "\n", 612 | "**Derivatives Analytics with Python** |\n", 613 | "Derivatives Analytics @ Wiley Finance" 614 | ] 615 | } 616 | ], 617 | "metadata": { 618 | "kernelspec": { 619 | "display_name": "Python 2", 620 | "language": "python2", 621 | "name": "python2" 622 | }, 623 | "language_info": { 624 | "codemirror_mode": { 625 | "name": "ipython", 626 | "version": 2 627 | }, 628 | "file_extension": ".py", 629 | "mimetype": "text/x-python", 630 | "name": "python", 631 | "nbconvert_exporter": "python", 632 | "pygments_lexer": "ipython2", 633 | "version": "2.7.10" 634 | } 635 | }, 636 | "nbformat": 4, 637 | "nbformat_minor": 0 638 | } 639 | --------------------------------------------------------------------------------