├── README.md
├── lendingclub-scipy
    ├── 1.0-sl-lendingclub-scipy.ipynb
    └── data
    │   └── getdata.sh
├── planout-abtest
    └── 1.0-planout-abtest-analysis.ipynb
├── prophet-uber
    ├── 1.0-sl-prophet-uber.ipynb
    ├── data
    │   └── getdata.sh
    └── leach-pydata2017-prophet.pdf
├── simpy-broadcast
    └── 1.0-sl-simpy-message.ipynb
└── stepwise-processor
    └── 1.0-sl-stepwise-processor.ipynb


/README.md:
--------------------------------------------------------------------------------
1 | # python-notebooks
2 | Python notebooks demonstrating various ideas, APIs, and libraries.
3 | 
4 | - [planout-abtest](https://github.com/samuelleach/python-notebooks/blob/master/planout-abtest/1.0-planout-abtest-analysis.ipynb) - Defining an AB test experiment with [Planout](https://facebook.github.io/planout/), simulating exposures, and analysing the results.
5 | - [prophet-uber](https://github.com/samuelleach/python-notebooks/blob/master/prophet-uber/1.0-sl-prophet-uber.ipynb) - Time series forecasting of Uber data using a [Prophet](https://facebookincubator.github.io/prophet/) model that includes a trend, a weekly component and the impact of holidays.
6 | - [simpy-broadcast](https://github.com/samuelleach/python-notebooks/blob/master/simpy-broadcast/1.0-sl-simpy-message.ipynb) - Demonstration of using [simpy](https://simpy.readthedocs.io/en/latest/) to simulate a stream of read receipts for messages broadcast to 10 recipients.
7 | - [stepwise-processor](https://github.com/samuelleach/python-notebooks/blob/master/stepwise-processor/1.0-sl-stepwise-processor.ipynb) - Stepwise processor pattern from Mahmoud Hashemi's Enterprise Software with Python course. Demonstrates topic summarization using Wikipedia.
8 | - [lendingclub-scipy](https://github.com/samuelleach/python-notebooks/blob/master/lendingclub-scipy/1.0-sl-lendingclub-scipy.ipynb) - Demonstrates using scipy to fit an alpha distribution to LendingClub loan data.
9 | 


--------------------------------------------------------------------------------
/lendingclub-scipy/data/getdata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download the LendingClub LoanStats_2017Q1 archive into the current
3 | # directory and unpack it.
4 | set -e  # stop if the download fails instead of running unzip anyway
5 | wget https://resources.lendingclub.com/LoanStats_2017Q1.csv.zip
6 | # Name the archive explicitly: an unquoted *zip is expanded by the shell, and
7 | # with several matches unzip treats the extra names as members to extract.
8 | unzip LoanStats_2017Q1.csv.zip
9 | 


--------------------------------------------------------------------------------
/planout-abtest/1.0-planout-abtest-analysis.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {
  7 |     "collapsed": false
  8 |    },
  9 |    "outputs": [],
 10 |    "source": [
 11 |     "# Notebook demonstrates end to end session with planout\n",
 12 |     "# experiment library including experimental specification,\n",
 13 |     "# simulation of decisions, reading of logs and analysis of data.\n",
 14 |     "\n",
 15 |     "# The simulated experiment is the proverbial 'button colour'\n",
 16 |     "# A/B test.\n",
 17 |     "\n",
 18 |     "# References:\n",
 19 |     "\n",
 20 |     "# https://facebook.github.io/planout/\n",
 21 |     "# Bakshy et al 2014\n",
 22 |     "\n",
 23 |     "# Extensions\n",
 24 |     "# - Log outcomes.\n",
 25 |     "# - Multiple parameters.\n",
 26 |     "# - Multiple units (userid, contentid).\n",
 27 |     "# - Exposure over time.\n",
 28 |     "\n",
 29 |     "import math\n",
 30 |     "import json\n",
 31 |     "import numpy as np\n",
 32 |     "import pandas as pd\n",
 33 |     "from scipy.stats import bernoulli, norm\n",
 34 |     "from planout.experiment import SimpleExperiment\n",
 35 |     "from planout.ops.random import UniformChoice"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 2,
 41 |    "metadata": {
 42 |     "collapsed": true
 43 |    },
 44 |    "outputs": [],
 45 |    "source": [
 46 |     "# Define experiment\n",
 47 |     "\n",
 48 |     "class ButtonExperiment(SimpleExperiment):\n",
 49 |     "  def assign(self, params, userid):\n",
 50 |     "    params.button_color = UniformChoice(choices=['A', 'B'], unit=userid)"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": 3,
 56 |    "metadata": {
 57 |     "collapsed": false
 58 |    },
 59 |    "outputs": [],
 60 |    "source": [
 61 |     "# Simulate exposures and click throughs\n",
 62 |     "\n",
 63 |     "num_users = 1000\n",
 64 |     "\n",
 65 |     "# Simulated click through rates\n",
 66 |     "ctr = {'A': 0.5, 'B': 0.6}\n",
 67 |     "\n",
 68 |     "for userid in range(0, num_users):\n",
 69 |     "    exp = ButtonExperiment(userid=userid)\n",
 70 |     "\n",
 71 |     "    # Log exposure to ButtonExperiment.log\n",
 72 |     "    call_to_action_color = exp.get('button_color')\n",
 73 |     "    \n",
 74 |     "    # Log an action\n",
 75 |     "    simulated_decision = bernoulli.rvs(ctr[call_to_action_color])\n",
 76 |     "    if simulated_decision:\n",
 77 |     "        exp.log_event('button_click')"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "code",
 82 |    "execution_count": 4,
 83 |    "metadata": {
 84 |     "collapsed": false
 85 |    },
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "event                  button_click  exposure\n",
 92 |       "params                                       \n",
 93 |       "{'button_color': 'A'}           267       519\n",
 94 |       "{'button_color': 'B'}           300       481\n"
 95 |      ]
 96 |     }
 97 |    ],
 98 |    "source": [
 99 |     "# Now perform summary statistics for each unique combination of parameters\n",
100 |     "\n",
101 |     "# params = A, B\n",
102 |     "# num_exposure, num_click\n",
103 |     "\n",
104 |     "# Read planout logs (rows of json data)\n",
105 |     "data = []\n",
106 |     "with open(exp.name+'.log') as file:\n",
107 |     "    for line in file:\n",
108 |     "       data.append(json.loads(line))\n",
109 |     "\n",
110 |     "# Extract event and params data. Convert params to string.\n",
111 |     "event = []\n",
112 |     "params = []\n",
113 |     "for line in data:\n",
114 |     "    event.append(line['event'])\n",
115 |     "    params.append(str(line['params']))\n",
116 |     "                  \n",
117 |     "# Insert data into a data frame\n",
118 |     "df = pd.DataFrame.from_items([('params', params), ('event', event) ])\n",
119 |     "\n",
120 |     "# Calculate cross table to get summary statistics\n",
121 |     "crosstab = pd.crosstab(df.params, df.event)\n",
122 |     "print(crosstab)"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": 5,
128 |    "metadata": {
129 |     "collapsed": false
130 |    },
131 |    "outputs": [
132 |     {
133 |      "name": "stdout",
134 |      "output_type": "stream",
135 |      "text": [
136 |       "{'button_color': 'A'} success rate =  0.514450867052\n",
137 |       "{'button_color': 'B'} success rate =  0.623700623701\n",
138 |       "Z statistic =  3.48370376386\n",
139 |       "Cumulative area under normal distribution = 0.99975273674\n"
140 |      ]
141 |     }
142 |    ],
143 |    "source": [
144 |     "# Discussion of Z statistic here:\n",
145 |     "# https://making.lyst.com/2014/05/10/bayesian-ab-testing/\n",
146 |     "\n",
147 |     "def zstat(crosstab):\n",
148 |     "    '''Calculate the Z statistic for an A/B test given\n",
149 |     "    the summary statistics in a crosstab (exposures and events)'''\n",
150 |     "    \n",
151 |     "    crosstab_values = crosstab.copy().values\n",
152 |     "    nrow = crosstab_values.shape[0]\n",
153 |     "\n",
154 |     "    success_rate = []\n",
155 |     "    exposure = []\n",
156 |     "    total_events = 0\n",
157 |     "    total_exposures = 0\n",
158 |     "    for row in range(0, nrow):\n",
159 |     "        events = crosstab_values[row, 0]\n",
160 |     "        exposures = crosstab_values[row, 1]\n",
161 |     "        \n",
162 |     "        rate = events/exposures\n",
163 |     "        print(crosstab.axes[0][row], 'success rate = ', rate)\n",
164 |     "        \n",
165 |     "        success_rate.append(rate)\n",
166 |     "        exposure.append(exposures)\n",
167 |     "        \n",
168 |     "        total_exposures += exposures\n",
169 |     "        total_events += events\n",
170 |     "    \n",
171 |     "    combined_success_rate = total_events/total_exposures\n",
172 |     "    \n",
173 |     "    return (success_rate[1] - success_rate[0])/ \\\n",
174 |     "            math.sqrt(combined_success_rate*(1.-combined_success_rate)* \\\n",
175 |     "            (1.0/exposure[0] + 1.0/exposure[1]))\n",
176 |     "\n",
177 |     "    \n",
178 |     "abtest_zstat = zstat(crosstab)\n",
179 |     "print('Z statistic = ', abtest_zstat)\n",
180 |     "print('Cumulative area under normal distribution =',norm.cdf(abtest_zstat))"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "code",
185 |    "execution_count": null,
186 |    "metadata": {
187 |     "collapsed": true
188 |    },
189 |    "outputs": [],
190 |    "source": []
191 |   }
192 |  ],
193 |  "metadata": {
194 |   "kernelspec": {
195 |    "display_name": "Python 3",
196 |    "language": "python",
197 |    "name": "python3"
198 |   },
199 |   "language_info": {
200 |    "codemirror_mode": {
201 |     "name": "ipython",
202 |     "version": 3
203 |    },
204 |    "file_extension": ".py",
205 |    "mimetype": "text/x-python",
206 |    "name": "python",
207 |    "nbconvert_exporter": "python",
208 |    "pygments_lexer": "ipython3",
209 |    "version": "3.5.2"
210 |   }
211 |  },
212 |  "nbformat": 4,
213 |  "nbformat_minor": 0
214 | }
215 | 


--------------------------------------------------------------------------------
/prophet-uber/data/getdata.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Download the Uber raw trip data archive and the taxi zone lookup CSV from
3 | # the fivethirtyeight/uber-tlc-foil-response repository into the current
4 | # directory, then unpack the trip data archive.
5 | set -e  # stop if a download fails instead of running the remaining steps
6 | wget https://github.com/fivethirtyeight/uber-tlc-foil-response/raw/master/uber-trip-data/uber-raw-data-janjune-15.csv.zip
7 | wget https://github.com/fivethirtyeight/uber-tlc-foil-response/raw/master/uber-trip-data/taxi-zone-lookup.csv
8 | unzip uber-raw-data-janjune-15.csv.zip
9 | 


--------------------------------------------------------------------------------
/prophet-uber/leach-pydata2017-prophet.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/samuelleach/python-notebooks/57dcd7c707a157d93d44fc4c6704af29a79eeb94/prophet-uber/leach-pydata2017-prophet.pdf


--------------------------------------------------------------------------------
/simpy-broadcast/1.0-sl-simpy-message.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Demonstration of using simpy to simulate read receipts for\n",
  8 |     "messages broadcast to 10 recipients.**\n",
  9 |     "\n",
 10 |     "Messages are queued, sent, received and then read.\n",
 11 |     "\n",
 12 |     "At each stage there is a random delay time.\n",
 13 |     "\n",
 14 |     "Output is a stream of JSON objects with the keys `time`, `userid` and `Status`.\n",
 15 |     "\n",
 16 |     "**Simpy idea**: Processes are represented by Python generators.\n",
 17 |     "\n",
 18 |     "Docs: https://simpy.readthedocs.io/en/latest/\n",
 19 |     "Simpy tutorial: https://www.youtube.com/watch?v=WHhJJxakIO4"
 20 |    ]
 21 |   },
 22 |   {
 23 |    "cell_type": "code",
 24 |    "execution_count": 1,
 25 |    "metadata": {
 26 |     "collapsed": true
 27 |    },
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "import numpy as np\n",
 31 |     "import json\n",
 32 |     "\n",
 33 |     "# import simpy.rt # Uses 'real time' wall clock time. This won't work in a Jupyter notebook.\n",
 34 |     "# env = simpy.rt.RealtimeEnvironment()\n",
 35 |     "\n",
 36 |     "import simpy\n",
 37 |     "env = simpy.Environment()"
 38 |    ]
 39 |   },
 40 |   {
 41 |    "cell_type": "code",
 42 |    "execution_count": 2,
 43 |    "metadata": {
 44 |     "collapsed": true
 45 |    },
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "# Simulation parameters\n",
 49 |     "\n",
 50 |     "NUM_RECIPIENTS = 10\n",
 51 |     "\n",
 52 |     "SIM_DURATION = 1000\n",
 53 |     "\n",
 54 |     "MIN_QUEUE_TIME = 1\n",
 55 |     "MAX_QUEUE_TIME = 10\n",
 56 |     "\n",
 57 |     "MIN_DELIVERY_TIME = 1\n",
 58 |     "MAX_DELIVERY_TIME = 50\n",
 59 |     "\n",
 60 |     "MIN_READ_TIME = 5\n",
 61 |     "MAX_READ_TIME = 600"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 3,
 67 |    "metadata": {
 68 |     "collapsed": true
 69 |    },
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "def get_outdict(time, userid, status):\n",
 73 |     "    return {'time': time, 'userid': userid, 'Status': status}\n",
 74 |     "\n",
 75 |     "def message_status(env, userid):\n",
 76 |     "    outdict = get_outdict(env.now, userid, 'Queued')\n",
 77 |     "    print(json.dumps(outdict))\n",
 78 |     "    \n",
 79 |     "    yield env.timeout(np.random.uniform(low=MIN_QUEUE_TIME,\n",
 80 |     "                                        high=MAX_QUEUE_TIME))\n",
 81 |     "\n",
 82 |     "    outdict = get_outdict(env.now, userid, 'Sent')\n",
 83 |     "    print(json.dumps(outdict))\n",
 84 |     "    \n",
 85 |     "    yield env.timeout(np.random.uniform(low=MIN_DELIVERY_TIME,\n",
 86 |     "                                        high=MAX_DELIVERY_TIME))\n",
 87 |     "\n",
 88 |     "    outdict = get_outdict(env.now, userid, 'Delivered')\n",
 89 |     "    print(json.dumps(outdict))\n",
 90 |     "    \n",
 91 |     "    yield env.timeout(np.random.uniform(low=MIN_READ_TIME,\n",
 92 |     "                                        high=MAX_READ_TIME))\n",
 93 |     "\n",
 94 |     "    outdict = get_outdict(env.now, userid, 'Read')\n",
 95 |     "    print(json.dumps(outdict))"
 96 |    ]
 97 |   },
 98 |   {
 99 |    "cell_type": "code",
100 |    "execution_count": 4,
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "{\"userid\": 1, \"time\": 0, \"Status\": \"Queued\"}\n",
108 |       "{\"userid\": 2, \"time\": 0, \"Status\": \"Queued\"}\n",
109 |       "{\"userid\": 3, \"time\": 0, \"Status\": \"Queued\"}\n",
110 |       "{\"userid\": 4, \"time\": 0, \"Status\": \"Queued\"}\n",
111 |       "{\"userid\": 5, \"time\": 0, \"Status\": \"Queued\"}\n",
112 |       "{\"userid\": 6, \"time\": 0, \"Status\": \"Queued\"}\n",
113 |       "{\"userid\": 7, \"time\": 0, \"Status\": \"Queued\"}\n",
114 |       "{\"userid\": 8, \"time\": 0, \"Status\": \"Queued\"}\n",
115 |       "{\"userid\": 9, \"time\": 0, \"Status\": \"Queued\"}\n",
116 |       "{\"userid\": 1, \"time\": 1.6904789593659402, \"Status\": \"Sent\"}\n",
117 |       "{\"userid\": 4, \"time\": 3.6414495270389624, \"Status\": \"Sent\"}\n",
118 |       "{\"userid\": 3, \"time\": 4.199239840553005, \"Status\": \"Sent\"}\n",
119 |       "{\"userid\": 6, \"time\": 4.298913656361343, \"Status\": \"Sent\"}\n",
120 |       "{\"userid\": 9, \"time\": 5.009273200185579, \"Status\": \"Sent\"}\n",
121 |       "{\"userid\": 2, \"time\": 6.251029090618595, \"Status\": \"Sent\"}\n",
122 |       "{\"userid\": 7, \"time\": 7.7489035609509616, \"Status\": \"Sent\"}\n",
123 |       "{\"userid\": 5, \"time\": 8.246723903477147, \"Status\": \"Sent\"}\n",
124 |       "{\"userid\": 8, \"time\": 8.316021648279747, \"Status\": \"Sent\"}\n",
125 |       "{\"userid\": 6, \"time\": 20.53233482681646, \"Status\": \"Delivered\"}\n",
126 |       "{\"userid\": 2, \"time\": 21.341225909053023, \"Status\": \"Delivered\"}\n",
127 |       "{\"userid\": 1, \"time\": 22.874492946913566, \"Status\": \"Delivered\"}\n",
128 |       "{\"userid\": 3, \"time\": 28.4981102449749, \"Status\": \"Delivered\"}\n",
129 |       "{\"userid\": 8, \"time\": 32.587726784761834, \"Status\": \"Delivered\"}\n",
130 |       "{\"userid\": 7, \"time\": 34.67475219548778, \"Status\": \"Delivered\"}\n",
131 |       "{\"userid\": 9, \"time\": 39.80680315571698, \"Status\": \"Delivered\"}\n",
132 |       "{\"userid\": 5, \"time\": 43.22672823718324, \"Status\": \"Delivered\"}\n",
133 |       "{\"userid\": 4, \"time\": 48.210970655899466, \"Status\": \"Delivered\"}\n",
134 |       "{\"userid\": 4, \"time\": 53.565263978918, \"Status\": \"Read\"}\n",
135 |       "{\"userid\": 5, \"time\": 129.17484530977288, \"Status\": \"Read\"}\n",
136 |       "{\"userid\": 6, \"time\": 159.71539995093036, \"Status\": \"Read\"}\n",
137 |       "{\"userid\": 3, \"time\": 210.80289168074313, \"Status\": \"Read\"}\n",
138 |       "{\"userid\": 1, \"time\": 238.10028338287506, \"Status\": \"Read\"}\n",
139 |       "{\"userid\": 7, \"time\": 337.60867180056033, \"Status\": \"Read\"}\n",
140 |       "{\"userid\": 8, \"time\": 458.2614460198782, \"Status\": \"Read\"}\n",
141 |       "{\"userid\": 9, \"time\": 463.5834920092545, \"Status\": \"Read\"}\n",
142 |       "{\"userid\": 2, \"time\": 515.3217872508587, \"Status\": \"Read\"}\n"
143 |      ]
144 |     }
145 |    ],
146 |    "source": [
147 |     "# Setup and start the simulation: one message process per recipient.\n",
148 |     "# userids run from 1 to NUM_RECIPIENTS inclusive, hence the +1 on the bound.\n",
149 |     "for userid in range(1, NUM_RECIPIENTS + 1):\n",
150 |     "    message = message_status(env, userid)\n",
151 |     "    env.process(message)\n",
152 |     "\n",
153 |     "env.run(until=SIM_DURATION)"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {
160 |     "collapsed": true
161 |    },
162 |    "outputs": [],
163 |    "source": []
164 |   }
165 |  ],
166 |  "metadata": {
167 |   "kernelspec": {
168 |    "display_name": "Python 3",
169 |    "language": "python",
170 |    "name": "python3"
171 |   },
172 |   "language_info": {
173 |    "codemirror_mode": {
174 |     "name": "ipython",
175 |     "version": 3
176 |    },
177 |    "file_extension": ".py",
178 |    "mimetype": "text/x-python",
179 |    "name": "python",
180 |    "nbconvert_exporter": "python",
181 |    "pygments_lexer": "ipython3",
182 |    "version": "3.5.3"
183 |   }
184 |  },
185 |  "nbformat": 4,
186 |  "nbformat_minor": 2
187 | }
188 | 


--------------------------------------------------------------------------------
/stepwise-processor/1.0-sl-stepwise-processor.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "**Stepwise processor pattern from Mahmoud Hashemi's Enterprise Software with Python course.**\n",
  8 |     "\n",
  9 |     "Python3 version of Mahmoud Hashemi's Wikipedia topic summarizer\n",
 10 |     "\n",
 11 |     "References:\n",
 12 |     "\n",
 13 |     "https://github.com/mahmoud/espymetrics/blob/d4754e597a2f483e2e0b1c3efc8694774227f907/notebooks/stepwise_demo.ipynb\n",
 14 |     "https://www.safaribooksonline.com/library/view/enterprise-software-with/9781491943755/video239885.html"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 240,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "import json\n",
 24 |     "from urllib.request import urlopen\n",
 25 |     "\n",
 26 |     "from IPython.display import Image"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 241,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "class TopicSummarizer(object):\n",
 36 |     "    \"\"\"\n",
 37 |     "    Our stepwise processor that uses Wikipedia to summarize topics.\n",
 38 |     "    \n",
 39 |     "    Just instantiate with the topic name, call .process(), and get_results()\n",
 40 |     "    \"\"\"\n",
 41 |     "    \n",
 42 |     "    def __init__(self, topic):\n",
 43 |     "        self.topic = topic\n",
 44 |     "        \n",
 45 |     "    def process(self):\n",
 46 |     "        self._fetch_text()\n",
 47 |     "        self._fetch_thumbnail()\n",
 48 |     "        return self\n",
 49 |     "    \n",
 50 |     "    def get_results(self, as_text=False):\n",
 51 |     "        if as_text:\n",
 52 |     "            return self.topic + ' summary:' + self._text\n",
 53 |     "        return TopicSummary(self.topic, self._thumb_url, self._text)\n",
 54 |     "    \n",
 55 |     "    def _fetch_text(self):\n",
 56 |     "        self._text_api_url = TEXT_URL_TMPL.format(title=self.topic)\n",
 57 |     "        self._text_resp = self._get_url_json(self._text_api_url)\n",
 58 |     "        self._text = list(self._text_resp['query']['pages'].values())[0]['extract']\n",
 59 |     "        \n",
 60 |     "    def _fetch_thumbnail(self):\n",
 61 |     "        self._thumb_api_url = THUMB_URL_TMPL.format(title=self.topic)\n",
 62 |     "        self._thumb_resp = self._get_url_json(self._thumb_api_url)\n",
 63 |     "        self._thumb_url = list(self._thumb_resp['query']['pages'].values())[0]['thumbnail']['source']\n",
 64 |     "        \n",
 65 |     "    def _get_url_json(self, url):\n",
 66 |     "        resp = urlopen(url)\n",
 67 |     "        resp_body = resp.read().decode('utf8')\n",
 68 |     "        return json.loads(resp_body)\n",
 69 |     "\n",
 70 |     "class TopicSummary(object):\n",
 71 |     "    def __init__(self, topic, thumb_url, text):\n",
 72 |     "        self.topic = topic\n",
 73 |     "        self.thumb_url = thumb_url\n",
 74 |     "        self.text = text\n",
 75 |     "        \n",
 76 |     "    def __repr__(self):\n",
 77 |     "        cn = self.__class__.__name__\n",
 78 |     "        return '%s(%r, %r, %r)' % (cn, self.topic, self.thumb_url, self.text)\n",
 79 |     "    \n",
 80 |     "TEXT_URL_TMPL = 'https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exsentences=2&titles={title}&explaintext=1&exintro=1&format=json'\n",
 81 |     "THUMB_URL_TMPL = 'https://en.wikipedia.org/w/api.php?action=query&prop=pageimages&titles={title}&format=json'"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": 242,
 87 |    "metadata": {},
 88 |    "outputs": [
 89 |     {
 90 |      "name": "stdout",
 91 |      "output_type": "stream",
 92 |      "text": [
 93 |       "TopicSummary('Coffee', 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/A_small_cup_of_coffee.JPG/50px-A_small_cup_of_coffee.JPG', 'Coffee is a brewed drink prepared from roasted coffee beans, which are the seeds of berries from the Coffea plant. The genus Coffea is native to tropical Africa, and Madagascar, the Comoros, Mauritius and Réunion in the Indian Ocean.')\n"
 94 |      ]
 95 |     }
 96 |    ],
 97 |    "source": [
 98 |     "# Demonstration of the summarizer\n",
 99 |     "\n",
100 |     "summarizer = TopicSummarizer('Coffee')\n",
101 |     "summarizer.process()\n",
102 |     "summary = summarizer.get_results()\n",
103 |     "print(summary)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 243,
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "Coffee is a brewed drink prepared from roasted coffee beans, which are the seeds of berries from the Coffea plant. The genus Coffea is native to tropical Africa, and Madagascar, the Comoros, Mauritius and Réunion in the Indian Ocean.\n"
116 |      ]
117 |     },
118 |     {
119 |      "data": {
120 |       "text/html": [
121 |        "<img src=\"https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/A_small_cup_of_coffee.JPG/50px-A_small_cup_of_coffee.JPG\"/>"
122 |       ],
123 |       "text/plain": [
124 |        "<IPython.core.display.Image object>"
125 |       ]
126 |      },
127 |      "execution_count": 243,
128 |      "metadata": {},
129 |      "output_type": "execute_result"
130 |     }
131 |    ],
132 |    "source": [
133 |     "print(summary.text)\n",
134 |     "Image(url=summary.thumb_url)"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": 244,
140 |    "metadata": {},
141 |    "outputs": [
142 |     {
143 |      "name": "stdout",
144 |      "output_type": "stream",
145 |      "text": [
146 |       "Coffee summary:Coffee is a brewed drink prepared from roasted coffee beans, which are the seeds of berries from the Coffea plant. The genus Coffea is native to tropical Africa, and Madagascar, the Comoros, Mauritius and Réunion in the Indian Ocean.\n"
147 |      ]
148 |     }
149 |    ],
150 |    "source": [
151 |     "print(summarizer.get_results(as_text=True))"
152 |    ]
153 |   },
154 |   {
155 |    "cell_type": "code",
156 |    "execution_count": 239,
157 |    "metadata": {},
158 |    "outputs": [
159 |     {
160 |      "data": {
161 |       "text/plain": [
162 |        "{'_text': 'Coffee is a brewed drink prepared from roasted coffee beans, which are the seeds of berries from the Coffea plant. The genus Coffea is native to tropical Africa, and Madagascar, the Comoros, Mauritius and Réunion in the Indian Ocean.',\n",
163 |        " '_text_api_url': 'https://en.wikipedia.org/w/api.php?action=query&prop=extracts&exsentences=2&titles=Coffee&explaintext=1&exintro=1&format=json',\n",
164 |        " '_text_resp': {'batchcomplete': '',\n",
165 |        "  'query': {'pages': {'604727': {'extract': 'Coffee is a brewed drink prepared from roasted coffee beans, which are the seeds of berries from the Coffea plant. The genus Coffea is native to tropical Africa, and Madagascar, the Comoros, Mauritius and Réunion in the Indian Ocean.',\n",
166 |        "     'ns': 0,\n",
167 |        "     'pageid': 604727,\n",
168 |        "     'title': 'Coffee'}}}},\n",
169 |        " '_thumb_api_url': 'https://en.wikipedia.org/w/api.php?action=query&prop=pageimages&titles=Coffee&format=json',\n",
170 |        " '_thumb_resp': {'batchcomplete': '',\n",
171 |        "  'query': {'pages': {'604727': {'ns': 0,\n",
172 |        "     'pageid': 604727,\n",
173 |        "     'pageimage': 'A_small_cup_of_coffee.JPG',\n",
174 |        "     'thumbnail': {'height': 38,\n",
175 |        "      'source': 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/A_small_cup_of_coffee.JPG/50px-A_small_cup_of_coffee.JPG',\n",
176 |        "      'width': 50},\n",
177 |        "     'title': 'Coffee'}}}},\n",
178 |        " '_thumb_url': 'https://upload.wikimedia.org/wikipedia/commons/thumb/4/45/A_small_cup_of_coffee.JPG/50px-A_small_cup_of_coffee.JPG',\n",
179 |        " 'topic': 'Coffee'}"
180 |       ]
181 |      },
182 |      "execution_count": 239,
183 |      "metadata": {},
184 |      "output_type": "execute_result"
185 |     }
186 |    ],
187 |    "source": [
188 |     "# Introspection of the summarizer\n",
189 |     "\n",
190 |     "summarizer.__dict__"
191 |    ]
192 |   },
193 |   {
194 |    "cell_type": "code",
195 |    "execution_count": null,
196 |    "metadata": {
197 |     "collapsed": true
198 |    },
199 |    "outputs": [],
200 |    "source": []
201 |   }
202 |  ],
203 |  "metadata": {
204 |   "kernelspec": {
205 |    "display_name": "Python 3",
206 |    "language": "python",
207 |    "name": "python3"
208 |   },
209 |   "language_info": {
210 |    "codemirror_mode": {
211 |     "name": "ipython",
212 |     "version": 3
213 |    },
214 |    "file_extension": ".py",
215 |    "mimetype": "text/x-python",
216 |    "name": "python",
217 |    "nbconvert_exporter": "python",
218 |    "pygments_lexer": "ipython3",
219 |    "version": "3.5.3"
220 |   }
221 |  },
222 |  "nbformat": 4,
223 |  "nbformat_minor": 2
224 | }
225 | 


--------------------------------------------------------------------------------