├── app ├── static │ ├── favicon.ico │ ├── favicon-16x16.png │ └── favicon-32x32.png ├── templates │ ├── 404.html │ ├── index.html │ └── base.html ├── config.py ├── views.py └── __init__.py ├── application.py ├── .gitignore ├── requirements.txt ├── gunicorn.sh ├── LICENSE ├── README.md ├── review-analysis-A.ipynb ├── textacy-modeling.ipynb ├── analyze-beer-soup.ipynb └── scrape-all-ba.ipynb /app/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillNetsky/beer_recommender/HEAD/app/static/favicon.ico -------------------------------------------------------------------------------- /app/static/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillNetsky/beer_recommender/HEAD/app/static/favicon-16x16.png -------------------------------------------------------------------------------- /app/static/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WillNetsky/beer_recommender/HEAD/app/static/favicon-32x32.png -------------------------------------------------------------------------------- /application.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from app import app as application 3 | 4 | if __name__ == '__main__': 5 | application.run(port=9000, threaded=True, debug=True) 6 | -------------------------------------------------------------------------------- /app/templates/404.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block body %} 4 |
5 |

404! Something Went Wrong!

6 |
7 | {% endblock %} -------------------------------------------------------------------------------- /app/config.py: -------------------------------------------------------------------------------- 1 | WTF_CSRF_ENABLED = True 2 | SECRET_KEY = 'you-will-never-guess' 3 | 4 | debug = True 5 | SQL_URL = 'postgresql://Netsky:BeerIsReallyGood@beer-recommender-db.cnkyfxbo8ito.us-west-2.rds.amazonaws.com:5432/beer' -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Python stuff 2 | *.pyc 3 | .ipynb_checkpoints 4 | 5 | # OS Stuff 6 | .DS_Store 7 | 8 | # Model Stuff 9 | *.pkl 10 | 11 | # Elastic Beanstalk Files 12 | .elasticbeanstalk/* 13 | !.elasticbeanstalk/*.cfg.yml 14 | !.elasticbeanstalk/*.global.yml 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.4.1 2 | boto==2.40.0 3 | bs4==0.0.1 4 | bz2file==0.98 5 | click==6.6 6 | Flask==0.11.1 7 | Flask-WTF==0.12 8 | gensim==0.13.1 9 | itsdangerous==0.24 10 | Jinja2==2.8 11 | MarkupSafe==0.23 12 | numpy==1.22.0 13 | pandas==0.18.1 14 | python-dateutil==2.5.3 15 | pytz==2016.4 16 | requests==2.10.0 17 | scipy==0.17.1 18 | six==1.10.0 19 | smart-open==1.3.3 20 | Werkzeug==0.11.10 21 | WTForms==2.1 22 | -------------------------------------------------------------------------------- /gunicorn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | NAME="Punctuations-App" 4 | APPDIR=/home/ubuntu/projects/iris/app 5 | SOCKFILE=/home/ubuntu/www/sock 6 | NUM_WORKERS=3 7 | 8 | echo "Starting $NAME" 9 | 10 | # activate the virtualenv 11 | source activate env 12 | 13 | export PYTHONPATH=$APPDIR:$PYTHONPATH 14 | 15 | # Create the run directory if it doesn't exist 16 | RUNDIR=$(dirname $SOCKFILE) 17 | test 
-d $RUNDIR || mkdir -p $RUNDIR 18 | 19 | # Start your unicorn 20 | exec gunicorn run:application -b 0.0.0.0:9000 \ 21 | --name $NAME \ 22 | --workers $NUM_WORKERS \ 23 | --log-level=debug \ 24 | --bind=unix:$SOCKFILE 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Will Chernetsky 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # beer_recommender 2 | beer recommendation engine project for Metis 3 | 4 | # todo 5 | - [ ] update this todo, it's very behind 6 | 7 | **scraping** 8 | - [x] take all beer styles 9 | - [x] scrape every beer with more than 25 hads (~20.8k) 10 | - [x] take first 100 reviews from top reviewers for those beers 11 | 12 | **text processing** 13 | - [x] remove names of breweries 14 | - [x] remove words common to beer names (IPA etc) 15 | - [x] remove words with numbers 16 | - [x] lemmatize all words 17 | - [x] clean up some more stopwords 18 | - [x] lemmatize all brewery names and remove them 19 | - [x] remove all words less than 3 chars (sn, rr, etc) 20 | 21 | **model** 22 | - [x] similar words between beers 23 | - [ ] kmeans on the lsi, hopefully this gives something similar to the beer styles 24 | 25 | **visualization** 26 | - [ ] force directed graph of beer types 27 | - [ ] teaswarm 28 | 29 | **Flask App** 30 | - [x] beer emoji favicon 31 | - [x] javascript autocomplete 32 | **result formatting** 33 | - [x] links to ba pages 34 | - [x] two columns 35 | - [x] make this into a table 36 | - [x] one is the beers with ba pages 37 | - [x] one is the keywords between beers 38 | - [ ] the other is the part of the visualization where these beers occur 39 | - [x] make text input wider 40 | - [ ] search for both beer and brewery 41 | - [ ] search for style as well 42 | 43 | **PRESENTATION** 44 | - [x] diagram of data flow 45 | 46 | **future additions** 47 | - [ ] hop word descriptions 48 | - [ ] beer menus integration 49 | - [ ] taphunter integration 50 | - [x] sql database 51 | - [ ] pictures 52 | - [x] take out submit button, must click on autocomplete item to submit 53 | - [ ] search capability 54 | -------------------------------------------------------------------------------- /app/views.py: 
-------------------------------------------------------------------------------- 1 | 2 | from flask import render_template 3 | from flask import jsonify 4 | from flask import request 5 | from flask_wtf import FlaskForm 6 | from wtforms import fields 7 | from wtforms.validators import Required, AnyOf 8 | 9 | import pandas as pd 10 | import json 11 | 12 | from . import app, get_beer_names, get_brewery_names, get_beer_info, get_beer_keywords, get_similar_beer_info, get_search_strings 13 | 14 | 15 | beer_names = get_beer_names() 16 | brewery_names = get_brewery_names() 17 | search_strings = get_search_strings() 18 | beer_and_brewery = [] 19 | # for beer, brewery in zip(beer_names,brewery_names): 20 | # beer_and_brewery.append(dict(beer = beer, brewery = brewery)) 21 | 22 | class PredictForm(FlaskForm): 23 | """Fields for Predict""" 24 | myChoices = ["one", "two", "three"] 25 | beer_input = fields.StringField('Search for a Beer and Select from the Menu', validators=[Required(),AnyOf(beer_names)]) 26 | 27 | submit = fields.SubmitField('Submit') 28 | 29 | 30 | @app.route('/', methods=('GET', 'POST')) 31 | def index(): 32 | 33 | """Index page""" 34 | form = PredictForm() 35 | 36 | similar_beers = [None]*6 37 | beer_keywords = (None, [None]*6) 38 | beer_inputted = False 39 | input_beer_keywords = [None]*5 40 | input_beer = None 41 | similar_beer_keywords = None 42 | 43 | if form.validate_on_submit(): 44 | # store the submitted values 45 | submitted_data = form.data 46 | print(submitted_data) 47 | 48 | # Retrieve values from form 49 | beer_input = submitted_data['beer_input'] 50 | 51 | # Input beer info 52 | input_beer = get_beer_info(beer_input) 53 | input_beer_keywords = get_beer_keywords(beer_input) 54 | similar_beers = input_beer_keywords[1] 55 | input_beer_keywords = input_beer_keywords[0] 56 | 57 | # Get similar beer recommendations 58 | beer_inputted = True 59 | similar_beers = get_similar_beer_info(similar_beers) 60 | 61 | return render_template('index.html', 
form=form, beer_inputted = beer_inputted, 62 | similar_beers= similar_beers, 63 | input_beer = input_beer, 64 | input_beer_keywords = input_beer_keywords) 65 | 66 | @app.route('/autocomplete', methods=['GET']) 67 | def autocomplete(): 68 | search = request.args.get('term') 69 | #app.logger.debug(search) 70 | 71 | return jsonify(beer=beer_names, brewery = brewery_names, search_string = search_strings) 72 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from flask import Flask 4 | import pickle 5 | import pandas as pd 6 | from gensim import similarities 7 | 8 | import sqlalchemy 9 | 10 | import config 11 | 12 | app = Flask(__name__) 13 | app.config.from_object("app.config") 14 | 15 | engine = sqlalchemy.create_engine(config.SQL_URL) 16 | beers = pd.read_pickle('app/models/beer_review_df.pkl') 17 | beer_names = list(beers.name) 18 | brewery_names = list(beers.brewery_name) 19 | search_strings = list(beers.search_string) 20 | beers = None 21 | 22 | def get_beer_names(): 23 | return beer_names 24 | 25 | def get_brewery_names(): 26 | return brewery_names 27 | 28 | def get_search_strings(): 29 | return search_strings 30 | 31 | def get_beer_keywords(text_input): 32 | connection = engine.connect() 33 | t = sqlalchemy.sql.expression.text('select * from beers where name = :name') 34 | sql_result = connection.execute(t,name = text_input) 35 | result = sql_result.first() 36 | connection.close() 37 | return ([result['kw1'],result['kw2'],result['kw3'],result['kw4'],result['kw5']], 38 | [result['sim_id_1'],result['sim_id_2'],result['sim_id_3'],result['sim_id_4'],result['sim_id_5']]) 39 | 40 | def get_similar_beer_info(similar_beers): 41 | connection = engine.connect() 42 | query = 'select * from beers where ' 43 | 44 | query += 'index = ' + str(similar_beers[0]) 45 | for beer in similar_beers[1:]: 46 
| query += ' or index = ' + str(beer) 47 | 48 | sql_result = connection.execute(query) 49 | 50 | names = [] 51 | urls = [] 52 | breweries = [] 53 | websites = [] 54 | all_keywords = [] 55 | for row in sql_result: 56 | names.append(row['name']) 57 | urls.append(row['url']) 58 | breweries.append(row['brewery_name']) 59 | websites.append(row['brewery_website']) 60 | keywords = [row['kw1'],row['kw2'],row['kw3'],row['kw4'],row['kw5']] 61 | all_keywords.append(keywords) 62 | connection.close() 63 | return zip(zip(names,urls,breweries,websites),all_keywords) 64 | 65 | def get_beer_info(text_input): 66 | connection = engine.connect() 67 | t = sqlalchemy.sql.expression.text('select * from beers where name = :name') 68 | sql_result = connection.execute(t,name = text_input) 69 | result = sql_result.first() 70 | connection.close() 71 | return (result['name'],result['url'],result['brewery_name'],result['brewery_website']) 72 | 73 | from .views import * 74 | 75 | 76 | # Handle Bad Requests 77 | @app.errorhandler(404) 78 | def page_not_found(e): 79 | """Page Not Found""" 80 | return render_template('404.html'), 404 81 | -------------------------------------------------------------------------------- /app/templates/index.html: -------------------------------------------------------------------------------- 1 | {% extends 'base.html' %} 2 | 3 | {% block body %} 4 |
5 |

Beer Recommendation Engine

6 |
7 |
8 | 9 |
10 | 15 |
16 | {{ form.hidden_tag() }} 17 |
18 | {{ form.beer_input.label }} {{ form.beer_input() }} 19 |
20 |
21 |

22 | 23 | {% if beer_inputted %} 24 | {{input_beer[0]}} by {{input_beer[2]}} can be described as:
25 | {% for word in input_beer_keywords%} 26 | {{word}} 27 | {% endfor %}
28 |

29 |
30 | Here are some beers like {{input_beer[0]}}: 31 |
32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | {% for beer in similar_beers %} 40 | 41 | 44 | 48 | 49 | {% endfor %} 50 | 51 |
Similar Beer | Keywords
42 | {{beer[0][0]}} by {{beer[0][2]}} 43 | {% for word in beer[1] %} 45 | {{word}} 46 | {% endfor %} 47 |
52 | {% endif %} 53 | 54 | 55 | 56 |
57 | {% endblock body %} 58 | 59 | {% block js %} 60 | 95 | {% endblock js %} -------------------------------------------------------------------------------- /app/templates/base.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | {% block title %}Beer Recommendation Engine{% endblock title %} 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 26 | 27 | 116 | 117 | {% block header %}{% endblock header %} 118 | 119 | 120 |
121 | {% block body %}{% endblock body %} 122 |
123 | {% block js %}{% endblock js %} 124 | 125 | 126 | -------------------------------------------------------------------------------- /review-analysis-A.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 14, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from gensim import corpora, models, similarities\n", 12 | "import pickle\n", 13 | "import string" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 15, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "def remove_punctuation(x):\n", 25 | " x = str(x)\n", 26 | " return x.translate(string.maketrans('',''),string.punctuation)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 16, 32 | "metadata": { 33 | "collapsed": false 34 | }, 35 | "outputs": [], 36 | "source": [ 37 | "beer_reviews = pickle.load(open('beer_reviews.pkl','rb'))" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 17, 43 | "metadata": { 44 | "collapsed": false 45 | }, 46 | "outputs": [], 47 | "source": [ 48 | "documents = [reviews[1] for reviews in beer_reviews.items()]" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 18, 54 | "metadata": { 55 | "collapsed": true 56 | }, 57 | "outputs": [], 58 | "source": [ 59 | "documents = [' '.join(review) for review in documents]" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 19, 65 | "metadata": { 66 | "collapsed": false 67 | }, 68 | "outputs": [], 69 | "source": [ 70 | "documents = [remove_punctuation(doc) for doc in documents]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 20, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "documents = [review.lower() for review in documents]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | 
"execution_count": 21, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "stoplist = set('for a of the and to in'.split())\n", 93 | "texts = [[word for word in document.lower().split() if word not in stoplist]for document in documents]" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 22, 99 | "metadata": { 100 | "collapsed": true 101 | }, 102 | "outputs": [], 103 | "source": [ 104 | "from collections import defaultdict" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": 23, 110 | "metadata": { 111 | "collapsed": true 112 | }, 113 | "outputs": [], 114 | "source": [ 115 | "frequency = defaultdict(int)\n", 116 | "for text in texts:\n", 117 | " for token in text:\n", 118 | " frequency[token] += 1\n", 119 | " \n", 120 | "texts = [[token for token in text if frequency[token] > 1]for text in texts]" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 24, 126 | "metadata": { 127 | "collapsed": false 128 | }, 129 | "outputs": [], 130 | "source": [ 131 | "dictionary = corpora.Dictionary(texts)" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 25, 137 | "metadata": { 138 | "collapsed": true 139 | }, 140 | "outputs": [], 141 | "source": [ 142 | "corpus = [dictionary.doc2bow(text) for text in texts]" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": 26, 148 | "metadata": { 149 | "collapsed": true 150 | }, 151 | "outputs": [], 152 | "source": [ 153 | "tfidf = models.TfidfModel(corpus)" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 27, 159 | "metadata": { 160 | "collapsed": true 161 | }, 162 | "outputs": [], 163 | "source": [ 164 | "corpus_tfidf = tfidf[corpus]" 165 | ] 166 | }, 167 | { 168 | "cell_type": "code", 169 | "execution_count": 30, 170 | "metadata": { 171 | "collapsed": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "lsi = models.LsiModel(corpus_tfidf, 
id2word=dictionary, num_topics=500)" 176 | ] 177 | }, 178 | { 179 | "cell_type": "code", 180 | "execution_count": 31, 181 | "metadata": { 182 | "collapsed": false 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "index = similarities.MatrixSimilarity(lsi[corpus])" 187 | ] 188 | }, 189 | { 190 | "cell_type": "code", 191 | "execution_count": 32, 192 | "metadata": { 193 | "collapsed": true 194 | }, 195 | "outputs": [], 196 | "source": [ 197 | "beer_names = beer_reviews.keys()" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 33, 203 | "metadata": { 204 | "collapsed": false 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "doc = ''\n", 209 | "vec_bow = dictionary.doc2bow(doc.lower().split())\n", 210 | "vec_lsi = lsi[vec_bow]" 211 | ] 212 | }, 213 | { 214 | "cell_type": "code", 215 | "execution_count": 34, 216 | "metadata": { 217 | "collapsed": true 218 | }, 219 | "outputs": [], 220 | "source": [ 221 | "text_input = 'Heady Topper'" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 35, 227 | "metadata": { 228 | "collapsed": false, 229 | "scrolled": true 230 | }, 231 | "outputs": [ 232 | { 233 | "name": "stdout", 234 | "output_type": "stream", 235 | "text": [ 236 | "Pliny The Elder : 92.63\n", 237 | "Palate Wrecker : 91.26\n", 238 | "Abrasive Ale : 91.21\n", 239 | "Stone Enjoy By IPA : 90.74\n", 240 | "Stone RuinTen Triple IPA : 90.70\n" 241 | ] 242 | } 243 | ], 244 | "source": [ 245 | "# get the reviews for a beer\n", 246 | "beer_name_inputted = 1\n", 247 | "try:\n", 248 | " doc= documents[beer_names.index(text_input)]\n", 249 | "except ValueError:\n", 250 | " doc = text_input\n", 251 | " beer_name_inputted = 0\n", 252 | "vec_bow = dictionary.doc2bow(doc.lower().split())\n", 253 | "vec_lsi = lsi[vec_bow]\n", 254 | "\n", 255 | "sims = index[vec_lsi]\n", 256 | "for beer in sorted(enumerate(sims), key = lambda x: -x[1])[beer_name_inputted:beer_name_inputted+5]:\n", 257 | " print(beer_names[beer[0]] + ' : %.2f' 
% (beer[1]*100))" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 36, 263 | "metadata": { 264 | "collapsed": true 265 | }, 266 | "outputs": [], 267 | "source": [ 268 | "# documents\n", 269 | "# dictionary\n", 270 | "# lsi 41.2s\n", 271 | "# index 6.4s --rebuild on app" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 41, 277 | "metadata": { 278 | "collapsed": false 279 | }, 280 | "outputs": [], 281 | "source": [ 282 | "pickle.dump(documents,open('flask/app/models/documents.pkl','wb'))" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 42, 288 | "metadata": { 289 | "collapsed": true 290 | }, 291 | "outputs": [], 292 | "source": [ 293 | "pickle.dump(dictionary,open('flask/app/models/dictionary.pkl','wb'))" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 43, 299 | "metadata": { 300 | "collapsed": true 301 | }, 302 | "outputs": [], 303 | "source": [ 304 | "pickle.dump(lsi,open('flask/app/models/lsi.pkl','wb'))" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": 54, 310 | "metadata": { 311 | "collapsed": false 312 | }, 313 | "outputs": [], 314 | "source": [ 315 | "pickle.dump(corpus,open('flask/app/models/corpus.pkl','wb'))" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 55, 321 | "metadata": { 322 | "collapsed": true 323 | }, 324 | "outputs": [], 325 | "source": [ 326 | "pickle.dump(beer_names,open('flask/app/models/beer_names.pkl','wb'))" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 56, 332 | "metadata": { 333 | "collapsed": true 334 | }, 335 | "outputs": [], 336 | "source": [ 337 | "pickle.dump(index,open('flask/app/models/index.pkl','wb'))" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": { 344 | "collapsed": true 345 | }, 346 | "outputs": [], 347 | "source": [] 348 | }, 349 | { 350 | "cell_type": "code", 351 | 
"execution_count": 1, 352 | "metadata": { 353 | "collapsed": true 354 | }, 355 | "outputs": [], 356 | "source": [ 357 | "from sklearn.cluster import KMeans" 358 | ] 359 | }, 360 | { 361 | "cell_type": "code", 362 | "execution_count": null, 363 | "metadata": { 364 | "collapsed": true 365 | }, 366 | "outputs": [], 367 | "source": [] 368 | } 369 | ], 370 | "metadata": { 371 | "kernelspec": { 372 | "display_name": "Python 2", 373 | "language": "python", 374 | "name": "python2" 375 | }, 376 | "language_info": { 377 | "codemirror_mode": { 378 | "name": "ipython", 379 | "version": 2 380 | }, 381 | "file_extension": ".py", 382 | "mimetype": "text/x-python", 383 | "name": "python", 384 | "nbconvert_exporter": "python", 385 | "pygments_lexer": "ipython2", 386 | "version": "2.7.11" 387 | } 388 | }, 389 | "nbformat": 4, 390 | "nbformat_minor": 0 391 | } 392 | -------------------------------------------------------------------------------- /textacy-modeling.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 24, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "import textacy\n", 12 | "import pandas as pd\n", 13 | "from tqdm import tqdm" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": 40, 19 | "metadata": { 20 | "collapsed": true 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "beers = pd.read_pickle('all_beer_reviews.pkl')" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 59, 30 | "metadata": { 31 | "collapsed": false 32 | }, 33 | "outputs": [ 34 | { 35 | "data": { 36 | "text/plain": [ 37 | "[' 2013 version poured into a snifter. Comes out rich, dark brown with bit of mocha foam that fades to a persistent lace. Smell is oak, rye, booze, chocolate, vanilla, slight malt. Very inviting nose on this. Taste is sweet, oak, vanilla, char, and nice rye bite near the tail end. 
Mouth is very smooth and creamy, though slightly light for the abv. It goes down very easy.This was a really enjoyable beer. Better than the 2012 OF IMO.',\n", 38 | " ' Cracked right before the Diaz/McGregor fight, pours as sturdy of thick black liquid as Eclipse ever has. Plenty of roast, chocolate malt, dark oak barrel notes in the aroma.Taste, easily comes off as the sweetest Eclipse variety. Less bourbon taste and more bourbon sweetness imparted, the char, vanilla aspect of a barrel is subdued in exchange for a greater, sweeter mouthfeel. Sweet toffee, mild oak along with that sweetness.',\n", 39 | " ' Thanks to far & away my favorite mule/wife for bringing this bottle back from San Diego. Which is funny, because I looked high & low for Eclipse stouts in my seven weeks in San Diego in the last year & came up 100% empty. Luck of the Irish. 22 oz. lime-waxed bottle split with said mule. The pour is eclipse black, which is funny, because you can\\xe2\\x80\\x99t actually see an eclipse. Which isn\\xe2\\x80\\x99t funny at all. The nose is unusually light for both this line & the universe of barrel-aged stouts, really just a little roast & malt sweetness. Pretty weak. Eclipse - High West Rye is wading in the shallow end of the gene pool. Most of the Eclipse are Emmy-ready, but this is more like the Prince of Bel-Air. Mild to the point of timid. Spare roast, barrel that\\xe2\\x80\\x99s faint, bit of the rye spice as it warms, sweetness, bit of wood. Nothing stands out. Thicker than the average Eclipse, but half a bottle is plenty. Not much to this. The barrel is thin bordering on anorexic, & without the highlight of the barrel, one is left with a base stout that\\xe2\\x80\\x99s $20 overpriced. That brings me down, man. Pretty good.',\n", 40 | " \" look: Dark dark brown as an Imp Stout should be, nearly opaque. Some cola highlights. Big ass tan head for a nearly 12%'r and thick creamy lacing.smell: Wow, lots of goodness here. 
A bit of Rye spice, in a honey roasty milkshake. taste: does not lose anything to the nose. If anything there's a chewy fudgey thing happening on top. So mellow and easy drinking. Very nice indeed.\",\n", 41 | " \" Smooth drinking stout for sure. You get the spiciness from the rye that doesn't translate to overt sweetness as bourbon might which is nice. My only major knock on the eclipse beers is that they are all a bit thin mouthfeel wise...even moreso than say KBS, but that doesn't make them bad. This goes down easy, especially for the abv.\",\n", 42 | " ' inky black, thin with a mocha head, good legsnice rye scent, spice notes, vanilla,alcohol is there, dark malts give good chocolate notes,complex charactorthe nose and taste match upgood balance. a bit thin.real good, not great for me. too pricey.',\n", 43 | " \" Poured from a 2015 Batch 1 bottle. Poured a dark black color with a minimal head on top. The nose show hints of chocolate, sweetness and coffee with the mild rye whisky in the back. The taste is smooth and follows the nose chocolate coffee and the rye whisky very nicely blended together. The rye whisky tends to me a little more mellow that some of the bourbon does and you really don't feel and alcohol burn. This one of the smoothest drinking barrel aged beers I've had in quite some time.\",\n", 44 | " \" 2015 batch, bottle purchases from the brewery at the release partyFrom lime-green wax-sealed bomber to Eclipse snifterInstead of reviewing each variety individually, I like to refer to my first review -http://www.beeradvocate.com/beer/profile/14936/64055/?ba=rand - and note any differences or unique nuances. Plus, I reviewed that when barrel-aging wasn't as ubiquitous as it is now, and I'd only had a handful of examples, so my palate descriptions were more specific and colorful.This one follows along the same flavor profile of all the Eclipse varieties, with distinct notes of chocolate and licorice and coffee. 
You can just subtly make out the rye whisky aging, which isn't quite as sweet as its bourbon counterparts. For some reason, this is tasty better than it did on tap, probably because my palate was numb from the onslaught of free-pouring whiskey-aged stout, but it doesn't seem as hot. And this is a clear step up from last years batch - this one is more drinkable and balanced.\",\n", 45 | " \" Bomber labeled Batch 2 (2014) into a snifter at cellar temp, allowed to warm.Look: robust brown, but not at all black; like lightly used motor oil. Minimal light brown head.Smell: at first I get sharp metallic notes, but eventually chocolate, coffee, and dates come through.Taste: bittersweet chocolate, roast, hints of rye spice, mild vanilla. Solid, but not a lot going on.Feel: medium bodied, silky, a bit dry.Overall: a nice tasty stout, and would be in an upper echelon for non-barrel-aged offerings (though still below Monster's Park). But if I didn't know it was barrel-aged, I wouldn't guess it. Maybe the year-plus of age was a bad idea. Nowhere near worth what it cost.\",\n", 46 | " ' Fantastic beer! Bourbon is there but nor overpowering at all, lovey sweet honey with dark malts, little coffee taste, dark fruit and rich dark chocolate. Very expensive but very very good',\n", 47 | " \" Poured from the bottle into a large snifter glass. Vintage 2014, lime green wax.Deep classic and simple walnut dark brown body. Head manages half a finger of off tan brown, slightly into mocha, but dissolves to a fine, and slightly bubbled collar. A bit of a slick top, but not too much. Some fine big bubbles manage to stick around on the top as well.Excellent aroma. Spicy notes and a real sense of sweet and grainy fig quality. There's even an interesting tropical note going on in here, that sort of hints at a bit of soft papaya or mango. Light notes of chocolate and woody grain touches. 
Soft dark chocolate aromas and almost with the tropical goes to sweet orange as well which is a great combination. Very complex, very varied, and just fantastic.Palate continues the solid goodness. Hints almost at fudgy character but takes a step back in thickness but complete oak round flavors and feel. Mid palate fills with fruited spice, hitting an almost sharp peppered fruit and spicy cinnamon. Chewy almost lime hop finish, with a crushing and oozy chocolate and char roast character. First sip feels like a fruited stout, then comes an almost a heavy robust black IPA on the finish. Light tastes of orange, tangerine, oils and mild bitter zest. Hardly any sense of booze, and some cocoa powder dryness and honey lip smacking stickiness on the front.What a great play and use of sweetness, unexpected fruity quality, and delicious spice and whiskey character. Another great feather in the cap for these guys and the Eclipse series.\",\n", 48 | " \" Bottle from my Vermont trip. Pours black with nice brown head. Aroma of lots of char, some fruity notes. Flavor follows with big char and sweetness. But this is way thin, even for an Eclipse variant. It's too bad they can't make a base beer that stands up to the barrels, because all the flavors here are really nice. Thicken this beer up, please.\",\n", 49 | " ' Aged 5 months, this pours smoothly with a slow rising head about 3/4 inch in a dark caramel. This smells pretty light. Not an overpowering aroma but has hints of bourbon and a light sweetness. The taste is rich with rye, light toffee, dark chocolate, and a creaminess that is reminiscent of a milk stout. The feel exceeds the flavor as it is silky with a medium to heavy body. 
Great brew and great experience, hands down.',\n", 50 | " ' 2014 22 oz bottle poured into snifterA: pure darkness very little head soapyS: bourbon some alcohol light roast T: very smooth light on the tongue nice roast some bourbon very softM: Have to combine the mouth with taste I talked a lot about how it feelsO: Its a good beer but lacks the depth and character that the Elijah Craig version does. I would pick this version up again.',\n", 51 | " ' 2014 Vintage11.9% ABVAppearance: Very dark brown bordering on black with a moderate sized dark beige head that gradually faded. A nice looking Imperial StoutSmell: Brown sugar, mild Whiskey, and mild roasted malt aromas. Not a bad nose but the barrel aromas are fairly mild.Taste: The rye Whisky bal-aging features prominently with some spicy notes not present in the Bourbon bal-aged Eclipses. The profile also features roast malts, faint vanilla, brown sugar and tannic oak notes. The tannic oak notes feature prominently in the taste profile and the finish is a bit drier than the Bourbon bal-aged variants. Fairy restrained alcohol presence.Mouth-feel: On the light side of medium bodied with a moderate level of carbonation. Overall: This was a good Eclipse variant with the spicy rye whiskey notes featuring prominently.',\n", 52 | " ' The beer offers a medium roasted malt, caramel sweetness, onto dark fruit, a spicy licorice, and a heavy on the finish. The beer has a medium mouthfeel. A bit spicy and intense without enough vanilla and chocolate to balance it out.',\n", 53 | " \" 2014 vintage. 22oz bottle with lime green wax. Bottle run 2. Pours a pitch black color with over an inch of medium brown head. Lots of lacing. The aroma is lots of wood and booze with dark chocolate, roasted malts, vanilla and earth. The taste is lots of rye flavor up front followed by chocolate syrup, roasted malts, a hint of smoke and some wood. Vanilla and caramel as well. Finishes with lots of the rye flavor. 
Nice amount of alcohol burn while still being relatively drinkable. Heavy mouthfeel with a good deal of carbonation. Overall, just like the other ones I have had, it's a very good BA Imperial Stout. Pricey, but worth buying once in a while.\",\n", 54 | " ' From 03/07/15 notes. 2013 vintage. 22 oz. bottle poured into a tulip.Pours a dark brown, almost black color, with one finger of dark tan head and moderate carbonation evident. Lots of milk chocolate, roasted malts, sweet chocolate, spice, rye harsh whiskey, caramel malts, and burnt malts. Medium body and low to moderate carbonation.Overall not my favorite variant but very good. Worth getting for sure if it is not too much effort.',\n", 55 | " \" 22oz, light lime green waxed bottle, BR 2 of 2014.This beer pours a solid black abyss, with the barest of back-lit basal cola edges, and one skinny finger of weakly foamy and mostly bubbly brown head, which leaves a bit of streaky and windblown ocean spray lace around the glass as things slowly abate.It smells of sugary and bready caramel malt, heady Bourbon barrel notes - edgy vanilla, spicy rye grain alcohol, and an acrid woodiness - black licorice, a touch of roasted day-old coffee, bittersweet chocolate, and a soft earthy, musty hop bitterness. The taste is more roasted graininess and dry cocoa powder, before that big-assed rye whiskey barrel comes 'a, um, barreling through my gentle palate - crisp and yet astringent booze-soaked wood staves, dry vanillan, toasted caramel, and spicy rye bread - over some still perky dry anise and coffee bean esters, and a well-sublimated metallic alcohol measure.The carbonation is still noticeable in its plain and understated frothiness, the body medium-heavy in weight, and mostly smooth, the rye-ness of it all behaving like a perfect southern gentleman. 
It finishes on the sweetly caramelized side, and very much like the rye it was aged in - spicy, caramelized, and woody.For a straight rye whiskey barrel aged imperial stout, this comes off as way more drinkable that it has any right to, IMHO. I guess it helps that said whiskey is one of the higher-end rye products on the shelves of your local liquor purveyor, and keeps (so I've been told) the typical tongue-scraping acerbity to the bare minimum, as such. Yeah, it's clear that I'm not a fan of the guest spirit in toto, but somehow my attitude changes as far as this product is concerned. Good stuff and go freaking figure.\",\n", 56 | " \" BR/2 2014 batch.A: Pitch-black pour with a nice tall, dense medium brown head. Head lightens quickly but refuses to fade. Great head retention. Marginal lacing, just tiny little spots here and there.S: Spicy rye whiskey is noticeable and dominant right off the bat. Good amount of oak with it. A touch of char and hershey's syrup. T: Oh yeah. Tons of spicy rye whiskey. Huge amount in fact. Initially it tastes like I've sipped on a shot of HW rye (I have some right next to me in fact). Big, sharp and spicy rye character. It fades gradually into the middle where that hershey's syrup shows up again with a bitter coffee backing. Finish and aftertaste are a nice mashup of bittersweet chocolate and spicy rye whiskey. Hint of oak lingers as well.M: Heavy, spicy and bittersweet all at the same time. Not as heavy as some of the other variants, lots of alcohol to cut it down.O: Out of all the variants I've had, this one highlights the spiciness of rye whiskey the best. Huge rye notes and a sharpness that has to be experienced to understand. The combination of rye and alcohol bite really cuts down on the thickness and sweetness of the beer, however, balancing it fairly nicely. Well, almost balanced; it's still a rye whiskey bomb no matter how you look at it. Still, once you get past the first few sips it's really enjoyable. 
Let this one warm up for sure, any amount of coldness only seems to enhance the harshness. It sweetens up and mellows a bit once it's fully warm.\",\n", 57 | " ' A: Thin dark brown in color. Appearance of a thin brown head. Good carbonation.S: Rye on the nose, faint bourbon, chocolate, and grain. Nose is not overwhelming.T: Slightly sweet but cut by the rye whiskey flavor, hints of vanilla and chocolate, wood, earthy grain. Very little booze and a mild warming.M: Medium feel on the mouth. Smooth with good carbonation. O: Very good BA stout. Good representation of rye whiskey barrel aging.',\n", 58 | " ' 22oz bottle into snifter. 2013 release. A: Pours a deep black with a thin dark brown head that quickly disappeared into a faint, bubbly film. A ton of tiny, spotty, soapy lace was left down the glass. S: Roasted malts, chocolate, warming booze, and spicy rye. Smells rich and potent, and a bit sweet.T: Toasted and sweet notes of dark chocolate, rye whisky barrel, toasted coconut, vanilla, caramel, brown sugar and woody barrel. Very smooth and decadent, and sweeter than I remember some of the other variants being. The finish is full of spicy rye, toast, and sticky sweet candied sugar and honey, and the finish has a lingering toastedness, mild tobacco, leather, and bitter earthy hops. M: Full bodied, chewy and very sticky-sweet mouthfeel, soft and smooth carbonation. The alcohol is very mild and only slightly warming. O: An all-around great stout and one of the beer, thicker, and sweeter eclipse variants in recent memory. Drinks very well, especially at room temp.',\n", 59 | " ' bottle poured into two Sam Adams perfect pints.1 inch of head that dissipates leaving a slight bit of head. dark mahogany color is only revealed using a very bright flashlight.good carbonation.nose is chocolate, booze and bread. taste is boozy but the goodness shines through.I like some other big stouts more but this is one to try.mouthfeel finishes dry and bready. 
seems a little light for such a strong beverage. some stickiness on the lips.',\n", 60 | " ' Bottle opened on 4.21.14Barrel Run #1 2013A - Pours a very dark brown with a fine light brown head with good retention.S - Sweet rye and chocolate, subtle and nice.T - Swwet rye, barrel, chocolate, a touch of toffee.D - Good carbonation with a heavy body.O - Sweet, but nice balance. Rye is there. Good stuff.',\n", 61 | " ' Bottle poured into a snifter. 2013 vintage - bottle run #1A - black with dark brown edges. S- whiskey and chocolate very faint vanillaT - whiskey goodness, lots of chocolate and some rye spice but not overpowering. Blends pretty good. M - a little thin compared to the other eclipse variants. O - damn good BA stout. I liked the Evan Williams a bit better, but the rye barrel is definitely very well down. I look forward to the Elijah Craig and four roses.']" 62 | ] 63 | }, 64 | "execution_count": 59, 65 | "metadata": {}, 66 | "output_type": "execute_result" 67 | } 68 | ], 69 | "source": [ 70 | "beers.reviews[0]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": 42, 76 | "metadata": { 77 | "collapsed": false 78 | }, 79 | "outputs": [], 80 | "source": [ 81 | "documents = [review for review in beers.reviews]" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 45, 87 | "metadata": { 88 | "collapsed": false 89 | }, 90 | "outputs": [], 91 | "source": [ 92 | "documents = [' '.join(review) for review in documents]" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 47, 98 | "metadata": { 99 | "collapsed": false 100 | }, 101 | "outputs": [], 102 | "source": [ 103 | "documents = [unicode(review,errors='ignore') for review in documents]" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 57, 109 | "metadata": { 110 | "collapsed": false 111 | }, 112 | "outputs": [ 113 | { 114 | "name": "stderr", 115 | "output_type": "stream", 116 | "text": [ 117 | "100%|██████████| 20381/20381 
[01:00<00:00, 338.74it/s]\n" 118 | ] 119 | } 120 | ], 121 | "source": [ 122 | "documents_processed = []\n", 123 | "for document in tqdm(documents):\n", 124 | " document = textacy.preprocess.remove_punct(document)\n", 125 | " document = textacy.preprocess.replace_numbers(document,replace_with='')\n", 126 | " documents_processed.append(document)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 58, 132 | "metadata": { 133 | "collapsed": true 134 | }, 135 | "outputs": [ 136 | { 137 | "data": { 138 | "text/plain": [ 139 | "u' tapPours jet black with a tannish lacing Smells of mild cocoa and hints of coconut Bit of cocoa and lactose with a whisper of coconut and a nonastringent coffee element Medium body with lighter carbonation Pretty good stout good body for being a lighter stout and the coconut is a nice touch I wish there was a bit more coconut is all Would be willing to try this or other flavored stouts again for sure All but black with of head off the pour Soso retention 30Roasty malts dominate with some soily tart aroma like a Baltic Porter would smell Hint of nuttiness 30Wellkilned black malts with a shinethrough freshly grated coconut flavor Earthy with some mild malt bitterness A little greenness from the hops Tickling carb creates the impression of spiciness Hints of vanilla and marshmallow Medium body with overdrive spritz Not a particularly mellow drink 30This works okay Nothing wrong with it Lacking much in the nose and the feel is pretty average Not their best stout First had on tap at the breweryDark chocolate brown pour with a small tan cap of head that receded to a ringlet minimal lacing My wife took the first sip of this and claimed to detect no coconut whatsoever Well it just goes to show how peoples palates can differ because I got plenty of coconut out of this beer We experienced vice versa palateage for the cherry in the Quadzilla Not overblown but Id hardly call it subtle Nice smooth aroma of roast notes of coffee 
chocolate coconut Dark roasted malts up front in the taste with a distinct flavor of chocolatecoconut macaroons coming in midpalate sweetness and roastiness find a good equilibrium Maybe its because I detest coconut in most forms that the flavor was so blatant to me but I definitely got a good fix of it here even if it wasnt extremely potent Smooth and creamy mouthfeel not quite fullbodied but quite tasty and drinkable overall One of my favs from the Church I went for the cask version of the Coconut Stout which in my opinion was a mistake A few of my friends got it from the tap and I believe it was a better beer However it was still good Black pour with a good amount of light brown to white head Once poured the head forms much like a Guinness is poured The smell is almost toasted very sweet which I would expect from combining coconut and a stout The problem with Cask vs Tap for me came with the taste which for me was all over the place I considered the taste of the Tap version to be more controlled and balanced blending the coconut and stout flavors wonderfully whereas my version you didnt know what was going to happen with each drink not in a good way however The beer is full bodied and dry throughout I just think it was served too warm for my preference Like with Quadzilla I like to witness the beer evolve as it warms I sort of missed out with this one My advice If you like cask beers try it If youre not really sure go for the version thats on tap which I thought was nothing short of phenomenal Got this on tap at local pub for lunch todayA Pitch black and thin foam cap nice and tasty lookingS Light coffee dark chocolates and virtually no coconut to speak ofT This is a first for me and kind of a drag Lots of roasted malts and some dark chocolates slightly burnt coffee and then a faint coconut tasteM Thin and watery a disappointment hoping this would be rich and thickO Would probably not get another unless it was with the pub special lobster corn dogs the sweetness 
goes good togetheror something else that would be complimentary Had this at Ds in Monroeville PA on tap last weekends A few of the pleasures of life seem to coexist in the description coffee Mounds bars and Church Brew Works beer I recently had their Belgian Tripel which was topnotch so this was promising But there was concern that it could be cloyingly sweet It wasnt the coconut is subtle in the afternaste balancing with the rich stout flavors Sold out quickly I nice break from IPAs and summer wheats Ive been trying Worth trying if you find it in the PGH area A Salud 0Had this on tap at the brew pub Review is from notes taken on and poured from the tap to a pint glassAppearance Pour is a dark coffee brown and starts with a short one finger creamy tan head that fades to a thin and full yet uneven cap a thick creamy wall of lacing is left behind at first but eventually fades to swaths and patches that stick around a while body is dark and nearly entirely opaqueSmell Comes across a little weak some burnt coffee notes and a bit of bittersweet chocolate as it continues to warm the coconut starts to make a presence and comes across warm sweet and toasty like the top on top a pie that is baked for a few extra minutes to brown the coconut and dry it out a bitTaste Dark roasty coffee with a touch of an earthy metallic flavor that is reminiscent of coppery at first I dont really notice much of the coconut but as it sits and warms for a few more minutes it really starts to come out and make a nice creamy and sweet presence and it kind of reminds me of a Mounds bar when it blends in with a bit of bittersweet chocolate from the malts Definite solid and worth trying based upon thisMouthfeel A little thinner than what I like in a stout but for an American Stout its definitely within the style in terms of weight and viscosity body is soft and creamy and a thin dry stickiness is left on the roof of the mouth teeth feel a little gritty when its downOverall Not too bad and the 
flavor alone makes it worth trying I feel that the nose could be a little bigger and perhaps some time in the secondary on top of some more freshly toasted coconut would really bring it around or perhaps doing this in a more imperial style but then again I prefer that style to begin with A Dark brown that you cant see light through with a tan head that leaves a nice lacingS Lots of chocolate and coconut this beer doesnt pull any punches on aroma there some nuttyness as well but its mostly just the coconutT Strong chocolate and coconut flavors hit the tongue but quickly fades and leaves little aftertasteMD Mouthfeel is weak and watery and ruins what could have been a great beer This is reasonably drinkable porter and would probably be a favorite of a coconut lover but left me wanting just a litle bit more On tap at Smokin Joes Friday July review is from notes Poured into a pint glassA Like most normal stouts this one is black Very little head formed at all Just a very thin film of a roasted hazelnut color formed after it was placed in front of meS Smells like any other run of the mill stout Roasted malts some chocolate aromas and come slight coffee as well Little to no coconut aroma at all I reaaally had to search to try and ascertain any coconut in this beerT Just an average stout There is almost no coconut flavor in any way in this one quite disappointing Even after letting it warm for almost minutes the flavor still doesnt make an appearance like one would think it should If anything there is the slightest sweetness on the back end of the taste that could potentially be coconut but it is so incredibly masked I cant say for sure it isM Nice smooth creamy mouthfeel Very silkyD The drinkability is fine on this one Unfortunately after being very disappointed in the lack of the most significant flavor advertised in this beer I would not be reaching for another anytime soon Pint enjoyed the brew worksA very dark opaque brown Not much head and what is there doesnt last 
long S roasted malt Sweet chocolate covered fruit Some coffee Noticeable coconut aroma T similar to smell Im not picking up as much coconut flavor as other reviewers have noted but it is there Coffee and bitter chocolate in the finish M very nice soft velvety feel Well suited to to flavors of this beerD high The coconut flavor is in check and not tiresome I could definitely have another Never been a big fan of Church BW but I like this one On top of that its a great place to have a beer The coconut stout isnt worth seeking out but a trip to the brewpub is Church Brew Works Coconut Stout has a body that is somewhere between dark brown and black it is opaque The offwhite head is small and doesnt have much going on in the lacing or retention departmentsThe aroma has an odd slightly tart fruit smell to it which I find somewhat unpleasant Besides that a ton of roasted malt and a hint of coconut are in thereLuckily the taste omits the strange fruit from the aroma This is actually a pretty simple beer it has a bunch of roasted malt up front and the finish has a good fairly strong coconut flavor It isnt terribly complex but it is one of the better coconut beers that I have sampledThe body on this one is pretty light but I guess that should be expected with the relatively low ABVCoconut stout is simple but very drinkable I would probably order it again on tap poured into a pub glass at the brewpub in Pittsburgh pours a deep chocolate brownblack color with a half finger of light tan head weak retentionaroma is of roasted malts and a bit of chocolate with a distinct sweetness from the coconut which forms a base nicely proportioned in the nosetaste is of more roasted malts with a sweet coconut base that lingers after the fact somewhat light on the palate with a very satisfying finish well proportioned with a solid treatment of the coconutmouthfeel is medium bodied with a smooth character accented by the sweetness and lasting roasted malt qualities that work well with the 
coconut character interestingly drinkabledrinkability is great the coconut and roasted malt should not work well together in concept but in the final product do well to compliment one another very drinkable with a sublime aftertaste cheers Pint on tap at Capones Poured a dark brown color with a very small sizedalmost no off white head Aromas of chocolate roast and very light coconut Tastes of chocolate lighter roast coconut and vanilla Notes from and tasted on From notes On tap at CaponesAppears a dark brown with a small tan cap that slowly fades into a mild collar Spotty streaks of lacing are left around the glassSmell is of tart fruit caramel vanilla sugar and cocoaTaste is of the mentioned aromas with the standout cocoa caramel and vanilla along wiht a mild coconut flavor coming through in the aftertasteMouthfeel is medium bodied slick sugary with a tart finish On tap at the Church Brewpub in Pittsburgh Appears like a typical stout dark and creamy Has some of the typical smells dark roasted malt some chocolate and coffee but overwhelming with coconut Taste is dominated by coconut This is something different from any beer I have had before and unique enough that it was worth trying but not something I particularly enjoy There were other much better beers at this brewery ABlack with a thin tan head Left very spotty laceSSubtle and rather light roasty sweetness Coconut aroma is faintnot really sure if I would guess it was coconut if I didnt knowTSweet roast with a hint of the dryness of the toasted coconut Not very complexMMedium body with a fairly creamy feel and a nice dry finishDDecent but has room for improvement I would like a bit more chocolateroasty flavours to introduce some complexity The coconut adds a nice drying aspect and decent flavourbut that is where it ends I can see hints of a really good brew here but find it lacking Appearance Arrives at the bar with a nearly black color and a modest head that leaves some lacing while it lastsSmell Unsweetened 
coconut and roasty elementsTaste Up front the coconut and chocolate clearly come through but there is a developing nut skin bitterness that appears at midpalate the coconut continues into the finish as the bitter flavors fade and the sweet stout elements reassert themselvesMouthfeel Medium body slightly creamy with moderate carbonationDrinkability A nice combination but I cant help but think what this would be like in an Imperial version with a big mouthfeel and a lot more coffeechocolate character'" 140 | ] 141 | }, 142 | "execution_count": 58, 143 | "metadata": {}, 144 | "output_type": "execute_result" 145 | } 146 | ], 147 | "source": [ 148 | "for doc in documents_processed:\n", 149 | " " 150 | ] 151 | }, 152 | { 153 | "cell_type": "code", 154 | "execution_count": 62, 155 | "metadata": { 156 | "collapsed": false 157 | }, 158 | "outputs": [], 159 | "source": [ 160 | "test_doc = textacy.texts.TextDoc(documents_processed[0])" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": null, 166 | "metadata": { 167 | "collapsed": false 168 | }, 169 | "outputs": [], 170 | "source": [ 171 | "text_corpus = textacy.texts.TextCorpus.from_texts(lang='en',texts=documents_processed)" 172 | ] 173 | }, 174 | { 175 | "cell_type": "code", 176 | "execution_count": null, 177 | "metadata": { 178 | "collapsed": true 179 | }, 180 | "outputs": [], 181 | "source": [] 182 | } 183 | ], 184 | "metadata": { 185 | "kernelspec": { 186 | "display_name": "Python 2", 187 | "language": "python", 188 | "name": "python2" 189 | }, 190 | "language_info": { 191 | "codemirror_mode": { 192 | "name": "ipython", 193 | "version": 2 194 | }, 195 | "file_extension": ".py", 196 | "mimetype": "text/x-python", 197 | "name": "python", 198 | "nbconvert_exporter": "python", 199 | "pygments_lexer": "ipython2", 200 | "version": "2.7.11" 201 | } 202 | }, 203 | "nbformat": 4, 204 | "nbformat_minor": 0 205 | } 206 | -------------------------------------------------------------------------------- 
/analyze-beer-soup.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 2, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [ 10 | { 11 | "name": "stderr", 12 | "output_type": "stream", 13 | "text": [ 14 | "\r", 15 | "0it [00:00, ?it/s]" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "import pandas as pd\n", 21 | "import numpy as np\n", 22 | "from tqdm import tqdm, tqdm_pandas, tqdm_notebook\n", 23 | "import re\n", 24 | "import requests\n", 25 | "from bs4 import BeautifulSoup\n", 26 | "import glob\n", 27 | "\n", 28 | "\n", 29 | "from fake_useragent import UserAgent\n", 30 | "ua = UserAgent()\n", 31 | "\n", 32 | "tqdm_pandas(tqdm())" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 3, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "import sys\n", 44 | "sys.setrecursionlimit(100000000)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 6, 50 | "metadata": { 51 | "collapsed": true 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "def get_soup(url, timeout=5):\n", 56 | " headers = {'User-Agent':ua.random}\n", 57 | " try:\n", 58 | " response = requests.get(url,headers=headers)\n", 59 | " except:\n", 60 | " print(\"FAILED \"+ url)\n", 61 | " return 0\n", 62 | " attempts = 0\n", 63 | " while(not response.ok):\n", 64 | " #print((url+' failed with code: '+str(response.status_code)))\n", 65 | " if attempts > timeout:\n", 66 | " print(url+' failed with code: '+str(response.status_code))\n", 67 | " return BeautifulSoup('','lxml')\n", 68 | " response = requests.get(url)\n", 69 | " attempts += 1\n", 70 | " page = response.text\n", 71 | " soup = BeautifulSoup(page,'lxml')\n", 72 | " return soup" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 7, 78 | "metadata": { 79 | "collapsed": true 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "def 
get_beer_stats(row):\n", 84 | " soup = row['soup']\n", 85 | " stats = soup.find(id='item_stats').find('dl')\n", 86 | " row['ba_score'] = soup.find(class_='BAscore_big ba-score').get_text()\n", 87 | " row['num_reviews'] = int(stats.find(class_='ba-reviews').get_text().replace(',',''))\n", 88 | " row['num_ratings'] = int(stats.find(class_='ba-ratings').get_text().replace(',',''))\n", 89 | " row['ravg'] = float(stats.find(class_='ba-ravg').get_text().replace(',',''))\n", 90 | " row['pdev'] = float(stats.find(class_='ba-pdev').get_text().replace(',','').replace('%',''))\n", 91 | " row['wants'] = int(stats.find(class_='ba-wants').get_text().replace(',',''))\n", 92 | " row['gots'] = int(stats.find(class_='ba-gots').get_text().replace(',',''))\n", 93 | " row['for_trade'] = int(stats.find_all('dt')[-1].get_text().replace(',',''))\n", 94 | "\n", 95 | " info_links = soup.find('div',style=\"float:right;width:70%;\").find_all('a')\n", 96 | " row['brewery_name'] = info_links[0].get_text()\n", 97 | " row['brewery_loation'] = info_links[1].get_text()\n", 98 | " try:\n", 99 | " row['brewery_website'] = info_links[3]['href']\n", 100 | " except:\n", 101 | " row['brewery_website'] = ''\n", 102 | " row['beer_style'] = info_links[-1].get_text()\n", 103 | " row['style_url'] = info_links[-1]['href']\n", 104 | " if row['brewery_website'] == row['style_url']:\n", 105 | " row['brewery_website'] = ''\n", 106 | " try:\n", 107 | " row['abv'] = float(re.findall(r'(?<=\\(ABV\\): )\\d+\\.\\d+',soup.find('div',style=\"float:right;width:70%;\").get_text())[0])\n", 108 | " except:\n", 109 | " row['abv'] = np.nan\n", 110 | " row['availability'] = re.findall(r'(?<=Availability: )[\\w\\-]*',soup.find('div',style=\"float:right;width:70%;\").get_text())[0]\n", 111 | " return row" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 159, 117 | "metadata": { 118 | "collapsed": true 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "beers = pd.read_pickle('beer_soup_229.pkl')" 
123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 160, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [ 132 | { 133 | "name": "stderr", 134 | "output_type": "stream", 135 | "text": [ 136 | "\n", 137 | "0it [00:00, ?it/s]\u001b[A\n", 138 | "100%|██████████| 3/3 [00:00<00:00, 21.46it/s]\u001b[A\n", 139 | "5it [00:00, 19.82it/s] \u001b[A\n", 140 | "7it [00:00, 19.30it/s]\u001b[A\n", 141 | "9it [00:00, 18.70it/s]\u001b[A\n", 142 | "12it [00:00, 19.55it/s]\u001b[A\n", 143 | "14it [00:00, 19.19it/s]\u001b[A\n", 144 | "17it [00:00, 20.19it/s]\u001b[A\n", 145 | "19it [00:00, 19.93it/s]\u001b[A\n", 146 | "21it [00:01, 19.35it/s]\u001b[A\n", 147 | "23it [00:01, 19.23it/s]\u001b[A\n", 148 | "25it [00:01, 18.03it/s]\u001b[A\n", 149 | "27it [00:01, 18.53it/s]\u001b[A\n", 150 | "30it [00:01, 19.66it/s]\u001b[A\n", 151 | "32it [00:01, 18.74it/s]\u001b[A\n", 152 | "34it [00:01, 18.33it/s]\u001b[A\n", 153 | "36it [00:01, 18.63it/s]\u001b[A\n", 154 | "39it [00:02, 19.38it/s]\u001b[A\n", 155 | "41it [00:02, 18.92it/s]\u001b[A\n", 156 | "43it [00:02, 18.44it/s]\u001b[A\n", 157 | "45it [00:02, 18.30it/s]\u001b[A\n", 158 | "47it [00:02, 18.25it/s]\u001b[A\n", 159 | "50it [00:02, 19.42it/s]\u001b[A\n", 160 | "52it [00:02, 18.97it/s]\u001b[A\n", 161 | "54it [00:02, 19.17it/s]\u001b[A\n", 162 | "57it [00:02, 20.29it/s]\u001b[A\n", 163 | "60it [00:03, 18.44it/s]\u001b[A\n", 164 | "62it [00:03, 18.53it/s]\u001b[A\n", 165 | "65it [00:03, 19.49it/s]\u001b[A\n", 166 | "68it [00:03, 20.20it/s]\u001b[A\n", 167 | "71it [00:03, 20.86it/s]\u001b[A\n", 168 | "74it [00:03, 21.40it/s]\u001b[A\n", 169 | "77it [00:03, 21.17it/s]\u001b[A\n", 170 | "80it [00:04, 18.57it/s]\u001b[A\n", 171 | "82it [00:04, 17.06it/s]\u001b[A\n", 172 | "84it [00:04, 17.60it/s]\u001b[A\n", 173 | "87it [00:04, 17.90it/s]\u001b[A\n", 174 | "89it [00:04, 17.23it/s]\u001b[A\n", 175 | "91it [00:04, 17.93it/s]\u001b[A\n", 176 | "93it [00:04, 17.85it/s]\u001b[A\n", 177 | "95it [00:04, 
18.33it/s]\u001b[A\n", 178 | "97it [00:05, 17.85it/s]\u001b[A\n", 179 | "99it [00:05, 17.82it/s]\u001b[A\n", 180 | "101it [00:05, 15.70it/s]\u001b[A\n", 181 | "103it [00:05, 14.99it/s]\u001b[A\n", 182 | "105it [00:05, 15.41it/s]\u001b[A\n", 183 | "107it [00:05, 15.86it/s]\u001b[A\n", 184 | "109it [00:05, 16.13it/s]\u001b[A\n", 185 | "111it [00:06, 15.24it/s]\u001b[A\n", 186 | "113it [00:06, 15.94it/s]\u001b[A\n", 187 | "115it [00:06, 16.84it/s]\u001b[A\n", 188 | "117it [00:06, 14.95it/s]\u001b[A\n", 189 | "119it [00:06, 13.30it/s]\u001b[A\n", 190 | "121it [00:06, 12.87it/s]\u001b[A\n", 191 | "123it [00:06, 13.33it/s]\u001b[A\n", 192 | "125it [00:07, 13.23it/s]\u001b[A\n", 193 | "127it [00:07, 11.97it/s]\u001b[A\n", 194 | "129it [00:07, 12.14it/s]\u001b[A\n", 195 | "131it [00:07, 12.85it/s]\u001b[A\n", 196 | "133it [00:07, 13.26it/s]\u001b[A\n", 197 | "135it [00:07, 13.21it/s]\u001b[A\n", 198 | "137it [00:08, 13.01it/s]\u001b[A\n", 199 | "139it [00:08, 13.03it/s]\u001b[A\n", 200 | "141it [00:08, 12.86it/s]\u001b[A\n", 201 | "143it [00:08, 12.90it/s]\u001b[A\n", 202 | "145it [00:08, 12.71it/s]\u001b[A\n", 203 | "147it [00:08, 13.79it/s]\u001b[A\n", 204 | "149it [00:08, 13.05it/s]\u001b[A\n", 205 | "151it [00:09, 13.26it/s]\u001b[A\n", 206 | "153it [00:09, 14.73it/s]\u001b[A\n", 207 | "155it [00:09, 14.98it/s]\u001b[A\n", 208 | "157it [00:09, 15.99it/s]\u001b[A\n", 209 | "159it [00:09, 15.68it/s]\u001b[A\n", 210 | "161it [00:09, 14.30it/s]\u001b[A\n", 211 | "163it [00:09, 14.26it/s]\u001b[A\n", 212 | "165it [00:09, 14.88it/s]\u001b[A\n", 213 | "168it [00:10, 15.96it/s]\u001b[A\n", 214 | "170it [00:10, 14.63it/s]\u001b[A\n", 215 | "172it [00:10, 14.36it/s]\u001b[A\n", 216 | "174it [00:10, 12.79it/s]\u001b[A\n", 217 | "176it [00:10, 12.96it/s]\u001b[A\n", 218 | "179it [00:10, 14.97it/s]\u001b[A\n", 219 | "182it [00:11, 16.47it/s]\u001b[A\n", 220 | "184it [00:11, 15.74it/s]\u001b[A\n", 221 | "186it [00:11, 15.15it/s]\u001b[A\n", 222 | "189it [00:11, 
16.76it/s]\u001b[A\n", 223 | "191it [00:11, 17.57it/s]\u001b[A\n", 224 | "193it [00:11, 17.90it/s]\u001b[A\n", 225 | "195it [00:11, 18.31it/s]\u001b[A\n", 226 | "198it [00:11, 19.43it/s]\u001b[A\n", 227 | "201it [00:12, 20.42it/s]\u001b[A\n", 228 | "204it [00:12, 18.06it/s]\u001b[A\n", 229 | "206it [00:12, 16.31it/s]\u001b[A\n", 230 | "208it [00:12, 16.10it/s]\u001b[A\n", 231 | "210it [00:12, 16.17it/s]\u001b[A\n", 232 | "212it [00:12, 15.91it/s]\u001b[A\n", 233 | "214it [00:12, 14.92it/s]\u001b[A\n", 234 | "216it [00:13, 15.37it/s]\u001b[A\n", 235 | "218it [00:13, 15.42it/s]\u001b[A\n", 236 | "220it [00:13, 14.47it/s]\u001b[A\n", 237 | "222it [00:13, 14.32it/s]\u001b[A\n", 238 | "224it [00:13, 14.62it/s]\u001b[A\n", 239 | "226it [00:13, 15.79it/s]\u001b[A\n", 240 | "229it [00:13, 17.19it/s]\u001b[A\n", 241 | "230it [00:13, 16.43it/s]\u001b[A" 242 | ] 243 | } 244 | ], 245 | "source": [ 246 | "tqdm_pandas(tqdm())\n", 247 | "beers = beers.progress_apply(get_beer_stats,axis=1)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 8, 253 | "metadata": { 254 | "collapsed": false 255 | }, 256 | "outputs": [], 257 | "source": [ 258 | "def get_beer_df_reviews(row):\n", 259 | " ba_url = 'http://www.beeradvocate.com'\n", 260 | " url_suffix = '?sort=topr&start='\n", 261 | " row['reviews'] = get_beer_reviews(row['soup'])\n", 262 | " if row['num_reviews'] > 25:\n", 263 | " if row['num_reviews'] > 100:\n", 264 | " num_reviews = 100\n", 265 | " else:\n", 266 | " num_reviews = row['num_reviews']\n", 267 | " for i in range(1,num_reviews//25):\n", 268 | " url = ba_url + row['url'] + url_suffix + str(i*25)\n", 269 | " soup = get_soup(url)\n", 270 | " reviews = get_beer_reviews(soup)\n", 271 | " row['reviews'] += reviews\n", 272 | " return row\n", 273 | "\n", 274 | "def get_beer_reviews(soup):\n", 275 | " reviews = []\n", 276 | " for rating in soup.find_all(id='rating_fullview_content_2'):\n", 277 | " for span in rating.find_all('span'):\n", 278 | " 
span.extract()\n", 279 | " review = rating.get_text().strip().encode('utf-8')\n", 280 | " review = review.replace('rDev','')\n", 281 | " reviews.append(str(review))\n", 282 | " return reviews" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": 162, 288 | "metadata": { 289 | "collapsed": true 290 | }, 291 | "outputs": [ 292 | { 293 | "name": "stderr", 294 | "output_type": "stream", 295 | "text": [ 296 | "\n", 297 | "0it [00:00, ?it/s]\u001b[A\n", 298 | " 17%|█▋ | 3/18 [00:00<00:00, 21.87it/s]\u001b[A\n", 299 | " 33%|███▎ | 6/18 [00:00<00:01, 9.82it/s]\u001b[A\n", 300 | " 44%|████▍ | 8/18 [00:02<00:03, 2.94it/s]\u001b[A\n", 301 | " 50%|█████ | 9/18 [00:04<00:07, 1.26it/s]\u001b[A\n", 302 | " 67%|██████▋ | 12/18 [00:04<00:03, 1.76it/s]\u001b[A\n", 303 | " 78%|███████▊ | 14/18 [00:05<00:02, 1.74it/s]\u001b[A\n", 304 | " 94%|█████████▍| 17/18 [00:05<00:00, 2.40it/s]\u001b[A\n", 305 | "20it [00:07, 2.37it/s] \u001b[A\n", 306 | "22it [00:08, 1.83it/s]\u001b[A\n", 307 | "25it [00:10, 1.75it/s]\u001b[A\n", 308 | "27it [00:10, 2.40it/s]\u001b[A\n", 309 | "30it [00:11, 3.30it/s]\u001b[A\n", 310 | "33it [00:11, 4.47it/s]\u001b[A\n", 311 | "35it [00:12, 2.37it/s]\u001b[A\n", 312 | "38it [00:13, 3.25it/s]\u001b[A\n", 313 | "40it [00:13, 3.31it/s]\u001b[A\n", 314 | "42it [00:14, 2.64it/s]\u001b[A\n", 315 | "44it [00:15, 2.86it/s]\u001b[A\n", 316 | "47it [00:15, 3.91it/s]\u001b[A\n", 317 | "49it [00:15, 3.77it/s]\u001b[A\n", 318 | "52it [00:16, 5.04it/s]\u001b[A\n", 319 | "56it [00:17, 3.72it/s]\u001b[A\n", 320 | "59it [00:19, 2.76it/s]\u001b[A\n", 321 | "62it [00:20, 3.10it/s]\u001b[A\n", 322 | "64it [00:22, 1.63it/s]\u001b[A\n", 323 | "65it [00:24, 1.01it/s]\u001b[A\n", 324 | "67it [00:26, 1.04it/s]\u001b[A\n", 325 | "68it [00:27, 1.18it/s]\u001b[A\n", 326 | "71it [00:27, 1.65it/s]\u001b[A\n", 327 | "73it [00:27, 2.25it/s]\u001b[A\n", 328 | "75it [00:29, 1.69it/s]\u001b[A\n", 329 | "76it [00:31, 1.10s/it]\u001b[A\n", 330 | "78it [00:33, 
1.03s/it]\u001b[A\n", 331 | "80it [00:33, 1.36it/s]\u001b[A\n", 332 | "83it [00:33, 1.90it/s]\u001b[A\n", 333 | "85it [00:36, 1.29it/s]\u001b[A\n", 334 | "87it [00:36, 1.79it/s]\u001b[A\n", 335 | "90it [00:36, 2.46it/s]\u001b[A\n", 336 | "93it [00:37, 2.48it/s]\u001b[A\n", 337 | "95it [00:39, 1.90it/s]\u001b[A\n", 338 | "96it [00:39, 1.78it/s]\u001b[A\n", 339 | "97it [00:41, 1.36it/s]\u001b[A\n", 340 | "101it [00:42, 1.52it/s]\u001b[A\n", 341 | "102it [00:44, 1.22it/s]\u001b[A\n", 342 | "106it [00:44, 1.71it/s]\u001b[A\n", 343 | "108it [00:46, 1.43it/s]\u001b[A\n", 344 | "111it [00:46, 2.00it/s]\u001b[A\n", 345 | "113it [00:47, 1.86it/s]\u001b[A\n", 346 | "114it [00:48, 1.36it/s]\u001b[A\n", 347 | "116it [00:51, 1.15it/s]\u001b[A\n", 348 | "117it [00:52, 1.08s/it]\u001b[A\n", 349 | "119it [00:53, 1.18it/s]\u001b[A\n", 350 | "122it [00:55, 1.27it/s]\u001b[A\n", 351 | "126it [00:55, 1.79it/s]\u001b[A\n", 352 | "129it [00:55, 2.47it/s]\u001b[A\n", 353 | "131it [00:58, 1.25it/s]\u001b[A\n", 354 | "134it [00:59, 1.58it/s]\u001b[A\n", 355 | "137it [01:00, 1.77it/s]\u001b[A\n", 356 | "138it [01:02, 1.09it/s]\u001b[A\n", 357 | "139it [01:03, 1.22it/s]\u001b[A\n", 358 | "140it [01:04, 1.06s/it]\u001b[A\n", 359 | "143it [01:04, 1.32it/s]\u001b[A\n", 360 | "144it [01:06, 1.04s/it]\u001b[A\n", 361 | "146it [01:06, 1.33it/s]\u001b[A\n", 362 | "149it [01:06, 1.84it/s]\u001b[A\n", 363 | "151it [01:08, 1.52it/s]\u001b[A\n", 364 | "154it [01:08, 2.12it/s]\u001b[A\n", 365 | "156it [01:10, 1.93it/s]\u001b[A\n", 366 | "159it [01:10, 2.68it/s]\u001b[A\n", 367 | "162it [01:10, 3.66it/s]\u001b[A\n", 368 | "164it [01:12, 2.21it/s]\u001b[A\n", 369 | "166it [01:13, 1.94it/s]\u001b[A\n", 370 | "168it [01:15, 1.53it/s]\u001b[A\n", 371 | "170it [01:24, 1.81s/it]\u001b[A\n", 372 | "174it [01:24, 1.28s/it]\u001b[A\n", 373 | "176it [01:27, 1.32s/it]\u001b[A" 374 | ] 375 | }, 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | 
"http://www.beeradvocate.com/beer/profile/13947/30771/?sort=topr&start=75 failed with code: 403\n" 381 | ] 382 | }, 383 | { 384 | "name": "stderr", 385 | "output_type": "stream", 386 | "text": [ 387 | "\n", 388 | "178it [01:29, 1.23s/it]\u001b[A\n", 389 | "180it [01:31, 1.15s/it]\u001b[A" 390 | ] 391 | }, 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "http://www.beeradvocate.com/beer/profile/18134/13906/?sort=topr&start=50 failed with code: 403\n" 397 | ] 398 | }, 399 | { 400 | "name": "stderr", 401 | "output_type": "stream", 402 | "text": [ 403 | "\n", 404 | "182it [01:33, 1.06s/it]\u001b[A\n", 405 | "184it [01:34, 1.08it/s]\u001b[A\n", 406 | "185it [01:36, 1.19s/it]\u001b[A\n", 407 | "188it [01:38, 1.03s/it]\u001b[A\n", 408 | "191it [01:38, 1.37it/s]\u001b[A\n", 409 | "194it [01:38, 1.92it/s]\u001b[A\n", 410 | "197it [01:38, 2.67it/s]\u001b[A\n", 411 | "199it [01:40, 1.89it/s]\u001b[A\n", 412 | "201it [01:40, 2.18it/s]\u001b[A\n", 413 | "204it [01:40, 3.01it/s]\u001b[A\n", 414 | "207it [01:42, 2.48it/s]\u001b[A\n", 415 | "209it [01:44, 1.81it/s]\u001b[A\n", 416 | "211it [01:44, 2.10it/s]\u001b[A\n", 417 | "214it [01:45, 2.88it/s]\u001b[A\n", 418 | "218it [01:45, 3.96it/s]\u001b[A\n", 419 | "221it [01:45, 5.32it/s]\u001b[A\n", 420 | "225it [01:47, 3.75it/s]\u001b[A\n", 421 | "228it [01:49, 2.61it/s]\u001b[A\n", 422 | "230it [01:51, 1.57it/s]\u001b[A\n", 423 | "\u001b[A" 424 | ] 425 | }, 426 | { 427 | "data": { 428 | "text/html": [ 429 | "
\n", 430 | "\n", 431 | " \n", 432 | " \n", 433 | " \n", 434 | " \n", 435 | " \n", 436 | " \n", 437 | " \n", 438 | " \n", 439 | " \n", 440 | " \n", 441 | " \n", 442 | " \n", 443 | " \n", 444 | " \n", 445 | " \n", 446 | " \n", 447 | " \n", 448 | " \n", 449 | " \n", 450 | " \n", 451 | " \n", 452 | " \n", 453 | " \n", 454 | " \n", 455 | " \n", 456 | " \n", 457 | " \n", 458 | " \n", 459 | " \n", 460 | " \n", 462 | " \n", 463 | " \n", 464 | " \n", 465 | " \n", 466 | " \n", 467 | " \n", 468 | " \n", 469 | " \n", 470 | " \n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | " \n", 560 | " \n", 561 | " \n", 562 | " \n", 563 | " \n", 564 | " \n", 565 | " \n", 566 | " \n", 567 | " \n", 568 | " \n", 569 | " \n", 570 | " \n", 571 | " \n", 572 | "
nameurlsoupba_scorenum_reviewsnum_ratingsravgpdevwantsgotsfor_tradebrewery_namebrewery_loationbrewery_websitebeer_stylestyle_urlabvavailabilityreviews
0Imperial Eclipse Stout - High West Rye/beer/profile/14936/107388/<!DOCTYPE html>\n", 461 | "<html class=\"Public NoJs Logge...92301624.2417.92276417FiftyFifty Brewing Co.Californiahttp://www.fiftyfiftybrewing.comAmerican Double / Imperial Stout/beer/style/157/11.9Fall[ 2013 version poured into a snifter. Comes ou...
1River Horse India Pale Ale/beer/profile/877/138007/<!DOCTYPE html>\n", 484 | "<html class=\"Public NoJs Logge...8314903.6031.940320River Horse Brewing Co.New Jerseyhttp://www.riverhorse.comAmerican IPA/beer/style/116/5.7Year-round[ No date on the bottle, but purchased from th...
2Hibiscus Wit/beer/profile/24428/66018/<!DOCTYPE html>\n", 507 | "<html class=\"Public NoJs Logge...8410573.7516.802702nd Shift BrewingMissourihttp://www.2ndshiftbrewing.comWitbier/beer/style/48/5.2Rotating[ Gold with pure white headLight fresh stone f...
3Kozel/beer/profile/448/5430/<!DOCTYPE html>\n", 530 | "<html class=\"Public NoJs Logge...78591343.3624.702210Pivovar Velké Popovice a.s.Czech RepublicCzech Pilsener/beer/style/40/5.0Year-round[ On tap into a dimple beer mugA - Beer is cle...
4Summer Wheat Ale/beer/profile/12375/61223/<!DOCTYPE html>\n", 553 | "<html class=\"Public NoJs Logge...8419453.7415.24190Mt. Carmel Brewing CompanyOhiohttp://www.mtcarmelbrewingcompany.comAmerican Pale Wheat Ale/beer/style/93/4.8Summer[ A: The beer is slightly hazy light yellow in...
\n", 573 | "
" 574 | ], 575 | "text/plain": [ 576 | " name url \\\n", 577 | "0 Imperial Eclipse Stout - High West Rye /beer/profile/14936/107388/ \n", 578 | "1 River Horse India Pale Ale /beer/profile/877/138007/ \n", 579 | "2 Hibiscus Wit /beer/profile/24428/66018/ \n", 580 | "3 Kozel /beer/profile/448/5430/ \n", 581 | "4 Summer Wheat Ale /beer/profile/12375/61223/ \n", 582 | "\n", 583 | " soup ba_score num_reviews \\\n", 584 | "0 \n", 585 | "\n", 587 | "\n", 589 | "\n", 591 | "\n", 593 | "\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mbeers_reviews\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconcat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdfs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mdfs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 9\u001b[0;31m \u001b[0mbeer_reviews\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_pickle\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'all_beers_reviews.pkl'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 1479 | "\u001b[0;31mNameError\u001b[0m: name 'beer_reviews' is not defined" 1480 | ] 1481 | } 1482 | ], 1483 | "source": [ 1484 | "dfs = [temp]\n", 1485 | "for pkl in tqdm(glob.glob('temp/*.pkl')):\n", 1486 | " temp = pd.read_pickle(pkl)\n", 1487 | " temp.drop('soup',axis='columns',inplace=True)\n", 1488 | " dfs.append(temp)\n", 1489 | " \n", 1490 | "beer_reviews = pd.concat(dfs)\n", 1491 | "dfs = []\n", 1492 | "beer_reviews.to_pickle('all_beers_reviews.pkl')" 1493 | ] 1494 | }, 1495 | { 1496 | "cell_type": "code", 1497 | "execution_count": 20, 1498 | "metadata": { 1499 | "collapsed": false 1500 | }, 1501 | "outputs": [ 1502 | { 1503 | "ename": "NameError", 1504 | "evalue": "name 'beer_reviews' is not defined", 1505 | "output_type": "error", 1506 | "traceback": [ 1507 | 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 1508 | "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", 1509 | "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mbeer_reviews\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhead\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", 1510 | "\u001b[0;31mNameError\u001b[0m: name 'beer_reviews' is not defined" 1511 | ] 1512 | } 1513 | ], 1514 | "source": [] 1515 | }, 1516 | { 1517 | "cell_type": "code", 1518 | "execution_count": null, 1519 | "metadata": { 1520 | "collapsed": true 1521 | }, 1522 | "outputs": [], 1523 | "source": [] 1524 | } 1525 | ], 1526 | "metadata": { 1527 | "kernelspec": { 1528 | "display_name": "Python 2", 1529 | "language": "python", 1530 | "name": "python2" 1531 | }, 1532 | "language_info": { 1533 | "codemirror_mode": { 1534 | "name": "ipython", 1535 | "version": 2 1536 | }, 1537 | "file_extension": ".py", 1538 | "mimetype": "text/x-python", 1539 | "name": "python", 1540 | "nbconvert_exporter": "python", 1541 | "pygments_lexer": "ipython2", 1542 | "version": "2.7.11" 1543 | } 1544 | }, 1545 | "nbformat": 4, 1546 | "nbformat_minor": 0 1547 | } 1548 | -------------------------------------------------------------------------------- /scrape-all-ba.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 3, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from bs4 import BeautifulSoup\n", 12 | "import string\n", 13 | "import requests\n", 14 | "import pandas as pd\n", 15 | "import re\n", 16 | "import pickle\n", 17 | "\n", 18 | "from fake_useragent import UserAgent\n", 19 | "ua = UserAgent()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 4, 25 | "metadata": { 26 | "collapsed": 
true 27 | }, 28 | "outputs": [], 29 | "source": [ 30 | "def remove_punctuation(x):\n", 31 | " x = str(x)\n", 32 | " return x.translate(str.maketrans({a:None for a in string.punctuation}))" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": 5, 38 | "metadata": { 39 | "collapsed": true 40 | }, 41 | "outputs": [], 42 | "source": [ 43 | "def get_soup(url, timeout=5):\n", 44 | " headers = {'User-Agent':ua.random}\n", 45 | " try:\n", 46 | " response = requests.get(url,headers=headers)\n", 47 | " except:\n", 48 | " print(\"FAILED \"+ url)\n", 49 | " return 0\n", 50 | " attempts = 0\n", 51 | " while(not response.ok):\n", 52 | " print((url+' failed with code: '+str(response.status_code)))\n", 53 | " if attempts > timeout:\n", 54 | " print(url+' failed with code: '+str(response.status_code))\n", 55 | " return BeautifulSoup('')\n", 56 | " response = requests.get(url)\n", 57 | " attempts += 1\n", 58 | " page = response.text\n", 59 | " soup = BeautifulSoup(page)\n", 60 | " return soup" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": { 67 | "collapsed": false 68 | }, 69 | "outputs": [], 70 | "source": [ 71 | "# Get all urls for each beer style\n", 72 | "\n", 73 | "url = 'http://www.beeradvocate.com/beer/style/'\n", 74 | "soup = get_soup(url)\n", 75 | "\n", 76 | "beer_styles = {}\n", 77 | "for style in soup.find('table').find_all('a'):\n", 78 | " beer_styles[style.get_text()] = style['href']" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": { 85 | "collapsed": false 86 | }, 87 | "outputs": [], 88 | "source": [ 89 | "ba_url = 'http://www.beeradvocate.com'\n", 90 | "style_suffix = '?sort=revsD&start=0'\n", 91 | "soup = get_soup(ba_url+style_url+style_suffix)" 92 | ] 93 | }, 94 | { 95 | "cell_type": "code", 96 | "execution_count": null, 97 | "metadata": { 98 | "collapsed": false 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "int(re.findall(r'(?<=\\(out of 
)\\d*',soup.find('tr').get_text())[0])" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": { 118 | "collapsed": false 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "beer_styles.items()[0]" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": { 129 | "collapsed": true 130 | }, 131 | "outputs": [], 132 | "source": [ 133 | "# Pulls the name and url to all beers with more than 25 'hads'\n", 134 | "\n", 135 | "ba_url = 'http://www.beeradvocate.com'\n", 136 | "style_suffix = '?sort=revsD&start='\n", 137 | "columns = ['name','url']\n", 138 | "temp = {}\n", 139 | "for style in beer_styles.items():\n", 140 | " print(url)\n", 141 | " url = ba_url+style[1]+style_suffix\n", 142 | " soup = get_soup(url)\n", 143 | " num_beers = int(re.findall(r'(?<=\\(out of )\\d*',soup.find('tr').get_text())[0])\n", 144 | " print num_beers\n", 145 | " min_beer = False\n", 146 | " for i in range(num_beers//50):\n", 147 | " if min_beer:\n", 148 | " break\n", 149 | " url = ba_url+style[1]+style_suffix+str(i*50)\n", 150 | " soup = get_soup(url)\n", 151 | " for row in soup.find_all('tr')[3:-1]:\n", 152 | " cells = row.find_all('td')\n", 153 | " if int(cells[4].get_text().replace(',','')) < 25:\n", 154 | " min_beer = True\n", 155 | " break\n", 156 | " temp[cells[0].find('a').get_text()] = cells[0].find('a')['href']" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 7, 162 | "metadata": { 163 | "collapsed": true 164 | }, 165 | "outputs": [ 166 | { 167 | "name": "stdout", 168 | "output_type": "stream", 169 | "text": [ 170 | "538\n" 171 | ] 172 | }, 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "{u'1554 Black Lager': '/beer/profile/192/111828/',\n", 177 | " u'3 Best Friends': 
'/beer/profile/219/116077/',\n", 178 | " u'Alhambra Negra': '/beer/profile/9262/22584/',\n", 179 | " u'Asahi Dry Black': '/beer/profile/716/89545/',\n", 180 | " u'B.B. Dark Bohemia Beer - 1795 Original Czech Dark Lager': '/beer/profile/303/37361/',\n", 181 | " u'Baltika #4 Original (Dark)': '/beer/profile/401/2235/',\n", 182 | " u'Beerlao Dark': '/beer/profile/2970/27607/',\n", 183 | " u'Bernard Cerne': '/beer/profile/2055/21521/',\n", 184 | " u'Big Rock Honey Brown Lager': '/beer/profile/391/11175/',\n", 185 | " u'Black Lager (\\u010cern\\xe9 Pivo)': '/beer/profile/22723/76132/',\n", 186 | " u'Black Licorice Lager': '/beer/profile/9629/37147/',\n", 187 | " u'Bohemia': '/beer/profile/301/7078/',\n", 188 | " u'Bohemia Regent Lager Dark': '/beer/profile/7366/14349/',\n", 189 | " u'Bony Fingers': '/beer/profile/763/20446/',\n", 190 | " u'Brick Waterloo Dark Lager': '/beer/profile/416/5196/',\n", 191 | " u'Budweiser Budvar Czech Dark Lager': '/beer/profile/304/35967/',\n", 192 | " u'California Black Beer': '/beer/profile/8818/40370/',\n", 193 | " u'Celestial Meridian Cascadian Dark Lager': '/beer/profile/29619/118880/',\n", 194 | " u'Dark 266': '/beer/profile/3912/27684/',\n", 195 | " u'Death & Taxes Black Beer': '/beer/profile/763/2306/',\n", 196 | " u'Efes Dark': '/beer/profile/569/12362/',\n", 197 | " u'El Steinber Dark Lager': '/beer/profile/193/83646/',\n", 198 | " u'Elevator Dark Horse Lager': '/beer/profile/1464/40147/',\n", 199 | " u'Faxe Amber': '/beer/profile/783/11961/',\n", 200 | " u'Fischer Tradition Amber': '/beer/profile/197/710/',\n", 201 | " u'Fix Dark': '/beer/profile/3963/85171/',\n", 202 | " u'Gigi': '/beer/profile/29250/115954/',\n", 203 | " u'Gran\\xe1t - BrouCzech Dark': '/beer/profile/21333/58010/',\n", 204 | " u'Guinness Black Lager': '/beer/profile/209/57285/',\n", 205 | " u'Heineken Dark Lager': '/beer/profile/81/1167/',\n", 206 | " u'Heineken Oud Bruin': '/beer/profile/81/4087/',\n", 207 | " u'Hot Rocks Lager': 
'/beer/profile/1337/48508/',\n", 208 | " u'John Michael Dark Lyric Lagrrr!': '/beer/profile/30452/98186/',\n", 209 | " u'Kelso Nut Brown Lager': '/beer/profile/8768/33357/',\n", 210 | " u'Kilikia Dark': '/beer/profile/671/2061/',\n", 211 | " u'LTD Series - 06': '/beer/profile/5316/86044/',\n", 212 | " u'Layla Dirty Blonde Lager': '/beer/profile/1939/12687/',\n", 213 | " u\"Leinenkugel's Creamy Dark\": '/beer/profile/710/2940/',\n", 214 | " u'Lev Black Lion': '/beer/profile/168/9395/',\n", 215 | " u\"McSorley's Dark Lager\": '/beer/profile/447/42663/',\n", 216 | " u'Moa Noir': '/beer/profile/15922/36001/',\n", 217 | " u'Mythos Red': '/beer/profile/1084/45087/',\n", 218 | " u'Nightfall Lager': '/beer/profile/32426/97943/',\n", 219 | " u'Obolon Oksamytove (Deep Velvet)': '/beer/profile/601/35035/',\n", 220 | " u'Oldgott': '/beer/profile/12215/25835/',\n", 221 | " u'Palone': '/beer/profile/568/25468/',\n", 222 | " u'Pietra': '/beer/profile/2977/7195/',\n", 223 | " u'Pils Noir': '/beer/profile/29196/86001/',\n", 224 | " u'Podkovan Dark': '/beer/profile/11206/28049/',\n", 225 | " u'Praga Dark Lager': '/beer/profile/303/82289/',\n", 226 | " u'Primator Dark Lager': '/beer/profile/707/14874/',\n", 227 | " u'Sagres Cerveja Preta (Dark)': '/beer/profile/301/6187/',\n", 228 | " u'San Miguel Dark Lager': '/beer/profile/355/7102/',\n", 229 | " u'Saranac Chocolate Amber Lager': '/beer/profile/99/6796/',\n", 230 | " u'Schwarzer Kristall': '/beer/profile/5687/45326/',\n", 231 | " u'Silva Strong Dark Beer': '/beer/profile/1705/4808/',\n", 232 | " u'St. 
Pauli Girl Special Dark': '/beer/profile/224/698/',\n", 233 | " u'Staropramen Granat Beer': '/beer/profile/437/14167/',\n", 234 | " u'Staropramen \\u010cern\\xfd': '/beer/profile/437/8689/',\n", 235 | " u'Super Bock Stout': '/beer/profile/439/37864/',\n", 236 | " u'Telenn Du': '/beer/profile/2520/6245/',\n", 237 | " u'Tomislav Pivo': '/beer/profile/1720/42822/',\n", 238 | " u'Tooheys Red': '/beer/profile/839/4001/',\n", 239 | " u\"Trafalgar Paddy's Irish Red Lager\": '/beer/profile/765/8590/',\n", 240 | " u'U Fleku Dark Lager': '/beer/profile/2805/6614/',\n", 241 | " u'U Rousse': '/beer/profile/22/2054/',\n", 242 | " u'Wolverine Dark': '/beer/profile/24808/72163/',\n", 243 | " u'Yebisu Black Beer': '/beer/profile/284/11490/',\n", 244 | " u'\\x8e\\u017datec Dark Lager': '/beer/profile/4106/57920/'}" 245 | ] 246 | }, 247 | "execution_count": 7, 248 | "metadata": {}, 249 | "output_type": "execute_result" 250 | } 251 | ], 252 | "source": [ 253 | "# Pulls the name and url to all beers with more than 25 'hads'\n", 254 | "\n", 255 | "ba_url = 'http://www.beeradvocate.com'\n", 256 | "style_suffix = '?sort=revsD&start='\n", 257 | "columns = ['name','url']\n", 258 | "temp = {}\n", 259 | "\n", 260 | "url = ba_url+'/beer/style/149/'+style_suffix\n", 261 | "soup = get_soup(url)\n", 262 | "num_beers = int(re.findall(r'(?<=\\(out of )\\d*',soup.find('tr').get_text())[0])\n", 263 | "print num_beers\n", 264 | "min_beer = False\n", 265 | "for i in range(num_beers//50):\n", 266 | " if min_beer:\n", 267 | " break\n", 268 | " url = ba_url+'/beer/style/149/'+style_suffix+str(i*50)\n", 269 | " soup = get_soup(url)\n", 270 | " for row in soup.find_all('tr')[3:-1]:\n", 271 | " cells = row.find_all('td')\n", 272 | " if int(cells[4].get_text().replace(',','')) < 25:\n", 273 | " min_beer = True\n", 274 | " break\n", 275 | " temp[cells[0].find('a').get_text()] = cells[0].find('a')['href']\n", 276 | " \n", 277 | "temp" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 
null, 283 | "metadata": { 284 | "collapsed": false 285 | }, 286 | "outputs": [], 287 | "source": [ 288 | "pickle.dump(temp,open('beer_list.pkl','wb'))" 289 | ] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": null, 294 | "metadata": { 295 | "collapsed": false 296 | }, 297 | "outputs": [], 298 | "source": [ 299 | "beer_urls = pickle.load(open('beer_list.pkl','rb'))" 300 | ] 301 | }, 302 | { 303 | "cell_type": "code", 304 | "execution_count": null, 305 | "metadata": { 306 | "collapsed": false 307 | }, 308 | "outputs": [], 309 | "source": [ 310 | "columns = ['name','url']\n", 311 | "beers = pd.DataFrame(columns=columns)\n", 312 | "\n", 313 | "beers['name'] = beer_urls.keys()\n", 314 | "beers['url'] = beer_urls.values()\n", 315 | "pd.to_pickle(beers,'beers.pkl')" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": 8, 321 | "metadata": { 322 | "collapsed": true 323 | }, 324 | "outputs": [], 325 | "source": [ 326 | "beers = pd.read_pickle('beers.pkl')" 327 | ] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": 9, 332 | "metadata": { 333 | "collapsed": false 334 | }, 335 | "outputs": [ 336 | { 337 | "data": { 338 | "text/html": [ 339 | "
\n", 340 | "\n", 341 | " \n", 342 | " \n", 343 | " \n", 344 | " \n", 345 | " \n", 346 | " \n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | "
nameurl
2299Death & Taxes Black Beer/beer/profile/763/2306/
\n", 356 | "
" 357 | ], 358 | "text/plain": [ 359 | " name url\n", 360 | "2299 Death & Taxes Black Beer /beer/profile/763/2306/" 361 | ] 362 | }, 363 | "execution_count": 9, 364 | "metadata": {}, 365 | "output_type": "execute_result" 366 | } 367 | ], 368 | "source": [ 369 | "beers[beers.name == 'Death & Taxes Black Beer']" 370 | ] 371 | }, 372 | { 373 | "cell_type": "code", 374 | "execution_count": 18, 375 | "metadata": { 376 | "collapsed": false 377 | }, 378 | "outputs": [], 379 | "source": [ 380 | "def get_beer_soup(url):\n", 381 | " ba_url = 'http://www.beeradvocate.com'\n", 382 | " url = ba_url+url\n", 383 | " soup = get_soup(url)\n", 384 | " return soup" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": 20, 390 | "metadata": { 391 | "collapsed": false 392 | }, 393 | "outputs": [], 394 | "source": [ 395 | "import sys\n", 396 | "sys.setrecursionlimit(100000000)" 397 | ] 398 | }, 399 | { 400 | "cell_type": "code", 401 | "execution_count": 31, 402 | "metadata": { 403 | "collapsed": true 404 | }, 405 | "outputs": [ 406 | { 407 | "name": "stderr", 408 | "output_type": "stream", 409 | "text": [ 410 | "\r", 411 | " 0%| | 0/89 [00:00\n", 1629 | "\n", 1630 | " \n", 1631 | " \n", 1632 | " \n", 1633 | " \n", 1634 | " \n", 1635 | " \n", 1636 | " \n", 1637 | " \n", 1638 | " \n", 1639 | " \n", 1640 | " \n", 1641 | " \n", 1642 | " \n", 1643 | " \n", 1645 | " \n", 1646 | " \n", 1647 | " \n", 1648 | " \n", 1649 | " \n", 1650 | " \n", 1652 | " \n", 1653 | " \n", 1654 | " \n", 1655 | " \n", 1656 | " \n", 1657 | " \n", 1659 | " \n", 1660 | " \n", 1661 | " \n", 1662 | " \n", 1663 | " \n", 1664 | " \n", 1666 | " \n", 1667 | " \n", 1668 | " \n", 1669 | " \n", 1670 | " \n", 1671 | " \n", 1673 | " \n", 1674 | " \n", 1675 | "
nameurlsoup
229Franconia Amber/beer/profile/17033/43115/<!DOCTYPE html>\n", 1644 | "<html class=\"Public NoJs Logge...
459Big American Stout/beer/profile/26824/113395/<!DOCTYPE html>\n", 1651 | "<html class=\"Public NoJs Logge...
689Love's Armor/beer/profile/28019/90275/<!DOCTYPE html>\n", 1658 | "<html class=\"Public NoJs Logge...
919Deadeye Jack Porter/beer/profile/19126/47303/<!DOCTYPE html>\n", 1665 | "<html class=\"Public NoJs Logge...
1149Peach Lambic/beer/profile/1170/45334/<!DOCTYPE html>\n", 1672 | "<html class=\"Public NoJs Logge...
\n", 1676 | "" 1677 | ], 1678 | "text/plain": [ 1679 | " name url \\\n", 1680 | "229 Franconia Amber /beer/profile/17033/43115/ \n", 1681 | "459 Big American Stout /beer/profile/26824/113395/ \n", 1682 | "689 Love's Armor /beer/profile/28019/90275/ \n", 1683 | "919 Deadeye Jack Porter /beer/profile/19126/47303/ \n", 1684 | "1149 Peach Lambic /beer/profile/1170/45334/ \n", 1685 | "\n", 1686 | " soup \n", 1687 | "229 \n", 1688 | "\n", 1690 | "\n", 1692 | "\n", 1694 | "\n", 1696 | "