├── .gitignore ├── LICENSE └── scrape.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Jesse Presnell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scrape.py: -------------------------------------------------------------------------------- 1 | from lxml import html 2 | import requests 3 | import json, re 4 | import urllib2 5 | import psycopg2 6 | from datetime import datetime 7 | import sys 8 | import codecs 9 | import pytz 10 | import sqlalchemy as sa 11 | import MySQLdb 12 | from bs4 import BeautifulSoup 13 | 14 | 15 | eastern = pytz.timezone('US/Eastern') 16 | 17 | def connect_local(): 18 | conn = MySQLdb.connect(host='', user='', passwd='', db='') 19 | cur = conn.cursor() 20 | cur.execute("create table if not exists contracts (id integer primary key AUTO_INCREMENT, ticker_id varchar(255) NOT NULL, contract_ticker_symbol varchar(255), contract_name varchar(255), contract_date_start varchar(255), contract_date_end varchar(255))") 21 | cur.execute("create table if not exists tickers (ticker_id integer primary key, ticker_symbol varchar(255) NOT NULL unique, ticker_name varchar(255), ticker_short_name varchar(255), ticker_image varchar(255), ticker_timestamp varchar(255), ticker_status varchar(255));") 22 | cur.execute("create table if not exists contract_data (id INTEGER primary key AUTO_INCREMENT, contract_ticker_symbol varchar(255), ticker_timestamp varchar(255), contract_traded INTEGER,contract_today_volume INTEGER,contract_total_shares INTEGER,contract_todays_change varchar(20))") 23 | cur.execute("create table if not exists contract_offers (ticker_id integer, contract_ticker_symbol varchar(255), contract_short_name varchar(255), contract_last_trade_price REAL, contract_best_buy_yes REAL, contract_best_buy_no REAL, contract_best_sell_yes REAL, contract_best_sell_no REAL, contract_last_close_price REAL, ticker_timestamp varchar(255))") 24 | return cur, conn 25 | 26 | def get_tickers(): 27 | page = re=json.load(urllib2.urlopen('https://www.predictit.org/api/marketdata/all/')) 28 | market_data=page['Markets'] 29 | return market_data 30 | 31 | def get_all_data(l): 32 | for x in l: 33 | ticker_name=x['Name'].replace(u"\u2018", "'").replace(u"\u2019", "'") 34 | ticker_short_name=x['ShortName'].replace(u"\u2018", "'").replace(u"\u2019", "'") 35 | ticker_symbol=x['TickerSymbol'] 36 | ticker_image=x['Image'] 37 | ticker_timestamp=x['TimeStamp'] 38 | ticker_status=x['Status'] 39 | ticker_id=x['ID'] 40 | cur.execute("INSERT IGNORE INTO tickers (ticker_name, ticker_short_name, ticker_symbol, ticker_image, ticker_timestamp, ticker_status, ticker_id) values (%s, %s, %s, %s, %s, %s, %s)",(ticker_name, ticker_short_name, ticker_symbol, ticker_image, ticker_timestamp, ticker_status, ticker_id,)) 41 | for c in x['Contracts']: 42 | contract_id=c['ID'] 43 | contract_status=c['Status'] 44 | contract_name=c['Name'].replace(u"\u2018", "'").replace(u"\u2019", "'") 45 | contract_url=c['URL'] 46 | contract_last_trade_price=c['LastTradePrice'] 47 | contract_best_buy_yes=c['BestBuyYesCost'] 48 | contract_best_buy_no=c['BestBuyNoCost'] 49 | contract_best_sell_yes=c['BestSellYesCost'] 50 | contract_best_sell_no=c['BestSellNoCost'] 51 | contract_short_name=c['ShortName'].replace(u"\u2018", "'").replace(u"\u2019", "'") 52 | contract_ticker_symbol=c['TickerSymbol'] 53 | contract_last_close_price=c['LastClosePrice'] 54 | try: 55 | response=urllib2.urlopen('https://www.predictit.org/Ticker/'+c['TickerSymbol']) 56 | html=response.read() 57 | soup=BeautifulSoup(html,'lxml') 58 | contract_date_start=soup.find('td',text=re.compile("Start Date:")).next_sibling.next_sibling.string 59 | contract_traded=soup.find('td',text=re.compile("Shares Traded:")).next_sibling.next_sibling.string.replace(",", "") 60 | contract_today_volume=soup.find('td',text=re.compile("Today's Volume:")).next_sibling.next_sibling.string.replace(",", "") 61 | contract_total_shares=soup.find('td',text=re.compile("Total Shares:")).next_sibling.next_sibling.string.replace(",", "") 62 | contract_todays_change=soup.find('td',text=re.compile("Today's Change:")).next_sibling.next_sibling.string.replace("+", "") 63 | cur.execute("INSERT INTO contract_data (contract_ticker_symbol, ticker_timestamp, contract_traded, contract_today_volume, contract_total_shares, contract_todays_change) values (%s, %s, %s, %s, %s, %s)",(contract_ticker_symbol, ticker_timestamp, contract_traded, contract_today_volume, contract_total_shares, contract_todays_change,)) 64 | if contract_todays_change=='NC': 65 | contract_todays_change=0.0 66 | if c['DateEnd']!="N/A": 67 | cur.execute("insert into contracts(ticker_id, contract_ticker_symbol, contract_name, contract_date_start,contract_date_end) values (%s, %s, %s, %s, %s))",(ticker_id, contract_ticker_symbol, contract_name, contract_date_start,contract_date_end,)) 68 | else: 69 | cur.execute("insert into contracts(ticker_id, contract_ticker_symbol, contract_name, contract_date_start) values (%s, %s, %s, %s)",(ticker_id, contract_ticker_symbol, contract_name, contract_date_start,)) 70 | except: 71 | try: 72 | cur.execute("insert into contracts(ticker_id, contract_ticker_symbol, contract_name, contract_date_end) values (%s, %s, %s, %s))",(ticker_id, contract_ticker_symbol, contract_name,contract_date_end,)) 73 | except: 74 | cur.execute("insert into contracts(ticker_id, contract_ticker_symbol, contract_name) values (%s, %s, %s)",(ticker_id, contract_ticker_symbol, contract_name,)) 75 | pass 76 | cur.execute("INSERT INTO contract_offers (ticker_id, contract_ticker_symbol, contract_short_name, contract_last_trade_price, contract_best_buy_yes, contract_best_buy_no, contract_best_sell_yes, contract_best_sell_no, contract_last_close_price, ticker_timestamp) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)", (ticker_id, contract_ticker_symbol, contract_short_name, contract_last_trade_price, contract_best_buy_yes, contract_best_buy_no, contract_best_sell_yes, contract_best_sell_no, contract_last_close_price, ticker_timestamp, )) 77 | conn.commit() 78 | cur, conn=connect_local() 79 | x=get_tickers() 80 | get_all_data(x) 81 | print "Data Uploaded" 82 | --------------------------------------------------------------------------------