├── LICENSE ├── README.md ├── bfscraper ├── bfscraper.py └── bfscraper_place.py └── setup.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Antony Papadimitriou 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # bfscraper 2 | 3 | --- 4 | 5 | The package scrapes Betfair price data (BSP, WAP etc) for Australian horse racing markets. 6 | 7 | --- 8 | 9 | You can use the package in the following way: 10 | 11 | ```python 12 | from bfscraper import bfscraper 13 | 14 | df = bfscraper.scrape("2018-01-01", "2018-12-31") 15 | 16 | print(df) 17 | ``` 18 | 19 | --- 20 | 21 | The above code would scrape the relevant data from January 1, 2018 till December 31, 2018. 
import pandas as pd
from datetime import timedelta, date, datetime

# Column layout of the Betfair SP CSV files; used only to shape the empty
# result frame when no day in the range could be downloaded.
_COLUMN_NAMES = ['EVENT_ID',
                 'MENU_HINT',
                 'EVENT_NAME',
                 'EVENT_DT',
                 'SELECTION_ID',
                 'SELECTION_NAME',
                 'WIN_LOSE',
                 'BSP',
                 'PPWAP',
                 'MORNINGWAP',
                 'PPMAX',
                 'PPMIN',
                 'IPMAX',
                 'IPMIN',
                 'MORNINGTRADEDVOL',
                 'PPTRADEDVOL',
                 'IPTRADEDVOL']


def daterange(start_date, end_date):
    """Yield each date from start_date through end_date, inclusive."""
    for offset in range((end_date - start_date).days + 1):
        yield start_date + timedelta(days=offset)


def scrape(start_date, end_date):
    """Scrape Betfair win-market SP price data for Australian horse racing.

    Parameters
    ----------
    start_date, end_date : str
        Inclusive date range, each in "YYYY-MM-DD" format.

    Returns
    -------
    pandas.DataFrame
        One row per selection per market, in ascending date order.
        Days whose CSV is missing or unreadable are skipped (best effort).
    """
    start = datetime.strptime(start_date, "%Y-%m-%d").date()
    end = datetime.strptime(end_date, "%Y-%m-%d").date()
    base_url = "https://promo.betfair.com/betfairsp/prices/dwbfpricesauswin"

    # Collect one frame per day and concatenate once at the end: growing a
    # DataFrame with concat inside the loop is quadratic.
    daily_frames = []
    for single_date in daterange(start, end):
        print("Processing " + str(single_date))
        url = base_url + single_date.strftime("%d%m%Y") + ".csv"
        try:
            daily_frames.append(pd.read_csv(url))
        except Exception as exc:
            # Best-effort scrape: a missing day or network hiccup should not
            # abort the whole run, but it should not be silent either.
            print("Skipping " + str(single_date) + ": " + str(exc))

    if not daily_frames:
        return pd.DataFrame(columns=_COLUMN_NAMES)

    # Note: the original prepended each new day, which produced reverse-date
    # order; appending keeps the rows chronological.
    return pd.concat(daily_frames, ignore_index=True, sort=False)
import pandas as pd
from datetime import timedelta, date, datetime

# Column layout of the Betfair SP CSV files; used only to shape the empty
# result frame when no day in the range could be downloaded.
_COLUMN_NAMES = ['EVENT_ID',
                 'MENU_HINT',
                 'EVENT_NAME',
                 'EVENT_DT',
                 'SELECTION_ID',
                 'SELECTION_NAME',
                 'WIN_LOSE',
                 'BSP',
                 'PPWAP',
                 'MORNINGWAP',
                 'PPMAX',
                 'PPMIN',
                 'IPMAX',
                 'IPMIN',
                 'MORNINGTRADEDVOL',
                 'PPTRADEDVOL',
                 'IPTRADEDVOL']


def daterange(start_date, end_date):
    """Yield each date from start_date through end_date, inclusive."""
    for offset in range((end_date - start_date).days + 1):
        yield start_date + timedelta(days=offset)


def scrape(start_date, end_date):
    """Scrape Betfair place-market SP price data for Australian horse racing.

    Parameters
    ----------
    start_date, end_date : str
        Inclusive date range, each in "YYYY-MM-DD" format.

    Returns
    -------
    pandas.DataFrame
        One row per selection per market, in ascending date order.
        Days whose CSV is missing or unreadable are skipped (best effort).
    """
    start = datetime.strptime(start_date, "%Y-%m-%d").date()
    end = datetime.strptime(end_date, "%Y-%m-%d").date()
    base_url = "https://promo.betfair.com/betfairsp/prices/dwbfpricesausplace"

    # Collect one frame per day and concatenate once at the end: growing a
    # DataFrame with concat inside the loop is quadratic.
    daily_frames = []
    for single_date in daterange(start, end):
        print("Processing " + str(single_date))
        url = base_url + single_date.strftime("%d%m%Y") + ".csv"
        try:
            daily_frames.append(pd.read_csv(url))
        except Exception as exc:
            # Best-effort scrape: a missing day or network hiccup should not
            # abort the whole run, but it should not be silent either.
            print("Skipping " + str(single_date) + ": " + str(exc))

    if not daily_frames:
        return pd.DataFrame(columns=_COLUMN_NAMES)

    # Note: the original prepended each new day, which produced reverse-date
    # order; appending keeps the rows chronological.
    return pd.concat(daily_frames, ignore_index=True, sort=False)
--------------------------------------------------------------------------------