├── .gitattributes ├── .gitignore ├── LICENSE.txt ├── README.md ├── pyproject.toml ├── requirements.txt └── ufc.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .google-cookie 2 | .ipynb 3 | __pycache__ 4 | .ipynb_checkpoints 5 | ufc_api.egg-info 6 | dist -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Zackary Young 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # UFC API 2 | 3 | UFC API is a lightweight web crawler built in Python to retrieve data on UFC fighters and events. 4 | 5 | # Installation 6 | 7 | You can install UFC API using pip: 8 | 9 | ``` 10 | pip install ufc_api 11 | ``` 12 | 13 | # Usage 14 | 15 | Usage is simple. To get stats on a particular fighter returned as a json: 16 | 17 | ``` 18 | >>> from ufc import get_fighter 19 | 20 | >>> get_fighter('Jon Jones') 21 | 22 | >>> {'name': 'Jon Jones', 23 | 'nickname': 'Bones', 24 | 'nationality': 'United States', 25 | 'birthplace': 'Rochester, New York', 26 | 'birthdate': 'Jul 19, 1987', 27 | 'age': '35', 28 | 'height': '6\'4"', 29 | 'weight': '248 lbs', 30 | 'association': 'Jackson-Wink MMA', 31 | 'weight_class': 'Heavyweight', 32 | 'wins': {'total': '27', 33 | 'ko/tko': '10', 34 | 'submissions': '7', 35 | 'decisions': '10', 36 | 'others': '0'}, 37 | 'losses': {'total': '1', 38 | 'ko/tko': '0', 39 | 'submissions': '0', 40 | 'decisions': '0', 41 | 'others': '1'}, 42 | 'fights': [{'name': 'UFC 285 - Jones vs. Gane', 43 | 'date': 'Mar / 04 / 2023', 44 | 'url': 'https://www.sherdog.com/events/UFC-285-Jones-vs-Gane-95232', 45 | 'result': 'win', 46 | 'method': 'Submission (Guillotine Choke)', 47 | 'referee': 'Marc Goddard', 48 | 'round': '1', 49 | 'time': '2:04', 50 | 'opponent': 'Ciryl Gane'}, 51 | ... 52 | 53 | ``` 54 | 55 | To get data on an event, the usage is similar: 56 | 57 | ``` 58 | >>> from ufc import get_event 59 | 60 | >>> get_event('UFC 280') 61 | 62 | >>> {'name': 'UFC 280: Oliveira vs. 
Makhachev', 63 | 'date': '2022-10-22', 64 | 'location': 'Yas Island/Yas West United Arab Emirates', 65 | 'venue': 'Etihad Arena', 66 | 'fights': [{'weightclass': 'Lightweight Title', 67 | 'red corner': {'name': 'Charles Oliveira', 68 | 'ranking': 'Unranked', 69 | 'odds': '+165', 70 | 'link': 'https://www.ufc.com/athlete/charles-oliveira', 71 | 'result': 'Loss'}, 72 | 'blue corner': {'name': 'Islam Makhachev', 73 | 'ranking': 'Unranked', 74 | 'odds': '-195', 75 | 'link': 'https://www.ufc.com/athlete/islam-makhachev', 76 | 'result': 'Win'}, 77 | 'round': '2', 78 | 'time': '3:16', 79 | 'method': 'Submission'}, 80 | {'weightclass': 'Bantamweight Title', 81 | 'red corner': {'name': 'Aljamain Sterling', 82 | 'ranking': 'Unranked', 83 | 'odds': '-175', 84 | 'link': 'https://www.ufc.com/athlete/aljamain-sterling', 85 | 'result': 'Win'}, 86 | 'blue corner': {'name': 'TJ Dillashaw', 87 | 'ranking': 'Unranked', 88 | 'odds': '+150', 89 | 'link': 'https://www.ufc.com/athlete/tj-dillashaw', 90 | 'result': 'Loss'}, 91 | 'round': '2', 92 | 'time': '3:44', 93 | 'method': 'KO/TKO'}, 94 | {'weightclass': 'Bantamweight', 95 | 'red corner': {'name': 'Petr Yan', 96 | 'ranking': 'Unranked', 97 | 'odds': '-275', 98 | 'link': 'https://www.ufc.com/athlete/petr-yan', 99 | 'result': 'Loss'}, 100 | 'blue corner': {'name': "Sean O'Malley", 101 | 'ranking': 'Unranked', 102 | 'odds': '+230', 103 | 'link': 'https://www.ufc.com/athlete/sean-omalley', 104 | 'result': 'Win'}, 105 | 'round': '3', 106 | 'time': '5:00', 107 | 'method': 'Decision - Split'}, 108 | ... 
"""Scrape UFC fighter and event data from Sherdog, UFC.com and Google search.

Public entry points are ``get_fighter``, ``get_event`` and
``get_upcoming_events``; the remaining public functions are the building
blocks they are composed of.
"""

import datetime as dt
import math
import re
from typing import Any, Dict, List
from urllib.parse import urljoin

# NOTE: ``requests`` and ``lxml`` are imported inside ``_fetch_tree`` (the only
# place that needs them) so the pure parsing helpers stay importable on their
# own.

_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
}
_TIMEOUT = 30  # seconds; without it a stalled server would hang the caller forever
_SHERDOG_BASE = "https://www.sherdog.com"
_UFC_BASE = "https://www.ufc.com/"


class LinkNotFoundError(LookupError):
    """Raised when a web search returns no link to the requested site."""


def _fetch_tree(url, params=None):
    """Download *url* and return it parsed as an lxml HTML document.

    Raises ``requests.HTTPError`` on a non-2xx response instead of letting an
    error page flow into the XPath extraction code.
    """
    import requests as req
    from lxml import html

    response = req.get(url, headers=_HEADERS, params=params, timeout=_TIMEOUT)
    response.raise_for_status()
    return html.document_fromstring(response.content)


def _first(values, default=""):
    """Return the first item of *values*, or *default* when it is empty."""
    return values[0] if values else default


def _at(values, index, default=""):
    """Return ``values[index]``, or *default* when that position is missing."""
    try:
        return values[index]
    except IndexError:
        return default


def _find_link(query, marker, message, excluded=()):
    """Return the first search hit containing *marker* and no *excluded* part.

    Raises ``LinkNotFoundError(message)`` when no hit qualifies.
    """
    for url in search(query):
        if marker in url and not any(part in url for part in excluded):
            return url
    raise LinkNotFoundError(message)


def parse_sherdog_fighter(url: str) -> Dict[str, Any]:
    """Parse a Sherdog fighter page into a dictionary.

    The result holds biography fields, a ``wins`` and a ``losses`` breakdown
    and a ``fights`` list with one entry per row of the fight history tables.
    Optional page elements (nickname, association, referee, ...) that are
    absent yield ``''``; absent win/loss counters yield ``'0'``.

    Raises ``IndexError`` if the page has no fighter name at all, which means
    it is not a fighter page.
    """
    tree = _fetch_tree(url)

    def text(path, default=""):
        return _first(tree.xpath(path), default)

    wins_detailed = tree.xpath("//div[@class='wins']/div[@class='meter']/div[1]/text()")
    losses_detailed = tree.xpath("//div[@class='loses']/div[@class='meter']/div[1]/text()")

    fighter = {
        # The name is the one mandatory field: fail loudly if it is missing.
        'name': tree.xpath("//span[@class='fn']/text()")[0],
        'nickname': text("//span[@class='nickname']/em/text()"),
        'nationality': text("//strong[@itemprop='nationality']/text()"),
        'birthplace': text("//span[@class='locality']/text()"),
        'birthdate': text("//span[@itemprop='birthDate']/text()"),
        'age': text("//span[@itemprop='birthDate']/preceding-sibling::b/text()"),
        'height': text("//b[@itemprop='height']/text()"),
        'weight': text("//b[@itemprop='weight']/text()"),
        'association': text("//span[@itemprop='memberOf']/a/span/text()"),
        'weight_class': text("//div[@class='association-class']/a/text()"),

        'wins': {
            'total': text("//div[@class='winloses win']/span[2]/text()", "0"),
            'ko/tko': _at(wins_detailed, 0, "0"),
            'submissions': _at(wins_detailed, 1, "0"),
            'decisions': _at(wins_detailed, 2, "0"),
            # The fourth bucket is looked up independently for wins and
            # losses, so a missing one never discards the other.
            'others': _at(wins_detailed, 3, "0"),
        },
        'losses': {
            'total': text("//div[@class='winloses lose']/span[2]/text()", "0"),
            'ko/tko': _at(losses_detailed, 0, "0"),
            'submissions': _at(losses_detailed, 1, "0"),
            'decisions': _at(losses_detailed, 2, "0"),
            'others': _at(losses_detailed, 3, "0"),
        },

        'fights': [],
    }

    fight_rows = tree.xpath("//table[@class='new_table fighter']/tr[not(@class='table_head')]")

    for row in fight_rows:
        href = _first(row.xpath("td[3]/a/@href"))
        fighter['fights'].append({
            'name': _first(row.xpath("td[3]/a/descendant-or-self::*/text()")),
            'date': _first(row.xpath("td[3]/span/text()")),
            'url': urljoin(_SHERDOG_BASE, href) if href else "",
            'result': _first(row.xpath("td[1]/span/text()")),
            'method': _first(row.xpath("td[4]/b/text()")),
            'referee': _first(row.xpath("td[4]/span/a/text()")),
            'round': _first(row.xpath("td[5]/text()")),
            'time': _first(row.xpath("td[6]/text()")),
            'opponent': _first(row.xpath("td[2]/a/text()")),
        })
    return fighter


def get_ufc_stats(url: str) -> Dict[str, Dict[str, str]]:
    """Parse striking and takedown statistics from a UFC.com athlete page.

    Returns a dictionary with a ``strikes`` and a ``takedowns`` section.
    Counters that the page does not provide are reported as ``'0'`` and
    missing rate figures as ``''``.
    """
    tree = _fetch_tree(url)

    distance = tree.xpath("//div[@class='c-stat-3bar__value']/text()")
    stats = tree.xpath("//div[@class='c-stat-compare__number']/text()")

    # An empty <dd> has ``text`` None; it is recorded as "0".
    counters = [
        item.text if item.text is not None else "0"
        for item in tree.xpath("//dd")
    ]

    def by_position(index):
        # Only the part before the first space of the value is kept.
        return _at(distance, index, "0").split(" ")[0]

    return {
        'strikes': {
            'attempted': _at(counters, 1, "0"),
            'landed': _at(counters, 0, "0"),
            'standing': by_position(0),
            'clinch': by_position(1),
            'ground': by_position(2),
            'striking defense': _at(stats, 4).strip(),
            'strikes per minute': _at(stats, 0).strip(),
        },
        'takedowns': {
            'attempted': _at(counters, 3, "0"),
            'landed': _at(counters, 2, "0"),
            'takedown defense': _at(stats, 5).strip(),
            'subs per 15min': _at(stats, 3).strip(),
        },
    }


def search(query: str) -> List[str]:
    """Return the result links of a Google search for *query*.

    The query is sent as a request parameter so that characters such as
    ``&``, ``#`` or ``+`` are percent-encoded instead of corrupting the URL.
    """
    tree = _fetch_tree('https://www.google.com/search', params={'q': query})
    return tree.xpath("//h3/parent::a/@href")


def get_sherdog_link(query: str) -> str:
    """Return the first Sherdog fighter-page link found for *query*.

    Links containing ``/news/`` are skipped.
    Raises ``LinkNotFoundError`` when the search yields no such link.
    """
    return _find_link(
        query + " Sherdog",
        "sherdog.com/fighter/",
        "Sherdog link not found !",
        excluded=("/news/",),
    )


def get_ufc_link(query: str) -> str:
    """Return the first UFC.com athlete-page link found for *query*.

    Raises ``LinkNotFoundError`` when the search yields no such link.
    """
    return _find_link(query + " UFC.com", "ufc.com/athlete/", "UFC link not found !")


def get_fighter(query: str) -> Dict[str, Any]:
    """Return the Sherdog profile of a fighter merged with their UFC.com stats.

    Both pages are located through ``search``; ``LinkNotFoundError`` is raised
    when either one cannot be found.
    """
    sherdog_link = get_sherdog_link(query)
    ufc_link = get_ufc_link(query)

    fighter = parse_sherdog_fighter(sherdog_link)
    fighter.update(get_ufc_stats(ufc_link))
    return fighter


def get_upcoming_event_links() -> List[str]:
    """Return absolute URLs of the upcoming events listed on ufc.com/events."""
    tree = _fetch_tree('https://www.ufc.com/events')
    hrefs = tree.xpath("//details[@id='events-list-upcoming']/div/div/div/div/div/section/ul/li/article/div[1]/div/a/@href")
    # urljoin copes with both "event/x" and "/event/x"; plain concatenation
    # turned the latter into "https://www.ufc.com//event/x".
    return [urljoin(_UFC_BASE, href) for href in hrefs]


def get_ufc_link_event(query: str) -> str:
    """Return the first UFC.com event-page link found for *query*.

    Raises ``LinkNotFoundError`` when the search yields no such link.
    """
    return _find_link(query + " UFC", "ufc.com/event/", "UFC link not found !")


def get_ranking(fight: Any, corner: str) -> str:
    """Return the ranking shown for one corner of a fight-card entry.

    *fight* is the element of one fight; *corner* is ``'red'`` or anything
    else for blue. The first character of the ranking text is dropped
    (presumably a leading ``#`` -- confirm against the live markup).
    Returns ``"Unranked"`` when no ranking is present.
    """
    column = 1 if corner == 'red' else 2
    ranks = fight.xpath(f"div/div/div/div[2]/div[2]/div[2]/div[{column}]/span/text()")
    return ranks[0][1:] if ranks else "Unranked"


def get_name(fight: Any, corner: str) -> str:
    """Return the fighter name for one corner of a fight-card entry.

    *corner* is ``'red'`` or anything else for blue. The name parts are read
    from the ``<span>`` children of the athlete link and joined with spaces;
    when there are none, the link's own text is used instead (stripped).
    """
    column = 1 if corner == 'red' else 3
    link = f"div/div/div/div[2]/div[2]/div[5]/div[{column}]/a"

    name = " ".join(fight.xpath(link + "/span/text()"))
    if name == '':
        name = " ".join(fight.xpath(link + "/text()")).strip()
    return name


def parse_event(url: str, past: bool = True) -> Dict[str, Any]:
    """Parse a UFC.com event page into a dictionary.

    The result has ``name``, ``date`` (``YYYY-MM-DD``), ``location``,
    ``venue`` and a ``fights`` list. When *past* is true every fight also
    carries ``round``, ``time`` and ``method`` plus a ``result`` per corner.
    Page elements that are absent (odds, results of a fight that has not
    happened, ...) yield ``''`` rather than an ``IndexError``.
    """
    tree = _fetch_tree(url)
    fights_html = tree.xpath("//div[@class='fight-card']/div/div/section/ul/li")

    prefix = _first(tree.xpath("//div[@class='c-hero__header']/div[1]/div/h1/text()")).strip()
    names = tree.xpath("//div[@class='c-hero__header']/div[2]/span/span/text()")
    if names:
        name = f"{prefix}: {names[0].strip()} vs. {names[-1].strip()}"
    else:
        name = prefix

    stamps = tree.xpath("//div[@class='c-hero__bottom-text']/div[1]/@data-timestamp")
    if stamps:
        # fromtimestamp() without a tz argument converts to the local time
        # zone of the machine running this code.
        date = dt.datetime.fromtimestamp(int(stamps[0])).strftime("%Y-%m-%d")
    else:
        date = ""

    # The text is split on commas: first part is the venue, second the location.
    place = _first(tree.xpath("//div[@class='c-hero__bottom-text']/div[2]/div/text()")).split(",")

    event = {
        'name': name,
        'date': date,
        'location': _at(place, 1).strip(),
        'venue': place[0].strip(),
        'fights': [],
    }

    for fight in fights_html:
        this_fight = {
            # The last five characters are cut off (presumably a trailing
            # " Bout" -- confirm against the live markup).
            'weightclass': _first(fight.xpath("div/div/div/div[2]/div[2]/div[1]/div[2]/text()"))[:-5],
            'red corner': {
                'name': get_name(fight, 'red'),
                'ranking': get_ranking(fight, 'red'),
                'odds': _first(fight.xpath("div/div/div/div[4]/div[2]/span[1]/span/text()")),
                'link': _first(fight.xpath("div/div/div/div[2]/div[2]/div[5]/div[1]/a/@href")),
            },
            'blue corner': {
                'name': get_name(fight, 'blue'),
                'ranking': get_ranking(fight, 'blue'),
                'odds': _first(fight.xpath("div/div/div/div[4]/div[2]/span[3]/span/text()")),
                'link': _first(fight.xpath("div/div/div/div[2]/div[2]/div[5]/div[3]/a/@href")),
            },
        }
        if past:
            result = fight.xpath("div/div/div/div[2]//div[@class='c-listing-fight__outcome-wrapper']/div/text()")
            method = fight.xpath("div//div[@class='c-listing-fight__result-text method']/text()")
            finished_round = fight.xpath("div//div[@class='c-listing-fight__result-text round']/text()")
            finished_time = fight.xpath("div//div[@class='c-listing-fight__result-text time']/text()")

            this_fight['round'] = _first(finished_round)
            this_fight['time'] = _first(finished_time)
            this_fight['method'] = _first(method)
            # The first outcome is assigned to the red corner, the second to blue.
            this_fight['red corner']['result'] = _at(result, 0).strip()
            this_fight['blue corner']['result'] = _at(result, 1).strip()
        event['fights'].append(this_fight)
    return event


def get_upcoming_events() -> Dict[str, Dict[str, Any]]:
    """Return every upcoming event, parsed without results, keyed by name."""
    results = {}
    for url in get_upcoming_event_links():
        event = parse_event(url, False)
        results[event['name']] = event
    return results


def get_event(query: str) -> Dict[str, Any]:
    """Find the UFC.com page of the event matching *query* and parse it.

    Raises ``LinkNotFoundError`` when the search yields no event link.
    """
    return parse_event(get_ufc_link_event(query))