├── web_pages └── .gitkeep ├── delete_small_files.sh ├── .gitignore ├── create_database.py ├── delete_ignored_files.sh ├── pyproject.toml ├── main.py ├── matches.py ├── README.md ├── update_local.py ├── utils.py ├── players.py └── db_helper.json /web_pages/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /delete_small_files.sh: -------------------------------------------------------------------------------- 1 | find web_pages/ -type f -size -400k -print -delete -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | web_pages/ 2 | __pycache__/ 3 | test.db 4 | test.py 5 | master.db 6 | premier_league.db 7 | uv.lock -------------------------------------------------------------------------------- /create_database.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | 4 | def create_database(cursor, helper_file="db_helper.json", database_file="test_build.db"): 5 | with open(helper_file, "r") as f: 6 | data = json.load(f)["column_names"] 7 | 8 | for table_name, fields in data.items(): 9 | statement = ", ".join([" ".join([field, datatype]) for field, datatype in fields.items()]) 10 | cursor.execute(f"CREATE TABLE IF NOT EXISTS {table_name} ({statement})") 11 | -------------------------------------------------------------------------------- /delete_ignored_files.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | WEBPAGES_DIR="web_pages" 4 | UTILS_FILE="utils.py" 5 | 6 | # Extract all match IDs (quoted hex strings) inside ignore dict from utils.py 7 | match_ids=($(grep -oP '"[a-f0-9]{8}"' "$UTILS_FILE" | tr -d '"' | sort -u)) 8 | 9 | echo "Found ${#match_ids[@]} match IDs to delete." 10 | 11 | for match_id in "${match_ids[@]}"; do 12 | # Find files named exactly "$match_id" or "$match_id.*" and delete them 13 | find "$WEBPAGES_DIR" -type f \( -name "$match_id" -o -name "$match_id" \) -print -exec rm -f {} \; 14 | done 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "fbref-db" 3 | version = "0.1.0" 4 | description = "Add your description here" 5 | readme = "README.md" 6 | requires-python = ">=3.13" 7 | dependencies = [ 8 | "accelerate>=1.9.0", 9 | "langchain-community>=0.3.27", 10 | "langchain>=0.3.26", 11 | "openai>=1.97.0", 12 | "sentence-transformers>=5.0.0", 13 | "torch>=2.7.1", 14 | "transformers>=4.53.2", 15 | "langchain-openai>=0.3.28", 16 | "beautifulsoup4>=4.13.4", 17 | "aiofiles>=24.1.0", 18 | "pandas>=2.3.1", 19 | "lxml>=6.0.0", 20 | "curl-cffi>=0.12.0", 21 | "tqdm>=4.67.1", 22 | ] 23 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import sqlite3 4 | import time 5 | 6 | from create_database import create_database 7 | from matches import update_matches 8 | from players import insert_players 9 | from update_local import update_local 10 | from utils import get_matches_in_database 11 | 12 | 13 | def main(database_file, competitions=["Premier_League"], seasons=["2021-2022"]): 14 | connection = sqlite3.connect(database_file) 15 | cursor = connection.cursor() 16 | create_database(cursor) 17 | 18 | for competition in competitions: 19 | for season in seasons: 20 | with open("db_helper.json", "r") as f: 21 | comp = json.load(f)["competitions"][competition] 22 | season_start = int(season.split("-")[0]) 23 | 24 | if comp["start_year"] > season_start or comp["end_year"] < season_start: 25 | print( 26 | f"xG data doesn't exist for the {season} {competition} season" 27 | ) 28 | continue 29 | update_local(competition, season) 30 | update_matches(cursor, competition, season) 31 | 32 | db_matches = get_matches_in_database(cursor, competition, season) 33 | local_matches = set( 34 | os.listdir(os.path.join("web_pages", competition, season)) 35 | ) 36 | 37 | matches_to_add = local_matches - db_matches 38 | insert_players(cursor, competition, season, list(matches_to_add)) 39 | 40 | connection.commit() 41 | 42 | if len(matches_to_add) <= 20: 43 | print( 44 | "Didn't need to add many files to the database. Sleeping to avoid rate limit" 45 | ) 46 | """ 47 | I have done lots of testing to see what sleep length is required to not get put in FBRef jail. 48 | It's annoying and slow, but 7 seconds is required 49 | """ 50 | time.sleep(7) 51 | 52 | connection.close() 53 | 54 | 55 | if __name__ == "__main__": 56 | competitions = ["Premier_League", "Bundesliga", "La_Liga", "Ligue_1", "Serie_A", "Primeira_Liga"] 57 | seasons = [ 58 | # "2017-2018", 59 | # "2018-2019", 60 | # "2019-2020", 61 | # "2020-2021", 62 | # "2021-2022", 63 | # "2022-2023", 64 | # "2023-2024", 65 | "2024-2025", 66 | ] 67 | main("master.db", competitions=competitions, seasons=seasons) 68 | -------------------------------------------------------------------------------- /matches.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from bs4 import BeautifulSoup 4 | from curl_cffi import requests 5 | 6 | from utils import ignore, insert 7 | 8 | 9 | def get_match_data(competition, season): 10 | # Note that matches where the full match data is not yet available will still be picked up here and inserted 11 | # This is okay because every record is deleted from the specified (competition, season) pair each time, so the newly 12 | # updated matches will be put back into the Match table when they get updated 13 | data = [] 14 | with open("db_helper.json", "r") as f: 15 | competition_id = json.load(f)["competitions"][competition]["id"] 16 | url = f"https://fbref.com/en/comps/{competition_id}/{season}/schedule/" 17 | 18 | with requests.Session() as s: 19 | table = BeautifulSoup(s.get(url).text, "lxml").find("table") 20 | rows = table.find_all("tr") 21 | 22 | # for the bundesliga, e.g., the first column of the table is dedicated to either 'regular season' 23 | # or promotion-relegation playoff. This has to be taken into account when deciding which 24 | # columns to look at 25 | multistage: bool = rows[0].find("th")["data-stat"] == "round" 26 | match_ids_ignore = set.union(*ignore.values()) 27 | 28 | for row in rows[1:]: 29 | if row.find_all("td")[-3].text == "": # if referee field is blank 30 | continue 31 | match_id = row.find_all("a")[-1]["href"].split("/")[3] 32 | if match_id in match_ids_ignore: # don't even want to write the match info to Match table 33 | continue 34 | 35 | # if the season has multiple stages, start from column index 1 instead of 0 36 | row = [x.text.strip() for x in row.find_all("td")][multistage:-2] 37 | 38 | # convert each field to the correct datatype 39 | p1 = [match_id] + row[:4] 40 | p2 = float(row[4]) if row[4] != "" else None 41 | p3 = list(map(int, row[5].split("–"))) 42 | p4 = float(row[6]) if row[6] != "" else None 43 | p5 = [row[7]] + [int(row[8].replace(",", "")) if row[8] != "" else ""] + row[9:] 44 | 45 | data.append([competition, season] + p1 + [p2] + p3 + [p4] + p5) 46 | return data 47 | 48 | 49 | def update_matches(cursor, competition, season): 50 | cursor.execute("DELETE FROM Match WHERE competition = ? AND season = ?", (competition, season)) 51 | match_data = get_match_data(competition, season) 52 | insert(cursor, "Match", match_data) 53 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # FBRef_DB 2 | 3 | A tool to compile player and team statistics from FBRef match reports into a local SQLite database. 4 | 5 | ## Setup 6 | 7 | 1. **Clone the repository:** 8 | 9 | ```bash 10 | git clone https://github.com/ChrisMusson/FBRef_DB.git 11 | cd FBRef_DB 12 | ``` 13 | 14 | 2. **Install dependencies with [uv](https://github.com/astral-sh/uv):** 15 | 16 | If you don't already have `uv` installed: 17 | 18 | ```bash 19 | curl -LsSf https://astral.sh/uv/install.sh | sh 20 | ``` 21 | 22 | Then, create and activate a virtual environment: 23 | 24 | ```bash 25 | uv venv .venv 26 | source .venv/bin/activate 27 | ``` 28 | 29 | And install the dependencies from pyproject.toml: 30 | 31 | ```bash 32 | uv pip install . 33 | ``` 34 | 35 | ## Usage 36 | 37 | 1. **Download match data:** 38 | 39 | Download the `.zip` files for the leagues you want from this [Google Drive link](https://drive.google.com/drive/folders/1t34zhIvlk-2M0F_2v-7wvdf1mep9Kq-C?usp=drive_link). (Last updated 26 July 2025) 40 | 41 | 2. **Extract files:** 42 | 43 | Unzip the downloaded files into the `web_pages/` directory. 44 | This should result in a folder structure like: 45 | 46 | ``` 47 | web_pages/ 48 | ├── Premier_League/ 49 | │ ├── 2017-2018/ 50 | │ ├── 2018-2019/ 51 | │ └── ... 52 | └── Ligue_1/ 53 | └── ... 54 | ``` 55 | 56 | 3. **Edit `main.py` to specify your leagues:** 57 | 58 | By default, `main.py` is set to process the 2024-2025 season for the top 6 European leagues. You can modify the `competitions` and `seasons` lists to include/exclude the leagues and seasons you want to process. 59 | 60 | ```python 61 | competitions = ["Premier_League", "Bundesliga", "La_Liga", "Ligue_1", "Serie_A", "Primeira_Liga"] 62 | seasons = ["2023-2024", "2024-2025"] 63 | main("master.db", competitions=competitions, seasons=seasons) 64 | ``` 65 | 66 | 4. **Run the script:** 67 | 68 | ```bash 69 | python main.py 70 | ``` 71 | 72 | This will: 73 | - Check FBRef for newly played matches in your selected leagues 74 | - Add new match pages to the `web_pages/` folders 75 | - Parse the HTML pages 76 | - Populate/update the `master.db` SQLite database 77 | 78 | ## Explore the Data 79 | 80 | Use [DB Browser for SQLite](https://sqlitebrowser.org/dl/) to explore `master.db` or `premier_league.db`. 81 | 82 | A visual overview of the database schema is available here: 83 | https://dbdiagram.io/d/62221bf854f9ad109a5e298c 84 | 85 | ## Notes 86 | 87 | - The `master.db` file in this repo contains all top 6 European leagues. 88 | - A `premier_league.db` file (only Premier League) is available in the Google Drive link. 89 | -------------------------------------------------------------------------------- /update_local.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import time 4 | 5 | from bs4 import BeautifulSoup 6 | from curl_cffi import requests 7 | 8 | from utils import ignore 9 | 10 | 11 | def update_local(competition, season): 12 | # set of matches to ignore for various reasons 13 | # currently, most matches in here are those that are in the bundesliga/ligue1 relegation playoff phases 14 | # I have decided to ignore these as they are not part of the regular season, and 15 | # FBRef has no player data for these matches anyway 16 | # Other matches that have been abandoned are also included, such as Bochum vs. Monchengladbach, 17 | # where the game was abandoned after an assistant referee was hit by something thrown from the stands 18 | 19 | ignored_matches = set.union(*ignore.values()) 20 | 21 | if not os.path.exists("web_pages"): 22 | os.mkdir("web_pages") 23 | if not os.path.exists(os.path.join("web_pages", competition)): 24 | os.mkdir(os.path.join("web_pages", competition)) 25 | if not os.path.exists(os.path.join("web_pages", competition, season)): 26 | os.mkdir(os.path.join("web_pages", competition, season)) 27 | 28 | stored_files = set(os.listdir(os.path.join("web_pages", competition, season))) 29 | with open("db_helper.json", "r") as f: 30 | competition_id = json.load(f)["competitions"][competition]["id"] 31 | url = f"https://fbref.com/en/comps/{competition_id}/{season}/schedule/" 32 | 33 | web_match_ids = set() 34 | with requests.Session() as s: 35 | r = BeautifulSoup(s.get(url).text, "lxml") 36 | rows = r.find_all("td", {"data-stat": "score"}) 37 | for row in rows: 38 | if row.text == "": 39 | continue 40 | link = row.find("a") 41 | if link: 42 | web_match_ids.add(link["href"].split("matches/")[1].split("/")[0]) 43 | else: 44 | continue 45 | 46 | missing_match_ids = web_match_ids - stored_files - ignored_matches 47 | n = len(missing_match_ids) 48 | 49 | if n == 0: 50 | print(f"Local files are up to date for {competition} {season}") 51 | return 52 | 53 | print(f"Fetching {n} {'matches' if n > 1 else 'match'}") 54 | with requests.Session() as s: 55 | for number, match_id in enumerate(missing_match_ids): 56 | time.sleep(6) 57 | print(f"{competition} {season}: Fetching match number {number + 1} of {n} - ID: {match_id}") 58 | url = f"https://fbref.com/en/matches/{match_id}/" 59 | resp = s.get(url).text 60 | 61 | if "Advanced data not yet available" in resp: 62 | print(f"Full data for {competition} {season} match {match_id} is not yet available\n") 63 | continue 64 | 65 | with open( 66 | os.path.join("web_pages", competition, season, match_id), 67 | "w", 68 | encoding="utf-8", 69 | ) as f: 70 | f.write(resp) 71 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | ignore = { 2 | "bundesliga": { 3 | "5dc40876", # 17-18 relegation/promotion leg 1 4 | "9c6a24db", # 17-18 relegation/promotion leg 2 5 | "e262266b", # 18-19 relegation/promotion leg 1 6 | "f5e7a5c2", # 18-19 relegation/promotion leg 2 7 | "434865ef", # 19-20 relegation/promotion leg 1 8 | "2f2a35fa", # 19-20 relegation/promotion leg 2 9 | "dc47142c", # 20-21 relegation/promotion leg 1 10 | "948872ab", # 20-21 relegation/promotion leg 2 11 | "ac3eb7f6", # 21-22 relegation/promotion leg 1 12 | "d50b48fe", # 21-22 relegation/promotion leg 2 13 | "2c791569", # 22-23 relegation/promotion leg 1 14 | "f9a47a86", # 22-23 relegation/promotion leg 2 15 | "9156fa34", # 23-24 relegation/promotion leg 1 16 | "723ffa45", # 23-24 relegation/promotion leg 2 17 | "ea16299e", # 24-25 relegation/promotion leg 1 18 | "933eae7f", # 24-25 relegation/promotion leg 2 19 | "c34bbc21", # 21-22 - Bochum:Monchengladbach - Abandoned - Fan Trouble 20 | "171e1d37", # 24-25 - Union_Berlin:Bochum - Abandoned - Fan Trouble 21 | }, 22 | "ligue_1": { 23 | "f927719d", # 17-18 relegation/promotion leg 1 24 | "06517ea5", # 17-18 relegation/promotion leg 2 25 | "bfd434c1", # 18-19 relegation/promotion leg 1 26 | "ea5db1c4", # 18-19 relegation/promotion leg 2 27 | "621f8a81", # 20-21 relegation/promotion leg 1 28 | "28ce9808", # 20-21 relegation/promotion leg 2 29 | "f1560d55", # 21-22 relegation/promotion leg 1 30 | "78ed8c0d", # 21-22 relegation/promotion leg 1 31 | "febd4e01", # 23-24 relegation/promotion leg 1 32 | "7f01f697", # 23-24 relegation/promotion leg 2 33 | "111651be", # 24-25 relegation/promotion leg 1 34 | "5e063c64", # 24-25 relegation/promotion leg 2 35 | "15ee650c", # 24-25 - Montpellier:Saint_Etienne - Abandoned - Fan Trouble 36 | }, 37 | "serie_a": { 38 | "e0449015", # 22-23 relegation tie-breaker 39 | "e0a20cfe", # 20-21 - Hellas_Verona:Roma - Result Awarded - Registration Error 40 | "f7e35659", # 24-25 - Fiorentina:Inter - Match Suspended - Player Injury 41 | }, 42 | "primeira_liga": { 43 | "3f514a62", # 20-21 relegation/promotion leg 1 44 | "9c028e7e", # 20-21 relegation/promotion leg 2 45 | "c8cd6748", # 21-22 relegation/promotion leg 1 46 | "65aab877", # 21-22 relegation/promotion leg 2 47 | "fb49ed3b", # 22-23 relegation/promotion leg 1 48 | "b4f01c0d", # 22-23 relegation/promotion leg 2 49 | "f889dc95", # 23-24 relegation/promotion leg 1 50 | "0ede3890", # 23-24 relegation/promotion leg 2 51 | "12b7459d", # 24-25 relegation/promotion leg 1 52 | "1be42753", # 24-25 relegation/promotion leg 2 53 | }, 54 | } 55 | 56 | 57 | def clean_row(row): 58 | for i, d in enumerate(row): 59 | if d == "": 60 | row[i] = None 61 | elif "." in d: 62 | row[i] = float(d) 63 | else: 64 | try: 65 | row[i] = int(d) 66 | except ValueError: 67 | continue 68 | return row 69 | 70 | 71 | def insert(cursor, table_name, data): 72 | if data == [] or len(data[0]) == 0: 73 | return 74 | q_marks = ",".join(["?"] * len(data[0])) 75 | cursor.executemany(f"INSERT INTO {table_name} VALUES ({q_marks})", data) 76 | 77 | 78 | def get_matches_in_database(cursor, competition, season): 79 | matches = cursor.execute( 80 | """SELECT DISTINCT s.match_id 81 | FROM Summary s 82 | LEFT JOIN Match m 83 | ON s.match_id = m.match_id 84 | WHERE m.competition = ? AND m.season = ?""", 85 | (competition, season), 86 | ).fetchall() 87 | return set([x[0] for x in matches]) 88 | -------------------------------------------------------------------------------- /players.py: -------------------------------------------------------------------------------- 1 | import os 2 | from concurrent.futures import ProcessPoolExecutor, as_completed 3 | 4 | from bs4 import BeautifulSoup 5 | from tqdm import tqdm 6 | 7 | from utils import clean_row, insert 8 | 9 | 10 | def get_player_info(match_id, tables): 11 | data = [] 12 | for table_num in [3, 10]: 13 | rows = tables[table_num].find_all("tr")[2:-1] 14 | for row in rows: 15 | player_id = row.find("th").find("a")["href"].split("/")[3] 16 | player_name = row.find("a").text 17 | home_away = "H" if table_num < 10 else "A" 18 | started_match = row.text[0].isalpha() 19 | rest_of_row = clean_row([x.text for x in row.find_all("td")][:5]) 20 | data.append([match_id, player_id, player_name, home_away, started_match] + rest_of_row) 21 | return data 22 | 23 | 24 | def get_goalkeeper(match_id, tables): 25 | data = [] 26 | for table_num in [9, 16]: 27 | rows = tables[table_num].find_all("tr")[2:] 28 | for row in rows: 29 | player_id = row.find("th").find("a")["href"].split("/")[3] 30 | rest_of_row = clean_row([x.text for x in row.find_all("td")][3:]) 31 | data.append([match_id, player_id] + rest_of_row) 32 | return data 33 | 34 | 35 | def get_data(match_id, tables, i): 36 | data = [] 37 | for table_num in [3 + i, 10 + i]: 38 | rows = tables[table_num].find_all("tr")[2:-1] 39 | for row in rows: 40 | player_id = row.find("th").find("a")["href"].split("/")[3] 41 | rest_of_row = clean_row([x.text for x in row.find_all("td")][5:]) 42 | data.append([match_id, player_id] + rest_of_row) 43 | return data 44 | 45 | 46 | def handle_insert_player_error(match_id): 47 | print(f"\nProblem with match ID {match_id} - Not enough tables in the web page.") 48 | print(f"https://fbref.com/en/matches/{match_id}/") 49 | print("This could be because the match was abandoned, never played, or otherwise affected") 50 | print("If the match was not abandoned, you believe this should work, and this problem persists, please raise an issue at") 51 | print("https://github.com/ChrisMusson/FBRef_DB/issues") 52 | return 53 | 54 | 55 | def process_single_match(match_id, competition, season, db_tables): 56 | filepath = os.path.join("web_pages", competition, season, match_id) 57 | try: 58 | with open(filepath, "r", encoding="utf-8") as f: 59 | html_content = f.read() 60 | except FileNotFoundError: 61 | print(f"File not found: {filepath}") 62 | return match_id, None, None, None 63 | 64 | soup = BeautifulSoup(html_content, "lxml") 65 | tables = soup.find_all("table") 66 | 67 | if len(tables) < 10: 68 | return match_id, None, None, None 69 | 70 | player_info = get_player_info(match_id, tables) 71 | goalkeeper = get_goalkeeper(match_id, tables) 72 | 73 | season_data = {} 74 | for i, t in enumerate(db_tables): 75 | season_data[t] = get_data(match_id, tables, i) 76 | 77 | return match_id, season_data, player_info, goalkeeper 78 | 79 | 80 | def insert_players(cursor, competition, season, match_ids): 81 | if not match_ids: 82 | print(f"Database is up to date for {competition} {season}\n") 83 | return 84 | 85 | db_tables = [ 86 | "summary", 87 | "passing", 88 | "pass_types", 89 | "defensive_actions", 90 | "possession", 91 | "miscellaneous", 92 | ] 93 | 94 | all_player_info = [] 95 | all_goalkeepers = [] 96 | season_data_agg = {t: [] for t in db_tables} 97 | 98 | print(f"Starting match processing for {len(match_ids)} matches...") 99 | 100 | with ProcessPoolExecutor() as executor: 101 | # Map each future to its match_id 102 | future_to_match_id = {executor.submit(process_single_match, match_id, competition, season, db_tables): match_id for match_id in match_ids} 103 | 104 | for future in tqdm(as_completed(future_to_match_id), total=len(future_to_match_id), desc="Parsing matches"): 105 | match_id = future_to_match_id[future] 106 | try: 107 | result = future.result() 108 | if result is None: 109 | raise ValueError("process_single_match returned None") 110 | match_id, season_data, player_info, goalkeeper = result 111 | if season_data is None: 112 | handle_insert_player_error(match_id) 113 | continue 114 | 115 | all_player_info.extend(player_info) 116 | all_goalkeepers.extend(goalkeeper) 117 | for t in db_tables: 118 | season_data_agg[t].extend(season_data[t]) 119 | 120 | except Exception as e: 121 | print(f"\nError processing match ID {match_id}: {e}") 122 | import traceback 123 | 124 | traceback.print_exc() 125 | handle_insert_player_error(match_id) 126 | 127 | print(f"Inserting player data for {competition} {season}") 128 | insert(cursor, "Player_info", all_player_info) 129 | insert(cursor, "Goalkeeper", all_goalkeepers) 130 | 131 | for k, v in season_data_agg.items(): 132 | insert(cursor, k.title(), v) 133 | 134 | print("Insertion finished\n\n") 135 | -------------------------------------------------------------------------------- /db_helper.json: -------------------------------------------------------------------------------- 1 | { 2 | "column_names": { 3 | "Match": { 4 | "competition": "varchar NOT NULL", 5 | "season": "char(9) NOT NULL", 6 | "match_id": "char(8) NOT NULL", 7 | "day": "char(3)", 8 | "date": "char(10)", 9 | "time": "char(5)", 10 | "home_team": "varchar", 11 | "home_xG": "float", 12 | "home_goals": "int", 13 | "away_goals": "int", 14 | "away_xG": "float", 15 | "away_team": "varchar", 16 | "attendance": "int", 17 | "venue": "varchar", 18 | "referee": "varchar" 19 | }, 20 | "Player_Info": { 21 | "match_id": "char(8) NOT NULL", 22 | "player_id": "char(8) NOT NULL", 23 | "name": "varchar", 24 | "home_away": "char(1)", 25 | "start": "int", 26 | "squad_number": "int", 27 | "nation": "varchar", 28 | "position": "varchar", 29 | "age": "char(6)", 30 | "minutes": "int" 31 | }, 32 | "Summary": { 33 | "match_id": "char(8) NOT NULL", 34 | "player_id": "char(8) NOT NULL", 35 | "goals": "int", 36 | "assists": "int", 37 | "penalties_scored": "int", 38 | "penalties_attempted": "int", 39 | "shots": "int", 40 | "shots_on_target": "int", 41 | "yellow_cards": "int", 42 | "red_cards": "int", 43 | "touches": "int", 44 | "tackles": "int", 45 | "interceptions": "int", 46 | "blocks": "int", 47 | "xG": "float", 48 | "npxG": "float", 49 | "xA": "float", 50 | "shot_creating_actions": "int", 51 | "goal_creating_actions": "int", 52 | "passes_completed": "int", 53 | "passes_attempted": "int", 54 | "pass_completion_percentage": "float", 55 | "progressive_passes": "int", 56 | "carries": "int", 57 | "progressive_carries": "int", 58 | "successful_dribbles": "int", 59 | "dribbles_attempted": "int" 60 | }, 61 | "Passing": { 62 | "match_id": "char(8) NOT NULL", 63 | "player_id": "char(8) NOT NULL", 64 | "total_completed": "int", 65 | "total_attempted": "int", 66 | "completion_percentage": "float", 67 | "total_distance": "int", 68 | "progressive_distance": "int", 69 | "short_completed": "int", 70 | "short_attempted": "int", 71 | "short_completion_percentage": "float", 72 | "med_completed": "int", 73 | "med_attempted": "int", 74 | "med_completion_percentage": "float", 75 | "long_completed": "int", 76 | "long_attempted": "int", 77 | "long_completion_percentage": "float", 78 | "assists": "int", 79 | "xAG": "float", 80 | "xA": "float", 81 | "key_passes": "int", 82 | "final_third_passes": "int", 83 | "passes_into_penalty_area": "int", 84 | "crosses_into_penalty_area": "int", 85 | "progressive_passes": "int" 86 | }, 87 | "Pass_Types": { 88 | "match_id": "char(8) NOT NULL", 89 | "player_id": "char(8) NOT NULL", 90 | "passes_attempted": "int", 91 | "live_ball": "int", 92 | "dead_ball": "int", 93 | "free_kick": "int", 94 | "through_balls": "int", 95 | "switches": "int", 96 | "crosses": "int", 97 | "throw_ins": "int", 98 | "corner_kicks": "int", 99 | "inswinging_corner_kicks": "int", 100 | "outswinging_corner_kicks": "int", 101 | "straight_corner_kicks": "int", 102 | "passes_completed": "int", 103 | "offsides": "int", 104 | "blocked_passes": "int" 105 | }, 106 | "Defensive_Actions": { 107 | "match_id": "char(8) NOT NULL", 108 | "player_id": "char(8) NOT NULL", 109 | "tackles": "int", 110 | "tackles_won": "int", 111 | "tackles_defensive_third": "int", 112 | "tackles_midfield_third": "int", 113 | "tackles_attacking_third": "int", 114 | "dribblers_tackled": "int", 115 | "attempted_tackles_vs_dribblers": "int", 116 | "successful_dribbler_tackle_percentage": "float", 117 | "dribbled_past": "int", 118 | "blocks": "int", 119 | "shots_blocked": "int", 120 | "passes_blocked": "int", 121 | "interceptions": "int", 122 | "tackles_plus_interceptions": "int", 123 | "clearances": "int", 124 | "errors_leading_to_shot": "int" 125 | }, 126 | "Possession": { 127 | "match_id": "char(8) NOT NULL", 128 | "player_id": "char(8) NOT NULL", 129 | "total_touches": "int", 130 | "touches_defensive_penalty_area": "int", 131 | "touches_defensive_third": "int", 132 | "touches_midfield_third": "int", 133 | "touches_attacking_third": "int", 134 | "touches_attacking_penalty_area": "int", 135 | "live_ball_touches": "int", 136 | "dribbles_attempted": "int", 137 | "successful_dribbles": "int", 138 | "dribble_success_percentage": "float", 139 | "tackled": "int", 140 | "tackled_perecentage": "float", 141 | "carries": "int", 142 | "total_carry_distance": "int", 143 | "progressive_carry_distance": "int", 144 | "progressive_carries": "int", 145 | "final_third_carries": "int", 146 | "penalty_area_carries": "int", 147 | "miscontrolled": "int", 148 | "dispossessed": "int", 149 | "received": "int", 150 | "progressive_passes_received": "int" 151 | }, 152 | "Miscellaneous": { 153 | "match_id": "char(8) NOT NULL", 154 | "player_id": "char(8) NOT NULL", 155 | "yellow_cards": "int", 156 | "red_cards": "int", 157 | "second_yellow_cards": "int", 158 | "fouls_committed": "int", 159 | "fouls_drawn": "int", 160 | "offsides": "int", 161 | "crosses": "int", 162 | "interceptions": "int", 163 | "tackles_won": "int", 164 | "penalties_won": "int", 165 | "penalties_conceded": "int", 166 | "own_goals": "int", 167 | "recoveries": "int", 168 | "aerials_won": "int", 169 | "aerials_lost": "int", 170 | "aerials_won_percentage": "float" 171 | }, 172 | "Goalkeeper": { 173 | "match_id": "char(8) NOT NULL", 174 | "player_id": "char(8) NOT NULL", 175 | "shots_on_target_against": "int", 176 | "goals_against": "int", 177 | "saves": "int", 178 | "save_percentage": "float", 179 | "post_shot_xG": "float", 180 | "long_passes_completed": "int", 181 | "long_passes_attempted": "int", 182 | "long_pass_completion_percentage": "float", 183 | "passes_attempted": "int", 184 | "throws_attempted": "int", 185 | "long_pass_percentage": "float", 186 | "average_pass_length": "float", 187 | "goal_kicks": "int", 188 | "long_goal_kick_percentage": "float", 189 | "average_goal_kick_length": "float", 190 | "opponent_crosses": "int", 191 | "crosses_stopped": "int", 192 | "crosses_stopped_percentage": "float", 193 | "defensive_actions_outside_penalty_area": "int", 194 | "average_distance_of_defensive_action": "float" 195 | } 196 | }, 197 | "competitions": { 198 | "Premier_League": {"id": 9, "start_year": 2017, "end_year": 2024}, 199 | "Bundesliga": {"id": 20, "start_year": 2017, "end_year": 2024}, 200 | "La_Liga": {"id": 12, "start_year": 2017, "end_year": 2024}, 201 | "Ligue_1": {"id": 13, "start_year": 2017, "end_year": 2024}, 202 | "Serie_A": {"id": 11, "start_year": 2017, "end_year": 2024}, 203 | "Primeira_Liga": {"id": 32, "start_year": 2018, "end_year": 2024} 204 | } 205 | } --------------------------------------------------------------------------------