├── .gitignore ├── app ├── __init__.py └── ingest │ ├── __init__.py │ ├── config.json.dist │ ├── ingest_raw_files.py │ └── main.py ├── data └── .gitignore ├── docker-compose.yaml ├── model ├── Dockerfile ├── ___init__.py ├── __init__.py ├── expected_assist │ ├── __init__.py │ ├── main.py │ ├── model.py │ └── resources │ │ └── .gitkeep ├── expected_goal │ ├── __init__.py │ ├── main.py │ ├── notebook │ │ └── model_building.ipynb │ ├── resources │ │ └── .gitkeep │ ├── src │ │ └── build_training_data.sql │ ├── train.py │ └── xg_model_scheme.png ├── pass2vec │ ├── __init__.py │ ├── main.py │ ├── resources │ │ └── .gitkeep │ ├── src │ │ ├── Pass.py │ │ ├── Sequence.py │ │ ├── SequenceFactory.py │ │ └── __init__.py │ └── train.py ├── readme.md └── result_prediction │ ├── __init__.py │ ├── build_training_data.py │ ├── config │ └── database.json │ ├── main.py │ ├── src │ ├── TrainingDataBuilder.py │ ├── __init__.py │ └── sql_queries │ │ ├── get_conceded_goals.sql │ │ ├── get_conceded_shots.sql │ │ ├── get_games.sql │ │ ├── get_scored_goals.sql │ │ └── get_shots.sql │ └── train.py ├── notebooks └── pass2vec │ ├── games_team_style_feature.sql │ ├── img │ ├── autoencoder_scheme.ai │ ├── autoencoder_scheme.png │ ├── cluster_sample.ai │ ├── cluster_sample_n14_25.png │ ├── cluster_sample_n17_500.png │ ├── cluster_sample_n287_500.png │ ├── cluster_sample_n361_500.png │ ├── cluster_sample_n3_25.png │ ├── cluster_sample_n455_500.png │ ├── cluster_sample_n4_25.png │ ├── cluster_sample_n53_500.png │ ├── sample_possessions.jpg │ ├── team_cluster_frequence_heatmap.png │ ├── team_cluster_frequence_heatmap_small.jpg │ ├── tmp.psd │ ├── tsne_kmeans_plot.png │ ├── tsne_kmeans_plot_k25.png │ ├── tsne_sequence_length_plot-01.png │ ├── tsne_sequence_length_plot.ai │ └── tsne_sequence_length_plot.png │ ├── notebook.html │ └── notebook.rmd ├── readme.md ├── src ├── __init_.py ├── crawler │ ├── Dockerfile │ ├── WhoScoredCrawler.py │ └── __init__.py ├── database │ ├── Dockerfile │ ├── 
PassParser.py │ ├── ShotParser.py │ ├── WhoScoredToDataBase.py │ ├── __init__.py │ └── init.sql └── sql_queries │ ├── assist-shots-clustering.sql │ ├── goal_distribution.sql │ ├── lineup_evolution.sql │ ├── passsonar.sql │ ├── player_pass.sql │ ├── team_shots_and_conceded.sql │ ├── touch_5_minutes_bin.sql │ └── win_draw_lose_count_for_one_team.sql └── visualisation ├── Age ├── age.r ├── data │ └── data.csv └── img │ ├── Age distribution in Premier League.png │ ├── Average number of assists per 90 minutes by age.png │ ├── Average number of goals per 90 minutes by age.png │ └── Minutes played by age.png ├── Notebook ├── .DS_Store ├── data │ └── loan20172018.csv └── script │ ├── average age of players loaned by league.png │ ├── average number of players loaned by league.png │ ├── loan.Rmd │ ├── loan.html │ ├── number of players loaned by position.png │ ├── number of players loaned by their club in Bundesliga.png │ ├── number of players loaned by their club in La Liga.png │ ├── number of players loaned by their club in Ligue 1.png │ ├── number of players loaned by their club in Premier League.png │ └── number of players loaned by their club in Serie A.png ├── PCA ├── ozil_comparison.png ├── ozil_pca.csv └── pca.r ├── average_time_fielded_player ├── img │ ├── plot20162017.png │ ├── plot20172018.png │ └── plot20182019.png └── main.r ├── contextualized_expected_goal ├── main.r └── notebook.rmd ├── cumsum_expected_goal └── main.r ├── dendogram ├── .DS_Store ├── all_dendo_sanchez.png ├── data.csv ├── dendogram.r └── dribble_goals.png ├── games_evolution ├── fabian_delph_games_evo.png └── start_evo_fabian_delph.r ├── goal_distribution ├── img │ ├── plot_goal_against.svg │ ├── plot_goal_against_bottom_league.png │ ├── plot_goal_against_full.png │ ├── plot_goal_against_top2.png │ ├── plot_goal_against_topteam.png │ ├── plot_goal_for.svg │ ├── plot_goal_for_bottom_league.png │ ├── plot_goal_for_full.png │ ├── plot_goal_for_top2.png │ └── plot_goal_for_topteam.png └── main.r ├── 
goal_vs_xg └── main.r ├── goals_assists_evolution ├── gace_ozil.png └── gace_ozil.r ├── lineup ├── .DS_Store ├── data │ ├── .DS_Store │ ├── all.csv │ ├── arsenal1718.csv │ ├── arsenal_lineup.csv │ ├── arsenal_lineup35days.csv │ ├── chelsea_lineup.csv │ ├── export.csv │ ├── liverpool_lineup.csv │ ├── manCity_lineup.csv │ ├── manUtd_lineup.csv │ ├── nice_lineup.csv │ └── tottenham_lineup.csv ├── img │ ├── .DS_Store │ ├── arsenal_lineup.png │ ├── arsenal_lineup35.png │ ├── arsenal_lineup_34season.png │ ├── chelsea lineup.png │ ├── liverpool lineup.png │ ├── logoArsenal.png │ ├── logoChelsea.png │ ├── logoLiverpool.png │ ├── logoManCity.png │ ├── logoManUtd.png │ ├── logoNice.png │ ├── logoTottenham.png │ ├── manCity lineup.png │ ├── manUtd lineup.png │ ├── nice lineup.png │ └── tottenham lineup.png └── process.r ├── maps ├── .gitignore ├── assist_shot_cluster_map │ ├── img │ │ ├── all_premierleague_20190223.jpg │ │ ├── arsenal1718_cluster10_assists_shots_cluster_map.png │ │ ├── arsenal1718_cluster6_assists_shots_cluster_map.png │ │ ├── arsenal1819_cluster10_assists_shots_cluster_map.png │ │ ├── arsenal1819_cluster6_assists_shots_cluster_map.png │ │ ├── g_assist_shot_cluster_tmp.png │ │ ├── tmp_assists_shots_cluster_map_4.png │ │ ├── tmp_assists_shots_cluster_map_5.png │ │ ├── tmp_assists_shots_cluster_map_6.png │ │ └── tmp_assists_shots_cluster_map_7.png │ ├── main.r │ ├── multi_maps.r │ ├── src │ │ └── assist_shot_cluster_map.r │ └── template │ │ ├── foreground.png │ │ └── title.png ├── football_pitch.png ├── football_pitch.r ├── passmap │ ├── .gitignore │ ├── Dockerfile │ ├── __init__.py │ ├── img │ │ ├── aguero_20182019_passsonar.svg │ │ ├── bernardo_silva_20182019_passsonar.svg │ │ ├── danilo_20182019_passsonar.svg │ │ ├── david_silva_20182019_passsonar.svg │ │ ├── debruyne_20182019_passsonar.svg │ │ ├── delph_20182019_passsonar.svg │ │ ├── ederson_20182019_passsonar.svg │ │ ├── fernandinho_20182019_passsonar.svg │ │ ├── foden_20182019_passsonar.svg │ │ ├── 
gundogan_20182019_passsonar.svg │ │ ├── jesus_20182019_passsonar.svg │ │ ├── kompany_20182019_passsonar.svg │ │ ├── laport_20182019_passsonar.svg │ │ ├── mahrez_20182019_passsonar.svg │ │ ├── mendy_20182019_passsonar.svg │ │ ├── otamendi_20182019_passsonar.svg │ │ ├── sane_20182019_passsonar.svg │ │ ├── sterling_20182019_passsonar.svg │ │ ├── stones_20182019_passsonar.svg │ │ ├── wlaker_20182019_passsonar.svg │ │ └── zichenko_20182019_passsonar.svg │ ├── main.py │ ├── one_player_sonar.r │ ├── passnetwork.r │ ├── passsonar.r │ ├── public │ │ ├── css │ │ │ └── semantic.css │ │ ├── img │ │ │ └── passmap.jpg │ │ ├── index.html │ │ ├── js │ │ │ ├── jquery.min.js │ │ │ └── semantic.js │ │ └── result.html │ ├── src │ │ ├── core.r │ │ ├── crawler.py │ │ └── utils.r │ └── template │ │ ├── passnetwork │ │ ├── design.psd │ │ └── foreground_passnetwork.png │ │ └── passsonar │ │ ├── passsonar_template.psd │ │ └── title_passsonar.png ├── toolbox_map.r ├── touch_map │ ├── img │ │ ├── mason_greenwood_20182019.jpg │ │ └── tmp.svg │ └── main.r ├── touch_zone.R ├── xa_map │ ├── img │ │ ├── lukaku_xa_map_1718.jpg │ │ └── wolves_xa_map_conceded_1819.png │ ├── main.r │ ├── src │ │ └── xa_map.r │ └── template │ │ ├── foreground.png │ │ └── title.png └── xg_map │ ├── img │ ├── chelsea_conceded_xgmap.png │ ├── g_xgmap_tmp.png │ ├── hazard_xg_20190223.png │ ├── lacazette_xgmap.jpg │ ├── palace_conceded_xgmap.png │ ├── wolves_conceded_xgmap.png │ ├── wolves_xgmap.png │ └── wolves_xgmap_1819.png │ ├── main.r │ ├── src │ └── xg_map.r │ └── template │ ├── foreground.png │ └── title.png ├── pass_sequence_cluster ├── img │ └── cluter_sequence.png └── main.r ├── passes ├── .DS_Store ├── data │ ├── fft_passesCombination.csv │ └── fft_scrape.csv ├── img │ ├── DierMatciPassesCombinations.jpg │ ├── arsenal.jpg │ ├── attacking3rdPasses.png │ ├── bellerin.png │ ├── bellerinPhoto.png │ ├── chamberlainPhoto.png │ ├── coquelin.png │ ├── coquelinPhoto.png │ ├── kosPhoto.png │ ├── koscielny.png │ ├── 
logoArsenal.png │ ├── midfield.jpg │ ├── monreal.png │ ├── monrealPhoto.png │ ├── mustafi.png │ ├── mustafiPhoto.png │ ├── oxlade.png │ ├── ozil.png │ ├── ozilPhoto.png │ ├── pogbaPassesCombinations.jpg │ ├── ramsey.png │ ├── ramseyPhoto.png │ ├── sanchez.png │ ├── sanchezPhoto.png │ ├── walcott.png │ ├── walcottPhoto.png │ ├── xhaka.png │ └── xhakaPhoto.png └── process.r ├── player_stats ├── scraper.py └── scraper.r ├── rollmean ├── img │ └── manU_xgvs_xgc.png └── main.r ├── spaced_table ├── graphic.r └── img │ └── tmp.png └── table_to_heatmap ├── img └── heatmap.png └── main.r /.gitignore: -------------------------------------------------------------------------------- 1 | .Rhistory 2 | visualisation/data/* 3 | visualisation/Maps/data/* 4 | visualisation/crawler/* 5 | *__pycache__ 6 | *Rplots.pdf 7 | *.DS_Store 8 | *.csv 9 | *.pkl 10 | *config.json 11 | *.h5 12 | *.zip 13 | model/pass2vec/resources/*.png 14 | -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/app/__init__.py -------------------------------------------------------------------------------- /app/ingest/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/app/ingest/__init__.py -------------------------------------------------------------------------------- /app/ingest/config.json.dist: -------------------------------------------------------------------------------- 1 | { 2 | "database":"XXXX", 3 | "host":"XXXX", 4 | "user":"XXXX", 5 | "password":"XXXX" 6 | } -------------------------------------------------------------------------------- /app/ingest/ingest_raw_files.py: 
if __name__ == "__main__":
    # Command-line entry point: ingest either one WhoScored URL (-u) or every
    # URL listed in a text/CSV file (-c) into the Postgres database.
    parser = argparse.ArgumentParser()
    parser.add_argument("-u", "--url", type=str, help="WhoScored URL")
    parser.add_argument("-c", "--csv", type=str, help="CSV file with a list of WhoScored URL")
    args = parser.parse_args()

    # Use a context manager so the config file handle is closed immediately.
    with open("app/ingest/config.json") as config_file:
        config = json.load(config_file)
    crawler = WhoScoredCrawler()
    wsdb = WhoScoredToDataBase(config["database"], config["host"], config["user"], config["password"])

    if args.url:
        file = crawler.crawl(args.url)
        wsdb.process_file(file)
        wsdb.close_connection()
    elif args.csv:
        with open(args.csv, "r") as urls:
            for line in urls:
                # Drop the trailing newline and skip blank lines.
                url = line.strip()
                if not url:
                    continue

                # Reset per URL so a failed crawl can never re-ingest the
                # result of the previous URL (or hit an unbound name).
                file = None
                for _ in range(5):  # up to 5 attempts per URL
                    try:
                        file = crawler.crawl(url)
                    except Exception as e:
                        # Best effort: report the failure and retry.
                        print(e)
                        continue
                    if file:
                        # Stop retrying as soon as one crawl succeeds.
                        break

                if file:
                    wsdb.process_file(file)
                # Pause between URLs (presumably to avoid hammering the
                # remote site -- confirm the intended delay).
                time.sleep(20)
        crawler.close()
        wsdb.close_connection()

    else:
        print("No argument")
postgres:postgres 40 | 41 | environment: 42 | PGADMIN_DEFAULT_EMAIL: 'root@example.com' 43 | PGADMIN_DEFAULT_PASSWORD: 'root' 44 | 45 | crawling: 46 | build: src/crawler 47 | networks: 48 | football-viz-net: 49 | ipv4_address: 172.28.1.3 50 | container_name: crawler 51 | command: tail -F anything 52 | volumes: 53 | - ".:/data" 54 | 55 | model: 56 | build: model 57 | ports: 58 | - "8081:5000" 59 | networks: 60 | football-viz-net: 61 | ipv4_address: 172.28.1.4 62 | container_name: model 63 | command: tail -F anything 64 | volumes: 65 | - ".:/data" 66 | 67 | passmap: 68 | build: visualisation/maps/passmap 69 | ports: 70 | - '8082:8082' 71 | networks: 72 | football-viz-net: 73 | ipv4_address: 172.28.1.5 74 | container_name: passmap 75 | volumes: 76 | - ".:/data" 77 | 78 | volumes: 79 | soccer_database_volume: 80 | driver: local 81 | 82 | networks: 83 | football-viz-net: 84 | ipam: 85 | driver: default 86 | config: 87 | - subnet: 172.28.0.0/16 88 | -------------------------------------------------------------------------------- /model/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ufoym/deepo:all-py36-jupyter-cpu 2 | 3 | RUN apt-get -qq update -y && \ 4 | apt-get install curl -y && \ 5 | apt-get install unzip -y && \ 6 | apt-get install -my wget gnupg && \ 7 | apt-get install libpq-dev python-dev -y 8 | 9 | RUN pip install flask mlflow MulticoreTSNE psycopg2 10 | 11 | # Set the locale 12 | ENV LC_ALL C.UTF-8 13 | ENV LANG C.UTF-8 14 | 15 | CMD ["/bin/bash"] 16 | -------------------------------------------------------------------------------- /model/___init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/___init__.py -------------------------------------------------------------------------------- /model/__init__.py: 
def model_application(model, pass_data):
    """
    Compute the expected-assist value (xA) of every usable pass.

    Rows containing infinite or missing values are discarded first. The
    model's ``predict_proba`` is then called on the feature columns and the
    second entry of each returned row is stored in a new ``xA`` column.

    :param model: object exposing ``predict_proba``
    :param pass_data: pandas dataframe holding the feature columns
    :return: cleaned pandas dataframe with the extra ``xA`` column
    """
    feature_columns = ["x_begin", "y_begin", "x_end", "y_end", "goal_distance", "key_pass", "big_chance_created"]

    # Turn +/-inf into NaN so that a single dropna removes both kinds of rows.
    cleaned = pass_data.replace([np.inf, -np.inf], np.nan)
    cleaned = cleaned.dropna()

    probabilities = model.predict_proba(cleaned[feature_columns])

    # Keep only the second entry (index 1) of each prediction row.
    second_column = []
    for row in probabilities:
        second_column.append(row[1])
    cleaned["xA"] = second_column
    return cleaned
def load_data(csv_file):
    """
    Read the pass dataset and keep only the columns used by the model.

    Any row with a missing value in *any* column of the file is dropped
    before the column selection is applied.

    :param csv_file: path (or file-like object) of the comma separated file
    :return: pandas dataframe with the feature columns and ``is_assist``
    """
    model_columns = [
        "x_begin",
        "y_begin",
        "x_end",
        "y_end",
        "goal_distance",
        "key_pass",
        "big_chance_created",
        "is_assist",
    ]
    raw = pandas.read_csv(csv_file, sep=",")
    complete_rows = raw.dropna(axis=0)
    return complete_rows[model_columns]
23 | :param data: pandas dataframe 24 | :param split_rate: often 0.3 25 | :param output_variable: variable to predict 26 | :return: x_train, y_train, x_test, y_test (pandas dataframes) 27 | """ 28 | train_set, test_set = train_test_split(data, test_size=split_rate, random_state=0) 29 | x_train = train_set.drop(output_variable, axis=1) 30 | y_train = train_set[output_variable] 31 | x_test = test_set.drop(output_variable, axis=1) 32 | y_test = test_set[output_variable] 33 | return x_train, y_train, x_test, y_test 34 | 35 | 36 | 37 | if __name__ == "__main__": 38 | data = load_data("model/expected_assist/resources/premier_league_pass.csv") # load training dataset 39 | x_train, y_train, x_test, y_test = split_dataset(data, 0.3, "is_assist") # split data 40 | 41 | model = RandomForestClassifier(n_estimators=100) 42 | model.fit(x_train, y_train) 43 | y_pred = model.predict(x_test) 44 | print(classification_report(y_test, y_pred)) 45 | 46 | joblib.dump(model, 'model/expected_assist/resources/model.pkl') # save model 47 | -------------------------------------------------------------------------------- /model/expected_assist/resources/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/expected_assist/resources/.gitkeep -------------------------------------------------------------------------------- /model/expected_goal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/expected_goal/__init__.py -------------------------------------------------------------------------------- /model/expected_goal/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Expected goal model application 3 | Compute xG for shots data 4 | 5 | Usage : 6 | python -m 
-- Build the expected-goal training set: one row per shot, enriched with the
-- position and type of the event that happened just BEFORE the shot and with
-- the game metadata.
WITH shots AS (
    SELECT
        game_id,
        event_id,
        x_shot,
        y_shot,
        goal_distance,
        big_chance,
        is_goal
    FROM event_shots
)

SELECT
    event_shots.game_id,
    event_id,
    team_id,
    player_id,
    minute,
    second,
    x_shot,
    y_shot,
    goal_distance,
    big_chance,
    previous_type_name,
    previous_x,
    previous_y,
    is_goal,
    metadata.*
FROM (
    SELECT
        events.*,
        x_shot,
        y_shot,
        goal_distance,
        big_chance,
        is_goal,
        -- LAG (not LEAD) reads the preceding row of the timeline; LEAD would
        -- return the event that FOLLOWS the shot. Partitioning by game keeps
        -- the first event of a game from reading the last event of another.
        LAG(x) OVER(PARTITION BY events.game_id ORDER BY minute, second) AS previous_x,
        LAG(y) OVER(PARTITION BY events.game_id ORDER BY minute, second) AS previous_y,
        LAG(type_name) OVER(PARTITION BY events.game_id ORDER BY minute, second) AS previous_type_name
    FROM events
    LEFT JOIN shots
        ON events.game_id = shots.game_id AND events.event_id = shots.event_id
    ORDER BY events.game_id, minute, second
) AS event_shots
LEFT JOIN metadata
    ON event_shots.game_id = metadata.game_id
-- Only events that matched a shot carry a non-null x_shot.
WHERE x_shot IS NOT NULL
def neural_network_model(input_dim, nb_hidden_layers, layer_depth):
    """
    Assemble and compile the fully connected binary classifier.

    The network is one ReLU input layer, ``nb_hidden_layers`` ReLU hidden
    layers (all ``layer_depth`` units wide) and a single sigmoid output unit.

    :param input_dim: number of input features
    :param nb_hidden_layers: number of hidden layers after the input layer
    :param layer_depth: number of units of the input and hidden layers
    :return: compiled keras Sequential model
    """
    dense = tf.keras.layers.Dense

    # Layers are created in input -> hidden -> output order.
    stack = [dense(layer_depth, input_dim=input_dim, activation='relu')]
    stack.extend(dense(layer_depth, activation='relu') for _ in range(nb_hidden_layers))
    stack.append(dense(1, activation='sigmoid'))

    network = tf.keras.models.Sequential(stack)
    network.compile(loss='binary_crossentropy', optimizer='adam')
    return network
neural_network_model(x_train.shape[1], args.nb_hidden_layers, args.layer_depth) 70 | model.fit(x_train, y_train, epochs=args.epochs) 71 | y_pred = model.predict_classes(x_test) 72 | 73 | (f1, accuracy, roc) = eval_metrics(y_test, y_pred) 74 | mlflow.log_param("split_dataset_rate", args.split_dataset_rate) 75 | mlflow.log_param("epochs", args.epochs) 76 | mlflow.log_param("nb_hidden_layers", args.nb_hidden_layers) 77 | mlflow.log_param("layer_depth", args.layer_depth) 78 | mlflow.log_metric("f1_score", f1) 79 | mlflow.log_metric("accuracy_score", accuracy) 80 | mlflow.log_metric("roc_auc_score", roc) 81 | 82 | mlflow.keras.log_model(model, "expected_goal_model") 83 | 84 | -------------------------------------------------------------------------------- /model/expected_goal/xg_model_scheme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/expected_goal/xg_model_scheme.png -------------------------------------------------------------------------------- /model/pass2vec/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/pass2vec/__init__.py -------------------------------------------------------------------------------- /model/pass2vec/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pass2Vec model application 3 | Encode sequence of passes 4 | 5 | Usage : 6 | python -m model.pass2vec.main 7 | 8 | Wildcard: 9 | -s input for saving file 10 | -d saving decoded images 11 | """ 12 | import argparse 13 | import numpy 14 | import pandas 15 | import cv2 16 | import logging 17 | from model.pass2vec.src.SequenceFactory import SequencesFactory 18 | from keras.models import load_model 19 | # from sklearn.manifold import TSNE 20 | from 
def vec_to_img(vector, id=0, width=68, height=105, img_folder="model/pass2vec/resources"):
    """Rebuild an image from decoded embedded data and save it as a PNG.

    Utility function to check visually whether the model did well.

    Args:
        vector: (numpy array) decoded flattened image
        id: (int) an id used in the saved file name
        width: (int) size of the first axis of the rebuilt array
        height: (int) size of the second axis of the rebuilt array
        img_folder: (string) folder in which the image is written

    Returns:
        The rescaled image as a ``uint8`` numpy array.
    """
    # NOTE(review): the array is reshaped to (width, height), so "width" is the
    # first (row) axis here -- confirm against the encoder's image layout.
    img_data = vector.reshape(width, height)
    # Min-max rescale the values to the 0..255 range.
    norm_image = cv2.normalize(img_data, None, alpha=0, beta=255, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
    # astype returns a new array; the result must be rebound, otherwise the
    # conversion to 8-bit is silently discarded.
    norm_image = norm_image.astype(numpy.uint8)
    cv2.imwrite(f"{img_folder}/seq_decoded_{id}.png", norm_image)
    return norm_image
71 | encoded_img = encoder_model.predict(processed_data) 72 | 73 | if decoding: 74 | for id, img, sequence in zip(sequences_informations["id"], encoded_img, sequences): 75 | vec_to_img(decoder_model.predict(numpy.array([img])), id) 76 | sequence.to_vec(True) 77 | 78 | logging.info("TSNE computing...") 79 | tsne_result = TSNE(n_jobs=4, n_components=2, perplexity=30, n_iter=2000, random_state=8).fit_transform(encoded_img) 80 | 81 | header = [f"f_{i}" for i in range(0, encoded_img.shape[1])] 82 | encoded_pass = pandas.DataFrame(data=encoded_img, columns=header) 83 | tsne_pass = pandas.DataFrame(data=tsne_result, columns=["t1", "t2"]) 84 | return pandas.concat((sequences_informations, encoded_pass, tsne_pass), axis=1) 85 | 86 | if __name__ == "__main__": 87 | parser = argparse.ArgumentParser() 88 | parser.add_argument("data", help="csv file containing passes data") 89 | parser.add_argument("-d", "--decoding", help="saving decoded images.", action="store_true", default=False) 90 | parser.add_argument("-s", "--save_file_path", help="file path to save data", default="model/pass2vec/resources/encoded_data.csv") 91 | args = parser.parse_args() 92 | 93 | logging.getLogger().setLevel(logging.INFO) 94 | 95 | 96 | logging.info("Loading data") 97 | pass_data = pandas.read_csv(args.data).dropna(axis=0).head(10000) 98 | encoder_model, decoder_model = load_models("c344bdd35a7249b980fea83c5a0c5535") 99 | encoded_passes = model_application(encoder_model , decoder_model, pass_data, args.decoding) 100 | 101 | if args.save_file_path: 102 | encoded_passes.to_csv(args.save_file_path, index=False) 103 | 104 | 105 | -------------------------------------------------------------------------------- /model/pass2vec/resources/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/pass2vec/resources/.gitkeep 
"""Pass.py
A simple class describing a single pass.
"""


class Pass:
    """Read-only record of one pass: start/end coordinates plus identifiers.

    ``team_id``, ``player_id`` and ``event_id`` are stored as strings
    (``None`` becomes ``"None"``); ``game_id`` is stored unchanged.
    """

    def __init__(self, x_begin, y_begin, x_end, y_end, game_id=None, team_id=None, player_id=None, event_id=None):
        self._x_begin = x_begin
        self._y_begin = y_begin
        self._x_end = x_end
        self._y_end = y_end
        self._game_id = game_id
        self._team_id = str(team_id)
        self._player_id = str(player_id)
        self._event_id = str(event_id)

    def __repr__(self):
        return (
            f"{type(self).__name__}(geo={self.geo!r}, game_id={self.game_id!r}, "
            f"team_id={self.team_id!r}, player_id={self.player_id!r}, "
            f"event_id={self.event_id!r})"
        )

    @property
    def game_id(self):
        return self._game_id

    @property
    def team_id(self):
        return self._team_id

    @property
    def player_id(self):
        return self._player_id

    @property
    def event_id(self):
        return self._event_id

    @property
    def x_begin(self):
        return self._x_begin

    @property
    def y_begin(self):
        return self._y_begin

    @property
    def x_end(self):
        return self._x_end

    @property
    def y_end(self):
        return self._y_end

    @property
    def geo(self):
        """Coordinates as ``[x_begin, y_begin, x_end, y_end]``."""
        return [self.x_begin, self.y_begin, self.x_end, self.y_end]

    @property
    def sequence_id(self):
        """Identifier shared by passes of the same team in the same game.

        It is used on a list of passes to tell where one team's run of
        passes stops and another one starts.

        Returns:
            A string built from game_id and team_id. The two parts are joined
            with ``_`` so that distinct pairs such as (1, 23) and (12, 3) can
            no longer produce the same identifier.
        """
        return f"{self.game_id}_{self.team_id}"
import logging
from itertools import groupby

import numpy
import pandas


class Sequence:
    """An ordered list of passes handled as one unit.

    ``game_id`` and ``team_id`` are taken from the first pass of the list, so
    an empty ``pass_list`` raises ``IndexError``.
    """

    def __init__(self, pass_list):
        first = pass_list[0]
        self._pass_list = pass_list
        # id = "<game_id><team_id>_<event_id>_<event_id>..."
        self._id = "_".join(
            [str(first.game_id) + str(first.team_id)] + [passe.event_id for passe in pass_list])
        self._game_id = str(first.game_id)
        self._team_id = str(first.team_id)
        self._player_list = "_".join(passe.player_id for passe in pass_list)

    def __len__(self):
        return len(self._pass_list)

    @property
    def id(self):
        return self._id

    @property
    def game_id(self):
        return self._game_id

    @property
    def team_id(self):
        return self._team_id

    @property
    def player_list(self):
        return self._player_list

    @property
    def pass_list(self):
        return self._pass_list

    def change_referentiel(self):
        """Translate the sequence so that its first pass starts at (0.0, 0.0).

        Returns:
            A new sequence instance expressed in the new frame of reference.
        """
        from model.pass2vec.src.Pass import Pass

        first = self._pass_list[0]
        x_init, y_init = first.x_begin, first.y_begin
        return Sequence([
            Pass(
                passe.x_begin - x_init,
                passe.y_begin - y_init,
                passe.x_end - x_init,
                passe.y_end - y_init,
                first.game_id,
                first.team_id,
                passe.player_id,
                passe.event_id)
            for passe in self._pass_list
        ])

    def to_vec(self, save=False, img_width=105, img_height=68, img_folder="model/pass2vec/resources"):
        """Draw the passes as lines on a blank image and return it flattened.

        Args:
            save: (optional) A boolean to save the image in img_folder.
            img_width: (optional int) number of columns of the image.
            img_height: (optional int) number of rows of the image.
            img_folder: (optional string) Folder path to save images.

        Returns:
            A flatten image as a numpy array object.
        """
        # OpenCV is only needed to rasterise the sequence.
        import cv2

        passes = self._pass_list
        img = numpy.zeros((img_height, img_width, 1), numpy.uint8)
        # Shades of grey from 155 (first pass) to 255 (last pass). A single
        # pass has no range to spread over: it is drawn at 155 instead of
        # dividing by zero.
        steps = max(len(passes) - 1, 1)
        colors = [int(155 + i * (255 - 155) / steps) for i in range(len(passes))]
        for color, passe in zip(colors, passes):
            img = cv2.line(
                img,
                (int(passe.x_begin), int(passe.y_begin)),
                (int(passe.x_end), int(passe.y_end)),
                (color, 0, 0),
                1)
        if save:
            cv2.imwrite(f"{img_folder}/seq_{self._id}.png", img)
        return img.flatten()


class SequencesFactory:
    """Builds passes, sequences and model-ready arrays from raw pass data."""

    @staticmethod
    def to_bin(value, step):
        """Round ``value`` down to the nearest multiple of ``step``."""
        return numpy.floor(value / step) * step

    def build_pass_list(self, data):
        """Build a pass list from a dataframe (and process row data).

        Coordinates are binned with a step of 2 via ``to_bin``.

        Args:
            data: A pandas dataframe with the columns x_begin, y_begin, x_end,
                y_end, game_id, team_id, player_id and event_id.

        Returns:
            A list of pass instances, in dataframe row order.
        """
        from model.pass2vec.src.Pass import Pass

        logging.info("Build pass list")
        return [
            Pass(
                self.to_bin(row["x_begin"], 2),
                self.to_bin(row["y_begin"], 2),
                self.to_bin(row["x_end"], 2),
                self.to_bin(row["y_end"], 2),
                row["game_id"],
                row["team_id"],
                row["player_id"],
                row["event_id"])
            for _, row in data.iterrows()
        ]

    @staticmethod
    def build_sequences(pass_list):
        """Build passes sequences from a list of pass.

        Consecutive passes sharing the same ``sequence_id`` form one sequence;
        runs made of a single pass are discarded.

        Args:
            pass_list: A list of pass instances.

        Returns:
            A list of sequence instances, in order of appearance.
        """
        logging.info("Build sequences")
        # groupby walks every pass exactly once, so the last run is emitted
        # too (the former index-based loop never reached the final pass and
        # dropped the trailing sequence).
        runs = (list(run) for _, run in groupby(pass_list, key=lambda passe: passe.sequence_id))
        return [Sequence(run) for run in runs if len(run) > 1]

    def build_data(self, sequences, save_img=False):
        """Turn sequences into a 2-D array with one flattened image per row.

        Args:
            sequences: a list of sequence
            save_img: (optional) forwarded to ``Sequence.to_vec`` as ``save``.

        Returns:
            A numpy array; sequences with fewer than two passes are skipped.
        """
        logging.info("Build data")
        return numpy.array([
            sequence.to_vec(save_img)
            for sequence in sequences
            if len(sequence.pass_list) > 1
        ])

    @staticmethod
    def get_metadata(sequences):
        """Retrieve metadata from many sequences.

        Get id, team_id, game_id, player_list and length for each sequence.

        Args:
            sequences: a list of sequence

        Returns:
            A pandas dataframe with all metadata, one row per sequence.
        """
        logging.info("Get sequences metadata")
        rows = [
            [sequence.id, sequence.team_id, sequence.game_id, sequence.player_list, len(sequence)]
            for sequence in sequences
        ]
        return pandas.DataFrame(rows, columns=["id", "team_id", "game_id", "player_list", "sequence_length"])
"""train.py
Train and save model
"""

import argparse

import pandas

# Length of one flattened 105 x 68 sequence image.
INPUT_DIM = 105 * 68


def _build_models(encoding_dim, input_dim=INPUT_DIM):
    """Build the autoencoder together with its encoder and decoder halves.

    The network is input -> Dense(encoding_dim, relu) -> Dense(input_dim,
    sigmoid). The decoder model reuses the autoencoder's last layer, so it
    shares the trained weights.

    Returns:
        An ``(autoencoder, encoder, decoder)`` tuple of keras models.
    """
    from keras.layers import Input, Dense
    from keras.models import Model

    input_img = Input(shape=(input_dim,))
    encoded = Dense(encoding_dim, activation='relu')(input_img)
    # A former intermediate Dense layer was built here and overwritten on the
    # next line without ever joining the graph; it has been removed.
    decoded = Dense(input_dim, activation='sigmoid')(encoded)

    autoencoder = Model(input_img, decoded)
    encoder = Model(input_img, encoded)

    encoded_input = Input(shape=(encoding_dim,))
    decoder_layer = autoencoder.layers[-1]
    decoder = Model(encoded_input, decoder_layer(encoded_input))
    return autoencoder, encoder, decoder


def main():
    """Train the pass2vec autoencoder and log the run to mlflow."""
    import mlflow
    import mlflow.keras
    from model.pass2vec.src.SequenceFactory import SequencesFactory

    parser = argparse.ArgumentParser()
    parser.add_argument("--split_dataset_rate", default=0.7, type=float)
    parser.add_argument("--encoding_dim", default=256, type=int)
    parser.add_argument("--epochs", default=10, type=int)
    parser.add_argument("--batch_size", default=256, type=int)
    args = parser.parse_args()

    data = pandas.read_csv("model/pass2vec/resources/raw_passes.csv").head(100000).dropna(axis=0)
    print("Data loaded.")
    sequences_factory = SequencesFactory()
    pass_list = sequences_factory.build_pass_list(data)
    sequences = sequences_factory.build_sequences(pass_list)
    result = sequences_factory.build_data(sequences, save_img=False)
    print("Sequences processed.")

    # Ordered split: the first rows train, the remaining rows validate.
    split_index = int(args.split_dataset_rate * result.shape[0])
    x_train = result[:split_index].astype('float32') / 255.
    x_test = result[split_index:].astype('float32') / 255.

    with mlflow.start_run(run_name="pass2vec_encoder"):
        autoencoder, encoder, decoder = _build_models(args.encoding_dim)

        autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy', metrics=['accuracy'])

        # Inputs are also the targets: the network learns to reproduce them.
        history = autoencoder.fit(x_train, x_train,
                                  epochs=args.epochs,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  validation_data=(x_test, x_test))

        mlflow.log_param("split_dataset_rate", args.split_dataset_rate)
        mlflow.log_param("epochs", args.epochs)
        mlflow.log_param("encoding_dim", args.encoding_dim)
        mlflow.log_param("batch_size", args.batch_size)
        mlflow.log_metric("loss", history.history["loss"][-1])
        mlflow.log_metric("val_loss", history.history["val_loss"][-1])
        mlflow.keras.log_model(encoder, "encoder_model")
        mlflow.keras.log_model(decoder, "decoder_model")


if __name__ == "__main__":
    main()
8 | 9 | The goal here is to create a simple model for shot quality evaluation. It can be improved with more data and more modelling. 10 | 11 | ![scheme xg](expected_goal/xg_model_scheme.png) 12 | 13 | ## Expected assist 14 | 15 | Expected assists (xA) are the same as expected goals but for passes. Each pass is evaluated according to its likelihood of being an assist. 16 | 17 | ## Pass2Vec 18 | 19 | For a full explanation of this model, you can read the [Medium blog post](https://medium.com/@benoit.pimpaud/after-raw-stats-exploring-possession-styles-with-data-embeddings-d3ebef718abf) 20 | -------------------------------------------------------------------------------- /model/result_prediction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/result_prediction/__init__.py -------------------------------------------------------------------------------- /model/result_prediction/build_training_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | from sklearn.model_selection import train_test_split 3 | import tensorflow as tf 4 | from model.result_prediction.src.TrainingDataBuilder import TrainingDataBuilder 5 | 6 | 7 | if __name__ == "__main__": 8 | with open("model/result_prediction/config/database.json") as database_configuration_file: 9 | database_config = json.load(database_configuration_file) 10 | 11 | model = tf.keras.models.load_model('mlruns/0/0585a82ef79e4f84ba23320fbf2a87cf/artifacts/expected_goal_model/model.h5') 12 | 13 | training_data_builder = TrainingDataBuilder(database_config, "model/result_prediction/src/sql_queries", model) 14 | 15 | data = training_data_builder.build_training_data() 16 | 17 | train_set, test_set = train_test_split(data, test_size=0.2, random_state=42) 18 | train_set.to_csv("model/result_prediction/data/train_set.csv", index=False) 19 |
test_set.to_csv("model/result_prediction/data/test_set.csv", index=False) -------------------------------------------------------------------------------- /model/result_prediction/config/database.json: -------------------------------------------------------------------------------- 1 | { 2 | "database":"soccer", 3 | "host":"172.28.1.1", 4 | "user":"root", 5 | "password":"root" 6 | } -------------------------------------------------------------------------------- /model/result_prediction/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/result_prediction/main.py -------------------------------------------------------------------------------- /model/result_prediction/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/model/result_prediction/src/__init__.py -------------------------------------------------------------------------------- /model/result_prediction/src/sql_queries/get_conceded_goals.sql: -------------------------------------------------------------------------------- 1 | SELECT SUM(is_goal) AS goal, "startDate" FROM event_shots 2 | LEFT JOIN metadata 3 | ON event_shots.game_id = metadata.game_id 4 | WHERE event_shots.game_id IN ( 5 | SELECT game_id FROM metadata 6 | WHERE (home_team_id = '{team_id}' OR away_team_id = '{team_id}') 7 | AND CAST("startDate" AS DATE) < CAST('{date}' AS DATE) 8 | ORDER BY CAST("startDate" AS DATE) DESC 9 | LIMIT {past_offset} 10 | ) 11 | AND team_id!='{team_id}' 12 | GROUP BY event_shots.game_id, "startDate", team_id 13 | ORDER BY "startDate" -------------------------------------------------------------------------------- /model/result_prediction/src/sql_queries/get_conceded_shots.sql: 
-------------------------------------------------------------------------------- 1 | SELECT x_shot, y_shot, goal_distance, big_chance, event_shots.game_id, "startDate" FROM event_shots 2 | LEFT JOIN metadata 3 | ON metadata.game_id = event_shots.game_id 4 | WHERE event_shots.game_id IN ( 5 | SELECT game_id FROM metadata 6 | WHERE (home_team_id = '{team_id}' OR away_team_id = '{team_id}') 7 | AND CAST("startDate" AS DATE) < CAST('{date}' AS DATE) 8 | ORDER BY CAST("startDate" AS DATE) DESC 9 | LIMIT {past_offset} 10 | ) 11 | AND team_id!='{team_id}' 12 | ORDER BY "startDate" -------------------------------------------------------------------------------- /model/result_prediction/src/sql_queries/get_games.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | game_id, 3 | "startDate", 4 | home_team_id, 5 | away_team_id, 6 | CASE 7 | WHEN result='home' THEN 0 8 | WHEN result='draw' THEN 1 9 | WHEN result='away' THEN 2 10 | ELSE NULL 11 | END AS result 12 | FROM metadata -------------------------------------------------------------------------------- /model/result_prediction/src/sql_queries/get_scored_goals.sql: -------------------------------------------------------------------------------- 1 | SELECT SUM(is_goal) AS goal, "startDate" FROM event_shots 2 | LEFT JOIN metadata 3 | ON event_shots.game_id = metadata.game_id 4 | WHERE event_shots.game_id IN ( 5 | SELECT game_id FROM metadata 6 | WHERE (home_team_id = '{team_id}' OR away_team_id = '{team_id}') 7 | AND CAST("startDate" AS DATE) < CAST('{date}' AS DATE) 8 | ORDER BY CAST("startDate" AS DATE) DESC 9 | LIMIT {past_offset} 10 | ) 11 | AND team_id='{team_id}' 12 | GROUP BY event_shots.game_id, "startDate", team_id 13 | ORDER BY "startDate" -------------------------------------------------------------------------------- /model/result_prediction/src/sql_queries/get_shots.sql: -------------------------------------------------------------------------------- 1 | SELECT 
"""train.py"""
import numpy as np
import pandas as pd

np.random.seed(8)


def process_features(data):
    """Keep only the feature columns, i.e. those whose name contains "goals".

    Args:
        data (pandas.DataFrame): Dataframe
    Returns:
        pandas.DataFrame: Dataframe restricted to the selected columns
    """
    selected = [column for column in data.columns if "goals" in column]
    return data[selected]


def process_target(data):
    """Extract the target column.

    Args:
        data (pandas.DataFrame): Dataframe
    Returns:
        pandas.Series: the "result" column
    """
    return data["result"]


def split_feature_target(data):
    """Split a dataset into its features and its target.

    Args:
        data (pandas.DataFrame): Dataframe
    Returns:
        pandas.DataFrame: features, as returned by process_features
        pandas.Series: targets, as returned by process_target
    """
    features = process_features(data)
    target = process_target(data)
    return features, target


if __name__ == "__main__":
    import mlflow
    from sklearn.metrics import accuracy_score

    import autosklearn.classification

    train_dataset = pd.read_csv("model/result_prediction/data/train_set.csv")
    test_dataset = pd.read_csv("model/result_prediction/data/test_set.csv")

    x_train, y_train = split_feature_target(train_dataset)
    x_test, y_test = split_feature_target(test_dataset)

    with mlflow.start_run(run_name="result_prediction"):
        # The model search is capped at 300 seconds.
        model = autosklearn.classification.AutoSklearnClassifier(time_left_for_this_task=300)
        model.fit(x_train, y_train)
        accuracy = accuracy_score(y_test, model.predict(x_test))

        mlflow.log_param("features", list(x_train.columns))
        mlflow.log_metric("accuracy", accuracy)
"home_cluster_14", 30 | team_style_cluster.cluster_15 AS "home_cluster_15", 31 | team_style_cluster.cluster_16 AS "home_cluster_16", 32 | team_style_cluster.cluster_17 AS "home_cluster_17", 33 | team_style_cluster.cluster_18 AS "home_cluster_18", 34 | team_style_cluster.cluster_19 AS "home_cluster_19", 35 | team_style_cluster.cluster_20 AS "home_cluster_20", 36 | team_style_cluster.cluster_21 AS "home_cluster_21", 37 | team_style_cluster.cluster_22 AS "home_cluster_22", 38 | team_style_cluster.cluster_23 AS "home_cluster_23", 39 | team_style_cluster.cluster_24 AS "home_cluster_24", 40 | team_style_cluster.cluster_25 AS "home_cluster_25", 41 | team_style_cluster.cluster_26 AS "home_cluster_26", 42 | team_style_cluster.cluster_27 AS "home_cluster_27", 43 | team_style_cluster.cluster_28 AS "home_cluster_28", 44 | team_style_cluster.cluster_29 AS "home_cluster_29", 45 | team_style_cluster.cluster_30 AS "home_cluster_30", 46 | team_style_cluster.cluster_31 AS "home_cluster_31", 47 | team_style_cluster.cluster_32 AS "home_cluster_32", 48 | team_style_cluster.cluster_33 AS "home_cluster_33", 49 | team_style_cluster.cluster_34 AS "home_cluster_34", 50 | team_style_cluster.cluster_35 AS "home_cluster_35", 51 | team_style_cluster.cluster_36 AS "home_cluster_36", 52 | team_style_cluster.cluster_37 AS "home_cluster_37", 53 | team_style_cluster.cluster_38 AS "home_cluster_38", 54 | team_style_cluster.cluster_39 AS "home_cluster_39", 55 | team_style_cluster.cluster_40 AS "home_cluster_40", 56 | team_style_cluster.cluster_41 AS "home_cluster_41", 57 | team_style_cluster.cluster_42 AS "home_cluster_42", 58 | team_style_cluster.cluster_43 AS "home_cluster_43", 59 | team_style_cluster.cluster_44 AS "home_cluster_44", 60 | team_style_cluster.cluster_45 AS "home_cluster_45", 61 | team_style_cluster.cluster_46 AS "home_cluster_46", 62 | team_style_cluster.cluster_47 AS "home_cluster_47", 63 | team_style_cluster.cluster_48 AS "home_cluster_48", 64 | team_style_cluster.cluster_49 AS 
"home_cluster_49", 65 | away_team_style_cluster.cluster_0 AS "away_cluster_0", 66 | away_team_style_cluster.cluster_1 AS "away_cluster_1", 67 | away_team_style_cluster.cluster_2 AS "away_cluster_2", 68 | away_team_style_cluster.cluster_3 AS "away_cluster_3", 69 | away_team_style_cluster.cluster_4 AS "away_cluster_4", 70 | away_team_style_cluster.cluster_5 AS "away_cluster_5", 71 | away_team_style_cluster.cluster_6 AS "away_cluster_6", 72 | away_team_style_cluster.cluster_7 AS "away_cluster_7", 73 | away_team_style_cluster.cluster_8 AS "away_cluster_8", 74 | away_team_style_cluster.cluster_9 AS "away_cluster_9", 75 | away_team_style_cluster.cluster_10 AS "away_cluster_10", 76 | away_team_style_cluster.cluster_11 AS "away_cluster_11", 77 | away_team_style_cluster.cluster_12 AS "away_cluster_12", 78 | away_team_style_cluster.cluster_13 AS "away_cluster_13", 79 | away_team_style_cluster.cluster_14 AS "away_cluster_14", 80 | away_team_style_cluster.cluster_15 AS "away_cluster_15", 81 | away_team_style_cluster.cluster_16 AS "away_cluster_16", 82 | away_team_style_cluster.cluster_17 AS "away_cluster_17", 83 | away_team_style_cluster.cluster_18 AS "away_cluster_18", 84 | away_team_style_cluster.cluster_19 AS "away_cluster_19", 85 | away_team_style_cluster.cluster_20 AS "away_cluster_20", 86 | away_team_style_cluster.cluster_21 AS "away_cluster_21", 87 | away_team_style_cluster.cluster_22 AS "away_cluster_22", 88 | away_team_style_cluster.cluster_23 AS "away_cluster_23", 89 | away_team_style_cluster.cluster_24 AS "away_cluster_24", 90 | away_team_style_cluster.cluster_25 AS "away_cluster_25", 91 | away_team_style_cluster.cluster_26 AS "away_cluster_26", 92 | away_team_style_cluster.cluster_27 AS "away_cluster_27", 93 | away_team_style_cluster.cluster_28 AS "away_cluster_28", 94 | away_team_style_cluster.cluster_29 AS "away_cluster_29", 95 | away_team_style_cluster.cluster_30 AS "away_cluster_30", 96 | away_team_style_cluster.cluster_31 AS "away_cluster_31", 97 | 
away_team_style_cluster.cluster_32 AS "away_cluster_32", 98 | away_team_style_cluster.cluster_33 AS "away_cluster_33", 99 | away_team_style_cluster.cluster_34 AS "away_cluster_34", 100 | away_team_style_cluster.cluster_35 AS "away_cluster_35", 101 | away_team_style_cluster.cluster_36 AS "away_cluster_36", 102 | away_team_style_cluster.cluster_37 AS "away_cluster_37", 103 | away_team_style_cluster.cluster_38 AS "away_cluster_38", 104 | away_team_style_cluster.cluster_39 AS "away_cluster_39", 105 | away_team_style_cluster.cluster_40 AS "away_cluster_40", 106 | away_team_style_cluster.cluster_41 AS "away_cluster_41", 107 | away_team_style_cluster.cluster_42 AS "away_cluster_42", 108 | away_team_style_cluster.cluster_43 AS "away_cluster_43", 109 | away_team_style_cluster.cluster_44 AS "away_cluster_44", 110 | away_team_style_cluster.cluster_45 AS "away_cluster_45", 111 | away_team_style_cluster.cluster_46 AS "away_cluster_46", 112 | away_team_style_cluster.cluster_47 AS "away_cluster_47", 113 | away_team_style_cluster.cluster_48 AS "away_cluster_48", 114 | away_team_style_cluster.cluster_49 AS "away_cluster_49" 115 | FROM games 116 | LEFT JOIN team_style_cluster 117 | ON games.home_team_id = team_style_cluster.team_id 118 | LEFT JOIN (SELECT * FROM team_style_cluster) AS away_team_style_cluster 119 | ON games.away_team_id = away_team_style_cluster.team_id -------------------------------------------------------------------------------- /notebooks/pass2vec/img/autoencoder_scheme.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/autoencoder_scheme.ai -------------------------------------------------------------------------------- /notebooks/pass2vec/img/autoencoder_scheme.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/autoencoder_scheme.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample.ai -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n14_25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n14_25.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n17_500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n17_500.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n287_500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n287_500.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n361_500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n361_500.png 
-------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n3_25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n3_25.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n455_500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n455_500.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n4_25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n4_25.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/cluster_sample_n53_500.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/cluster_sample_n53_500.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/sample_possessions.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/sample_possessions.jpg -------------------------------------------------------------------------------- /notebooks/pass2vec/img/team_cluster_frequence_heatmap.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/team_cluster_frequence_heatmap.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/team_cluster_frequence_heatmap_small.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/team_cluster_frequence_heatmap_small.jpg -------------------------------------------------------------------------------- /notebooks/pass2vec/img/tmp.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/tmp.psd -------------------------------------------------------------------------------- /notebooks/pass2vec/img/tsne_kmeans_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/tsne_kmeans_plot.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/tsne_kmeans_plot_k25.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/tsne_kmeans_plot_k25.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/tsne_sequence_length_plot-01.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/tsne_sequence_length_plot-01.png -------------------------------------------------------------------------------- /notebooks/pass2vec/img/tsne_sequence_length_plot.ai: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/tsne_sequence_length_plot.ai -------------------------------------------------------------------------------- /notebooks/pass2vec/img/tsne_sequence_length_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/notebooks/pass2vec/img/tsne_sequence_length_plot.png -------------------------------------------------------------------------------- /notebooks/pass2vec/notebook.rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Pass2Vec Experiments" 3 | author: "Benoit Pimpaud" 4 | date: "09/04/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE, warning=FALSE, message=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE) 10 | knitr::opts_chunk$set(fig.width=20, fig.height=15, fig.align="center") 11 | library(tidyverse) 12 | library(hrbrthemes) 13 | library(gghighlight) 14 | library(knitr) 15 | library(pander) 16 | library(viridis) 17 | set.seed(42) 18 | ``` 19 | 20 | ## Data loading 21 | 22 | ```{r, echo=FALSE, warning=FALSE} 23 | data <- read_csv("notebooks/pass2vec/data/all_encoded_pass_with_tsne.csv", col_types=cols(team_id=col_character())) 24 | data <- data %>% mutate(img_file=paste0("/data/model/pass2vec/resources/seq_", id, ".png")) %>% 25 | mutate(img_exist=as.integer(file.exists(img_file))) 26 | 27 | ``` 28 | 29 | ## T-SNE plotting 30 | 31 | ```{r, echo=FALSE} 32 | 
tsne_sequence_length_plot <- ggplot(data) + 33 | geom_point(aes(x=t1, y=t2, color=sequence_length)) + 34 | scale_color_viridis(option="viridis") + 35 | theme_minimal() + 36 | xlab("component 1") + 37 | ylab("component 2") 38 | 39 | tsne_sequence_length_plot 40 | 41 | ggsave(filename="notebooks/pass2vec/img/tsne_sequence_length_plot.png", tsne_sequence_length_plot, width=18, height=10, dpi=300) 42 | ``` 43 | 44 | ## T-SNE with bucket (K-means clustering) 45 | 46 | ```{r, echo=FALSE} 47 | 48 | cluster_number <- 500 49 | kmeans_result <- kmeans(data %>% 50 | select(t1, t2) %>% 51 | na.omit(), 52 | cluster_number, 53 | iter.max = 50) 54 | 55 | data <- data %>% 56 | mutate(kmeans_cluster=kmeans_result[["cluster"]]) 57 | 58 | tsne_kmeans_plot <- ggplot(data) + 59 | geom_point(aes(x=t1, y=t2, color=factor(kmeans_cluster))) + 60 | scale_color_viridis_d() + 61 | theme_minimal() + 62 | xlab("component 1") + 63 | ylab("component 2") + 64 | theme(legend.position="none") 65 | 66 | tsne_kmeans_plot 67 | 68 | ggsave(filename="notebooks/pass2vec/img/tsne_kmeans_plot.png", tsne_kmeans_plot, width=18, height=10, dpi=300) 69 | ``` 70 | 71 | ## Teams dispatch 72 | 73 | ```{r, echo=FALSE, results='asis'} 74 | for(team_id in unique(data$team_id)){ 75 | cat(paste0(team_id, ": ![img](https://d2zywfiolv4f83.cloudfront.net/img/teams/",team_id,".png)")) 76 | } 77 | ``` 78 | 79 | ```{r, echo=FALSE, warning=FALSE, message=FALSE, fig.width=10, fig.height=50} 80 | team_cluster_frequence <- data %>% 81 | group_by(kmeans_cluster, team_id) %>% 82 | summarise(count = n()) %>% 83 | mutate(freq=count/sum(count)) %>% 84 | select(team_id, kmeans_cluster, freq) %>% 85 | filter(!(team_id %in% c(170, 16, 211, 23, 21, 166, 168, 24, 188, 161, 214))) %>% # removing relegated team 86 | spread(key=kmeans_cluster, value=freq) 87 | 88 | write.csv(team_cluster_frequence, file="notebooks/pass2vec/data/team_cluster_frequence.csv", row.names=FALSE) 89 | 90 | team_cluster_frequence_heatmap <- team_cluster_frequence 
%>% gather(col, Value, -team_id) %>% 91 | mutate(row = factor(team_id, levels = rev(unique(team_id))), Value = Value) %>% 92 | arrange(desc(Value)) %>% 93 | ggplot(aes(x=reorder(row, Value), y=col, fill = Value)) + 94 | geom_tile() + 95 | scale_fill_viridis(option="viridis") + 96 | theme_minimal() + 97 | labs(x="", y="") 98 | 99 | team_cluster_frequence_heatmap 100 | 101 | ggsave(filename="notebooks/pass2vec/img/team_cluster_frequence_heatmap.png", team_cluster_frequence_heatmap, width=8, height=35, dpi=300) 102 | ``` 103 | 104 | ## Clusters samples 105 | 106 | ```{r, echo=FALSE, results='asis'} 107 | clusters_img <- data %>% 108 | filter(img_exist==1) %>% 109 | group_by(kmeans_cluster) %>% 110 | top_n(n=5, wt = img_file) %>% 111 | select(kmeans_cluster, img_file) %>% 112 | mutate(img_file=pandoc.image.return(img_file)) 113 | 114 | clusters_freq <- data %>% 115 | group_by(kmeans_cluster) %>% 116 | summarise(n=n()) %>% 117 | mutate(freq=round(100*n/sum(n), 2)) %>% 118 | arrange(desc(freq)) 119 | 120 | cluster_summary <- merge(clusters_img, clusters_freq) %>% 121 | select(kmeans_cluster, freq, img_file) %>% 122 | mutate(cluster=paste0("Cluster ", kmeans_cluster)) %>% 123 | arrange(desc(freq)) 124 | 125 | kable(cluster_summary) 126 | ``` 127 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Football Models and Visualisations 2 | 3 | --- 4 | 5 | This repository is a personal project where I develop football models and visualisations. 6 | 7 | It's build on top of different technologies such as: 8 | * Python: for data processing and machine learning (Tensorflow, Scikit-Learn and MLFlow). 9 | * R: all data-visualisation stuff (dplyr, ggplot2, magick). 10 | * Postgres SQL: storing and querying data easily. 11 | * Docker: to orchestrate all these elements together and easy install/startup. 
12 | * Google Cloud Platform: to run heavy jobs in the cloud. 13 | 14 | ## Samples & Results 15 | 16 | ![pass-network](https://pbs.twimg.com/media/Dw-y1twX0AESyYK.jpg) 17 | 18 | ![pass-sonar](https://pbs.twimg.com/media/DwBMF3wXQAIsQPB.jpg) 19 | 20 | ![assist-shot cluster map](visualisation/maps/assist_shot_cluster_map/img/arsenal1819_cluster10_assists_shots_cluster_map.png) 21 | 22 | ![rollmean](visualisation/rollmean/img/manU_xgvs_xgc.png) 23 | 24 | ![xg-map](visualisation/maps/xg_map/img/lacazette_xgmap.jpg) 25 | 26 | ![xa-map](visualisation/maps/xa_map/img/lukaku_xa_map_1718.jpg) 27 | 28 | ![image](visualisation/lineup/img/arsenal_lineup.png) 29 | 30 | ![image](visualisation/PCA/ozil_comparison.png) 31 | 32 | ![image](visualisation/dendogram/dribble_goals.png) 33 | 34 | 35 | ## Architecture 36 | 37 | The project contains five folders: 38 | 39 | * `./app` : possibly deprecated design, mostly for data integration and ease of use. 40 | * `./data`: where raw data are stored (in addition to the database). Not in git :wink:. 41 | * `./model`: machine learning models (expected goal for example). 42 | * `./src`: source files for crawlers, database connection/ingestion, SQL queries, etc... 43 | * `./visualisation`: source code for data-visualization, most recent works (on maps) are in `./visualisation/maps` subfolders. 44 | 45 | 46 | ## Usage 47 | 48 | ### Running model applications 49 | 50 | 1. Start corresponding container: `docker-compose up -d model`. 51 | 2. For ease of use, open a shell in the container: `docker exec -it model bash` 52 | 3. All data are mapped to the local environment: `cd /data` 53 | * Expected goal model: `python -m model.expected_goal.main --help`. 54 | * Expected assist model: `python -m model.expected_assist.main --help`. 55 | * Possession2Vec model: `python -m model.pass2vec.main --help` 56 | 57 | ### Running Passmaps visualisations 58 | 59 | 1. Start corresponding container: `docker-compose up -d passmap`. 60 | 2. Go to `http://localhost:8082/`. 
61 | 62 | 63 | ## TODO 64 | 65 | * Improve models 66 | * Add more documentation. 67 | * Clean some viz stuff. 68 | * Better Docker management. 69 | 70 | 71 | ## Contacts 72 | Any questions or suggestions? Reach out on [Twitter @Ben8t](https://twitter.com/Ben8t). 73 | -------------------------------------------------------------------------------- /src/__init_.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/src/__init_.py -------------------------------------------------------------------------------- /src/crawler/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | RUN apt-get -qq update -y && \ 4 | apt-get install curl -y && \ 5 | apt-get install unzip -y && \ 6 | apt-get install -my wget gnupg 7 | 8 | # Install Chrome WebDriver 9 | RUN CHROMEDRIVER_VERSION=`curl -sS chromedriver.storage.googleapis.com/LATEST_RELEASE` && \ 10 | mkdir -p /opt/chromedriver-$CHROMEDRIVER_VERSION && \ 11 | curl -sS -o /tmp/chromedriver_linux64.zip http://chromedriver.storage.googleapis.com/$CHROMEDRIVER_VERSION/chromedriver_linux64.zip && \ 12 | unzip -qq /tmp/chromedriver_linux64.zip -d /opt/chromedriver-$CHROMEDRIVER_VERSION && \ 13 | rm /tmp/chromedriver_linux64.zip && \ 14 | chmod +x /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver && \ 15 | ln -fs /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver /usr/local/bin/chromedriver 16 | 17 | # Install Google Chrome 18 | RUN curl -sS -o - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \ 19 | echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list && \ 20 | apt-get -yqq update && \ 21 | apt-get -yqq install google-chrome-stable && \ 22 | rm -rf /var/lib/apt/lists/* 23 | 24 | # Install python dependencies 25 | RUN pip install requests urllib3==1.23 beautifulsoup4 selenium 
class WhoScoredCrawler():
    """Crawl WhoScored match pages with headless Chrome and store them as JSON."""

    def __init__(self):
        """
        Start a headless Chrome driver with a 30 second implicit wait.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('headless')
        options.add_argument('--no-sandbox')
        # `options=` replaces the deprecated `chrome_options=` keyword.
        driver = webdriver.Chrome(options=options)
        driver.implicitly_wait(30)
        self.__driver = driver

    def crawl(self, url):
        """
        Crawl data from url
        :param url: a WhoScored URL
        :return file_path: path of the JSON file written; None when the file
            already exists or when no JSON object is found in the page
        """
        file_path = "data/raw/" + url.split("/")[-1].replace("\n", "") + ".json"
        if os.path.isfile(file_path):
            print("Data already download: ", file_path)
            return None
        print("Processing: ", url)
        self.__driver.get(url)
        response = self.__driver.page_source
        tree = html.fromstring(response)
        data = tree.xpath('//*[@id="layout-content-wrapper"]/script[1]/text()')[0].strip()
        processed_data = re.search("\\{.*\\}", data, re.IGNORECASE)
        if processed_data is None:
            # Nothing to parse: skip explicitly instead of relying on a
            # variable that was never bound.
            print("No match data found: ", url)
            return None

        loaded_data = json.loads(processed_data.group())

        # The target directory may not exist on a fresh checkout.
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        with open(file_path, 'w') as outfile:
            json.dump(loaded_data, outfile)

        self.__download_images(loaded_data)
        return file_path

    def __download_images(self, json_data):
        """
        Download team logo images (thanks to team id)
        :param json_data: loaded data to gather team id
        """
        base_url = "https://d2zywfiolv4f83.cloudfront.net/img/teams/"
        team_ids = [str(json_data["home"]["teamId"]), str(json_data["away"]["teamId"])]
        os.makedirs("data/images", exist_ok=True)
        for team_id in team_ids:
            response = requests.get(base_url + team_id + ".png")
            if response.status_code != 200:
                # Do not store an error page under a .png name.
                print("Logo not available for team: ", team_id)
                continue
            with open("data/images/" + team_id + ".png", "wb") as file:
                file.write(response.content)

    def close(self):
        """Quit the underlying Chrome driver."""
        self.__driver.quit()


class PassParser():
    """Extract pass events from a loaded match payload (dict with an "events" list)."""

    def __init__(self):
        pass

    @staticmethod
    def distance(x1, y1, x2, y2):
        """
        Euclidean distance between (x1, y1) and (x2, y2)
        :return: the distance, or None when x1 or y1 is None
        """
        if x1 is None or y1 is None:
            return None
        return math.sqrt((x2 - x1)**2 + (y2 - y1)**2)

    def get_pass(self, id, data):
        """
        Parse pass data
        :param id: game identifier copied into every row (name shadows the
            builtin but is kept for caller compatibility)
        :param data: loaded match data; data["events"] is iterated in order
        :return response: a list of dict containing pass informations
        """
        response = []
        events = data["events"]
        for index, event in enumerate(events):
            if "Pass" not in event["type"]["displayName"]:
                continue

            # Flatten qualifiers to {displayName: value}; a qualifier without
            # its own "value" falls back to its type value.
            clean_dict = {}
            for qualifier in event["qualifiers"]:
                if "value" in qualifier:
                    clean_dict[qualifier["type"]["displayName"]] = qualifier["value"]
                else:
                    clean_dict[qualifier["type"]["displayName"]] = qualifier["type"]["value"]

            processed_data = {}
            processed_data["game_id"] = id
            processed_data["event_id"] = str(event["id"])
            # Coordinates are rescaled from a 0-100 range to 105 x 68
            # (presumably pitch metres - TODO confirm).
            processed_data["x_begin"] = 105*event["x"]/100
            processed_data["y_begin"] = 68*event["y"]/100
            processed_data["x_end"] = 105*float(clean_dict["PassEndX"])/100 if "PassEndX" in clean_dict else None
            processed_data["y_end"] = 68*float(clean_dict["PassEndY"])/100 if "PassEndY" in clean_dict else None
            processed_data["goal_distance"] = self.distance(processed_data["x_end"], processed_data["y_end"], 105, 34)
            processed_data["type_value"] = event["type"]["value"]
            processed_data["type_name"] = event["type"]["displayName"]
            processed_data["player_id"] = str(event["playerId"])
            processed_data["team_id"] = str(event["teamId"])

            # A pass is an assist when flagged as such or when the very next
            # event is a goal. The last event has no successor, so guard the
            # lookup instead of indexing past the end of the list.
            next_event = events[index + 1] if index + 1 < len(events) else None
            if "IntentionalGoalAssist" in clean_dict:
                is_assist = 1
            elif next_event is not None and next_event["type"]["displayName"] == "Goal":
                is_assist = 1
            else:
                is_assist = 0
            processed_data["is_assist"] = is_assist
            processed_data["key_pass"] = 1 if "KeyPass" in clean_dict else 0
            processed_data["big_chance_created"] = 1 if "BigChanceCreated" in clean_dict else 0
            response.append(processed_data)
        return response


class ShotParser():
    """Extract shot and goal events from a loaded match payload."""

    def __init__(self):
        pass

    @staticmethod
    def distance(x1, y1, x2, y2):
        """Euclidean distance between (x1, y1) and (x2, y2)."""
        return math.sqrt((x2 - x1)**2 + (y2 - y1)**2)

    def get_shots(self, id, data):
        """
        Parse shots data
        :param id: game identifier copied into every row (name shadows the
            builtin but is kept for caller compatibility)
        :param data: loaded match data; data["events"] is iterated in order
        :return response: a list of dict containing shots informations
        """
        response = []
        for event in data["events"]:
            type_name = event["type"]["displayName"]
            if "Shot" not in type_name and "Goal" not in type_name:
                continue

            processed_data = {}
            processed_data["game_id"] = id
            processed_data["event_id"] = str(event["id"])
            # Same 0-100 -> 105 x 68 rescaling as for passes.
            processed_data["x_shot"] = 105*event["x"]/100
            processed_data["y_shot"] = 68*event["y"]/100
            processed_data["goal_distance"] = self.distance(processed_data["x_shot"], processed_data["y_shot"], 105, 34)
            processed_data["type_value"] = event["type"]["value"]
            processed_data["type_name"] = type_name
            processed_data["player_id"] = str(event["playerId"])
            processed_data["team_id"] = str(event["teamId"])
            # Type value 16 counts as a goal only within 60 units of the goal
            # at (105, 34); presumably this filters own goals - TODO confirm.
            scored = event["type"]["value"] == 16 and processed_data["goal_distance"] < 60
            processed_data["is_goal"] = 1 if scored else 0
            # Qualifier type value 214 marks the shot as a big chance.
            qualifier_values = [qualifier["type"]["value"] for qualifier in event["qualifiers"]]
            processed_data["big_chance"] = 1 if 214 in qualifier_values else 0
            response.append(processed_data)
        return response
float, 28 | type_name varchar(80), 29 | player_id varchar(80), 30 | team_id varchar(80), 31 | is_goal float, 32 | big_chance float 33 | ); 34 | -- Table: events 35 | DROP TABLE IF EXISTS public.events; 36 | CREATE TABLE public.events( 37 | game_id varchar(80), 38 | event_id varchar(80), 39 | minute float, 40 | second float, 41 | team_id varchar(80), 42 | player_id varchar(80), 43 | x float, 44 | y float, 45 | type_value float, 46 | type_name varchar(80), 47 | outcome_type_value float, 48 | outcome_type_name varchar(80), 49 | is_touch float 50 | ); 51 | -- Table: metadata 52 | DROP TABLE IF EXISTS public.metadata; 53 | CREATE TABLE public.metadata( 54 | "game_id" varchar(80), 55 | "weatherCode" varchar(80), 56 | "timeStamp" varchar(80), 57 | "score" varchar(80), 58 | "minuteExpanded" int, 59 | "periodCode" int, 60 | "startDate" varchar(80), 61 | "htScore" varchar(80), 62 | "elapsed" varchar(80), 63 | "venueName" varchar(80), 64 | "maxMinute" int, 65 | "expandedMaxMinute" int, 66 | "timeoutInSeconds" varchar(80), 67 | "attendance" int, 68 | "statusCode" int, 69 | "etScore" varchar(80), 70 | "startTime" varchar(80), 71 | "maxPeriod" int, 72 | "ftScore" varchar(80), 73 | "pkScore" varchar(80), 74 | "home_team_id" varchar(80), 75 | "home_team_name" varchar(80), 76 | "home_manager_name" varchar(80), 77 | "home_formation" varchar(80), 78 | "away_team_id" varchar(80), 79 | "away_team_name" varchar(80), 80 | "away_manager_name" varchar(80), 81 | "away_formation" varchar(80), 82 | "home_score" int, 83 | "away_score" int, 84 | "result" varchar(80) 85 | ); 86 | -- Table: player_base 87 | DROP TABLE IF EXISTS public.player_base; 88 | CREATE TABLE public.player_base( 89 | game_id varchar(80), 90 | field varchar(80), 91 | team_id varchar(80), 92 | player_id varchar(80), 93 | player_name varchar(80), 94 | player_position varchar(80), 95 | player_height float, 96 | player_weight float, 97 | player_age int 98 | ); 
-------------------------------------------------------------------------------- /src/sql_queries/assist-shots-clustering.sql: -------------------------------------------------------------------------------- 1 | WITH assists AS ( 2 | SELECT 3 | game_id, 4 | event_id, 5 | x_begin AS x_pass_begin, 6 | y_begin AS y_pass_begin, 7 | x_end AS x_pass_end, 8 | y_end AS y_pass_end 9 | FROM event_pass 10 | WHERE is_assist=1 11 | ), 12 | 13 | shots AS ( 14 | SELECT 15 | game_id, 16 | event_id, 17 | x_shot, 18 | y_shot 19 | FROM event_shots 20 | ), 21 | 22 | events AS ( 23 | SELECT 24 | game_id, 25 | event_id, 26 | LEAD(event_id,1) OVER (ORDER BY game_id, minute, second) AS next_event_id, 27 | minute, 28 | second, 29 | player_id, 30 | team_id 31 | FROM events 32 | ORDER BY game_id, minute, second 33 | ) 34 | 35 | SELECT * FROM ( 36 | SELECT 37 | events.game_id, 38 | events.event_id, 39 | events.next_event_id, 40 | events.minute, 41 | events.second, 42 | events.player_id, 43 | events.team_id, 44 | assists.x_pass_begin, 45 | assists.y_pass_begin, 46 | assists.x_pass_end, 47 | assists.y_pass_end, 48 | shots.x_shot, 49 | shots.y_shot 50 | FROM events 51 | INNER JOIN assists 52 | ON events.event_id = assists.event_id 53 | LEFT JOIN shots 54 | ON events.next_event_id = shots.event_id 55 | ) AS fully 56 | WHERE game_id IN ( 57 | SELECT game_id FROM metadata WHERE (home_team_id = '13' OR away_team_id = '13') AND "startDate" > '2017-07-01' AND "startDate" < '2018-07-01' 58 | ) 59 | AND team_id != '13' 60 | -------------------------------------------------------------------------------- /src/sql_queries/goal_distribution.sql: -------------------------------------------------------------------------------- 1 | WITH teams AS ( 2 | SELECT DISTINCT 3 | home_team_id AS team_id, 4 | home_team_name AS team_name 5 | FROM metadata 6 | ) 7 | 8 | SELECT DISTINCT 9 | event_shots.game_id, 10 | event_shots.event_id, 11 | events.minute, 12 | teams.team_name, 13 | metadata.home_team_name, 14 | 
metadata.away_team_name, 15 | CASE 16 | WHEN teams.team_name=metadata.home_team_name THEN home_team_name 17 | ELSE away_team_name 18 | END AS goal_for, 19 | CASE 20 | WHEN teams.team_name=metadata.away_team_name THEN home_team_name 21 | ELSE away_team_name 22 | END AS goal_against 23 | FROM event_shots 24 | LEFT JOIN events 25 | ON events.event_id = event_shots.event_id 26 | LEFT JOIN teams 27 | ON teams.team_id = event_shots.team_id 28 | LEFT JOIN metadata 29 | ON metadata.game_id = event_shots.game_id 30 | WHERE is_goal=1 AND 31 | metadata."startDate" > '2018-07-01' -------------------------------------------------------------------------------- /src/sql_queries/lineup_evolution.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | "startDate", 3 | player_name, 4 | CASE WHEN 5 | player_position LIKE '%Sub' THEN 'sub' 6 | ELSE 'tit' END AS position, 7 | DENSE_RANK() OVER(ORDER BY "startDate") AS matchday 8 | FROM player_base 9 | INNER JOIN (SELECT game_id, "startDate" FROM metadata WHERE "startDate" > '2018-07-01' AND "startDate" < '2019-07-01') AS met 10 | ON met.game_id = player_base.game_id 11 | WHERE team_id = '167' 12 | GROUP BY "startDate", player_name, player_position 13 | ORDER BY "startDate" -------------------------------------------------------------------------------- /src/sql_queries/passsonar.sql: -------------------------------------------------------------------------------- 1 | WITH full_pass AS ( 2 | SELECT * FROM event_pass 3 | WHERE game_id IN (SELECT game_id FROM metadata WHERE "startDate" > '2017-06-01' AND "startDate" < '2018-06-01') 4 | AND player_id = '83078' 5 | ) 6 | 7 | SELECT 8 | angle, 9 | COUNT(angle) AS freq, 10 | AVG(distance) AS distance 11 | FROM ( 12 | SELECT 13 | x_begin, 14 | y_begin, 15 | x_end, 16 | y_end, 17 | SQRT(POWER((y_end - y_begin),2) + POWER((x_end - x_begin),2)) AS distance, 18 | -15 * ROUND((ATAN2((y_end - y_begin), (x_end - x_begin)) * 180 / PI())/15) AS angle 19 | 
FROM full_pass 20 | ) AS compute 21 | WHERE angle IS NOT NULL 22 | GROUP BY angle 23 | ORDER BY angle -------------------------------------------------------------------------------- /src/sql_queries/player_pass.sql: -------------------------------------------------------------------------------- 1 | SELECT * FROM event_pass 2 | WHERE game_id IN (SELECT game_id FROM metadata WHERE "startDate" > '2017-06-01' AND "startDate" < '2018-06-01') 3 | AND player_id = '97752' 4 | -------------------------------------------------------------------------------- /src/sql_queries/team_shots_and_conceded.sql: -------------------------------------------------------------------------------- 1 | -- gather team shots and conceded ones (all games) 2 | SELECT * FROM event_shots 3 | LEFT JOIN ( 4 | SELECT game_id, "startDate" FROM metadata 5 | ) AS met 6 | ON met.game_id = event_shots.game_id 7 | WHERE event_shots.game_id IN ( 8 | SELECT game_id FROM metadata WHERE home_team_id = '161' OR away_team_id = '161' 9 | ) 10 | -------------------------------------------------------------------------------- /src/sql_queries/touch_5_minutes_bin.sql: -------------------------------------------------------------------------------- 1 | SELECT 2 | *, 3 | 10 * (TRUNC(time/600) + 1) AS bin 4 | FROM ( 5 | SELECT 6 | minute * 60 + second AS time, 7 | minute, 8 | second, 9 | x, 10 | y, 11 | team_id 12 | FROM events 13 | WHERE game_id = 'bc5f0c1e29733ac5bc94c6e84cdb641f' AND is_touch = 1 14 | ) AS base 15 | ORDER BY time -------------------------------------------------------------------------------- /src/sql_queries/win_draw_lose_count_for_one_team.sql: -------------------------------------------------------------------------------- 1 | SELECT COUNT(result) AS nb_result, result FROM ( 2 | SELECT CASE 3 | WHEN home_team_name = 'Arsenal' AND result='home' THEN 'win' 4 | WHEN (home_team_name = 'Arsenal' OR away_team_name = 'Arsenal') AND result='draw' THEN 'draw' 5 | WHEN away_team_name = 'Arsenal' AND 
result='away' THEN 'win' 6 | ELSE 'lose' 7 | END AS result 8 | FROM metadata 9 | WHERE (home_team_name = 'Arsenal' OR away_team_name = 'Arsenal') 10 | ) AS team_result 11 | GROUP BY result -------------------------------------------------------------------------------- /visualisation/Age/age.r: -------------------------------------------------------------------------------- 1 | # age.r 2 | 3 | library(rvest) 4 | library(plyr) 5 | library(stringr) 6 | library(ggplot2) 7 | library(ggthemes) 8 | library(hrbrthemes) 9 | library(scales) 10 | library(viridis) 11 | library(extrafont) 12 | library(grid) 13 | library(gridExtra) 14 | library(purrr) 15 | library(dplyr) 16 | 17 | data = read.csv("data/data.csv", dec=",") 18 | 19 | min_played = data %>% group_by(Team,Age.group) %>% summarise(mean_min=sum(Mins),score=first(Classement)) 20 | g1 = ggplot(data=min_played,aes(x=reorder(Team,-score),y=mean_min))+ 21 | geom_bar(aes(fill = Age.group),stat = "identity", position = position_stack(reverse = TRUE)) + 22 | coord_flip() + 23 | scale_fill_manual("Age range",values = c("#26a69a", "#ffd54f","#ffb74d"))+ 24 | labs(x="", y="Minutes played",title="Minutes played by age",subtitle="2016-2017 season",caption="data from WhoScored \nby Benoit Pimpaud / @Ben8t",color="")+ 25 | theme_ipsum_rc()+ 26 | theme(legend.position = "right") 27 | 28 | ggsave("img/Minutes played by age.png",g1,width=10,height=7.3) 29 | 30 | gp9 = data %>% group_by(Team,Age.group) %>% summarise(gp9_avg=mean(GoalsPer90),score=first(Classement)) 31 | g2 = ggplot(data=gp9,aes(x=reorder(Team,-score),y=gp9_avg))+ 32 | geom_bar(aes(fill = Age.group),stat = "identity", position = position_stack(reverse = TRUE)) + 33 | coord_flip() + 34 | scale_fill_manual("Age range",values = c("#26a69a", "#ffd54f","#ffb74d"))+ 35 | labs(x="", y="Goals per 90 minutes",title="Average number of goals per 90 minutes by age",subtitle="2016-2017 season",caption="data from WhoScored \nby Benoit Pimpaud / @Ben8t",color="")+ 36 | 
theme_ipsum_rc()+ 37 | theme(legend.position = "right") 38 | 39 | ggsave("img/Average number of goals per 90 minutes by age.png",g2,width=10,height=7.3) 40 | 41 | ap9 = data %>% filter(AssistsPer90<5) %>%group_by(Team,Age.group) %>% summarise(ap9_avg=mean(AssistsPer90),score=first(Classement)) 42 | g3 = ggplot(data=ap9,aes(x=reorder(Team,-score),y=ap9_avg))+ 43 | geom_bar(aes(fill = Age.group),stat = "identity", position = position_stack(reverse = TRUE)) + 44 | coord_flip() + 45 | scale_fill_manual("Age range",values = c("#26a69a", "#ffd54f","#ffb74d"))+ 46 | labs(x="", y="Assists per 90 minutes",title="Average number of assists per 90 minutes by age",subtitle="2016-2017 season",caption="data from WhoScored \nby Benoit Pimpaud / @Ben8t",color="")+ 47 | theme_ipsum_rc()+ 48 | theme(legend.position = "right") 49 | 50 | ggsave("img/Average number of assists per 90 minutes by age.png",g3,width=10,height=7.3) 51 | 52 | 53 | age = data %>% group_by(Team,Age.group) %>% summarise(nb=n(),score=first(Classement)) 54 | g4 = ggplot(data=age,aes(x=reorder(Team,-score),y=nb))+ 55 | geom_bar(aes(fill = Age.group),stat = "identity", position = position_stack(reverse = TRUE)) + 56 | coord_flip() + 57 | scale_fill_manual("Age range",values = c("#26a69a", "#ffd54f","#ffb74d"))+ 58 | labs(x="", y="Number of players",title="Age distribution in Premier League",subtitle="2016-2017 season",caption="data from WhoScored \nby Benoit Pimpaud / @Ben8t",color="")+ 59 | theme_ipsum_rc()+ 60 | theme(legend.position = "right") 61 | 62 | ggsave("img/Age distribution in Premier League.png",g4,width=10,height=7.3) 63 | 64 | 65 | 66 | -------------------------------------------------------------------------------- /visualisation/Age/img/Age distribution in Premier League.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Age/img/Age distribution in 
Premier League.png -------------------------------------------------------------------------------- /visualisation/Age/img/Average number of assists per 90 minutes by age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Age/img/Average number of assists per 90 minutes by age.png -------------------------------------------------------------------------------- /visualisation/Age/img/Average number of goals per 90 minutes by age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Age/img/Average number of goals per 90 minutes by age.png -------------------------------------------------------------------------------- /visualisation/Age/img/Minutes played by age.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Age/img/Minutes played by age.png -------------------------------------------------------------------------------- /visualisation/Notebook/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/.DS_Store -------------------------------------------------------------------------------- /visualisation/Notebook/script/average age of players loaned by league.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/average age of players loaned by league.png 
-------------------------------------------------------------------------------- /visualisation/Notebook/script/average number of players loaned by league.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/average number of players loaned by league.png -------------------------------------------------------------------------------- /visualisation/Notebook/script/number of players loaned by position.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/number of players loaned by position.png -------------------------------------------------------------------------------- /visualisation/Notebook/script/number of players loaned by their club in Bundesliga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/number of players loaned by their club in Bundesliga.png -------------------------------------------------------------------------------- /visualisation/Notebook/script/number of players loaned by their club in La Liga.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/number of players loaned by their club in La Liga.png -------------------------------------------------------------------------------- /visualisation/Notebook/script/number of players loaned by their club in Ligue 1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/number of players loaned by their club in Ligue 1.png -------------------------------------------------------------------------------- /visualisation/Notebook/script/number of players loaned by their club in Premier League.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/number of players loaned by their club in Premier League.png -------------------------------------------------------------------------------- /visualisation/Notebook/script/number of players loaned by their club in Serie A.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/Notebook/script/number of players loaned by their club in Serie A.png -------------------------------------------------------------------------------- /visualisation/PCA/ozil_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/PCA/ozil_comparison.png -------------------------------------------------------------------------------- /visualisation/PCA/ozil_pca.csv: -------------------------------------------------------------------------------- 1 | Player,Apps,Mins,TotalShots,TotalGoals,SuccessfulDribbles,Assists,TacklesWon,Interceptions,Fouled,Offsides,AccuratePasses 2 | Alexis Sánchez,7,595,"3,8","0,9","3,5","0,2","1,7","0,9","2,9","0,6","32,4" 3 | Kevin De Bruyne,11,930,"2,7","0,1",2,"0,5","1,4","1,1","1,8","0,1",40 4 | Eden Hazard,10,837,"2,4","0,5","2,6","0,1","0,8","1,1","2,8","0,3","42,7" 5 | Christian 
Eriksen,13,1163,"3,7","0,3",1,"0,4","1,1","0,4","0,5","0,1","47,3" 6 | Dele Alli,29,2522,"2,6","0,5","1,5","0,2","1,6","0,4","2,4","0,3","32,2" 7 | Mesut Özil,32,2831,"1,3","0,3","0,9","0,3","0,8","0,5",1,"0,6","57,6" 8 | Joshua King,16,1344,"2,5","0,6","2,7",0,"1,6","0,5","1,3","0,1","18,1" 9 | Dimitri Payet,7,613,"2,8",0,"2,1","0,1","0,1","0,4","2,2",0,"33,2" 10 | Juan Mata,6,440,"2,7","0,2","0,4","0,4","0,2","0,8","0,8",0,"48,2" 11 | Gylfi Sigurdsson,12,1048,"3,3","0,3","0,9","0,3","1,1","0,9","0,5",0,25 12 | Manuel Lanzini,15,1208,"2,5","0,4","2,4",0,"1,6","1,2",1,"0,1","45,3" 13 | Pedro,6,507,"2,5","0,2","2,1",0,"1,2","0,9","1,2","0,2","30,2" 14 | David Silva,11,953,"1,4",0,"0,8","0,3","1,5","0,5","1,6","0,1","56,3" 15 | Wayne Rooney,7,598,"2,6","0,3","0,6","0,3","1,1","0,6","0,5","0,5","36,5" 16 | Jason Puncheon,16,1396,"1,2",0,1,"0,1","1,9",1,"1,2",0,"32,4" 17 | Gastón Ramírez,7,513,"1,8","0,2","2,6","0,2",3,"0,9","1,8","0,2","26,8" 18 | Joe Allen,20,1707,"1,6","0,3","0,7","0,1","2,2","1,5",2,0,29 19 | Jack Wilshere,13,1105,"1,5",0,"2,6","0,1","0,6","0,4","1,9",0,"37,7" 20 | Dusan Tadic,19,1540,"2,8","0,1","1,5","0,2","0,6","0,8","1,3","0,1",32 21 | Nacer Chadli,6,450,"1,8","0,8","0,2","0,4",1,1,"0,6","0,2","21,8" 22 | Xherdan Shaqiri,6,476,"2,1","0,2","1,1","0,2","0,9","0,2","1,9","0,4","23,4" 23 | James Morrison,15,1245,"1,2","0,4","0,9","0,1","1,4","0,9","0,8","0,1","31,3" 24 | Ross Barkley,14,1175,"2,5","0,2","1,5","0,2","0,9","0,2","1,1","0,3","37,6" 25 | Yohan Cabaye,6,470,"3,6","0,2","0,8",0,"1,9","2,3","1,3",0,"40,8" 26 | André Ayew,7,542,"1,7","0,3","1,2","0,2","1,3","0,5","1,3","0,5","20,5" 27 | Robert Snodgrass,6,534,"1,7","0,3","0,7","0,2","0,7","0,2","1,5","0,3","32,5" 28 | Adnan Januzaj,9,725,"0,7",0,"2,5","0,2","0,5","0,1","2,4","0,2","14,3" 29 | Evandro,6,418,"0,4",0,"0,4",0,"2,8","0,6","3,4",0,"33,8" -------------------------------------------------------------------------------- /visualisation/PCA/pca.r: 
-------------------------------------------------------------------------------- 1 | # pca.r 2 | 3 | library(tidyverse) 4 | library(FactoMineR) 5 | library(hrbrthemes) 6 | 7 | ozil_pca <- read.csv("ozil_pca.csv", dec=",") %>% select(-Apps,-Mins) 8 | 9 | df.pca <- PCA(X=ozil_pca,quali.sup = 1) 10 | 11 | df.coord=df.pca$ind$coord %>% as.data.frame() %>% select(DefensiveImplication=Dim.1,AttackingIntention=Dim.2) 12 | df.coord$Player=ozil_pca$Player 13 | df.coord = df.coord %>% separate(Player, into=c("Name","Family")) 14 | df.coord$Family[2] = "De Bruyne" 15 | df.coord$Family[12] = "Pedro" 16 | df.coord$Family[28] = "Evandro" 17 | 18 | ggplot(data=df.coord,aes(x=DefensiveImplication,y=AttackingIntention)) + 19 | xlim(-4,5)+ 20 | ylim(-3,6)+ 21 | geom_point(aes(colour=Family)) + 22 | geom_text(aes(label=Family),hjust=0, vjust=0,colour="black")+ 23 | theme_ipsum_rc()+ 24 | theme(axis.title=element_blank(),axis.text=element_blank(),axis.ticks=element_blank())+ 25 | labs(x="DefensiveImplication", y="AttackingIntention",title="Mesut Ozil statistics comparison",subtitle="Premier League 2016/2017",caption='PCA analysis with multiple variables such as shots, goals, assists, tackles won, etc.... 
all recorded as "per 90mins".\nby Benoit Pimpaud / @Ben8t',color="") 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /visualisation/average_time_fielded_player/img/plot20162017.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/average_time_fielded_player/img/plot20162017.png -------------------------------------------------------------------------------- /visualisation/average_time_fielded_player/img/plot20172018.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/average_time_fielded_player/img/plot20172018.png -------------------------------------------------------------------------------- /visualisation/average_time_fielded_player/img/plot20182019.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/average_time_fielded_player/img/plot20182019.png -------------------------------------------------------------------------------- /visualisation/average_time_fielded_player/main.r: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(viridis) 3 | library(hrbrthemes) 4 | library(gghighlight) 5 | 6 | 7 | data <- read_csv("data/average_time_fielded_player_20182019.csv") 8 | 9 | plot <- ggplot(data, aes(x=average_minute, y=position)) + 10 | geom_smooth(method="glm", formula="y~poly(x, 2)", fill="gray", color="#FC3C6C") + 11 | geom_point(aes(color=-position)) + 12 | geom_text(aes(label=home_team_name), hjust=-0.1, vjust=-0.1, size=2, color="white", family="roboto") + 13 | scale_color_viridis(option="viridis") + 14 | 
theme_ipsum_rc() + 15 | labs(x="Average minute of substitution", y="League Position",title="Minute of substitution",subtitle="Premier League 2018-2019",caption="by @Ben8t",color="") + 16 | theme(text=element_text(colour="white"), 17 | plot.background=element_rect(fill="black"), 18 | axis.text=element_text(colour="white"), 19 | legend.text=element_text(colour="white"), 20 | legend.position="None") 21 | 22 | ggsave("img/plot20182019.png", plot, width = 20, height = 12, units = "cm") 23 | -------------------------------------------------------------------------------- /visualisation/contextualized_expected_goal/main.r: -------------------------------------------------------------------------------- 1 | library(readr) 2 | library(ggplot2) 3 | library(dplyr) 4 | library(hrbrthemes) 5 | 6 | all_xg <- read_csv("data/all_xg.csv") 7 | 8 | test = all_xg %>% 9 | filter(game_id == "27f2d04e1caeb9b665c06cc534a73055") %>% 10 | select(event_id, game_id, team_id, player_id, minute, is_goal, expected_goal) %>% 11 | group_by(team_id) %>% 12 | mutate(cum_goal=cumsum(is_goal), cum_xg=cumsum(expected_goal)) %>% 13 | mutate(xg_plus=cum_goal - cum_xg) 14 | 15 | ggplot() + 16 | geom_step(data=test, aes(x=minute, y=xg_plus, color=factor(team_id))) + 17 | geom_hline(yintercept=0, color="black", linetype="dashed") + 18 | geom_text(aes(x=5, y=0.1, label="Upper you are good"), colour="black", angle=0, size=3)+ 19 | geom_hline(yintercept=-1, color="black", linetype="dashed") + 20 | geom_text(aes(x=5, y=-0.9, label="Lower you are bad"), colour="black", angle=0, size=3)+ 21 | labs(x="", y="Goal - xG",title="Contextualized Expected Goal",subtitle="",caption="by @Ben8t",color="") + 22 | theme_ipsum_rc() + 23 | theme( 24 | plot.title = element_text(size=35), 25 | text = element_text(colour="black"), 26 | axis.text.y = element_text(colour="black"), 27 | axis.text.x = element_text(colour="black"), 28 | plot.background = element_rect(fill = "white")) + 29 | theme(axis.text.x = element_text(angle = 0, 
hjust = 1)) 30 | -------------------------------------------------------------------------------- /visualisation/contextualized_expected_goal/notebook.rmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Contextualized Expected Goal" 3 | author: "Benoit Pimpaud" 4 | date: "07/09/2019" 5 | output: html_document 6 | --- 7 | 8 | ```{r setup, include=FALSE, warning=FALSE, message=FALSE} 9 | knitr::opts_chunk$set(echo = TRUE) 10 | knitr::opts_chunk$set(fig.width=20, fig.height=15, fig.align="center") 11 | library(tidyverse) 12 | library(hrbrthemes) 13 | library(gghighlight) 14 | library(knitr) 15 | library(pander) 16 | library(viridis) 17 | library(zoo) 18 | set.seed(42) 19 | ``` 20 | 21 | ## Load data 22 | 23 | ```{r, echo=TRUE, warning=FALSE, message=FALSE} 24 | data <- read_csv("data/all_xg.csv") 25 | ``` 26 | 27 | We filter on 2018-2019 data : 28 | 29 | ```{r, echo=TRUE, warning=FALSE, message=FALSE} 30 | filtered_data <- data %>% filter(startDate > "2018-07-01" & startDate < "2019-07-01") 31 | ``` 32 | 33 | ## Compute "Contextualized Expected Goal" 34 | 35 | For each game and team we compute contextualized expected goal : cumulative sum of goal minus cumulative sum of xG. 
36 | ```{r, echo=TRUE, warning=FALSE, message=FALSE} 37 | ceg <- filtered_data %>% 38 | select(startDate, game_id, team_id, player_id, minute, is_goal, expected_goal) %>% 39 | group_by(game_id, team_id) %>% 40 | mutate(cum_goal=cumsum(is_goal), cum_xg=cumsum(expected_goal)) %>% 41 | mutate(xg_plus=cum_goal - cum_xg) 42 | ``` 43 | 44 | Here is an example of a cXg timeline 45 | 46 | ```{r, echo=FALSE, warning=FALSE, message=FALSE} 47 | timeline_plot <- ggplot() + 48 | geom_step(data=ceg %>% filter(game_id=="3702d04ff2155895a83d3fd602c5f334"), aes(x=minute, y=xg_plus, color=factor(team_id))) + 49 | geom_hline(yintercept=0, color="black", linetype="dashed") + 50 | geom_text(aes(x=5, y=0.1, label="Upper you are good"), colour="black", angle=0, size=3)+ 51 | geom_hline(yintercept=-1, color="black", linetype="dashed") + 52 | geom_text(aes(x=5, y=-0.9, label="Lower you are bad"), colour="black", angle=0, size=3)+ 53 | labs(x="", y="Goal - xG",title="Contextualized Expected Goal",subtitle="",caption="by @Ben8t",color="") + 54 | theme_ipsum_rc() + 55 | theme( 56 | plot.title = element_text(size=35), 57 | text = element_text(colour="black"), 58 | axis.text.y = element_text(colour="black"), 59 | axis.text.x = element_text(colour="black"), 60 | plot.background = element_rect(fill = "white")) + 61 | theme(axis.text.x = element_text(angle = 0, hjust = 1)) 62 | 63 | timeline_plot 64 | ``` 65 | 66 | ## cXg distribution 67 | 68 | ```{r, echo=FALSE, warning=FALSE, message=FALSE} 69 | ceg_distribution <- ceg %>% group_by(team_id) %>% mutate(mean_xgp=mean(xg_plus)) 70 | ceg_distribution$team_id = reorder(factor(ceg_distribution$team_id), ceg_distribution$xg_plus, median) 71 | ggplot(ceg_distribution, aes(x=team_id , y=xg_plus)) + 72 | geom_violin(aes(fill=factor(team_id), group=factor(team_id))) + 73 | geom_boxplot(width=0.1) + 74 | theme_minimal() 75 | ``` 76 | 77 | ## Team timeline 78 | ```{r, echo=FALSE, warning=FALSE, message=FALSE} 79 | ceg_team_timeline <- ceg %>% 80 | 
group_by(game_id, startDate, team_id) %>% 81 | summarise(cxg=mean(xg_plus)) %>% 82 | arrange(., startDate) 83 | 84 | ggplot(ceg_team_timeline, aes(x=startDate, y=rollmeanr(cxg, 5, fill=NA))) + 85 | geom_line() + 86 | facet_grid(team_id ~ .) + 87 | theme_minimal() 88 | ``` 89 | 90 | ## Minute distribution 91 | 92 | ```{r, echo=FALSE, warning=FALSE, message=FALSE} 93 | ceg_minute_distribution <- ceg %>% 94 | group_by(minute, team_id) %>% 95 | summarise(mean_cxg=mean(xg_plus)) 96 | 97 | ggplot(ceg_minute_distribution, aes(x=minute, color=factor(team_id))) + 98 | geom_density() 99 | 100 | ``` -------------------------------------------------------------------------------- /visualisation/cumsum_expected_goal/main.r: -------------------------------------------------------------------------------- 1 | library(readr) 2 | library(zoo) 3 | library(ggplot2) 4 | library(dplyr) 5 | library(ggthemes) 6 | library(hrbrthemes) 7 | 8 | 9 | data <- read_csv("data/all_xg.csv") 10 | 11 | selected_game_id <- "000be874ddf1f5436cb38cfd49fc03c3" 12 | 13 | game_data <- data %>% 14 | filter(game_id == selected_game_id) %>% 15 | select(team_id, minute, expected_goal, is_goal) 16 | 17 | plot_data <- game_data %>% 18 | group_by(team_id) %>% 19 | arrange(minute) %>% 20 | mutate(cumsum_xg=cumsum(expected_goal), cumsum_goal=cumsum(is_goal)) 21 | 22 | ggplot() + 23 | geom_step(data=plot_data, aes(x=minute, y=cumsum_xg, color=factor(team_id))) + 24 | geom_step(data=plot_data, aes(x=minute, y=cumsum_goal, color=factor(team_id)), linetype=2) + 25 | theme_ipsum_rc() + 26 | theme( 27 | plot.title = element_text(size=35), 28 | text = element_text(colour="white"), 29 | axis.text.y = element_text(colour="white"), 30 | axis.text.x = element_text(colour="white"), 31 | panel.grid.major = element_blank(), 32 | panel.grid.minor = element_blank(), 33 | plot.background = element_rect(fill = "black")) + 34 | theme(axis.text.x = element_text(angle = 0, hjust = 1)) 
-------------------------------------------------------------------------------- /visualisation/dendogram/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/dendogram/.DS_Store -------------------------------------------------------------------------------- /visualisation/dendogram/all_dendo_sanchez.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/dendogram/all_dendo_sanchez.png -------------------------------------------------------------------------------- /visualisation/dendogram/dendogram.r: -------------------------------------------------------------------------------- 1 | # dendogram.r 2 | # http://www.sthda.com/english/articles/28-hierarchical-clustering-essentials/92-visualizing-dendrograms-ultimate-guide/?utm_content=buffer4e393&utm_medium=social&utm_source=twitter.com&utm_campaign=buffer 3 | 4 | library(tidyverse) 5 | library(tibble) 6 | library(factoextra) 7 | library(dendextend) 8 | library(hrbrthemes) 9 | 10 | # Data processing: keep players with >= 3000 minutes and >= 0.3 goals per 90, use Player as row names, drop Position 11 | data <- read.csv2("data.csv") 12 | data <- data %>% filter(Minutes.played>=3000, Goals.per.90>=0.3) %>% column_to_rownames('Player') %>% select(-Position) 13 | 14 | 15 | dd <- dist(scale(data), method = "euclidean") 16 | hc <- hclust(dd, method = "ward.D2") 17 | 18 | dend <- fviz_dend(hc, k = 8, # Cut the tree into 8 groups 19 | cex = 0.6, # label size 20 | # k_colors = c("#f06292", "#40c4ff", "#ffd740", "#1de9b6"), 21 | color_labels_by_k = TRUE, # color labels by groups 22 | # rect = TRUE, # Add rectangle around groups 23 | # rect_border = c("#f06292", "#40c4ff", "#ffd740", "#1de9b6"), 24 | # rect_fill = TRUE, 25 | horiz = TRUE) 26 | 27 | dend + theme_ipsum_rc() + labs(x="", y="Distance",title="Cluster 
Dendogram",subtitle="Hierarchical clustering with all statistics",caption='by Benoit Pimpaud / @Ben8t',color="") + theme(axis.ticks=element_blank(),panel.grid.major = element_blank(), panel.grid.minor = element_blank(),panel.background = element_blank()) 28 | -------------------------------------------------------------------------------- /visualisation/dendogram/dribble_goals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/dendogram/dribble_goals.png -------------------------------------------------------------------------------- /visualisation/games_evolution/fabian_delph_games_evo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/games_evolution/fabian_delph_games_evo.png -------------------------------------------------------------------------------- /visualisation/games_evolution/start_evo_fabian_delph.r: -------------------------------------------------------------------------------- 1 | # start_evo.r 2 | library(tidyverse) 3 | library(hrbrthemes) 4 | library(reshape2) 5 | 6 | 7 | # data 8 | delph = data.frame(season=c("Aston Villa 09/10","Aston Villa 10/11","Aston Villa 11/12","Aston Villa 12/13","Aston Villa 13/14","Aston Villa 14/15","Manchester City 15/16","Manchester City 16/17"),start=c(8,7,11,24,34,28,17,7)) 9 | 10 | # reshape for ggplot barchart 11 | delph.dodge = melt(delph) 12 | 13 | # plot 14 | ggplot(data=delph.dodge,aes(x=season)) + geom_bar(aes(y=value,fill=variable),stat="identity",position=position_dodge()) + scale_fill_manual(values=c("#1de9b6")) + scale_x_discrete(limits=c("Aston Villa 09/10","Aston Villa 10/11","Aston Villa 11/12","Aston Villa 12/13","Aston Villa 13/14","Aston Villa 14/15","Manchester City 15/16","Manchester City 16/17")) 
+labs(x="", y="Games",title="Fabian Delph games evolution",subtitle="",caption="by Benoit Pimpaud / @Ben8t",color="") + theme_ipsum_rc() + theme(legend.position='none') + theme(axis.text.x = element_text(angle = 45, hjust = 1)) 15 | -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_against_bottom_league.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_against_bottom_league.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_against_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_against_full.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_against_top2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_against_top2.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_against_topteam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_against_topteam.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_for_bottom_league.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_for_bottom_league.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_for_full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_for_full.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_for_top2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_for_top2.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/img/plot_goal_for_topteam.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goal_distribution/img/plot_goal_for_topteam.png -------------------------------------------------------------------------------- /visualisation/goal_distribution/main.r: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(viridis) 3 | library(hrbrthemes) 4 | library(gghighlight) 5 | 6 | data <- read_csv("data/data_min_team.csv") 7 | adjust <- 1 8 | teams_filter <- unique(data$goal_for) 9 | # teams_filter <- c("Liverpool", "Man City", "Arsenal", "Man Utd") 10 | 11 | # Goal for 12 | data_goal_for <- data %>% filter(goal_for %in% teams_filter) 13 | plot_goal_for <- 
ggplot(data_goal_for, aes(x=minute, color=goal_for)) + 14 | stat_density(geom="line", position="identity", adjust=adjust) + 15 | scale_color_viridis_d(option="viridis") + 16 | # gghighlight(goal_for=="Man City" || goal_for=="Liverpool") + 17 | # gghighlight(goal_for=="Huddersfield" || goal_for=="Fulham" || goal_for=="Cardiff") + 18 | # gghighlight(goal_for=="Arsenal" || goal_for=="Tottenham" || goal_for=="Man City" || goal_for=="Man Utd" || goal_for=="Liverpool" || goal_for=="Chelsea") + 19 | theme_ipsum_rc() + 20 | labs(x="Minutes", y="Density",title="Goals distribution",subtitle="Premier League 2018-2019",caption="by @Ben8t",color="") + 21 | theme(text=element_text(colour="white"), 22 | plot.background=element_rect(fill="black"), 23 | axis.text=element_text(colour="white"), 24 | legend.text=element_text(colour="white")) 25 | 26 | ggsave("img/plot_goal_for.svg", plot_goal_for, width = 20, height = 15, units = "cm") 27 | 28 | # Goal against 29 | data_goal_against <- data %>% filter(goal_against %in% teams_filter) 30 | plot_goal_against <- ggplot(data_goal_against, aes(x=minute, color=goal_against)) + 31 | stat_density(geom="line", position="identity", adjust=adjust) + 32 | scale_color_viridis_d(option="viridis") + 33 | # gghighlight(goal_against=="Man City" || goal_against=="Liverpool") + 34 | # gghighlight(goal_against=="Huddersfield" || goal_against=="Fulham" || goal_against=="Cardiff") + 35 | # gghighlight(goal_against=="Arsenal" || goal_against=="Tottenham" || goal_against=="Man City" || goal_against=="Man Utd" || goal_against=="Liverpool" || goal_against=="Chelsea") + 36 | theme_ipsum_rc() + 37 | labs(x="Minutes", y="Density",title="Conceded goals distribution",subtitle="Premier League 2018-2019",caption="by @Ben8t",color="") + 38 | theme(text=element_text(colour="white"), 39 | plot.background=element_rect(fill="black"), 40 | axis.text=element_text(colour="white"), 41 | legend.text=element_text(colour="white")) 42 | 43 | ggsave("img/plot_goal_against.svg", 
plot_goal_against, width = 20, height = 15, units = "cm") -------------------------------------------------------------------------------- /visualisation/goal_vs_xg/main.r: -------------------------------------------------------------------------------- 1 | library(readr) 2 | library(zoo) 3 | library(ggplot2) 4 | library(dplyr) 5 | library(ggthemes) 6 | library(hrbrthemes) 7 | 8 | 9 | raw_data <- read_csv("data/all_xg.csv") 10 | id = 32 11 | subtitle = "Manchester United - 2015 to 2019 Premier League season" 12 | 13 | data <- raw_data %>% 14 | filter(team_id==id) %>% 15 | group_by(startDate) %>% 16 | summarise(goal=sum(is_goal, na.rm=TRUE), xg=sum(expected_goal, na.rm=TRUE)) %>% 17 | mutate(cummean_goal=cummean(goal), cummean_xg=cummean(xg)) 18 | 19 | 20 | grouped_data <- data %>% 21 | mutate(date=format(as.Date(startDate), format="%Y-%m-%d")) 22 | grouped_data$id <- seq(1, nrow(grouped_data)) 23 | 24 | ggplot() + 25 | geom_line(data=grouped_data, aes(x=id, y=rollmean(xg, 5, fill=NA)), color="#56FFAE") + 26 | geom_point(data=grouped_data, aes(x=id, y=rollmean(xg, 5, fill=NA)), color="#56FFAE") + 27 | geom_line(data=grouped_data, aes(x=id, y=rollmean(goal, 5, fill=NA)), color="#FCA337") + 28 | geom_point(data=grouped_data, aes(x=id, y=rollmean(goal, 5, fill=NA)), color="#FCA337") + 29 | geom_vline(xintercept=39, color="white", linetype="dashed") + 30 | geom_text(aes(x=39, y=0.3, label="\nchange in season"), colour="white", angle=90, size=3)+ 31 | geom_vline(xintercept=77, color="white", linetype="dashed") + 32 | geom_text(aes(x=77, y=0.3, label="\nchange in season"), colour="white", angle=90, size=3)+ 33 | geom_vline(xintercept=115, color="white", linetype="dashed") + 34 | geom_text(aes(x=115, y=0.3, label="\nchange in season"), colour="white", angle=90, size=3)+ 35 | geom_vline(xintercept=153, color="white", linetype="dashed") + 36 | geom_text(aes(x=153, y=0.3, label="\nchange in season"), colour="white", angle=90, size=3)+ 37 | 
scale_x_continuous(breaks=seq(1,nrow(grouped_data), 4), labels=grouped_data$date[seq(1,nrow(grouped_data), 4)]) + 38 | labs(x="", y="Average xG and goals",title="xG and goals average",subtitle=subtitle,caption="by @Ben8t",color="") + 39 | theme_ipsum_rc() + 40 | theme( 41 | plot.title = element_text(size=35), 42 | text = element_text(colour="white"), 43 | axis.text.y = element_text(colour="white"), 44 | axis.text.x = element_text(colour="white"), 45 | panel.grid.major = element_blank(), 46 | panel.grid.minor = element_blank(), 47 | plot.background = element_rect(fill = "#2162AA")) + 48 | theme(axis.text.x = element_text(angle = 45, hjust = 1)) 49 | -------------------------------------------------------------------------------- /visualisation/goals_assists_evolution/gace_ozil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/goals_assists_evolution/gace_ozil.png -------------------------------------------------------------------------------- /visualisation/goals_assists_evolution/gace_ozil.r: -------------------------------------------------------------------------------- 1 | # gace_ozil.r 2 | library(tidyverse) 3 | library(hrbrthemes) 4 | library(reshape2) 5 | 6 | # exemple url : https://www.transfermarkt.com/mesut-ozil/leistungsdatenverein/spieler/35664 7 | 8 | # data 9 | ozil = data.frame(club=c("Schalke 04","Werder Bremen","Real Madrid","Arsenal"),Goals=c(1*90/2268,16*90/8279,27*90/11377,32*90/13810),Assists=c(5*90/2268,54*90/8279,81*90/11377,57*90/13810)) 10 | 11 | # reshape for ggplot barchart 12 | ozil.dodge = melt(ozil) 13 | 14 | # plot 15 | ggplot(data=ozil.dodge,aes(x=club)) + geom_bar(aes(y=value,fill=variable),stat="identity",position=position_dodge()) + scale_fill_manual(values=c("#1de9b6", "#40c4ff")) + scale_x_discrete(limits=c("Schalke 04","Werder Bremen","Real Madrid","Arsenal")) +labs(x="", y="per 90 
minutes",title="Mesut Ozil goals and assists evolution",subtitle="",caption="by Benoit Pimpaud / @Ben8t",color="") + theme_ipsum_rc() 16 | -------------------------------------------------------------------------------- /visualisation/lineup/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/.DS_Store -------------------------------------------------------------------------------- /visualisation/lineup/data/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/data/.DS_Store -------------------------------------------------------------------------------- /visualisation/lineup/img/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/.DS_Store -------------------------------------------------------------------------------- /visualisation/lineup/img/arsenal_lineup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/arsenal_lineup.png -------------------------------------------------------------------------------- /visualisation/lineup/img/arsenal_lineup35.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/arsenal_lineup35.png -------------------------------------------------------------------------------- /visualisation/lineup/img/arsenal_lineup_34season.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/arsenal_lineup_34season.png -------------------------------------------------------------------------------- /visualisation/lineup/img/chelsea lineup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/chelsea lineup.png -------------------------------------------------------------------------------- /visualisation/lineup/img/liverpool lineup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/liverpool lineup.png -------------------------------------------------------------------------------- /visualisation/lineup/img/logoArsenal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoArsenal.png -------------------------------------------------------------------------------- /visualisation/lineup/img/logoChelsea.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoChelsea.png -------------------------------------------------------------------------------- /visualisation/lineup/img/logoLiverpool.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoLiverpool.png 
-------------------------------------------------------------------------------- /visualisation/lineup/img/logoManCity.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoManCity.png -------------------------------------------------------------------------------- /visualisation/lineup/img/logoManUtd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoManUtd.png -------------------------------------------------------------------------------- /visualisation/lineup/img/logoNice.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoNice.png -------------------------------------------------------------------------------- /visualisation/lineup/img/logoTottenham.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/logoTottenham.png -------------------------------------------------------------------------------- /visualisation/lineup/img/manCity lineup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/lineup/img/manCity lineup.png -------------------------------------------------------------------------------- /visualisation/lineup/img/manUtd lineup.png: -------------------------------------------------------------------------------- 
# process.r
# Dot chart of the Leicester squad: one row per player, one dot per matchday,
# coloured by the player's position value for that game.
library(tidyverse)
library(ggplot2)
library(hrbrthemes)
library(showtext)

data <- read_csv("data/leicester_20182019.csv")
image_path <- "img/leicester_20182019.svg"

# Count the games per (player, position) and keep only the "tit" rows; that
# count (n) is used below to order the players on the y axis.
position_counts <- summarise(group_by(data, player_name, position), n=n())
data_order <- select(filter(position_counts, position=="tit"), player_name, n)

# merge() joins on the shared column(s), so players without a "tit" row drop out.
graphic_data <- arrange(merge(data, data_order), desc(n))

# All theme overrides gathered in a single call (applied on top of theme_minimal).
dark_theme <- theme(
    legend.background=element_blank(),
    legend.key=element_blank(),
    panel.grid=element_line(color="#cccccc", size=0.2),
    panel.grid.major=element_line(color="#cccccc", size=0.2),
    panel.grid.minor=element_line(color="#cccccc", size=0.15),
    plot.title=element_text(family="ObjectSans", face="bold", size=15),
    legend.text=element_text(family="ObjectSans", size=12, colour="white"),
    text=element_text(colour="white"),
    plot.background=element_rect(fill="black"),
    axis.text=element_text(colour="white"))

graphic <- ggplot() +
    geom_point(data=graphic_data,
               aes(x=matchday, y=reorder(player_name,desc(n)), color=position),
               size=2) +
    scale_colour_manual(values=c("#ff8a80","#69f0ae")) +
    scale_x_continuous(breaks=c(1,5,10,15,20,25,30,35,38)) +
    theme_minimal() +
    dark_theme +
    labs(title="Leicester Squad Formation",
         subtitle="Premier League 2018-2019 season",
         x="Matchday",
         y="")

ggsave(image_path, graphic, width = 20, height = 12, units = "cm")
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/arsenal1718_cluster6_assists_shots_cluster_map.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/arsenal1819_cluster10_assists_shots_cluster_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/arsenal1819_cluster10_assists_shots_cluster_map.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/arsenal1819_cluster6_assists_shots_cluster_map.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/arsenal1819_cluster6_assists_shots_cluster_map.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/g_assist_shot_cluster_tmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/g_assist_shot_cluster_tmp.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_4.png 
-------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_5.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_6.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/assist_shot_cluster_map/img/tmp_assists_shots_cluster_map_7.png -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/main.r: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(rvest) 3 | library(hrbrthemes) 4 | library(magick) 5 | library(viridis) 6 | library(ggforce) 7 | library(gtable) 8 | library(gridExtra) 9 | library(grid) 10 | library(readr) 11 | 12 | source(file="src/assist_shot_cluster_map.r") 13 | 14 | # Launcher 15 | data_file = "data/arsenal_assists_shots_against_20172018.csv" 16 | text = "Arsenal assists-shots conceded in 2017-2018 Premier League season." 
17 | final_filename = "img/arsenal1718_assists_shots_cluster_map.png" 18 | 19 | # Colors 20 | background_color = "#2162AA" 21 | foreground_color = "#F7F6F4" 22 | line_color = c("#00A6FF", "#00C3FF", "#00DAE5", "#00EBB8", "#9DF68A", "#F9F871") 23 | text_color = "white" 24 | cluster_number = 6 25 | 26 | # Load data 27 | data <- read_csv(data_file, 28 | col_types = cols(event_id = col_character(), 29 | next_event_id = col_character(), 30 | x_pass_end = col_double(), x_shot = col_double(), 31 | y_pass_end = col_double(), y_shot = col_double() 32 | ) 33 | ) 34 | 35 | # Build graphic 36 | map <- assist_shot_cluster_map(data, background_color, foreground_color, line_color, cluster_number) 37 | create_graphic(map, text, final_filename, background_color, text_color) -------------------------------------------------------------------------------- /visualisation/maps/assist_shot_cluster_map/multi_maps.r: -------------------------------------------------------------------------------- 1 | 2 | library(dplyr) 3 | library(rvest) 4 | library(hrbrthemes) 5 | library(magick) 6 | library(viridis) 7 | library(ggforce) 8 | library(gtable) 9 | library(gridExtra) 10 | library(grid) 11 | library(readr) 12 | 13 | # Colors 14 | background_color = "#2162AA" 15 | foreground_color = "#F7F6F4" 16 | line_color = c("#00A6FF", "#00C3FF", "#00DAE5", "#00EBB8", "#9DF68A", "#F9F871") 17 | text_color = "white" 18 | cluster_number = 5 19 | 20 | # Load data 21 | data <- read_csv("/data/cluster.csv", 22 | col_types = cols(x_pass_end = col_double(), x_shot = col_double(), 23 | y_pass_end = col_double(), y_shot = col_double() 24 | ) 25 | ) 26 | data <- data %>% filter(x_shot>105/2) %>% filter(x_pass_begin>105/2) 27 | set.seed(42) 28 | 29 | 30 | compute_cluster <- function(data, cluster_number){ 31 | final_data <- data.frame() 32 | teams <- data %>% distinct(team_name) 33 | for(team in teams$team_name){ 34 | print(team) 35 | team_data <- data %>% filter(team_name==team) 36 | kmeans_result = kmeans(team_data 
%>% 37 | select(x_pass_begin, 38 | y_pass_begin, 39 | x_pass_end, 40 | y_pass_end, 41 | x_shot, 42 | y_shot) %>% 43 | na.omit(), cluster_number, iter.max = 50) 44 | cluster_centroid = kmeans_result$centers %>% as.data.frame() %>% mutate(size=kmeans_result$size,cluster_name=LETTERS[1:cluster_number], team_name=team) 45 | final_data <- rbind(final_data, cluster_centroid) 46 | } 47 | final_data 48 | } 49 | 50 | cluster_centroid <- compute_cluster(data, cluster_number) 51 | 52 | plot <- ggplot(data=cluster_centroid) + geom_rect(aes(xmin = 0, xmax = 68, ymin = 0, ymax = 105), #entire pitch with FIFA dimensions 53 | fill = background_color, 54 | colour = foreground_color, 55 | size = .5) + 56 | geom_circle(aes(x0 = 68 / 2, y0 = 105 / 2, r = 9.15), colour=foreground_color) + #centre circle 57 | geom_circle(aes(x0 = 68 / 2, y0 = 11, r = 9.15), colour=foreground_color) + #penalty arc 58 | geom_circle(aes(x0 = 68 / 2, y0 = 105 - 11, r = 9.15), colour=foreground_color) + #penalty arc 59 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 0, ymax = 16.5), #penalty box 60 | fill = background_color, 61 | colour = foreground_color, 62 | size = .5) + 63 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 105, ymax = 105 - 16.5), #penalty box 64 | fill = background_color, 65 | colour = foreground_color, 66 | size = .5) + 67 | geom_point(aes(x = 68 / 2, y = 11), colour=foreground_color) + #penalty spot 68 | geom_point(aes(x = 68 / 2, y = 105 -11), colour=foreground_color) + #penalty spot 69 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 0, ymax = 5.5), #6 yard box 70 | fill = background_color, 71 | colour = foreground_color, 72 | size = .5) + 73 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 105, ymax = 105 - 5.5), #6 yard box 74 | fill = background_color, 75 | colour = foreground_color, 76 | size = .5) + 77 | geom_segment(aes(x = 
0, y = 105/2, xend = 68, yend = 105/2), colour=foreground_color) + #halfway line 78 | coord_fixed() + 79 | theme(rect = element_blank(), #remove additional ggplot2 features: lines, axis, etc... 80 | line = element_blank(),axis.title.y = element_blank(), 81 | legend.position = "None", 82 | axis.title.x = element_blank(), 83 | axis.text.x = element_blank(), 84 | axis.text.y = element_blank()) + 85 | geom_segment(data=cluster_centroid, 86 | aes(x=-y_pass_begin+68, y=x_pass_begin, xend=-y_pass_end+68, yend=x_pass_end, color=size), 87 | size=1.25) + 88 | scale_color_gradientn(colours=line_color) + 89 | geom_segment(data=cluster_centroid, 90 | aes(x=-y_pass_end+68, y=x_pass_end, xend=-y_shot+68, yend=x_shot, color=size), 91 | size=1.25, 92 | lineend='butt', 93 | linejoin='mitre', 94 | arrow = arrow(length = unit(0.25, "cm"),type="closed")) + 95 | geom_point(data=cluster_centroid, 96 | aes(x=-y_pass_begin+68, y=x_pass_begin, color=size), 97 | size=2) + 98 | geom_text(data=cluster_centroid, aes(x=-y_pass_begin+68, y=x_pass_begin, label=cluster_name), 99 | hjust=-1.2, 100 | vjust=1.2, 101 | colour=foreground_color) + 102 | geom_point(data=cluster_centroid, 103 | aes(x=-y_pass_end+68, y=x_pass_end, color=size), 104 | size=2) + 105 | coord_cartesian(ylim=c(55, 105)) 106 | 107 | g <- plot + facet_wrap(. 
assist_shot_cluster_map <- function(data, background_color, foreground_color, line_color, cluster_number){
    # Draw the k-means centroids of pass -> assist -> shot sequences on a pitch
    #
    # Args:
    #   data: data frame with columns x_pass_begin, y_pass_begin, x_pass_end,
    #         y_pass_end, x_shot and y_shot
    #   background_color: fill colour of the pitch
    #   foreground_color: colour of the pitch markings and of the cluster labels
    #   line_color: vector of colours for the cluster-size gradient
    #   cluster_number: number of k-means clusters (labelled A, B, C, ...)
    #
    # Returns:
    #   ggplot object, cropped to y in [55, 105]

    # Keep only sequences whose pass start and shot both lie beyond x = 105/2.
    attacking <- filter(data, x_shot > 105/2, x_pass_begin > 105/2)

    # Fixed seed so repeated runs give the same clusters.
    set.seed(42)
    coordinates <- na.omit(select(attacking,
                                  x_pass_begin,
                                  y_pass_begin,
                                  x_pass_end,
                                  y_pass_end,
                                  x_shot,
                                  y_shot))
    fit <- kmeans(coordinates, cluster_number, iter.max = 50)

    # One row per cluster: centroid coordinates, member count and a letter label.
    centroids <- mutate(as.data.frame(fit$centers),
                        size = fit$size,
                        cluster_name = LETTERS[1:cluster_number])

    # Pitch markings, drawn vertically (68 wide, 105 long).
    pitch_layers <- list(
        geom_rect(aes(xmin = 0, xmax = 68, ymin = 0, ymax = 105), # entire pitch with FIFA dimensions
                  fill = background_color, colour = foreground_color, size = .5),
        geom_circle(aes(x0 = 68 / 2, y0 = 105 / 2, r = 9.15), colour = foreground_color), # centre circle
        geom_circle(aes(x0 = 68 / 2, y0 = 11, r = 9.15), colour = foreground_color), # penalty arc
        geom_circle(aes(x0 = 68 / 2, y0 = 105 - 11, r = 9.15), colour = foreground_color), # penalty arc
        geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 0, ymax = 16.5), # penalty box
                  fill = background_color, colour = foreground_color, size = .5),
        geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 105, ymax = 105 - 16.5), # penalty box
                  fill = background_color, colour = foreground_color, size = .5),
        geom_point(aes(x = 68 / 2, y = 11), colour = foreground_color), # penalty spot
        geom_point(aes(x = 68 / 2, y = 105 -11), colour = foreground_color), # penalty spot
        geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 0, ymax = 5.5), # 6 yard box
                  fill = background_color, colour = foreground_color, size = .5),
        geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 105, ymax = 105 - 5.5), # 6 yard box
                  fill = background_color, colour = foreground_color, size = .5),
        geom_segment(aes(x = 0, y = 105/2, xend = 68, yend = 105/2), colour = foreground_color) # halfway line
    )

    # Remove additional ggplot2 features: lines, axis, etc...
    bare_theme <- theme(rect = element_blank(),
                        line = element_blank(),
                        axis.title.y = element_blank(),
                        legend.position = "None",
                        axis.title.x = element_blank(),
                        axis.text.x = element_blank(),
                        axis.text.y = element_blank())

    # Centroid drawing: the data x axis runs along the pitch length, so it is
    # plotted on the vertical axis and y is mirrored (68 - y) on the horizontal.
    centroid_layers <- list(
        # pass segment
        geom_segment(data = centroids,
                     aes(x=-y_pass_begin+68, y=x_pass_begin, xend=-y_pass_end+68, yend=x_pass_end, color=size),
                     size = 1.25),
        scale_color_gradientn(colours = line_color),
        # pass end -> shot, with an arrow head
        geom_segment(data = centroids,
                     aes(x=-y_pass_end+68, y=x_pass_end, xend=-y_shot+68, yend=x_shot, color=size),
                     size = 1.25,
                     lineend = 'butt',
                     linejoin = 'mitre',
                     arrow = arrow(length = unit(0.25, "cm"),type="closed")),
        geom_point(data = centroids,
                   aes(x=-y_pass_begin+68, y=x_pass_begin, color=size),
                   size = 2),
        geom_text(data = centroids,
                  aes(x=-y_pass_begin+68, y=x_pass_begin, label=cluster_name),
                  hjust = -1.2,
                  vjust = 1.2,
                  colour = foreground_color),
        geom_point(data = centroids,
                   aes(x=-y_pass_end+68, y=x_pass_end, color=size),
                   size = 2)
    )

    # NOTE(review): a plot holds a single coordinate system, so the final
    # coord_cartesian() presumably supersedes coord_fixed() -- confirm whether
    # the fixed aspect ratio was meant to survive the crop.
    ggplot() +
        pitch_layers +
        coord_fixed() +
        bare_theme +
        centroid_layers +
        coord_cartesian(ylim=c(55, 105))
}

create_graphic <- function(map, text, filepath, background_color, text_color){
    # Compose the final picture: rendered map + title + foreground + caption
    #
    # Args:
    #   map: ggplot object to render
    #   text: caption written on the image
    #   filepath: path of the image to write
    #   background_color: background used when rendering the map
    #   text_color: colour of the caption
    #
    # Returns:
    #   result of image_write()

    # The map is first rendered to an intermediate PNG with a large top margin.
    rendered_path <- "img/g_assist_shot_cluster_tmp.png"
    padded_map <- map + theme(plot.margin=unit(c(3.5,0,-0.3,0),"cm"))
    ggsave(filename=rendered_path, plot=padded_map, width=10.5, height=8, dpi=150, bg=background_color)

    base_image <- image_read(rendered_path)
    title <- image_read("template/title.png")
    foreground <- image_read("template/foreground.png")

    # Stack the layers: title, then the foreground template, then the caption.
    titled <- image_composite(base_image, image_scale(title,"1000"), offset="+80+80")
    framed <- image_composite(titled, foreground)
    full_image <- image_annotate(framed, text, font="Roboto", size=35, location="+80+190", color=text_color)
    image_write(full_image, path=filepath)
}
ymax = 68), #entire pitch with FIFA dimensions 8 | fill = background_color, 9 | colour = foreground_color, 10 | size = .5) + 11 | geom_circle(aes(x0 = 105 / 2, y0 = 68 / 2, r = 9.15), colour=foreground_color) + #centre circle 12 | geom_circle(aes(x0 = 11, y0 = 68 / 2, r = 9.15), colour=foreground_color) + #penalty arc 13 | geom_circle(aes(x0 = 105 - 11, y0 = 68 / 2, r = 9.15), colour=foreground_color) + #penalty arc 14 | geom_rect(aes(xmin = 0, xmax = 16.5, ymin = 68 / 2 - 7.32 / 2 - 16.5, ymax = 68 / 2 + 7.32 / 2 + 16.5), #penalty box 15 | fill = background_color, 16 | colour = foreground_color, 17 | size = .5) + 18 | geom_rect(aes(xmin = 105 - 16.5, xmax = 105, ymin = 68 / 2 - 7.32 / 2 - 16.5, ymax = 68 / 2 + 7.32 / 2 + 16.5), #penalty box 19 | fill = background_color, 20 | colour = foreground_color, 21 | size = .5) + 22 | geom_point(aes(x = 11, y = 68 / 2), colour=foreground_color) + #penalty spot 23 | geom_point(aes(x = 105 -11, y = 68 / 2), colour=foreground_color) + #penalty spot 24 | geom_segment(aes(x = -.5, xend = -.5, y = 68 / 2 + 7.32 / 2, yend = 68 / 2 - 7.32 / 2), colour=foreground_color) + #goal 25 | geom_segment(aes(x = 105.5, xend = 105.5, y = 68 / 2 + 7.32 / 2, yend = 68 / 2 - 7.32 / 2), colour=foreground_color) + #goal 26 | geom_rect(aes(xmin = 0, xmax = 5.5, ymin = 68 / 2 - 7.32 / 2 - 5.5, ymax = 68 / 2 + 7.32 / 2 + 5.5), #6 yard box 27 | fill = background_color, 28 | colour = foreground_color, 29 | size = .5) + 30 | geom_rect(aes(xmin = 105 - 5.5, xmax = 105, ymin = 68 / 2 - 7.32 / 2 - 5.5, ymax = 68 / 2 + 7.32 / 2 + 5.5), #6 yard box 31 | fill = background_color, 32 | colour = foreground_color, 33 | size = .5) + 34 | geom_segment(aes(x = 105 /2, y = 0, xend = 105 / 2, yend = 68), colour=foreground_color) + #halfway line 35 | coord_fixed() + 36 | theme(rect = element_blank(), #remove additional ggplot2 features: lines, axis, etc... 
# Image for the passmap web app: R (plots) + Python 3 / Flask (server)
# + headless Chrome and chromedriver (used through selenium).
FROM atavares/rocker-geospatial-magick

# System tools and the Python 3 runtime.
# python3-urllib3 replaces the Python 2 package python-urllib3: everything in
# this image runs under python3.
RUN apt-get -qq update -y && \
    apt-get install curl -y && \
    apt-get install unzip -y && \
    apt-get -y install python3 python3-pip python3-pandas python3-urllib3 && \
    apt-get install -my wget gnupg

# Install Chrome WebDriver
# Fetched over HTTPS; curl -f aborts the build on an HTTP error instead of
# saving the error page as the "zip".
RUN CHROMEDRIVER_VERSION=$(curl -fsS https://chromedriver.storage.googleapis.com/LATEST_RELEASE) && \
    mkdir -p /opt/chromedriver-$CHROMEDRIVER_VERSION && \
    curl -fsS -o /tmp/chromedriver_linux64.zip https://chromedriver.storage.googleapis.com/$CHROMEDRIVER_VERSION/chromedriver_linux64.zip && \
    unzip -qq /tmp/chromedriver_linux64.zip -d /opt/chromedriver-$CHROMEDRIVER_VERSION && \
    rm /tmp/chromedriver_linux64.zip && \
    chmod +x /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver && \
    ln -fs /opt/chromedriver-$CHROMEDRIVER_VERSION/chromedriver /usr/local/bin/chromedriver

# Install Google Chrome
# NOTE(review): chromedriver's LATEST_RELEASE and google-chrome-stable are not
# pinned to each other -- confirm the two versions still match at build time.
RUN curl -fsS -o - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - && \
    echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google-chrome.list && \
    apt-get -yqq update && \
    apt-get -yqq install google-chrome-stable && \
    rm -rf /var/lib/apt/lists/*

# Install python dependencies
RUN pip3 install urllib3==1.23 && \
    pip3 install requests && \
    pip3 install flask && \
    pip3 install pandas && \
    pip3 install lxml && \
    pip3 install beautifulsoup4 && \
    pip3 install selenium && \
    pip3 install dropbox

# Install R dependencies
# 'grid' is not installed here: it is a base package shipped with R, not a
# CRAN package.
RUN apt-get update -y && \
    apt-get install libssl-dev -y && \
    apt-get install libmagick++-dev -y && \
    apt-get install libcurl4-openssl-dev -y && \
    Rscript -e "install.packages(c('dplyr', 'ggplot2', 'rvest', 'hrbrthemes', 'jsonlite', 'igraph', 'ggnetwork', 'intergraph', 'magick', 'ggforce', 'gtable', 'gridExtra'), repos='https://cran.rstudio.com/')"

WORKDIR /data/visualisation/maps/passmap

# Exec form: python3 runs as PID 1 and receives the container's stop signal.
CMD ["python3", "-m", "main"]
"""Flask front end of the passmap tool.

Routes:
    /          list the recent games returned by the crawler.
    /result    download one game's data and run the R plotting scripts on it.
    /download  send a generated data folder back as a zip archive.
"""
from flask import Flask
from flask import abort, request, render_template, send_file
from src.crawler import download_data, last_whoscored_games
import subprocess
import os
import sys
import time
import zipfile

app = Flask(__name__, template_folder="public", static_folder="public")

# Only folders below this directory may be served by /download.
# NOTE(review): assumes download_data() writes its folders under ./data
# (e.g. "data/Italy-Serie-A-2018-2019-Atalanta-AC-Milan") -- confirm against
# src.crawler before deploying.
DATA_ROOT = "data"

# R scripts run, in this order, on every downloaded game folder.
R_SCRIPTS = ("passnetwork.r", "passsonar.r")


@app.route('/')
def home():
    """Render the landing page with the latest games found by the crawler."""
    last_urls = last_whoscored_games()
    return render_template('index.html', last_urls=last_urls)


@app.route('/result')
def result():
    """Download the game given by ``jsdata`` and run the R scripts on it.

    Responds with 400 when the ``jsdata`` query parameter is missing.
    """
    game_url = request.args.get('jsdata')
    if not game_url:
        abort(400)
    folder = download_data(game_url)
    # The R scripts receive the folder without "./" and with a trailing slash.
    r_folder = folder.replace("./", "") + "/"
    for script in R_SCRIPTS:
        completed = subprocess.run(["Rscript", script, r_folder])
        if completed.returncode != 0:
            # Best effort, as before: the page is still rendered, but the
            # failure is now visible in the server log.
            app.logger.warning("%s exited with status %d for %s",
                               script, completed.returncode, r_folder)
    return render_template("result.html", folder=folder)


@app.route('/download', methods=["GET"])
def download():
    """Zip the folder named by the ``folder`` query parameter and send it.

    ``folder`` comes straight from the client, so it is only accepted when it
    resolves to an existing directory inside ``DATA_ROOT``. Responds with 400
    when the parameter is missing and 404 otherwise.
    """
    folder = request.args.get('folder')
    if not folder:
        abort(400)
    if not _is_inside(DATA_ROOT, folder) or not os.path.isdir(folder):
        abort(404)
    # NOTE(review): every request writes the same data.zip, so two concurrent
    # downloads can overwrite each other's archive.
    with zipfile.ZipFile('data.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipdir(folder, zipf)
    # No explicit attachment name is passed: it defaults to the basename of
    # the path ("data.zip"), and the keyword for it differs between Flask
    # releases.
    return send_file('data.zip',
                     mimetype='application/zip',
                     as_attachment=True)


def _is_inside(root, path):
    """Return True when *path* resolves to *root* or to something below it."""
    root = os.path.realpath(root)
    path = os.path.realpath(path)
    try:
        return os.path.commonpath([root, path]) == root
    except ValueError:
        # Raised when the two paths cannot be compared (e.g. different drives).
        return False


def zipdir(path, ziph):
    """Add every file found under *path* to the open ZipFile *ziph*."""
    for root, _dirs, files in os.walk(path):
        for file in files:
            ziph.write(os.path.join(root, file))


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger and
    # host='0.0.0.0' listens on every interface; acceptable for a local
    # container, but turn debug off before exposing this server.
    app.run(debug=True, host='0.0.0.0', port=8082)
# one_player_sonar.r
# Pass sonar for one player: polar bar chart with one bar per pass angle,
# bar height = freq and fill colour = distance.
library(tidyverse)
library(hrbrthemes)
library(readr)

data <- read_csv("data.csv")
sonar_colors <- c("#B5D9FC", "#A8BCE8", "#A19FD0", "#9D81B3")

# Angle axis: full circle from -180 to 180, one break every 45 degrees.
angle_limits <- c(-180,180)
angle_breaks <- seq(-180, 180, 45)

# Strip everything but the bars themselves.
bare_theme <- theme(axis.title=element_blank(),
                    axis.text=element_blank(),
                    axis.ticks=element_blank(),
                    legend.position="none",
                    panel.background=element_blank(),
                    panel.grid.minor=element_blank(),
                    panel.grid.major = element_blank())

# NOTE(review): theme_minimal() and theme_ipsum_rc() are both complete themes,
# so the legend.background tweak and theme_minimal() presumably have no effect
# on the result -- confirm before removing them.
plot <- ggplot(data) +
    geom_bar(aes(x=angle, y=freq, fill=distance), stat="identity") +
    coord_polar(start=pi, direction=1) +
    scale_x_continuous(limits=angle_limits, breaks=angle_breaks) +
    scale_fill_gradientn(colours=sonar_colors) +
    theme(legend.background=element_rect(fill=alpha("white", 0.0))) +
    theme_minimal() +
    theme_ipsum_rc() +
    bare_theme
18 | Passmap 19 |
20 | 21 |
22 |
23 |
24 |

25 | Heads up! This is an alpha version. 26 |

27 |
28 |
29 |
30 |
31 | 32 |
33 |
34 |
35 |
Recent games

36 | {% for url in last_urls %} 37 |
38 |
39 | 45 | 46 |
47 |
48 | {% endfor%} 49 |
50 |
51 |
Specific game

52 |
53 |
54 |
55 | 61 |
62 |
63 |
64 |
65 |
66 | or 67 |
68 |
69 | 70 | 71 | 72 | 73 | 84 | 85 | 86 | 87 | 122 | 123 | 168 | -------------------------------------------------------------------------------- /visualisation/maps/passmap/public/result.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

4 | Success! {{folder}}. 5 |

6 |
7 |
8 | 28 | -------------------------------------------------------------------------------- /visualisation/maps/passmap/src/core.r: -------------------------------------------------------------------------------- 1 | # core.r 2 | library(dplyr) 3 | library(rvest) 4 | library(hrbrthemes) 5 | library(jsonlite) 6 | 7 | event_to_dataframe <- function(data) { 8 | # Transform json data to dataframe 9 | # 10 | # Args: 11 | # data: data loaded from json 12 | # 13 | # Returns: 14 | # dataframe with event data 15 | event <- data.frame(id=data$events$id, 16 | eventId=data$events$eventId, 17 | minute=data$events$minute, 18 | second=data$events$second, 19 | teamId=data$events$teamId, 20 | playerId=data$events$playerId, 21 | x=data$events$x, 22 | y=data$events$y, 23 | typeValue=data$events$type$value, 24 | typeDisplayName=data$events$type$displayName, 25 | period=data$events$period$value, 26 | isTouch=data$events$isTouch, 27 | outcome=data$events$outcomeType$value) 28 | return(event) 29 | } 30 | 31 | get_playersId_playersNames <- function(data) { 32 | # Gather player names with player id 33 | # 34 | # Args: 35 | # data: data loaded from json 36 | # 37 | # Returns: 38 | # dataframe with players id and players names 39 | playersId_playersNames <- data$playerIdNameDictionary %>% 40 | t %>% 41 | as.data.frame() %>% 42 | t %>% 43 | as.data.frame() %>% 44 | tibble::rownames_to_column() %>% 45 | select(playerName=V1, playerId=rowname) %>% 46 | mutate(playerId=as.numeric(playerId), playerName=unlist(playerName)) 47 | return(playersId_playersNames) 48 | } 49 | 50 | get_teamsId_teamsNames <- function(data) { 51 | # Gather team id with team names 52 | # 53 | # Args: 54 | # data: data loaded from json 55 | # 56 | # Returns: 57 | # dataframe with teams id and teams names 58 | teamsId_teamsNames <- data.frame(teamId=c(data$home$teamId, data$away$teamId), 59 | teamName=c(data$home$name, data$away$name)) 60 | return(teamsId_teamsNames) 61 | } 62 | 63 | event_cleaning <- function(data) { 64 | # 
get_game_information_text <- function(data, team) {
  # Get game information such as scoreboard and teams
  #
  # Args:
  #   data: data loaded from json (reads data$home$name, data$away$name, data$score)
  #   team: either "home" or "away"
  #
  # Returns:
  #   game scoreboard (example: "Arsenal against Liverpool - 2:1")
  #
  # Raises:
  #   an error when team is neither "home" nor "away" (previously this ended
  #   in an unrelated "object 'final_text' not found" error)
  if (length(team) != 1 || !(team %in% c("home", "away"))) {
    stop("team must be either \"home\" or \"away\"")
  }
  home_team <- data$home$name
  away_team <- data$away$name
  # score is whitespace separated (home, separator, away); split it only once
  score_parts <- unlist(strsplit(data$score, "\\s+"))
  score_home <- score_parts[1]
  score_away <- score_parts[3]
  if (team == "home") {
    final_text <- paste0(home_team," against ",away_team," - ",score_home,":",score_away)
  } else {
    final_text <- paste0(away_team," away at ", home_team," - ", score_home, ":", score_away)
  }
  return(final_text)
}
get_players_passes <- function(team_event) {
  # Build dataframe with player passes
  #
  # A pass is kept when an event with typeValue 1 is directly followed by
  # another event with typeValue 1; the player of the second event is taken
  # as the receiver.
  #
  # Args:
  #   team_event: dataframe containing event data for a specific team
  #               (columns typeValue, playerName, x, y, teamName)
  #
  # Returns:
  #   passes destination for each player, with the pass angle (rounded to
  #   15 degrees, sign flipped) and distance; zero rows when there is no pass
  n_events <- nrow(team_event)
  if (is.null(n_events) || n_events < 2) {
    # 1:(n-2) used to count backwards here and crash on short inputs
    first <- integer(0)
  } else {
    # every pair (i, i+1) up to the last event; the old bound of n-2
    # silently dropped the final pair
    candidates <- seq_len(n_events - 1)
    is_pass_pair <- team_event$typeValue[candidates] == 1 &
      team_event$typeValue[candidates + 1] == 1
    first <- candidates[which(is_pass_pair)]
  }
  second <- first + 1
  # built in one go instead of rbind-ing one row per pass (quadratic)
  player_passes <- data.frame(from=team_event$playerName[first],
                              from_x=team_event$x[first],
                              from_y=team_event$y[first],
                              to=team_event$playerName[second],
                              to_x=team_event$x[second],
                              to_y=team_event$y[second],
                              team=team_event$teamName[first])
  player_passes$angle <- -angle_between_points(player_passes$from_x, player_passes$from_y,
                                               player_passes$to_x, player_passes$to_y, 15)
  player_passes$distance <- sqrt((player_passes$to_y - player_passes$from_y)^2 +
                                 (player_passes$to_x - player_passes$from_x)^2)
  return(player_passes)
}
def _fetch_page_source(url):
    """
    Load a page in headless Chrome and return its rendered HTML.
    The driver is always quit, even when loading fails, so no Chrome process
    is left behind after each call.
    :param url: page to load
    :return: page source as a string
    """
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    options.add_argument('--no-sandbox')
    # NOTE(review): newer Selenium releases expect `options=`; the keyword is
    # kept as in the original to match the installed version - confirm.
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.implicitly_wait(30)
        driver.get(url)
        return driver.page_source
    finally:
        driver.quit()


def last_whoscored_games():
    """
    Retrieve last WhoScored games (home page match slider)
    :return: list of absolute game urls
    """
    url = "https://whoscored.com"
    soup = BeautifulSoup(_fetch_page_source(url), "lxml")
    games_url = []
    for div in soup.findAll('div', attrs={'class': 'post-match'}):
        for a in div.findAll('a'):
            games_url.append("https://whoscored.com" + a['href'])
    print(games_url)
    return games_url


def download_data(url):
    """
    Download games data and teams logo images, games data are written into data/folder/data.json and
    images as data/folder/home|away_logo.png
    :param url: url to download data
    :return: folder containing downloaded data and images
    :raises ValueError: when the url has no usable last path segment or the
        page does not contain the match data
    """
    # The last url segment becomes a folder that is deleted and recreated:
    # an empty or ".." segment would make rmtree() wipe data/ or its parent.
    match_slug = url.split("/")[-1]
    if match_slug in ("", ".", "..") or "\\" in match_slug:
        raise ValueError("Cannot derive a data folder from url: {url}".format(url=url))
    folder_name = "data/" + match_slug
    file_name = "{folder_name}/data.json".format(folder_name=folder_name)

    # Download game data
    response = _fetch_page_source(url)
    tree = html.fromstring(response)

    data = tree.xpath('//*[@id="layout-content-wrapper"]/script[1]/text()')[0].strip()
    processed_data = re.search("\\{.*\\}", data, re.IGNORECASE)
    if not processed_data:
        raise ValueError("No match data found on page: {url}".format(url=url))
    loaded_data = json.loads(processed_data.group())

    # Reset the folder only once the page was fetched and parsed, so a failed
    # download does not destroy a previous successful one.
    if os.path.exists(folder_name):
        shutil.rmtree(folder_name)
    os.makedirs(folder_name)

    with open(file_name, 'w') as outfile:
        json.dump(loaded_data, outfile)

    # Download teams logo
    soup = BeautifulSoup(response, "lxml")
    match_header = soup.findAll('div', attrs={'class': 'match-header'})
    images = [img["src"] for img in match_header[0].findAll("img")]
    team_images = {
        "home": images[0],
        "away": images[1]
    }

    for team, image in team_images.items():
        # A timeout keeps a stalled image server from blocking the caller forever.
        logo = requests.get(image, timeout=30).content
        with open("{folder_name}/{team}_logo.png".format(folder_name=folder_name, team=team), "wb") as file:
            file.write(logo)

    return folder_name


if __name__ == "__main__":
    url = sys.argv[1]
    download_data(url)
4 | color = "white" 5 | line_color = c("#83FFC3","#56FFAE","#00CB69","#00793F") 6 | if(startsWith(folder, "data/England-Premier-League")){ 7 | league_name = "Premier League" 8 | } else if(startsWith(folder, "data/Italy-Serie-A")){ 9 | league_name = "Serie A" 10 | } else if(startsWith(folder, "data/France-Ligue-1")){ 11 | league_name = "Ligue 1" 12 | } else if(startsWith(folder, "data/Spain-La-Liga")){ 13 | league_name = "La Liga" 14 | } else if(startsWith(folder, "data/Germany-Bundesliga")){ 15 | league_name = "Bundesliga" 16 | } else if(startsWith(folder, "data/Europe-UEFA-Champions-League")){ 17 | league_name = "UEFA Champions League" 18 | } 19 | else if(startsWith(folder, "data/England-Championship")){ 20 | league_name = "Championship" 21 | } 22 | else if(startsWith(folder, "data/USA-Major")){ 23 | league_name = "Major League Soccer" 24 | } 25 | else if(startsWith(folder, "data/Europe-UEFA-Europa-League")){ 26 | league_name = "UEFA Europa League" 27 | } else{ 28 | league_name = "" 29 | } 30 | output = list() 31 | output$color = color 32 | output$league_name = league_name 33 | output$line_color = line_color 34 | return(output) 35 | } 36 | -------------------------------------------------------------------------------- /visualisation/maps/passmap/template/passnetwork/design.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/passmap/template/passnetwork/design.psd -------------------------------------------------------------------------------- /visualisation/maps/passmap/template/passnetwork/foreground_passnetwork.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/passmap/template/passnetwork/foreground_passnetwork.png 
-------------------------------------------------------------------------------- /visualisation/maps/passmap/template/passsonar/passsonar_template.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/passmap/template/passsonar/passsonar_template.psd -------------------------------------------------------------------------------- /visualisation/maps/passmap/template/passsonar/title_passsonar.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/passmap/template/passsonar/title_passsonar.png -------------------------------------------------------------------------------- /visualisation/maps/toolbox_map.r: -------------------------------------------------------------------------------- 1 | # toolbox_map.r 2 | # Function definition for football map such as passnetwork, shot locations, etc... 
3 | 4 | library(tidyverse) 5 | library(hrbrthemes) 6 | library(ggforce) 7 | library(jsonlite) 8 | library(grid) 9 | library(gridExtra) 10 | library(igraph) 11 | library(ggnetwork) 12 | 13 | #data = jsonlite::fromJSON(jsonfile) 14 | 15 | # EVENT PROCESSING # 16 | event_to_dataframe <- function(data){ 17 | # Transform json data to dataframe 18 | event = data.frame(id=data$events$id, 19 | eventId=data$events$eventId, 20 | minute=data$events$minute, 21 | second=data$events$second, 22 | teamId=data$events$teamId, 23 | playerId=data$events$playerId, 24 | x=data$events$x, 25 | y=data$events$y, 26 | typeValue=data$events$type$value, 27 | typeDisplayName=data$events$type$displayName, 28 | period=data$events$period$value, 29 | isTouch=data$events$isTouch, 30 | outcome=data$events$outcomeType$value) 31 | return(event) 32 | } 33 | 34 | playersId_playersNames <- function(data){ 35 | # Gather player names with player id 36 | playersId_playersNames = data$playerIdNameDictionary %>% 37 | t %>% 38 | as.data.frame() %>% 39 | t %>% 40 | as.data.frame() %>% 41 | tibble::rownames_to_column() %>% 42 | select(playerName=V1,playerId=rowname) %>% 43 | mutate(playerId=as.numeric(playerId),playerName=unlist(playerName)) 44 | return(playersId_playersNames) 45 | } 46 | 47 | teamsId_teamsNames <- function(data){ 48 | # Gather team id with tema names 49 | teamsId_teamsNames = data.frame(teamId=c(data$home$teamId, data$away$teamId), 50 | teamName=c(data$home$name, data$away$name)) 51 | return(teamsId_teamsNames) 52 | } 53 | 54 | event_cleaning <- function(data){ 55 | # return all event with eventId, minute, second, teamName, playerName, x, y, typeDisplayName, perido, isTouch, outcome 56 | # clearing event data 57 | event_cleared = event_to_dataframe(data) %>% 58 | select(eventId, minute, second, teamId, playerId, x, y, typeDisplayName, typeValue, period, isTouch, outcome) %>% 59 | mutate(x=105*x/100, y=68*y/100) %>% # normalize x and y to fit with FIFA dimensions 60 | na.omit() 61 | 62 | # get 
lineup_from_event <- function(data){
  # Get the starting eleven of both teams
  #
  # Args:
  #   data: data loaded from json (reads data$home$formations and
  #         data$away$formations; the first formation is used)
  #
  # Returns:
  #   list with home_lineup_id and away_lineup_id (dataframes with a playerId
  #   column, first 11 players) and home_lineup_maps (formation positions of
  #   the home team)
  starting_eleven <- function(player_ids){
    # first formation lists all squad players; the first 11 are the starters
    data.frame(playerId=head(player_ids, 11))
  }
  lineup = list()
  lineup$home_lineup_id = starting_eleven(data$home$formations$playerIds[[1]])
  # was read from data$home, which returned the home lineup for both teams
  lineup$away_lineup_id = starting_eleven(data$away$formations$playerIds[[1]])
  lineup$home_lineup_maps = data$home$formations$formationPositions[[1]]
  return(lineup)
}
114 | dataframe_events <- list() 115 | i=1 116 | for(file in files){ 117 | print(file) 118 | data = jsonlite::fromJSON(file) 119 | dataframe_events[[i]] <- data 120 | i = i + 1 121 | } 122 | return(dataframe_events) 123 | } 124 | 125 | # MAPS PROCESSING # 126 | get_all_from_game_player <- function(data, player){ 127 | event = event_cleaning(data) %>% 128 | filter(playerName==player) %>% 129 | select(playerName, x, y, typeDisplayName, minute, period) 130 | return(event) 131 | } 132 | 133 | get_all_from_game_team <- function(data, team){ 134 | event = event_cleaning(data) %>% 135 | filter(teamName==team) %>% 136 | select(teamName, playerName, x, y, typeDisplayName, minute, period) 137 | return(event) 138 | } 139 | 140 | gather_all_from_many_games_player <- function(dataframe_events, player){ 141 | # dataframe_events is a list of each games event dataframe 142 | final_data = data.frame() 143 | for(data in dataframe_events){ 144 | game_player = get_all_from_game_player(data, player) 145 | final_data = rbind(final_data, game_player) 146 | } 147 | return(final_data) 148 | } 149 | 150 | -------------------------------------------------------------------------------- /visualisation/maps/touch_map/img/mason_greenwood_20182019.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/touch_map/img/mason_greenwood_20182019.jpg -------------------------------------------------------------------------------- /visualisation/maps/touch_map/main.r: -------------------------------------------------------------------------------- 1 | library(tidyverse) 2 | library(ggforce) 3 | library(readr) 4 | 5 | data_file <- "data/tmp.csv" 6 | file_name <- "img/tmp.svg" 7 | 8 | background_color = "#008F7A" 9 | foreground_color = "#F7F6F4" 10 | low_color = "#FF6F91" 11 | mid_color = "#FF9671" 12 | high_color = "#FFC75F" 13 | 14 | data <- 
read_csv(data_file) 15 | 16 | plot <- ggplot() + geom_rect(aes(xmin = 0, xmax = 105, ymin = 0, ymax = 68), #entire pitch with FIFA dimensions 17 | fill = background_color, 18 | colour = foreground_color, 19 | size = .5) + 20 | geom_circle(aes(x0 = 105 / 2, y0 = 68 / 2, r = 9.15), colour=foreground_color) + #centre circle 21 | geom_circle(aes(x0 = 11, y0 = 68 / 2, r = 9.15), colour=foreground_color) + #penalty arc 22 | geom_circle(aes(x0 = 105 - 11, y0 = 68 / 2, r = 9.15), colour=foreground_color) + #penalty arc 23 | geom_rect(aes(xmin = 0, xmax = 16.5, ymin = 68 / 2 - 7.32 / 2 - 16.5, ymax = 68 / 2 + 7.32 / 2 + 16.5), #penalty box 24 | fill = background_color, 25 | colour = foreground_color, 26 | size = .5) + 27 | geom_rect(aes(xmin = 105 - 16.5, xmax = 105, ymin = 68 / 2 - 7.32 / 2 - 16.5, ymax = 68 / 2 + 7.32 / 2 + 16.5), #penalty box 28 | fill = background_color, 29 | colour = foreground_color, 30 | size = .5) + 31 | geom_point(aes(x = 11, y = 68 / 2), colour=foreground_color) + #penalty spot 32 | geom_point(aes(x = 105 -11, y = 68 / 2), colour=foreground_color) + #penalty spot 33 | geom_segment(aes(x = -.5, xend = -.5, y = 68 / 2 + 7.32 / 2, yend = 68 / 2 - 7.32 / 2), colour=foreground_color) + #goal 34 | geom_segment(aes(x = 105.5, xend = 105.5, y = 68 / 2 + 7.32 / 2, yend = 68 / 2 - 7.32 / 2), colour=foreground_color) + #goal 35 | geom_rect(aes(xmin = 0, xmax = 5.5, ymin = 68 / 2 - 7.32 / 2 - 5.5, ymax = 68 / 2 + 7.32 / 2 + 5.5), #6 yard box 36 | fill = background_color, 37 | colour = foreground_color, 38 | size = .5) + 39 | geom_rect(aes(xmin = 105 - 5.5, xmax = 105, ymin = 68 / 2 - 7.32 / 2 - 5.5, ymax = 68 / 2 + 7.32 / 2 + 5.5), #6 yard box 40 | fill = background_color, 41 | colour = foreground_color, 42 | size = .5) + 43 | geom_segment(aes(x = 105 /2, y = 0, xend = 105 / 2, yend = 68), colour=foreground_color) + #halfway line 44 | coord_fixed() + 45 | theme(rect = element_blank(), #remove additional ggplot2 features: lines, axis, etc... 
46 | line = element_blank(),axis.title.y = element_blank(), 47 | legend.position = "none", 48 | axis.title.x = element_blank(), 49 | axis.text.x = element_blank(), 50 | axis.text.y = element_blank()) + 51 | stat_density2d(data=data, aes(x=x,y=y,fill=..level..,alpha=..level..),geom="polygon",colour="#eee9d6", show.legend=FALSE) + 52 | lims(x = c(-5,113),y = c(-5,73)) + 53 | scale_fill_gradient2(low = low_color, mid=mid_color, high = high_color) 54 | 55 | ggsave(file_name, plot) -------------------------------------------------------------------------------- /visualisation/maps/touch_zone.R: -------------------------------------------------------------------------------- 1 | # to integrate with football pitch 2 | stat_density2d(data=alonso_data, aes(x=x,y=y,fill=..level..,alpha=..level..),geom="polygon",colour="#eee9d6", show.legend=FALSE) + 3 | lims(x = c(-5,113),y = c(-5,73)) + 4 | scale_fill_gradient2(low = "#218c74", mid="#227093", high = "#34ace0") 5 | -------------------------------------------------------------------------------- /visualisation/maps/xa_map/img/lukaku_xa_map_1718.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xa_map/img/lukaku_xa_map_1718.jpg -------------------------------------------------------------------------------- /visualisation/maps/xa_map/img/wolves_xa_map_conceded_1819.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xa_map/img/wolves_xa_map_conceded_1819.png -------------------------------------------------------------------------------- /visualisation/maps/xa_map/main.r: -------------------------------------------------------------------------------- 1 | # xA map 2 | 3 | library(dplyr) 4 | library(rvest) 5 | 
library(hrbrthemes) 6 | library(magick) 7 | library(viridis) 8 | library(ggforce) 9 | library(gtable) 10 | library(gridExtra) 11 | library(grid) 12 | library(readr) 13 | 14 | source(file="src/xa_map.r") 15 | 16 | # Launcher 17 | data_file <- "data/bernardo_silva_xa.csv" 18 | text <- "Bernardo Silva passes from 2018-2019 Premier League season" 19 | final_filename <- "bernardo_silva_xa_map_1819.png" 20 | 21 | # Colors 22 | background_color <- "#2162AA" 23 | foreground_color <- "#F7F6F4" 24 | text_color <- "white" 25 | color1 <- "#64BEF3" 26 | color2 <- "#FFCB41" 27 | color3 <- "#56FFAE" 28 | high_gradient_color <- "#64BEF3" 29 | 30 | # Load data 31 | data <- read_csv(data_file) 32 | 33 | # Filter data 34 | # you can filter on team_name or player_name 35 | # data <- data %>% filter(player_name=="Olivier Giroud") 36 | 37 | # Build map 38 | stats <- get_stats(data) 39 | map <- xa_map(data, background_color, foreground_color, color1, color2, color3, high_gradient_color) 40 | create_graphic(map, text, stats, final_filename, background_color, text_color) -------------------------------------------------------------------------------- /visualisation/maps/xa_map/src/xa_map.r: -------------------------------------------------------------------------------- 1 | # xa_map.r 2 | xa_map <- function(data, background_color, foreground_color, color1, color2, color3, high_gradient_color){ 3 | data <- data %>% filter(xA >= 0 | is_assist == 1) 4 | data$xA_cut = cut(data$xA, breaks=c(0,0.1,1), right = FALSE) 5 | data = data %>% mutate(xA_cut = ifelse(is_assist==1, "assist", xA_cut)) 6 | if(length(table(data$xA_cut)) == 3){ 7 | colors_value = c(color1, color2, color3) 8 | }else{ 9 | colors_value = c(color1, color3) 10 | } 11 | print(colors_value) 12 | ggplot() + geom_rect(aes(xmin = 0, xmax = 68, ymin = 0, ymax = 105), #entire pitch with FIFA dimensions 13 | fill = background_color, 14 | colour = foreground_color, 15 | size = .5) + 16 | geom_circle(aes(x0 = 68 / 2, y0 = 105 / 2, r = 
9.15), colour=foreground_color) + #centre circle 17 | geom_circle(aes(x0 = 68 / 2, y0 = 11, r = 9.15), colour=foreground_color) + #penalty arc 18 | geom_circle(aes(x0 = 68 / 2, y0 = 105 - 11, r = 9.15), colour=foreground_color) + #penalty arc 19 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 0, ymax = 16.5), #penalty box 20 | fill = background_color, 21 | colour = foreground_color, 22 | size = .5) + 23 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 105, ymax = 105 - 16.5), #penalty box 24 | fill = background_color, 25 | colour = foreground_color, 26 | size = .5) + 27 | geom_point(aes(x = 68 / 2, y = 11), colour=foreground_color) + #penalty spot 28 | geom_point(aes(x = 68 / 2, y = 105 -11), colour=foreground_color) + #penalty spot 29 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 0, ymax = 5.5), #6 yard box 30 | fill = background_color, 31 | colour = foreground_color, 32 | size = .5) + 33 | geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 105, ymax = 105 - 5.5), #6 yard box 34 | fill = background_color, 35 | colour = foreground_color, 36 | size = .5) + 37 | geom_segment(aes(x = 0, y = 105/2, xend = 68, yend = 105/2), colour=foreground_color) + #halfway line 38 | coord_fixed() + 39 | theme(rect = element_blank(), #remove additional ggplot2 features: lines, axis, etc... 
get_stats <- function(data){
  # Summarise a pass table as the text block printed on the graphic
  #
  # Args:
  #   data: dataframe with columns xA and is_assist (one row per pass)
  #
  # Returns:
  #   three lines of text: total xA (rounded to 2 decimals), number of
  #   assists and number of passes
  labels <- c("Total xA = ", "\nAssists = ", "\nPasses = ")
  # kept in a list so each figure is formatted with its own type
  figures <- list(round(sum(data$xA), 2), sum(data$is_assist), nrow(data))
  paste0(labels, vapply(figures, as.character, character(1)), collapse="")
}
-------------------------------------------------------------------------------- /visualisation/maps/xa_map/template/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xa_map/template/title.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/chelsea_conceded_xgmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/chelsea_conceded_xgmap.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/g_xgmap_tmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/g_xgmap_tmp.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/hazard_xg_20190223.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/hazard_xg_20190223.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/lacazette_xgmap.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/lacazette_xgmap.jpg -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/palace_conceded_xgmap.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/palace_conceded_xgmap.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/wolves_conceded_xgmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/wolves_conceded_xgmap.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/wolves_xgmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/wolves_xgmap.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/img/wolves_xgmap_1819.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/img/wolves_xgmap_1819.png -------------------------------------------------------------------------------- /visualisation/maps/xg_map/main.r: -------------------------------------------------------------------------------- 1 | # Build XG Map with R 2 | library(dplyr) 3 | library(rvest) 4 | library(hrbrthemes) 5 | library(magick) 6 | library(viridis) 7 | library(ggforce) 8 | library(gtable) 9 | library(gridExtra) 10 | library(grid) 11 | library(readr) 12 | 13 | source(file="src/xg_map.r") # import xg map functions 14 | 15 | # Launcher 16 | data_file <- "data/wolves_xg_conceded.csv" 17 | text <- "Wolverhampton conceded shots in Premier League as date of February 13, 2019." 
final_filename <- "img/wolves_xgmap_conceded.png"

# Colors
background_color <- "#2162AA"
foreground_color <- "#F7F6F4"
color1 <- "#64BEF3"
color2 <- "#FFCB41"
color3 <- "#56FFAE"
text_color <- "white"

# Load data
data <- read_csv(data_file)

# Filter data
# you can filter on team_name or player_name
# data <- data %>% filter(startDate > "2018-08-01")

# Build graphic
stats <- get_stats(data) # gather stats
map <- xg_map(data, background_color, foreground_color, color1, color2, color3) # build map
create_graphic(map, text, stats, final_filename, background_color, text_color) # build full graphic

--------------------------------------------------------------------------------
/visualisation/maps/xg_map/src/xg_map.r:
--------------------------------------------------------------------------------
# xg_map.r

xg_map <- function(data, background_color, foreground_color, color1, color2, color3){
  # Build the shot map (attacking half of the pitch) as a ggplot object.
  #
  # data: one row per shot; the columns read here are xG, is_goal, x_shot and y_shot.
  # background_color, foreground_color: pitch fill and pitch line colours.
  # color1, color2, color3: point colours for non-goal shots with xG < 0.5,
  #   non-goal shots with xG >= 0.5, and goals.
  # Returns the ggplot object; nothing is drawn or saved here.

  # Label every shot explicitly. The previous cut(..., right = FALSE) left
  # xG == 1 as NA, and the integer bin codes forced a positional colour vector.
  data <- data %>%
    mutate(xG_cut = ifelse(is_goal == 1, "goal", ifelse(xG < 0.5, "low", "high")))

  # Named values are matched to the labels by name, so each category keeps its
  # colour even when one of the three categories is absent from the data.
  colors_value <- c(low = color1, high = color2, goal = color3)

  ggplot() +
    geom_rect(aes(xmin = 0, xmax = 68, ymin = 0, ymax = 105), # entire pitch with FIFA dimensions
              fill = background_color,
              colour = foreground_color,
              size = .5) +
    geom_circle(aes(x0 = 68 / 2, y0 = 105 / 2, r = 9.15), colour = foreground_color) + # centre circle
    # The two circles below are added before the penalty boxes, whose
    # background fill hides the part of each circle lying inside the box.
    geom_circle(aes(x0 = 68 / 2, y0 = 11, r = 9.15), colour = foreground_color) + # penalty arc
    geom_circle(aes(x0 = 68 / 2, y0 = 105 - 11, r = 9.15), colour = foreground_color) + # penalty arc
    geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 0, ymax = 16.5), # penalty box
              fill = background_color,
              colour = foreground_color,
              size = .5) +
    geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 16.5, xmax = 68 / 2 + 7.32 / 2 + 16.5, ymin = 105, ymax = 105 - 16.5), # penalty box
              fill = background_color,
              colour = foreground_color,
              size = .5) +
    geom_point(aes(x = 68 / 2, y = 11), colour = foreground_color) + # penalty spot
    geom_point(aes(x = 68 / 2, y = 105 - 11), colour = foreground_color) + # penalty spot
    geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 0, ymax = 5.5), # 6 yard box
              fill = background_color,
              colour = foreground_color,
              size = .5) +
    geom_rect(aes(xmin = 68 / 2 - 7.32 / 2 - 5.5, xmax = 68 / 2 + 7.32 / 2 + 5.5, ymin = 105, ymax = 105 - 5.5), # 6 yard box
              fill = background_color,
              colour = foreground_color,
              size = .5) +
    geom_segment(aes(x = 0, y = 105 / 2, xend = 68, yend = 105 / 2), colour = foreground_color) + # halfway line
    theme(rect = element_blank(), # remove additional ggplot2 features: lines, axis, etc...
          line = element_blank(),
          legend.position = "none",
          axis.title.x = element_blank(),
          axis.title.y = element_blank(),
          axis.text.x = element_blank(),
          axis.text.y = element_blank()) +
    # Shots are drawn with the data's y_shot mirrored onto the plot's x axis.
    geom_point(data = data, aes(x = -y_shot + 68, y = x_shot, size = xG, color = xG_cut, shape = factor(is_goal))) +
    scale_shape_manual(values = c(16, 16)) +
    scale_color_manual(values = colors_value) +
    # Only one coordinate system can be active: the coord_fixed() that used to
    # precede this call was replaced by it, so it has been dropped.
    coord_cartesian(ylim = c(55, 105))
}

get_stats <- function(data){
  # Gather shot statistics from data and return them as one multi-line string.
  #
  # data: one row per shot with the columns is_goal and xG.
  shots <- nrow(data)
  goals <- sum(data$is_goal)
  raw_xg <- sum(data$xG)
  total_xg <- round(raw_xg, 2)
  # Divide the unrounded total (avoids rounding twice) and report 0 instead of
  # NaN when there are no shots.
  xg_by_shot <- if (shots > 0) round(raw_xg / shots, 2) else 0
  paste0("Total xG = ", total_xg, "\nGoals = ", goals, "\nShots = ", shots, "\nxG per shot = ", xg_by_shot)
}

create_graphic <- function(xg_map, text, stats, filepath, background_color, text_color){
  # Create the full graphic (map, title image, foreground image, caption and
  # stats text) and write it to filepath.
  #
  # xg_map: ggplot object to render (as returned by xg_map()).
  # text: caption annotated near the top of the image.
  # stats: statistics text annotated near the bottom (see get_stats()).
  # Reads template/title.png and template/foreground.png and writes the
  # intermediate render to img/g_xgmap_tmp.png, all relative to the working directory.
  tmp_path <- "img/g_xgmap_tmp.png"
  ggsave(filename = tmp_path, xg_map + theme(plot.margin = unit(c(3.5, 0, -0.3, 0), "cm")), width = 10.5, height = 8, dpi = 150, bg = background_color)
  map_image <- image_read(tmp_path)
  title <- image_read("template/title.png")
  foreground <- image_read("template/foreground.png")
  full_image <- map_image %>%
    image_composite(image_scale(title, "600"), offset = "+70-40") %>%
    image_composite(foreground) %>%
    image_annotate(text, font = "Roboto", size = 35, location = "+80+190", color = text_color) %>%
    image_annotate(stats, font = "Roboto", size = 25, location = "+120+725", color = text_color)
  image_write(full_image, path = filepath)
}
--------------------------------------------------------------------------------
/visualisation/maps/xg_map/template/foreground.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/template/foreground.png
--------------------------------------------------------------------------------
/visualisation/maps/xg_map/template/title.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/maps/xg_map/template/title.png
--------------------------------------------------------------------------------
/visualisation/pass_sequence_cluster/img/cluter_sequence.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/pass_sequence_cluster/img/cluter_sequence.png
--------------------------------------------------------------------------------
/visualisation/pass_sequence_cluster/main.r:
--------------------------------------------------------------------------------
# Cluster pass sequences with k-means and draw each cluster centroid as a
# chain of arrows (pass0 ... pass5), highlighting the larger clusters.
library(tidyverse)
library(ggplot2) # was library(ggplot); the plotting package is ggplot2
library(viridis)
library(gghighlight)

# NOTE(review): `test` is not created in this script. It must already exist in
# the session; presumably one row per sequence with the passN_x_begin /
# passN_y_begin / passN_x_end / passN_y_end columns used below - confirm.
set.seed(42)
cluster_number = 10
kmeans_result = kmeans(test %>% na.omit(), cluster_number)

# One row per centroid, with the cluster size and a letter label (A, B, ...,
# then AA, AB, ... if more than 26 clusters are requested).
cluster_centroid = kmeans_result$centers %>%
  as.data.frame() %>%
  mutate(size=kmeans_result$size,
         cluster_name=c(LETTERS, sapply(LETTERS, function(x) paste0(x, LETTERS)))[1:cluster_number])


ggplot(cluster_centroid) +
  geom_segment(aes(x=pass0_x_begin, y=pass0_y_begin, xend=pass0_x_end, yend=pass0_y_end, color=cluster_name), size=1,
               lineend='butt',
               linejoin='mitre',
               arrow = arrow(length = unit(0.25, "cm"),type="closed")) +
  geom_segment(aes(x=pass1_x_begin, y=pass1_y_begin, xend=pass1_x_end, yend=pass1_y_end, color=cluster_name), size=1,
               lineend='butt',
               linejoin='mitre',
               arrow = arrow(length = unit(0.25, "cm"),type="closed")) +
  geom_segment(aes(x=pass2_x_begin, y=pass2_y_begin, xend=pass2_x_end, yend=pass2_y_end, color=cluster_name), size=1,
               lineend='butt',
               linejoin='mitre',
               arrow = arrow(length = unit(0.25, "cm"),type="closed")) +
  geom_segment(aes(x=pass3_x_begin, y=pass3_y_begin, xend=pass3_x_end, yend=pass3_y_end, color=cluster_name), size=1,
               lineend='butt',
               linejoin='mitre',
               arrow = arrow(length = unit(0.25, "cm"),type="closed")) +
  geom_segment(aes(x=pass4_x_begin, y=pass4_y_begin, xend=pass4_x_end, yend=pass4_y_end, color=cluster_name), size=1,
               lineend='butt',
               linejoin='mitre',
               arrow = arrow(length = unit(0.25, "cm"),type="closed")) +
  geom_segment(aes(x=pass5_x_begin, y=pass5_y_begin, xend=pass5_x_end, yend=pass5_y_end, color=cluster_name), size=1,
               lineend='butt',
               linejoin='mitre',
               arrow = arrow(length = unit(0.25, "cm"),type="closed")) +
  # Cluster label, placed at the end of the third pass of the sequence.
  geom_text(data=cluster_centroid, aes(x=pass2_x_end, y=pass2_y_end, label=cluster_name, colour=cluster_name),
            hjust=-1.2,
            vjust=1.2) +
  scale_color_viridis_d() +
  gghighlight(max(size) > 200) + # keep colour only for clusters with more than 200 sequences
  theme_minimal() +
  labs(x="", y="") +
  # Remove additional ggplot2 features: legend, axes, lines, backgrounds.
  # (Merged from several theme() calls that repeated axis.title.y.)
  theme(legend.position="none",
        axis.title.x=element_blank(),
        axis.text.x=element_blank(),
        axis.ticks.x=element_blank(),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        rect = element_blank(),
        line = element_blank())
--------------------------------------------------------------------------------
/visualisation/passes/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/.DS_Store
--------------------------------------------------------------------------------
/visualisation/passes/img/DierMatciPassesCombinations.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/DierMatciPassesCombinations.jpg
--------------------------------------------------------------------------------
/visualisation/passes/img/arsenal.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/arsenal.jpg
--------------------------------------------------------------------------------
/visualisation/passes/img/attacking3rdPasses.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/attacking3rdPasses.png
--------------------------------------------------------------------------------
/visualisation/passes/img/bellerin.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/bellerin.png
--------------------------------------------------------------------------------
/visualisation/passes/img/bellerinPhoto.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/bellerinPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/chamberlainPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/chamberlainPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/coquelin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/coquelin.png -------------------------------------------------------------------------------- /visualisation/passes/img/coquelinPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/coquelinPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/kosPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/kosPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/koscielny.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/koscielny.png 
-------------------------------------------------------------------------------- /visualisation/passes/img/logoArsenal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/logoArsenal.png -------------------------------------------------------------------------------- /visualisation/passes/img/midfield.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/midfield.jpg -------------------------------------------------------------------------------- /visualisation/passes/img/monreal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/monreal.png -------------------------------------------------------------------------------- /visualisation/passes/img/monrealPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/monrealPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/mustafi.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/mustafi.png -------------------------------------------------------------------------------- /visualisation/passes/img/mustafiPhoto.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/mustafiPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/oxlade.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/oxlade.png -------------------------------------------------------------------------------- /visualisation/passes/img/ozil.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/ozil.png -------------------------------------------------------------------------------- /visualisation/passes/img/ozilPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/ozilPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/pogbaPassesCombinations.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/pogbaPassesCombinations.jpg -------------------------------------------------------------------------------- /visualisation/passes/img/ramsey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/ramsey.png -------------------------------------------------------------------------------- /visualisation/passes/img/ramseyPhoto.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/ramseyPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/sanchez.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/sanchez.png -------------------------------------------------------------------------------- /visualisation/passes/img/sanchezPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/sanchezPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/walcott.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/walcott.png -------------------------------------------------------------------------------- /visualisation/passes/img/walcottPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/walcottPhoto.png -------------------------------------------------------------------------------- /visualisation/passes/img/xhaka.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/xhaka.png 
-------------------------------------------------------------------------------- /visualisation/passes/img/xhakaPhoto.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/passes/img/xhakaPhoto.png -------------------------------------------------------------------------------- /visualisation/player_stats/scraper.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import json 3 | import pandas as pd 4 | 5 | list_files = glob.glob("./data/*.json") 6 | 7 | json_data = [json.load(open(f)) for f in list_files] 8 | 9 | # print(json_data[0]['entity']['name']['display']) 10 | 11 | # data = [[elm['entity']['name']['first'],elm['entity']['name']['last']] for elm in json_data] 12 | 13 | # print(data) 14 | 15 | # df = pd.DataFrame(data) 16 | # df.columns = ["first","name"] 17 | 18 | # print(df) 19 | 20 | data =[] 21 | for player in json_data: 22 | player_id = player['entity']['playerId'] 23 | name = player['entity']['name']['display'] 24 | country = player['entity']['nationalTeam']['country'] 25 | position1 = player['entity']['info']['position'] 26 | position2 = player['entity']['info']['positionInfo'] 27 | age = player['entity']['age'][0:2] 28 | mins_played=0 29 | appearances=0 30 | goal=0 31 | goals_assist=0 32 | open_play_pass=0 33 | fwd_pass=0 34 | backward_pass=0 35 | accurate_pass=0 36 | total_cross=0 37 | accurate_cross=0 38 | total_tackle=0 39 | won_tackle=0 40 | shot_on_target=0 41 | shot_off_target=0 42 | interception=0 43 | for stat in player['stats']: 44 | if(stat['name']=="mins_played"): mins_played = stat['value'] 45 | if(stat['name']=="appearances"): appearances = stat['value'] 46 | if(stat['name']=="goals"): goal = stat['value'] 47 | goals_assist=stat['value'] if stat['name']=="goals_assist" else 0 48 | if(stat['name']=="open_play_pass"): open_play_pass= 
stat['value'] 49 | if(stat['name']=="fwd_pass"): fwd_pass= stat['value'] 50 | if(stat['name']=="backward_pass"): backward_pass= stat['value'] 51 | if(stat['name']=="accurate_pass"): accurate_pass= stat['value'] 52 | if(stat['name']=="total_cross"): total_cross= stat['value'] 53 | if(stat['name']=="accurate_cross"): accurate_cross= stat['value'] 54 | if(stat['name']=="total_tackle"): total_tackle= stat['value'] 55 | if(stat['name']=="won_tackle"): won_tackle= stat['value'] 56 | if(stat['name']=="ontarget_scoring_att"): shot_on_target= stat['value'] 57 | if(stat['name']=="shot_off_target"): shot_off_target= stat['value'] 58 | if(stat['name']=="interception"): interception= stat['value'] 59 | 60 | data.append([player_id,name,country,age,position1,position2,appearances,mins_played,goal*90/mins_played,goals_assist*90/mins_played,open_play_pass*90/mins_played,accurate_pass*90/mins_played,accurate_pass/open_play_pass,fwd_pass*90/mins_played,backward_pass*90/mins_played,accurate_cross/total_cross,won_tackle/total_tackle,shot_on_target,shot_off_target,interception]) 61 | 62 | 63 | df = pd.DataFrame(data) 64 | df.columns = ["player_id","name","country","age","position1","position2","appearances","mins_played","goal_per90","assist_per90","pass_per90","accurate_pass_per90","pass_accuracy","fwd_pass_per90","backward_pass_per90","cross_accuracy","tackl accuracy",""] 65 | print(df) 66 | 67 | 68 | -------------------------------------------------------------------------------- /visualisation/player_stats/scraper.r: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(rvest) 3 | library(readr) 4 | library(stringr) 5 | 6 | 7 | #One shot ------------- 8 | # url="http://www.rotowire.com/soccer/player.htm?id=16664" 9 | # page = url %>% read_html() 10 | # table = page %>% html_nodes(xpath='/html/body/div[3]/div[9]/table[1]') %>% html_table() %>% nth(1) 11 | # print(table) 12 | 13 | urls = 
read_delim("link_player_PremierLeague.csv",",")
#print(urls$URL)

# Positions kept in the output; players with any other position are skipped.
kept_positions = c("Defender", "Midfielder", "Forward/Midfielder", "Forward")
no_stats_message = "No stats available for this player."

# Visit every player page listed in the CSV and stack, per player, the third
# stats table of the page together with name, club, age and position.
data=data.frame()
for(u in urls$URL){
  print(u)
  page = u %>% read_html()
  Sys.sleep(5) # pause between two page requests
  player = page %>% html_nodes(xpath='/html/body/div[3]/div[1]/div[2]/div[1]/p[1]') %>% html_text()
  club = page %>% html_nodes(xpath='/html/body/div[3]/div[1]/div[2]/div[1]/p[2]/span/text()') %>% html_text()
  # The first text node of the second paragraph is split on spaces: piece 1
  # starts with the age (first two characters), piece 2 is the position.
  info = page %>% html_nodes(xpath='/html/body/div[3]/div[1]/div[2]/div[1]/p[2]/text()') %>% html_text() %>% str_split(.," ") %>% nth(1)
  age = info %>% nth(1) %>% substring(.,1,2)
  position = info %>% nth(2)

  # Skip pages whose position is missing or not one of the kept positions.
  if(length(position) != 1 || is.na(position) || !(position %in% kept_positions)){
    next
  }

  # The stats of interest live in the third table of the stats box.
  stats_tables = page %>% html_nodes(".mlb-player-basicstatsbox > table")
  if(length(stats_tables) < 3){
    next
  }
  stats_table = stats_tables[[3]]

  # any() reduces the per-cell comparison to the single TRUE/FALSE that if() needs.
  cells = stats_table %>% html_nodes('td') %>% html_text()
  if(any(cells == no_stats_message)){
    next
  }

  table2016 = stats_table %>% html_table()
  tmp = data.frame(player,club,age,position,table2016)
  data = rbind(data,tmp)
  print(player)
}



--------------------------------------------------------------------------------
/visualisation/rollmean/img/manU_xgvs_xgc.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/rollmean/img/manU_xgvs_xgc.png
--------------------------------------------------------------------------------
/visualisation/rollmean/main.r:
--------------------------------------------------------------------------------
library(readr)
library(zoo)
library(ggplot2)
library(dplyr)
library(hrbrthemes)


raw_data <- 
read_csv("data/artifacts/all_xg.csv")
id = 161
subtitle = "Wolverhampton in Premier League"
rolling = 15

# Copy the team id under a name that a column of raw_data is unlikely to carry:
# inside mutate() a column called `id` would take precedence over the global.
target_team_id = id

# Tag every row as chance created by the team ("xG") or conceded ("xGC").
data <- raw_data %>%
  mutate(team=ifelse(team_id==target_team_id, "xG", "xGC")) %>%
  select(startDate, team, expected_goal)

# Total xG per match date and side.
grouped_data <- data %>%
  group_by(startDate, team) %>%
  summarise(xg=sum(expected_goal)) %>% as.data.frame() %>%
  mutate(date=format(as.Date(startDate), format="%Y-%m-%d"))
# Match index 1, 2, 3, ... shared by both rows of a date. Unlike
# rep(1:(n/2), each=2) this stays aligned when a date has a single row.
grouped_data$id <- match(grouped_data$startDate, unique(grouped_data$startDate))
n_games <- max(grouped_data$id)

# Rolling average per side, NA-padded at both ends (fill=NA is the current
# spelling of the deprecated na.pad=TRUE).
rolling_data <- grouped_data %>%
  group_by(team) %>%
  mutate(rolling_xg=rollmean(xg, rolling, fill=NA)) %>%
  ungroup()

# Match indices where the "change in season" markers are drawn (hard-coded).
season_changes <- c(39, 77, 115)
x_breaks <- seq(1, n_games, 5)

ggplot(rolling_data, aes(x=id, y=rolling_xg, color=team)) +
  geom_line() +
  geom_point() +
  scale_color_manual(values=c(xG="#56FFAE", xGC="#FCA337")) +
  geom_vline(xintercept=season_changes, color="white", linetype="dashed") +
  annotate("text", x=season_changes, y=0.3, label="\nchange in season", colour="white", angle=90, size=3) +
  scale_x_continuous(breaks=x_breaks, limits=c(5, n_games), labels=unique(grouped_data$date)[x_breaks]) +
  scale_y_continuous(breaks=c(0,0.5,1,1.5,2,2.5,3), limits=c(0,3)) +
  labs(x="", y="xG/xGC",title="xG & xGC rolling averages",subtitle=subtitle,caption=paste0(rolling, " games rolling averages\nby @Ben8t"),color="") +
  theme_ipsum_rc() +
  theme(
    plot.title = element_text(size=35),
    text = element_text(colour="white"),
    axis.text.y = element_text(colour="white"),
    axis.text.x = element_text(colour="white", angle = 45, hjust = 1),
    axis.line.x = element_line(color="white"),
    axis.line.y = element_line(color="white"),
    axis.ticks.y = element_line(color="white"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "#2162AA"))

--------------------------------------------------------------------------------
/visualisation/spaced_table/graphic.r:
--------------------------------------------------------------------------------
# graphic.r
# League table drawn on a vertical points axis, so that gaps between teams
# reflect their gaps in points.

library(ggplot2)
library(dplyr)
library(hrbrthemes)


# Row order of the CSV is taken as the league position.
table = read.csv("data/premier_league_table.csv") %>%
  select(Team,Pts) %>%
  mutate(Pos=row_number()) %>%
  mutate(Place=case_when(Pos<=4 ~ "Champions League",
                         Pos<=6 ~ "Europa League",
                         Pos>17 ~ "Relegation",
                         TRUE ~ "Premier League")) %>%
  mutate(seg_x=0.99, seg_xend=1.01, seg_y=Pts, seg_yend=Pts)

# Teams level on points share one label ("Team A, Team B").
data = table %>%
  group_by(Pts) %>%
  mutate(Team = paste0(Team, collapse = ", "))

graph = ggplot(data, aes(x=1, y=Pts)) +
  geom_line(color="#636e72") +
  geom_segment(aes(x=seg_x,xend=seg_xend,y=seg_y,yend=seg_yend), color="#636e72") +
  geom_point(aes(color=Place)) +
  geom_text(aes(label=Team, color=Place),hjust=1, x=0.98, show.legend = FALSE) +
  geom_text(aes(label=Pts, color=Place),hjust=0, x=1.02, show.legend = FALSE) +
  # Named values keep each category's colour even if a category is absent.
  scale_colour_manual(values=c("Champions League"="#0984e3",
                               "Europa League"="#F1AF41",
                               "Premier League"="#2d3436",
                               "Relegation"="#d63031")) +
  scale_x_continuous(limits=c(0.40,1.20)) +
  scale_y_continuous(limits=c(min(data$Pts),max(data$Pts))) +
  labs(x="", y="",title="Spaced Table",subtitle="",caption="by @Ben8t",color="") +
  theme_ipsum_rc() +
  theme(axis.title.x=element_blank(),
axis.text.x=element_blank(), 31 | axis.ticks.x=element_blank(), 32 | axis.title.y=element_blank(), 33 | axis.text.y=element_blank(), 34 | axis.ticks.y=element_blank(), 35 | panel.grid.major=element_blank(), 36 | panel.grid.minor=element_blank(), 37 | legend.position = c(0.95, 0.5)) 38 | 39 | ggsave(filename = "img/tmp.png", graph, width=5.5, height=10, dpi=300) 40 | 41 | -------------------------------------------------------------------------------- /visualisation/spaced_table/img/tmp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/spaced_table/img/tmp.png -------------------------------------------------------------------------------- /visualisation/table_to_heatmap/img/heatmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Ben8t/football-analytics/8db50fff35d9711568855f23147dfaa9c9b603a6/visualisation/table_to_heatmap/img/heatmap.png -------------------------------------------------------------------------------- /visualisation/table_to_heatmap/main.r: -------------------------------------------------------------------------------- 1 | library(dplyr) 2 | library(ggplot) 3 | library(viridis) 4 | 5 | data %>% na.omit() %>% 6 | rownames_to_column("row") %>% 7 | gather(col, Value, -row) %>% 8 | mutate( 9 | row = factor(row, levels = rev(unique(row))), 10 | Value = Value) %>% arrange(desc(Value)) %>% 11 | ggplot(aes(x=reorder(row, Value), y=col, fill = Value)) + 12 | geom_tile() + 13 | scale_fill_viridis(option="magma") + 14 | theme_minimal() + 15 | labs(x="", y="") + 16 | theme(legend.position = "none") + 17 | theme(axis.title.x=element_blank(), 18 | axis.text.x=element_blank(), 19 | axis.ticks.x=element_blank()) + 20 | theme(axis.title.y=element_blank(), 21 | axis.text.y=element_blank(), 22 | axis.ticks.y=element_blank()) 
--------------------------------------------------------------------------------