├── .gitignore ├── Getting Started with Football Analytics.md ├── LICENSE ├── README.md ├── analysis_tools ├── __init__.py ├── logos_and_badges.py ├── models.py ├── pitch_zones.py ├── statsbomb_custom_events.py ├── statsbomb_data_engineering.py ├── visuals.py ├── whoscored_custom_events.py ├── whoscored_data_engineering.py └── wyscout_data_engineering.py ├── data_directory ├── fbref_data │ └── 2022_23 │ │ └── EPL │ │ ├── epl 2023 player data.json │ │ ├── epl 2023 team data.json │ │ └── epl 2023 vs team data.json ├── leaguetable_data │ ├── 2016_17 │ │ ├── Bundesliga-table-2016.pbz2 │ │ ├── EFLC-table-2016.pbz2 │ │ ├── EPL-table-2016.pbz2 │ │ ├── La_Liga-table-2016.pbz2 │ │ ├── Ligue_1-table-2016.pbz2 │ │ └── Serie_A-table-2016.pbz2 │ ├── 2017_18 │ │ ├── Bundesliga-table-2017.pbz2 │ │ ├── EFLC-table-2017.pbz2 │ │ ├── EPL-table-2017.pbz2 │ │ ├── La_Liga-table-2017.pbz2 │ │ ├── Ligue_1-table-2017.pbz2 │ │ └── Serie_A-table-2017.pbz2 │ ├── 2018_19 │ │ ├── Bundesliga-table-2018.pbz2 │ │ ├── EFLC-table-2018.pbz2 │ │ ├── EPL-table-2018.pbz2 │ │ ├── La_Liga-table-2018.pbz2 │ │ ├── Ligue_1-table-2018.pbz2 │ │ └── Serie_A-table-2018.pbz2 │ ├── 2019_20 │ │ ├── Bundesliga-table-2019.pbz2 │ │ ├── EFLC-table-2019.pbz2 │ │ ├── EPL-table-2019.pbz2 │ │ ├── La_Liga-table-2019.pbz2 │ │ ├── Ligue_1-table-2019.pbz2 │ │ └── Serie_A-table-2019.pbz2 │ ├── 2020_21 │ │ ├── Bundesliga-table-2020.pbz2 │ │ ├── EFLC-table-2020.pbz2 │ │ ├── EPL-table-2020.pbz2 │ │ ├── La_Liga-table-2020.pbz2 │ │ ├── Ligue_1-table-2020.pbz2 │ │ └── Serie_A-table-2020.pbz2 │ ├── 2021_22 │ │ ├── Bundesliga-table-2021.pbz2 │ │ ├── EFLC-table-2021.pbz2 │ │ ├── EPL-table-2021.pbz2 │ │ ├── La_Liga-table-2021.pbz2 │ │ ├── Ligue_1-table-2021.pbz2 │ │ └── Serie_A-table-2021.pbz2 │ └── 2022_23 │ │ ├── EFLC-table-2022.pbz2 │ │ └── EPL-table-2022.pbz2 ├── misc_data │ ├── articles │ │ └── The Evolution of Shooting in the Premier League.pdf │ ├── images │ │ ├── BoxLogo.png │ │ ├── CarryLogo.png │ │ ├── 
EPL-2022-23-cycle-of-results.png │ │ ├── EPL-2022-23-cycle-of-results.pptx │ │ ├── JK Twitter Logo.png │ │ ├── PassLogo.png │ │ ├── RecoveryLogo.png │ │ ├── TackleLogo.png │ │ ├── example-1-1-1.png │ │ ├── example-1-1-2.png │ │ ├── example-1-1-3.png │ │ ├── example-10-1-1.png │ │ ├── example-10-1-2.png │ │ ├── example-11-1-1.png │ │ ├── example-2-1-1.png │ │ ├── example-2-1-2.png │ │ ├── example-2-1-3.png │ │ ├── example-2-1-4.png │ │ ├── example-2-2-1.png │ │ ├── example-2-2-2.png │ │ ├── example-3-1-1.png │ │ ├── example-3-1-2.png │ │ ├── example-3-1-3.png │ │ ├── example-3-1-4.png │ │ ├── example-3-1-5.png │ │ ├── example-4-1-1.png │ │ ├── example-4-1-2.png │ │ ├── example-4-2-1.png │ │ ├── example-4-2-2.png │ │ ├── example-4-3-1.png │ │ ├── example-4-3-2.png │ │ ├── example-5-1-1.png │ │ ├── example-5-1-2.png │ │ ├── example-5-2-1.png │ │ ├── example-5-2-2.png │ │ ├── example-5-3-1.png │ │ ├── example-5-3-2.png │ │ ├── example-5-3-3.png │ │ ├── example-5-4-1.png │ │ ├── example-5-4-2.png │ │ ├── example-5-5-1.png │ │ ├── example-5-5-2.png │ │ ├── example-5-5-3.png │ │ ├── example-5-5-4.png │ │ ├── example-5-6-1.png │ │ ├── example-5-6-2.png │ │ ├── example-5-7-1.png │ │ ├── example-5-7-2.png │ │ ├── example-6-1-1.png │ │ ├── example-6-1-2.png │ │ ├── example-6-2-1.png │ │ ├── example-6-2-2.png │ │ ├── example-6-3-1.png │ │ ├── example-6-4-1.png │ │ ├── example-6-5-1.png │ │ ├── example-6-5-2.png │ │ ├── example-6-6-1.png │ │ ├── example-6-6-2.png │ │ ├── example-6-7-1.png │ │ ├── example-7-1-1.png │ │ ├── example-7-1-2.png │ │ ├── example-7-2-1.png │ │ ├── example-7-2-2.png │ │ ├── example-9-1-1.png │ │ ├── example-9-1-2.png │ │ └── getting-started-roadmap.png │ ├── log_regression_xg_data.pbz2 │ ├── neural_net_xg_data.pbz2 │ └── worldcup_2010_to_2018_distcovered.xlsx ├── statsbomb_data │ └── README.txt ├── transfermarkt_data │ ├── 2021_22 │ │ ├── transfermarkt_GB1_2021-2022.pbz2 │ │ └── transfermarkt_GB2_2021-2022.pbz2 │ └── 2022_23 │ │ ├── 
transfermarkt_FR2_2022-2023.pbz2 │ │ └── transfermarkt_GB2_2022-2023.pbz2 ├── whoscored_data │ └── README.txt └── wyscout_data │ ├── Data Collection Report.pdf │ ├── coaches.pbz2 │ ├── competitions.pbz2 │ ├── eventid2name.csv │ ├── events │ ├── events_England.pbz2 │ ├── events_European_Championship.pbz2 │ ├── events_France.pbz2 │ ├── events_Germany.pbz2 │ ├── events_Italy.pbz2 │ ├── events_Spain.pbz2 │ └── events_World_Cup.pbz2 │ ├── matches │ ├── matches_England.pbz2 │ ├── matches_European_Championship.pbz2 │ ├── matches_France.pbz2 │ ├── matches_Germany.pbz2 │ ├── matches_Italy.pbz2 │ ├── matches_Spain.pbz2 │ └── matches_World_Cup.pbz2 │ ├── playerank.pbz2 │ ├── players.pbz2 │ ├── tags2name.csv │ └── teams.pbz2 ├── model_directory ├── pass_cluster_model │ └── PassClusterModel65.joblib └── xg_model │ └── log_regression_xg_model.joblib └── projects ├── 00_data_import_and_misc_work ├── download_yt_video.py ├── import_data_fbref.py ├── import_data_leaguetable.py ├── import_data_whoscored.py └── scrape_data_transfermarkt.py ├── 01_wc2018_box2box_mids └── worldcup_b2b_mids.py ├── 02_player_team_valuation └── team_player_value_analysis.py ├── 03_model_development_and_implementation ├── pass_cluster_data_collection.py ├── shot_xg_plot.py ├── xg_log_regression_model.py └── xg_neural_network.py ├── 04_match_reports ├── off_def_shape_report_ws.py ├── pass_report_ws.py └── shot_report_understat.py ├── 05_competition_reports_top_players ├── player_defensive_contribution.py ├── player_effective_carriers.py ├── player_effective_passers.py ├── player_high_defensive_actions.py ├── player_impact_on_team.py ├── player_penalty_takers.py ├── player_threat_creators.py └── player_threat_creators_zonal_comparison.py ├── 06_competition_reports_top_teams ├── team_ball_winning.py ├── team_common_zonal_actions.py ├── team_cross_success.py ├── team_delta_threat_creation.py ├── team_fullback_combinations.py ├── team_setpiece_shot_concession.py ├── team_threat_creation.py └── 
xg_league_table_sb.py ├── 07_player_reports ├── advanced_swarm_radar.py └── player_report_fullback.py ├── 08_evolution_of_shooting └── shot_characteristics_trending.py ├── 09_league_position_metric_correlation └── team_metric_pts_correlation.py ├── 10_team_buildup_passes └── team_pass_tendencies.py └── 11_justice_league └── justice_league.py /.gitignore: -------------------------------------------------------------------------------- 1 | /analysis_tools/get_football_data.py 2 | /data_directory/misc_data/pass_data.pbz2 3 | /data_directory/statsbomb_data/2017_18/ 4 | /data_directory/whoscored_data/2023_24/ 5 | /data_directory/whoscored_data/2022_23/ 6 | /data_directory/whoscored_data/2021_22/ 7 | /data_directory/whoscored_data/2020_21/ 8 | /data_directory/whoscored_data/2019_20/ 9 | /data_directory/whoscored_data/2018_19/ 10 | /data_directory/whoscored_data/2017_18/ 11 | /data_directory/whoscored_data/2016_17/ 12 | /data_directory/whoscored_data/2015_16/ 13 | /data_directory/whoscored_data/2014_15/ 14 | /data_directory/whoscored_data/2013_14/ 15 | /data_directory/whoscored_data/2012_13/ 16 | /data_directory/whoscored_data/2011_12/ 17 | /data_directory/whoscored_data/2010_11/ 18 | /data_directory/whoscored_data/2009_10/ 19 | /projects/00_data_import_and_misc_work/misc_work_images/ 20 | /projects/00_data_import_and_misc_work/import_data_statsbomb.py 21 | /projects/00_data_import_and_misc_work/check_whoscored_data_volume.py 22 | /projects/01_wc2018_box2box_mids_statsbomb/worldcup_b2b_mids/ 23 | /projects/02_player_team_valuation/forward_value_analysis/ 24 | /projects/02_player_team_valuation/market_value_league_table/ 25 | /projects/04_match_reports/pass_reports/ 26 | /projects/04_match_reports/shot_reports/ 27 | /projects/04_match_reports/shape_reports/ 28 | /projects/04_match_reports/out_of_possession_reports/ 29 | /projects/04_match_reports/out_of_posession_report_sb.py 30 | /projects/04_match_reports/team_metric_vs_season/ 31 | 
/projects/04_match_reports/team_metrics_vs_season.py 32 | /projects/05_competition_reports_top_players/player_effective_carriers/ 33 | /projects/05_competition_reports_top_players/player_effective_passers/ 34 | /projects/05_competition_reports_top_players/player_penalty_takers/ 35 | /projects/05_competition_reports_top_players/player_threat_creators/ 36 | /projects/05_competition_reports_top_players/top_defensive_actions/ 37 | /projects/05_competition_reports_top_players/top_defensive_contributions/ 38 | /projects/05_competition_reports_top_players/player_impact_on_team/ 39 | /projects/06_competition_reports_top_teams/team_ball_winning/ 40 | /projects/06_competition_reports_top_teams/team_cross_success/ 41 | /projects/06_competition_reports_top_teams/team_fullback_combinations/ 42 | /projects/06_competition_reports_top_teams/team_threat_creation/ 43 | /projects/06_competition_reports_top_teams/team_setpiece_chance_concession/ 44 | /projects/06_competition_reports_top_teams/team_common_actions/ 45 | /projects/06_competition_reports_top_teams/team_xg_metrics/ 46 | /projects/07_player_reports/player_reports/ 47 | /projects/07_player_reports/advanced_radars/ 48 | /projects/07_player_reports/player_report_centreforward_sb.py 49 | /projects/07_player_reports/player_report_winger_sb.py 50 | /projects/08_evolution_of_shooting/shot_characteristics_trending/ 51 | /projects/09_league_position_metric_correlation/team_metric_pts_correlation/ 52 | /projects/10_team_buildup_passes/team_pass_tendencies/ 53 | /projects/11_justice_league/justice_league/ 54 | /projects/99_private_work/ 55 | -------------------------------------------------------------------------------- /Getting Started with Football Analytics.md: -------------------------------------------------------------------------------- 1 | # Getting Started: Football Data Analytics with Python 2 | The aim of this short document is to provide some guidance and advice on getting started with football data analytics using 
Python. 3 | 4 | There are a huge number of relevant resources out there already, but I thought it worth providing my perspectives on the topic and outlining the path that I took into football data analytics. Please note that this document only covers the technical aspects of getting started, and does not provide guidance on starting a career within the football industry (nor am I qualified to provide this). 5 | 6 | ## Roadmap 7 | I have put together a basic roadmap that identifies a logical sequence of activities that you may choose to undertake to get started with football data. I will expand on each activity in the following sections. 8 | 9 |

10 |     11 |

12 | 13 | ## Step 1 - Python Fundamentals 14 | 15 | One of the most common mistakes I have seen, and therefore a key piece of advice I have, is: don't try to run before you can walk. Whilst it is seriously tempting to immerse yourself in football data immediately, it's critical that you develop a thorough understanding of the Python programming language beforehand. In taking the time to learn the fundamentals of Python coding as part of a wider analytics skill-set, you will better position yourself to develop as a football data analyst. 16 | 17 | I have listed a few courses below that I have completed and highly recommend. It wouldn't be excessive to spend 1-3 months learning Python before you get started with football data. 18 | 19 | |Course Name|Platform|Time Commitment|Cost| 20 | |----------|:-------------:|:------:|:------:| 21 | |[Learning to Program with Python 3](https://www.youtube.com/playlist?list=PLQVvvaa0QuDeAams7fkdcwOGBpGdHpXln)|YouTube|5-10Hrs|Free| 22 | |[Python Tutorial for Beginners](https://www.youtube.com/watch?v=YYXdXT2l-Gg&list=PL-osiE80TeTskrapNbzXhwoFUiLCjGgY7&ab_channel=CoreySchafer)|YouTube|10-20Hrs|Free| 23 | |[Complete Python Bootcamp](https://www.udemy.com/course/complete-python-bootcamp/)|Udemy|20-40Hrs|£15.99 (offer)| 24 | |[Python for Data Science & Machine Learning](https://www.udemy.com/course/python-for-data-science-and-machine-learning-bootcamp/)|Udemy|20-40Hrs|£15.99 (offer)| 25 | 26 | 27 | ## Step 2 - Analysing Football Event Data 28 | 29 | With the fundamentals of Python nailed down, you can move onto the good bit! There is an absolutely essential resource that will get you started with analysing football event data. Uppsala University (Sweden) run an online course called [Mathematical Modelling of Football](https://soccermatics.readthedocs.io/en/latest/). It is delivered by the author of Soccermatics, [David Sumpter](https://www.david-sumpter.com/), who has kindly made the majority of lectures/resources publicly available. 
30 | 31 | I would strongly recommend that you not only watch, but follow along with the course material. Prioritise the first three sections: event data, models and scouting. These sections will get you up to speed with crucial football data analytics topics, as well as provide invaluable guidance on producing effective visuals. The remaining sections (valuing actions, randomness & prediction, pitch control, positioning & movement, and physical data) cover more advanced topics but are still well worth completing. 32 | 33 | ## Step 3 - Understanding Football Data Sources 34 | 35 | Before going it alone, it's important to understand what football data exists in the public domain, and equally what doesn't. What do you have to pay for, and what's available for free? 36 | 37 | Through taking the time to consider this, you can appropriately scope your future projects and ensure that the data required to complete them is actually available. If you have completed various sections of the [Mathematical Modelling of Football](https://soccermatics.readthedocs.io/en/latest/) course, then you will likely have at least some awareness of what data is available and accessible. 38 | 39 | In my experience, working with different types/formats of football data not only stretches you from a technical programming perspective, but encourages you to think about different ways to process and draw insight from football data. When getting started with football data analytics, I believe the following sources of free data are key. 
40 | 41 | |Data Provider|Data Description|Data Format| 42 | |----------|-------------|:------:| 43 | |[Statsbomb Open Data](https://github.com/statsbomb/open-data)|Statsbomb match event data from a collection of games and competitions|[API](https://github.com/statsbomb/statsbombpy) or [.json](https://github.com/statsbomb/open-data)| 44 | |[Wyscout Free Data](https://figshare.com/collections/Soccer_match_event_dataset/4415000/2)|Wyscout match event data for all matches in Europe's top 5 leagues during 2017-18 season|[.json](https://figshare.com/collections/Soccer_match_event_dataset/4415000/2)| 45 | |[Understat](https://understat.com/)|Shot event data for all matches in Europe's top 5 leagues (+ Russian Prem)|[API](https://pypi.org/project/understatapi/) or [Web](https://understat.com/)| 46 | |[FBref](https://fbref.com/en/)|Aggregated team and player data/performance metrics|[Web](https://fbref.com/en/), .xlsx or .csv| 47 | |[Transfermarkt](https://www.transfermarkt.co.uk/)|Team and player market value|[Web](https://www.transfermarkt.co.uk/)| 48 | 49 | There are many more data sources, the majority of which are listed and explained within Edd Webster's brilliant [football analytics repository](https://github.com/eddwebster/football_analytics#data-sources). Given the number of web-based resources around, any time spent understanding the basics of web-scraping with Python is time well spent. 50 | 51 | ## Step 4 - Personal Projects 52 | 53 | Now it's time to go alone... there is no better way to consolidate learning than through practicing, testing and experimentation. With an understanding of football data analytics principles and an awareness of what data is available to you, it's likely you have formulated some project ideas already. Go with them! Explore the data and don't be afraid to change the direction of your project if something in particular piques your interest! 
54 | 55 | If you are struggling, here's a list of ideas: 56 | - Identify (and visualise) which players frequently passed the ball into the opposition box (in-play only) during the 2018 World Cup 57 | - Highlight pitch areas that each team lost possession of the ball during the 2018 World Cup. Link this to team performance 58 | - Highlight pitch areas that each team created shots/chances from during the 2018 World Cup. Link this to team performance 59 | - Create a function that identifies every time a forward receives a long ball, their next action, and whether they hold the ball up successfully 60 | - Define and implement some bespoke metrics that quantify how well a defender/midfield/forward performed during the 2018 World Cup 61 | - Build an xG model using data from Europe's top 5 leagues during the 2017/18 season 62 | - Build a tool to extract and visualise shot event data from Understat 63 | - Develop a model that attempts to predict player market value from seasonal performance (for a specific player position) 64 | - Implement an expected threat model, and use it to determine the value of all actions during the 2017/18 season in Europe's top 5 leagues 65 | - Build a tool that quickly produces a scatter plot to compare a set of players against 2 FBref metrics 66 | 67 | When you have completed a project and are happy with your work, post it! Whilst this might seem daunting, Twitter is a great place to share your work and obtain feedback. Feel free to send me your work directly and I'll take a look too. 68 | 69 | ## Step 5 - Research & Refresh Knowledge 70 | 71 | The final step, and one that should not be underestimated, is to maintain and refresh your knowledge. Continuous learning will further develop your data analytics skillset and help you to bring fresh ideas and concepts into your personal projects. Go back and revisit course topics if you need to, and keep on top of public data releases. 
72 | 73 | There is a huge amount of research and development that is taking place in the football analytics community, and I'd recommend at least being aware of it. One of the best ways to do this is to take a look at [Jan Van Haaren's](https://www.janvanhaaren.be/) annual soccer analytics review (e.g. [2022 soccer analytics review](https://www.janvanhaaren.be/2022/12/29/soccer-analytics-review-2022.html)). Even if you only scan through the research paper titles, you will at least get an indication of trending research topics and the state-of-the-art in football data analytics. 74 | 75 | ## Summary 76 | 77 | And that's about it! Hopefully this resource has been useful. Please feel free to reach out to me ([@\_JKDS\_](https://twitter.com/_JKDS_)) if you have any questions or thoughts! 78 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /analysis_tools/__init__.py: -------------------------------------------------------------------------------- 1 | """Package to support analysis of football event data. 
2 | 3 | Collection of modules to assist with data engineering, formatting, analysis and visualisation of event-type football 4 | data from a variety of sources.""" -------------------------------------------------------------------------------- /analysis_tools/models.py: -------------------------------------------------------------------------------- 1 | """Module containing a variety of predictive and statistical models relevant to the analysis of football data 2 | 3 | Functions 4 | --------- 5 | get_pass_clusters(events): 6 | Assign statsbomb or whoscored pass events to a pass cluster 7 | 8 | simulate_match_outcome(events, matches, match_id, sim_count=10000): 9 | Simulate the outcome of a match based on teams xG 10 | 11 | 12 | """ 13 | 14 | import joblib 15 | from sklearn.base import BaseEstimator, TransformerMixin 16 | import os 17 | import numpy as np 18 | import pandas as pd 19 | 20 | 21 | # Load custom classes that are required for model pipeline (done manually here for ease) 22 | # noinspection PyPep8Naming 23 | class convertYards(BaseEstimator, TransformerMixin): 24 | def fit(self, X, y=None): 25 | return self 26 | 27 | def transform(self, X, y=None): 28 | X['x'] = X['x'] * (120 / 100) 29 | X['y'] = X['y'] * (80 / 100) 30 | X['endX'] = X['endX'] * (120 / 100) 31 | X['endY'] = X['endY'] * (80 / 100) 32 | return X 33 | 34 | 35 | # noinspection PyPep8Naming 36 | class customScaler(BaseEstimator, TransformerMixin): 37 | def fit(self, X, y=None): 38 | self.max_x = 120 39 | self.max_y = 80 40 | return self 41 | 42 | def transform(self, X, y=None): 43 | X['x'] = X['x'] / 120 44 | X['y'] = X['y'] / 120 45 | X['endX'] = X['endX'] / 120 46 | X['endY'] = X['endY'] / 120 47 | return X 48 | 49 | 50 | def get_pass_clusters(events, data_mode='whoscored'): 51 | """ Assign statsbomb or whoscored pass events to a pass cluster 52 | 53 | Function that implements a pass clustering model, that has been trained on over 5,000,000 successful passes across 54 | EPL, Serie A, Ligue 
def get_pass_clusters(events, data_mode='whoscored'):
    """ Assign statsbomb or whoscored pass events to a pass cluster

    Function that implements a pass clustering model, that has been trained on over 5,000,000 successful passes across
    EPL, Serie A, Ligue 1, Bundesliga, La Liga and EFLC (2019/20 - 2022/23), to assign passes to a pass cluster. Passes
    are assigned to their most similar cluster based on the start and end position of the pass. The function adds a
    cluster id and cluster centroid (x, y, end x, end y) to each pass.

    The model file is located relative to the "football-data-analytics" folder found in the current working
    directory path. The working directory itself is not changed.

    Args:
        events (pandas.DataFrame): dataframe of event data.
        data_mode (string, optional): 'whoscored' or 'statsbomb' data. Defaults to 'whoscored'.

    Returns:
        pandas.DataFrame: dataframe of passes with additional 'pass_cluster_id', 'pass_cluster_mean_x',
        'pass_cluster_mean_y', 'pass_cluster_mean_end_x' and 'pass_cluster_mean_end_y' columns.

    Raises:
        ValueError: if data_mode is neither 'whoscored' nor 'statsbomb'.
    """

    # Filter and format data based on data_mode to ensure compatibility with pass cluster model
    if data_mode == 'whoscored':
        passes_out = events[events['eventType'] == 'Pass'].copy()
    elif data_mode == 'statsbomb':
        passes_out = events[events['type_name'] == 'Pass'].copy()
        # Temporarily express statsbomb (120 x 80) coordinates on the 0-100 scale used for whoscored data
        passes_out['x'] = 100*passes_out['x']/120
        passes_out['y'] = 100*passes_out['y']/80
        passes_out['endX'] = 100*passes_out['end_x'] / 120
        passes_out['endY'] = 100*passes_out['end_y'] / 80
    else:
        raise ValueError("Specify 'whoscored' or 'statsbomb' as data mode")

    # Load pass clustering model from an explicit path. Changing the working directory instead would alter
    # process-wide state and leave it altered if loading failed.
    model_dir = (os.getcwd().split("football-data-analytics")[0] +
                 "football-data-analytics/model_directory/pass_cluster_model")
    cluster_model = joblib.load(os.path.join(model_dir, "PassClusterModel65.joblib"))

    # Make cluster predictions and add cluster info
    passes_out['pass_cluster_id'] = cluster_model.predict(passes_out)
    # Centres are multiplied by 120, presumably undoing the uniform /120 scaling applied by customScaler
    cluster_centers = cluster_model['model'].cluster_centers_ * 120
    # Look up every pass's centroid in one indexing operation rather than one Python call per row
    cluster_ids = passes_out['pass_cluster_id'].to_numpy()
    passes_out['pass_cluster_mean_x'] = cluster_centers[cluster_ids, 0]
    passes_out['pass_cluster_mean_y'] = cluster_centers[cluster_ids, 1]
    passes_out['pass_cluster_mean_end_x'] = cluster_centers[cluster_ids, 2]
    passes_out['pass_cluster_mean_end_y'] = cluster_centers[cluster_ids, 3]

    # Return data to standard state based on data_mode
    if data_mode == 'whoscored':
        # Convert the centroids to the 0-100 scale of the whoscored input
        passes_out['pass_cluster_mean_x'] = 100*passes_out['pass_cluster_mean_x']/120
        passes_out['pass_cluster_mean_y'] = 100*passes_out['pass_cluster_mean_y']/80
        passes_out['pass_cluster_mean_end_x'] = 100*passes_out['pass_cluster_mean_end_x']/120
        passes_out['pass_cluster_mean_end_y'] = 100*passes_out['pass_cluster_mean_end_y']/80

    elif data_mode == 'statsbomb':
        # Undo the temporary 0-100 conversion and remove the helper end-position columns
        passes_out['x'] = 120*passes_out['x']/100
        passes_out['y'] = 80*passes_out['y']/100
        passes_out = passes_out.drop(columns=['endX', 'endY'])

    return passes_out
def simulate_match_outcome(events, matches, match_id, sim_count=10000):
    """ Simulate the outcome of a match based on teams xG

    Function to simulate the outcome of a match by assigning goals to each team based on their chances and xG. Assumes
    that xG represents scoring probability and that all xG events are independent. Matches are simulated a number of
    times, with outcomes used to determine home win, draw and away win probabilities and expected points. Function
    requires statsbomb-style events and matches dataframe, id of match to simulate and number of iterations. Individual
    simulation outcomes are returned. Win probabilities and expected points are added to the matches dataframe

    Args:
        events (pandas.DataFrame): dataframe of statsbomb-style event data.
        matches (pandas.DataFrame): dataframe of statsbomb-style match data.
        match_id (int): numeric identifier of match to simulate
        sim_count (int): number of simulations to run. Must be at least 1.

    Returns:
        pandas.DataFrame: statsbomb-style match dataframe with additional 'home_xg', 'away_xg', 'home_win_probability',
        'away_win_probability', 'draw_probability', 'home_xpoints' and 'away_xpoints' columns
        pandas.DataFrame: dataframe of match simulation results. One row per simulation

    Raises:
        ValueError: if sim_count is less than 1.
        IndexError: if match_id is not present in the matches dataframe.
    """

    if sim_count < 1:
        raise ValueError("sim_count must be a positive integer")

    # Retrieve teams for match to simulate
    match_simulate = matches[matches['match_id'] == match_id]
    if match_simulate.empty:
        raise IndexError(f"match_id {match_id} not found in matches")
    home_team = match_simulate['home_team'].values[0]
    away_team = match_simulate['away_team'].values[0]

    # Retrieve xG events for match to simulate (events without an xG value are not shots and are skipped)
    match_xg_events = events[(events['match_id'] == match_id) & events['shot_statsbomb_xg'].notna()]
    home_xg_list = match_xg_events.loc[match_xg_events['team_name'] == home_team, 'shot_statsbomb_xg'].values
    away_xg_list = match_xg_events.loc[match_xg_events['team_name'] == away_team, 'shot_statsbomb_xg'].values

    # Draw one uniform number per shot per simulation in a single call. Each row is one simulation, laid out as
    # home shots followed by away shots, i.e. the order in which a shot-by-shot loop would draw them.
    home_shot_count = len(home_xg_list)
    shot_xg = np.concatenate([home_xg_list, away_xg_list]).astype(float)
    shot_scored = np.random.random((sim_count, len(shot_xg))) < shot_xg

    # A shot is a goal when its draw falls below its xG. Count goals per simulation for each team
    home_goals = shot_scored[:, :home_shot_count].sum(axis=1, dtype=np.int64)
    away_goals = shot_scored[:, home_shot_count:].sum(axis=1, dtype=np.int64)

    # Define match outcome based on home and away goals
    outcomes = np.where(home_goals > away_goals, 'home', np.where(away_goals > home_goals, 'away', 'draw'))

    # Store all simulated matches within dataframe
    match_simulation_results = pd.DataFrame({'home_goals': home_goals,
                                             'away_goals': away_goals,
                                             'outcome': outcomes})
    match_simulation_results['home_team'] = home_team
    match_simulation_results['away_team'] = away_team

    # Store win probabilities and xpoints in dictionary
    result_dict = dict()
    result_dict['match_id'] = match_id
    result_dict['home_xg'] = home_xg_list.sum()
    result_dict['away_xg'] = away_xg_list.sum()
    result_dict['home_win_probability'] = np.count_nonzero(outcomes == 'home') / sim_count
    result_dict['away_win_probability'] = np.count_nonzero(outcomes == 'away') / sim_count
    result_dict['draw_probability'] = np.count_nonzero(outcomes == 'draw') / sim_count
    result_dict['home_xpoints'] = result_dict['home_win_probability'] * 3 + result_dict['draw_probability'] * 1
    result_dict['away_xpoints'] = result_dict['away_win_probability'] * 3 + result_dict['draw_probability'] * 1

    # Insert win probabilities and xpoints information to dataframe
    if 'home_xpoints' in matches.columns:
        # Result columns already exist (a match was simulated previously): overwrite this match's row only.
        # The first dictionary entry is the match_id key, so it is skipped.
        matches_out = matches.copy()
        matches_out.loc[matches['match_id'] == match_id, list(result_dict.keys())[1:]] = list(result_dict.values())[
                                                                                         1:]
    else:
        join_df = pd.DataFrame(result_dict, index=[0])
        matches_out = pd.merge(matches, join_df, on='match_id', how='left')

    return matches_out, match_simulation_results
def _load_pbz2(path):
    """Unpickle and return the object stored in the bz2-compressed pickle file at path."""
    # NOTE: pickle can execute arbitrary code while loading. Only use this on trusted data files.
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


def format_wyscout_data(tournament='England', data_folder="../../data_directory/wyscout_data"):
    """ Load Wyscout data files with matches, events, players, competitions and teams.

    Function to load and format Wyscout data, stored as bz2-compressed pickle (.pbz2) files in the location defined
    by the folder input, for one or more user-defined tournaments. Event and match files are read per tournament from
    "<data_folder>/events/events_<tournament>.pbz2" and "<data_folder>/matches/matches_<tournament>.pbz2". Player,
    competition and team files are read once from the top level of data_folder.

    Args:
        tournament (str or list, optional): Tournament, or list of tournaments, to load. Defaults to 'England'.
        data_folder (str, optional): Location of data, relative to script in which function is called.

    Returns:
        pandas.DataFrame: wyscout-style matches dataframe, containing all match information from selected
        competition(s). Indexed by match 'wyId'.
        pandas.DataFrame: wyscout-style event dataframe, containing all events from selected competition(s).
        defaultdict: wyscout-style event dictionary, containing a list of events per match id.
        pandas.DataFrame: wyscout-style player dataframe, containing player info for all players in av. Wyscout data.
        pandas.DataFrame: wyscout-style competition dataframe, containing comp info for all comps in av. Wyscout data.
        pandas.DataFrame: wyscout-style team dataframe, containing team info for all teams in av. Wyscout data.
    """

    if isinstance(tournament, str):
        tournament = [tournament]

    # Load in the Wyscout matches and event data, one dataframe per tournament
    event_frames = []
    match_frames = []
    for data_selection in tournament:
        event_frames.append(pd.DataFrame(_load_pbz2(f"{data_folder}/events/events_{data_selection}.pbz2")))
        match_frames.append(pd.DataFrame(_load_pbz2(f"{data_folder}/matches/matches_{data_selection}.pbz2")))

    # Combine tournaments in a single step (DataFrame.append is no longer available in pandas 2.x). Row labels
    # are kept as loaded, so they may repeat across tournaments.
    events = pd.concat(event_frames) if event_frames else pd.DataFrame()
    matches = pd.concat(match_frames) if match_frames else pd.DataFrame()

    # Produce a dictionary of lists: top level dictionary of matches with sub-list of events
    match_id2events = defaultdict(list)
    for _, event in events.iterrows():
        match_id2events[event['matchId']].append(event)

    # Produce a dictionary of match info keyed by match id
    match_id2match = {match['wyId']: match for _, match in matches.iterrows()}

    # Load in the Wyscout player, competition and team data, each keyed by its 'wyId'
    player_id2player = {player['wyId']: player for player in _load_pbz2(f"{data_folder}/players.pbz2")}
    competition_id2competition = {competition['wyId']: competition
                                  for competition in _load_pbz2(f"{data_folder}/competitions.pbz2")}
    team_id2team = {team['wyId']: team for team in _load_pbz2(f"{data_folder}/teams.pbz2")}

    # Convert to dataframes: each dictionary key becomes a column, so transpose to get one row per id
    match_id2match = pd.DataFrame(match_id2match).transpose()
    player_id2player = pd.DataFrame(player_id2player).transpose()
    competition_id2competition = pd.DataFrame(competition_id2competition).transpose()
    team_id2team = pd.DataFrame(team_id2team).transpose()

    return match_id2match, events, match_id2events, player_id2player, competition_id2competition, team_id2team
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/EPL-table-2016.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2016_17/La_Liga-table-2016.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/La_Liga-table-2016.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2016_17/Ligue_1-table-2016.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/Ligue_1-table-2016.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2016_17/Serie_A-table-2016.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/Serie_A-table-2016.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2017_18/Bundesliga-table-2017.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/Bundesliga-table-2017.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2017_18/EFLC-table-2017.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/EFLC-table-2017.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2017_18/EPL-table-2017.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/EPL-table-2017.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2017_18/La_Liga-table-2017.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/La_Liga-table-2017.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2017_18/Ligue_1-table-2017.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/Ligue_1-table-2017.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2017_18/Serie_A-table-2017.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/Serie_A-table-2017.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2018_19/Bundesliga-table-2018.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/Bundesliga-table-2018.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2018_19/EFLC-table-2018.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/EFLC-table-2018.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2018_19/EPL-table-2018.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/EPL-table-2018.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2018_19/La_Liga-table-2018.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/La_Liga-table-2018.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2018_19/Ligue_1-table-2018.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/Ligue_1-table-2018.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2018_19/Serie_A-table-2018.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/Serie_A-table-2018.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2019_20/Bundesliga-table-2019.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/Bundesliga-table-2019.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2019_20/EFLC-table-2019.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/EFLC-table-2019.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2019_20/EPL-table-2019.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/EPL-table-2019.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2019_20/La_Liga-table-2019.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/La_Liga-table-2019.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2019_20/Ligue_1-table-2019.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/Ligue_1-table-2019.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2019_20/Serie_A-table-2019.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/Serie_A-table-2019.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2020_21/Bundesliga-table-2020.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/Bundesliga-table-2020.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2020_21/EFLC-table-2020.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/EFLC-table-2020.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2020_21/EPL-table-2020.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/EPL-table-2020.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2020_21/La_Liga-table-2020.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/La_Liga-table-2020.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2020_21/Ligue_1-table-2020.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/Ligue_1-table-2020.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2020_21/Serie_A-table-2020.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/Serie_A-table-2020.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2021_22/Bundesliga-table-2021.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/Bundesliga-table-2021.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2021_22/EFLC-table-2021.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/EFLC-table-2021.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2021_22/EPL-table-2021.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/EPL-table-2021.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2021_22/La_Liga-table-2021.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/La_Liga-table-2021.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2021_22/Ligue_1-table-2021.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/Ligue_1-table-2021.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2021_22/Serie_A-table-2021.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/Serie_A-table-2021.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2022_23/EFLC-table-2022.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2022_23/EFLC-table-2022.pbz2 -------------------------------------------------------------------------------- /data_directory/leaguetable_data/2022_23/EPL-table-2022.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2022_23/EPL-table-2022.pbz2 -------------------------------------------------------------------------------- /data_directory/misc_data/articles/The Evolution of Shooting in the Premier League.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/articles/The Evolution of Shooting in the Premier League.pdf -------------------------------------------------------------------------------- /data_directory/misc_data/images/BoxLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/BoxLogo.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/CarryLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/CarryLogo.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/EPL-2022-23-cycle-of-results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/EPL-2022-23-cycle-of-results.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/EPL-2022-23-cycle-of-results.pptx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/EPL-2022-23-cycle-of-results.pptx -------------------------------------------------------------------------------- /data_directory/misc_data/images/JK Twitter Logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/JK Twitter Logo.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/PassLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/PassLogo.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/RecoveryLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/RecoveryLogo.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/TackleLogo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/TackleLogo.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-1-1-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-1-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-1-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-1-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-1-1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-1-1-3.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-10-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-10-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-10-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-10-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-11-1-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-11-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-2-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-2-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-2-1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-3.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-2-1-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-4.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-2-2-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-2-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-2-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-2-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-3-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-3-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-3-1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-3.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-3-1-4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-4.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-3-1-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-5.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-4-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-4-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-4-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-2-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-4-2-2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-2-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-4-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-3-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-4-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-3-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-2-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-2-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-2-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-3-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-3-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-3-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-3-3.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-4-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-4-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-4-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-5-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-5-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-3.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-5-4.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-4.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-6-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-6-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-6-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-7-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-7-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-5-7-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-7-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-1-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-2-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-2-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-3-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-4-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-4-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-5-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-5-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-5-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-5-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-6-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-6-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-6-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-6-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-6-7-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-7-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-7-1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-7-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-7-2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-2-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-7-2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-2-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-9-1-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-9-1-1.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/example-9-1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-9-1-2.png -------------------------------------------------------------------------------- /data_directory/misc_data/images/getting-started-roadmap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/getting-started-roadmap.png -------------------------------------------------------------------------------- /data_directory/misc_data/log_regression_xg_data.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/log_regression_xg_data.pbz2 -------------------------------------------------------------------------------- /data_directory/misc_data/neural_net_xg_data.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/neural_net_xg_data.pbz2 -------------------------------------------------------------------------------- /data_directory/misc_data/worldcup_2010_to_2018_distcovered.xlsx: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/worldcup_2010_to_2018_distcovered.xlsx -------------------------------------------------------------------------------- /data_directory/statsbomb_data/README.txt: -------------------------------------------------------------------------------- 1 | Data collected from Statsbomb is not owned by the repository creator, and therefore will not be hosted within the repository. -------------------------------------------------------------------------------- /data_directory/transfermarkt_data/2021_22/transfermarkt_GB1_2021-2022.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2021_22/transfermarkt_GB1_2021-2022.pbz2 -------------------------------------------------------------------------------- /data_directory/transfermarkt_data/2021_22/transfermarkt_GB2_2021-2022.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2021_22/transfermarkt_GB2_2021-2022.pbz2 -------------------------------------------------------------------------------- /data_directory/transfermarkt_data/2022_23/transfermarkt_FR2_2022-2023.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2022_23/transfermarkt_FR2_2022-2023.pbz2 -------------------------------------------------------------------------------- /data_directory/transfermarkt_data/2022_23/transfermarkt_GB2_2022-2023.pbz2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2022_23/transfermarkt_GB2_2022-2023.pbz2 -------------------------------------------------------------------------------- /data_directory/whoscored_data/README.txt: -------------------------------------------------------------------------------- 1 | Data collected from Whoscored is not owned by the repository creator, and therefore will not be hosted within the repository. -------------------------------------------------------------------------------- /data_directory/wyscout_data/Data Collection Report.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/Data Collection Report.pdf -------------------------------------------------------------------------------- /data_directory/wyscout_data/coaches.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/coaches.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/competitions.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/competitions.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/eventid2name.csv: -------------------------------------------------------------------------------- 1 | event,subevent,event_label,subevent_label 2 | 1,10,Duel,Air duel 3 | 1,11,Duel,Ground 
attacking duel 4 | 1,12,Duel,Ground defending duel 5 | 1,13,Duel,Ground loose ball duel 6 | 2,20,Foul,Foul 7 | 2,21,Foul,Hand foul 8 | 2,22,Foul,Late card foul 9 | 2,23,Foul,Out of game foul 10 | 2,24,Foul,Protest 11 | 2,25,Foul,Simulation 12 | 2,26,Foul,Time lost foul 13 | 2,27,Foul,Violent Foul 14 | 3,30,Free Kick,Corner 15 | 3,31,Free Kick,Free Kick 16 | 3,32,Free Kick,Free kick cross 17 | 3,33,Free Kick,Free kick shot 18 | 3,34,Free Kick,Goal kick 19 | 3,35,Free Kick,Penalty 20 | 3,36,Free Kick,Throw in 21 | 4,40,Goalkeeper leaving line,Goalkeeper leaving line 22 | 5,50,Interruption,Ball out of the field 23 | 5,51,Interruption,Whistle 24 | 6,60,Offside,Offside 25 | 7,70,Others on the ball,Acceleration 26 | 7,71,Others on the ball,Clearance 27 | 7,72,Others on the ball,Touch 28 | 8,80,Pass,Cross 29 | 8,81,Pass,Hand pass 30 | 8,82,Pass,Head pass 31 | 8,83,Pass,High pass 32 | 8,84,Pass,Launch 33 | 8,85,Pass,Simple pass 34 | 8,86,Pass,Smart pass 35 | 9,90,Save attempt,Reflexes 36 | 9,91,Save attempt,Save attempt 37 | 10,100,Shot,Shot 38 | -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_England.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_England.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_European_Championship.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_European_Championship.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_France.pbz2: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_France.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_Germany.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_Germany.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_Italy.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_Italy.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_Spain.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_Spain.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/events/events_World_Cup.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_World_Cup.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_England.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_England.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_European_Championship.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_European_Championship.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_France.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_France.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_Germany.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_Germany.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_Italy.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_Italy.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_Spain.pbz2: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_Spain.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/matches/matches_World_Cup.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_World_Cup.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/playerank.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/playerank.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/players.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/players.pbz2 -------------------------------------------------------------------------------- /data_directory/wyscout_data/tags2name.csv: -------------------------------------------------------------------------------- 1 | Tag,Label,Description 2 | 101,Goal,Goal 3 | 102,own_goal,Own goal 4 | 301,assist,Assist 5 | 302,keyPass,Key pass 6 | 1901,counter_attack,Counter attack 7 | 401,Left,Left foot 8 | 402,Right,Right foot 9 | 403,head/body,Head/body 10 | 1101,direct,Direct 11 | 1102,indirect,Indirect 12 | 2001,dangerous_ball_lost,Dangerous ball lost 13 | 2101,blocked,Blocked 14 | 801,high,High 15 | 802,low,Low 16 | 1401,interception,Interception 17 | 1501,clearance,Clearance 18 | 201,opportunity,Opportunity 19 | 1301,Feint,Feint 20 | 
1302,missed ball,Missed ball 21 | 501,free_space_r,Free space right 22 | 502,free_space_l,Free space left 23 | 503,take_on_l,Take on left 24 | 504,take_on_r,Take on right 25 | 1601,sliding_tackle,Sliding tackle 26 | 601,anticipated,Anticipated 27 | 602,anticipation,Anticipation 28 | 1701,red_card,Red card 29 | 1702,yellow_card,Yellow card 30 | 1703,second_yellow_card,Second yellow card 31 | 1201,gb,Position: Goal low center 32 | 1202,gbr,Position: Goal low right 33 | 1203,gc,Position: Goal center 34 | 1204,gl,Position: Goal center left 35 | 1205,glb,Position: Goal low left 36 | 1206,gr,Position: Goal center right 37 | 1207,gt,Position: Goal high center 38 | 1208,gtl,Position: Goal high left 39 | 1209,gtr,Position: Goal high right 40 | 1210,obr,Position: Out low right 41 | 1211,ol,Position: Out center left 42 | 1212,olb,Position: Out low left 43 | 1213,or,Position: Out center right 44 | 1214,ot,Position: Out high center 45 | 1215,otl,Position: Out high left 46 | 1216,otr,Position: Out high right 47 | 1217,pbr,Position: Post low right 48 | 1218,pl,Position: Post center left 49 | 1219,plb,Position: Post low left 50 | 1220,pr,Position: Post center right 51 | 1221,pt,Position: Post high center 52 | 1222,ptl,Position: Post high left 53 | 1223,ptr,Position: Post high right 54 | 901,through,Through 55 | 1001,fairplay,Fairplay 56 | 701,lost,Lost 57 | 702,neutral,Neutral 58 | 703,won,Won 59 | 1801,accurate,Accurate 60 | 1802,not accurate,Not accurate -------------------------------------------------------------------------------- /data_directory/wyscout_data/teams.pbz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/teams.pbz2 -------------------------------------------------------------------------------- /model_directory/pass_cluster_model/PassClusterModel65.joblib: 
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/model_directory/pass_cluster_model/PassClusterModel65.joblib
# --------------------------------------------------------------------------------
# /model_directory/xg_model/log_regression_xg_model.joblib:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/model_directory/xg_model/log_regression_xg_model.joblib
# --------------------------------------------------------------------------------
# /projects/00_data_import_and_misc_work/download_yt_video.py:
# --------------------------------------------------------------------------------
# Download a YouTube video at 720p and save a cut-down segment next to it.

# %% Imports

import pytube
from moviepy.editor import VideoFileClip

# %% Inputs

# Url of video
link = "https://www.youtube.com/watch?v=1V-LVtI6t2U&ab_channel=SkySportsPremierLeague"

# Save filename
file_name = "output_vid.mp4"

# Start of segment
start = (0,6)

# End of segment
end = (0,12)

# %% Download video and save

yt = pytube.YouTube(link)
stream = yt.streams.filter(res="720p").first()

# first() yields None when the filter matches nothing; fail with a clear message
# instead of an AttributeError on None.download
if stream is None:
    raise RuntimeError(f"No 720p stream available for {link}")
stream.download(filename=file_name)

# %% Crop video

clip = VideoFileClip(file_name)
try:
    clip1 = clip.subclip(start, end)
    clip1.write_videofile(file_name.replace(".mp4", "_cut.mp4"))
finally:
    # Release the reader on the source file even if writing the cut fails
    clip.close()

# --------------------------------------------------------------------------------
# /projects/00_data_import_and_misc_work/import_data_fbref.py:
# --------------------------------------------------------------------------------
## Script to download and save fbref data

# %% Imports

import ScraperFC as sfc
import traceback
import pandas as pd

# %% User inputs

# Select competition from following list
'''['Copa Libertadores', 'Champions League', 'Europa League', 'Europa Conference League',
'World Cup', 'Copa America', 'Euros', 'Big 5 combined', 'EPL', 'Ligue 1', 'Bundesliga',
'Serie A', 'La Liga', 'MLS', 'Brazilian Serie A', 'Eredivisie', 'Liga MX', 'Primeira Liga',
'EFL Championship', 'Women Champions League', 'Womens World Cup', 'Womens Euros', 'NWSL',
'A-League Women', 'WSL', 'D1 Feminine', 'Womens Bundesliga', 'Womens Serie A', 'Liga F',
'NWSL Challenge Cup', 'NWSL Fall Series'] '''

COMPETITION = 'EPL'

# Select calendar year in which the competition finishes
COMPETITION_END_YEAR = 2024

# Select whether to store player data, team data or vs team data, using one of the following case-insensitive options
'''['player_only', 'team_only', 'vs_team_only', 'all'] '''

STORAGE_MODE = 'all'

# Replace with path of directory to store data (path is relative to directory of this script). SAVE_COMP is not needed
SAVE_COMP = 'EPL'
DIRECTORY = f"../../data_directory/fbref_data/{str(COMPETITION_END_YEAR-1)}_{str(COMPETITION_END_YEAR).replace('20','',1)}/{SAVE_COMP}/"

# %% Helper functions


def _flatten_columns(stat_df):
    """Return single-level names for the two-level columns of a scraped table.

    A top-level label containing 'Unnamed' is dropped so only the second level
    remains; otherwise the two levels are joined with a space. Surrounding
    whitespace is stripped from the result.
    """
    flat_names = []
    for col_name in stat_df.columns:
        if 'Unnamed' in col_name[0]:
            flat_names.append(col_name[1].strip())
        else:
            flat_names.append((col_name[0] + ' ' + col_name[1]).strip())
    return flat_names


def _add_statistic_group(combined_df, stat_df, keys, is_first):
    """Flatten one scraped table and outer-merge it into the running table.

    The first table simply becomes the running table. Later tables are merged
    on `keys`; columns already present receive a '_duplicate' suffix so they
    can be removed once every group has been merged.
    """
    stat_df = stat_df.copy()
    stat_df.columns = _flatten_columns(stat_df)
    if is_first:
        return stat_df
    return combined_df.merge(stat_df, left_on=keys, right_on=keys,
                             suffixes=('', '_duplicate'), how="outer")


def _drop_duplicate_columns(stat_df):
    """Return the table without the '_duplicate' columns created while merging."""
    return stat_df.loc[:, ['_duplicate' not in x for x in stat_df.columns]]


# %% Scrape data

# Initialise scraper
scraper = sfc.FBRef()

# Get data
try:
    fbref_dict = scraper.scrape_all_stats(year=COMPETITION_END_YEAR, league=COMPETITION)
except Exception:
    # Show the scraper error and stop here: nothing below can run without
    # fbref_dict, and carrying on would only hide the cause behind a NameError
    traceback.print_exc()
    raise
finally:
    scraper.close()

# %% Format scraped data

playerinfo_df = pd.DataFrame()
teaminfo_for_df = pd.DataFrame()
teaminfo_against_df = pd.DataFrame()

# Iterate over statistic type; each entry holds (team for, team against, player) tables
for idx, statistic_group in enumerate(fbref_dict):
    is_first = idx == 0
    group_tables = fbref_dict[statistic_group]

    # Team stats for
    teaminfo_for_df = _add_statistic_group(teaminfo_for_df, group_tables[0], 'Team ID', is_first)

    # Team stats against
    teaminfo_against_df = _add_statistic_group(teaminfo_against_df, group_tables[1], 'Team ID', is_first)

    # Player stats
    playerinfo_df = _add_statistic_group(playerinfo_df, group_tables[2],
                                         ['Player', 'Player ID', 'Squad'], is_first)

# Remove duplicate columns
teaminfo_for_df = _drop_duplicate_columns(teaminfo_for_df)
teaminfo_against_df = _drop_duplicate_columns(teaminfo_against_df)
playerinfo_df = _drop_duplicate_columns(playerinfo_df)

# Adjust data types
for col_name in playerinfo_df.columns:
    try:
        playerinfo_df[col_name] = playerinfo_df[col_name].astype(float)
    except (ValueError, TypeError):
        # Column is not numeric (names, squads, ...): leave it unchanged
        pass

# %% Save scraped data

import os

# to_json does not create missing folders, so make sure the season folder exists
os.makedirs(DIRECTORY, exist_ok=True)

file_extension_name = COMPETITION.lower() + ' ' + str(COMPETITION_END_YEAR)
storage_mode = STORAGE_MODE.lower().replace('_', ' ')

if storage_mode == 'player only':
    playerinfo_df.to_json(DIRECTORY + file_extension_name + ' player data.json')

elif storage_mode == 'team only':
    teaminfo_for_df.to_json(DIRECTORY + file_extension_name + ' team data.json')

elif storage_mode == 'vs team only':
    teaminfo_against_df.to_json(DIRECTORY + file_extension_name + ' vs team data.json')

else:
    # 'all', and any unrecognised mode, stores the three tables
    playerinfo_df.to_json(DIRECTORY + file_extension_name + ' player data.json')
    teaminfo_for_df.to_json(DIRECTORY + file_extension_name + ' team data.json')
    teaminfo_against_df.to_json(DIRECTORY + file_extension_name + ' vs team data.json')

# --------------------------------------------------------------------------------
# /projects/00_data_import_and_misc_work/import_data_leaguetable.py:
# --------------------------------------------------------------------------------
# Import user-specified data from Sky league-tables using custom football data module

#%% Imports

import os
import sys
import numpy as np

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.get_football_data as gfd

# %% User inputs

# Input league identifier and starting year of season
league = "EFLC"
start_year = '2018'

# Set up folder
# path
folder_path = f"../../data_directory/leaguetable_data/{start_year}_{str(int(start_year.replace('20','', 1)) + 1)}"

# %% Get data

league_table = gfd.get_league_table(league, start_year, folderpath=folder_path)

# --------------------------------------------------------------------------------
# /projects/00_data_import_and_misc_work/import_data_whoscored.py:
# --------------------------------------------------------------------------------
# Import user-specified data from WhoScored using custom football data module

#%% Imports

import os
import sys
import numpy as np

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.get_football_data as gfd

# %% User inputs

# Input first and last match id to obtain data from
match_id_start = 1729275
match_id_end = 1729293

# Input year folder
year = '2023'

# Input league folder
league = 'EPL'

# %% Set-up file path and match ids
match_ids = np.arange(match_id_start, match_id_end+1)
folderpath = f"../../data_directory/whoscored_data/{year}_{str(int(year.replace('20','',1)) + 1)}/{league}"

# %% Get data
for match_id in match_ids:
    match_id = str(match_id)

    # Obtain and save data using custom function
    events, players, mappings = gfd.get_whoscored_data(match_id, get_mappings=True, save_to_file=True, folderpath=folderpath)

# --------------------------------------------------------------------------------
# /projects/00_data_import_and_misc_work/scrape_data_transfermarkt.py:
# --------------------------------------------------------------------------------
# Scrape user-specified data from transfermarkt.com using custom football data module

# %% Imports

import os
import sys

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.get_football_data as gfd

# %% User Inputs

# Input league country (England = GB, Spain = ES, Germany = L, Italy = IT, France = FR, Scotland = SC)
country_code = 'FR'

# Input league number (for example Premier League = 1, Championship = 2, League One = 3, etc.)
division_num = '2'

# Input year that season started
start_year = '2022'

# Choose whether to obtain stats from all competitions (False = League comp. only, True = All comps.)
all_comps = False

# Set up folder path. Only the leading '20' is stripped (count=1), as in the other
# import scripts: without the count a start year such as '2020' loses both '20'
# substrings, leaving '' and making int() raise ValueError
folderpath = f"../../data_directory/transfermarkt_data/{start_year}_{str(int(start_year.replace('20','',1)) + 1)}"

# Obtain and save data using custom function
player_info = gfd.get_transfermarkt_data(country_code, division_num, start_year, all_comps, save_to_file=True, folderpath=folderpath)

# --------------------------------------------------------------------------------
# /projects/03_model_development_and_implementation/pass_cluster_data_collection.py:
# --------------------------------------------------------------------------------
''' Load data for pass clustering '''

# %% Imports

import pandas as pd
import numpy as np
import os
import bz2
import pickle

# %% Select data to load

data_to_load = [['EPL', '2022'],
                ['La_Liga', '2022'],
                ['Ligue_1', '2022'],
                ['Bundesliga', '2022'],
                ['Serie_A', '2022'],
                ['EFLC', '2022'],
                ['EPL', '2021'],
                ['La_Liga', '2021'],
                ['Ligue_1', '2021'],
                ['Bundesliga', '2021'],
                ['Serie_A', '2021'],
                ['EFLC', '2021'],
                ['EPL', '2020'],
                ['La_Liga', '2020'],
                ['Ligue_1', '2020'],
                ['Bundesliga', '2020'],
                ['Serie_A', '2020'],
                ['EFLC', '2020'],
                ]

# Initialise storage
# dataframes
league_frames = []

for data in data_to_load:
    league = data[0]
    year = data[1]
    match_frames = []

    file_path_evts = f"../../data_directory/whoscored_data/{data[1]}_{str(int(data[1].replace('20','', 1)) + 1)}/{data[0]}"
    files = os.listdir(file_path_evts)

    # Load event data match by match
    for file in files:
        if file == 'event-types.pbz2':
            # Context manager closes the compressed file once it has been read
            with bz2.BZ2File(f"{file_path_evts}/{file}", 'rb') as f:
                event_types = pickle.load(f)
        elif '-eventdata-' in file:
            with bz2.BZ2File(f"{file_path_evts}/{file}", 'rb') as f:
                match_events = pickle.load(f)

            # Keep successful passes whose satisfied event types include none of 31, 34 or 212
            match_passes = match_events[(match_events['outcomeType'] == 'Successful') &
                                        (match_events['eventType'] == 'Pass') &
                                        (match_events['satisfiedEventsTypes'].apply(lambda x: not (31 in x or 34 in x or 212 in x)))]
            match_frames.append(match_passes)

    # Append league data to combined dataset (one concat per league rather than one per match)
    if match_frames:
        league_frames.append(pd.concat(match_frames))

    print(f"{league}, {year} passes loaded")

passes_df = pd.concat(league_frames) if league_frames else pd.DataFrame()

# %% Store data in random order in a group of compressed bz2 files

passes_df_out = passes_df.sample(frac=1).reset_index(drop=True)
num_files = 100
n_passes = len(passes_df_out)

# array_split spreads every row position over num_files near-equal chunks, so the
# remainder rows that equal floor(n_passes/num_files) slices would leave out are saved too
for idx, row_positions in enumerate(np.array_split(np.arange(n_passes), num_files)):
    print(f"Isolating sample {idx}")
    pass_sample_df = passes_df_out.iloc[row_positions]
    print(f"Saving sample {idx}")
    with bz2.BZ2File(f"pass_data_{idx}.pbz2", "wb") as f:
        pickle.dump(pass_sample_df, f)
    print("Save complete")

# --------------------------------------------------------------------------------
# /projects/03_model_development_and_implementation/shot_xg_plot.py:
# --------------------------------------------------------------------------------
# %% Create plot of shot positions and associated
# xG for user-selected player, team or competition
#
# Inputs: Player, team or competition to plot xG for
#
# Outputs: Plot of shot positions and associated xG
#
# Notes: Uses logistic regression xG model.

# %% Imports

import bz2
import pickle
from PIL import Image
import requests
from io import BytesIO
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from mplsoccer.pitch import VerticalPitch

# %% User inputs

# Select player, team or competition to plot shots for
player_team_or_comp = 'Mohamed Salah'

# Logo to add to plot figure
#logo = 'https://1000logos.net/wp-content/uploads/2019/01/German-Bundesliga-Logo-2002.png'
logo = "https://logos-world.net/wp-content/uploads/2020/06/Liverpool-Logo.png"

# %% Set constants

PITCH_WIDTH_Y = 80
PITCH_LENGTH_X = 120

# %% Helper functions


def _shot_marker(shot_df):
    """Return 'o' when the first row of shot_df is tagged as a header, otherwise 'h'.

    An empty frame (no goal, or no miss, in the selection) falls back to 'h'.
    """
    if not shot_df.empty and shot_df['header_tag'].iloc[0] == 1:
        return 'o'
    return 'h'


def _xg_label(shot_df):
    """Return the xG of the first row rounded to 2 d.p. as text, or '-' for an empty frame."""
    if shot_df.empty:
        return "-"
    return f"{round(shot_df['xG'].values[0],2)}"


# %% Load xG model and data

with bz2.BZ2File("../../data_directory/misc_data/log_regression_xg_data.pbz2", 'rb') as f:
    shots_model_df = pickle.load(f)

# %% Isolate shots for selected player or team

# The input is matched as a competition first, then a team, then a player
selected_shots = pd.DataFrame()
comp_selected = 0
for column in ('competition_name', 'team_name', 'player_name'):
    matching_shots = shots_model_df[shots_model_df[column]==player_team_or_comp]
    if not matching_shots.empty:
        selected_shots = matching_shots
        comp_selected = 1 if column == 'competition_name' else 0
        break

# Nothing below can be drawn without shots, so stop with a readable message
if selected_shots.empty:
    raise ValueError(f"No shots found for '{player_team_or_comp}'")

# Individual dataframe for shots/headers/goals/no-goals etc.
selected_ground_shots = selected_shots[selected_shots['header_tag']==0]
selected_ground_goals = selected_ground_shots[selected_ground_shots['goal']==1]
selected_headers = selected_shots[selected_shots['header_tag']==1]
selected_headed_goals = selected_headers[selected_headers['goal']==1]

# Lowest xG goal and highest xG miss (either may be empty)
lowest_xg_goal = selected_shots[selected_shots['goal']==1].sort_values('xG').head(1)
highest_xg_miss = selected_shots[selected_shots['goal']==0].sort_values('xG', ascending=False).head(1)

# Summary numbers. Only the columns needed are summed, and every row counts as a
# shot, including rows whose player name is missing
n_shots = len(selected_shots)
total_xg = selected_shots['xG'].sum()
total_goals = selected_shots['goal'].sum()
xg_difference = total_goals - total_xg

# %% Plot shots

# Overwrite rcParams
mpl.rcParams['xtick.color'] = "white"
mpl.rcParams['ytick.color'] = "white"
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10

# Plot pitch
pitch = VerticalPitch(half=True,pitch_color='#313332', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=1, ncols=1, title_height = 0.03, grid_height=0.7, endnote_height=0.05, axis=False)
fig.set_size_inches(9, 7)
fig.set_facecolor('#313332')

# Colour scale shared by every xG-coloured marker
xg_colouring = dict(vmin=-0.04, vmax=0.4, cmap=plt.cm.inferno, zorder=2)

# Plot ground shots
ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_ground_shots['c_yards'], PITCH_LENGTH_X - selected_ground_shots['x_yards'],
                    marker='h', s=200, alpha=0.2, c=selected_ground_shots['xG'], edgecolors='w', **xg_colouring)
p1 = ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_ground_goals['c_yards'], PITCH_LENGTH_X - selected_ground_goals['x_yards'],
                         marker='h', s=200, c=selected_ground_goals['xG'], edgecolors='w', lw=2, **xg_colouring)

# Plot headers
ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_headers['c_yards'], PITCH_LENGTH_X - selected_headers['x_yards'],
                    marker='o', s=200, alpha=0.2, c=selected_headers['xG'], edgecolors='w', **xg_colouring)
ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_headed_goals['c_yards'], PITCH_LENGTH_X - selected_headed_goals['x_yards'],
                    marker='o', s=200, c=selected_headed_goals['xG'], edgecolors='w', lw=2, **xg_colouring)

ax['pitch'].set_ylim([59.9,125])

# Plot highest xG miss and lowest xG goal chance. These use fixed colours, so no
# colormap arguments are passed
ax['pitch'].scatter(PITCH_WIDTH_Y/2 + highest_xg_miss['c_yards'], PITCH_LENGTH_X - highest_xg_miss['x_yards'],
                    marker=_shot_marker(highest_xg_miss), s=200, c='r', edgecolors='grey', lw = 2.5, zorder=3)
ax['pitch'].scatter(PITCH_WIDTH_Y/2 + lowest_xg_goal['c_yards'], PITCH_LENGTH_X - lowest_xg_goal['x_yards'],
                    marker=_shot_marker(lowest_xg_goal), s=200, c='g', edgecolors='w', lw = 2.5, zorder=3)

# Add colorbar
cb_ax = fig.add_axes([0.53, 0.107, 0.35, 0.03])
cbar = fig.colorbar(p1, cax=cb_ax, orientation='horizontal')
cbar.outline.set_edgecolor('w')
cbar.set_label(" xG", loc = "left", color='w', fontweight='bold', labelpad=-28.5)

# Manual legend
legend_ax = fig.add_axes([0.075, 0.07, 0.5, 0.08])
legend_ax.axis("off")
legend_ax.set_xlim([0,5])
legend_ax.set_ylim([0,1])
legend_ax.scatter(0.2, 0.7, marker='h', s=200, c='#313332', edgecolors='w')
legend_ax.scatter(0.2, 0.2, marker='o', s=200, c='#313332', edgecolors='w')
legend_ax.text(0.35, 0.61, "Foot", color="w")
legend_ax.text(0.35, 0.11, "Header", color="w")
legend_ax.scatter(1.3, 0.7, marker='h', s=200, c='purple', edgecolors='w', lw=2)
legend_ax.scatter(1.3, 0.2, marker='h', alpha=0.2, s=200, c='purple', edgecolors='w')
legend_ax.text(1.45, 0.61, "Goal", color="w")
legend_ax.text(1.465, 0.11, "No Goal", color="w")
legend_ax.scatter(2.4, 0.7, marker='h', s=200, c='g', edgecolors='w', lw=2.5)
legend_ax.scatter(2.4, 0.2, marker='h', s=200, c='r', edgecolors='grey', lw=2.5)
legend_ax.text(2.55, 0.61, "Lowest xG Goal", color="w")
legend_ax.text(2.565, 0.11, "Highest xG Miss", color="w")

# Title text
subtitle_text = f"{selected_shots['competition_name'].unique()[0]}"
subsubtitle_text = "2017-2018"
if comp_selected == 1:
    title_text = "Expected Goals"
else:
    title_text = f"{player_team_or_comp} Expected Goals"

fig.text(0.18,0.92, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.18,0.883, subtitle_text, fontweight="regular", fontsize=14, color='w')
fig.text(0.18,0.852, subsubtitle_text, fontweight="regular", fontsize=10, color='w')

# Stats
sign = '+' if xg_difference > 0 else ''

fig.text(0.65,0.925, "Shots:", fontweight="bold", fontsize=10, color='w')
fig.text(0.65,0.9, "xG:", fontweight="bold", fontsize=10, color='w')
fig.text(0.65,0.875, "Goals:", fontweight="bold", fontsize=10, color='w')
fig.text(0.65,0.85, "xG Perf:", fontweight="bold", fontsize=10, color='w')
fig.text(0.73,0.925, f"{n_shots}", fontweight="regular", fontsize=10, color='w')
fig.text(0.73,0.9, f"{round(total_xg,1)}", fontweight="regular", fontsize=10, color='w')
fig.text(0.73,0.875, f"{int(total_goals)}", fontweight="regular", fontsize=10, color='w')
fig.text(0.73,0.85, f"{sign}{int(round(100*xg_difference/total_xg,0))}%", fontweight="regular", fontsize=10, color='w')

fig.text(0.79,0.927, "xG/shot:", fontweight="bold", fontsize=10, color='w')
fig.text(0.79,0.9, "Goal/shot:", fontweight="bold", fontsize=10, color='w')
fig.text(0.79,0.875, "L xG Goal:", fontweight="bold", fontsize=10, color='w')
fig.text(0.79,0.85, "H xG Miss:", fontweight="bold", fontsize=10, color='w')
fig.text(0.89,0.925, f"{round(total_xg/n_shots,2)}", fontweight="regular", fontsize=10, color='w')
fig.text(0.89,0.9, f"{round(total_goals/n_shots,2)}", fontweight="regular", fontsize=10, color='w')
fig.text(0.89,0.875, _xg_label(lowest_xg_goal), fontweight="regular", fontsize=10, color='w')
fig.text(0.89,0.85, _xg_label(highest_xg_miss), fontweight="regular", fontsize=10, color='w')

# Footer text
fig.text(0.5, 0.02, "Created by Jake Kolliari. Data provided by Wyscout.com",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add logo
logo_ax = fig.add_axes([0.02,0.8,0.2,0.2])
logo_ax.axis("off")
response = requests.get(logo, timeout=30)
# Stop on an HTTP error rather than handing an error page to the image decoder
response.raise_for_status()
img = Image.open(BytesIO(response.content))
logo_ax.imshow(img)

# --------------------------------------------------------------------------------
# /projects/03_model_development_and_implementation/xg_log_regression_model.py:
# --------------------------------------------------------------------------------
# %% Expected Goals Model, using Wyscout data from Top 5 Leagues in 2017/18
#
# Inputs: Leagues to use to train xg model
#
# Outputs: xG model, displayed as an xG heatmap
#          Dataframe all shots in chosen leagues, including shot information and xG.
7 | # 8 | # Notes: None 9 | 10 | # %% Imports 11 | 12 | import os 13 | import sys 14 | import matplotlib.pyplot as plt 15 | import matplotlib as mpl 16 | import numpy as np 17 | import pandas as pd 18 | from sklearn.linear_model import LogisticRegression 19 | from mplsoccer.pitch import VerticalPitch 20 | import pickle 21 | import bz2 22 | from PIL import Image 23 | from joblib import dump, load 24 | 25 | # %% Add custom tools to path 26 | 27 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))) 28 | sys.path.append(root_folder) 29 | 30 | import analysis_tools.wyscout_data_engineering as wde 31 | 32 | # %% User inputs 33 | 34 | # List of leagues to use for xg model 35 | leagues = ["England", "Italy", "France", "Germany", "Spain"] 36 | 37 | # %% Set constants 38 | 39 | PITCH_WIDTH_Y = 80 40 | PITCH_LENGTH_X = 120 41 | GOAL_WIDTH_Y = 8 42 | 43 | # %% Load Wyscout data for all competitions 44 | 45 | match_data, event_data, event_data_by_match, player_data, competition_data, team_data = wde.format_wyscout_data(leagues) 46 | 47 | # %% Define dataframe of all shots in event data 48 | 49 | # Initialise shot model dataframe 50 | shots_model_df = pd.DataFrame() 51 | i = 0 52 | 53 | # Loop through shots and obtain/calculate shot information 54 | for _, shot in event_data[event_data['eventName']=='Shot'].iterrows(): 55 | 56 | # Player, team and competition 57 | if shot['playerId'] != 0: 58 | shots_model_df.loc[i, 'player_name'] = player_data.loc[shot['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape') 59 | else: 60 | shots_model_df.loc[i, 'player_name'] = np.nan 61 | shots_model_df.loc[i, 'team_name'] = team_data.loc[shot['teamId']]['name'].encode('ascii', 'strict').decode('unicode-escape') 62 | shots_model_df.loc[i, 'competition_name'] = competition_data.loc[match_data.loc[shot['matchId']]['competitionId']]['name'] 63 | 64 | # Position and distance info 65 | shots_model_df.loc[i, 'x_yards'] = 
(PITCH_LENGTH_X/100)*(100 - shot['positions'][0]['x']) 66 | shots_model_df.loc[i, 'c_yards'] = (PITCH_WIDTH_Y/100)*(shot['positions'][0]['y'] - 50) 67 | shots_model_df.loc[i,'distance_yards'] = np.sqrt(shots_model_df.loc[i, 'x_yards']**2 + shots_model_df.loc[i, 'c_yards']**2) 68 | 69 | # Angle info 70 | angle_denominator = (shots_model_df.loc[i, 'x_yards']**2 + shots_model_df.loc[i, 'c_yards']**2 - (GOAL_WIDTH_Y/2)**2) 71 | if angle_denominator == 0: 72 | angle = np.pi/2 73 | else: 74 | angle = np.arctan((2*(GOAL_WIDTH_Y/2)*shots_model_df.loc[i, 'x_yards'])/angle_denominator) 75 | if angle<0: 76 | angle = np.pi + angle 77 | shots_model_df.loc[i, 'angle'] = angle 78 | 79 | # Header info 80 | shots_model_df.loc[i, 'header_tag'] = 0 81 | if {'id': 403} in shot['tags']: 82 | shots_model_df.loc[i, 'header_tag'] = 1 83 | 84 | # Outcome 85 | shots_model_df.loc[i, 'goal'] = 0 86 | if {'id': 101} in shot['tags']: 87 | shots_model_df.loc[i, 'goal'] = 1 88 | 89 | i += 1 90 | 91 | 92 | #%% Train a logistic regression model 93 | 94 | X = shots_model_df.drop(['player_name', 'team_name', 'competition_name','goal'], axis=1) 95 | y = shots_model_df['goal'] 96 | 97 | # Fit 98 | log_model = LogisticRegression() 99 | log_model.fit(X,y) 100 | 101 | # Save model 102 | dump(log_model, '../../model_directory/xg_model/log_regression_xg_model.joblib') 103 | 104 | # Coefficients 105 | a = log_model.intercept_[0] 106 | b = log_model.coef_[0] 107 | 108 | # Calculate xG 109 | shots_model_df['xG'] = log_model.predict_proba(X)[:,1] 110 | 111 | # %% Save xG data 112 | 113 | with bz2.BZ2File("../../data_directory/misc_data/log_regression_xg_data.pbz2", "wb") as f: 114 | pickle.dump(shots_model_df, f) 115 | 116 | #%% Create an xG test-set, and predict on test set 117 | 118 | # Initialise arrays for ground and header test-sets 119 | prob_goal_grnd = np.zeros((int(1+PITCH_LENGTH_X/2), int(1+PITCH_WIDTH_Y))) 120 | prob_goal_head = np.zeros((int(1+PITCH_LENGTH_X/2), int(1+PITCH_WIDTH_Y))) 121 | 122 | # 
Create array of shots 123 | dists = list() 124 | probs = list() 125 | 126 | for x_pos in range(0,int(PITCH_LENGTH_X/2 + 1)): 127 | for y_pos in range(0, int(PITCH_WIDTH_Y + 1)): 128 | c_pos = y_pos - PITCH_WIDTH_Y/2 129 | angle_denominator = (x_pos**2 + c_pos**2 - (GOAL_WIDTH_Y/2)**2) 130 | if angle_denominator == 0: 131 | angle = np.pi/2 132 | else: 133 | angle = np.arctan(2*(GOAL_WIDTH_Y/2)*x_pos/angle_denominator) 134 | if angle < 0: 135 | angle = np.pi + angle 136 | distance = np.sqrt(x_pos**2 + c_pos**2) 137 | prob_goal_grnd[x_pos, y_pos] = log_model.predict_proba([[x_pos, c_pos, distance, angle, 0]])[:,1] 138 | prob_goal_head[x_pos, y_pos] = log_model.predict_proba([[x_pos, c_pos, distance, angle, 1]])[:,1] 139 | 140 | dists.append(distance) 141 | probs.append(prob_goal_grnd[x_pos, y_pos]) 142 | 143 | # %% Plot xG model 144 | 145 | # Overwrite rcParams 146 | mpl.rcParams['xtick.color'] = "white" 147 | mpl.rcParams['ytick.color'] = "white" 148 | mpl.rcParams['xtick.labelsize'] = 10 149 | mpl.rcParams['ytick.labelsize'] = 10 150 | 151 | # Plot pitches 152 | pitch = VerticalPitch(half=True,pitch_color='#313332', line_color='white', linewidth=1, stripe=False) 153 | fig, ax = pitch.grid(nrows=1, ncols=2, grid_height=0.75, space = 0.1, axis=False) 154 | fig.set_size_inches(10, 5.5) 155 | fig.set_facecolor('#313332') 156 | 157 | # Add xG maps and contours 158 | pos1 = ax['pitch'][0].imshow(prob_goal_grnd, extent = (80,0,60,120) ,aspect='equal',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno) 159 | pos2 = ax['pitch'][1].imshow(prob_goal_head, extent = (80,0,60,120) ,aspect='equal',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno) 160 | cs1 = ax['pitch'][0].contour(prob_goal_grnd, extent = (1,80,120,60), levels = [0.01,0.05,0.2,0.5], colors = ['darkgrey','darkgrey','darkgrey','k'], linestyles = 'dotted') 161 | cs2 = ax['pitch'][1].contour(prob_goal_head, extent = (1,80,120,60), levels = [0.01,0.05,0.2,0.5], colors = ['darkgrey','darkgrey','darkgrey','k'], linestyles = 'dotted') 162 | 
# Label the contour lines on both xG maps
ax['pitch'][0].clabel(cs1)
ax['pitch'][1].clabel(cs2)

# Title
fig.text(0.045, 0.9, "Expected Goals - Logistic Regression Model", fontsize=16, color="white", fontweight="bold")
fig.text(0.045, 0.85, "Trained on all 40,000+ shots during the 2017/18 season across Europe's 'big five' Leagues", fontsize=14, color="white", fontweight="regular")
fig.text(0.12, 0.76, "Shot Type: Left or Right Foot", fontsize=12, color="white", fontweight="bold")
fig.text(0.66, 0.76, "Shot Type: Header", fontsize=12, color="white", fontweight="bold")

# Colourbar (shared scale, attached to the header map)
cbar = fig.colorbar(pos2, ax=ax['pitch'][1], location="bottom", fraction=0.04, pad=0.0335)
cbar.ax.set_ylabel('xG', loc="bottom", color="white", fontweight="bold", rotation=0, labelpad=20)

# Footer text
fig.text(0.255, 0.09, "Created by Jake Kolliari (@_JKDS_). Data provided by Wyscout.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Format and show
plt.tight_layout()
plt.show()

# %% Plot distance vs. xG

fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(8, 8), facecolor='#313332')
ax.patch.set_alpha(0)

# Add one point per grid position (ground-shot probability against distance).
# Drawn on the axes explicitly rather than through pyplot's implicit "current axes".
ax.scatter(x=dists, y=probs, color='mediumaquamarine', s=10)

# Format
ax.spines['bottom'].set_color('w')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_color('w')
ax.grid(lw=0.5, color='grey', ls=':')
ax.set_xlabel("Distance from Goal (yds)", fontsize=12, fontweight="bold", labelpad=10)
ax.set_ylabel("Probability of Scoring", fontsize=12, color="mediumaquamarine", fontweight="bold", labelpad=10)

# Titles
title_text = "Expected Goals Model"
subtitle_text = "Probability of Scoring vs. Distance from Goal"
fig.text(0.08, 0.935, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.08, 0.9, subtitle_text, fontweight="regular", fontsize=13, color='w')

# Add footer text
fig.text(0.5, 0.02, "Created by Jake Kolliari (@_JKDS_). Data provided by Wyscout.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo.
# Forward slashes are used so the relative path resolves on every OS and the
# literal contains no invalid backslash escape sequences.
logo_ax = fig.add_axes([0.94, 0.005, 0.04, 0.04])
logo_ax.axis("off")
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
logo_ax.imshow(badge)

fig.tight_layout(rect=[0.03, 0.04, 0.97, 0.86])

# --------------------------------------------------------------------------------
# /projects/03_model_development_and_implementation/xg_neural_network.py:
# --------------------------------------------------------------------------------
# %% Expected Goals Model, using Wyscout data from Top 5 Leagues in 2017/18
#
# Inputs:   Leagues to use to train xg model
#
# Outputs:  xG model, displayed as an xG heatmap
#           Dataframe all shots in chosen leagues, including shot information and xG.
#
# Notes:    None

# %% Imports

import os
import sys
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import tensorflow as tf
from mplsoccer.pitch import VerticalPitch
import pickle
import bz2

# %% Add custom tools to path

# The repository root is three directory levels above this script
root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.wyscout_data_engineering as wde

# %% User inputs

# List of leagues to use for xg model
leagues = ["England", "Italy", "France", "Germany", "Spain"]

# %% Set constants

PITCH_WIDTH_Y = 80
PITCH_LENGTH_X = 120
GOAL_WIDTH_Y = 8

# %% Load Wyscout data for all competitions

match_data, event_data, event_data_by_match, player_data, competition_data, team_data = wde.format_wyscout_data(leagues)

# %% Define dataframe of all shots in event data

# Initialise shot model dataframe
def _shot_angle(x_yards, c_yards, goal_width=GOAL_WIDTH_Y):
    """Return the angle (radians) subtended by the goal mouth at a shot location.

    x_yards is the distance from the goal line and c_yards the lateral offset
    from the centre of the goal. Negative arctan results are shifted by pi so
    the returned angle always lies in [0, pi].
    """
    denominator = x_yards**2 + c_yards**2 - (goal_width/2)**2
    if denominator == 0:
        return np.pi/2
    angle = np.arctan((2*(goal_width/2)*x_yards)/denominator)
    if angle < 0:
        angle = np.pi + angle
    return angle


# Collect one record per shot and build the dataframe in a single step.
# Growing a dataframe cell-by-cell with .loc re-allocates on every new row,
# which is needlessly slow for the 40,000+ shots processed here.
shot_records = []

# Loop through shots and obtain/calculate shot information
for _, shot in event_data[event_data['eventName'] == 'Shot'].iterrows():

    # Player, team and competition (playerId 0 marks a shot with no known player)
    if shot['playerId'] != 0:
        player_name = player_data.loc[shot['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
    else:
        player_name = np.nan
    team_name = team_data.loc[shot['teamId']]['name'].encode('ascii', 'strict').decode('unicode-escape')
    competition_name = competition_data.loc[match_data.loc[shot['matchId']]['competitionId']]['name']

    # Position and distance info: percentage coordinates scaled to yards,
    # measured from the goal line (x) and from the centre of the goal (c)
    x_yards = (PITCH_LENGTH_X/100)*(100 - shot['positions'][0]['x'])
    c_yards = (PITCH_WIDTH_Y/100)*(shot['positions'][0]['y'] - 50)
    distance_yards = np.sqrt(x_yards**2 + c_yards**2)

    shot_records.append({
        'player_name': player_name,
        'team_name': team_name,
        'competition_name': competition_name,
        'x_yards': x_yards,
        'c_yards': c_yards,
        'distance_yards': distance_yards,
        'angle': _shot_angle(x_yards, c_yards),
        # Tag id 403 marks a header, tag id 101 marks a goal; stored as 0.0/1.0
        'header_tag': float({'id': 403} in shot['tags']),
        'goal': float({'id': 101} in shot['tags']),
    })

shots_model_df = pd.DataFrame(shot_records, columns=['player_name', 'team_name', 'competition_name', 'x_yards', 'c_yards',
                                                     'distance_yards', 'angle', 'header_tag', 'goal'])


#%% Train a neural network

# Features: x_yards, c_yards, distance_yards, angle, header_tag. Target: goal.
X = shots_model_df.drop(['player_name', 'team_name', 'competition_name', 'goal'], axis=1)
y = shots_model_df['goal']

# Build model: three 256-unit ReLU layers and a 2-class softmax output
neural_net = tf.keras.models.Sequential()
neural_net.add(tf.keras.layers.Flatten())
neural_net.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
neural_net.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
neural_net.add(tf.keras.layers.Dense(256, activation=tf.nn.relu))
neural_net.add(tf.keras.layers.Dense(2, activation=tf.nn.softmax))

# Compile model
neural_net.compile(optimizer='adam',
                   loss='sparse_categorical_crossentropy',
                   metrics=['accuracy'])

# Train model
neural_net.fit(X, y, epochs=5)

# %% Make prediction

# xG is the predicted probability of the "goal" class (column 1).
# The features are passed directly; wrapping the single input in a list is unnecessary.
shots_model_df['xG'] = neural_net.predict(X)[:, 1]

# %% Save xG data

with bz2.BZ2File("../../data_directory/misc_data/neural_net_xg_data.pbz2", "wb") as f:
    pickle.dump(shots_model_df, f)

#%% Create an xG test-set, and predict on test set

# One synthetic shot per whole-yard position of the attacking half.
# Rows are ordered x-major, so the predictions reshape to [x_pos, y_pos].
grid_rows = []
for x_pos in range(0, int(PITCH_LENGTH_X/2 + 1)):
    for y_pos in range(0, int(PITCH_WIDTH_Y + 1)):
        c_pos = y_pos - PITCH_WIDTH_Y/2
        grid_rows.append([x_pos, c_pos, np.sqrt(x_pos**2 + c_pos**2), _shot_angle(x_pos, c_pos)])

# Ground and header test sets differ only in the header flag
test_columns = ['xpos', 'cpos', 'distance', 'angle', 'header_tag']
X_test_grnd = pd.DataFrame([row + [0] for row in grid_rows], columns=test_columns, dtype='float64')
X_test_head = pd.DataFrame([row + [1] for row in grid_rows], columns=test_columns, dtype='float64')

grid_shape = (int(1+PITCH_LENGTH_X/2), int(1+PITCH_WIDTH_Y))
prob_goal_grnd = neural_net.predict(X_test_grnd)[:, 1].reshape(grid_shape)
prob_goal_head = neural_net.predict(X_test_head)[:, 1].reshape(grid_shape)

# %% Plot xG model

# Overwrite rcParams
mpl.rcParams['xtick.color'] = "white"
mpl.rcParams['ytick.color'] = "white"
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10

# Plot pitches
pitch = VerticalPitch(half=True, pitch_color='#313332', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=1, ncols=2, grid_height=0.75, space=0.1, axis=False)
fig.set_size_inches(10, 5.5)
fig.set_facecolor('#313332')

# Add xG maps and contours
# NOTE(review): imshow and contour are given different extents ((80,0,60,120) vs (1,80,120,60)) - confirm intended.
pos1 = ax['pitch'][0].imshow(prob_goal_grnd, extent=(80, 0, 60, 120), aspect='equal', vmin=-0.04, vmax=0.4, cmap=plt.cm.inferno)
pos2 = ax['pitch'][1].imshow(prob_goal_head, extent=(80, 0, 60, 120), aspect='equal', vmin=-0.04, vmax=0.4, cmap=plt.cm.inferno)
cs1 = ax['pitch'][0].contour(prob_goal_grnd, extent=(1, 80, 120, 60), levels=[0.01, 0.05, 0.2, 0.5], colors=['darkgrey', 'darkgrey', 'darkgrey', 'k'], linestyles='dotted')
cs2 = ax['pitch'][1].contour(prob_goal_head, extent=(1, 80, 120, 60), levels=[0.01, 0.05, 0.2, 0.5], colors=['darkgrey', 'darkgrey', 'darkgrey', 'k'], linestyles='dotted')
ax['pitch'][0].clabel(cs1)
ax['pitch'][1].clabel(cs2)

# Title
fig.text(0.045, 0.9, "Expected Goals - Neural Network", fontsize=16, color="white", fontweight="bold")
fig.text(0.045, 0.85, "Trained on all 40,000+ shots during the 2017/18 season across Europe's 'big five' Leagues", fontsize=14, color="white", fontweight="regular")
fig.text(0.12, 0.76, "Shot Type: Left or Right Foot", fontsize=12, color="white", fontweight="bold")
fig.text(0.66, 0.76, "Shot Type: Header", fontsize=12, color="white", fontweight="bold")

# Colourbar
cbar = fig.colorbar(pos2, ax=ax['pitch'][1], location="bottom", fraction=0.04, pad=0.0335)
cbar.ax.set_ylabel('xG', loc="bottom", color="white", fontweight="bold", rotation=0, labelpad=20)

# Footer text
fig.text(0.255, 0.09, "Created by Jake Kolliari. Data provided by Wyscout.com",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Format and show
plt.tight_layout()
plt.show()

# --------------------------------------------------------------------------------
# /projects/04_match_reports/off_def_shape_report_ws.py:
# --------------------------------------------------------------------------------
# %% Create shape visualisation

# %% Imports and parameters

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from PIL import Image
from mplsoccer.pitch import VerticalPitch
import os
import sys
import bz2
import pickle
import numpy as np
from collections import Counter

# %% Function definitions


def protected_divide(n, d):
    """Return n / d, or 0 when d is falsy (zero or None)."""
    return n / d if d else 0

# %% Add custom tools to path

# The repository root is three directory levels above this script
root_folder = os.path.abspath(os.path.dirname(
    (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.whoscored_custom_events as wce
import analysis_tools.pitch_zones as pz
import analysis_tools.whoscored_data_engineering as wde
import analysis_tools.logos_and_badges as lab

# %% User inputs

# Input WhoScored match id
match_id = '1640989'

# Select year
year = '2022'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Select team codes
home_team = 'Liverpool'
away_team = 'Aston Villa'

# Team name to print
home_team_print = None
away_team_print = None

# Pass flow zone type
zone_type = 'jdp_custom'

# Pass hull inclusion
central_pct_off = '1std'
central_pct_def = '1std'

# %% Logos, colours and printed names

home_logo, home_colourmap = lab.get_team_badge_and_colour(home_team, 'home')
# NOTE(review): the away team is also requested with 'home' - confirm this is deliberate.
away_logo, away_colourmap = lab.get_team_badge_and_colour(away_team, 'home')

if home_team_print is None:
    home_team_print = home_team

if away_team_print is None:
    away_team_print = away_team

cmaps = [home_colourmap, away_colourmap]

leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League 1', 'EFL2': 'EFL League 2'}

# %% Read in data

# Season folder is "<start year>_<two-digit end year>", e.g. 2022_23.
# Derived arithmetically: stripping '20' from the year string breaks for '2020'
# (it leaves an empty string that cannot be converted to int).
season_folder = f"{year}_{(int(year) + 1) % 100:02d}"
match_file_stub = f"../../data_directory/whoscored_data/{season_folder}/{league}/match-{{}}-{match_id}-{home_team}-{away_team}.pbz2"

# Opta data (context managers make sure the compressed files are closed)
with bz2.BZ2File(match_file_stub.format('eventdata'), 'rb') as f:
    events_df = pickle.load(f)
with bz2.BZ2File(match_file_stub.format('playerdata'), 'rb') as f:
    players_df = pickle.load(f)

# %% Calculate Scoreline (special accounting for own goals)

# The first team id found in the player data is treated as home, the second as away
team_ids = players_df['teamId'].unique()
is_goal = events_df['eventType'] == 'Goal'
home_goal_event = (events_df['teamId'] == team_ids[0]) & is_goal
away_goal_event = (events_df['teamId'] == team_ids[1]) & is_goal

if 'isOwnGoal' in events_df.columns:
    # A value equals itself unless it is NaN, so this is True only where the own-goal flag is set
    own_goal = events_df['isOwnGoal'] == events_df['isOwnGoal']
    # Own goals count towards the opposition's score
    home_score = int((home_goal_event & ~own_goal).sum() + (away_goal_event & own_goal).sum())
    away_score = int((away_goal_event & ~own_goal).sum() + (home_goal_event & own_goal).sum())
else:
    home_score = int(home_goal_event.sum())
    away_score = int(away_goal_event.sum())

# %% Pre-process data

# Add cumulative minutes information
players_df = wde.minutes_played(players_df, events_df)

# Calculate longest consistent xi
players_df = wde.longest_xi(players_df)

# %% Aggregate data per player

playerinfo_df = wde.create_player_list(players_df)

# %% Create dataframes of defensive and offensive actions

defensive_actions_df = wde.find_defensive_actions(events_df)
offensive_actions_df = wde.find_offensive_actions(events_df)

# Initialise dataframes
defensive_hull_df = pd.DataFrame()
offensive_hull_df = pd.DataFrame()

# Create convex hull for each player in the longest-serving xi
for player_id in players_df[players_df['longest_xi']==True].index:
    player_def_hull = wce.create_convex_hull(defensive_actions_df[defensive_actions_df['playerId'] == player_id], name=players_df.loc[player_id,'name'],
                                             min_events=5, include_events=central_pct_def, pitch_area = 10000)
    player_off_hull = wce.create_convex_hull(offensive_actions_df[offensive_actions_df['playerId'] == player_id], name=players_df.loc[player_id,'name'],
                                             min_events=5, include_events=central_pct_off, pitch_area = 10000)
    offensive_hull_df = pd.concat([offensive_hull_df, player_off_hull])
    defensive_hull_df = pd.concat([defensive_hull_df, player_def_hull])

# %% Create viz of area covered by each player when passing

plot_team = 'away'

# Plot pitches
pitch = VerticalPitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=1, ncols=2, title_height=0.22,
                     grid_height=0.7, endnote_height=0.06, axis=False)
fig.set_size_inches(8.5, 7.5)
fig.set_facecolor('#313332')

# Team to plot
plot_team = home_team if plot_team == 'home' else away_team


def plot_player_hulls(hull_df, pitch_ax, team):
    """Draw every hull in hull_df that belongs to a player of team onto pitch_ax.

    hull_df is indexed by player name. Each hull is drawn as a filled polygon
    with its contributing events, plus a hexagonal marker carrying the player's
    initials at the hull centre. Colours depend on the player's position;
    positions that several players can share (FW, central midfield, DC) step
    through a palette so team-mates are distinguishable.
    """
    # Fixed colours for positions where one colour is used for everybody
    fixed_colours = {'DR': 'lawngreen', 'DL': 'lawngreen', '': 'lawngreen',
                     'MR': 'deepskyblue', 'ML': 'deepskyblue', 'AML': 'deepskyblue',
                     'AMR': 'deepskyblue', 'FWR': 'deepskyblue', 'FWL': 'deepskyblue'}
    # Palettes stepped through per role; several positions can share one role
    role_of_position = {'FW': 'cf', 'MC': 'cm', 'DMC': 'cm', 'AMC': 'cm', 'DC': 'cb'}
    role_palettes = {'cf': ['tomato', 'lightpink'],
                     'cm': ['snow', 'violet', 'cyan', 'yellow'],
                     'cb': ['tomato', 'gold', 'lawngreen']}
    role_counts = {'cf': 0, 'cm': 0, 'cb': 0}

    for player_name, hull_row in hull_df.iterrows():

        # Only plot hulls of players in the chosen team
        player_rows = players_df[players_df['name']==player_name]
        if player_rows['team'].values[0] != team:
            continue

        # Get player position and assign colour based on position
        position = player_rows['position'].values[0]
        if position in fixed_colours:
            hull_colour = fixed_colours[position]
        elif position in role_of_position:
            role = role_of_position[position]
            palette = role_palettes[role]
            # Wrap around rather than fail if a team fields more players in a role than the palette holds
            hull_colour = palette[role_counts[role] % len(palette)]
            role_counts[role] += 1
        else:
            hull_colour = 'lightpink'

        # Define text colour based on marker colour (dark text on light markers)
        text_colour = 'k' if hull_colour in ['snow', 'white'] else 'w'

        # Player initials: first two letters of a single name, otherwise first letters of the first two names
        name_parts = player_name.split(' ')
        if len(name_parts) == 1:
            initials = name_parts[0][0:2]
        else:
            initials = name_parts[0][0].upper() + name_parts[1][0].upper()

        # Plot (axes are vertical, so y is passed as the horizontal coordinate)
        centre_x, centre_y = hull_row['hull_centre'][0], hull_row['hull_centre'][1]
        pitch_ax.scatter(hull_row['hull_reduced_y'], hull_row['hull_reduced_x'], color=hull_colour, s=20, alpha = 0.3, zorder=2)
        plot_hull = pitch.convexhull(hull_row['hull_reduced_x'], hull_row['hull_reduced_y'])
        pitch.polygon(plot_hull, ax=pitch_ax, facecolor=hull_colour, alpha=0.2, capstyle = 'round', zorder=1)
        pitch.polygon(plot_hull, ax=pitch_ax, edgecolor=hull_colour, alpha=0.3, facecolor='none', capstyle = 'round', zorder=1)
        pitch_ax.scatter(centre_y, centre_x, marker ='H', color = hull_colour, alpha = 0.6, s = 400, zorder = 3)
        pitch_ax.scatter(centre_y, centre_x, marker ='H', edgecolor = hull_colour, facecolor = 'none', alpha = 1, lw = 2, s = 400, zorder = 3)
        pitch_ax.text(centre_y, centre_x, initials, fontsize = 8, fontweight = 'bold', va = 'center', ha = 'center', color = text_colour, zorder = 4)


# Plot attacking convex hulls on the left pitch
plot_player_hulls(offensive_hull_df, ax['pitch'][0], plot_team)

# Plot defensive convex hulls on the right pitch
plot_player_hulls(defensive_hull_df, ax['pitch'][1], plot_team)

# Ax titles
ax['pitch'][0].set_title(f"{plot_team} Offensive Shape", pad = 0, color = "w", fontweight = "bold")
ax['pitch'][1].set_title(f"{plot_team} Defensive Shape", pad = 0, color = "w", fontweight = "bold")

# Label based on include parameter ('1std' -> '1 Std. Dev', otherwise a percentage)
hull_include = central_pct_off.replace('std','') + ' Std. Dev' if 'std' in str(central_pct_off) else str(central_pct_off) + '%'

# Title text
title_text = f"{leagues[league]} - {year}/{int(year) + 1}" if not league in ['World_Cup'] else f"{leagues[league]} - {year}"
subtitle_text = f"{home_team_print} {home_score}-{away_score} {away_team_print}"
subsubtitle_text = f"Offensive and defensive territories, defined by central\n{hull_include} of offensive and defensive actions per player"

fig.text(0.5, 0.93, title_text, ha='center',
         fontweight="bold", fontsize=20, color='w')
fig.text(0.5, 0.882, subtitle_text, ha='center',
         fontweight="bold", fontsize=18, color='w')
fig.text(0.5, 0.82, subsubtitle_text, ha='center',
         fontweight="regular", fontsize=11, color='w')

# Add home team Logo
ax = fig.add_axes([0.07, 0.825, 0.14, 0.14])
ax.axis("off")
ax.imshow(home_logo)

# Add away team Logo
ax = fig.add_axes([0.79, 0.825, 0.14, 0.14])
ax.axis("off")
ax.imshow(away_logo)

# Add direction of play arrow
ax = fig.add_axes([0.47, 0.17, 0.06, 0.6])
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.axis("off")
ax.arrow(0.65, 0.2, 0, 0.58, color="w", width=0.001, head_width = 0.1, head_length = 0.02)
ax.text(0.495, 0.48, "Direction of play", ha="center", va="center", fontsize=10, color="w", fontweight="regular", rotation=90)

# Footer text
fig.text(0.5, 0.035, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo.
# Forward slashes are used so the relative path resolves on every OS and the
# literal contains no invalid backslash escape sequences.
ax = fig.add_axes([0.875, 0.01, 0.07, 0.07])
ax.axis("off")
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Save image
fig.savefig(f"shape_reports/{league}-{match_id}-{plot_team}-shape", dpi=300)

# --------------------------------------------------------------------------------
# /projects/05_competition_reports_top_players/player_high_defensive_actions.py:
# --------------------------------------------------------------------------------
# %% Create visualisation of top players by defensive actions across a selection of games
#
# Inputs:   Year to plot data from
#           League to plot data from
#           Whoscored match ids
#           Positions not to include
#           Date of run
#           Normalisation mode
#           Minimum play time
#
# Outputs:  Top 12 players by defensive actions

# %% Imports and parameters

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from PIL import Image, ImageEnhance
from mplsoccer.pitch import VerticalPitch, Pitch
import os
import sys
import bz2
import pickle
import numpy as np
from collections import Counter
import highlight_text as htext
import glob

# %% Function definitions


def protected_divide(n, d):
    """Return n / d, or 0 when d is falsy (zero or None)."""
    return n / d if d else 0

# %% Add custom tools to path

# The repository root is three directory levels above this script
root_folder = os.path.abspath(os.path.dirname(
    (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.whoscored_custom_events as wce
import analysis_tools.pitch_zones as pz
import analysis_tools.whoscored_data_engineering as wde
import analysis_tools.logos_and_badges as lab

# %% User inputs

# Select year
year = '2022'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Select position to exclude
pos_exclude=[]

# Position formatting on title
pos_input = ''

# Input run-date
run_date = '28/05/2023'

# Normalisation (None, '_90', '_100opp_pass')
norm_mode = '_100opp_pass'
#norm_mode = '_90'

# Min minutes played
min_mins = 1800

# Brighten logo
logo_brighten = True

# %% League logo

comp_logo = lab.get_competition_logo(league, year=year, logo_brighten=logo_brighten)

# %% Get data

# Season folder is "<start year>_<two-digit end year>", e.g. 2022_23.
# Derived arithmetically: stripping '20' from the year string breaks for '2020'
# (it leaves an empty string that cannot be converted to int).
file_path = f"../../data_directory/whoscored_data/{year}_{(int(year) + 1) % 100:02d}/{league}"
files = os.listdir(file_path)

# Per-match frames are collected in lists and concatenated once at the end;
# concatenating inside the loop copies all previously loaded data on every file.
match_event_frames = []
match_player_frames = []

# Load data (each compressed file is closed as soon as it has been unpickled).
# Files that match none of the patterns below are ignored.
for file in files:
    if file == 'event-types.pbz2':
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as f:
            event_types = pickle.load(f)
    elif file == 'formation-mapping.pbz2':
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as f:
            formation_mapping = pickle.load(f)
    elif '-eventdata-' in file:
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as f:
            match_event_frames.append(pickle.load(f))
    elif '-playerdata-' in file:
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as f:
            match_player_frames.append(pickle.load(f))

# Storage dataframes (left empty when the folder holds no match files)
events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()

# %% Pre-process data

# Add pass recipient
events_df = wde.get_recipient(events_df)

# Add cumulative minutes information
players_df = wde.minutes_played(players_df, events_df)

# Calculate longest consistent xi
players_df = wde.longest_xi(players_df)

# Calculate opposition pass events with x <= 34 that each player faces per game.
# NOTE(review): presumably these are passes in the opposition's own defensive third, mirroring the
# x > 67 filter applied to defensive actions below - confirm the 34 vs 33 boundary is intended.
players_df = wde.events_while_playing(events_df[events_df['x']<= 34], players_df, event_name = 'Pass', event_team = 'opposition')
# Keep a copy under its own name, because the next call writes 'opp_pass' again
players_df['oppthird_opp_pass'] = players_df['opp_pass']

# Calculate pass events that each player faces per game
players_df = wde.events_while_playing(events_df, players_df, event_name = 'Pass', event_team = 'opposition')

# %% Aggregate data per player

playerinfo_df = wde.create_player_list(players_df, additional_cols = ['opp_pass', 'oppthird_opp_pass'])

#%% Aggregation

# Aggregate all defensive actions
all_def_actions = wde.find_defensive_actions(events_df)
playerinfo_df = wde.group_player_events(all_def_actions, playerinfo_df, primary_event_name='def_actions')

# Aggregate defensive actions in the opposition third (x > 67)
oppthird_def_actions = all_def_actions[all_def_actions['x']>67]
playerinfo_df = wde.group_player_events(oppthird_def_actions, playerinfo_df, primary_event_name='oppthird_def_actions')

# Defensive actions per 100 opp passes and per 90 mins
playerinfo_df['def_actions_100opp_pass'] = round(100*playerinfo_df['def_actions']/playerinfo_df['opp_pass'],2)
playerinfo_df['def_actions_90'] = round(90*playerinfo_df['def_actions']/playerinfo_df['mins_played'],2)
playerinfo_df['oppthird_def_actions_100opp_pass'] = round(100*playerinfo_df['oppthird_def_actions']/playerinfo_df['oppthird_opp_pass'],2)
playerinfo_df['oppthird_def_actions_90'] = round(90*playerinfo_df['oppthird_def_actions']/playerinfo_df['mins_played'],2)

# %% Player removal

# Drop excluded positions and players below the minimum play-time
playerinfo_reduced_df = playerinfo_df[~playerinfo_df['position'].isin(pos_exclude) & (playerinfo_df['mins_played']>=min_mins)]

# %% Ordering and title wording based on normalisation

# norm_mode -> (sort columns: primary then tie-break, wording appended to the subtitle)
norm_settings = {
    None: (['oppthird_def_actions', 'oppthird_def_actions_100opp_pass'], ''),
    '_90': (['oppthird_def_actions_90', 'oppthird_def_actions_100opp_pass'], 'per 90mins'),
    '_100opp_pass': (['oppthird_def_actions_100opp_pass', 'oppthird_def_actions_90'], 'per 100 opposition passes in that third'),
}

# Fail immediately on an unsupported mode instead of hitting an undefined variable further down
if norm_mode not in norm_settings:
    raise ValueError(f"Unsupported norm_mode {norm_mode!r}; expected None, '_90' or '_100opp_pass'")

sort_columns, title_addition = norm_settings[norm_mode]
sorted_df = playerinfo_reduced_df.sort_values(sort_columns, ascending=[False, False])

# %% Text formatting

if len(pos_exclude)==0:
    title_pos_str = 'players'
    file_pos_str = ''
else:
    title_pos_str = pos_input
    file_pos_str = '-' + pos_input

# %% Create viz of top players by defensive actions

# Overwrite rcparams
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=3, ncols=4, grid_height=0.75, space=0.12, axis=False)
fig.set_size_inches(14, 10)
fig.set_facecolor('#313332')
# Flatten the 3x4 grid of axes so they can be indexed by rank
ax['pitch'] = ax['pitch'].reshape(-1)

# Plot a defensive-action heatmap for each of the top 12 players, one pitch per player
for idx, (player_id, name) in enumerate(sorted_df.head(12).iterrows()):
    player_def_actions = all_def_actions[all_def_actions['playerId'] == player_id]
    player_opp_third_def_actions = oppthird_def_actions[oppthird_def_actions['playerId'] == player_id]
    player_ax = ax['pitch'][idx]

    player_ax.set_title(f" {idx + 1}: {name['name']}", loc = "left", color='w', fontsize = 10)

    # NOTE(review): the density is drawn twice (80 and 100 levels) as two semi-transparent layers - confirm
    # this is deliberate. shade_lowest is deprecated in newer seaborn releases in favour of thresh.
    pitch.kdeplot(player_def_actions['x'], player_def_actions['y'], ax=player_ax, fill=True, levels=80, shade_lowest=True, cmap='viridis', cut=8, alpha=0.6, antialiased=True, zorder=0)
    pitch.kdeplot(player_def_actions['x'], player_def_actions['y'], ax=player_ax, fill=True, levels=100, shade_lowest=True, cmap='viridis', cut=8, alpha=0.6, antialiased=True, zorder=0)

    # Grey out everything outside the opposition third and mark its boundary
    player_ax.fill([0, 67, 67, 0], [0, 0, 100, 100], 'grey', alpha = 0.7, zorder=0)
    player_ax.plot([67, 67], [0, 99], 'w', ls = 'dashed', zorder=0)

    # All actions in black, opposition-third actions highlighted in white
    pitch.scatter(player_def_actions['x'], player_def_actions['y'], color = 'k', alpha = 0.2, s = 12, zorder=1, ax=player_ax)
    pitch.scatter(player_opp_third_def_actions['x'], player_opp_third_def_actions['y'], color = 'w', alpha = 0.6, s = 12, zorder=1, ax=player_ax)

    player_ax.text(0, -8, "Opp. 3rd Actions:", fontsize=8, fontweight='bold', color='w', zorder=1)
    player_ax.text(39, -8, f"{int(name['oppthird_def_actions'])}", fontsize=8, color='w', zorder=1)

    if norm_mode == '_100opp_pass':
        player_ax.text(48, -8, "Per 100 Opp. Passes:", fontsize=8, fontweight='bold', color='w', zorder=1)
        player_ax.text(95, -8, f"{round(name['oppthird_def_actions_100opp_pass'],1)}", fontsize=8, color='w', zorder=1)

    if norm_mode == '_90':
        player_ax.text(50, -8, "Per 90 Mins:", fontsize=8, fontweight='bold', color='w', zorder=1)
        player_ax.text(85, -8, f"{round(name['oppthird_def_actions_90'],1)}", fontsize=8, color='w', zorder=1)

    team = name['team']
    team_logo, _ = lab.get_team_badge_and_colour(team)

    # Place the team badge just above the top-right corner of the player's pitch
    ax_pos = player_ax.get_position()

    logo_ax = fig.add_axes([ax_pos.x1-0.035, ax_pos.y1, 0.035, 0.035])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)

# Create title and subtitles
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year) + 1} - Top 12 {title_pos_str} by Tendency to Defend from the Front"
subtitle_text = f"Heatmaps of defensive actions shown. Ranking by total number of defensive actions in opp third, {title_addition}"
subsubtitle_text = f"Ball recoveries, blocks, clearances, interceptions and tackles included. Correct as of {run_date}. Players with less than {min_mins} mins play-time omitted."
# (Closing statements of the preceding script in this listing: titles, logos, export)

# Title
fig.text(0.1, 0.945, title_text, fontweight="bold", fontsize=15, color='w')
fig.text(0.1, 0.92, subtitle_text, fontweight="regular", fontsize=13, color='w')
fig.text(0.1, 0.8975, subsubtitle_text, fontweight="regular", fontsize=10, color='w')

# Add direction of play arrow
ax = fig.add_axes([0.042, 0.05, 0.18, 0.01])
ax.axis("off")
plt.arrow(0.51, 0.15, 0.1, 0, color="white")
fig.text(0.13, 0.03, "Direction of play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add footer text
fig.text(0.5, 0.04, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
ax = fig.add_axes([0.015, 0.877, 0.1, 0.1])
ax.axis("off")
ax.imshow(comp_logo)

# Add twitter logo
ax = fig.add_axes([0.92, 0.025, 0.04, 0.04])
ax.axis("off")
# Forward slashes resolve on Windows and POSIX alike. The previous backslash
# literal relied on invalid escape sequences and only worked on Windows.
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Save image
fig.savefig(f"top_defensive_actions/{league}-{year}-top-defensive-actions{file_pos_str.replace(' & ','-').replace(' ','-')}-{title_addition.replace(' ','-')}", dpi=300)


# --------------------------------------------------------------------------------
# /projects/06_competition_reports_top_teams/team_ball_winning.py:
# --------------------------------------------------------------------------------
# %% Create visualisation of team ball wins and mean win height
#
# Inputs:   Year to plot data from
#           League to plot data from
#           Date of run
#           Selection of whether to include percentages on visual
#           Selection of whether to brighten logo
#
# Output:   Heatmaps showing ball win zones for each team & mean ball win height

# %% Imports and parameters

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from PIL import Image, ImageEnhance
from mplsoccer.pitch import VerticalPitch, Pitch
import matplotlib.patheffects as path_effects
import os
import sys
import bz2
import pickle
import numpy as np
from collections import Counter
import highlight_text as htext
import glob

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname(
    (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.whoscored_custom_events as wce
import analysis_tools.pitch_zones as pz
import analysis_tools.whoscored_data_engineering as wde
import analysis_tools.logos_and_badges as lab

# %% User Inputs

# Select year
year = '2022'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Input run-date
run_date = '28/05/2023'

# Select whether to label %
label_pct = False

# Select whether to brighten logo
logo_brighten = True

# Select whether to use team colours
team_colour = False

# %% Get competition logo

comp_logo = lab.get_competition_logo(league, year, logo_brighten=logo_brighten)

# %% Get data


def _load_pbz2(path):
    """Return the object pickled inside the bz2-compressed file at *path*.

    The handle is closed as soon as the object has been read. Pickle executes
    arbitrary code on load, so only point this at trusted, locally generated data.
    """
    with bz2.BZ2File(path, 'rb') as handle:
        return pickle.load(handle)


# Season folders are named "<start year>_<two-digit end year>", e.g. 2022_23.
# Taking the last two digits of the following year also works for 2020, where
# stripping the substring '20' from the year left an empty string.
season_folder = f"{year}_{str(int(year) + 1)[-2:]}"
file_path = f"../../data_directory/whoscored_data/{season_folder}/{league}"
files = os.listdir(file_path)

# Collect per-match frames and concatenate once (avoids re-copying on every file)
match_event_frames = []
match_player_frames = []

# Load data
for file in files:
    if file == 'event-types.pbz2':
        event_types = _load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pbz2(f"{file_path}/{file}"))

events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()

# %% Isolate ball wins

successful = events_df['outcomeType'] == 'Successful'
interceptions = events_df[(events_df['eventType'] == 'Interception') & successful]
tackles = events_df[(events_df['eventType'] == 'Tackle') & successful]
pass_blocks = events_df[(events_df['eventType'] == 'BlockedPass') & successful]

ball_wins_df = pd.concat([interceptions, tackles, pass_blocks], axis=0)

# %% Get teams and order on mean height of ball recovery

# Sort alphabetically initially
teams = sorted(set(players_df['team']))

# Set up dictionary to store the mean ball win height (x coordinate) per team
team_ball_win_height = dict.fromkeys(teams, 0)
team_count = len(teams)

for team in teams:

    # Get team events
    team_id = players_df[players_df['team'] == team]['teamId'].values[0]
    team_ball_wins = ball_wins_df[ball_wins_df['teamId'] == team_id]

    # Get mean recovery height
    team_ball_win_height[team] = team_ball_wins['x'].mean()

# Convert to a list of (team, mean height) tuples, highest ball winners first
team_ball_win_height = sorted(team_ball_win_height.items(), key=lambda x: x[1], reverse=True)

# %% Custom colormap

CustomCmap = mpl.colors.LinearSegmentedColormap.from_list("", ["#313332","#47516B", "#848178", "#B2A66F", "#FDE636"])

# %% Create visual

# Overwrite rcparams
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Define grid dimensions
ncols = 4
nrows = int(np.ceil(len(team_ball_win_height)/ncols))

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=nrows, ncols=ncols, grid_height=0.8, title_height = 0.13, endnote_height = 0.04, space=0.12, axis=False)
fig.set_size_inches(14, 15)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# Outline applied to on-pitch text so it stays legible over the heatmap
path_eff = [path_effects.Stroke(linewidth=3, foreground='k'), path_effects.Normal()]

# Loop through each team (already ordered by mean ball win height)
for idx, (team_name, mean_win_height) in enumerate(team_ball_win_height):

    team_ax = ax['pitch'][idx]

    # Get team events
    team_id = players_df[players_df['team'] == team_name]['teamId'].values[0]
    team_ball_wins = ball_wins_df[ball_wins_df['teamId'] == team_id]

    # Get team logo and colour (looked up with the full name, before truncation)
    team_logo, team_cmap = lab.get_team_badge_and_colour(team_name)
    if len(team_name) > 14:
        team_name = team_name[0:13] + '...'

    # Set team colour
    if not team_colour:
        team_cmap = CustomCmap

    # Draw heatmap
    bin_statistic = pitch.bin_statistic(team_ball_wins['x'], team_ball_wins['y'],
                                        statistic='count', bins=(6, 5), normalize=True)
    pitch.heatmap(bin_statistic, team_ax, cmap=team_cmap, edgecolor='w', lw=0.5, zorder=0, alpha=0.7)

    # Draw mean ball win pos: a bright line with a thin black line either side
    pitch.lines(mean_win_height, 0.5, mean_win_height, 99.5, color=team_cmap(255), lw=3, zorder=2, ax=team_ax)
    pitch.lines(mean_win_height-1, 0.5, mean_win_height-1, 99.5, color='k', lw=1.5, zorder=4, ax=team_ax)
    pitch.lines(mean_win_height+1, 0.5, mean_win_height+1, 99.5, color='k', lw=1.5, zorder=4, ax=team_ax)
    team_ax.text(mean_win_height+3, 6, f"{round(mean_win_height,1)}%\nup pitch", fontsize=13, color='w',path_effects = path_eff)

    # Label heatmap zones with share of ball wins if selected
    if label_pct:
        labels = pitch.label_heatmap(bin_statistic, color='w', fontsize=10, fontweight = 'bold',
                                     ax=team_ax, ha='center', va='center', str_format='{:.0%}', path_effects=path_eff)

    team_ax.set_title(f" {idx + 1}: {team_name}", loc = "left", color='w', fontsize = 16)

    # Add team logo
    ax_pos = team_ax.get_position()
    logo_ax = fig.add_axes([ax_pos.x1-0.02, ax_pos.y1, 0.02, 0.02])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)

# Title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year)+1} - Teams Ranked by Average Ball Win Height"
# NOTE(review): two highlight_textprops entries are passed below but no <...>
# highlight spans are visible in this string; they were presumably lost when
# this listing was generated. Restore them from the original file before running.
subtitle_text = "Heatmaps showing Zones of and "
subsubtitle_text = f"Correct as of {run_date}"

fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=20, color='w')
htext.fig_text(0.12, 0.934, s=subtitle_text, fontweight="bold", fontsize=18, color='w',
               highlight_textprops=[{"color": 'yellow', "fontweight": 'bold'}, {"color": 'grey', "fontweight": 'bold'}])
fig.text(0.12, 0.9, subsubtitle_text, fontweight="regular", fontsize=16, color='w')

# Add direction of play arrow (points right-to-left: opposition play)
ax = fig.add_axes([0.042, 0.028, 0.18, 0.005])
ax.axis("off")
plt.arrow(0.61, 0.15, -0.1, 0, color="white")
fig.text(0.13, 0.02, "Direction of opposition play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add footer text
fig.text(0.5, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
ax.axis("off")
ax.imshow(comp_logo)

# Add twitter logo
ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
ax.axis("off")
# Portable forward-slash path (the backslash form only resolved on Windows)
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Make sure the output folder exists; savefig does not create it
os.makedirs("team_ball_winning", exist_ok=True)
fig.savefig(f"team_ball_winning/{league}-{year}-team-ball-winning", dpi=300)

# --------------------------------------------------------------------------------
# /projects/06_competition_reports_top_teams/team_fullback_combinations.py:
# --------------------------------------------------------------------------------
# %% Create visualisation of threat generated through passes between full backs
#
# Inputs:   Year to plot data from
#           League to plot data from
#           Date of run
#           Selection of whether to brighten logo
#           Time window after a pass within which a shot tags it as shot-creating

# %% Imports and parameters

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.cm as cm
from PIL import Image, ImageEnhance
from mplsoccer.pitch import VerticalPitch, Pitch
import matplotlib.patheffects as path_effects
import os
import sys
import bz2
import pickle
import numpy as np
from collections import Counter
import highlight_text as htext
import glob

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname(
    (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.whoscored_custom_events as wce
import analysis_tools.pitch_zones as pz
import analysis_tools.whoscored_data_engineering as wde
import analysis_tools.logos_and_badges as lab

# %% User Inputs

# Select year
year = '2022'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Input run-date
run_date = '28/05/2023'

# Select whether to brighten logo
logo_brighten = True

# Max time between a pass and shot that tags the pass as "shot-creating".
# Compared against 'cumulative_mins' below, so presumably minutes (1/6 = 10 s) - TODO confirm
min_delta = 1/6

# %% Get competition logo

comp_logo = lab.get_competition_logo(league, year, logo_brighten=logo_brighten)

# %% Get data


def _load_pbz2(path):
    """Return the object pickled inside the bz2-compressed file at *path*.

    The handle is closed as soon as the object has been read. Pickle executes
    arbitrary code on load, so only point this at trusted, locally generated data.
    """
    with bz2.BZ2File(path, 'rb') as handle:
        return pickle.load(handle)


# Season folders are named "<start year>_<two-digit end year>", e.g. 2022_23.
# Taking the last two digits of the following year also works for 2020, where
# stripping the substring '20' from the year left an empty string.
season_folder = f"{year}_{str(int(year) + 1)[-2:]}"
file_path = f"../../data_directory/whoscored_data/{season_folder}/{league}"
files = os.listdir(file_path)

# Collect per-match frames and concatenate once (avoids re-copying on every file)
match_event_frames = []
match_player_frames = []

# Load data
for file in files:
    if file == 'event-types.pbz2':
        event_types = _load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pbz2(f"{file_path}/{file}"))

events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()

# %% Synthesise additional info

# Pass recipient
events_df = wde.get_recipient(events_df)

# Add cumulative minutes played information
players_df = wde.minutes_played(players_df, events_df)
events_df = wde.cumulative_match_mins(events_df)

# %% Create dictionary of teams, and store full back pass combinations against each team

# Get all team names
teams = sorted(set(players_df['team']))

# Initialise dictionary to store full-back combos per team
team_fb_combos = dict.fromkeys(teams, 0)

# Initialise dictionary to store xT generated per match from full-back combos
team_fb_combo_xt = dict.fromkeys(teams, 0)

# Qualifier ids filtered out of the pass set. The original comment calls the
# remainder "in-play" passes, so these are presumably set-piece types - TODO confirm
excluded_pass_types = {31, 32, 33, 34, 212}

# Loop through each team
for team in teams:

    # Get team id and all matches team have played
    team_id = players_df[players_df['team'] == team]['teamId'].values[0]
    match_ids = set(players_df[players_df['team'] == team]['match_id'])
    match_combo_frames = []

    # Loop through each match
    for match_id in match_ids:

        # Get ids of full backs that started in match for team
        team_match_players = players_df[(players_df['teamId'] == team_id) & (players_df['match_id'] == match_id)]
        starting_rb_id = team_match_players[team_match_players['position'].isin(['DMR','DR'])].index.values.tolist()
        starting_lb_id = team_match_players[team_match_players['position'].isin(['DML','DL'])].index.values.tolist()

        # Only matches where both a LB/LWB and a RB/RWB start can contain combinations
        if not (starting_rb_id and starting_lb_id):
            continue
        rb_id, lb_id = starting_rb_id[0], starting_lb_id[0]

        # Get all events completed by team within match
        team_match_events = events_df[(events_df['teamId'] == team_id) & (events_df['match_id'] == match_id)]

        # Get in-play successful passes between the two full backs. Copy so the new
        # column is written to an independent frame, not a view of events_df.
        fb_pass_combos_match = team_match_events[
            (team_match_events['eventType'] == 'Pass') &
            (team_match_events['outcomeType'] == 'Successful') &
            (team_match_events['satisfiedEventsTypes'].apply(excluded_pass_types.isdisjoint)) &
            (((team_match_events['playerId'] == rb_id) & (team_match_events['pass_recipient'] == lb_id)) |
             ((team_match_events['playerId'] == lb_id) & (team_match_events['pass_recipient'] == rb_id)))].copy()

        # Flag each pass that is followed, in the same period and within min_delta,
        # by a shot from the same team (team_match_events is already limited to this match)
        leads_to_shot = []
        for _, fb_pass in fb_pass_combos_match.iterrows():
            following_evts = team_match_events[(team_match_events['period'] == fb_pass['period']) &
                                               (team_match_events['cumulative_mins'] > fb_pass['cumulative_mins']) &
                                               (team_match_events['cumulative_mins'] <= fb_pass['cumulative_mins'] + min_delta)]
            leads_to_shot.append(bool(following_evts['isShot'].eq(True).any()))

        # Positional assignment: safe even if event index labels repeat
        fb_pass_combos_match['leads_to_shot'] = np.array(leads_to_shot, dtype=bool)

        # Build up list of full back combos for team across multiple matches
        match_combo_frames.append(fb_pass_combos_match)

    fb_pass_combos = pd.concat(match_combo_frames) if match_combo_frames else pd.DataFrame()

    # Store full back combo in dictionary
    team_fb_combos[team] = fb_pass_combos

    # Calculate xT generated from full back combos per match played and store in dictionary.
    # A team that never started both full backs has no combinations and scores zero.
    if fb_pass_combos.empty:
        team_fb_combo_xt[team] = 0.0
    else:
        team_fb_combo_xt[team] = fb_pass_combos['xThreat_gen'].sum()/len(match_ids)

# Order teams by xT generated per match
team_order_xt_90 = sorted(team_fb_combo_xt, key=team_fb_combo_xt.get, reverse=True)

#%% Create plot of individual teams and full back passes

# Overwrite rcparams
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Define grid dimensions from the number of teams (matches the sibling reports)
ncols = 4
nrows = int(np.ceil(len(team_order_xt_90)/ncols))

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=nrows, ncols=ncols, grid_height=0.8, title_height = 0.13, endnote_height = 0.04, space=0.12, axis=False)
fig.set_size_inches(14, 15)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# Manual implementation of colourmap: 256 RGBA rows sampled from the upper part of viridis.
# plt.get_cmap is used because matplotlib.cm.get_cmap was removed in Matplotlib 3.9.
pass_cmap = plt.get_cmap('viridis')
pass_cmap = pass_cmap(np.linspace(0.35,1,256))

# Loop through each team
for idx, team in enumerate(team_order_xt_90):

    team_ax = ax['pitch'][idx]

    # Get team passes from dict (an empty frame has no columns to sort on)
    # NOTE(review): draw order uses 'xThreat' while colouring uses 'xThreat_gen' - confirm intended
    team_fb_passes = team_fb_combos[team]
    if not team_fb_passes.empty:
        team_fb_passes = team_fb_passes.sort_values('xThreat')

    # Loop through individual passes to format
    for _, pass_evt in team_fb_passes.iterrows():
        threat_gen = pass_evt['xThreat_gen']

        # Negligible (or missing) threat is drawn faint grey; otherwise colour scales up to 0.05
        if not threat_gen >= 0.001:
            line_colour = 'grey'
            line_alpha = 0.1
        else:
            line_colour = pass_cmap[int(255*min(threat_gen/0.05, 1))]
            line_alpha = 0.7

        # Format differently if event is followed by a shot
        if not pass_evt['leads_to_shot']:
            pitch.lines(pass_evt['x'], pass_evt['y'], pass_evt['endX'], pass_evt['endY'], color = line_colour, alpha = line_alpha,
                        comet=True, capstyle='round', lw=2, ax = team_ax, zorder = 2)
            pitch.scatter(pass_evt['endX'], pass_evt['endY'], color = line_colour, alpha = line_alpha+0.2, s=30, ax = team_ax, zorder = 3)
        else:
            pitch.lines(pass_evt['x'], pass_evt['y'], pass_evt['endX'], pass_evt['endY'], color = 'w', alpha = 0.7,
                        comet=True, capstyle='round', lw=2, ax = team_ax, zorder = 2)
            pitch.scatter(pass_evt['endX'], pass_evt['endY'], color = 'w', alpha = 0.9, s=30, ax = team_ax, zorder = 3)
            pitch.scatter(pass_evt['endX'], pass_evt['endY'], color = '#313332', alpha = 1, s=10, ax = team_ax, zorder = 3)

    # Add xT text to plot
    team_ax.text(2, 3, "xT / match", fontsize=8, fontweight='bold', color='w', zorder=3)
    team_ax.text(28, 3, round(team_fb_combo_xt[team],3), fontsize=8, color='w', zorder=3)

    # Add team logo
    team_logo, _ = lab.get_team_badge_and_colour(team)

    ax_pos = team_ax.get_position()

    logo_ax = fig.add_axes([ax_pos.x1-0.025, ax_pos.y1, 0.025, 0.025])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)

    # Add title
    team_ax.set_title(f" {idx + 1}: {team}", loc = "left", color='w', fontsize = 14)

# Title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year)+1} − Threat Generated through Full Back Interplay"
# NOTE(review): a highlight_textprops entry is passed below but no <...> highlight
# span is visible in this string; it was presumably lost when this listing was
# generated. Restore it from the original file before running.
subtitle_text = "Successful in-play passes between Full Backs shown and coloured by for each team"
subsubtitle_text = f"Teams ranked by mean threat generated by passes between full-backs in starting XI. Correct as of {run_date}"

fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=20, color='w')
htext.fig_text(0.12, 0.934, s=subtitle_text, fontweight="regular", fontsize=18, color='w',
               highlight_textprops=[{"color": 'yellow', "fontweight": 'bold'}])
fig.text(0.12, 0.9, subsubtitle_text, fontweight="regular", fontsize=14, color='w')

# Add direction of play arrow
ax = fig.add_axes([0.042, 0.028, 0.18, 0.005])
ax.axis("off")
plt.arrow(0.51, 0.15, 0.1, 0, color="white")
fig.text(0.13, 0.015, "Direction of play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add legend: a row of hexagons, staggered vertically, running from negligible to 0.05 xT
legend_ax = fig.add_axes([0.245, 0.01, 0.2, 0.04])
legend_ax.axis("off")
legend_ax.set_xlim([0, 8])
legend_ax.set_ylim([0, 1])
hex_count = 6
path_eff = [path_effects.Stroke(linewidth=1.25, foreground='k'), path_effects.Normal()]

for hex_idx in np.arange(0,hex_count):

    ypos = 0.36 if hex_idx%2 == 0 else 0.64
    xpos = hex_idx/1.05 + 2.5

    if hex_idx == 0:
        xt = '<0.001'
        color = 'grey'
    else:
        # Fraction of the way along the colour scale; the first coloured hexagon
        # sits at the 0.001 threshold, so it keeps three decimal places
        scale_pos = (hex_idx-1)/(hex_count-2)
        xt = round(0.001 + (0.05-0.001) * scale_pos, 3 if hex_idx == 1 else 2)
        color = pass_cmap[int(255*(hex_idx-1)/(hex_count-2))]

    legend_ax.scatter(xpos, ypos, marker='H', s=600, color=color, edgecolors=None)
    legend_ax.text(xpos+0.03, ypos-0.02, xt, color='w', fontsize = 8, ha = "center", va = "center", path_effects = path_eff)

legend_ax.text(0.1, 0.5, "xThreat:", color='w', fontsize = 10, ha = "left", va = "center", fontweight="regular")

legend_ax_2 = fig.add_axes([0.46, 0.01, 0.2, 0.04])
legend_ax_2.axis("off")
legend_ax_2.scatter(0.1, 0.5, color = 'w', alpha = 1, s=80)
legend_ax_2.scatter(0.1, 0.5, color = '#313332', alpha = 1, s=30)
legend_ax_2.text(0.15, 0.5, '= Shot-creating action', va = "center", color = 'w')
legend_ax_2.set_xlim([0, 1])
legend_ax_2.set_ylim([0, 1])

# Add footer text
fig.text(0.77, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
ax.axis("off")
ax.imshow(comp_logo)

# Add twitter logo
ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
ax.axis("off")
# Portable forward-slash path (the backslash form only resolved on Windows)
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Make sure the output folder exists; savefig does not create it
os.makedirs("team_fullback_combinations", exist_ok=True)
fig.savefig(f"team_fullback_combinations/{league}-{year}-team_fullback_combinations", dpi=300)

# --------------------------------------------------------------------------------
# /projects/06_competition_reports_top_teams/team_threat_creation.py:
# --------------------------------------------------------------------------------
# %% Create visualisation of team threat creation zones
#
# Inputs:   Year to plot data from
#           League to plot data from
#           Date of run
#           Selection of whether to include percentages on visual
#           Selection of whether to brighten logo

# %% Imports and parameters

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from PIL import Image, ImageEnhance
from mplsoccer.pitch import VerticalPitch, Pitch
import matplotlib.patheffects as path_effects
import os
import sys
import bz2
import pickle
import numpy as np
from collections import Counter
import highlight_text as htext
import glob

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname(
    (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.whoscored_custom_events as wce
import analysis_tools.pitch_zones as pz
import analysis_tools.whoscored_data_engineering as wde
import analysis_tools.logos_and_badges as lab

# %% User Inputs

# Select year
year = '2023'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Input run-date
run_date = '04/09/2023'

# Select whether to label %
label_pct = False

# Logo brighten
logo_brighten = True

# Select whether to use team colours
team_colour = False

# %% Get competition logo

comp_logo = lab.get_competition_logo(league, year, logo_brighten=logo_brighten)

# %% Get data for current year


def _load_pbz2(path):
    """Return the object pickled inside the bz2-compressed file at *path*.

    The handle is closed as soon as the object has been read. Pickle executes
    arbitrary code on load, so only point this at trusted, locally generated data.
    """
    with bz2.BZ2File(path, 'rb') as handle:
        return pickle.load(handle)


# Season folders are named "<start year>_<two-digit end year>", e.g. 2023_24.
# Taking the last two digits of the following year also works for 2020, where
# stripping the substring '20' from the year left an empty string.
season_folder = f"{year}_{str(int(year) + 1)[-2:]}"
file_path = f"../../data_directory/whoscored_data/{season_folder}/{league}"
files = os.listdir(file_path)

# Collect per-match frames and concatenate once (avoids re-copying on every file)
match_event_frames = []
match_player_frames = []

# Load data
for file in files:
    if file == 'event-types.pbz2':
        event_types = _load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pbz2(f"{file_path}/{file}"))

events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()


# %% Isolate events of choice (in play only)

# Keep events that carry an xThreat value (NaN never equals itself, hence notna)
threat_creating_events_df = events_df[events_df['xThreat'].notna()]

# Drop events tagged with any of these qualifier ids. The cell title says "in play
# only", so these are presumably set-piece types - TODO confirm
excluded_event_types = {31, 34, 212}
threat_creating_events_df = threat_creating_events_df[
    threat_creating_events_df['satisfiedEventsTypes'].apply(excluded_event_types.isdisjoint)]

# %% Get teams and order on total threat created

# Sort alphabetically initially
teams = sorted(set(players_df['team']))

# Set up dictionary to store xt per 90 per team
team_xt_90 = dict.fromkeys(teams, 0)
team_count = len(teams)

# Length of every match, computed once instead of rescanning all events per team
match_duration = events_df.groupby('match_id')['cumulative_mins'].max()

for team in teams:

    # Get team events
    team_id = players_df[players_df['team'] == team]['teamId'].values[0]
    team_threat_creating_events = threat_creating_events_df[threat_creating_events_df['teamId'] == team_id]

    # Get each team match and accumulate total mins
    team_matches = set(team_threat_creating_events['match_id'])
    team_mins = match_duration.loc[list(team_matches)].sum()

    # Team xT created per 90 (zero when the team has no recorded minutes)
    if team_mins > 0:
        team_xt_90[team] = 90*(team_threat_creating_events['xThreat_gen'].sum() / team_mins)
    else:
        team_xt_90[team] = 0.0

# Convert to a list of (team, xT/90) tuples, sorted by xT/90
team_xt_90 = sorted(team_xt_90.items(), key=lambda x: x[1], reverse=True)

# %% Custom colormap

CustomCmap = mpl.colors.LinearSegmentedColormap.from_list("", ["#313332","#47516B", "#848178", "#B2A66F", "#FDE636"])

# %% Create visual

# Overwrite rcparams
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Path effects
path_eff = [path_effects.Stroke(linewidth=4, foreground='#313332'), path_effects.Normal()]

# Define grid dimensions
ncols = 4
nrows = int(np.ceil(len(team_xt_90)/ncols))

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=nrows, ncols=ncols, grid_height=0.8, title_height = 0.13, endnote_height = 0.04, space=0.12, axis=False)
fig.set_size_inches(14, 15)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)
idx = 0

# Loop through each team
for team in team_xt_90:

    # Get team name and events
    team_name = team[0]
    team_id = players_df[players_df['team'] == team_name]['teamId'].values[0]
    team_threat_creating_events = threat_creating_events_df[threat_creating_events_df['teamId'] == team_id]

    # Get team logo and colour (looked up with the full name, before truncation)
    team_logo, team_cmap = lab.get_team_badge_and_colour(team[0])
    if len(team_name) > 14:
        team_name = team_name[0:13] + '...'
159 | 160 | # Set team colour 161 | if not team_colour: 162 | team_cmap = CustomCmap 163 | 164 | # Draw heatmap 165 | bin_statistic = pitch.bin_statistic(team_threat_creating_events['x'], team_threat_creating_events['y'], 166 | statistic='sum', bins=(6, 5), normalize=True, values = team_threat_creating_events['xThreat_gen']) 167 | pitch.heatmap(bin_statistic, ax['pitch'][idx], cmap=team_cmap, edgecolor='w', lw=0.5, zorder=0, alpha=0.7) 168 | 169 | # Label heatmap zones with pressure count if selected 170 | path_eff = [path_effects.Stroke(linewidth=1.5, foreground='#313332'), path_effects.Normal()] 171 | if label_pct: 172 | labels = pitch.label_heatmap(bin_statistic, color='w', fontsize=10, fontweight = 'bold', 173 | ax=ax['pitch'][idx], ha='center', va='center', str_format='{:.0%}', path_effects=path_eff) 174 | 175 | # Label xt 176 | ax['pitch'][idx].text(2, 2, "xT/90:", fontsize=10, fontweight='bold', color='w', zorder=3, path_effects = path_eff) 177 | ax['pitch'][idx].text(24, 2, round(team[1],2), fontsize=10, color='w', zorder=3, path_effects = path_eff) 178 | 179 | # Set title 180 | ax['pitch'][idx].set_title(f" {idx + 1}: {team_name}", loc = "left", color='w', fontsize = 16) 181 | 182 | ax_pos = ax['pitch'][idx].get_position() 183 | 184 | logo_ax = fig.add_axes([ax_pos.x1-0.02, ax_pos.y1, 0.02, 0.02]) 185 | logo_ax.axis("off") 186 | logo_ax.imshow(team_logo) 187 | 188 | idx+=1 189 | 190 | # Title 191 | leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A', 192 | 'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier Leauge', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup', 193 | 'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'} 194 | 195 | title_text = f"{leagues[league]} {year}/{int(year)+1} - Teams Ranked by In-Play Threat Creation" 196 | subtitle_text = "Heatmaps showing Zones of and " 197 | subsubtitle_text = f"Pass, Carry and Dribble events included. Negative threat events excluded. 
Correct as of {run_date}" 198 | 199 | fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=20, color='w') 200 | htext.fig_text(0.12, 0.934, s=subtitle_text, fontweight="bold", fontsize=18, color='w', 201 | highlight_textprops=[{"color": 'yellow', "fontweight": 'bold'}, {"color": 'grey', "fontweight": 'bold'}]) 202 | fig.text(0.12, 0.9, subsubtitle_text, fontweight="regular", fontsize=16, color='w') 203 | 204 | # Add direction of play arrow 205 | ax = fig.add_axes([0.042, 0.028, 0.18, 0.005]) 206 | ax.axis("off") 207 | plt.arrow(0.51, 0.15, 0.1, 0, color="white") 208 | fig.text(0.13, 0.02, "Direction of play", ha="center", fontsize=10, color="white", fontweight="regular") 209 | 210 | # Add footer text 211 | fig.text(0.5, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.", 212 | fontstyle="italic", ha="center", fontsize=9, color="white") 213 | 214 | # Add competition logo 215 | ax = fig.add_axes([0.017, 0.88, 0.1, 0.1]) 216 | ax.axis("off") 217 | ax.imshow(comp_logo) 218 | 219 | # Add twitter logo 220 | ax = fig.add_axes([0.92, 0.005, 0.04, 0.04]) 221 | ax.axis("off") 222 | badge = Image.open('..\..\data_directory\misc_data\images\JK Twitter Logo.png') 223 | ax.imshow(badge) 224 | 225 | fig.savefig(f"team_threat_creation/{league}-{year}-team-threat-creation", dpi=300) 226 | -------------------------------------------------------------------------------- /projects/06_competition_reports_top_teams/xg_league_table_sb.py: -------------------------------------------------------------------------------- 1 | # %% Imports 2 | 3 | import pandas as pd 4 | import bz2 5 | import os 6 | import pickle 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | import sys 10 | import numpy as np 11 | import time 12 | from mplsoccer import Pitch, VerticalPitch 13 | import matplotlib.patheffects as path_effects 14 | import matplotlib as mpl 15 | import requests 16 | from PIL import Image, ImageEnhance 17 | from io import BytesIO 18 | from datetime 
from datetime import datetime
import textwrap as tw
from mplsoccer import PyPizza
from matplotlib.offsetbox import OffsetImage, AnnotationBbox

# %% Add custom tools to path

# The repository root is three directory levels above this script; it is added
# to sys.path so that the analysis_tools package can be imported.
root_folder = os.path.abspath(
    os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
sys.path.append(root_folder)

import analysis_tools.get_football_data as gfd
import analysis_tools.statsbomb_custom_events as sce
import analysis_tools.statsbomb_data_engineering as sde
import analysis_tools.logos_and_badges as lab

# %% User inputs

# Data to use: [country, competition, season start year]
data_grab = [['England', 'Premier League', '2023']]

# %% Load data

# Statsbomb
data_dict = gfd.load_statsbomb_sql(data_grab, events=True, matches=True, lineups=True, player_stats=True)
events_df = data_dict['events']
matches_df = data_dict['matches']
lineups_df = data_dict['lineups']
playerstats_df = data_dict['player_stats']

# Logo
comp_logo = lab.get_competition_logo(data_grab[0][1], data_grab[0][2], logo_brighten=True)

# %% Error metrics

# Window after an error in which opposition events are attributed to it.
# Expressed in the units of 'cumulative_mins' (15/60, i.e. 15 seconds if that
# column is in minutes).
error_window = 15 / 60

all_errors = events_df[events_df['type_name'] == 'Error']

# Collect the opposition events that follow each error, then concatenate once
# (concatenating inside the loop re-copies the accumulated frame every time).
post_error_frames = []
for _, error in all_errors.iterrows():
    follows_error = (
        (events_df['match_id'] == error['match_id'])
        & (events_df['period'] == error['period'])
        & (events_df['cumulative_mins'] >= error['cumulative_mins'])
        & (events_df['cumulative_mins'] <= error['cumulative_mins'] + error_window)
        & (events_df['team_name'] != error['team_name'])
    )
    post_error_frames.append(events_df[follows_error])

# With no errors at all, fall back to an empty frame that still carries the
# event columns, so the per-team filtering below does not raise KeyError.
# NOTE(review): events inside two overlapping error windows appear twice here
# and are therefore counted twice in 'xg_against_following_error' - confirm
# whether that is intended.
if post_error_frames:
    events_following_error = pd.concat(post_error_frames)
else:
    events_following_error = events_df.iloc[0:0]

# %% Get team information

# Get team list
teaminfo_df = sde.create_team_list(lineups_df)

# In-play shots, goals and xG
shots_for = events_df[events_df['type_name'] == 'Shot']
ip_shots_for = shots_for[shots_for['in_play_event'] == 1]
ip_goals_for = pd.concat([ip_shots_for[ip_shots_for['outcome_name'] == 'Goal'],
                          events_df[events_df['type_name'] == 'Own Goal For']])
teaminfo_df = sde.group_team_events(shots_for, teaminfo_df, group_type='sum',
                                    agg_columns='shot_statsbomb_xg', primary_event_name='xg_for')
teaminfo_df = sde.group_team_events(ip_shots_for, teaminfo_df, group_type='sum',
                                    agg_columns='shot_statsbomb_xg', primary_event_name='ip_xg_for')
teaminfo_df = sde.group_team_events(ip_goals_for, teaminfo_df, group_type='count',
                                    primary_event_name='ip_goals_for')

# In play offensive OBV
ip_obv_events = events_df[(events_df['type_name'].isin(['Pass', 'Carry', 'Dribble'])) &
                          (events_df['in_play_event'] == 1)]
teaminfo_df = sde.group_team_events(ip_obv_events, teaminfo_df, group_type='sum',
                                    agg_columns='obv_for_net_z', primary_event_name='ip_xt_for')

# In-play shots, goals and xG against
for team_name in teaminfo_df.index:

    # Get matches and events by team
    team_matches = (matches_df['home_team'] == team_name) | (matches_df['away_team'] == team_name)
    match_ids = matches_df[team_matches]['match_id'].tolist()
    team_match_evts = events_df[events_df['match_id'].isin(match_ids)]
    evts_against = team_match_evts[team_match_evts['team_name'] != team_name]
    shots_against = evts_against[evts_against['type_name'] == 'Shot']
    ip_shots_against = shots_against[shots_against['in_play_event'] == 1]
    ip_goals_against = pd.concat([ip_shots_against[ip_shots_against['outcome_name'] == 'Goal'],
                                  evts_against[evts_against['type_name'] == 'Own Goal For']])
    ip_obv_events_against = evts_against[(evts_against['type_name'].isin(['Pass', 'Carry', 'Dribble'])) &
                                         (evts_against['in_play_event'] == 1)]
    teaminfo_df.loc[team_name, 'xg_against'] = shots_against['shot_statsbomb_xg'].sum(numeric_only=True)
    teaminfo_df.loc[team_name, 'ip_xg_against'] = ip_shots_against['shot_statsbomb_xg'].sum(numeric_only=True)
    teaminfo_df.loc[team_name, 'ip_goals_against'] = len(ip_goals_against)
    teaminfo_df.loc[team_name, 'ip_xt_against'] = ip_obv_events_against['obv_for_net_z'].sum(numeric_only=True)

    # Opposition in-play shots taken inside a post-error window in this team's matches
    team_post_error_evts_against = events_following_error[
        (events_following_error['match_id'].isin(match_ids)) &
        (events_following_error['team_name'] != team_name)]
    post_error_shots_against = team_post_error_evts_against[
        (team_post_error_evts_against['type_name'] == 'Shot') &
        (team_post_error_evts_against['in_play_event'] == 1)]
    teaminfo_df.loc[team_name, 'xg_against_following_error'] = \
        post_error_shots_against['shot_statsbomb_xg'].sum(numeric_only=True)

# Derived team metrics
teaminfo_df['non-error_ip_xg_against'] = teaminfo_df['ip_xg_against'] - teaminfo_df['xg_against_following_error']
teaminfo_df['xg_difference'] = teaminfo_df['xg_for'] - teaminfo_df['xg_against']
teaminfo_df['ip_xg_difference'] = teaminfo_df['ip_xg_for'] - teaminfo_df['ip_xg_against']
teaminfo_df['ip_xg_xt_ratio'] = teaminfo_df['ip_xg_for'] / teaminfo_df['ip_xt_for']
teaminfo_df['ip_xg_xt_against_ratio'] = teaminfo_df['ip_xg_against'] / teaminfo_df['ip_xt_against']
teaminfo_df['ip_goal_xg_ratio'] = teaminfo_df['ip_goals_for'] / teaminfo_df['ip_xg_for']
teaminfo_df['ip_goal_xg_against_ratio'] = teaminfo_df['ip_goals_against'] / teaminfo_df['ip_xg_against']

# Exploratory combinations of the two ratios. Work on an explicit copy so the
# new columns are not written to a slice of teaminfo_df.
ti = teaminfo_df[['ip_xg_xt_ratio', 'ip_goal_xg_ratio']].copy()
ti['product'] = ti['ip_xg_xt_ratio'] * ti['ip_goal_xg_ratio']
ti['mean'] = (ti['ip_xg_xt_ratio'] + ti['ip_goal_xg_ratio']) / 2
# Harmonic mean of two values: 2 / (1/a + 1/b)
ti['h_mean'] = 2 / ((1 / ti['ip_xg_xt_ratio']) + (1 / ti['ip_goal_xg_ratio']))

# %% Normalise

# Add a per-90 version of every xG / xT total (ratios are already scale-free).
# Iterate over a snapshot of the column names because columns are added in the loop.
for column in list(teaminfo_df.columns):

    if ('xg' in column or 'xt' in column) and ('ratio' not in column):
        teaminfo_df[column + '_90'] = 90 * teaminfo_df[column] / teaminfo_df['time_played']

# %% VISUAL 1: XG AND XT RATIO SCATTER

# rc params
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Metrics to plot (column names in teaminfo_df)
plot_y = 'ip_goal_xg_ratio'
plot_x = 'ip_xg_xt_ratio'

# Set up figure
fig, ax = plt.subplots(figsize=(8.5, 9), facecolor='#313332')
ax.patch.set_alpha(0)

# Format axes
ax.spines['bottom'].set_color('w')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_color('w')
ax.grid(color='gray', alpha=0.2)

# Label axes
ax.set_ylabel("Ratio between In-Play Goals and Expected Goals (Goals/xG)",
              labelpad=10, fontweight="bold", fontsize=12, color='w')
ax.set_xlabel("Ratio between In-Play Expected Goals and Expected Threat (xG/xT)",
              labelpad=10, fontweight="bold", fontsize=12, color='w')

# Define axis limits: data range rounded outwards to one decimal place.
# Scalars are passed to Matplotlib rather than one-element arrays.
xmin = np.floor(10 * teaminfo_df[plot_x].min()) / 10
xmax = np.ceil(10 * teaminfo_df[plot_x].max()) / 10
ymin = np.floor(10 * teaminfo_df[plot_y].min()) / 10
ymax = np.ceil(10 * teaminfo_df[plot_y].max()) / 10
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)

# Iterate through each team and draw its badge at the team's (x, y) position
for team, team_metrics in teaminfo_df.iterrows():

    # Get logo
    team_logo, _ = lab.get_team_badge_and_colour(team)

    # Plot logo
    ab = AnnotationBbox(OffsetImage(team_logo, zoom=0.07, resample=True),
                        (team_metrics[plot_x], team_metrics[plot_y]), frameon=False)
    ax.add_artist(ab)

# %% VISUAL 2: XG AND XT RATIO TABLE

fig, ax = plt.subplots(figsize=(8, 9.5), facecolor='#313332')
ax.patch.set_alpha(0)

# Sort
teaminfo_df = teaminfo_df.sort_values('ip_xg_xt_ratio', ascending=False)

# Title
title_text = f"{data_grab[0][1]} {data_grab[0][2]}/{str(int(data_grab[0][2]) + 1).replace('20','',1)}"
subtitle_text = "Team Chance Creation Effectiveness and Chance Conversion Effectiveness"
fig.text(0.12, 0.935, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.12, 0.907, subtitle_text, fontweight="bold", fontsize=11, color='w')

# Add competition logo
comp_ax = fig.add_axes([0.015, 0.879, 0.1, 0.1])
comp_ax.axis("off")
comp_ax.imshow(comp_logo)

# Horizontal Header lines
ax.plot([0.05, 1], [0.995, 0.995], color='w', zorder=3)
ax.plot([0, 1], [0.93, 0.93], color='w', zorder=3)
ax.plot([0, 1], [-0.02, -0.02], color='w', zorder=3)
ax.text(0.08, 0.96, "Team", ha="left", va="center", fontweight="bold", color="w")
ax.text(0.5875, 0.96, "Open-play\nxG/xT Ratio", ha="center", va="center", fontweight="bold", color="w")
ax.text(0.8625, 0.96, "Open-play\nGoals/xG Ratio", ha="center", va="center", fontweight="bold", color="w")

# Vertical Header lines
ax.plot([0.002, 0.002], [-0.02, 0.93], color='w', zorder=2)
ax.plot([0.05, 0.05], [-0.02, 0.93], color='grey', lw=0.5, zorder=2)
ax.plot([0.05, 0.05], [-0.02, 0.995], color='w', zorder=2)
ax.plot([0.45, 0.45], [-0.02, 0.93], color='grey', lw=0.5, zorder=2)
ax.plot([0.45, 0.45], [0.93, 0.995], color='w', lw=0.5, zorder=2)
ax.plot([0.725, 0.725], [-0.02, 0.93], color='grey', lw=0.5, zorder=2)
ax.plot([0.725, 0.725], [0.93, 0.995], color='w', lw=0.5, zorder=2)
ax.plot([0.999, 0.999], [-0.02, 0.995], color='w', zorder=2)

# Rows are spread evenly from y=0.9 (first team) down to y=0 (last team).
# The spacing follows the number of teams instead of assuming a 20-team league.
row_intervals = max(len(teaminfo_df) - 1, 1)

# Iterate through each team
for idx, (team, team_metrics) in enumerate(teaminfo_df.iterrows()):

    row_y = 0.9 * (1 - idx / row_intervals)

    # Plot rank, team name and badge
    ax.text(0.025, row_y, idx + 1, va="center", ha="center", color="w")
    ax.text(0.11, row_y, team, va="center", color="w")
    team_logo, _ = lab.get_team_badge_and_colour(team)
    ab = AnnotationBbox(OffsetImage(team_logo, zoom=0.06, resample=True),
                        (0.08, row_y + 0.003), frameon=False)
    ax.add_artist(ab)

    # Plot metrics (fixed two decimals so the columns line up)
    ax.text(0.5875, row_y, f"{team_metrics['ip_xg_xt_ratio']:.2f}", va="center", ha="center", color="w")
    ax.text(0.8625, row_y, f"{team_metrics['ip_goal_xg_ratio']:.2f}", va="center", ha="center", color="w")

    # Plot hline under the row
    ax.plot([0, 1], [row_y - 0.02, row_y - 0.02], color='grey', lw=0.5, zorder=1)

# Format axis
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
ax.yaxis.label.set_color('w')
ax.set_position([0.1, 0.06, 0.8, 0.81], which='both')
ax.set_xlim([0, 1])
ax.set_ylim([-0.03, 1])

# Create footer
fig.text(0.5, 0.024, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo. The path is built from the repository root so that it works
# on any operating system and from any working directory (the previous
# backslash literal relied on invalid escape sequences and Windows separators).
badge_ax = fig.add_axes([0.94, 0.007, 0.05, 0.05])
badge_ax.axis("off")
badge = Image.open(os.path.join(root_folder, 'data_directory', 'misc_data', 'images', 'JK Twitter Logo.png'))
badge_ax.imshow(badge)

# Save, creating the output folder on first use
os.makedirs("team_xg_metrics", exist_ok=True)
fig.savefig(f"team_xg_metrics/{data_grab[0][1].replace(' ','-').lower()}-{data_grab[0][2]}-xg-xt-table", dpi=300)

# --------------------------------------------------------------------------------
# /projects/10_team_buildup_passes/team_pass_tendencies.py:
# --------------------------------------------------------------------------------
# %% Imports and parameters

import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from PIL import Image, ImageEnhance
from mplsoccer.pitch import VerticalPitch, Pitch
import matplotlib.patheffects as path_effects
import os
import sys
import bz2
import pickle
import numpy as np
from collections import Counter
from collections import Counter
import highlight_text as htext
import glob
import joblib
from scipy.spatial import Delaunay
from sklearn.base import BaseEstimator, TransformerMixin
from time import time

# %% Add custom tools to path

# The repository root is three directory levels above this script; it is added
# to sys.path so that the analysis_tools package can be imported.
root_folder = os.path.abspath(os.path.dirname(
    (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.get_football_data as gfd
import analysis_tools.whoscored_custom_events as wce
import analysis_tools.models as models
import analysis_tools.whoscored_data_engineering as wde
import analysis_tools.logos_and_badges as lab

# %% User Inputs

# Select year
year = '2022'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Select team
team = 'Brighton'

# %% Get whoscored data and get statsbomb data


def _load_pickle(path):
    """Return the object pickled in the bz2-compressed file at *path*.

    The file handle is closed as soon as the object has been read.
    """
    # NOTE: pickle can execute arbitrary code - only load trusted project data.
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


# Season folders are named '<start year>_<two-digit end year>', e.g. '2022_23'.
# The end year is derived arithmetically: stripping every '20' from the year
# string fails for '2020' (empty string) and drops the leading zero for e.g. '2008'.
season_folder = f"{year}_{(int(year) + 1) % 100:02d}"
file_path = os.path.join(root_folder, 'data_directory', 'whoscored_data', season_folder, league)
files = os.listdir(file_path)

# Per-match frames are collected and concatenated once after the loop
match_event_frames = []
match_player_frames = []

# Load whoscored data
for file in files:
    full_path = os.path.join(file_path, file)
    if file == 'event-types.pbz2':
        event_types = _load_pickle(full_path)
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pickle(full_path)
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pickle(full_path))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pickle(full_path))

# Storage dataframes (empty when no matching files were found)
events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()

# %% Isolate matches that team feature in

# Get match ids and team id
team_player_rows = players_df[players_df['team'] == team]
team_match_ids = team_player_rows['match_id'].unique()
team_id = team_player_rows['teamId'].unique()[0]

# Filter events
team_events_df = events_df[events_df['match_id'].isin(team_match_ids)]

# %% Group possessions and count passes in each possession

team_events_df = wce.get_possession_chains(team_events_df)
team_events_df = team_events_df[team_events_df['teamId'] == team_id]

# Possession chains are collected and concatenated once after the loops
possession_chains = []

# Iterate over match
for match_id in team_match_ids:

    match_events = team_events_df[team_events_df['match_id'] == match_id]
    team_possession_ids = match_events[match_events['possession_team'] == team_id]['possession_id'].unique()

    # Iterate over possessions
    for pos_id in team_possession_ids:

        # Get possession chain and count up passes
        possession_chain = match_events[match_events['possession_id'] == pos_id].copy()
        possession_chain['evt_number'] = np.arange(1, len(possession_chain) + 1)
        possession_chain['pass_number'] = np.nan

        # First pass can be a corner, others can't (rows whose
        # satisfiedEventsTypes contains 31 are excluded after the first pass)
        first_pass = possession_chain[(possession_chain['eventType'] == 'Pass')].head(1)
        if len(first_pass) == 1:
            later_passes = possession_chain[
                (possession_chain['eventId'] != first_pass['eventId'].values[0]) &
                (possession_chain['eventType'] == 'Pass') &
                (~possession_chain['satisfiedEventsTypes'].apply(lambda x: 31 in x))]
            possession_chain_passes = pd.concat([first_pass, later_passes])
            possession_chain.loc[possession_chain_passes.index.values, 'pass_number'] = \
                np.arange(1, len(possession_chain_passes) + 1)

        possession_chains.append(possession_chain)

pos_chain_df = pd.concat(possession_chains) if possession_chains else pd.DataFrame()

# %% Identify possession chains starting in each third

# Label the first event of every chain with the third it starts in ...
is_chain_start = pos_chain_df['evt_number'] == 1
pos_chain_df.loc[is_chain_start & (pos_chain_df['x'] < 100/3), 'pos_start'] = 'Own 3'
pos_chain_df.loc[is_chain_start & (pos_chain_df['x'] >= 100/3) & (pos_chain_df['x'] < 200/3), 'pos_start'] = 'Mid 3'
pos_chain_df.loc[is_chain_start & (pos_chain_df['x'] >= 200/3), 'pos_start'] = 'Opp 3'
# ... and carry that label forward over the remaining events of the chain
# (.ffill() replaces the deprecated fillna(method='ffill'))
pos_chain_df['pos_start'] = pos_chain_df['pos_start'].ffill()

# %% Cluster passes in each possession zone

# NOTE(review): these aliases are not used directly in this script; presumably
# they must exist in this namespace for the saved clustering model to load -
# confirm against analysis_tools.models before removing.
convertYards = models.convertYards
customScaler = models.customScaler
pos_chain_df = models.get_pass_clusters(pos_chain_df)

# %% Plot first 6 passes in each possession type


def _ordinal_suffix(number):
    """Return the English ordinal suffix ('st', 'nd', 'rd' or 'th') for *number*."""
    if 10 <= number % 100 <= 20:
        return 'th'
    return {1: 'st', 2: 'nd', 3: 'rd'}.get(number % 10, 'th')


# Choose number of passes to plot
first_n_passes = 6
clusters_shown = 5
cluster_colours = ['orchid', 'cornflowerblue', 'mediumseagreen', 'khaki', 'lightcoral', 'lightgrey']

# Set up pitch and figure
pitch = VerticalPitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=3, ncols=first_n_passes, title_height=0.155, grid_height=0.785,
                     endnote_height=0.03, space=0.07, axis=False)
fig.set_size_inches(10, 10)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# One grid row per starting third: (pos_start label, lower x bound, upper x bound)
thirds = [('Opp 3', 200/3, 100), ('Mid 3', 100/3, 200/3), ('Own 3', 0, 100/3)]

# Iterate through each pitch
for idx in range(3 * first_n_passes):

    # Grid row selects the third, grid column selects the pass number
    third_idx, pass_idx = divmod(idx, first_n_passes)
    pos_start, x_low, x_high = thirds[third_idx]
    pass_num = pass_idx + 1
    pitch_ax = ax['pitch'][idx]

    # Outline the third that the possessions start in
    outline_kwargs = dict(lw=3, color='lightsteelblue', zorder=1, ax=pitch_ax)
    pitch.lines(x_high, 101.5, x_high, -1.5, **outline_kwargs)
    pitch.lines(x_low, 0, x_high, 0, **outline_kwargs)
    pitch.lines(x_low, 100, x_high, 100, **outline_kwargs)
    pitch.lines(x_low, -1.5, x_low, 101.5, **outline_kwargs)
    possession_plot = pos_chain_df[pos_chain_df['pos_start'] == pos_start]

    # Passes to plot: the nth pass of each chain, restricted to the most common clusters
    pass_plot = possession_plot[possession_plot['pass_number'] == pass_num]
    pass_top_cluster_ids = (pass_plot.groupby('pass_cluster_id').count()['id']
                            .sort_values(ascending=False).head(clusters_shown).index.values)
    cluster_rank_dict = dict(zip(pass_top_cluster_ids, np.arange(1, len(pass_top_cluster_ids) + 1)))
    cluster_color_dict = dict(zip(pass_top_cluster_ids, cluster_colours[0:clusters_shown]))
    pass_top_clusters = pass_plot[pass_plot['pass_cluster_id'].isin(pass_top_cluster_ids)].copy()
    pass_top_clusters['cluster_rank'] = pass_top_clusters['pass_cluster_id'].apply(lambda x: cluster_rank_dict[x])
    pass_top_clusters['cluster_c'] = pass_top_clusters['pass_cluster_id'].apply(lambda x: cluster_color_dict[x])
    # Lowest-ranked clusters are drawn first so the most common end up on top
    pass_top_clusters = pass_top_clusters.sort_values('cluster_rank', ascending=False)

    # Plot passes
    for _, single_pass in pass_top_clusters.iterrows():
        pitch.lines(single_pass['x'], single_pass['y'], single_pass['endX'], single_pass['endY'],
                    lw=1.5, comet=False, capstyle='round', color=single_pass['cluster_c'], alpha=0.8,
                    ax=pitch_ax, zorder=2)
        pitch.scatter(single_pass['endX'], single_pass['endY'], s=10, color=single_pass['cluster_c'],
                      alpha=0.8, zorder=3, ax=pitch_ax)
        pitch.scatter(single_pass['endX'], single_pass['endY'], s=5, color='#313332', alpha=1,
                      zorder=3, ax=pitch_ax)

    # Add title
    pitch_ax.set_title(f"{pass_num}{_ordinal_suffix(pass_num)} Pass", pad=-1, color='w', fontsize=9)

# Add title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{team} Passing Tendencies − {leagues[league]} {year}/{(int(year) + 1) % 100:02d}"
subtitle_text = f"Where {team} directed passes during possessions starting in each third"
subsubtitle_text = f"First {first_n_passes} passes in each possession chain. {clusters_shown} most common pass types shown"
fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.12, 0.918, subtitle_text, fontweight="bold", fontsize=13, color='w')
fig.text(0.12, 0.896, subsubtitle_text, fontweight="regular", fontsize=10, color='w')

# Add figure text: a grey rule on either side of each row heading
# (loc = [x where the right-hand rule starts, y of the rule])
for loc in [[0.38, 0.84], [0.42, 0.57], [0.381, 0.3]]:
    sep_ax = fig.add_axes([0.02, loc[1], 0.96, 0.02])
    sep_ax.axis("off")
    sep_ax.plot([0, 0.05], [0, 0], color='grey', lw=1)
    sep_ax.plot([loc[0], 1], [0, 0], color='grey', lw=1)
    sep_ax.set_xlim([0, 1])

fig.text(0.075, 0.845, "Possessions Initiated in Final Third", fontweight="bold", fontsize=11, color='w')
fig.text(0.075, 0.575, "Possessions Initiated in Centre of Pitch", fontweight="bold", fontsize=11, color='w')
fig.text(0.075, 0.305, "Possessions Initiated in Own Third", fontweight="bold", fontsize=11, color='w')

# Add legend logo (separate axes names are used so the pitch grid in `ax` is not overwritten)
fig.text(0.14, 0.032, "nth Most Common Pass Cluster", fontweight="bold", fontsize=9, color='w', ha="center")
legend_ax = fig.add_axes([0, 0, 0.3, 0.05])
for idx in range(clusters_shown):
    legend_ax.scatter(0.07 + idx*0.17, 0.35, color=cluster_colours[idx])
    legend_ax.text(0.1 + idx*0.17, 0.295, f"n={1+idx}", fontsize=8, c='w')
legend_ax.set_xlim([0, 1])
legend_ax.set_ylim([0, 1])
legend_ax.axis('off')

# Add team logo
logo, _ = lab.get_team_badge_and_colour(team)
logo_ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
logo_ax.axis("off")
logo_ax.imshow(logo)

# Add footer text
fig.text(0.55, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo. The path is built from the repository root so that it works
# on any operating system and from any working directory (the previous
# backslash literal relied on invalid escape sequences and Windows separators).
badge_ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
badge_ax.axis("off")
badge = Image.open(os.path.join(root_folder, 'data_directory', 'misc_data', 'images', 'JK Twitter Logo.png'))
badge_ax.imshow(badge)

# Save, creating the output folder on first use
os.makedirs("team_pass_tendencies", exist_ok=True)
fig.savefig(f"team_pass_tendencies/{team.lower().replace(' ','')}-{league.lower().replace(' ','')}-{year}", dpi=300)

# --------------------------------------------------------------------------------
# /projects/11_justice_league/justice_league.py:
# --------------------------------------------------------------------------------
# %% Imports

import pandas as pd
import bz2
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import numpy as np
import time
from mplsoccer import Pitch, VerticalPitch
import matplotlib.patheffects as path_effects
import matplotlib as mpl
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from matplotlib.patches import Rectangle
import requests
from PIL import Image, ImageEnhance
from io import BytesIO
from datetime import datetime
import textwrap as tw
from mplsoccer import PyPizza

# %% Add custom tools to path

root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
sys.path.append(root_folder)

import analysis_tools.get_football_data as gfd
import analysis_tools.statsbomb_custom_events as sce
import analysis_tools.statsbomb_data_engineering as sde
import analysis_tools.logos_and_badges as lab
import analysis_tools.models as mod

# %% User inputs

# Data to use
data_grab = [['England', 'Premier League', '2023']]

# Run date
run_date = '07-11-23'

# %% Load data

# Statsbomb
data_dict = gfd.load_statsbomb_sql(data_grab, events=True, matches=True, lineups=True, player_stats=True)
data_dict = gfd.load_statsbomb_sql(data_grab, events=True, matches=True, lineups=True, player_stats=True)
events_df = data_dict['events']
matches_df = data_dict['matches']
lineups_df = data_dict['lineups']
playerstats_df = data_dict['player_stats']

# %% Simulate match outcomes

# Number of Monte Carlo repetitions per match. The same value is quoted in the
# figure description, so it is passed to the simulation rather than repeated
# as a separate literal.
sim_count = 200000
for match_id in matches_df['match_id'].values:
    matches_df, match_simulation_df = mod.simulate_match_outcome(events_df, matches_df, match_id, sim_count=sim_count)

# %% Generate league table

leaguetable_df = sde.create_league_table(matches_df, xmetrics=True)

# %% Plot league table

fig = plt.figure(figsize=(9, 10), facecolor='#333332')

ax = fig.add_axes([0.05, 0.075, 0.9, 0.8])
ax.patch.set_alpha(0)

# Set up plotting parameters: the header takes a fixed share of the axes
# height and the remainder is divided evenly between the table rows
header_height = 0.07
row_height = (1 - header_height)/len(leaguetable_df)

# Horizontal Header lines and shading
ax.plot([0, 1], [0.998, 0.998], color='darkgrey', zorder=3, lw=0.5)
ax.plot([0, 1], [1-header_height, 1-header_height], color='darkgrey', zorder=3, lw=0.5)
ax.add_patch(Rectangle([0, 1-header_height], 1, header_height-0.002, color='#262625'))

# Header titles, vertically centred in the header band
header_y = 0.998 - header_height/2
ax.text(0.04, header_y, "Team Name", ha="left", va="center_baseline", fontweight="bold", color="w")
ax.text(0.39, header_y, "MP", ha="center", va="center_baseline", fontweight="bold", color="w")
ax.text(0.455, header_y, "Pts", ha="center", va="center_baseline", fontweight="bold", color="w")
ax.text(0.55, header_y, "xPts", ha="center", va="center_baseline", fontweight="bold", color="w")
ax.text(0.645, header_y, "G", ha="center", va="center_baseline", fontweight="bold", color="w")
ax.text(0.74, header_y, "xG", ha="center", va="center_baseline", fontweight="bold", color="w")
ax.text(0.835, 0.998 - header_height/2, "GA", ha="center", va="center_baseline", fontweight="bold", color="w")
ax.text(0.935, 0.998 - header_height/2, "xGA", ha="center", va="center_baseline", fontweight="bold", color="w")
#ax.text(0.97, 0.998 -header_height/2, "Actual\nPos", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")

# Badge size scales with the number of teams in the table
logo_zoom = 1/len(leaguetable_df) + 0.01

# Iterate over league table to position rows, icons and text
for _, table_team in leaguetable_df.iterrows():

    # Rows are ordered by expected (xPts-based) position, not by actual position
    pos = table_team['expected_position']

    # Add bottom horizontal line
    row_bottom = 1 - header_height - row_height*pos
    row_centre = row_bottom + row_height/2
    ax.plot([0, 1], [row_bottom, row_bottom], color='darkgrey', lw=0.5, zorder=3)

    # Add position
    ax.text(0.02, row_centre, pos, ha="center", va="center_baseline", color="w")

    # Add logo
    team_logo, _ = lab.get_team_badge_and_colour(table_team['team'])
    ab = AnnotationBbox(OffsetImage(team_logo, zoom=logo_zoom, resample=True), (0.075, row_centre), frameon=False)
    ax.add_artist(ab)

    # Add information. Expected metrics are top-aligned on the row centre so
    # that the small difference labels below can sit directly above them.
    ax.text(0.095, row_centre, table_team['team'], ha="left", va="center_baseline", color="w")
    ax.text(0.39, row_centre, int(table_team['matches_played']), ha="center", va="center_baseline", color="w")
    ax.text(0.455, row_centre, int(table_team['points']), ha="center", va="center_baseline", color="w")
    ax.text(0.535, row_centre, f"{table_team['expected_points']:.2f}", ha="center", va="top", color="w")
    ax.text(0.645, row_centre, int(table_team['goals_for']), ha="center", va="center_baseline", color="w")
    ax.text(0.725, row_centre, f"{table_team['xg_for']:.2f}", ha="center", va="top", color="w")
    ax.text(0.835, row_centre, int(table_team['goals_against']), ha="center", va="center_baseline", color="w")
    ax.text(0.92, row_centre, f"{table_team['xg_against']:.2f}", ha="center", va="top", color="w")
    #ax.text(0.97, row_centre, int(table_team['position']), ha = "center", va = "center_baseline", color = "w")

    # Add differences (expected minus actual). A '+' prefix marks a positive
    # difference; negative numbers already carry their own sign. Green means
    # the expected metric is stronger than the actual outcome - for goals
    # against that is the case when xGA is NOT above goals conceded.
    xg_delta_str = '+' if table_team['xg_for'] > table_team['goals_for'] else ''
    xg_col = 'green' if xg_delta_str == '+' else 'indianred'
    xga_delta_str = '+' if table_team['xg_against'] > table_team['goals_against'] else ''
    xga_col = 'green' if xga_delta_str == '' else 'indianred'
    xp_delta_str = '+' if table_team['expected_points'] > table_team['points'] else ''
    xp_col = 'green' if xp_delta_str == '+' else 'indianred'
    xpos_delta_str = '+' if table_team['position'] > table_team['expected_position'] else ''
    if table_team['expected_position'] < table_team['position']:
        xpos_col = 'green'
    elif table_team['expected_position'] == table_team['position']:
        xpos_col = 'w'
    else:
        xpos_col = 'indianred'

    ax.text(0.04, row_centre, f"{xpos_delta_str}{table_team['position']-table_team['expected_position']}",
            ha="center", va="bottom", fontweight="bold", color=xpos_col, fontsize=7)
    ax.text(0.575, row_centre, f"{xp_delta_str}{(table_team['expected_points']-table_team['points']):.2f}",
            ha="center", va="bottom", color=xp_col, fontsize=7)
    ax.text(0.765, row_centre, f"{xg_delta_str}{(table_team['xg_for']-table_team['goals_for']):.2f}",
            ha="center", va="bottom", color=xg_col, fontsize=7)
    ax.text(0.96, row_centre, f"{xga_delta_str}{(table_team['xg_against']-table_team['goals_against']):.2f}",
            ha="center", va="bottom", color=xga_col, fontsize=7)

    # Add intermittent shading
    if pos % 2 == 0:
        ax.add_patch(Rectangle([0, row_bottom], 1, row_height, color='#262625'))

# Remove axis spines
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

# Enforce axis limits
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])

# Add legend explaining the colours of the difference labels
legend_ax = fig.add_axes([0.72, 0.88, 0.24, 0.1])
legend_ax.add_patch(Rectangle([0.15, 0.65], 0.055, 0.12, color='g'))
legend_ax.text(0.24, 0.72, "Expected metric stronger than\nactual outcome", color="w", va="center_baseline", fontsize=7)
legend_ax.add_patch(Rectangle([0.15, 0.3], 0.055, 0.12, color='indianred'))
legend_ax.text(0.24, 0.37, "Expected metric weaker than\nactual outcome", color="w", va="center_baseline", fontsize=7)
legend_ax.axis("off")


# Add title and logo
title_text = f"{data_grab[0][1]} {data_grab[0][2]}/{str(int(data_grab[0][2]) + 1).replace('20','',1)} − Justice League"
subtitle_text = "League Table Standings based on Expected Points"
fig.text(0.12, 0.94, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.12, 0.915, subtitle_text, fontweight="bold", fontsize=11, color='w')

# Add competition logo
comp_logo = lab.get_competition_logo(data_grab[0][1], data_grab[0][2], logo_brighten=True)
comp_ax = fig.add_axes([0.022, 0.885, 0.1, 0.1])
comp_ax.axis("off")
comp_ax.imshow(comp_logo)

# Add description
fig.text(0.5, 0.045, f"Monte Carlo method implemented to model the probability of individual match outcomes based on shot events, with {sim_count} repetitions completed per match. Expected\n"+
         "points calculated using weighted outcome probabilities. Method reliant on assumption that xG represents scoring probability, and that individual shot events are independent.",
         color='lightgrey', fontsize=6.5, ha="center")

# Add footer information
fig.text(0.5, 0.012, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo. The path is built from the repository root so that it works
# on any operating system and from any working directory (the previous
# backslash literal relied on invalid escape sequences and Windows separators).
# A separate axes name is used so the table axes in `ax` stays available.
badge_ax = fig.add_axes([0.94, 0.001, 0.04, 0.04])
badge_ax.axis("off")
badge = Image.open(os.path.join(root_folder, 'data_directory', 'misc_data', 'images', 'JK Twitter Logo.png'))
badge_ax.imshow(badge)

# Save fig, creating the output folder on first use
os.makedirs("justice_league", exist_ok=True)
fig.savefig(f"justice_league/{data_grab[0][1].replace(' ','-').lower()}-{data_grab[0][2]}-justice-league-{run_date}.png", dpi=300)