├── .gitignore
├── Getting Started with Football Analytics.md
├── LICENSE
├── README.md
├── analysis_tools
├── __init__.py
├── logos_and_badges.py
├── models.py
├── pitch_zones.py
├── statsbomb_custom_events.py
├── statsbomb_data_engineering.py
├── visuals.py
├── whoscored_custom_events.py
├── whoscored_data_engineering.py
└── wyscout_data_engineering.py
├── data_directory
├── fbref_data
│ └── 2022_23
│ │ └── EPL
│ │ ├── epl 2023 player data.json
│ │ ├── epl 2023 team data.json
│ │ └── epl 2023 vs team data.json
├── leaguetable_data
│ ├── 2016_17
│ │ ├── Bundesliga-table-2016.pbz2
│ │ ├── EFLC-table-2016.pbz2
│ │ ├── EPL-table-2016.pbz2
│ │ ├── La_Liga-table-2016.pbz2
│ │ ├── Ligue_1-table-2016.pbz2
│ │ └── Serie_A-table-2016.pbz2
│ ├── 2017_18
│ │ ├── Bundesliga-table-2017.pbz2
│ │ ├── EFLC-table-2017.pbz2
│ │ ├── EPL-table-2017.pbz2
│ │ ├── La_Liga-table-2017.pbz2
│ │ ├── Ligue_1-table-2017.pbz2
│ │ └── Serie_A-table-2017.pbz2
│ ├── 2018_19
│ │ ├── Bundesliga-table-2018.pbz2
│ │ ├── EFLC-table-2018.pbz2
│ │ ├── EPL-table-2018.pbz2
│ │ ├── La_Liga-table-2018.pbz2
│ │ ├── Ligue_1-table-2018.pbz2
│ │ └── Serie_A-table-2018.pbz2
│ ├── 2019_20
│ │ ├── Bundesliga-table-2019.pbz2
│ │ ├── EFLC-table-2019.pbz2
│ │ ├── EPL-table-2019.pbz2
│ │ ├── La_Liga-table-2019.pbz2
│ │ ├── Ligue_1-table-2019.pbz2
│ │ └── Serie_A-table-2019.pbz2
│ ├── 2020_21
│ │ ├── Bundesliga-table-2020.pbz2
│ │ ├── EFLC-table-2020.pbz2
│ │ ├── EPL-table-2020.pbz2
│ │ ├── La_Liga-table-2020.pbz2
│ │ ├── Ligue_1-table-2020.pbz2
│ │ └── Serie_A-table-2020.pbz2
│ ├── 2021_22
│ │ ├── Bundesliga-table-2021.pbz2
│ │ ├── EFLC-table-2021.pbz2
│ │ ├── EPL-table-2021.pbz2
│ │ ├── La_Liga-table-2021.pbz2
│ │ ├── Ligue_1-table-2021.pbz2
│ │ └── Serie_A-table-2021.pbz2
│ └── 2022_23
│ │ ├── EFLC-table-2022.pbz2
│ │ └── EPL-table-2022.pbz2
├── misc_data
│ ├── articles
│ │ └── The Evolution of Shooting in the Premier League.pdf
│ ├── images
│ │ ├── BoxLogo.png
│ │ ├── CarryLogo.png
│ │ ├── EPL-2022-23-cycle-of-results.png
│ │ ├── EPL-2022-23-cycle-of-results.pptx
│ │ ├── JK Twitter Logo.png
│ │ ├── PassLogo.png
│ │ ├── RecoveryLogo.png
│ │ ├── TackleLogo.png
│ │ ├── example-1-1-1.png
│ │ ├── example-1-1-2.png
│ │ ├── example-1-1-3.png
│ │ ├── example-10-1-1.png
│ │ ├── example-10-1-2.png
│ │ ├── example-11-1-1.png
│ │ ├── example-2-1-1.png
│ │ ├── example-2-1-2.png
│ │ ├── example-2-1-3.png
│ │ ├── example-2-1-4.png
│ │ ├── example-2-2-1.png
│ │ ├── example-2-2-2.png
│ │ ├── example-3-1-1.png
│ │ ├── example-3-1-2.png
│ │ ├── example-3-1-3.png
│ │ ├── example-3-1-4.png
│ │ ├── example-3-1-5.png
│ │ ├── example-4-1-1.png
│ │ ├── example-4-1-2.png
│ │ ├── example-4-2-1.png
│ │ ├── example-4-2-2.png
│ │ ├── example-4-3-1.png
│ │ ├── example-4-3-2.png
│ │ ├── example-5-1-1.png
│ │ ├── example-5-1-2.png
│ │ ├── example-5-2-1.png
│ │ ├── example-5-2-2.png
│ │ ├── example-5-3-1.png
│ │ ├── example-5-3-2.png
│ │ ├── example-5-3-3.png
│ │ ├── example-5-4-1.png
│ │ ├── example-5-4-2.png
│ │ ├── example-5-5-1.png
│ │ ├── example-5-5-2.png
│ │ ├── example-5-5-3.png
│ │ ├── example-5-5-4.png
│ │ ├── example-5-6-1.png
│ │ ├── example-5-6-2.png
│ │ ├── example-5-7-1.png
│ │ ├── example-5-7-2.png
│ │ ├── example-6-1-1.png
│ │ ├── example-6-1-2.png
│ │ ├── example-6-2-1.png
│ │ ├── example-6-2-2.png
│ │ ├── example-6-3-1.png
│ │ ├── example-6-4-1.png
│ │ ├── example-6-5-1.png
│ │ ├── example-6-5-2.png
│ │ ├── example-6-6-1.png
│ │ ├── example-6-6-2.png
│ │ ├── example-6-7-1.png
│ │ ├── example-7-1-1.png
│ │ ├── example-7-1-2.png
│ │ ├── example-7-2-1.png
│ │ ├── example-7-2-2.png
│ │ ├── example-9-1-1.png
│ │ ├── example-9-1-2.png
│ │ └── getting-started-roadmap.png
│ ├── log_regression_xg_data.pbz2
│ ├── neural_net_xg_data.pbz2
│ └── worldcup_2010_to_2018_distcovered.xlsx
├── statsbomb_data
│ └── README.txt
├── transfermarkt_data
│ ├── 2021_22
│ │ ├── transfermarkt_GB1_2021-2022.pbz2
│ │ └── transfermarkt_GB2_2021-2022.pbz2
│ └── 2022_23
│ │ ├── transfermarkt_FR2_2022-2023.pbz2
│ │ └── transfermarkt_GB2_2022-2023.pbz2
├── whoscored_data
│ └── README.txt
└── wyscout_data
│ ├── Data Collection Report.pdf
│ ├── coaches.pbz2
│ ├── competitions.pbz2
│ ├── eventid2name.csv
│ ├── events
│ ├── events_England.pbz2
│ ├── events_European_Championship.pbz2
│ ├── events_France.pbz2
│ ├── events_Germany.pbz2
│ ├── events_Italy.pbz2
│ ├── events_Spain.pbz2
│ └── events_World_Cup.pbz2
│ ├── matches
│ ├── matches_England.pbz2
│ ├── matches_European_Championship.pbz2
│ ├── matches_France.pbz2
│ ├── matches_Germany.pbz2
│ ├── matches_Italy.pbz2
│ ├── matches_Spain.pbz2
│ └── matches_World_Cup.pbz2
│ ├── playerank.pbz2
│ ├── players.pbz2
│ ├── tags2name.csv
│ └── teams.pbz2
├── model_directory
├── pass_cluster_model
│ └── PassClusterModel65.joblib
└── xg_model
│ └── log_regression_xg_model.joblib
└── projects
├── 00_data_import_and_misc_work
├── download_yt_video.py
├── import_data_fbref.py
├── import_data_leaguetable.py
├── import_data_whoscored.py
└── scrape_data_transfermarkt.py
├── 01_wc2018_box2box_mids
└── worldcup_b2b_mids.py
├── 02_player_team_valuation
└── team_player_value_analysis.py
├── 03_model_development_and_implementation
├── pass_cluster_data_collection.py
├── shot_xg_plot.py
├── xg_log_regression_model.py
└── xg_neural_network.py
├── 04_match_reports
├── off_def_shape_report_ws.py
├── pass_report_ws.py
└── shot_report_understat.py
├── 05_competition_reports_top_players
├── player_defensive_contribution.py
├── player_effective_carriers.py
├── player_effective_passers.py
├── player_high_defensive_actions.py
├── player_impact_on_team.py
├── player_penalty_takers.py
├── player_threat_creators.py
└── player_threat_creators_zonal_comparison.py
├── 06_competition_reports_top_teams
├── team_ball_winning.py
├── team_common_zonal_actions.py
├── team_cross_success.py
├── team_delta_threat_creation.py
├── team_fullback_combinations.py
├── team_setpiece_shot_concession.py
├── team_threat_creation.py
└── xg_league_table_sb.py
├── 07_player_reports
├── advanced_swarm_radar.py
└── player_report_fullback.py
├── 08_evolution_of_shooting
└── shot_characteristics_trending.py
├── 09_league_position_metric_correlation
└── team_metric_pts_correlation.py
├── 10_team_buildup_passes
└── team_pass_tendencies.py
└── 11_justice_league
└── justice_league.py
/.gitignore:
--------------------------------------------------------------------------------
1 | /analysis_tools/get_football_data.py
2 | /data_directory/misc_data/pass_data.pbz2
3 | /data_directory/statsbomb_data/2017_18/
4 | /data_directory/whoscored_data/2023_24/
5 | /data_directory/whoscored_data/2022_23/
6 | /data_directory/whoscored_data/2021_22/
7 | /data_directory/whoscored_data/2020_21/
8 | /data_directory/whoscored_data/2019_20/
9 | /data_directory/whoscored_data/2018_19/
10 | /data_directory/whoscored_data/2017_18/
11 | /data_directory/whoscored_data/2016_17/
12 | /data_directory/whoscored_data/2015_16/
13 | /data_directory/whoscored_data/2014_15/
14 | /data_directory/whoscored_data/2013_14/
15 | /data_directory/whoscored_data/2012_13/
16 | /data_directory/whoscored_data/2011_12/
17 | /data_directory/whoscored_data/2010_11/
18 | /data_directory/whoscored_data/2009_10/
19 | /projects/00_data_import_and_misc_work/misc_work_images/
20 | /projects/00_data_import_and_misc_work/import_data_statsbomb.py
21 | /projects/00_data_import_and_misc_work/check_whoscored_data_volume.py
22 | /projects/01_wc2018_box2box_mids_statsbomb/worldcup_b2b_mids/
23 | /projects/02_player_team_valuation/forward_value_analysis/
24 | /projects/02_player_team_valuation/market_value_league_table/
25 | /projects/04_match_reports/pass_reports/
26 | /projects/04_match_reports/shot_reports/
27 | /projects/04_match_reports/shape_reports/
28 | /projects/04_match_reports/out_of_possession_reports/
29 | /projects/04_match_reports/out_of_posession_report_sb.py
30 | /projects/04_match_reports/team_metric_vs_season/
31 | /projects/04_match_reports/team_metrics_vs_season.py
32 | /projects/05_competition_reports_top_players/player_effective_carriers/
33 | /projects/05_competition_reports_top_players/player_effective_passers/
34 | /projects/05_competition_reports_top_players/player_penalty_takers/
35 | /projects/05_competition_reports_top_players/player_threat_creators/
36 | /projects/05_competition_reports_top_players/top_defensive_actions/
37 | /projects/05_competition_reports_top_players/top_defensive_contributions/
38 | /projects/05_competition_reports_top_players/player_impact_on_team/
39 | /projects/06_competition_reports_top_teams/team_ball_winning/
40 | /projects/06_competition_reports_top_teams/team_cross_success/
41 | /projects/06_competition_reports_top_teams/team_fullback_combinations/
42 | /projects/06_competition_reports_top_teams/team_threat_creation/
43 | /projects/06_competition_reports_top_teams/team_setpiece_chance_concession/
44 | /projects/06_competition_reports_top_teams/team_common_actions/
45 | /projects/06_competition_reports_top_teams/team_xg_metrics/
46 | /projects/07_player_reports/player_reports/
47 | /projects/07_player_reports/advanced_radars/
48 | /projects/07_player_reports/player_report_centreforward_sb.py
49 | /projects/07_player_reports/player_report_winger_sb.py
50 | /projects/08_evolution_of_shooting/shot_characteristics_trending/
51 | /projects/09_league_position_metric_correlation/team_metric_pts_correlation/
52 | /projects/10_team_buildup_passes/team_pass_tendencies/
53 | /projects/11_justice_league/justice_league/
54 | /projects/99_private_work/
55 |
--------------------------------------------------------------------------------
/Getting Started with Football Analytics.md:
--------------------------------------------------------------------------------
1 | # Getting Started: Football Data Analytics with Python
2 | The aim of this short document is to provide some guidance and advice on getting started with football data analytics using Python.
3 |
4 | There are a huge number of relevant resources out there already, but I thought it worth providing my perspectives on the topic and outlining the path that I took into football data analytics. Please note that this document only covers the technical aspects of getting started, and does not provide guidance on starting a career within the football industry (nor am I qualified to provide this).
5 |
6 | ## Roadmap
7 | I have put together a basic roadmap that identifies a logical sequence of activities that you may choose to undertake to get started with football data. I will expand on each activity in the following sections.
8 |
9 |
10 |
   
11 |
12 |
13 | ## Step 1 - Python Fundamentals
14 |
15 | One of the most common mistakes I have seen, and therefore a key piece of advice I have, is: don't try to run before you can walk. Whilst it is seriously tempting to immerse yourself in football data immediately, it's critical that you develop a thorough understanding of the Python programming language beforehand. In taking the time to learn the fundamentals of Python coding as part of a wider analytics skill-set, you will better position yourself to develop as a football data analyst.
16 |
17 | I have listed a few courses below that I have completed and highly recommend. It wouldn't be excessive to spend 1-3 months learning Python before you get started with football data.
18 |
19 | |Course Name|Platform|Time Commitment|Cost|
20 | |----------|:-------------:|:------:|:------:|
21 | |[Learning to Program with Python 3](https://www.youtube.com/playlist?list=PLQVvvaa0QuDeAams7fkdcwOGBpGdHpXln)|YouTube|5-10Hrs|Free|
22 | |[Python Tutorial for Beginners](https://www.youtube.com/watch?v=YYXdXT2l-Gg&list=PL-osiE80TeTskrapNbzXhwoFUiLCjGgY7&ab_channel=CoreySchafer)|YouTube|10-20Hrs|Free|
23 | |[Complete Python Bootcamp](https://www.udemy.com/course/complete-python-bootcamp/)|Udemy|20-40Hrs|£15.99 (offer)|
24 | |[Python for Data Science & Machine Learning](https://www.udemy.com/course/python-for-data-science-and-machine-learning-bootcamp/)|Udemy|20-40Hrs|£15.99 (offer)|
25 |
26 |
27 | ## Step 2 - Analysing Football Event Data
28 |
29 | With the fundamentals of Python nailed down, you can move on to the good bit! There is an absolutely essential resource that will get you started with analysing football event data. Uppsala University (Sweden) run an online course called [Mathematical Modelling of Football](https://soccermatics.readthedocs.io/en/latest/). It is delivered by the author of Soccermatics, [David Sumpter](https://www.david-sumpter.com/), who has kindly made the majority of lectures/resources publicly available.
30 |
31 | I would strongly recommend that you not only watch, but follow along with the course material. Prioritise the first three sections; event data, models and scouting. These sections will get you up to speed with crucial football data analytics topics, as well as provide invaluable guidance on producing effective visuals. The remaining sections; valuing actions, randomness & prediction, pitch control, positioning & movement, and physical data, cover more advanced topics but are still well worth completing.
32 |
33 | ## Step 3 - Understanding Football Data Sources
34 |
35 | Before going it alone, it's important to understand what football data exists in the public domain, and equally what doesn't. What do you have to pay for, and what's available for free?
36 |
37 | Through taking the time to consider this, you can appropriately scope your future projects and ensure that the data required to complete them is actually available. If you have completed various sections of the [Mathematical Modelling of Football](https://soccermatics.readthedocs.io/en/latest/) course, then you will likely have at least some awareness of what data is available and accessible.
38 |
39 | In my experience, working with different types/formats of football data not only stretches you from a technical programming perspective, but encourages you to think about different ways to process and draw insight from football data. When getting started with football data analytics, I believe the following sources of free data are key.
40 |
41 | |Data Provider|Data Description|Data Format|
42 | |----------|-------------|:------:|
43 | |[Statsbomb Open Data](https://github.com/statsbomb/open-data)|Statsbomb match event data from a collection of games and competitions|[API](https://github.com/statsbomb/statsbombpy) or [.json](https://github.com/statsbomb/open-data)|
44 | |[Wyscout Free Data](https://figshare.com/collections/Soccer_match_event_dataset/4415000/2)|Wyscout match event data for all matches in Europe's top 5 leagues during 2017-18 season|[.json](https://figshare.com/collections/Soccer_match_event_dataset/4415000/2)|
45 | |[Understat](https://understat.com/)|Shot event data for all matches in Europe's top 5 leagues (+ Russian Prem)|[API](https://pypi.org/project/understatapi/) or [Web](https://understat.com/)|
46 | |[FBref](https://fbref.com/en/)|Aggregated team and player data/performance metrics|[Web](https://fbref.com/en/), .xlsx or .csv|
47 | |[Transfermarkt](https://www.transfermarkt.co.uk/)|Team and player market value|[Web](https://www.transfermarkt.co.uk/)|
48 |
49 | There are many more data sources, the majority of which are listed and explained within Edd Webster's brilliant [football analytics repository](https://github.com/eddwebster/football_analytics#data-sources). Given the number of web-based resources around, any time spent understanding the basics of web-scraping with Python is time well spent.
50 |
51 | ## Step 4 - Personal Projects
52 |
53 | Now it's time to go alone... there is no better way to consolidate learning than through practicing, testing and experimentation. With an understanding of football data analytics principles and an awareness of what data is available to you, it's likely you have formulated some project ideas already. Go with them! Explore the data and don't be afraid to change the direction of your project if something in particular piques your interest!
54 |
55 | If you are struggling, here's a list of ideas:
56 | - Identify (and visualise) which players frequently passed the ball into the opposition box (in-play only) during the 2018 World Cup
57 | - Highlight pitch areas that each team lost possession of the ball during the 2018 World Cup. Link this to team performance
58 | - Highlight pitch areas that each team created shots/chances from during the 2018 World Cup. Link this to team performance
59 | - Create a function that identifies every time a forward receives a long ball, their next action, and whether they hold the ball up successfully
60 | - Define and implement some bespoke metrics that quantify how well a defender/midfield/forward performed during the 2018 World Cup
61 | - Build an xG model using data from Europe's top 5 leagues during the 2017/18 season
62 | - Build a tool to extract and visualise shot event data from Understat
63 | - Develop a model that attempts to predict player market value from seasonal performance (for a specific player position)
64 | - Implement an expected threat model, and use it to determine the value of all actions during the 2017/18 season in Europe's top 5 leagues
65 | - Build a tool that quickly produces a scatter plot to compare a set of players against 2 FBref metrics
66 |
67 | When you have completed a project and are happy with your work, post it! Whilst this might seem daunting, Twitter is a great place to share your work and obtain feedback. Feel free to send me your work directly and I'll take a look too.
68 |
69 | ## Step 5 - Research & Refresh Knowledge
70 |
71 | The final step, and one that should not be underestimated, is to maintain and refresh your knowledge. Continuous learning will further develop your data analytics skillset and help you to bring fresh ideas and concepts into your personal projects. Go back and revisit course topics if you need to, and keep on top of public data releases.
72 |
73 | There is a huge amount of research and development that is taking place in the football analytics community, and I'd recommend at least being aware of it. One of the best ways to do this is to take a look at [Jan Van Haaren's](https://www.janvanhaaren.be/) annual soccer analytics review (e.g. [2022 soccer analytics review](https://www.janvanhaaren.be/2022/12/29/soccer-analytics-review-2022.html)). Even if you only scan through the research paper titles, you will at least get an indication of trending research topics and the state-of-the-art in football data analytics.
74 |
75 | ## Summary
76 |
77 | And that's about it! Hopefully this resource has been useful. Please feel free to reach out to me ([@\_JKDS\_](https://twitter.com/_JKDS_)) if you have any questions or thoughts!
78 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/analysis_tools/__init__.py:
--------------------------------------------------------------------------------
1 | """Package to support analysis of football event data.
2 |
3 | Collection of modules to assist with data engineering, formatting, analysis and visualisation of event-type football
4 | data from a variety of sources."""
--------------------------------------------------------------------------------
/analysis_tools/models.py:
--------------------------------------------------------------------------------
1 | """Module containing a variety of predictive and statistical models relevant to the analysis of football data
2 |
3 | Functions
4 | ---------
5 | get_pass_clusters(events, data_mode='whoscored'):
6 | Assign statsbomb or whoscored pass events to a pass cluster
7 |
8 | simulate_match_outcome(events, matches, match_id, sim_count=10000):
9 | Simulate the outcome of a match based on teams xG
10 |
11 |
12 | """
13 |
14 | import joblib
15 | from sklearn.base import BaseEstimator, TransformerMixin
16 | import os
17 | import numpy as np
18 | import pandas as pd
19 |
20 |
21 | # Load custom classes that are required for model pipeline (done manually here for ease)
22 | # noinspection PyPep8Naming
class convertYards(BaseEstimator, TransformerMixin):
    """Pipeline step that rescales pass coordinates by fixed per-axis factors.

    'x' and 'endX' are multiplied by 120/100; 'y' and 'endY' are multiplied
    by 80/100.
    NOTE(review): presumably this converts a 0-100 pitch grid into a
    120 x 80 yard pitch - confirm against the data source.
    """

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Rescale the 'x', 'y', 'endX' and 'endY' entries of X and return X.

        The rescaled values are assigned back onto X itself (no copy is made).
        """
        length_factor = 120 / 100
        width_factor = 80 / 100
        column_factors = (
            ('x', length_factor),
            ('y', width_factor),
            ('endX', length_factor),
            ('endY', width_factor),
        )
        for column, factor in column_factors:
            X[column] = X[column] * factor
        return X
33 |
34 |
35 | # noinspection PyPep8Naming
class customScaler(BaseEstimator, TransformerMixin):
    """Pipeline step that divides all four pass coordinates by 120."""

    def fit(self, X, y=None):
        """Store max_x (120) and max_y (80) on the instance and return it."""
        self.max_x, self.max_y = 120, 80
        return self

    def transform(self, X, y=None):
        """Divide the 'x', 'y', 'endX' and 'endY' entries of X by 120; return X.

        The scaled values are assigned back onto X itself (no copy is made).
        """
        # Every column - including 'y' and 'endY' - shares the divisor 120;
        # the max_x / max_y attributes set in fit() are not read here.
        for column in ('x', 'y', 'endX', 'endY'):
            X[column] = X[column] / 120
        return X
48 |
49 |
def get_pass_clusters(events, data_mode='whoscored'):
    """ Assign statsbomb or whoscored pass events to a pass cluster

    Function that implements a pass clustering model, that has been trained on over 5,000,000 successful passes across
    EPL, Serie A, Ligue 1, Bundesliga, La Liga and EFLC (2019/20 - 2022/23), to assign passes to a pass cluster. Passes
    are assigned to their most similar cluster based on the start and end position of the pass. The function adds a
    cluster id and cluster centroid (x, y, end x, end y) to each pass.

    The model file is located relative to the current working directory: everything up to the first occurrence of
    "football-data-analytics" in the working directory path is treated as the parent of the repository. The working
    directory itself is not changed by this function.

    Args:
        events (pandas.DataFrame): dataframe of event data.
        data_mode (string, optional): 'whoscored' or 'statsbomb' data. Defaults to 'whoscored'.

    Returns:
        pandas.DataFrame: dataframe of passes with additional 'pass_cluster_id', 'pass_cluster_mean_x',
        'pass_cluster_mean_y', 'pass_cluster_mean_end_x' and 'pass_cluster_mean_end_y' columns. Cluster centroids are
        expressed on a 0-100 scale for 'whoscored' data and on a 120 x 80 scale for 'statsbomb' data.

    Raises:
        ValueError: if data_mode is neither 'whoscored' nor 'statsbomb'.
    """

    # Filter and format data based on data_mode to ensure compatibility with pass cluster model
    if data_mode == 'whoscored':
        passes_out = events[events['eventType'] == 'Pass'].copy()
    elif data_mode == 'statsbomb':
        passes_out = events[events['type_name'] == 'Pass'].copy()
        passes_out['x'] = 100 * passes_out['x'] / 120
        passes_out['y'] = 100 * passes_out['y'] / 80
        passes_out['endX'] = 100 * passes_out['end_x'] / 120
        passes_out['endY'] = 100 * passes_out['end_y'] / 80
    else:
        raise ValueError("Specify 'whoscored' or 'statsbomb' as data mode")

    # Load pass clustering model from an explicit path. Changing the working directory to load the file would leak
    # into the rest of the process, and would not be undone if loading failed.
    model_folder = (os.getcwd().split("football-data-analytics")[0] +
                    "football-data-analytics/model_directory/pass_cluster_model")
    cluster_model = joblib.load(os.path.join(model_folder, "PassClusterModel65.joblib"))

    # Make cluster predictions and add cluster info
    passes_out['pass_cluster_id'] = cluster_model.predict(passes_out)

    # The model works on coordinates divided by 120, so multiplying by 120 returns the centres to a 120 x 80 scale
    cluster_centers = cluster_model['model'].cluster_centers_ * 120

    # Look up the centre of each pass's cluster in a single indexing operation (one row of centres per pass)
    assigned_centers = cluster_centers[passes_out['pass_cluster_id'].to_numpy()]
    passes_out['pass_cluster_mean_x'] = assigned_centers[:, 0]
    passes_out['pass_cluster_mean_y'] = assigned_centers[:, 1]
    passes_out['pass_cluster_mean_end_x'] = assigned_centers[:, 2]
    passes_out['pass_cluster_mean_end_y'] = assigned_centers[:, 3]

    # Return data to standard state based on data_mode
    if data_mode == 'whoscored':
        passes_out['pass_cluster_mean_x'] = 100 * passes_out['pass_cluster_mean_x'] / 120
        passes_out['pass_cluster_mean_y'] = 100 * passes_out['pass_cluster_mean_y'] / 80
        passes_out['pass_cluster_mean_end_x'] = 100 * passes_out['pass_cluster_mean_end_x'] / 120
        passes_out['pass_cluster_mean_end_y'] = 100 * passes_out['pass_cluster_mean_end_y'] / 80

    elif data_mode == 'statsbomb':
        passes_out['x'] = 120 * passes_out['x'] / 100
        passes_out['y'] = 80 * passes_out['y'] / 100
        passes_out = passes_out.drop(columns=['endX', 'endY'])

    return passes_out
107 |
108 |
def simulate_match_outcome(events, matches, match_id, sim_count=10000):
    """ Simulate the outcome of a match based on teams xG

    Function to simulate the outcome of a match by assigning goals to each team based on their chances and xG. Assumes
    that xG represents scoring probability and that all xG events are independent. Matches are simulated a number of
    times, with outcomes used to determine home win, draw and away win probabilities and expected points. Function
    requires statsbomb-style events and matches dataframe, id of match to simulate and number of iterations. Individual
    simulation outcomes are returned. Win probabilities and expected points are added to the matches dataframe.

    Random numbers are taken from the global numpy random state (np.random), one per shot per simulation, in the
    order: simulation by simulation, home shots first and then away shots. Seeding with np.random.seed therefore
    makes the result reproducible. All draws are generated in a single array of shape (sim_count, number of shots).

    Args:
        events (pandas.DataFrame): dataframe of statsbomb-style event data.
        matches (pandas.DataFrame): dataframe of statsbomb-style match data.
        match_id (int): numeric identifier of match to simulate
        sim_count (int): number of simulations to run. Must be at least 1. Defaults to 10000.

    Returns:
        pandas.DataFrame: statsbomb-style match dataframe with additional 'home_xg', 'away_xg', 'home_win_probability',
        'away_win_probability', 'draw_probability', 'home_xpoints' and 'away_xpoints' columns
        pandas.DataFrame: dataframe of match simulation results. One row per simulation

    Raises:
        ValueError: if sim_count is less than 1.
        IndexError: if match_id does not appear in the matches dataframe.
    """

    # Probabilities are counts divided by sim_count, so a count below one has no meaningful result
    if sim_count < 1:
        raise ValueError("sim_count must be a positive integer")

    # Retrieve teams for match to simulate
    match_simulate = matches[matches['match_id'] == match_id]
    if match_simulate.empty:
        # IndexError is what the positional lookup below would raise; it is kept, with a clear message
        raise IndexError(f"match_id {match_id} not found in matches")
    home_team = match_simulate['home_team'].values[0]
    away_team = match_simulate['away_team'].values[0]

    # Retrieve xG events for match to simulate (events without an xG value are not shots)
    match_xg_events = events[(events['match_id'] == match_id) & events['shot_statsbomb_xg'].notna()]
    home_xg_list = match_xg_events.loc[match_xg_events['team_name'] == home_team,
                                       'shot_statsbomb_xg'].to_numpy(dtype=float)
    away_xg_list = match_xg_events.loc[match_xg_events['team_name'] == away_team,
                                       'shot_statsbomb_xg'].to_numpy(dtype=float)

    # Simulate every shot of every simulation at once. Each row is one simulation, with home shots in the leading
    # columns and away shots after them. A shot is scored when its random draw is below its xG.
    home_shot_count = home_xg_list.size
    shot_xg = np.concatenate([home_xg_list, away_xg_list])
    shot_scored = np.random.random((sim_count, shot_xg.size)) < shot_xg
    home_goals = shot_scored[:, :home_shot_count].sum(axis=1, dtype=np.int64)
    away_goals = shot_scored[:, home_shot_count:].sum(axis=1, dtype=np.int64)

    # Define match outcome based on home and away goals
    home_win = home_goals > away_goals
    away_win = away_goals > home_goals
    outcomes = np.where(home_win, 'home', np.where(away_win, 'away', 'draw'))

    # Store all simulated matches within dataframe
    match_simulation_results = pd.DataFrame({'home_goals': home_goals,
                                             'away_goals': away_goals,
                                             'outcome': outcomes})
    match_simulation_results['home_team'] = home_team
    match_simulation_results['away_team'] = away_team

    # Store win probabilities and xpoints in dictionary
    home_win_count = int(home_win.sum())
    away_win_count = int(away_win.sum())
    draw_count = sim_count - home_win_count - away_win_count
    result_dict = dict()
    result_dict['match_id'] = match_id
    result_dict['home_xg'] = home_xg_list.sum()
    result_dict['away_xg'] = away_xg_list.sum()
    result_dict['home_win_probability'] = home_win_count / sim_count
    result_dict['away_win_probability'] = away_win_count / sim_count
    result_dict['draw_probability'] = draw_count / sim_count
    result_dict['home_xpoints'] = result_dict['home_win_probability'] * 3 + result_dict['draw_probability'] * 1
    result_dict['away_xpoints'] = result_dict['away_win_probability'] * 3 + result_dict['draw_probability'] * 1

    # Insert win probabilities and xpoints information to dataframe
    stat_columns = [column for column in result_dict if column != 'match_id']
    if 'home_xpoints' in matches.columns:
        # Columns already exist (an earlier match has been simulated), so fill in the row of this match
        matches_out = matches.copy()
        matches_out.loc[matches_out['match_id'] == match_id, stat_columns] = [result_dict[column]
                                                                             for column in stat_columns]
    else:
        # First simulation against this dataframe: create the columns, leaving other matches empty
        join_df = pd.DataFrame(result_dict, index=[0])
        matches_out = pd.merge(matches, join_df, on='match_id', how='left')

    return matches_out, match_simulation_results
--------------------------------------------------------------------------------
/analysis_tools/visuals.py:
--------------------------------------------------------------------------------
1 | """Module containing functions to generate football data analytics visuals.
2 |
3 | Functions
4 | ---------
5 |
6 |
7 | """
8 |
9 |
--------------------------------------------------------------------------------
/analysis_tools/wyscout_data_engineering.py:
--------------------------------------------------------------------------------
1 | """Module containing functions to assist with pre-processing and engineering of Wyscout-style data
2 |
3 | Functions
4 | ---------
5 | format_wyscout_data(tournament='England', data_folder="../../data_directory/wyscout_data")
6 | Load Wyscout json files with matches, events, players and competitions."""
7 |
8 | import bz2
9 | import pickle
10 | from collections import defaultdict
11 | import pandas as pd
12 |
13 |
def _load_pbz2(path):
    """Return the object unpickled from the bz2-compressed pickle file at path."""

    # NOTE: unpickling can execute arbitrary code, so this must only be pointed at trusted data files.
    # The context manager guarantees the file handle is closed, even if unpickling fails.
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


def format_wyscout_data(tournament='England', data_folder="../../data_directory/wyscout_data"):
    """ Load Wyscout compressed pickle (.pbz2) files with matches, events, players and competitions.

    Function to load and format Wyscout data files (location defined by folder input) for one or more user-defined
    tournaments. Events and matches are read from "events/events_<tournament>.pbz2" and
    "matches/matches_<tournament>.pbz2" inside the data folder; players, competitions and teams are read from
    "players.pbz2", "competitions.pbz2" and "teams.pbz2" inside the data folder.

    Args:
        tournament (str or list of str, optional): Tournament, or list of tournaments, to load. Defaults to 'England'.
        data_folder (str, optional): Location of data, relative to script in which function is called.

    Returns:
        pandas.DataFrame: wyscout-style matches dataframe, containing all match information from selected competition,
        indexed by match 'wyId'.
        pandas.DataFrame: wyscout-style event dataframe, containing all events from selected competition.
        defaultdict: wyscout-style event dictionary, containing events per match.
        pandas.DataFrame: wyscout-style player dataframe, containing player info for all players in av. Wyscout data.
        pandas.DataFrame: wyscout-style competition dataframe, containing comp info for all comps in av. Wyscout data.
        pandas.DataFrame: wyscout-style team dataframe, containing team info for all teams in av. Wyscout data.
    """

    if isinstance(tournament, str):
        tournament = [tournament]

    # Load in the Wyscout matches and event data, one dataframe per tournament
    event_frames = []
    match_frames = []
    for data_selection in tournament:
        event_frames.append(pd.DataFrame(_load_pbz2(f"{data_folder}/events/events_{data_selection}.pbz2")))
        match_frames.append(pd.DataFrame(_load_pbz2(f"{data_folder}/matches/matches_{data_selection}.pbz2")))

    # Stack the tournaments. pd.concat is used because DataFrame.append no longer exists in pandas 2.0 and later;
    # an empty tournament list gives empty dataframes, since pd.concat rejects an empty list.
    events = pd.concat(event_frames) if event_frames else pd.DataFrame()
    matches = pd.concat(match_frames) if match_frames else pd.DataFrame()

    # Produce a dictionary of lists: top level dictionary of matches with sub-list of events
    match_id2events = defaultdict(list)
    for _, event in events.iterrows():
        match_id2events[event['matchId']].append(event)

    # Produce a dictionary of match id to match info
    match_id2match = {match['wyId']: match for _, match in matches.iterrows()}

    # Load in the Wyscout player, competition and team data, each keyed by its 'wyId'
    player_id2player = {player['wyId']: player for player in _load_pbz2(f"{data_folder}/players.pbz2")}
    competition_id2competition = {competition['wyId']: competition
                                  for competition in _load_pbz2(f"{data_folder}/competitions.pbz2")}
    team_id2team = {team['wyId']: team for team in _load_pbz2(f"{data_folder}/teams.pbz2")}

    # Convert to dataframes: ids become columns on construction, so transpose to get one row per id
    match_id2match = pd.DataFrame(match_id2match).transpose()
    player_id2player = pd.DataFrame(player_id2player).transpose()
    competition_id2competition = pd.DataFrame(competition_id2competition).transpose()
    team_id2team = pd.DataFrame(team_id2team).transpose()

    return match_id2match, events, match_id2events, player_id2player, competition_id2competition, team_id2team
101 |
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2016_17/Bundesliga-table-2016.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/Bundesliga-table-2016.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2016_17/EFLC-table-2016.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/EFLC-table-2016.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2016_17/EPL-table-2016.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/EPL-table-2016.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2016_17/La_Liga-table-2016.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/La_Liga-table-2016.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2016_17/Ligue_1-table-2016.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/Ligue_1-table-2016.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2016_17/Serie_A-table-2016.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2016_17/Serie_A-table-2016.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2017_18/Bundesliga-table-2017.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/Bundesliga-table-2017.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2017_18/EFLC-table-2017.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/EFLC-table-2017.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2017_18/EPL-table-2017.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/EPL-table-2017.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2017_18/La_Liga-table-2017.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/La_Liga-table-2017.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2017_18/Ligue_1-table-2017.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/Ligue_1-table-2017.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2017_18/Serie_A-table-2017.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2017_18/Serie_A-table-2017.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2018_19/Bundesliga-table-2018.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/Bundesliga-table-2018.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2018_19/EFLC-table-2018.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/EFLC-table-2018.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2018_19/EPL-table-2018.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/EPL-table-2018.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2018_19/La_Liga-table-2018.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/La_Liga-table-2018.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2018_19/Ligue_1-table-2018.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/Ligue_1-table-2018.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2018_19/Serie_A-table-2018.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2018_19/Serie_A-table-2018.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2019_20/Bundesliga-table-2019.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/Bundesliga-table-2019.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2019_20/EFLC-table-2019.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/EFLC-table-2019.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2019_20/EPL-table-2019.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/EPL-table-2019.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2019_20/La_Liga-table-2019.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/La_Liga-table-2019.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2019_20/Ligue_1-table-2019.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/Ligue_1-table-2019.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2019_20/Serie_A-table-2019.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2019_20/Serie_A-table-2019.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2020_21/Bundesliga-table-2020.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/Bundesliga-table-2020.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2020_21/EFLC-table-2020.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/EFLC-table-2020.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2020_21/EPL-table-2020.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/EPL-table-2020.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2020_21/La_Liga-table-2020.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/La_Liga-table-2020.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2020_21/Ligue_1-table-2020.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/Ligue_1-table-2020.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2020_21/Serie_A-table-2020.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2020_21/Serie_A-table-2020.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2021_22/Bundesliga-table-2021.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/Bundesliga-table-2021.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2021_22/EFLC-table-2021.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/EFLC-table-2021.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2021_22/EPL-table-2021.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/EPL-table-2021.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2021_22/La_Liga-table-2021.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/La_Liga-table-2021.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2021_22/Ligue_1-table-2021.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/Ligue_1-table-2021.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2021_22/Serie_A-table-2021.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2021_22/Serie_A-table-2021.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2022_23/EFLC-table-2022.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2022_23/EFLC-table-2022.pbz2
--------------------------------------------------------------------------------
/data_directory/leaguetable_data/2022_23/EPL-table-2022.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/leaguetable_data/2022_23/EPL-table-2022.pbz2
--------------------------------------------------------------------------------
/data_directory/misc_data/articles/The Evolution of Shooting in the Premier League.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/articles/The Evolution of Shooting in the Premier League.pdf
--------------------------------------------------------------------------------
/data_directory/misc_data/images/BoxLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/BoxLogo.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/CarryLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/CarryLogo.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/EPL-2022-23-cycle-of-results.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/EPL-2022-23-cycle-of-results.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/EPL-2022-23-cycle-of-results.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/EPL-2022-23-cycle-of-results.pptx
--------------------------------------------------------------------------------
/data_directory/misc_data/images/JK Twitter Logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/JK Twitter Logo.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/PassLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/PassLogo.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/RecoveryLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/RecoveryLogo.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/TackleLogo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/TackleLogo.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-1-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-1-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-1-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-1-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-1-1-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-1-1-3.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-10-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-10-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-10-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-10-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-11-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-11-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-2-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-2-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-2-1-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-3.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-2-1-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-1-4.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-2-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-2-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-2-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-2-2-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-3-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-3-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-3-1-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-3.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-3-1-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-4.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-3-1-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-3-1-5.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-4-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-4-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-4-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-2-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-4-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-2-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-4-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-3-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-4-3-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-4-3-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-2-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-2-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-3-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-3-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-3-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-3-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-3-3.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-4-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-4-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-4-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-5-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-5-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-3.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-5-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-5-4.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-6-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-6-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-6-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-7-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-5-7-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-5-7-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-2-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-2-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-3-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-3-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-4-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-4-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-5-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-5-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-5-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-5-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-6-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-6-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-6-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-6-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-6-7-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-6-7-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-7-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-7-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-7-2-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-2-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-7-2-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-7-2-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-9-1-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-9-1-1.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/example-9-1-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/example-9-1-2.png
--------------------------------------------------------------------------------
/data_directory/misc_data/images/getting-started-roadmap.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/images/getting-started-roadmap.png
--------------------------------------------------------------------------------
/data_directory/misc_data/log_regression_xg_data.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/log_regression_xg_data.pbz2
--------------------------------------------------------------------------------
/data_directory/misc_data/neural_net_xg_data.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/neural_net_xg_data.pbz2
--------------------------------------------------------------------------------
/data_directory/misc_data/worldcup_2010_to_2018_distcovered.xlsx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/misc_data/worldcup_2010_to_2018_distcovered.xlsx
--------------------------------------------------------------------------------
/data_directory/statsbomb_data/README.txt:
--------------------------------------------------------------------------------
1 | Data collected from Statsbomb is not owned by the repository creator, and therefore will not be hosted within the repository.
--------------------------------------------------------------------------------
/data_directory/transfermarkt_data/2021_22/transfermarkt_GB1_2021-2022.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2021_22/transfermarkt_GB1_2021-2022.pbz2
--------------------------------------------------------------------------------
/data_directory/transfermarkt_data/2021_22/transfermarkt_GB2_2021-2022.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2021_22/transfermarkt_GB2_2021-2022.pbz2
--------------------------------------------------------------------------------
/data_directory/transfermarkt_data/2022_23/transfermarkt_FR2_2022-2023.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2022_23/transfermarkt_FR2_2022-2023.pbz2
--------------------------------------------------------------------------------
/data_directory/transfermarkt_data/2022_23/transfermarkt_GB2_2022-2023.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/transfermarkt_data/2022_23/transfermarkt_GB2_2022-2023.pbz2
--------------------------------------------------------------------------------
/data_directory/whoscored_data/README.txt:
--------------------------------------------------------------------------------
1 | Data collected from Whoscored is not owned by the repository creator, and therefore will not be hosted within the repository.
--------------------------------------------------------------------------------
/data_directory/wyscout_data/Data Collection Report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/Data Collection Report.pdf
--------------------------------------------------------------------------------
/data_directory/wyscout_data/coaches.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/coaches.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/competitions.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/competitions.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/eventid2name.csv:
--------------------------------------------------------------------------------
1 | event,subevent,event_label,subevent_label
2 | 1,10,Duel,Air duel
3 | 1,11,Duel,Ground attacking duel
4 | 1,12,Duel,Ground defending duel
5 | 1,13,Duel,Ground loose ball duel
6 | 2,20,Foul,Foul
7 | 2,21,Foul,Hand foul
8 | 2,22,Foul,Late card foul
9 | 2,23,Foul,Out of game foul
10 | 2,24,Foul,Protest
11 | 2,25,Foul,Simulation
12 | 2,26,Foul,Time lost foul
13 | 2,27,Foul,Violent Foul
14 | 3,30,Free Kick,Corner
15 | 3,31,Free Kick,Free Kick
16 | 3,32,Free Kick,Free kick cross
17 | 3,33,Free Kick,Free kick shot
18 | 3,34,Free Kick,Goal kick
19 | 3,35,Free Kick,Penalty
20 | 3,36,Free Kick,Throw in
21 | 4,40,Goalkeeper leaving line,Goalkeeper leaving line
22 | 5,50,Interruption,Ball out of the field
23 | 5,51,Interruption,Whistle
24 | 6,60,Offside,Offside
25 | 7,70,Others on the ball,Acceleration
26 | 7,71,Others on the ball,Clearance
27 | 7,72,Others on the ball,Touch
28 | 8,80,Pass,Cross
29 | 8,81,Pass,Hand pass
30 | 8,82,Pass,Head pass
31 | 8,83,Pass,High pass
32 | 8,84,Pass,Launch
33 | 8,85,Pass,Simple pass
34 | 8,86,Pass,Smart pass
35 | 9,90,Save attempt,Reflexes
36 | 9,91,Save attempt,Save attempt
37 | 10,100,Shot,Shot
38 |
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_England.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_England.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_European_Championship.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_European_Championship.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_France.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_France.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_Germany.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_Germany.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_Italy.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_Italy.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_Spain.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_Spain.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/events/events_World_Cup.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/events/events_World_Cup.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_England.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_England.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_European_Championship.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_European_Championship.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_France.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_France.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_Germany.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_Germany.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_Italy.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_Italy.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_Spain.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_Spain.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/matches/matches_World_Cup.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/matches/matches_World_Cup.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/playerank.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/playerank.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/players.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/players.pbz2
--------------------------------------------------------------------------------
/data_directory/wyscout_data/tags2name.csv:
--------------------------------------------------------------------------------
1 | Tag,Label,Description
2 | 101,Goal,Goal
3 | 102,own_goal,Own goal
4 | 301,assist,Assist
5 | 302,keyPass,Key pass
6 | 1901,counter_attack,Counter attack
7 | 401,Left,Left foot
8 | 402,Right,Right foot
9 | 403,head/body,Head/body
10 | 1101,direct,Direct
11 | 1102,indirect,Indirect
12 | 2001,dangerous_ball_lost,Dangerous ball lost
13 | 2101,blocked,Blocked
14 | 801,high,High
15 | 802,low,Low
16 | 1401,interception,Interception
17 | 1501,clearance,Clearance
18 | 201,opportunity,Opportunity
19 | 1301,Feint,Feint
20 | 1302,missed ball,Missed ball
21 | 501,free_space_r,Free space right
22 | 502,free_space_l,Free space left
23 | 503,take_on_l,Take on left
24 | 504,take_on_r,Take on right
25 | 1601,sliding_tackle,Sliding tackle
26 | 601,anticipated,Anticipated
27 | 602,anticipation,Anticipation
28 | 1701,red_card,Red card
29 | 1702,yellow_card,Yellow card
30 | 1703,second_yellow_card,Second yellow card
31 | 1201,gb,Position: Goal low center
32 | 1202,gbr,Position: Goal low right
33 | 1203,gc,Position: Goal center
34 | 1204,gl,Position: Goal center left
35 | 1205,glb,Position: Goal low left
36 | 1206,gr,Position: Goal center right
37 | 1207,gt,Position: Goal high center
38 | 1208,gtl,Position: Goal high left
39 | 1209,gtr,Position: Goal high right
40 | 1210,obr,Position: Out low right
41 | 1211,ol,Position: Out center left
42 | 1212,olb,Position: Out low left
43 | 1213,or,Position: Out center right
44 | 1214,ot,Position: Out high center
45 | 1215,otl,Position: Out high left
46 | 1216,otr,Position: Out high right
47 | 1217,pbr,Position: Post low right
48 | 1218,pl,Position: Post center left
49 | 1219,plb,Position: Post low left
50 | 1220,pr,Position: Post center right
51 | 1221,pt,Position: Post high center
52 | 1222,ptl,Position: Post high left
53 | 1223,ptr,Position: Post high right
54 | 901,through,Through
55 | 1001,fairplay,Fairplay
56 | 701,lost,Lost
57 | 702,neutral,Neutral
58 | 703,won,Won
59 | 1801,accurate,Accurate
60 | 1802,not accurate,Not accurate
--------------------------------------------------------------------------------
/data_directory/wyscout_data/teams.pbz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/data_directory/wyscout_data/teams.pbz2
--------------------------------------------------------------------------------
/model_directory/pass_cluster_model/PassClusterModel65.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/model_directory/pass_cluster_model/PassClusterModel65.joblib
--------------------------------------------------------------------------------
/model_directory/xg_model/log_regression_xg_model.joblib:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jakeyk11/football-data-analytics/123790b78fab5c5f6e6d76eb31d78e85214797c9/model_directory/xg_model/log_regression_xg_model.joblib
--------------------------------------------------------------------------------
/projects/00_data_import_and_misc_work/download_yt_video.py:
--------------------------------------------------------------------------------
1 | # %% Imports
2 |
3 | import pytube
4 | from moviepy.editor import VideoFileClip
5 |
6 | # %% Inputs
7 |
8 | # Url of video
9 | link = "https://www.youtube.com/watch?v=1V-LVtI6t2U&ab_channel=SkySportsPremierLeague"
10 |
11 | # Save filename
12 | file_name = "output_vid.mp4"
13 |
14 | # Start of segment
15 | start = (0,6)
16 |
17 | # End of segment
18 | end = (0,12)
19 |
20 | # %% Dowload video and save
21 |
22 | yt = pytube.YouTube(link)
23 | yt.streams.filter(res="720p").first().download(filename = file_name)
24 |
25 | # %% Crop video
26 |
27 | clip = VideoFileClip(file_name)
28 | clip1 = clip.subclip(start,end)
29 | clip1.write_videofile(file_name.replace(".mp4", "_cut.mp4"))
30 |
--------------------------------------------------------------------------------
/projects/00_data_import_and_misc_work/import_data_fbref.py:
--------------------------------------------------------------------------------
1 | ## Script to download and save fbref data
2 |
3 | # %% Imports
4 |
5 | import ScraperFC as sfc
6 | import traceback
7 | import pandas as pd
8 |
9 | # %% User inputs
10 |
11 | # Select competition from following list
12 | '''['Copa Libertadores', 'Champions League', 'Europa League', 'Europa Conference League',
13 | 'World Cup', 'Copa America', 'Euros', 'Big 5 combined', 'EPL', 'Ligue 1', 'Bundesliga',
14 | 'Serie A', 'La Liga', 'MLS', 'Brazilian Serie A', 'Eredivisie', 'Liga MX', 'Primeira Liga',
15 | 'EFL Championship', 'Women Champions League', 'Womens World Cup', 'Womens Euros', 'NWSL',
16 | 'A-League Women', 'WSL', 'D1 Feminine', 'Womens Bundesliga', 'Womens Serie A', 'Liga F',
17 | 'NWSL Challenge Cup', 'NWSL Fall Series'] '''
18 |
19 | COMPETITION = 'EPL'
20 |
21 | # Select calender year in which the competition finishes
22 | COMPETITION_END_YEAR = 2024
23 |
24 | # Select whether to store player data, team data or vs team data, using one of the following case-insensitive options
25 | '''['player_only', 'team_only', 'vs_team_only', 'all'] '''
26 |
27 | STORAGE_MODE = 'all'
28 |
29 | # Replace with path of directory to store data (path is relative to directory of this script). SAVE_COMP is not needed
30 | SAVE_COMP = 'EPL'
31 | DIRECTORY = f"../../data_directory/fbref_data/{str(COMPETITION_END_YEAR-1)}_{str(COMPETITION_END_YEAR).replace('20','',1)}/{SAVE_COMP}/"
32 |
33 | # %% Scrape data
34 |
35 | # Initialise scraper
36 | scraper = sfc.FBRef()
37 |
38 | # Get data
39 | try:
40 | fbref_dict = scraper.scrape_all_stats(year=COMPETITION_END_YEAR, league=COMPETITION)
41 | except:
42 | traceback.print_exc()
43 | finally:
44 | scraper.close()
45 |
46 | # %% Format scraped data
47 |
48 | playerinfo_df = pd.DataFrame()
49 | teaminfo_for_df = pd.DataFrame()
50 | teaminfo_against_df = pd.DataFrame()
51 |
52 | # Iterate over statistic type
53 | for idx, statistic_group in enumerate(list(fbref_dict.keys())):
54 |
55 | # Team stats for
56 | temp_team_stat_for_df = fbref_dict[statistic_group][0].copy()
57 | new_col_names = []
58 | for col_name in temp_team_stat_for_df.columns:
59 | col_name_1 = '' if 'Unnamed' in col_name[0] else col_name[0]
60 | col_name_2 = col_name[1] if 'Unnamed' in col_name[0] else ' ' + col_name[1]
61 | new_col_names.append((col_name_1 + col_name_2).strip())
62 | temp_team_stat_for_df.columns = new_col_names
63 | if idx != 0:
64 | teaminfo_for_df = teaminfo_for_df.merge(temp_team_stat_for_df, left_on='Team ID', right_on='Team ID', suffixes=('', '_duplicate'), how = "outer")
65 | else:
66 | teaminfo_for_df = temp_team_stat_for_df
67 |
68 | # Team stats against
69 | temp_team_stat_against_df = fbref_dict[statistic_group][1].copy()
70 | new_col_names = []
71 | for col_name in temp_team_stat_against_df.columns:
72 | col_name_1 = '' if 'Unnamed' in col_name[0] else col_name[0]
73 | col_name_2 = col_name[1] if 'Unnamed' in col_name[0] else ' ' + col_name[1]
74 | new_col_names.append((col_name_1 + col_name_2).strip())
75 | temp_team_stat_against_df.columns = new_col_names
76 | if idx != 0:
77 | teaminfo_against_df = teaminfo_against_df.merge(temp_team_stat_against_df, left_on='Team ID', right_on='Team ID', suffixes=('', '_duplicate'), how = "outer")
78 | else:
79 | teaminfo_against_df = temp_team_stat_against_df
80 |
81 | # Player stats
82 | temp_player_stat_df = fbref_dict[statistic_group][2].copy()
83 | new_col_names = []
84 | for col_name in temp_player_stat_df.columns:
85 | col_name_1 = '' if 'Unnamed' in col_name[0] else col_name[0]
86 | col_name_2 = col_name[1] if 'Unnamed' in col_name[0] else ' ' + col_name[1]
87 | new_col_names.append((col_name_1 + col_name_2).strip())
88 | temp_player_stat_df.columns = new_col_names
89 | if idx != 0:
90 | playerinfo_df = playerinfo_df.merge(temp_player_stat_df, left_on=['Player', 'Player ID', 'Squad'], right_on=['Player', 'Player ID', 'Squad'], suffixes=('', '_duplicate'), how = "outer")
91 | else:
92 | playerinfo_df = temp_player_stat_df
93 |
94 | # Remove duplicate columns
95 | teaminfo_for_df = teaminfo_for_df.loc[:,[False if '_duplicate' in x else True for x in teaminfo_for_df.columns]]
96 | teaminfo_against_df = teaminfo_against_df.loc[:,[False if '_duplicate' in x else True for x in teaminfo_against_df.columns]]
97 | playerinfo_df = playerinfo_df.loc[:,[False if '_duplicate' in x else True for x in playerinfo_df.columns]]
98 |
99 | # Adjust data types
100 | for col_name in playerinfo_df.columns:
101 | try:
102 | playerinfo_df[col_name] = playerinfo_df[col_name].astype(float)
103 | except:
104 | pass
105 |
106 | # %% Save scraped data
107 |
108 | file_extension_name = COMPETITION.lower() + ' ' + str(COMPETITION_END_YEAR)
109 |
110 | if STORAGE_MODE.lower().replace('_',' ') == 'player only':
111 | playerinfo_df.to_json(DIRECTORY + file_extension_name + ' player data.json')
112 |
113 | elif STORAGE_MODE.lower().replace('_',' ') == 'team only':
114 | teaminfo_for_df.to_json(DIRECTORY + file_extension_name + ' team data.json')
115 |
116 | elif STORAGE_MODE.lower().replace('_',' ') == 'vs team only':
117 | teaminfo_against_df.to_json(DIRECTORY + file_extension_name + ' vs team data.json')
118 |
119 | else:
120 | playerinfo_df.to_json(DIRECTORY + file_extension_name + ' player data.json')
121 | teaminfo_for_df.to_json(DIRECTORY + file_extension_name + ' team data.json')
122 | teaminfo_against_df.to_json(DIRECTORY + file_extension_name + ' vs team data.json')
--------------------------------------------------------------------------------
/projects/00_data_import_and_misc_work/import_data_leaguetable.py:
--------------------------------------------------------------------------------
1 | # Import user-specified data from Sky league-tables using custom football data module
2 |
3 | #%% Imports
4 |
5 | import os
6 | import sys
7 | import numpy as np
8 |
9 | # %% Add custom tools to path
10 |
11 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
12 | sys.path.append(root_folder)
13 |
14 | import analysis_tools.get_football_data as gfd
15 |
16 | # %% User inputs
17 |
18 | # Input league identifier and starting year of season
19 | league = "EFLC"
20 | start_year = '2018'
21 |
22 | # Set up folder path
23 | folder_path = f"../../data_directory/leaguetable_data/{start_year}_{str(int(start_year.replace('20','', 1)) + 1)}"
24 |
25 | # %% Get data
26 |
27 | league_table = gfd.get_league_table(league, start_year, folderpath=folder_path)
28 |
29 |
30 |
31 |
32 |
33 |
--------------------------------------------------------------------------------
/projects/00_data_import_and_misc_work/import_data_whoscored.py:
--------------------------------------------------------------------------------
1 | # Import user-specified data from WhoScored using custom football data module
2 |
3 | #%% Imports
4 |
5 | import os
6 | import sys
7 | import numpy as np
8 |
9 | # %% Add custom tools to path
10 |
11 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
12 | sys.path.append(root_folder)
13 |
14 | import analysis_tools.get_football_data as gfd
15 |
16 | # %% User inputs
17 |
18 | # Input first and last match id to obtain data from
19 | match_id_start = 1729275
20 | match_id_end = 1729293
21 |
22 | # Input year folder
23 | year = '2023'
24 |
25 | # Input league folder
26 | league = 'EPL'
27 |
28 | # %% Set-up file path and match ids
29 | match_ids = np.arange(match_id_start, match_id_end+1)
30 | folderpath = f"../../data_directory/whoscored_data/{year}_{str(int(year.replace('20','',1)) + 1)}/{league}"
31 |
32 | # %% Get data
33 | for match_id in match_ids:
34 | match_id = str(match_id)
35 |
36 | # Obtain and save data using custom function
37 | events, players, mappings = gfd.get_whoscored_data(match_id, get_mappings=True, save_to_file=True, folderpath=folderpath)
--------------------------------------------------------------------------------
/projects/00_data_import_and_misc_work/scrape_data_transfermarkt.py:
--------------------------------------------------------------------------------
1 | # Scrape user-specified data from transfermarkt.com using custom football data module
2 |
3 | # %% Imports
4 |
5 | import os
6 | import sys
7 |
8 | # %% Add custom tools to path
9 |
10 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
11 | sys.path.append(root_folder)
12 |
13 | import analysis_tools.get_football_data as gfd
14 |
15 | # %% User Inputs
16 |
17 | # Input league country (England = GB, Spain = ES, Germany = L, Italy = IT, France = FR, Scotland = SC)
18 | country_code = 'FR'
19 |
20 | # Input league number (for example Premier League = 1, Championship = 2, League One = 3, etc.)
21 | division_num = '2'
22 |
23 | # Input year that season started
24 | start_year = '2022'
25 |
26 | # Choose whether to obtains stats from all competitions (False = League comp. only, True = All comps.)
27 | all_comps = False
28 |
29 | # Obtain and save data using custom function
30 | player_info = gfd.get_transfermarkt_data(country_code, division_num, start_year, all_comps, save_to_file=True, folderpath=f"../../data_directory/transfermarkt_data/{start_year}_{str(int(start_year.replace('20','')) + 1)}")
--------------------------------------------------------------------------------
/projects/03_model_development_and_implementation/pass_cluster_data_collection.py:
--------------------------------------------------------------------------------
1 | ''' Load data for pass clustering '''
2 |
3 | # %% Imports
4 |
5 | import pandas as pd
6 | import numpy as np
7 | import os
8 | import bz2
9 | import pickle
10 |
11 | # %% Select data to load
12 |
13 | data_to_load = [['EPL', '2022'],
14 | ['La_Liga', '2022'],
15 | ['Ligue_1', '2022'],
16 | ['Bundesliga', '2022'],
17 | ['Serie_A', '2022'],
18 | ['EFLC', '2022'],
19 | ['EPL', '2021'],
20 | ['La_Liga', '2021'],
21 | ['Ligue_1', '2021'],
22 | ['Bundesliga', '2021'],
23 | ['Serie_A', '2021'],
24 | ['EFLC', '2021'],
25 | ['EPL', '2020'],
26 | ['La_Liga', '2020'],
27 | ['Ligue_1', '2020'],
28 | ['Bundesliga', '2020'],
29 | ['Serie_A', '2020'],
30 | ['EFLC', '2020'],
31 | ]
32 |
33 | # Initialise storage dataframes
34 | passes_df = pd.DataFrame()
35 |
36 | for data in data_to_load:
37 | league = data[0]
38 | year = data[1]
39 | league_passes = pd.DataFrame()
40 |
41 | file_path_evts = f"../../data_directory/whoscored_data/{data[1]}_{str(int(data[1].replace('20','', 1)) + 1)}/{data[0]}"
42 | files = os.listdir(file_path_evts)
43 |
44 | # Load event data match by match
45 | for file in files:
46 | if file == 'event-types.pbz2':
47 | event_types = bz2.BZ2File(f"{file_path_evts}/{file}", 'rb')
48 | event_types = pickle.load(event_types)
49 | elif '-eventdata-' in file:
50 | match_events = bz2.BZ2File(f"{file_path_evts}/{file}", 'rb')
51 | match_events = pickle.load(match_events)
52 | match_passes = match_events[(match_events['outcomeType'] == 'Successful') &
53 | (match_events['eventType'] == 'Pass') &
54 | (match_events['satisfiedEventsTypes'].apply(lambda x: not (31 in x or 34 in x or 212 in x)))]
55 |
56 |
57 | league_passes = pd.concat([league_passes, match_passes])
58 |
59 | # Append league data to combined dataset
60 | passes_df = pd.concat([passes_df, league_passes])
61 |
62 | print(f"{league}, {year} passes loaded")
63 |
64 | # %% Store data in random order in a group of compressed bz2 files
65 |
66 | passes_df_out = passes_df.sample(frac=1).reset_index(drop=True)
67 | num_files = 100
68 | n_passes = len(passes_df_out)
69 | sample_size = int(np.floor(n_passes/num_files))
70 | for idx in np.arange(0,num_files):
71 | print(f"Isolating sample {idx}")
72 | pass_sample_df = passes_df_out.iloc[sample_size*idx:sample_size*(idx+1)]
73 | print(f"Saving sample {idx}")
74 | with bz2.BZ2File(f"pass_data_{idx}.pbz2", "wb") as f:
75 | pickle.dump(pass_sample_df, f)
76 | print("Save complete")
77 |
--------------------------------------------------------------------------------
/projects/03_model_development_and_implementation/shot_xg_plot.py:
--------------------------------------------------------------------------------
1 | # %% Create plot of shot positions and associated xG for user-selected player, team or competition
2 | #
3 | # Inputs: Player, team or competition to plot xG for
4 | #
5 | # Outputs: Plot of shot positions and associated xG
6 | #
7 | # Notes: Uses logistic regression xG model.
8 |
9 | # %% Imports
10 |
11 | import bz2
12 | import pickle
13 | from PIL import Image
14 | import requests
15 | from io import BytesIO
16 | import pandas as pd
17 | import matplotlib.pyplot as plt
18 | import matplotlib as mpl
19 | from mplsoccer.pitch import VerticalPitch
20 |
21 | # %% User inputs
22 |
23 | # Select player, team or competition to plot shots for
24 | player_team_or_comp = 'Mohamed Salah'
25 |
26 | # Logo to add to plot figure
27 | #logo = 'https://1000logos.net/wp-content/uploads/2019/01/German-Bundesliga-Logo-2002.png'
28 | logo = "https://logos-world.net/wp-content/uploads/2020/06/Liverpool-Logo.png"
29 |
30 | # %% Set constants
31 |
32 | PITCH_WIDTH_Y = 80
33 | PITCH_LENGTH_X = 120
34 |
35 | # %% Load xG model and data
36 |
37 | shots_model_df = bz2.BZ2File("../../data_directory/misc_data/log_regression_xg_data.pbz2", 'rb')
38 | shots_model_df = pickle.load(shots_model_df)
39 |
40 | # %% Isolate shots for selected player or team
41 |
42 | if not shots_model_df[shots_model_df['competition_name']==player_team_or_comp].empty:
43 | selected_shots = shots_model_df[shots_model_df['competition_name']==player_team_or_comp]
44 | comp_selected = 1
45 |
46 | elif not shots_model_df[shots_model_df['team_name']==player_team_or_comp].empty:
47 | selected_shots = shots_model_df[shots_model_df['team_name']==player_team_or_comp]
48 | comp_selected = 0
49 |
50 | elif not shots_model_df[shots_model_df['player_name']==player_team_or_comp].empty:
51 | selected_shots = shots_model_df[shots_model_df['player_name']==player_team_or_comp]
52 | comp_selected = 0
53 |
54 | else:
55 | selected_shots = pd.DataFrame()
56 | comp_selected = 0
57 |
58 | # Individual dataframe for shots/headers/goals/no-goals etc.
59 | selected_ground_shots = selected_shots[selected_shots['header_tag']==0]
60 | selected_ground_goals = selected_ground_shots[selected_ground_shots['goal']==1]
61 | selected_headers = selected_shots[selected_shots['header_tag']==1]
62 | selected_headed_goals = selected_headers[selected_headers['goal']==1]
63 |
64 | # Lowest xG goal
65 | lowest_xg_goal = selected_shots[selected_shots['goal']==1].sort_values('xG').head(1)
66 | highest_xg_miss = selected_shots[selected_shots['goal']==0].sort_values('xG', ascending=False).head(1)
67 |
68 | # %% Plot shots
69 |
70 | # Overwrite rcParams
71 | mpl.rcParams['xtick.color'] = "white"
72 | mpl.rcParams['ytick.color'] = "white"
73 | mpl.rcParams['xtick.labelsize'] = 10
74 | mpl.rcParams['ytick.labelsize'] = 10
75 |
76 | # Plot pitch
77 | pitch = VerticalPitch(half=True,pitch_color='#313332', line_color='white', linewidth=1, stripe=False)
78 | fig, ax = pitch.grid(nrows=1, ncols=1, title_height = 0.03, grid_height=0.7, endnote_height=0.05, axis=False)
79 | fig.set_size_inches(9, 7)
80 | fig.set_facecolor('#313332')
81 |
82 | # Plot ground shots
83 | ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_ground_shots['c_yards'], PITCH_LENGTH_X - selected_ground_shots['x_yards'],
84 | marker='h', s=200, alpha=0.2, c=selected_ground_shots['xG'], edgecolors='w',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno, zorder=2)
85 | p1 = ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_ground_goals['c_yards'], PITCH_LENGTH_X - selected_ground_goals['x_yards'],
86 | marker='h', s=200, c=selected_ground_goals['xG'], edgecolors='w', lw=2, vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno, zorder=2)
87 |
88 | # Plot headers
89 | ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_headers['c_yards'], PITCH_LENGTH_X - selected_headers['x_yards'],
90 | marker='o', s=200, alpha=0.2, c=selected_headers['xG'], edgecolors='w',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno, zorder=2)
91 | ax['pitch'].scatter(PITCH_WIDTH_Y/2 + selected_headed_goals['c_yards'], PITCH_LENGTH_X - selected_headed_goals['x_yards'],
92 | marker='o', s=200, c=selected_headed_goals['xG'], edgecolors='w', lw=2, vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno, zorder=2)
93 |
94 | ax['pitch'].set_ylim([59.9,125])
95 |
96 | # Plot highest xG miss and lowest xG goal chance
97 | if lowest_xg_goal['header_tag'].values==1:
98 | lowxg_marker = 'o'
99 | else:
100 | lowxg_marker = 'h'
101 | if highest_xg_miss['header_tag'].values==1:
102 | highxg_marker = 'o'
103 | else:
104 | highxg_marker = 'h'
105 |
106 | ax['pitch'].scatter(PITCH_WIDTH_Y/2 + highest_xg_miss['c_yards'], PITCH_LENGTH_X - highest_xg_miss['x_yards'],
107 | marker=highxg_marker, s=200, c='r', edgecolors='grey', lw = 2.5 ,vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno, zorder=3)
108 | ax['pitch'].scatter(PITCH_WIDTH_Y/2 + lowest_xg_goal['c_yards'], PITCH_LENGTH_X - lowest_xg_goal['x_yards'],
109 | marker=lowxg_marker, s=200, c='g', edgecolors='w', lw = 2.5 ,vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno, zorder=3)
110 |
111 |
112 | # Add colorbar
113 | cb_ax = fig.add_axes([0.53, 0.107, 0.35, 0.03])
114 | cbar = fig.colorbar(p1, cax=cb_ax, orientation='horizontal')
115 | cbar.outline.set_edgecolor('w')
116 | cbar.set_label(" xG", loc = "left", color='w', fontweight='bold', labelpad=-28.5)
117 |
118 | # Manual legend
119 | legend_ax = fig.add_axes([0.075, 0.07, 0.5, 0.08])
120 | legend_ax.axis("off")
121 | plt.xlim([0,5])
122 | plt.ylim([0,1])
123 | legend_ax.scatter(0.2, 0.7, marker='h', s=200, c='#313332', edgecolors='w')
124 | legend_ax.scatter(0.2, 0.2, marker='o', s=200, c='#313332', edgecolors='w')
125 | legend_ax.text(0.35, 0.61, "Foot", color="w")
126 | legend_ax.text(0.35, 0.11, "Header", color="w")
127 | legend_ax.scatter(1.3, 0.7, marker='h', s=200, c='purple', edgecolors='w', lw=2)
128 | legend_ax.scatter(1.3, 0.2, marker='h', alpha=0.2, s=200, c='purple', edgecolors='w')
129 | legend_ax.text(1.45, 0.61, "Goal", color="w")
130 | legend_ax.text(1.465, 0.11, "No Goal", color="w")
131 | legend_ax.scatter(2.4, 0.7, marker='h', s=200, c='g', edgecolors='w', lw=2.5)
132 | legend_ax.scatter(2.4, 0.2, marker='h', s=200, c='r', edgecolors='grey', lw=2.5)
133 | legend_ax.text(2.55, 0.61, "Lowest xG Goal", color="w")
134 | legend_ax.text(2.565, 0.11, "Highest xG Miss", color="w")
135 |
136 | # Title text
137 | subtitle_text = f"{selected_shots['competition_name'].unique()[0]}"
138 | subsubtitle_text = "2017-2018"
139 | if comp_selected == 1:
140 | title_text = "Expected Goals"
141 | elif comp_selected == 0:
142 | title_text = f"{player_team_or_comp} Expected Goals"
143 |
144 | fig.text(0.18,0.92, title_text, fontweight="bold", fontsize=16, color='w')
145 | fig.text(0.18,0.883, subtitle_text, fontweight="regular", fontsize=14, color='w')
146 | fig.text(0.18,0.852, subsubtitle_text, fontweight="regular", fontsize=10, color='w')
147 |
148 |
149 | # Stats
150 | if selected_shots['goal'].sum()-selected_shots.sum()['xG'] > 0:
151 | sign = '+'
152 | else:
153 | sign=''
154 |
155 | fig.text(0.65,0.925, "Shots:", fontweight="bold", fontsize=10, color='w')
156 | fig.text(0.65,0.9, "xG:", fontweight="bold", fontsize=10, color='w')
157 | fig.text(0.65,0.875, "Goals:", fontweight="bold", fontsize=10, color='w')
158 | fig.text(0.65,0.85, "xG Perf:", fontweight="bold", fontsize=10, color='w')
159 | fig.text(0.73,0.925, f"{int(selected_shots.count()[0])}", fontweight="regular", fontsize=10, color='w')
160 | fig.text(0.73,0.9, f"{round(selected_shots.sum()['xG'],1)}", fontweight="regular", fontsize=10, color='w')
161 | fig.text(0.73,0.875, f"{int(selected_shots['goal'].sum())}", fontweight="regular", fontsize=10, color='w')
162 | fig.text(0.73,0.85, f"{sign}{int(round(100*(selected_shots['goal'].sum()-selected_shots.sum()['xG'])/selected_shots.sum()['xG'],0))}%", fontweight="regular", fontsize=10, color='w')
163 |
164 | fig.text(0.79,0.927, "xG/shot:", fontweight="bold", fontsize=10, color='w')
165 | fig.text(0.79,0.9, "Goal/shot:", fontweight="bold", fontsize=10, color='w')
166 | fig.text(0.79,0.875, "L xG Goal:", fontweight="bold", fontsize=10, color='w')
167 | fig.text(0.79,0.85, "H xG Miss:", fontweight="bold", fontsize=10, color='w')
168 | fig.text(0.89,0.925, f"{round(selected_shots.sum()['xG']/selected_shots.count()[0],2)}", fontweight="regular", fontsize=10, color='w')
169 | fig.text(0.89,0.9, f"{round(selected_shots['goal'].sum()/selected_shots.count()[0],2)}", fontweight="regular", fontsize=10, color='w')
170 | fig.text(0.89,0.875, f"{round(lowest_xg_goal['xG'].values[0],2)}", fontweight="regular", fontsize=10, color='w')
171 | fig.text(0.89,0.85, f"{round(highest_xg_miss['xG'].values[0],2)}", fontweight="regular", fontsize=10, color='w')
172 |
173 |
174 | # Footer text
175 | fig.text(0.5, 0.02, "Created by Jake Kolliari. Data provided by Wyscout.com",
176 | fontstyle="italic", ha="center", fontsize=9, color="white")
177 |
178 | # Add WC Logo
179 | ax = fig.add_axes([0.02,0.8,0.2,0.2])
180 | ax.axis("off")
181 | response = requests.get(logo)
182 | img = Image.open(BytesIO(response.content))
183 | ax.imshow(img)
184 |
--------------------------------------------------------------------------------
/projects/03_model_development_and_implementation/xg_log_regression_model.py:
--------------------------------------------------------------------------------
1 | # %% Expected Goals Model, using Wyscout data from Top 5 Leagues in 2017/18
2 | #
3 | # Inputs: Leagues to use to train xg model
4 | #
5 | # Outputs: xG model, displayed as an xG heatmap
6 | # Dataframe all shots in chosen leagues, including shot information and xG.
7 | #
8 | # Notes: None
9 |
10 | # %% Imports
11 |
12 | import os
13 | import sys
14 | import matplotlib.pyplot as plt
15 | import matplotlib as mpl
16 | import numpy as np
17 | import pandas as pd
18 | from sklearn.linear_model import LogisticRegression
19 | from mplsoccer.pitch import VerticalPitch
20 | import pickle
21 | import bz2
22 | from PIL import Image
23 | from joblib import dump, load
24 |
25 | # %% Add custom tools to path
26 |
27 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
28 | sys.path.append(root_folder)
29 |
30 | import analysis_tools.wyscout_data_engineering as wde
31 |
32 | # %% User inputs
33 |
34 | # Leagues whose Wyscout data is loaded below; shots from these leagues are used to fit the xG model
35 | leagues = ["England", "Italy", "France", "Germany", "Spain"]
36 |
37 | # %% Set constants
38 | # Pitch and goal dimensions; the *_yards shot columns and the xG test grid further down are scaled from these
39 | PITCH_WIDTH_Y = 80  # across the pitch
40 | PITCH_LENGTH_X = 120  # along the pitch
41 | GOAL_WIDTH_Y = 8  # half of this is used as the post offset in the shot-angle formula
42 |
43 | # %% Load Wyscout data for all competitions
44 | # The custom helper returns six objects for the chosen leagues; event_data_by_match is not used in the cells visible here
45 | match_data, event_data, event_data_by_match, player_data, competition_data, team_data = wde.format_wyscout_data(leagues)
46 |
47 | # %% Define dataframe of all shots in event data
48 |
49 | # Initialise shot model dataframe
50 | shots_model_df = pd.DataFrame()
51 | i = 0
52 |
53 | # Loop through shots and obtain/calculate shot information
54 | for _, shot in event_data[event_data['eventName']=='Shot'].iterrows():
55 |
56 | # Player, team and competition
57 | if shot['playerId'] != 0:
58 | shots_model_df.loc[i, 'player_name'] = player_data.loc[shot['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
59 | else:
60 | shots_model_df.loc[i, 'player_name'] = np.nan
61 | shots_model_df.loc[i, 'team_name'] = team_data.loc[shot['teamId']]['name'].encode('ascii', 'strict').decode('unicode-escape')
62 | shots_model_df.loc[i, 'competition_name'] = competition_data.loc[match_data.loc[shot['matchId']]['competitionId']]['name']
63 |
64 | # Position and distance info
65 | shots_model_df.loc[i, 'x_yards'] = (PITCH_LENGTH_X/100)*(100 - shot['positions'][0]['x'])
66 | shots_model_df.loc[i, 'c_yards'] = (PITCH_WIDTH_Y/100)*(shot['positions'][0]['y'] - 50)
67 | shots_model_df.loc[i,'distance_yards'] = np.sqrt(shots_model_df.loc[i, 'x_yards']**2 + shots_model_df.loc[i, 'c_yards']**2)
68 |
69 | # Angle info
70 | angle_denominator = (shots_model_df.loc[i, 'x_yards']**2 + shots_model_df.loc[i, 'c_yards']**2 - (GOAL_WIDTH_Y/2)**2)
71 | if angle_denominator == 0:
72 | angle = np.pi/2
73 | else:
74 | angle = np.arctan((2*(GOAL_WIDTH_Y/2)*shots_model_df.loc[i, 'x_yards'])/angle_denominator)
75 | if angle<0:
76 | angle = np.pi + angle
77 | shots_model_df.loc[i, 'angle'] = angle
78 |
79 | # Header info
80 | shots_model_df.loc[i, 'header_tag'] = 0
81 | if {'id': 403} in shot['tags']:
82 | shots_model_df.loc[i, 'header_tag'] = 1
83 |
84 | # Outcome
85 | shots_model_df.loc[i, 'goal'] = 0
86 | if {'id': 101} in shot['tags']:
87 | shots_model_df.loc[i, 'goal'] = 1
88 |
89 | i += 1
90 |
91 |
92 | #%% Train a logistic regression model
93 |
94 | X = shots_model_df.drop(['player_name', 'team_name', 'competition_name','goal'], axis=1)
95 | y = shots_model_df['goal']
96 |
97 | # Fit
98 | log_model = LogisticRegression()
99 | log_model.fit(X,y)
100 |
101 | # Save model
102 | dump(log_model, '../../model_directory/xg_model/log_regression_xg_model.joblib')
103 |
104 | # Coefficients
105 | a = log_model.intercept_[0]
106 | b = log_model.coef_[0]
107 |
108 | # Calculate xG
109 | shots_model_df['xG'] = log_model.predict_proba(X)[:,1]
110 |
111 | # %% Save xG data
112 |
113 | with bz2.BZ2File("../../data_directory/misc_data/log_regression_xg_data.pbz2", "wb") as f:
114 | pickle.dump(shots_model_df, f)
115 |
#%% Create an xG test-set, and predict on test set

# Grid of shot locations: one row per x-position (0 .. PITCH_LENGTH_X/2) and
# one column per y-position (0 .. PITCH_WIDTH_Y), flattened row-major so that
# x varies slowest.
n_x = int(1 + PITCH_LENGTH_X/2)
n_y = int(1 + PITCH_WIDTH_Y)
x_grid, y_grid = np.meshgrid(np.arange(n_x), np.arange(n_y), indexing='ij')
x_flat = x_grid.ravel().astype(float)
c_flat = y_grid.ravel() - PITCH_WIDTH_Y/2

# Shot angle, using the same formula as the per-shot feature calculation.
# A zero denominator maps to pi/2 and negative arctan results are shifted by
# pi. The denominator is swapped for 1 where it is zero purely to avoid a
# divide-by-zero warning; those entries are overwritten afterwards.
half_goal_width = GOAL_WIDTH_Y/2
angle_denominator = x_flat**2 + c_flat**2 - half_goal_width**2
zero_denominator = angle_denominator == 0
safe_denominator = np.where(zero_denominator, 1.0, angle_denominator)
angle_flat = np.arctan(2*half_goal_width*x_flat/safe_denominator)
angle_flat = np.where(angle_flat < 0, np.pi + angle_flat, angle_flat)
angle_flat = np.where(zero_denominator, np.pi/2, angle_flat)
distance_flat = np.sqrt(x_flat**2 + c_flat**2)

# Feature order: x, c, distance, angle, header flag (0 = ground, 1 = header)
grid_features = np.column_stack([x_flat, c_flat, distance_flat, angle_flat])
ground_features = np.column_stack([grid_features, np.zeros(x_flat.size)])
header_features = np.column_stack([grid_features, np.ones(x_flat.size)])

# Predict once per shot type instead of once per grid cell; this also avoids
# assigning a 1-element array into a scalar array element.
prob_goal_grnd = log_model.predict_proba(ground_features)[:, 1].reshape(n_x, n_y)
prob_goal_head = log_model.predict_proba(header_features)[:, 1].reshape(n_x, n_y)

# Distance / ground-shot probability pairs, in the same grid order
dists = distance_flat.tolist()
probs = prob_goal_grnd.ravel().tolist()
142 |
# %% Plot xG model
# Draws the ground-shot and header xG grids side by side on two half pitches.

# Overwrite rcParams (global matplotlib settings: white, size-10 tick labels)
mpl.rcParams['xtick.color'] = "white"
mpl.rcParams['ytick.color'] = "white"
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10

# Plot pitches: a 1x2 grid of vertical half pitches on a dark background
pitch = VerticalPitch(half=True,pitch_color='#313332', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=1, ncols=2, grid_height=0.75, space = 0.1, axis=False)
fig.set_size_inches(10, 5.5)
fig.set_facecolor('#313332')

# Add xG maps and contours: left pitch shows ground shots, right pitch headers.
# Both images share the same colour limits (vmin/vmax) so they are comparable.
pos1 = ax['pitch'][0].imshow(prob_goal_grnd, extent = (80,0,60,120) ,aspect='equal',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno)
pos2 = ax['pitch'][1].imshow(prob_goal_head, extent = (80,0,60,120) ,aspect='equal',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno)
# Dotted contour lines at xG = 0.01, 0.05, 0.2 and 0.5, labelled with their level
cs1 = ax['pitch'][0].contour(prob_goal_grnd, extent = (1,80,120,60), levels = [0.01,0.05,0.2,0.5], colors = ['darkgrey','darkgrey','darkgrey','k'], linestyles = 'dotted')
cs2 = ax['pitch'][1].contour(prob_goal_head, extent = (1,80,120,60), levels = [0.01,0.05,0.2,0.5], colors = ['darkgrey','darkgrey','darkgrey','k'], linestyles = 'dotted')
ax['pitch'][0].clabel(cs1)
ax['pitch'][1].clabel(cs2)

# Title, subtitle and a heading above each pitch (figure-fraction coordinates)
fig.text(0.045,0.9,"Expected Goals - Logistic Regression Model", fontsize=16, color="white", fontweight="bold")
fig.text(0.045,0.85,"Trained on all 40,000+ shots during the 2017/18 season across Europe's 'big five' Leagues", fontsize=14, color="white", fontweight="regular")
fig.text(0.12,0.76,"Shot Type: Left or Right Foot", fontsize=12, color="white", fontweight="bold")
fig.text(0.66,0.76,"Shot Type: Header", fontsize=12, color="white", fontweight="bold")

# Colourbar, built from the header image and placed below the right-hand pitch
cbar = fig.colorbar(pos2, ax=ax['pitch'][1], location="bottom", fraction = 0.04, pad = 0.0335)
cbar.ax.set_ylabel('xG', loc="bottom", color = "white", fontweight="bold", rotation=0, labelpad=20)

# Footer text
fig.text(0.255, 0.09, "Created by Jake Kolliari (@_JKDS_). Data provided by Wyscout.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Format and show
plt.tight_layout()
plt.show()
182 |
# %% Plot distance vs. xG
# Scatter of modelled ground-shot xG against distance for every grid location.

fig, ax = plt.subplots(nrows=1, ncols=1, figsize = (8,8), facecolor = '#313332')
ax.patch.set_alpha(0)

# Add one point per grid location (dists/probs come from the test-set cell)
plt.scatter(x = dists, y = probs, color = 'mediumaquamarine', s = 10)

# Format: white left/bottom spines, hidden top/right spines, dotted grey grid
ax.spines['bottom'].set_color('w')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_color('w')
ax.grid(lw = 0.5, color= 'grey', ls = ':')
ax.set_xlabel("Distance from Goal (yds)", fontsize=12, fontweight = "bold", labelpad = 10)
ax.set_ylabel("Probability of Scoring", fontsize=12, color = "mediumaquamarine", fontweight = "bold", labelpad = 10)

# Titles
title_text = "Expected Goals Model"
subtitle_text = "Probability of Scoring vs. Distance from Goal"
fig.text(0.08, 0.935, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.08, 0.9, subtitle_text, fontweight="regular", fontsize=13, color='w')

# Add footer text
fig.text(0.5, 0.02, "Created by Jake Kolliari (@_JKDS_). Data provided by Wyscout.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo
logo_ax = fig.add_axes([0.94, 0.005, 0.04, 0.04])
logo_ax.axis("off")
# Forward slashes work on every OS and avoid the invalid escape sequences
# (\d, \m, \i, \J) that a backslash path puts inside a normal string literal.
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
logo_ax.imshow(badge)

fig.tight_layout(rect=[0.03, 0.04, 0.97, 0.86])
--------------------------------------------------------------------------------
/projects/03_model_development_and_implementation/xg_neural_network.py:
--------------------------------------------------------------------------------
1 | # %% Expected Goals Model, using Wyscout data from Top 5 Leagues in 2017/18
2 | #
3 | # Inputs: Leagues to use to train xg model
4 | #
5 | # Outputs: xG model, displayed as an xG heatmap
6 | # Dataframe all shots in chosen leagues, including shot information and xG.
7 | #
8 | # Notes: None
9 |
10 | # %% Imports
11 |
12 | import os
13 | import sys
14 | import matplotlib.pyplot as plt
15 | import matplotlib as mpl
16 | import numpy as np
17 | import pandas as pd
18 | import tensorflow as tf
19 | from mplsoccer.pitch import VerticalPitch
20 | import pickle
21 | import bz2
22 |
23 | # %% Add custom tools to path
24 |
25 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
26 | sys.path.append(root_folder)
27 |
28 | import analysis_tools.wyscout_data_engineering as wde
29 |
30 | # %% User inputs
31 |
32 | # List of leagues to use for xg model
33 | leagues = ["England", "Italy", "France", "Germany", "Spain"]
34 |
35 | # %% Set constants
36 |
37 | PITCH_WIDTH_Y = 80
38 | PITCH_LENGTH_X = 120
39 | GOAL_WIDTH_Y = 8
40 |
41 | # %% Load Wyscout data for all competitions
42 |
43 | match_data, event_data, event_data_by_match, player_data, competition_data, team_data = wde.format_wyscout_data(leagues)
44 |
# %% Define dataframe of all shots in event data

# Rows are collected in a list and the dataframe is built once at the end.
# Growing a dataframe one cell at a time with .loc re-allocates it on every
# new row, which is very slow for tens of thousands of shots.
shot_columns = ['player_name', 'team_name', 'competition_name', 'x_yards', 'c_yards',
                'distance_yards', 'angle', 'header_tag', 'goal']
shot_rows = []

# Loop through shots and obtain/calculate shot information
for _, shot in event_data[event_data['eventName']=='Shot'].iterrows():

    # Player, team and competition (a playerId of 0 has no player record)
    if shot['playerId'] != 0:
        player_name = player_data.loc[shot['playerId']]['shortName'].encode('ascii', 'strict').decode('unicode-escape')
    else:
        player_name = np.nan
    team_name = team_data.loc[shot['teamId']]['name'].encode('ascii', 'strict').decode('unicode-escape')
    competition_name = competition_data.loc[match_data.loc[shot['matchId']]['competitionId']]['name']

    # Position and distance info: rescale the 0-100 event coordinates to pitch
    # units, with x measured from the goal line and c from the pitch centre line
    x_yards = (PITCH_LENGTH_X/100)*(100 - shot['positions'][0]['x'])
    c_yards = (PITCH_WIDTH_Y/100)*(shot['positions'][0]['y'] - 50)
    distance_yards = np.sqrt(x_yards**2 + c_yards**2)

    # Angle info: a zero denominator maps to pi/2, negative results shift by pi
    angle_denominator = (x_yards**2 + c_yards**2 - (GOAL_WIDTH_Y/2)**2)
    if angle_denominator == 0:
        angle = np.pi/2
    else:
        angle = np.arctan((2*(GOAL_WIDTH_Y/2)*x_yards)/angle_denominator)
        if angle < 0:
            angle = np.pi + angle

    # Header info (tag id 403) and outcome (tag id 101), stored as 0.0 / 1.0
    header_tag = float({'id': 403} in shot['tags'])
    goal = float({'id': 101} in shot['tags'])

    shot_rows.append([player_name, team_name, competition_name, x_yards, c_yards,
                      distance_yards, angle, header_tag, goal])

# Explicit columns keep the column order and also cover the no-shots case
shots_model_df = pd.DataFrame(shot_rows, columns=shot_columns)
88 |
89 |
#%% Train a neural network

# Features are every column except the identifying names and the target.
non_feature_columns = ['player_name', 'team_name', 'competition_name', 'goal']
X = shots_model_df.drop(columns=non_feature_columns)
y = shots_model_df['goal']

# Build model: flatten, three 256-unit ReLU layers, 2-way softmax output
neural_net = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(256, activation=tf.nn.relu),
    tf.keras.layers.Dense(2, activation = tf.nn.softmax),
])

# Compile model
neural_net.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train model
neural_net.fit(X, y, epochs=5)

# %% Make prediction

# xG is the softmax output for class 1 (goal)
class_probabilities = neural_net.predict([X])
shots_model_df['xG'] = class_probabilities[:,1]

# %% Save xG data

with bz2.BZ2File("../../data_directory/misc_data/neural_net_xg_data.pbz2", "wb") as xg_file:
    pickle.dump(shots_model_df, xg_file)
120 |
#%% Create an xG test-set, and predict on test set

# Grid of shot locations: one row per x-position (0 .. PITCH_LENGTH_X/2) and
# one column per y-position (0 .. PITCH_WIDTH_Y), flattened row-major so that
# x varies slowest. Built with array operations instead of appending ~5,000
# rows to each dataframe one at a time.
n_x = int(1 + PITCH_LENGTH_X/2)
n_y = int(1 + PITCH_WIDTH_Y)
x_grid, y_grid = np.meshgrid(np.arange(n_x), np.arange(n_y), indexing='ij')
x_flat = x_grid.ravel().astype(float)
c_flat = y_grid.ravel() - PITCH_WIDTH_Y/2

# Shot angle, using the same formula as the per-shot feature calculation.
# A zero denominator maps to pi/2 and negative arctan results are shifted by
# pi. The denominator is swapped for 1 where it is zero purely to avoid a
# divide-by-zero warning; those entries are overwritten afterwards.
half_goal_width = GOAL_WIDTH_Y/2
angle_denominator = x_flat**2 + c_flat**2 - half_goal_width**2
zero_denominator = angle_denominator == 0
safe_denominator = np.where(zero_denominator, 1.0, angle_denominator)
angle_flat = np.arctan(2*half_goal_width*x_flat/safe_denominator)
angle_flat = np.where(angle_flat < 0, np.pi + angle_flat, angle_flat)
angle_flat = np.where(zero_denominator, np.pi/2, angle_flat)
distance_flat = np.sqrt(x_flat**2 + c_flat**2)

# Dataframes for ground (header_tag = 0) and header (header_tag = 1) test sets
X_test_grnd = pd.DataFrame({'xpos': x_flat,
                            'cpos': c_flat,
                            'distance': distance_flat,
                            'angle': angle_flat,
                            'header_tag': np.zeros(x_flat.size)})
X_test_head = X_test_grnd.assign(header_tag=1.0)

# Goal-class probability for every grid location, reshaped back to the grid
prob_goal_grnd = neural_net.predict([X_test_grnd])[:,1].reshape(n_x, n_y)
prob_goal_head = neural_net.predict([X_test_head])[:,1].reshape(n_x, n_y)
146 |
# %% Plot xG model
# Draws the ground-shot and header xG grids side by side on two half pitches.

# Overwrite rcParams (global matplotlib settings: white, size-10 tick labels)
mpl.rcParams['xtick.color'] = "white"
mpl.rcParams['ytick.color'] = "white"
mpl.rcParams['xtick.labelsize'] = 10
mpl.rcParams['ytick.labelsize'] = 10

# Plot pitches: a 1x2 grid of vertical half pitches on a dark background
pitch = VerticalPitch(half=True,pitch_color='#313332', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=1, ncols=2, grid_height=0.75, space = 0.1, axis=False)
fig.set_size_inches(10, 5.5)
fig.set_facecolor('#313332')

# Add xG maps and contours: left pitch shows ground shots, right pitch headers.
# Both images share the same colour limits (vmin/vmax) so they are comparable.
pos1 = ax['pitch'][0].imshow(prob_goal_grnd, extent = (80,0,60,120) ,aspect='equal',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno)
pos2 = ax['pitch'][1].imshow(prob_goal_head, extent = (80,0,60,120) ,aspect='equal',vmin=-0.04,vmax=0.4,cmap=plt.cm.inferno)
# Dotted contour lines at xG = 0.01, 0.05, 0.2 and 0.5, labelled with their level
cs1 = ax['pitch'][0].contour(prob_goal_grnd, extent = (1,80,120,60), levels = [0.01,0.05,0.2,0.5], colors = ['darkgrey','darkgrey','darkgrey','k'], linestyles = 'dotted')
cs2 = ax['pitch'][1].contour(prob_goal_head, extent = (1,80,120,60), levels = [0.01,0.05,0.2,0.5], colors = ['darkgrey','darkgrey','darkgrey','k'], linestyles = 'dotted')
ax['pitch'][0].clabel(cs1)
ax['pitch'][1].clabel(cs2)

# Title, subtitle and a heading above each pitch (figure-fraction coordinates)
fig.text(0.045,0.9,"Expected Goals - Neural Network", fontsize=16, color="white", fontweight="bold")
fig.text(0.045,0.85,"Trained on all 40,000+ shots during the 2017/18 season across Europe's 'big five' Leagues", fontsize=14, color="white", fontweight="regular")
fig.text(0.12,0.76,"Shot Type: Left or Right Foot", fontsize=12, color="white", fontweight="bold")
fig.text(0.66,0.76,"Shot Type: Header", fontsize=12, color="white", fontweight="bold")

# Colourbar, built from the header image and placed below the right-hand pitch
cbar = fig.colorbar(pos2, ax=ax['pitch'][1], location="bottom", fraction = 0.04, pad = 0.0335)
cbar.ax.set_ylabel('xG', loc="bottom", color = "white", fontweight="bold", rotation=0, labelpad=20)

# Footer text
fig.text(0.255, 0.09, "Created by Jake Kolliari. Data provided by Wyscout.com",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Format and show
plt.tight_layout()
plt.show()
--------------------------------------------------------------------------------
/projects/04_match_reports/off_def_shape_report_ws.py:
--------------------------------------------------------------------------------
1 | # %% Create shape visualisation
2 |
3 | # %% Imports and parameters
4 |
5 | import pandas as pd
6 | import matplotlib.pyplot as plt
7 | import matplotlib as mpl
8 | from PIL import Image
9 | from mplsoccer.pitch import VerticalPitch
10 | import os
11 | import sys
12 | import bz2
13 | import pickle
14 | import numpy as np
15 | from collections import Counter
16 |
17 | # %% Function definitions
18 |
19 |
def protected_divide(n, d):
    """Return ``n / d``, or 0 when the divisor ``d`` is falsy (e.g. zero)."""
    if not d:
        return 0
    return n / d
22 |
23 | # %% Add custom tools to path
24 |
25 | root_folder = os.path.abspath(os.path.dirname(
26 | (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
27 | sys.path.append(root_folder)
28 |
29 | import analysis_tools.whoscored_custom_events as wce
30 | import analysis_tools.pitch_zones as pz
31 | import analysis_tools.whoscored_data_engineering as wde
32 | import analysis_tools.logos_and_badges as lab
33 |
34 | # %% User inputs
35 |
36 | # Input WhoScored match id
37 | match_id = '1640989'
38 |
39 | # Select year
40 | year = '2022'
41 |
42 | # Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
43 | league = 'EPL'
44 |
45 | # Select team codes
46 | home_team = 'Liverpool'
47 | away_team = 'Aston Villa'
48 |
49 | # Team name to print
50 | home_team_print = None
51 | away_team_print = None
52 |
53 | # Pass flow zone type
54 | zone_type = 'jdp_custom'
55 |
56 | # Pass hull inclusion
57 | central_pct_off = '1std'
58 | central_pct_def = '1std'
59 |
# %% Logos, colours and printed names

# NOTE(review): the away team is also requested with the 'home' option -
# presumably so each team is drawn in its primary colours; confirm intent.
home_logo, home_colourmap = lab.get_team_badge_and_colour(home_team, 'home')
away_logo, away_colourmap = lab.get_team_badge_and_colour(away_team, 'home')

# Fall back to the data team name when no display name was given
if home_team_print is None:
    home_team_print = home_team

if away_team_print is None:
    away_team_print = away_team

cmaps = [home_colourmap, away_colourmap]

# League code -> name printed in the report title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League 1', 'EFL2': 'EFL League 2'}
76 |
# %% Read in data

# Opta data

# Season folder is "<start year>_<two-digit end year>", e.g. 2022 -> "2022_23".
# The end year comes from arithmetic rather than str.replace('20', ''), which
# strips every "20" and so turns '2020' into an empty string.
season_folder = f"{year}_{(int(year) + 1) % 100:02d}"
match_data_path = f"../../data_directory/whoscored_data/{season_folder}/{league}"
match_file_suffix = f"{match_id}-{home_team}-{away_team}.pbz2"

# Context managers close the compressed files once they have been read.
# NOTE: pickle.load can execute arbitrary code; only load trusted data files.
with bz2.BZ2File(f"{match_data_path}/match-eventdata-{match_file_suffix}", 'rb') as events_file:
    events_df = pickle.load(events_file)
with bz2.BZ2File(f"{match_data_path}/match-playerdata-{match_file_suffix}", 'rb') as players_file:
    players_df = pickle.load(players_file)
85 |
# %% Calculate Scoreline (special accounting for own goals)

# The first two distinct team ids in players_df are used as home and away
team_ids = players_df['teamId'].unique()
home_team_id = team_ids[0]
away_team_id = team_ids[1]

goal_events = events_df['eventType'] == 'Goal'
home_goal_events = (events_df['teamId'] == home_team_id) & goal_events
away_goal_events = (events_df['teamId'] == away_team_id) & goal_events

if 'isOwnGoal' in events_df.columns:
    # A value equals itself unless it is missing (NaN), so this mask is True
    # exactly where the own-goal field is populated.
    own_goal_flagged = events_df['isOwnGoal'] == events_df['isOwnGoal']
    # Each side gets its own non-own-goal goals plus the other side's own goals
    home_score = len(events_df[home_goal_events & ~own_goal_flagged])
    home_score += len(events_df[away_goal_events & own_goal_flagged])
    away_score = len(events_df[away_goal_events & ~own_goal_flagged])
    away_score += len(events_df[home_goal_events & own_goal_flagged])
else:
    home_score = len(events_df[home_goal_events])
    away_score = len(events_df[away_goal_events])
96 |
# %% Pre-process data

# Add cumulative minutes information
players_df = wde.minutes_played(players_df, events_df)

# Calculate longest consistent xi (adds the 'longest_xi' flag filtered on below)
players_df = wde.longest_xi(players_df)

# %% Aggregate data per player

playerinfo_df = wde.create_player_list(players_df)

# %% Create dataframes of defensive and offensive actions

defensive_actions_df = wde.find_defensive_actions(events_df)
offensive_actions_df = wde.find_offensive_actions(events_df)

# Initialise dataframes that accumulate one hull entry per player
defensive_hull_df = pd.DataFrame()
offensive_hull_df = pd.DataFrame()

# Create convex hull for each player flagged as part of the longest xi.
# Each hull is built from that player's own actions only, passing min_events=5
# and the central_pct_def / central_pct_off inclusion settings.
for player_id in players_df[players_df['longest_xi']==True].index:
    player_def_hull = wce.create_convex_hull(defensive_actions_df[defensive_actions_df['playerId'] == player_id], name=players_df.loc[player_id,'name'],
                                             min_events=5, include_events=central_pct_def, pitch_area = 10000)
    player_off_hull = wce.create_convex_hull(offensive_actions_df[offensive_actions_df['playerId'] == player_id], name=players_df.loc[player_id,'name'],
                                             min_events=5, include_events=central_pct_off, pitch_area = 10000)
    # Append this player's hulls to the running dataframes
    offensive_hull_df = pd.concat([offensive_hull_df, player_off_hull])
    defensive_hull_df = pd.concat([defensive_hull_df, player_def_hull])
126 |
# %% Create viz of area covered by each player when passing

plot_team = 'away'

# Plot pitches
pitch = VerticalPitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=1, ncols=2, title_height=0.22,
                     grid_height=0.7, endnote_height=0.06, axis=False)
fig.set_size_inches(8.5, 7.5)
fig.set_facecolor('#313332')

# Team to plot (turn the 'home'/'away' flag into the team name)
plot_team = home_team if plot_team == 'home' else away_team

# Positions that share one fixed colour
FULL_BACK_POSITIONS = ('DR', 'DL', '')
WIDE_POSITIONS = ('MR', 'ML', 'AML', 'AMR', 'FWR', 'FWL')

# Positions usually held by several players: each player in the group takes
# the next colour from the group's palette so they can be told apart.
POSITION_GROUPS = {'FW': 'FW', 'MC': 'MC', 'DMC': 'MC', 'AMC': 'MC', 'DC': 'DC'}
GROUP_PALETTES = {'FW': ['tomato', 'lightpink'],
                  'MC': ['snow', 'violet', 'cyan', 'yellow'],
                  'DC': ['tomato', 'gold', 'lawngreen']}


def player_initials(player_name):
    """Return two characters identifying a player.

    A single-word name gives its first two characters; otherwise the
    upper-cased first letters of the first two words are used.
    """
    name_parts = player_name.split()
    if len(name_parts) < 2:
        return player_name.strip()[0:2]
    return name_parts[0][0].upper() + name_parts[1][0].upper()


def plot_team_hulls(hull_df, hull_ax):
    """Draw the convex hull and centre marker of every ``plot_team`` player.

    Reads ``players_df``, ``plot_team`` and ``pitch`` from module scope.

    Args:
        hull_df: dataframe indexed by player name with the columns
            'hull_reduced_x', 'hull_reduced_y' and 'hull_centre'.
        hull_ax: pitch axis to draw on.
    """
    players_seen_in_group = Counter()

    for player_name, hull_row in hull_df.iterrows():

        # Determine team the hull applies to
        player_rows = players_df[players_df['name']==player_name]
        if player_rows['team'].values[0] != plot_team:
            continue

        # Get player position (as a scalar) and assign colour based on position
        position = player_rows['position'].values[0]
        if position in FULL_BACK_POSITIONS:
            hull_colour = 'lawngreen'
        elif position in WIDE_POSITIONS:
            hull_colour = 'deepskyblue'
        elif position in POSITION_GROUPS:
            group = POSITION_GROUPS[position]
            palette = GROUP_PALETTES[group]
            # Wrap around so an unusually crowded group cannot index past the palette
            hull_colour = palette[players_seen_in_group[group] % len(palette)]
            players_seen_in_group[group] += 1
        else:
            hull_colour = 'lightpink'

        # Define text colour based on marker colour
        text_colour = 'k' if hull_colour in ['snow', 'white'] else 'w'

        initials = player_initials(player_name)

        # Plot: event points, filled hull, hull outline, centre marker, initials
        hull_ax.scatter(hull_row['hull_reduced_y'], hull_row['hull_reduced_x'], color=hull_colour, s=20, alpha = 0.3, zorder=2)
        plot_hull = pitch.convexhull(hull_row['hull_reduced_x'], hull_row['hull_reduced_y'])
        pitch.polygon(plot_hull, ax=hull_ax, facecolor=hull_colour, alpha=0.2, capstyle = 'round', zorder=1)
        pitch.polygon(plot_hull, ax=hull_ax, edgecolor=hull_colour, alpha=0.3, facecolor='none', capstyle = 'round', zorder=1)
        hull_ax.scatter(hull_row['hull_centre'][1], hull_row['hull_centre'][0], marker ='H', color = hull_colour, alpha = 0.6, s = 400, zorder = 3)
        hull_ax.scatter(hull_row['hull_centre'][1], hull_row['hull_centre'][0], marker ='H', edgecolor = hull_colour, facecolor = 'none', alpha = 1, lw = 2, s = 400, zorder = 3)
        hull_ax.text(hull_row['hull_centre'][1], hull_row['hull_centre'][0], initials, fontsize = 8, fontweight = 'bold', va = 'center', ha = 'center', color = text_colour, zorder = 4)


# Plot attacking convex hulls on the left pitch
plot_team_hulls(offensive_hull_df, ax['pitch'][0])

# Plot defensive convex hulls on the right pitch
plot_team_hulls(defensive_hull_df, ax['pitch'][1])
240 |
# Ax titles
ax['pitch'][0].set_title(f"{plot_team} Offensive Shape", pad = 0, color = "w", fontweight = "bold")
ax['pitch'][1].set_title(f"{plot_team} Defensive Shape", pad = 0, color = "w", fontweight = "bold")

# Label based on include parameter: 'Nstd' becomes 'N Std. Dev' / 'N SD',
# anything else is printed as a percentage.
# NOTE(review): only central_pct_off is used for the label, although the
# subtitle describes both offensive and defensive hulls - confirm intended.
hull_include = central_pct_off.replace('std','') + ' Std. Dev' if 'std' in str(central_pct_off) else str(central_pct_off) + '%'
hull_include_s = central_pct_off.replace('std','') + ' SD' if 'std' in str(central_pct_off) else str(central_pct_off) + '%'

# Title text (the World Cup is labelled with a single year, not a season)
title_text = f"{leagues[league]} - {year}/{int(year) + 1}" if league not in ['World_Cup'] else f"{leagues[league]} - {year}"
subtitle_text = f"{home_team_print} {home_score}-{away_score} {away_team_print}"
subsubtitle_text = f"Offensive and defensive territories, defined by central\n{hull_include} of offensive and defensive actions per player"

fig.text(0.5, 0.93, title_text, ha='center',
         fontweight="bold", fontsize=20, color='w')
fig.text(0.5, 0.882, subtitle_text, ha='center',
         fontweight="bold", fontsize=18, color='w')
fig.text(0.5, 0.82, subsubtitle_text, ha='center',
         fontweight="regular", fontsize=11, color='w')

# Add home team Logo (from here on `ax` is re-bound to each new inset axis)
ax = fig.add_axes([0.07, 0.825, 0.14, 0.14])
ax.axis("off")
ax.imshow(home_logo)

# Add away team Logo
ax = fig.add_axes([0.79, 0.825, 0.14, 0.14])
ax.axis("off")
ax.imshow(away_logo)

# Add direction of play arrow
ax = fig.add_axes([0.47, 0.17, 0.06, 0.6])
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
ax.axis("off")
ax.arrow(0.65, 0.2, 0, 0.58, color="w", width=0.001, head_width = 0.1, head_length = 0.02)
ax.text(0.495, 0.48, "Direction of play", ha="center", va="center", fontsize=10, color="w", fontweight="regular", rotation=90)

# Footer text
fig.text(0.5, 0.035, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo
ax = fig.add_axes([0.875, 0.01, 0.07, 0.07])
ax.axis("off")
# Forward slashes work on every OS and avoid the invalid escape sequences
# (\d, \m, \i, \J) that a backslash path puts inside a normal string literal.
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Save image
fig.savefig(f"shape_reports/{league}-{match_id}-{plot_team}-shape", dpi=300)
--------------------------------------------------------------------------------
/projects/05_competition_reports_top_players/player_high_defensive_actions.py:
--------------------------------------------------------------------------------
1 | # %% Create visualisation of top players by defensive actions across a selection of games
2 | #
3 | # Inputs: Year to plot data from
4 | # League to plot data from
5 | # Whoscored match ids
6 | # Positions not to include
7 | # Date of run
8 | # Normalisation mode
9 | # Minimum play time
10 | #
11 | # Outputs: Top 12 players by defensive actions
12 |
13 | # %% Imports and parameters
14 |
15 | import pandas as pd
16 | import matplotlib.pyplot as plt
17 | import matplotlib as mpl
18 | from PIL import Image, ImageEnhance
19 | from mplsoccer.pitch import VerticalPitch, Pitch
20 | import os
21 | import sys
22 | import bz2
23 | import pickle
24 | import numpy as np
25 | from collections import Counter
26 | import highlight_text as htext
27 | import glob
28 |
29 | # %% Function definitions
30 |
31 |
def protected_divide(n, d):
    """Divide ``n`` by ``d``, returning 0 instead of dividing by a falsy ``d``."""
    if not d:
        return 0
    return n / d
34 |
35 | # %% Add custom tools to path
36 |
37 | root_folder = os.path.abspath(os.path.dirname(
38 | (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
39 | sys.path.append(root_folder)
40 |
41 | import analysis_tools.whoscored_custom_events as wce
42 | import analysis_tools.pitch_zones as pz
43 | import analysis_tools.whoscored_data_engineering as wde
44 | import analysis_tools.logos_and_badges as lab
45 |
46 | # %% User inputs
47 |
48 | # Select year
49 | year = '2022'
50 |
51 | # Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
52 | league = 'EPL'
53 |
54 | # Select position to exclude
55 | pos_exclude=[]
56 |
57 | # Position formatting on title
58 | pos_input = ''
59 |
60 | # Input run-date
61 | run_date = '28/05/2023'
62 |
63 | # Normalisation (None, '_90', '_100opp_pass')
64 | norm_mode = '_100opp_pass'
65 | #norm_mode = '_90'
66 |
67 | # Min minutes played
68 | min_mins = 1800
69 |
70 | # Brighten logo
71 | logo_brighten = True
72 |
73 | # %% League logo
74 |
75 | comp_logo = lab.get_competition_logo(league, year=year, logo_brighten=logo_brighten)
76 |
# %% Get data

# Season folder is "<start year>_<two-digit end year>", e.g. 2022 -> "2022_23".
# The end year comes from arithmetic rather than str.replace('20', ''), which
# strips every "20" and so turns '2020' into an empty string.
season_folder = f"{year}_{(int(year) + 1) % 100:02d}"
file_path = f"../../data_directory/whoscored_data/{season_folder}/{league}"
files = os.listdir(file_path)


def load_pbz2(path):
    """Unpickle a bz2-compressed file, closing the file handle afterwards.

    NOTE: pickle.load can execute arbitrary code; only load trusted data files.
    """
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


# Per-match frames are collected and concatenated once at the end, rather than
# re-copying the growing dataframe on every file.
match_event_frames = []
match_player_frames = []

# Load data, dispatching on the file name
for file in files:
    if file == 'event-types.pbz2':
        event_types = load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(load_pbz2(f"{file_path}/{file}"))

# Storage dataframes (left empty when no matching files were found)
events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()
104 |
# %% Pre-process data

# Add pass recipient
events_df = wde.get_recipient(events_df)

# Add cumulative minutes information
players_df = wde.minutes_played(players_df, events_df)

# Calculate longest consistent xi
players_df = wde.longest_xi(players_df)

# Count opposition passes with x <= 34 that each player faces, then keep the
# result under its own name before 'opp_pass' is recomputed below.
# NOTE(review): presumably x <= 34 is the opposition's own defensive third on
# the 0-100 pitch scale, mirroring the x > 67 filter used further down.
players_df = wde.events_while_playing(events_df[events_df['x']<= 34], players_df, event_name = 'Pass', event_team = 'opposition')
players_df['oppthird_opp_pass'] = players_df['opp_pass']

# Count all opposition passes that each player faces (all pitch locations)
players_df = wde.events_while_playing(events_df, players_df, event_name = 'Pass', event_team = 'opposition')

# %% Aggregate data per player

playerinfo_df = wde.create_player_list(players_df, additional_cols = ['opp_pass', 'oppthird_opp_pass'])

#%% Aggregation

# Aggregate all defensive actions
all_def_actions = wde.find_defensive_actions(events_df)
playerinfo_df = wde.group_player_events(all_def_actions, playerinfo_df, primary_event_name='def_actions')

# Aggregate defensive actions with x > 67 (stored as 'oppthird_def_actions')
oppthird_def_actions = all_def_actions[all_def_actions['x']>67]
playerinfo_df = wde.group_player_events(oppthird_def_actions, playerinfo_df, primary_event_name='oppthird_def_actions')

# Defensive actions per 100 opp passes and per 90 mins, rounded to 2 d.p.
playerinfo_df['def_actions_100opp_pass'] = round(100*playerinfo_df['def_actions']/playerinfo_df['opp_pass'],2)
playerinfo_df['def_actions_90'] = round(90*playerinfo_df['def_actions']/playerinfo_df['mins_played'],2)
playerinfo_df['oppthird_def_actions_100opp_pass'] = round(100*playerinfo_df['oppthird_def_actions']/playerinfo_df['oppthird_opp_pass'],2)
playerinfo_df['oppthird_def_actions_90'] = round(90*playerinfo_df['oppthird_def_actions']/playerinfo_df['mins_played'],2)
142 |
# %% Player removal

# Keep players outside the excluded positions who reached the minutes threshold
playerinfo_reduced_df = playerinfo_df[~playerinfo_df['position'].isin(pos_exclude) & (playerinfo_df['mins_played']>=min_mins)]

# %% Ordering based on normalisation

# Primary and tie-break sort columns for each supported normalisation mode
sort_columns_by_mode = {
    None: ['oppthird_def_actions', 'oppthird_def_actions_100opp_pass'],
    '_90': ['oppthird_def_actions_90', 'oppthird_def_actions_100opp_pass'],
    '_100opp_pass': ['oppthird_def_actions_100opp_pass', 'oppthird_def_actions_90'],
}

# Fail here with a clear message instead of a NameError further down, which is
# what an unsupported mode would otherwise cause.
if norm_mode not in sort_columns_by_mode:
    raise ValueError(f"Unsupported norm_mode {norm_mode!r}; expected None, '_90' or '_100opp_pass'")

sorted_df = playerinfo_reduced_df.sort_values(sort_columns_by_mode[norm_mode], ascending=[False, False])

# %% Text formatting

# Wording appended to the title for each normalisation mode
title_addition_by_mode = {
    None: '',
    '_90': 'per 90mins',
    '_100opp_pass': 'per 100 opposition passes in that third',
}
title_addition = title_addition_by_mode[norm_mode]

# With no excluded positions the title refers to all players
if not pos_exclude:
    title_pos_str = 'players'
    file_pos_str = ''
else:
    title_pos_str = pos_input
    file_pos_str = '-' + pos_input
171 |
# %% Create viz of top players by defensive actions in the opposition third

# Overwrite rcparams
for tick_param in ('xtick.color', 'ytick.color'):
    mpl.rcParams[tick_param] = 'w'

# Set-up pitch subplots (3 x 4 grid, one panel per ranked player)
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=3, ncols=4, grid_height=0.75, space=0.12, axis=False)
fig.set_size_inches(14, 10)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# Styling shared by every panel
kde_style = dict(fill=True, shade_lowest=True, cmap='viridis', cut=8, alpha=0.6, antialiased=True, zorder=0)
stat_label_style = dict(fontsize=8, fontweight='bold', color='w', zorder=1)
stat_value_style = dict(fontsize=8, color='w', zorder=1)

# One panel per player: density of all defensive actions, with the opposition-third actions highlighted
top_players = sorted_df.head(12).iterrows()
for rank, (player_ax, (player_id, player)) in enumerate(zip(ax['pitch'], top_players), start=1):
    player_def_actions = all_def_actions[all_def_actions['playerId'] == player_id]
    player_opp_third_def_actions = oppthird_def_actions[oppthird_def_actions['playerId'] == player_id]

    player_ax.set_title(f" {rank}: {player['name']}", loc="left", color='w', fontsize=10)

    # The density is drawn twice (80 and 100 levels), one layer over the other
    for level_count in (80, 100):
        pitch.kdeplot(player_def_actions['x'], player_def_actions['y'], ax=player_ax,
                      levels=level_count, **kde_style)

    # Grey out everything short of x = 67 and mark that boundary with a dashed line
    player_ax.fill([0, 67, 67, 0], [0, 0, 100, 100], 'grey', alpha=0.7, zorder=0)
    player_ax.plot([67, 67], [0, 99], 'w', ls='dashed', zorder=0)

    # All actions in black, opposition-third actions in white
    pitch.scatter(player_def_actions['x'], player_def_actions['y'],
                  color='k', alpha=0.2, s=12, zorder=1, ax=player_ax)
    pitch.scatter(player_opp_third_def_actions['x'], player_opp_third_def_actions['y'],
                  color='w', alpha=0.6, s=12, zorder=1, ax=player_ax)

    # Raw count is always shown; the normalised value is added when a mode is selected
    player_ax.text(0, -8, "Opp. 3rd Actions:", **stat_label_style)
    player_ax.text(39, -8, f"{int(player['oppthird_def_actions'])}", **stat_value_style)

    if norm_mode == '_100opp_pass':
        player_ax.text(48, -8, "Per 100 Opp. Passes:", **stat_label_style)
        player_ax.text(95, -8, f"{round(player['oppthird_def_actions_100opp_pass'],1)}", **stat_value_style)

    if norm_mode == '_90':
        player_ax.text(50, -8, "Per 90 Mins:", **stat_label_style)
        player_ax.text(85, -8, f"{round(player['oppthird_def_actions_90'],1)}", **stat_value_style)

    # Team badge in the top-right corner of the panel
    team = player['team']
    team_logo, _ = lab.get_team_badge_and_colour(team)

    ax_pos = player_ax.get_position()

    logo_ax = fig.add_axes([ax_pos.x1-0.035, ax_pos.y1, 0.035, 0.035])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)
224 |
# Create title and subtitles
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year) + 1} - Top 12 {title_pos_str} by Tendency to Defend from the Front"
subtitle_text = f"Heatmaps of defensive actions shown. Ranking by total number of defensive actions in opp third, {title_addition}"
subsubtitle_text = f"Ball recoveries, blocks, clearances, interceptions and tackles included. Correct as of {run_date}. Players with less than {min_mins} mins play-time omitted."

# Title
fig.text(0.1, 0.945, title_text, fontweight="bold", fontsize=15, color='w')
fig.text(0.1, 0.92, subtitle_text, fontweight="regular", fontsize=13, color='w')
fig.text(0.1, 0.8975, subsubtitle_text, fontweight="regular", fontsize=10, color='w')

# Add direction of play arrow (drawn on its own axes explicitly, not on whichever axes is current)
arrow_ax = fig.add_axes([0.042, 0.05, 0.18, 0.01])
arrow_ax.axis("off")
arrow_ax.arrow(0.51, 0.15, 0.1, 0, color="white")
fig.text(0.13, 0.03, "Direction of play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add footer text
fig.text(0.5, 0.04, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
comp_logo_ax = fig.add_axes([0.015, 0.877, 0.1, 0.1])
comp_logo_ax.axis("off")
comp_logo_ax.imshow(comp_logo)

# Add twitter logo
# Forward slashes avoid the invalid escape sequences of the backslash form ('\.', '\d', ...)
# and resolve on Windows as well as POSIX systems
twitter_ax = fig.add_axes([0.92, 0.025, 0.04, 0.04])
twitter_ax.axis("off")
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
twitter_ax.imshow(badge)

# Save image (file name encodes league, year, position filter and normalisation mode)
fig.savefig(f"top_defensive_actions/{league}-{year}-top-defensive-actions{file_pos_str.replace(' & ','-').replace(' ','-')}-{title_addition.replace(' ','-')}", dpi=300)
262 |
263 |
--------------------------------------------------------------------------------
/projects/06_competition_reports_top_teams/team_ball_winning.py:
--------------------------------------------------------------------------------
1 | # %% Create visualisation of team ball wins and mean win height
2 | #
3 | # Inputs: Year to plot data from
4 | # League to plot data from
5 | # Date of run
6 | # Selection of whether to include percentages on visual
7 | # Selection of whether to brighten logo
8 | #
9 | # Output: Heatmaps showing ball win zones for each team & mean ball win height
10 |
11 | # %% Imports and parameters
12 |
13 | import pandas as pd
14 | import matplotlib.pyplot as plt
15 | import matplotlib as mpl
16 | from PIL import Image, ImageEnhance
17 | from mplsoccer.pitch import VerticalPitch, Pitch
18 | import matplotlib.patheffects as path_effects
19 | import os
20 | import sys
21 | import bz2
22 | import pickle
23 | import numpy as np
24 | from collections import Counter
25 | import highlight_text as htext
26 | import glob
27 |
28 | # %% Add custom tools to path
29 |
30 | root_folder = os.path.abspath(os.path.dirname(
31 | (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
32 | sys.path.append(root_folder)
33 |
34 | import analysis_tools.whoscored_custom_events as wce
35 | import analysis_tools.pitch_zones as pz
36 | import analysis_tools.whoscored_data_engineering as wde
37 | import analysis_tools.logos_and_badges as lab
38 |
39 | # %% User Inputs
40 |
41 | # Select year
42 | year = '2022'
43 |
44 | # Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
45 | league = 'EPL'
46 |
47 | # Input run-date
48 | run_date = '28/05/2023'
49 |
50 | # Select whether to label %
51 | label_pct = False
52 |
53 | # Select whether to brighten logo
54 | logo_brighten = True
55 |
56 | # Select whether to use team colours
57 | team_colour = False
58 |
59 | # %% Get competition logo
60 |
61 | comp_logo = lab.get_competition_logo(league, year, logo_brighten=logo_brighten)
62 |
# %% Get data

# Season folders are named "<start year>_<two-digit end year>" (e.g. 2022_23).
# The end year is computed arithmetically: stripping '20' from the year string breaks for
# '2020', which becomes '' and cannot be converted to an integer.
season_end = (int(year) + 1) % 100
file_path = f"../../data_directory/whoscored_data/{year}_{season_end:02d}/{league}"
files = os.listdir(file_path)


def _load_pbz2(path):
    """Return the object stored in the bz2-compressed pickle at path, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code - only load trusted project data files
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


# Collect per-match frames and concatenate once, rather than re-copying the
# accumulated dataframe for every file
match_event_frames = []
match_player_frames = []

# Load data
for file in files:
    if file == 'event-types.pbz2':
        event_types = _load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pbz2(f"{file_path}/{file}"))

# Storage dataframes (empty when the folder holds no match files)
events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()
90 |
# %% Isolate ball wins

# A ball win is a successful interception, tackle or blocked pass
successful = events_df['outcomeType'] == 'Successful'

interceptions = events_df[(events_df['eventType'] == 'Interception') & successful]
tackles = events_df[(events_df['eventType'] == 'Tackle') & successful]
pass_blocks = events_df[(events_df['eventType'] == 'BlockedPass') & successful]

ball_wins_df = pd.concat([interceptions, tackles, pass_blocks], axis=0)

# %% Get teams and order on mean height of ball recovery

# Sort alphabetically initially
teams = sorted(set(players_df['team']))
team_count = len(teams)

# Mean x-coordinate of each team's ball wins, keyed by team name
team_ball_win_height = {}
for team in teams:

    # Get team events via the team id found in the player data
    team_id = players_df.loc[players_df['team'] == team, 'teamId'].values[0]
    team_ball_wins = ball_wins_df[ball_wins_df['teamId'] == team_id]

    # Get mean recovery height
    team_ball_win_height[team] = team_ball_wins['x'].mean()

# Convert to a list of (team, mean height) tuples, highest mean ball win first
team_ball_win_height = sorted(team_ball_win_height.items(), key=lambda entry: entry[1], reverse=True)
119 |
# %% Custom colormap

heatmap_colours = ["#313332", "#47516B", "#848178", "#B2A66F", "#FDE636"]
CustomCmap = mpl.colors.LinearSegmentedColormap.from_list("", heatmap_colours)

# %% Create visual

# Overwrite rcparams
for tick_param in ('xtick.color', 'ytick.color'):
    mpl.rcParams[tick_param] = 'w'

# Define grid dimensions (enough rows of four to hold every team)
ncols = 4
nrows = int(np.ceil(len(team_ball_win_height)/ncols))

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=nrows, ncols=ncols, grid_height=0.8, title_height = 0.13, endnote_height = 0.04, space=0.12, axis=False)
fig.set_size_inches(14, 15)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# Black outline used behind text drawn on top of the heatmap
path_eff = [path_effects.Stroke(linewidth=3, foreground='k'), path_effects.Normal()]

# Loop through each team, already ordered by mean ball win height
for rank, (full_team_name, mean_win_height) in enumerate(team_ball_win_height, start=1):
    team_ax = ax['pitch'][rank - 1]

    # Get team events
    team_id = players_df.loc[players_df['team'] == full_team_name, 'teamId'].values[0]
    team_ball_wins = ball_wins_df[ball_wins_df['teamId'] == team_id]

    # Get team logo and colour; long names are shortened for the panel title only
    team_logo, team_cmap = lab.get_team_badge_and_colour(full_team_name)
    team_name = full_team_name[0:13] + '...' if len(full_team_name) > 14 else full_team_name

    # Fall back to the shared colormap unless team colours were requested
    if not team_colour:
        team_cmap = CustomCmap

    # Draw heatmap of ball win share per zone (6 x 5 bins)
    bin_statistic = pitch.bin_statistic(team_ball_wins['x'], team_ball_wins['y'],
                                        statistic='count', bins=(6, 5), normalize=True)
    pitch.heatmap(bin_statistic, team_ax, cmap=team_cmap, edgecolor='w', lw=0.5, zorder=0, alpha=0.7)

    # Draw mean ball win position: a coloured line with a thin black line on either side
    pitch.lines(mean_win_height, 0.5, mean_win_height, 99.5, color=team_cmap(255), lw=3, zorder=2, ax=team_ax)
    for edge_offset in (-1, 1):
        pitch.lines(mean_win_height+edge_offset, 0.5, mean_win_height+edge_offset, 99.5,
                    color='k', lw=1.5, zorder=4, ax=team_ax)
    team_ax.text(mean_win_height+3, 6, f"{round(mean_win_height,1)}%\nup pitch", fontsize=13, color='w',
                 path_effects=path_eff)

    # Label heatmap zones with their percentage share if selected
    if label_pct:
        labels = pitch.label_heatmap(bin_statistic, color='w', fontsize=10, fontweight = 'bold',
                                     ax=team_ax, ha='center', va='center', str_format='{:.0%}', path_effects=path_eff)

    team_ax.set_title(f" {rank}: {team_name}", loc = "left", color='w', fontsize = 16)

    # Add team logo at the top-right corner of the panel
    ax_pos = team_ax.get_position()
    logo_ax = fig.add_axes([ax_pos.x1-0.02, ax_pos.y1, 0.02, 0.02])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)
185 |
# Title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year)+1} - Teams Ranked by Average Ball Win Height"
# NOTE(review): two highlight styles are supplied below, but this string contains no <...>
# highlight markers - the two highlighted phrases appear to be missing; confirm the intended text
subtitle_text = "Heatmaps showing Zones of and "
subsubtitle_text = f"Correct as of {run_date}"

fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=20, color='w')
htext.fig_text(0.12, 0.934, s=subtitle_text, fontweight="bold", fontsize=18, color='w',
               highlight_textprops=[{"color": 'yellow', "fontweight": 'bold'}, {"color": 'grey', "fontweight": 'bold'}])
fig.text(0.12, 0.9, subsubtitle_text, fontweight="regular", fontsize=16, color='w')

# Add direction of play arrow (drawn on its own axes explicitly, not on whichever axes is current)
arrow_ax = fig.add_axes([0.042, 0.028, 0.18, 0.005])
arrow_ax.axis("off")
arrow_ax.arrow(0.61, 0.15, -0.1, 0, color="white")
fig.text(0.13, 0.02, "Direction of opposition play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add footer text
fig.text(0.5, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
comp_logo_ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
comp_logo_ax.axis("off")
comp_logo_ax.imshow(comp_logo)

# Add twitter logo
# Forward slashes avoid the invalid escape sequences of the backslash form ('\.', '\d', ...)
# and resolve on Windows as well as POSIX systems
twitter_ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
twitter_ax.axis("off")
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
twitter_ax.imshow(badge)

# Save image, creating the output folder first so a fresh checkout does not fail here
os.makedirs("team_ball_winning", exist_ok=True)
fig.savefig(f"team_ball_winning/{league}-{year}-team-ball-winning", dpi=300)
--------------------------------------------------------------------------------
/projects/06_competition_reports_top_teams/team_fullback_combinations.py:
--------------------------------------------------------------------------------
# %% Create visualisation of threat generated by passes between each team's full backs
#
# Inputs:   Year to plot data from
#           League to plot data from
#           Date of run
#           Selection of whether to brighten logo
#           Max time between a pass and a shot for the pass to count as shot-creating
7 |
8 | # %% Imports and parameters
9 |
10 | import pandas as pd
11 | import matplotlib.pyplot as plt
12 | import matplotlib as mpl
13 | import matplotlib.cm as cm
14 | from PIL import Image, ImageEnhance
15 | from mplsoccer.pitch import VerticalPitch, Pitch
16 | import matplotlib.patheffects as path_effects
17 | import os
18 | import sys
19 | import bz2
20 | import pickle
21 | import numpy as np
22 | from collections import Counter
23 | import highlight_text as htext
24 | import glob
25 |
26 | # %% Add custom tools to path
27 |
28 | root_folder = os.path.abspath(os.path.dirname(
29 | (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
30 | sys.path.append(root_folder)
31 |
32 | import analysis_tools.whoscored_custom_events as wce
33 | import analysis_tools.pitch_zones as pz
34 | import analysis_tools.whoscored_data_engineering as wde
35 | import analysis_tools.logos_and_badges as lab
36 |
37 | # %% User Inputs
38 |
39 | # Select year
40 | year = '2022'
41 |
42 | # Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
43 | league = 'EPL'
44 |
45 | # Input run-date
46 | run_date = '28/05/2023'
47 |
48 | # Select whether to brighten logo
49 | logo_brighten = True
50 |
51 | # Max time between a pass and shot that tags the pass as "shot-creating"
52 | min_delta = 1/6
53 |
54 | # %% Get competition logo
55 |
56 | comp_logo = lab.get_competition_logo(league, year, logo_brighten=logo_brighten)
57 |
# %% Get data

# Season folders are named "<start year>_<two-digit end year>" (e.g. 2022_23).
# The end year is computed arithmetically: stripping '20' from the year string breaks for
# '2020', which becomes '' and cannot be converted to an integer.
season_end = (int(year) + 1) % 100
file_path = f"../../data_directory/whoscored_data/{year}_{season_end:02d}/{league}"
files = os.listdir(file_path)


def _load_pbz2(path):
    """Return the object stored in the bz2-compressed pickle at path, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code - only load trusted project data files
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


# Collect per-match frames and concatenate once, rather than re-copying the
# accumulated dataframe for every file
match_event_frames = []
match_player_frames = []

# Load data
for file in files:
    if file == 'event-types.pbz2':
        event_types = _load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pbz2(f"{file_path}/{file}"))

# Storage dataframes (empty when the folder holds no match files)
events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()
85 |
# %% Synthesise additional info

# Pass recipient
events_df = wde.get_recipient(events_df)

# Add cumulative minutes played information
players_df = wde.minutes_played(players_df, events_df)
events_df = wde.cumulative_match_mins(events_df)

# %% Create dictionary of teams, and store full back pass combinations against each team

# Passes satisfying any of these event types are not treated as in-play passes
# NOTE(review): presumably set-piece / restart types - confirm the ids against event_types
excluded_pass_type_ids = (31, 32, 33, 34, 212)

# Get all team names
teams = sorted(set(players_df['team']))

# Initialise dictionary to store full-back combos per team
team_fb_combos = dict.fromkeys(teams, 0)

# Initialise dictionary to store mean xT generated per match from full-back combos
team_fb_combo_xt = dict.fromkeys(teams, 0)

# Loop through each team
for team in teams:

    # Get team id and all matches team have played
    team_players = players_df[players_df['team'] == team]
    team_id = team_players['teamId'].values[0]
    match_ids = set(team_players['match_id'])
    match_combo_frames = []

    # Loop through each match
    for match_id in match_ids:

        # Get ids (player dataframe index) of the right and left full/wing backs listed for the team
        team_match_players = players_df[(players_df['teamId'] == team_id) & (players_df['match_id'] == match_id)]
        starting_rb_id = team_match_players[team_match_players['position'].isin(['DMR', 'DR'])].index.values.tolist()
        starting_lb_id = team_match_players[team_match_players['position'].isin(['DML', 'DL'])].index.values.tolist()

        # Only look for passes between the pair when both a LB/LWB and RB/RWB are present
        if not starting_rb_id or not starting_lb_id:
            continue
        rb_id, lb_id = starting_rb_id[0], starting_lb_id[0]

        # Get all events completed by team within match
        team_match_events = events_df[(events_df['teamId'] == team_id) & (events_df['match_id'] == match_id)]

        # Get in-play successful passes between the two full backs, in either direction
        is_successful_pass = ((team_match_events['eventType'] == 'Pass') &
                              (team_match_events['outcomeType'] == 'Successful'))
        is_in_play = ~team_match_events['satisfiedEventsTypes'].apply(
            lambda satisfied: any(type_id in satisfied for type_id in excluded_pass_type_ids))
        rb_to_lb = (team_match_events['playerId'] == rb_id) & (team_match_events['pass_recipient'] == lb_id)
        lb_to_rb = (team_match_events['playerId'] == lb_id) & (team_match_events['pass_recipient'] == rb_id)

        # Copy so the new column is added to an independent frame rather than to a filtered slice
        fb_pass_combos_match = team_match_events[is_successful_pass & is_in_play & (rb_to_lb | lb_to_rb)].copy()

        # Add column to store whether a shot happens within min_delta minutes of the pass being made
        fb_pass_combos_match['leads_to_shot'] = False

        # For each pass, check the team's following events in the same period for a shot
        for pass_idx, fb_pass in fb_pass_combos_match.iterrows():
            following_evts = team_match_events[(team_match_events['period'] == fb_pass['period']) &
                                               (team_match_events['cumulative_mins'] > fb_pass['cumulative_mins']) &
                                               (team_match_events['cumulative_mins'] <= fb_pass['cumulative_mins'] + min_delta)]
            fb_pass_combos_match.loc[pass_idx, 'leads_to_shot'] = True in following_evts['isShot'].tolist()

        match_combo_frames.append(fb_pass_combos_match)

    # Build up dataframe of full back combos for team across multiple matches. A team that never
    # fields both full backs gets an empty frame that still carries the event columns, so the
    # column look-ups below and in the plotting cell do not raise a KeyError.
    if match_combo_frames:
        fb_pass_combos = pd.concat(match_combo_frames)
    else:
        fb_pass_combos = pd.DataFrame(columns=[*events_df.columns, 'leads_to_shot'])

    # Store full back combo in dictionary
    team_fb_combos[team] = fb_pass_combos

    # Calculate mean xT generated from full back combos per match played and store in dictionary
    team_fb_combo_xt[team] = fb_pass_combos['xThreat_gen'].sum()/len(match_ids)

# Order teams by mean xT generated per match, highest first
team_order_xt_90 = sorted(team_fb_combo_xt, key=team_fb_combo_xt.get, reverse=True)
157 |
#%% Create plot of individual teams and full back passes

# Overwrite rcparams
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Define grid dimensions from the number of teams, so leagues with more than 20 teams
# do not run past the last panel
ncols = 4
nrows = int(np.ceil(len(team_order_xt_90)/ncols))

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=nrows, ncols=ncols, grid_height=0.8, title_height = 0.13, endnote_height = 0.04, space=0.12, axis=False)
fig.set_size_inches(14, 15)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# Manual implementation of colourmap: 256 RGBA rows sampled from the upper part of viridis.
# plt.get_cmap is used because matplotlib.cm.get_cmap no longer exists in recent Matplotlib.
pass_cmap = plt.get_cmap('viridis')
pass_cmap = pass_cmap(np.linspace(0.35,1,256))

# Loop through each team, already ordered by threat generated
for idx, team in enumerate(team_order_xt_90):
    team_ax = ax['pitch'][idx]

    # Get team passes from dict, sorted by their xThreat value
    team_fb_passes = team_fb_combos[team].sort_values('xThreat')

    # Loop through individual passes to format
    for _, pass_evt in team_fb_passes.iterrows():

        if pass_evt['leads_to_shot']:
            # Shot-creating passes are drawn in white, with a dark centre on the end marker
            pitch.lines(pass_evt['x'], pass_evt['y'], pass_evt['endX'], pass_evt['endY'], color = 'w', alpha = 0.7,
                        comet=True, capstyle='round', lw=2, ax = team_ax, zorder = 2)
            pitch.scatter(pass_evt['endX'], pass_evt['endY'], color = 'w', alpha = 0.9, s=30, ax = team_ax, zorder = 3)
            pitch.scatter(pass_evt['endX'], pass_evt['endY'], color = '#313332', alpha = 1, s=10, ax = team_ax, zorder = 3)
            continue

        # Other passes: faint grey below 0.001 xT, otherwise coloured by xT (scale saturates at 0.05)
        if pass_evt['xThreat_gen'] < 0.001:
            line_colour = 'grey'
            line_alpha = 0.1
        else:
            line_colour = pass_cmap[int(255*min(pass_evt['xThreat_gen']/0.05, 1))]
            line_alpha = 0.7

        pitch.lines(pass_evt['x'], pass_evt['y'], pass_evt['endX'], pass_evt['endY'], color = line_colour, alpha = line_alpha,
                    comet=True, capstyle='round', lw=2, ax = team_ax, zorder = 2)
        pitch.scatter(pass_evt['endX'], pass_evt['endY'], color = line_colour, alpha = line_alpha+0.2, s=30, ax = team_ax, zorder = 3)

    # Add xT text to plot
    team_ax.text(2, 3, "xT / match", fontsize=8, fontweight='bold', color='w', zorder=3)
    team_ax.text(28, 3, round(team_fb_combo_xt[team],3), fontsize=8, color='w', zorder=3)

    # Add team logo at the top-right corner of the panel
    team_logo, _ = lab.get_team_badge_and_colour(team)

    ax_pos = team_ax.get_position()

    logo_ax = fig.add_axes([ax_pos.x1-0.025, ax_pos.y1, 0.025, 0.025])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)

    # Add title
    team_ax.set_title(f" {idx + 1}: {team}", loc = "left", color='w', fontsize = 14)
219 |
# Title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year)+1} − Threat Generated through Full Back Interplay"
# NOTE(review): one highlight style is supplied below, but this string contains no <...>
# highlight marker - the highlighted phrase appears to be missing; confirm the intended text
subtitle_text = "Successful in-play passes between Full Backs shown and coloured by for each team"
subsubtitle_text = f"Teams ranked by mean threat generated by passes between full-backs in starting XI. Correct as of {run_date}"

fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=20, color='w')
htext.fig_text(0.12, 0.934, s=subtitle_text, fontweight="regular", fontsize=18, color='w',
               highlight_textprops=[{"color": 'yellow', "fontweight": 'bold'}])
fig.text(0.12, 0.9, subsubtitle_text, fontweight="regular", fontsize=14, color='w')

# Add direction of play arrow (drawn on its own axes explicitly, not on whichever axes is current)
arrow_ax = fig.add_axes([0.042, 0.028, 0.18, 0.005])
arrow_ax.axis("off")
arrow_ax.arrow(0.51, 0.15, 0.1, 0, color="white")
fig.text(0.13, 0.015, "Direction of play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add legend: a row of hexagons showing the xThreat colour scale
legend_ax = fig.add_axes([0.245, 0.01, 0.2, 0.04])
legend_ax.axis("off")
legend_ax.set_xlim([0, 8])
legend_ax.set_ylim([0, 1])
hex_count = 6
path_eff = [path_effects.Stroke(linewidth=1.25, foreground='k'), path_effects.Normal()]

for hex_idx in np.arange(0, hex_count):

    # Stagger alternate hexagons vertically
    ypos = 0.36 if hex_idx % 2 == 0 else 0.64
    xpos = hex_idx/1.05 + 2.5

    if hex_idx == 0:
        # First hexagon represents passes below the colour scale threshold
        xt = '<0.001'
        color = 'grey'
    else:
        # Remaining hexagons step evenly from 0.001 to 0.05; the first keeps three decimals
        # so that it does not round to 0.0
        decimals = 3 if hex_idx == 1 else 2
        xt = round(0.001 + (0.05-0.001) * ((hex_idx-1)/(hex_count-2)), decimals)
        color = pass_cmap[int(255*(hex_idx-1)/(hex_count-2))]

    legend_ax.scatter(xpos, ypos, marker='H', s=600, color=color, edgecolors=None)
    legend_ax.text(xpos+0.03, ypos-0.02, xt, color='w', fontsize = 8, ha = "center", va = "center", path_effects = path_eff)
legend_ax.text(0.1, 0.5, "xThreat:", color='w', fontsize = 10, ha = "left", va = "center", fontweight="regular")

# Second legend: marker used for shot-creating passes
legend_ax_2 = fig.add_axes([0.46, 0.01, 0.2, 0.04])
legend_ax_2.axis("off")
legend_ax_2.scatter(0.1, 0.5, color = 'w', alpha = 1, s=80)
legend_ax_2.scatter(0.1, 0.5, color = '#313332', alpha = 1, s=30)
legend_ax_2.text(0.15, 0.5, '= Shot-creating action', va = "center", color = 'w')
legend_ax_2.set_xlim([0, 1])
legend_ax_2.set_ylim([0, 1])

# Add footer text
fig.text(0.77, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
comp_logo_ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
comp_logo_ax.axis("off")
comp_logo_ax.imshow(comp_logo)

# Add twitter logo
# Forward slashes avoid the invalid escape sequences of the backslash form ('\.', '\d', ...)
# and resolve on Windows as well as POSIX systems
twitter_ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
twitter_ax.axis("off")
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
twitter_ax.imshow(badge)

# Save image, creating the output folder first so a fresh checkout does not fail here
os.makedirs("team_fullback_combinations", exist_ok=True)
fig.savefig(f"team_fullback_combinations/{league}-{year}-team_fullback_combinations", dpi=300)
--------------------------------------------------------------------------------
/projects/06_competition_reports_top_teams/team_threat_creation.py:
--------------------------------------------------------------------------------
1 | # %% Create visualisation of team threat creation zones
2 | #
3 | # Inputs: Year to plot data from
4 | # League to plot data from
5 | # Date of run
6 | # Selection of whether to include percentages on visual
7 | # Selection of whether to brighten logo
8 |
9 | # %% Imports and parameters
10 |
11 | import pandas as pd
12 | import matplotlib.pyplot as plt
13 | import matplotlib as mpl
14 | from PIL import Image, ImageEnhance
15 | from mplsoccer.pitch import VerticalPitch, Pitch
16 | import matplotlib.patheffects as path_effects
17 | import os
18 | import sys
19 | import bz2
20 | import pickle
21 | import numpy as np
22 | from collections import Counter
23 | import highlight_text as htext
24 | import glob
25 |
26 | # %% Add custom tools to path
27 |
28 | root_folder = os.path.abspath(os.path.dirname(
29 | (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
30 | sys.path.append(root_folder)
31 |
32 | import analysis_tools.whoscored_custom_events as wce
33 | import analysis_tools.pitch_zones as pz
34 | import analysis_tools.whoscored_data_engineering as wde
35 | import analysis_tools.logos_and_badges as lab
36 |
37 | # %% User Inputs
38 |
39 | # Select year
40 | year = '2023'
41 |
42 | # Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
43 | league = 'EPL'
44 |
45 | # Input run-date
46 | run_date = '04/09/2023'
47 |
48 | # Select whether to label %
49 | label_pct = False
50 |
51 | # Logo brighten
52 | logo_brighten = True
53 |
54 | # Select whether to use team colours
55 | team_colour = False
56 |
57 | # %% Get competition logo
58 |
59 | comp_logo = lab.get_competition_logo(league, year, logo_brighten=logo_brighten)
60 |
# %% Get data for current year

# Season folders are named "<start year>_<two-digit end year>" (e.g. 2023_24).
# The end year is computed arithmetically: stripping '20' from the year string breaks for
# '2020', which becomes '' and cannot be converted to an integer.
season_end = (int(year) + 1) % 100
file_path = f"../../data_directory/whoscored_data/{year}_{season_end:02d}/{league}"
files = os.listdir(file_path)


def _load_pbz2(path):
    """Return the object stored in the bz2-compressed pickle at path, closing the file afterwards."""
    # NOTE: unpickling can execute arbitrary code - only load trusted project data files
    with bz2.BZ2File(path, 'rb') as compressed_file:
        return pickle.load(compressed_file)


# Collect per-match frames and concatenate once, rather than re-copying the
# accumulated dataframe for every file
match_event_frames = []
match_player_frames = []

# Load data
for file in files:
    if file == 'event-types.pbz2':
        event_types = _load_pbz2(f"{file_path}/{file}")
    elif file == 'formation-mapping.pbz2':
        formation_mapping = _load_pbz2(f"{file_path}/{file}")
    elif '-eventdata-' in file:
        match_event_frames.append(_load_pbz2(f"{file_path}/{file}"))
    elif '-playerdata-' in file:
        match_player_frames.append(_load_pbz2(f"{file_path}/{file}"))

# Storage dataframes (empty when the folder holds no match files)
events_df = pd.concat(match_event_frames) if match_event_frames else pd.DataFrame()
players_df = pd.concat(match_player_frames) if match_player_frames else pd.DataFrame()
88 |
89 |
# %% Isolate events of choice (in play only)

# Passes satisfying any of these event types are not treated as in-play events
# NOTE(review): presumably set-piece / restart types - confirm the ids against event_types
excluded_event_type_ids = (31, 34, 212)

# Keep events that carry an xThreat value (assumes a numeric column where missing values are NaN)
threat_creating_events_df = events_df[events_df['xThreat'].notna()]
is_excluded = threat_creating_events_df['satisfiedEventsTypes'].apply(
    lambda satisfied: any(type_id in satisfied for type_id in excluded_event_type_ids))
threat_creating_events_df = threat_creating_events_df[~is_excluded]

# %% Get teams and order on total threat created

# Sort alphabetically initially
teams = sorted(set(players_df['team']))

# Set up dictionary to store xt per 90 per team
team_xt_90 = dict.fromkeys(teams, 0)
team_count = len(teams)

# Length of every match (largest cumulative minute), computed once instead of
# re-filtering the full event table for each team and match
# NOTE(review): 'cumulative_mins' is not created in the visible part of this script -
# confirm the loaded event data already contains it
match_duration = events_df.groupby('match_id')['cumulative_mins'].max()

for team in teams:

    # Get team events
    team_id = players_df[players_df['team']==team]['teamId'].values[0]
    team_threat_creating_events = threat_creating_events_df[threat_creating_events_df['teamId']==team_id]

    # Get each team match and accumulate total mins
    team_matches = set(team_threat_creating_events['match_id'])
    team_mins = sum(match_duration.loc[match] for match in team_matches)

    # Team xT created per 90
    team_xt_90[team] = 90*(team_threat_creating_events['xThreat_gen'].sum() / team_mins)

# Convert to a list of (team, xT per 90) tuples, highest first
team_xt_90 = sorted(team_xt_90.items(), key=lambda x: x[1], reverse=True)
121 |
# %% Custom colormap

CustomCmap = mpl.colors.LinearSegmentedColormap.from_list("", ["#313332","#47516B", "#848178", "#B2A66F", "#FDE636"])

# %% Create visual

# Overwrite rcparams
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Path effects: dark outline behind text drawn on the heatmap. Only the linewidth=1.5
# definition was ever used (an earlier linewidth=4 one was always overwritten), so it is
# defined once here instead of being rebuilt for every team.
path_eff = [path_effects.Stroke(linewidth=1.5, foreground='#313332'), path_effects.Normal()]

# Define grid dimensions (enough rows of four to hold every team)
ncols = 4
nrows = int(np.ceil(len(team_xt_90)/ncols))

# Set-up pitch subplots
pitch = Pitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=nrows, ncols=ncols, grid_height=0.8, title_height = 0.13, endnote_height = 0.04, space=0.12, axis=False)
fig.set_size_inches(14, 15)
fig.set_facecolor('#313332')
ax['pitch'] = ax['pitch'].reshape(-1)

# Loop through each team, already ordered by xT per 90
for idx, team in enumerate(team_xt_90):
    team_ax = ax['pitch'][idx]

    # Get team name and events
    team_name = team[0]
    team_id = players_df[players_df['team']==team_name]['teamId'].values[0]
    team_threat_creating_events = threat_creating_events_df[threat_creating_events_df['teamId']==team_id]

    # Get team logo and colour; long names are shortened for the panel title only
    team_logo, team_cmap = lab.get_team_badge_and_colour(team[0])
    if len(team_name) > 14:
        team_name = team_name[0:13] + '...'

    # Fall back to the shared colormap unless team colours were requested
    if not team_colour:
        team_cmap = CustomCmap

    # Draw heatmap of the share of xThreat generated per zone (6 x 5 bins)
    bin_statistic = pitch.bin_statistic(team_threat_creating_events['x'], team_threat_creating_events['y'],
                                        statistic='sum', bins=(6, 5), normalize=True, values = team_threat_creating_events['xThreat_gen'])
    pitch.heatmap(bin_statistic, team_ax, cmap=team_cmap, edgecolor='w', lw=0.5, zorder=0, alpha=0.7)

    # Label heatmap zones with their percentage share if selected
    if label_pct:
        labels = pitch.label_heatmap(bin_statistic, color='w', fontsize=10, fontweight = 'bold',
                                     ax=team_ax, ha='center', va='center', str_format='{:.0%}', path_effects=path_eff)

    # Label xt
    team_ax.text(2, 2, "xT/90:", fontsize=10, fontweight='bold', color='w', zorder=3, path_effects = path_eff)
    team_ax.text(24, 2, round(team[1],2), fontsize=10, color='w', zorder=3, path_effects = path_eff)

    # Set title
    team_ax.set_title(f" {idx + 1}: {team_name}", loc = "left", color='w', fontsize = 16)

    # Add team logo at the top-right corner of the panel
    ax_pos = team_ax.get_position()

    logo_ax = fig.add_axes([ax_pos.x1-0.02, ax_pos.y1, 0.02, 0.02])
    logo_ax.axis("off")
    logo_ax.imshow(team_logo)
189 |
# Title
# Maps the short league code to the display name used in the figure title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

title_text = f"{leagues[league]} {year}/{int(year)+1} - Teams Ranked by In-Play Threat Creation"
# The two <...> spans are the segments coloured by highlight_textprops below (yellow, then grey);
# there must be exactly one span per entry in that list.
# NOTE(review): the highlighted wording was missing from this line and has been reconstructed - confirm.
subtitle_text = "Heatmaps showing Zones of <High Threat Creation> and <Low Threat Creation>"
subsubtitle_text = f"Pass, Carry and Dribble events included. Negative threat events excluded. Correct as of {run_date}"

fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=20, color='w')
htext.fig_text(0.12, 0.934, s=subtitle_text, fontweight="bold", fontsize=18, color='w',
               highlight_textprops=[{"color": 'yellow', "fontweight": 'bold'}, {"color": 'grey', "fontweight": 'bold'}])
fig.text(0.12, 0.9, subsubtitle_text, fontweight="regular", fontsize=16, color='w')
203 |
# Add direction of play arrow (drawn on the small axes just added, which is the current axes)
ax = fig.add_axes([0.042, 0.028, 0.18, 0.005])
ax.axis("off")
plt.arrow(0.51, 0.15, 0.1, 0, color="white")
fig.text(0.13, 0.02, "Direction of play", ha="center", fontsize=10, color="white", fontweight="regular")

# Add footer text
fig.text(0.5, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add competition logo
ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
ax.axis("off")
ax.imshow(comp_logo)

# Add twitter logo
ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
ax.axis("off")
# Forward slashes: the previous backslash path relied on invalid escape sequences ('\d', '\m', ...)
# in a non-raw string and only resolved on Windows
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Save the figure under a name built from league and year
fig.savefig(f"team_threat_creation/{league}-{year}-team-threat-creation", dpi=300)
226 |
--------------------------------------------------------------------------------
/projects/06_competition_reports_top_teams/xg_league_table_sb.py:
--------------------------------------------------------------------------------
1 | # %% Imports
2 |
3 | import pandas as pd
4 | import bz2
5 | import os
6 | import pickle
7 | import matplotlib.pyplot as plt
8 | import seaborn as sns
9 | import sys
10 | import numpy as np
11 | import time
12 | from mplsoccer import Pitch, VerticalPitch
13 | import matplotlib.patheffects as path_effects
14 | import matplotlib as mpl
15 | import requests
16 | from PIL import Image, ImageEnhance
17 | from io import BytesIO
18 | from datetime import datetime
19 | import textwrap as tw
20 | from mplsoccer import PyPizza
21 | from matplotlib.offsetbox import OffsetImage, AnnotationBbox
22 |
23 | # %% Add custom tools to path
24 |
25 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
26 | sys.path.append(root_folder)
27 |
28 | import analysis_tools.get_football_data as gfd
29 | import analysis_tools.statsbomb_custom_events as sce
30 | import analysis_tools.statsbomb_data_engineering as sde
31 | import analysis_tools.logos_and_badges as lab
32 |
# %% User inputs

# Competition to analyse: one [country, competition, season] entry
data_grab = [['England', 'Premier League', '2023']]

# %% Load data

# Statsbomb events, matches, lineups and player stats for the selection above
data_dict = gfd.load_statsbomb_sql(data_grab, events=True, matches = True, lineups = True, player_stats=True)
events_df, matches_df, lineups_df, playerstats_df = (
    data_dict[key] for key in ('events', 'matches', 'lineups', 'player_stats'))

# Brightened competition logo, looked up by competition name and season
comp_logo = lab.get_competition_logo(*data_grab[0][1:3], logo_brighten=True)
49 |
# %% Error metrics

# Every event flagged as an error
all_errors = events_df[events_df['type_name']=='Error']

# For each error, collect the other team's events in the same match and period
# that fall within the 15 seconds (15/60 minutes) after it
post_error_frames = []
for _, error in all_errors.iterrows():

    window_end = error['cumulative_mins'] + (15/60)
    error_evts = events_df[(events_df['match_id']==error['match_id']) &
                           (events_df['period']==error['period']) &
                           (events_df['cumulative_mins'] >= error['cumulative_mins']) &
                           (events_df['cumulative_mins'] <= window_end) &
                           (events_df['team_name']!=error['team_name'])]
    post_error_frames.append(error_evts)

# Concatenate once instead of growing a dataframe inside the loop (which re-copies all
# accumulated rows on every iteration). With no errors at all, fall back to an empty frame
# that still has the event columns so later column filters keep working.
if post_error_frames:
    events_following_error = pd.concat(post_error_frames)
else:
    events_following_error = events_df.iloc[0:0]
63 |
# %% Get team information

# Get team list
teaminfo_df = sde.create_team_list(lineups_df)

# In-play shots, goals and xG
shots_for = events_df[events_df['type_name']=='Shot']
ip_shots_for = shots_for[shots_for['in_play_event']==1]
# In-play goals = in-play shots that ended in a goal, plus 'Own Goal For' events
ip_goals_for = pd.concat([ip_shots_for[ip_shots_for['outcome_name']=='Goal'], events_df[events_df['type_name']=='Own Goal For']])
teaminfo_df = sde.group_team_events(shots_for, teaminfo_df, group_type='sum', agg_columns ='shot_statsbomb_xg', primary_event_name = 'xg_for')
teaminfo_df = sde.group_team_events(ip_shots_for, teaminfo_df, group_type='sum', agg_columns ='shot_statsbomb_xg', primary_event_name = 'ip_xg_for')
teaminfo_df = sde.group_team_events(ip_goals_for, teaminfo_df, group_type='count', primary_event_name = 'ip_goals_for')

# In play offensive OBV (summed 'obv_for_net_z', stored under the 'ip_xt_for' column name)
ip_obv_events = events_df[(events_df['type_name'].isin(['Pass','Carry','Dribble'])) &
                          (events_df['in_play_event']==1)]
teaminfo_df = sde.group_team_events(ip_obv_events, teaminfo_df, group_type='sum', agg_columns ='obv_for_net_z', primary_event_name = 'ip_xt_for')

# In-play shots, goals and xG against (the index label of teaminfo_df is used as the team name)
for team_name, _ in teaminfo_df.iterrows():

    # Get matches and events by team; "against" events are those by the other team in those matches
    match_ids = matches_df[(matches_df['home_team']==team_name) | (matches_df['away_team']==team_name)]['match_id'].tolist()
    team_match_evts = events_df[events_df['match_id'].isin(match_ids)]
    evts_against = team_match_evts[team_match_evts['team_name']!=team_name]
    shots_against = evts_against[evts_against['type_name']=='Shot']
    ip_shots_against = shots_against[shots_against['in_play_event']==1]
    ip_goals_against = pd.concat([ip_shots_against[ip_shots_against['outcome_name']=='Goal'], evts_against[evts_against['type_name']=='Own Goal For']])
    ip_obv_events_against = evts_against[(evts_against['type_name'].isin(['Pass','Carry','Dribble'])) &
                                         (evts_against['in_play_event']==1)]
    teaminfo_df.loc[team_name, 'xg_against'] = shots_against['shot_statsbomb_xg'].sum(numeric_only=True)
    teaminfo_df.loc[team_name, 'ip_xg_against'] = ip_shots_against['shot_statsbomb_xg'].sum(numeric_only=True)
    teaminfo_df.loc[team_name, 'ip_goals_against'] = len(ip_goals_against)
    teaminfo_df.loc[team_name, 'ip_xt_against'] = ip_obv_events_against['obv_for_net_z'].sum(numeric_only=True)

    # xG conceded from in-play opposition shots inside the post-error windows of this team's matches
    team_post_error_evts_against = events_following_error[(events_following_error['match_id'].isin(match_ids)) & (events_following_error['team_name']!=team_name)]
    post_error_shots_against = team_post_error_evts_against[(team_post_error_evts_against['type_name']=='Shot') & (team_post_error_evts_against['in_play_event']==1)]
    teaminfo_df.loc[team_name, 'xg_against_following_error'] = post_error_shots_against['shot_statsbomb_xg'].sum(numeric_only=True)

# Derived differences and ratios
teaminfo_df['non-error_ip_xg_against'] = teaminfo_df['ip_xg_against'] - teaminfo_df['xg_against_following_error']
teaminfo_df['xg_difference'] = teaminfo_df['xg_for'] - teaminfo_df['xg_against']
teaminfo_df['ip_xg_difference'] = teaminfo_df['ip_xg_for'] - teaminfo_df['ip_xg_against']
teaminfo_df['ip_xg_xt_ratio'] = teaminfo_df['ip_xg_for']/teaminfo_df['ip_xt_for']
teaminfo_df['ip_xg_xt_against_ratio'] = teaminfo_df['ip_xg_against']/teaminfo_df['ip_xt_against']
teaminfo_df['ip_goal_xg_ratio'] = teaminfo_df['ip_goals_for'] / teaminfo_df['ip_xg_for']
teaminfo_df['ip_goal_xg_against_ratio'] = teaminfo_df['ip_goals_against'] / teaminfo_df['ip_xg_against']


# Combined views of the two attacking ratios. Work on an explicit copy so the new columns
# are not written into a slice of teaminfo_df (avoids pandas' SettingWithCopyWarning).
ti = teaminfo_df[['ip_xg_xt_ratio','ip_goal_xg_ratio']].copy()
ti['product'] = ti['ip_xg_xt_ratio'] * ti['ip_goal_xg_ratio']
ti['mean'] = (ti['ip_xg_xt_ratio'] + ti['ip_goal_xg_ratio'])/2
# Harmonic mean of two values is 2 / (1/a + 1/b); the numerator was previously 1,
# which halves every value relative to the true harmonic mean
ti['h_mean'] = 2/((1/ti['ip_xg_xt_ratio']) + (1/ti['ip_goal_xg_ratio']))
# %% Normalise

# Every xG / xT column that is not a ratio gets a per-90 counterpart with a '_90' suffix
per_90_columns = [column for column in teaminfo_df.columns
                  if 'ratio' not in column and ('xg' in column or 'xt' in column)]

for column in per_90_columns:
    teaminfo_df[column + '_90'] = 90*teaminfo_df[column] / teaminfo_df['time_played']
122 |
# %% VISUAL 1: XG AND XT RATIO SCATTER

# rc params
mpl.rcParams['xtick.color'] = 'w'
mpl.rcParams['ytick.color'] = 'w'

# Metrics to plot
plot_y = ['ip_goal_xg_ratio']
plot_x = ['ip_xg_xt_ratio']

# Work with the single column name behind each one-element list so that the axis limits and
# logo coordinates below are plain scalars rather than one-element Series/arrays
x_col, y_col = plot_x[0], plot_y[0]

# Set up figure
fig, ax = plt.subplots(figsize = (8.5,9), facecolor = '#313332')
fig.set_facecolor('#313332')
ax.patch.set_alpha(0)
#ax.set_position([0.1,0.15,0.8,0.65], which='both')

# Format axes
ax.spines['bottom'].set_color('w')
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_color('w')
ax.grid(color='gray', alpha = 0.2)

# Label axes
ax.set_ylabel("Ratio between In-Play Goals and Expected Goals (Goals/xG)", labelpad = 10, fontweight="bold", fontsize=12, color='w')
ax.set_xlabel("Ratio between In-Play Expected Goals and Expected Threat (xG/xT)", labelpad = 10, fontweight="bold", fontsize=12, color='w')

# Define axis limits: data range rounded outwards to the nearest 0.1
xmin = np.floor(10*teaminfo_df[x_col].min())/10
xmax = np.ceil(10*teaminfo_df[x_col].max())/10
ymin = np.floor(10*teaminfo_df[y_col].min())/10
ymax = np.ceil(10*teaminfo_df[y_col].max())/10
ax.set_xlim([xmin, xmax])
ax.set_ylim([ymin, ymax])

# Iterate through each team
for team, team_metrics in teaminfo_df.iterrows():

    # Get logo
    team_logo, _ = lab.get_team_badge_and_colour(team)

    # Plot logo at the team's (x, y) metric values
    ab = AnnotationBbox(OffsetImage(team_logo, zoom = 0.07, resample = True), (team_metrics[x_col], team_metrics[y_col]), frameon=False)
    ax.add_artist(ab)
167 |
# %% VISUAL 2: XG AND XT RATIO TABLE

fig, ax = plt.subplots(figsize = (8,9.5), facecolor = '#313332')
ax.patch.set_alpha(0)

# Order teams by xG/xT ratio, highest first
teaminfo_df = teaminfo_df.sort_values('ip_xg_xt_ratio', ascending=False)

# Figure title and subtitle
title_text = f"{data_grab[0][1]} {data_grab[0][2]}/{str(int(data_grab[0][2]) + 1).replace('20','',1)}"
subtitle_text = "Team Chance Creation Effectiveness and Chance Conversion Effectiveness"
for heading_y, heading, heading_size in ((0.935, title_text, 16), (0.907, subtitle_text, 11)):
    fig.text(0.12, heading_y, heading, fontweight="bold", fontsize=heading_size, color='w')

# Competition logo in the top-left corner
comp_ax = fig.add_axes([0.015, 0.879, 0.1, 0.1])
comp_ax.axis("off")
comp_ax.imshow(comp_logo)

# Horizontal lines: top of header, bottom of header, bottom of table
for line_start, line_y in ((0.05, 0.995), (0, 0.93), (0, -0.02)):
    ax.plot([line_start, 1], [line_y, line_y], color='w', zorder = 3)

# Column headings
column_headings = (
    (0.08, "Team", "left"),
    (0.5875, "Open-play\nxG/xT Ratio", "center"),
    (0.8625, "Open-play\nGoals/xG Ratio", "center"),
)
for heading_x, heading, alignment in column_headings:
    ax.text(heading_x, 0.96, heading, ha = alignment, va = "center", fontweight = "bold", color = "w")

# Vertical lines: (x position, (y start, y end), line style), drawn in this order
vertical_lines = [
    (0.002, (-0.02, 0.93), {'color': 'w'}),
    (0.05, (-0.02, 0.93), {'color': 'grey', 'lw': 0.5}),
    (0.05, (-0.02, 0.995), {'color': 'w'}),
    (0.45, (-0.02, 0.93), {'color': 'grey', 'lw': 0.5}),
    (0.45, (0.93, 0.995), {'color': 'w', 'lw': 0.5}),
    (0.725, (-0.02, 0.93), {'color': 'grey', 'lw': 0.5}),
    (0.725, (0.93, 0.995), {'color': 'w', 'lw': 0.5}),
    (0.999, (-0.02, 0.995), {'color': 'w'}),
]
for line_x, y_span, line_style in vertical_lines:
    ax.plot([line_x, line_x], list(y_span), zorder = 2, **line_style)
204 |
# Iterate through each team
# Rows are spread evenly from y=0.9 (first team) down to y=0 (last team). The spacing is
# derived from the number of teams instead of a fixed 19, which only suited 20-team leagues.
row_steps = max(len(teaminfo_df) - 1, 1)
for idx, (team, team_metrics) in enumerate(teaminfo_df.iterrows()):

    row_y = 0.9*(1-idx/row_steps)

    # Plot rank, team name and badge
    ax.text(0.025, row_y, idx+1, va="center", ha = "center", color = "w" )
    ax.text(0.11, row_y, team, va="center", color = "w" )
    team_logo, _ = lab.get_team_badge_and_colour(team)
    ab = AnnotationBbox(OffsetImage(team_logo, zoom = 0.06, resample = True), (0.08, row_y+0.003), frameon=False)
    ax.add_artist(ab)

    # Plot metrics, rounded to two decimals
    ax.text(0.5875, row_y, round(team_metrics['ip_xg_xt_ratio'],2), va="center", ha = "center", color = "w" )
    ax.text(0.8625, row_y, round(team_metrics['ip_goal_xg_ratio'],2), va="center", ha = "center", color = "w" )

    # Plot hline just beneath the row
    ax.plot([0, 1], [row_y-0.02, row_y-0.02], color='grey', lw = 0.5, zorder = 1)
224 |
# Format axis: hide spines and ticks so only the hand-drawn table lines remain
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['bottom'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])
ax.yaxis.label.set_color('w')
ax.set_position([0.1,0.06,0.8,0.81], which='both')
ax.set_xlim([0,1])
ax.set_ylim([-0.03,1])

# Create footer
# NOTE(review): the footer credits Opta although this script loads its data through
# gfd.load_statsbomb_sql - confirm the intended attribution
fig.text(0.5, 0.024, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo
ax = fig.add_axes([0.94, 0.007, 0.05, 0.05])
ax.axis("off")
# Forward slashes: the previous backslash path relied on invalid escape sequences ('\d', '\m', ...)
# in a non-raw string and only resolved on Windows
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Save the table under a name built from competition and season
fig.savefig(f"team_xg_metrics/{data_grab[0][1].replace(' ','-').lower()}-{data_grab[0][2]}-xg-xt-table", dpi=300)
--------------------------------------------------------------------------------
/projects/10_team_buildup_passes/team_pass_tendencies.py:
--------------------------------------------------------------------------------
1 | # %% Imports and parameters
2 |
3 | import pandas as pd
4 | import matplotlib.pyplot as plt
5 | import matplotlib as mpl
6 | from PIL import Image, ImageEnhance
7 | from mplsoccer.pitch import VerticalPitch, Pitch
8 | import matplotlib.patheffects as path_effects
9 | import os
10 | import sys
11 | import bz2
12 | import pickle
13 | import numpy as np
14 | from collections import Counter
15 | import highlight_text as htext
16 | import glob
17 | import joblib
18 | from scipy.spatial import Delaunay
19 | from sklearn.base import BaseEstimator, TransformerMixin
20 | from time import time
21 |
22 | # %% Add custom tools to path
23 |
24 | root_folder = os.path.abspath(os.path.dirname(
25 | (os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
26 | sys.path.append(root_folder)
27 |
28 | import analysis_tools.get_football_data as gfd
29 | import analysis_tools.whoscored_custom_events as wce
30 | import analysis_tools.models as models
31 | import analysis_tools.whoscored_data_engineering as wde
32 | import analysis_tools.logos_and_badges as lab
33 |
# %% User Inputs

# Select year
year = '2022'

# Select league (EPL, La_Liga, Bundesliga, Serie_A, Ligue_1, RFPL)
league = 'EPL'

# Select team
team = 'Brighton'

# %% Get whoscored data and get statsbomb data

# Season folder is "<start year>_<last two digits of end year>", e.g. "2022_23".
# Take the suffix from the end year directly: stripping every '20' from the start year
# turned '2020' into an empty string (int('') fails) and dropped zero padding.
file_path = f"../../data_directory/whoscored_data/{year}_{str(int(year) + 1)[-2:]}/{league}"
files = os.listdir(file_path)

# Per-match frames are collected and concatenated once after the loop
event_frames = []
player_frames = []

# Load whoscored data. Each file is a bz2-compressed pickle; the context manager closes the
# handle after loading. NOTE: pickle.load must only be used on trusted project data files.
for file in files:
    if file == 'event-types.pbz2':
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as handle:
            event_types = pickle.load(handle)
    elif file == 'formation-mapping.pbz2':
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as handle:
            formation_mapping = pickle.load(handle)
    elif '-eventdata-' in file:
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as handle:
            event_frames.append(pickle.load(handle))
    elif '-playerdata-' in file:
        with bz2.BZ2File(f"{file_path}/{file}", 'rb') as handle:
            player_frames.append(pickle.load(handle))

# Combined storage dataframes (empty when no matching files were found)
events_df = pd.concat(event_frames) if event_frames else pd.DataFrame()
players_df = pd.concat(player_frames) if player_frames else pd.DataFrame()
72 |
# %% Isolate matches that team feature in

# Player rows for the selected team provide both its match ids and its team id
team_player_rows = players_df[players_df['team'] == team]
team_match_ids = team_player_rows['match_id'].unique()
team_id = team_player_rows['teamId'].unique()[0]

# Keep all events from those matches
in_team_match = events_df['match_id'].isin(team_match_ids)
team_events_df = events_df[in_team_match]
81 |
# %% Group possessions and count passes in each possession

# Add possession chain information, then keep only the selected team's own events
team_events_df = wce.get_possession_chains(team_events_df)
team_events_df = team_events_df[team_events_df['teamId']==team_id]

# Chains are collected in a list and concatenated once at the end; concatenating onto a
# growing dataframe for every possession re-copies all accumulated rows each time
possession_chains = []

# Iterate over match
for match_id in team_match_ids:

    # Filter the match once rather than once per possession
    match_team_events = team_events_df[team_events_df['match_id']==match_id]
    match_possession_ids = match_team_events[match_team_events['possession_team']==team_id]['possession_id'].unique()

    # Iterate over possessions
    for pos_id in match_possession_ids:

        # Get possession chain and number its events from 1
        possession_chain = match_team_events[match_team_events['possession_id']==pos_id].copy()
        possession_chain['evt_number'] = np.arange(1,len(possession_chain)+1)
        possession_chain['pass_number'] = np.nan

        # First pass can be a corner, others can't (event type 31 is excluded for later passes)
        first_pass = possession_chain[(possession_chain['eventType']=='Pass')].head(1)
        if len(first_pass) == 1:
            possession_chain_passes = possession_chain[(possession_chain['eventId']!=first_pass['eventId'].values[0]) & (possession_chain['eventType']=='Pass') & (~possession_chain['satisfiedEventsTypes'].apply(lambda x: 31 in x))]
            possession_chain_passes = pd.concat([first_pass, possession_chain_passes])
        else:
            possession_chain_passes = pd.DataFrame()

        # Number the counted passes 1..n in chain order; other events keep NaN
        if len(possession_chain_passes) > 0:
            possession_chain.loc[possession_chain_passes.index.values, 'pass_number'] = np.arange(1,len(possession_chain_passes)+1)

        possession_chains.append(possession_chain)

# Possession chain dataframe (empty when the team had no possessions)
pos_chain_df = pd.concat(possession_chains) if possession_chains else pd.DataFrame()
112 |
# %% Identify possession chains starting in each third

# Label the first event of each chain by the third of the pitch (x scale 0-100) it occurred in
is_first_event = pos_chain_df['evt_number']==1
pos_chain_df.loc[is_first_event & (pos_chain_df['x']<100/3), 'pos_start'] = 'Own 3'
pos_chain_df.loc[is_first_event & (pos_chain_df['x']>=100/3) & (pos_chain_df['x']<200/3), 'pos_start'] = 'Mid 3'
pos_chain_df.loc[is_first_event & (pos_chain_df['x']>=200/3), 'pos_start'] = 'Opp 3'

# Carry each chain's label forward onto its remaining events.
# Series.ffill() replaces fillna(method='ffill'), which is deprecated in recent pandas.
pos_chain_df['pos_start'] = pos_chain_df['pos_start'].ffill()
119 |
# %% Cluster passes in each possession zone

# Bind the two helpers from the models module to script-level names
# (presumably so the clustering step can resolve them by name - TODO confirm)
convertYards, customScaler = models.convertYards, models.customScaler

# Run pass clustering over the possession chain events
pos_chain_df = models.get_pass_clusters(pos_chain_df)
125 |
# %% Plot first 6 passes in each possession type

# Plot configuration: passes shown per possession type, clusters shown per pitch, cluster colours
first_n_passes, clusters_shown = 6, 5
cluster_colours = ['orchid','cornflowerblue', 'mediumseagreen', 'khaki', 'lightcoral', 'lightgrey']

# Three rows of vertical pitches with one column per pass number
pitch = VerticalPitch(pitch_color='#313332', pitch_type='opta', line_color='white', linewidth=1, stripe=False)
fig, ax = pitch.grid(nrows=3, ncols=first_n_passes, title_height=0.155, grid_height=0.785, endnote_height=0.03, space=0.07, axis=False)
fig.set_facecolor('#313332')
fig.set_size_inches(10, 10)

# Flatten the pitch axes so they can be addressed with one running index
ax['pitch'] = ax['pitch'].ravel()
140 | # Iterate through each pitch
141 | for idx in np.arange(3*first_n_passes):
142 |
143 | # Set up pass plot indexing
144 | if idx <= first_n_passes-1:
145 | pitch.lines(100, 101.5, 100, -1.5, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
146 | pitch.lines(100, 0, 200/3, 0, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
147 | pitch.lines(100, 100, 200/3, 100, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
148 | pitch.lines(200/3, -1.5, 200/3, 101.5, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
149 | pass_num = idx+1
150 | possession_plot = pos_chain_df[pos_chain_df['pos_start'] == 'Opp 3']
151 | elif idx <= 2*first_n_passes-1:
152 | pitch.lines(200/3, 101.5, 200/3, -1.5, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
153 | pitch.lines(100/3, 0, 200/3, 0, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
154 | pitch.lines(100/3, 100, 200/3, 100, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
155 | pitch.lines(100/3, -1.5, 100/3, 101.5, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
156 | pass_num = idx-first_n_passes+1
157 | possession_plot = pos_chain_df[pos_chain_df['pos_start'] == 'Mid 3']
158 | else:
159 | pitch.lines(100/3, 101.5, 100/3, -1.5, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
160 | pitch.lines(100/3, 0, 0, 0, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
161 | pitch.lines(100/3, 100, 0, 100, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
162 | pitch.lines(0, -1.5, 0, 101.5, lw=3, color = 'lightsteelblue', zorder = 1, ax=ax['pitch'][idx])
163 | pass_num = idx-2*first_n_passes+1
164 | possession_plot = pos_chain_df[pos_chain_df['pos_start'] == 'Own 3']
165 |
166 | # Passes to plot
167 | pass_plot = possession_plot[possession_plot['pass_number']==pass_num]
168 | pass_top_cluster_ids = pass_plot.groupby('pass_cluster_id').count()['id'].sort_values(ascending=False).head(clusters_shown).index.values
169 | cluster_rank_dict = dict(zip(pass_top_cluster_ids, np.arange(1,len(pass_top_cluster_ids)+1)))
170 | cluster_color_dict = dict(zip(pass_top_cluster_ids, cluster_colours[0:clusters_shown]))
171 | pass_top_clusters = pass_plot[pass_plot['pass_cluster_id'].isin(pass_top_cluster_ids)].copy()
172 | pass_top_clusters['cluster_rank'] = pass_top_clusters['pass_cluster_id'].apply(lambda x: cluster_rank_dict[x])
173 | pass_top_clusters['cluster_c'] = pass_top_clusters['pass_cluster_id'].apply(lambda x: cluster_color_dict[x])
174 | pass_top_clusters = pass_top_clusters.sort_values('cluster_rank', ascending=False)
175 |
176 | # Plot passes
177 | for _, single_pass in pass_top_clusters.iterrows():
178 | pitch.lines(single_pass['x'], single_pass['y'], single_pass['endX'], single_pass['endY'],
179 | lw=1.5, comet=False, capstyle='round', color = single_pass['cluster_c'], alpha = 0.8, ax=ax['pitch'][idx], zorder=2)
180 | pitch.scatter(single_pass['endX'], single_pass['endY'], s=10, color = single_pass['cluster_c'], alpha = 0.8, zorder=3, ax=ax['pitch'][idx])
181 | pitch.scatter(single_pass['endX'], single_pass['endY'], s=5, color = '#313332', alpha = 1, zorder=3, ax=ax['pitch'][idx])
182 |
183 | # Add title
184 | if pass_num == 1:
185 | suffix = 'st'
186 | elif pass_num == 2:
187 | suffix = 'nd'
188 | elif pass_num == 3:
189 | suffix = 'rd'
190 | else:
191 | suffix = 'th'
192 | ax['pitch'][idx].set_title(f"{pass_num}{suffix} Pass", pad=-1, color = 'w', fontsize = 9)
193 |
# Add title
# Maps the short league code to the display name used in the figure title
leagues = {'EPL': 'Premier League', 'La_Liga': 'La Liga', 'Bundesliga': 'Bundesliga', 'Serie_A': 'Serie A',
           'Ligue_1': 'Ligue 1', 'RFPL': 'Russian Premier League', 'EFLC': 'EFL Championship', 'World_Cup': 'World Cup',
           'EFL1': 'EFL League One', 'EFL2': 'EFL League Two'}

# Season is shown as e.g. 2022/23 (first '20' removed from the end year)
title_text = f"{team} Passing Tendencies − {leagues[league]} {year}/{(str(int(year)+1)).replace('20','',1)}"
subtitle_text = f"Where {team} directed passes during possessions starting in each third"
subsubtitle_text = f"First {first_n_passes} passes in each possession chain. {clusters_shown} most common pass types shown"
fig.text(0.12, 0.945, title_text, fontweight="bold", fontsize=16, color='w')
fig.text(0.12, 0.918, subtitle_text, fontweight="bold", fontsize=13, color='w')
fig.text(0.12, 0.896, subsubtitle_text, fontweight="regular", fontsize=10, color='w')
205 |
# Separator rules beside each section heading: a short stub on the left, then a rule that
# resumes at resume_x and runs to the right edge
for resume_x, rule_bottom in ((0.38, 0.84), (0.42, 0.57), (0.381, 0.3)):
    sep_ax = fig.add_axes([0.02, rule_bottom, 0.96, 0.02])
    sep_ax.axis("off")
    sep_ax.plot([0, 0.05], [0, 0], color = 'grey', lw = 1)
    sep_ax.plot([resume_x, 1], [0, 0], color = 'grey', lw = 1)
    sep_ax.set_xlim([0,1])

# Section headings, one per row of pitches
section_headings = (
    (0.845, "Possessions Initiated in Final Third"),
    (0.575, "Possessions Initiated in Centre of Pitch"),
    (0.305, "Possessions Initiated in Own Third"),
)
for heading_y, heading in section_headings:
    fig.text(0.075, heading_y, heading, fontweight="bold", fontsize=11, color='w')

# Legend: one coloured marker per displayed cluster rank
fig.text(0.14, 0.032, "nth Most Common Pass Cluster", fontweight="bold", fontsize=9, color='w', ha = "center")
ax = fig.add_axes([0, 0, 0.3, 0.05])
for idx in range(clusters_shown):
    marker_x = 0.07+idx*0.17
    label_x = 0.1+idx*0.17
    ax.scatter(marker_x, 0.35, color=cluster_colours[idx])
    ax.text(label_x, 0.295, f"n={1+idx}",fontsize=8, c='w')
ax.set_xlim([0,1])
ax.set_ylim([0,1])
ax.axis('off')
226 |
# Add team logo
logo, _ = lab.get_team_badge_and_colour(team)
ax = fig.add_axes([0.017, 0.88, 0.1, 0.1])
ax.axis("off")
ax.imshow(logo)

# Add footer text
fig.text(0.55, 0.022, "Created by Jake Kolliari (@_JKDS_). Data provided by Opta.",
         fontstyle="italic", ha="center", fontsize=9, color="white")

# Add twitter logo
ax = fig.add_axes([0.92, 0.005, 0.04, 0.04])
ax.axis("off")
# Forward slashes: the previous backslash path relied on invalid escape sequences ('\d', '\m', ...)
# in a non-raw string and only resolved on Windows
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
ax.imshow(badge)

# Save under a name built from team, league and year (lower-cased, spaces removed)
plt.savefig(f"team_pass_tendencies/{team.lower().replace(' ','')}-{league.lower().replace(' ','')}-{year}", dpi=300)
--------------------------------------------------------------------------------
/projects/11_justice_league/justice_league.py:
--------------------------------------------------------------------------------
1 | # %% Imports
2 |
3 | import pandas as pd
4 | import bz2
5 | import os
6 | import pickle
7 | import matplotlib.pyplot as plt
8 | import seaborn as sns
9 | import sys
10 | import numpy as np
11 | import time
12 | from mplsoccer import Pitch, VerticalPitch
13 | import matplotlib.patheffects as path_effects
14 | import matplotlib as mpl
15 | from matplotlib.offsetbox import OffsetImage, AnnotationBbox
16 | from matplotlib.patches import Rectangle
17 | import requests
18 | from PIL import Image, ImageEnhance
19 | from io import BytesIO
20 | from datetime import datetime
21 | import textwrap as tw
22 | from mplsoccer import PyPizza
23 |
24 | # %% Add custom tools to path
25 |
26 | root_folder = os.path.abspath(os.path.dirname((os.path.dirname(os.path.dirname(os.path.abspath(__file__))))))
27 | sys.path.append(root_folder)
28 |
29 | import analysis_tools.get_football_data as gfd
30 | import analysis_tools.statsbomb_custom_events as sce
31 | import analysis_tools.statsbomb_data_engineering as sde
32 | import analysis_tools.logos_and_badges as lab
33 | import analysis_tools.models as mod
34 |
# %% User inputs

# Competition to analyse: one [country, competition, season] entry
data_grab = [['England', 'Premier League', '2023']]

# Date the analysis was run
run_date = '07-11-23'

# %% Load data

# Statsbomb events, matches, lineups and player stats for the selection above
data_dict = gfd.load_statsbomb_sql(data_grab, events=True, matches = True, lineups = True, player_stats=True)
events_df, matches_df, lineups_df, playerstats_df = (
    data_dict[key] for key in ('events', 'matches', 'lineups', 'player_stats'))
51 |
# %% Simulate match outcomes

# Number of simulations to run per match
sim_count = 200000

# Simulate every match; each call returns the updated matches dataframe, which is fed into the
# next call. The configured sim_count is now passed through instead of a repeated literal,
# so changing the setting above actually takes effect.
for match_id in matches_df['match_id'].values:
    matches_df, match_simulation_df = mod.simulate_match_outcome(events_df, matches_df, match_id, sim_count=sim_count)

# %% Generate league table

# Build the league table, including expected metrics, from the simulated matches
leaguetable_df = sde.create_league_table(matches_df, xmetrics=True)
61 |
62 | # %% Plot league table
63 |
64 | fig = plt.figure(figsize=(9,10), facecolor='#333332')
65 |
66 | ax = fig.add_axes([0.05,0.075,0.9,0.8])
67 | ax.patch.set_alpha(0)
68 |
69 | # Set up plotting parameters
70 | header_height = 0.07
71 | row_height = (1 - header_height)/len(leaguetable_df)
72 |
73 | # Horizontal Header lines abd shading
74 | ax.plot([0, 1], [0.998, 0.998], color='darkgrey', zorder = 3, lw=0.5)
75 | ax.plot([0, 1], [1-header_height, 1-header_height], color='darkgrey', zorder = 3, lw=0.5)
76 | ax.add_patch(Rectangle([0, 1-header_height], 1, header_height-0.002, color = '#262625'))
77 |
78 | # Header titles
79 | ax.text(0.04, 0.998 -header_height/2, "Team Name", ha = "left", va = "center_baseline", fontweight = "bold", color = "w")
80 | ax.text(0.39, 0.998 -header_height/2, "MP", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
81 | ax.text(0.455, 0.998 -header_height/2, "Pts", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
82 | ax.text(0.55, 0.998 -header_height/2, "xPts", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
83 | ax.text(0.645, 0.998 -header_height/2, "G", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
84 | ax.text(0.74, 0.998 -header_height/2, "xG", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
85 | ax.text(0.835, 0.998 -header_height/2, "GA", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
86 | ax.text(0.935, 0.998 -header_height/2, "xGA", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
87 | #ax.text(0.97, 0.998 -header_height/2, "Actual\nPos", ha = "center", va = "center_baseline", fontweight = "bold", color = "w")
88 |
# Iterate over league table to position rows, icons and text.
# Each team is drawn in the row given by its 'expected_position'; the small
# coloured figures beside a value show expected minus actual.

# Badge zoom depends only on the number of teams, so work it out once
# (max(..., 1) only protects the division when the table is empty).
badge_zoom = 1 / max(len(leaguetable_df), 1) + 0.01

for _, table_team in leaguetable_df.iterrows():

    pos = table_team['expected_position']

    # Add bottom horizontal line
    row_bottom = 1 - header_height - row_height * pos
    row_centre = row_bottom + row_height / 2
    ax.plot([0, 1], [row_bottom, row_bottom], color='darkgrey', lw=0.5, zorder=3)

    # Add position
    ax.text(0.02, row_centre, pos, ha="center", va="center_baseline", color="w")

    # Add logo (the second value returned alongside the badge is not needed here)
    team_logo, _ = lab.get_team_badge_and_colour(table_team['team'])
    ab = AnnotationBbox(OffsetImage(team_logo, zoom=badge_zoom, resample=True),
                        (0.075, row_centre), frameon=False)
    ax.add_artist(ab)

    # Add information. Integer stats sit on the row centre line; expected values
    # hang below it (va="top") so their deltas can sit just above (va="bottom").
    ax.text(0.095, row_centre, table_team['team'], ha="left", va="center_baseline", color="w")
    ax.text(0.39, row_centre, int(table_team['matches_played']), ha="center", va="center_baseline", color="w")
    ax.text(0.455, row_centre, int(table_team['points']), ha="center", va="center_baseline", color="w")
    ax.text(0.535, row_centre, f"{table_team['expected_points']:.2f}", ha="center", va="top", color="w")
    ax.text(0.645, row_centre, int(table_team['goals_for']), ha="center", va="center_baseline", color="w")
    ax.text(0.725, row_centre, f"{table_team['xg_for']:.2f}", ha="center", va="top", color="w")
    ax.text(0.835, row_centre, int(table_team['goals_against']), ha="center", va="center_baseline", color="w")
    ax.text(0.92, row_centre, f"{table_team['xg_against']:.2f}", ha="center", va="top", color="w")
    # Optional "actual position" column, currently disabled:
    #ax.text(0.97, row_centre, int(table_team['position']), ha = "center", va = "center_baseline", color = "w")

    # Add differences. Positive deltas get an explicit '+'; negative deltas
    # already carry their own sign when formatted.
    xg_delta_str = '+' if table_team['xg_for'] > table_team['goals_for'] else ''
    xga_delta_str = '+' if table_team['xg_against'] > table_team['goals_against'] else ''
    xp_delta_str = '+' if table_team['expected_points'] > table_team['points'] else ''

    # Green marks the expected metric being stronger than the outcome: more
    # xG / xPts than achieved. For xGA the sense is inverted, so green is used
    # when expected goals against do not exceed goals conceded.
    xg_col = 'green' if xg_delta_str == '+' else 'indianred'
    xga_col = 'green' if xga_delta_str == '' else 'indianred'
    xp_col = 'green' if xp_delta_str == '+' else 'indianred'

    # Position delta: green when the expected position is better (numerically
    # lower) than the actual one, white when identical, red otherwise.
    actual_pos = table_team['position']
    if pos < actual_pos:
        xpos_delta_str, xpos_col = '+', 'green'
    elif pos == actual_pos:
        xpos_delta_str, xpos_col = '', 'w'
    else:
        xpos_delta_str, xpos_col = '', 'indianred'

    ax.text(0.04, row_centre, f"{xpos_delta_str}{table_team['position']-table_team['expected_position']}",
            ha="center", va="bottom", fontweight="bold", color=xpos_col, fontsize=7)
    ax.text(0.575, row_centre, f"{xp_delta_str}{(table_team['expected_points']-table_team['points']):.2f}",
            ha="center", va="bottom", color=xp_col, fontsize=7)
    ax.text(0.765, row_centre, f"{xg_delta_str}{(table_team['xg_for']-table_team['goals_for']):.2f}",
            ha="center", va="bottom", color=xg_col, fontsize=7)
    ax.text(0.96, row_centre, f"{xga_delta_str}{(table_team['xg_against']-table_team['goals_against']):.2f}",
            ha="center", va="bottom", color=xga_col, fontsize=7)

    # Add intermittent shading on even-numbered rows
    if pos % 2 == 0:
        ax.add_patch(Rectangle([0, row_bottom], 1, row_height, color='#262625'))
138 |
# Strip the frame and ticks so only the drawn table remains
for spine_side in ('top', 'right', 'bottom', 'left'):
    ax.spines[spine_side].set_visible(False)
ax.set_xticks([])
ax.set_yticks([])

# Pin both axes to the unit square used for all table coordinates
ax.set_xlim([0, 1])
ax.set_ylim([0, 1])
150 |
# Add legend: one colour swatch plus caption per entry, on its own frameless axes
legend_ax = fig.add_axes([0.72, 0.88, 0.24, 0.1])
legend_entries = [
    # (swatch y, caption y, swatch colour, caption)
    (0.65, 0.72, 'g', "Expected metric stronger than\nactual outcome"),
    (0.3, 0.37, 'indianred', "Expected metric weaker than\nactual outcome"),
]
for swatch_y, caption_y, swatch_colour, caption in legend_entries:
    legend_ax.add_patch(Rectangle([0.15, swatch_y], 0.055, 0.12, color=swatch_colour))
    legend_ax.text(0.24, caption_y, caption, color="w", va="center_baseline", fontsize=7)
legend_ax.axis("off")
158 |
159 |
# Add title and subtitle. The season is shown as "<start year>/<short end year>",
# where the short end year is the following year with its first '20' removed.
competition_label = data_grab[0][1]
season_start = data_grab[0][2]
season_end_short = str(int(season_start) + 1).replace('20','',1)
title_text = f"{competition_label} {season_start}/{season_end_short} − Justice League"
subtitle_text = "League Table Standings based on Expected Points"
for text_y, heading, heading_size in ((0.94, title_text, 16), (0.915, subtitle_text, 11)):
    fig.text(0.12, text_y, heading, fontweight="bold", fontsize=heading_size, color='w')
165 |
# Add competition logo in its own frameless axes at the top-left of the figure
logo_competition, logo_year = data_grab[0][1], data_grab[0][2]
comp_logo = lab.get_competition_logo(logo_competition, logo_year, logo_brighten=True)
comp_ax = fig.add_axes([0.022, 0.885, 0.1, 0.1])
comp_ax.axis("off")
comp_ax.imshow(comp_logo)
171 |
# Add description of the method beneath the table
method_note = (
    f"Monte Carlo method implemented to model the probability of individual match outcomes based on shot events, with {sim_count} repetitions completed per match. Expected\n"
    "points calculated using weighted outcome probabilities. Method reliant on assumption that xG represents scoring probability, and that individual shot events are independent."
)
fig.text(0.5, 0.045, method_note, color='lightgrey', fontsize=6.5, ha="center")

# Add footer information (author and data credit)
footer_note = "Created by Jake Kolliari (@_JKDS_). Data provided by Opta."
fig.text(0.5, 0.012, footer_note, fontstyle="italic", ha="center", fontsize=9, color="white")
180 |
# Add twitter logo in a small frameless axes at the bottom-right of the figure.
# A dedicated axes name is used so the main table axes handle `ax` is not
# overwritten, matching the legend_ax / comp_ax naming used for the other insets.
twitter_ax = fig.add_axes([0.94, 0.001, 0.04, 0.04])
twitter_ax.axis("off")
# Forward slashes avoid the invalid escape sequences ("\.", "\d", "\m", "\i",
# "\J") of the previous backslash literal and resolve on Windows and POSIX alike.
badge = Image.open('../../data_directory/misc_data/images/JK Twitter Logo.png')
twitter_ax.imshow(badge)
186 |
# Save fig: file name is built from the lower-cased, hyphenated competition name,
# the season year and the run date
competition_slug = data_grab[0][1].replace(' ','-').lower()
output_file = f"justice_league/{competition_slug}-{data_grab[0][2]}-justice-league-{run_date}.png"
fig.savefig(output_file, dpi=300)
--------------------------------------------------------------------------------