├── README.md ├── environment.yml ├── LICENSE ├── .gitignore ├── young_outliers.ipynb ├── 02_data_checks.ipynb ├── 06_xa_map.ipynb ├── 01_statsbomb_json_to_feather.ipynb └── demo_crawley.ipynb /README.md: -------------------------------------------------------------------------------- 1 | # statsbomb-explore 2 | Exploring statsbomb data with mplsoccer 3 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: statsbomb-explore 2 | channels: 3 | - anaconda 4 | dependencies: 5 | - jupyter 6 | - pandas 7 | - scipy 8 | - seaborn 9 | - beautifulsoup4 10 | - pyarrow 11 | - scikit-learn 12 | - pillow 13 | - openpyxl 14 | - requests 15 | - pip 16 | - pip: 17 | - mplsoccer 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The data are licensed under StatsBomb Public Data User Agreement: https://github.com/statsbomb/open-data/blob/master/LICENSE.pdf. 2 | The code uses the MIT license. 3 | 4 | Code: 5 | MIT License 6 | 7 | Copyright (c) 2020 Andrew Rowlinson 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 
18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | Data: 28 | Please refer to the StatsBomb Public Data User Agreement: https://github.com/statsbomb/open-data/blob/master/LICENSE.pdf. 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # data folder 132 | data/ 133 | -------------------------------------------------------------------------------- /young_outliers.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from sklearn.covariance import EmpiricalCovariance" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "df = pd.read_csv('https://raw.githubusercontent.com/mancunian1792/2019_2020_football_analysis/master/data/big5_full_stats.csv')" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "# string to number\n", 30 | "df['playing_minutes'] = pd.to_numeric(df.playing_minutes.str.replace(',', ''))" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# drop object columns\n", 40 | "df.drop(['per90_matches', 'xg_team_success_matches'], axis=1, inplace=True)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# keep young 
players (aged 24 or under playing 900 or greater minutes, excluding goalkeepers)\n", 50 | "df = df[(df.age <= 24) & (df.playing_minutes >= 900) & (df.position != 'GK')].copy()" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "# calculate mahalanobis distance and sort dataframe so largest top\n", 60 | "player_values_array = df[df.columns[7:]].values # subset columns with stats\n", 61 | "cov = EmpiricalCovariance().fit(player_values_array)\n", 62 | "df['dist'] = cov.mahalanobis(player_values_array)\n", 63 | "df.sort_values('dist', ascending=False, inplace=True)\n", 64 | "df.reset_index(drop=True, inplace=True)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "#df.head(40)" 74 | ] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.8.3" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 4 98 | } 99 | -------------------------------------------------------------------------------- /02_data_checks.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import os" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "# Load dataframes" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": 2, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | 
"DATA_PATH = os.path.join(os.getcwd(),'data')\n", 28 | "SHOT_PATH = os.path.join(DATA_PATH,'freeze.parquet')\n", 29 | "df_shots = pd.read_parquet(SHOT_PATH)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 3, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "MATCH_PATH = os.path.join(DATA_PATH,'match.parquet')\n", 39 | "df_match = pd.read_parquet(MATCH_PATH)" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 4, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "EVENTS_PATH = os.path.join(DATA_PATH,'event.parquet')\n", 49 | "df_events = pd.read_parquet(EVENTS_PATH)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": 5, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "RELATED_PATH = os.path.join(DATA_PATH,'related.parquet')\n", 59 | "df_related_events = pd.read_parquet(RELATED_PATH)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 6, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "RELATED_PATH = os.path.join(DATA_PATH,'lineup.parquet')\n", 69 | "df_lineup = pd.read_parquet(RELATED_PATH)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 7, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "RELATED_PATH = os.path.join(DATA_PATH,'tactic.parquet')\n", 79 | "df_tactics = pd.read_parquet(RELATED_PATH)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "metadata": {}, 85 | "source": [ 86 | "# Check that all events have matches and vice versa" 87 | ] 88 | }, 89 | { 90 | "cell_type": "markdown", 91 | "metadata": {}, 92 | "source": [ 93 | "Some event files don't haev match info" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 8, 99 | "metadata": { 100 | "scrolled": true 101 | }, 102 | "outputs": [ 103 | { 104 | "data": { 105 | "text/plain": [ 106 | "{22536, 265905, 266234, 266466, 266574, 266933, 267161, 267405, 267609, 267679}" 107 | ] 108 | }, 109 | 
"execution_count": 8, 110 | "metadata": {}, 111 | "output_type": "execute_result" 112 | } 113 | ], 114 | "source": [ 115 | "set(df_events.match_id.unique()).symmetric_difference(set(df_match.match_id.unique()))" 116 | ] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "metadata": {}, 121 | "source": [ 122 | "# Check all shots have freeze frames" 123 | ] 124 | }, 125 | { 126 | "cell_type": "markdown", 127 | "metadata": {}, 128 | "source": [ 129 | "All non-penalties have freeze frames. Some penalties have the goal keeper location." 130 | ] 131 | }, 132 | { 133 | "cell_type": "code", 134 | "execution_count": 9, 135 | "metadata": {}, 136 | "outputs": [], 137 | "source": [ 138 | "set_shots = set(df_events.loc[df_events.type_name=='Shot','id'].unique())\n", 139 | "set_freeze = set(df_shots.id.unique())" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 10, 145 | "metadata": {}, 146 | "outputs": [ 147 | { 148 | "data": { 149 | "text/plain": [ 150 | "set()" 151 | ] 152 | }, 153 | "execution_count": 10, 154 | "metadata": {}, 155 | "output_type": "execute_result" 156 | } 157 | ], 158 | "source": [ 159 | "# all freeze frames have shots\n", 160 | "set_freeze - set_shots" 161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 11, 166 | "metadata": {}, 167 | "outputs": [ 168 | { 169 | "name": "stdout", 170 | "output_type": "stream", 171 | "text": [ 172 | <<<<<<< HEAD 173 | "Number of shots without freeze frame: 259\n" 174 | ======= 175 | "Number of shots without freeze frame: 254\n" 176 | >>>>>>> 6d5a52dc8abbb526b8433831ca108d0786cbc715 177 | ] 178 | }, 179 | { 180 | "data": { 181 | "text/plain": [ 182 | <<<<<<< HEAD 183 | "Penalty 259\n", 184 | ======= 185 | "Penalty 254\n", 186 | >>>>>>> 6d5a52dc8abbb526b8433831ca108d0786cbc715 187 | "Name: shot_type_name, dtype: int64" 188 | ] 189 | }, 190 | "execution_count": 11, 191 | "metadata": {}, 192 | "output_type": "execute_result" 193 | } 194 | ], 195 | "source": [ 196 | "# the 
shots without freeze frames are penalties\n", 197 | "print('Number of shots without freeze frame:',len(set_shots)-len(set_freeze))\n", 198 | "df_events[df_events.id.isin(set_shots - set_freeze)].shot_type_name.value_counts()" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 12, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "data": { 208 | "text/plain": [ 209 | "Goalkeeper 36\n", 210 | "Left Midfield 1\n", 211 | "Name: player_position_name, dtype: int64" 212 | ] 213 | }, 214 | "execution_count": 12, 215 | "metadata": {}, 216 | "output_type": "execute_result" 217 | } 218 | ], 219 | "source": [ 220 | "# some penalties have the location of the goalkeeper, one has the location of the left midfield\n", 221 | "penalty_ids = df_events[df_events.shot_type_name=='Penalty'].id\n", 222 | "df_shots[df_shots.id.isin(penalty_ids)].player_position_name.value_counts()" 223 | ] 224 | }, 225 | { 226 | "cell_type": "markdown", 227 | "metadata": {}, 228 | "source": [ 229 | "# Check related events" 230 | ] 231 | }, 232 | { 233 | "cell_type": "markdown", 234 | "metadata": {}, 235 | "source": [ 236 | "Note I made a change to the preprocessing to link all events both ways.\n", 237 | "\n", 238 | "In the docs it said that related_event was a comma separated list of the Ids of related events. For example, a shot might be related to the Goalkeeper event, and a Block Event. The corresponding events will have the Id of the shot in their related_events column.\n", 239 | "\n", 240 | "When I explored the data, often carries didn't have the corresponding event.\n", 241 | "\n", 242 | "Now this is fixed." 
243 | ] 244 | }, 245 | { 246 | "cell_type": "code", 247 | "execution_count": 13, 248 | "metadata": {}, 249 | "outputs": [ 250 | { 251 | "data": { 252 | "text/plain": [ 253 | "set()" 254 | ] 255 | }, 256 | "execution_count": 13, 257 | "metadata": {}, 258 | "output_type": "execute_result" 259 | } 260 | ], 261 | "source": [ 262 | "set1 = set(df_related_events.id.unique())\n", 263 | "set2 = set(df_related_events.id_related.unique())\n", 264 | "set(set1).symmetric_difference(set2)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "metadata": {}, 270 | "source": [ 271 | "# Are team names consistent between events and match" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "Yes!" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 14, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "name": "stdout", 288 | "output_type": "stream", 289 | "text": [ 290 | "True\n" 291 | ] 292 | } 293 | ], 294 | "source": [ 295 | "away_teams = (df_match[['away_team_id', 'away_team_name']]\n", 296 | " .drop_duplicates()\n", 297 | " .rename({'away_team_id':'team_id','away_team_name':'team_name'},axis=1))\n", 298 | "home_teams = (df_match[['home_team_id', 'home_team_name']]\n", 299 | " .drop_duplicates()\n", 300 | " .rename({'home_team_id':'team_id','home_team_name':'team_name'},axis=1))\n", 301 | "teams = pd.concat([away_teams,home_teams]).drop_duplicates()\n", 302 | "print(teams.team_id.nunique()==len(teams))" 303 | ] 304 | }, 305 | { 306 | "cell_type": "code", 307 | "execution_count": 15, 308 | "metadata": {}, 309 | "outputs": [ 310 | { 311 | "name": "stdout", 312 | "output_type": "stream", 313 | "text": [ 314 | "Number of differences: 0\n" 315 | ] 316 | } 317 | ], 318 | "source": [ 319 | "teams_from_events = df_events[['team_id','team_name']].drop_duplicates()\n", 320 | "teams_from_events = teams_from_events.merge(teams,on='team_id',how='outer')\n", 321 | "print('Number of 
differences:',(teams_from_events.team_name_x != teams_from_events.team_name_y).sum())" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 16, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "data": { 331 | "text/html": [ 332 | "
\n", 333 | "\n", 346 | "\n", 347 | " \n", 348 | " \n", 349 | " \n", 350 | " \n", 351 | " \n", 352 | " \n", 353 | " \n", 354 | " \n", 355 | " \n", 356 | " \n", 357 | "
team_idteam_name_xteam_name_y
\n", 358 | "
" 359 | ], 360 | "text/plain": [ 361 | "Empty DataFrame\n", 362 | "Columns: [team_id, team_name_x, team_name_y]\n", 363 | "Index: []" 364 | ] 365 | }, 366 | "execution_count": 16, 367 | "metadata": {}, 368 | "output_type": "execute_result" 369 | } 370 | ], 371 | "source": [ 372 | "teams_from_events[(teams_from_events.team_name_x != teams_from_events.team_name_y)]" 373 | ] 374 | }, 375 | { 376 | "cell_type": "markdown", 377 | "metadata": {}, 378 | "source": [ 379 | "# Are player names consistent?" 380 | ] 381 | }, 382 | { 383 | "cell_type": "markdown", 384 | "metadata": {}, 385 | "source": [ 386 | "Yes!" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": 17, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "player1 = df_shots.loc[df_shots.player_id.notnull(),['player_id','player_name']].drop_duplicates()\n", 396 | "player2 = df_lineup.loc[df_lineup.player_id.notnull(),['player_id','player_name']].drop_duplicates()\n", 397 | "player3 = df_tactics.loc[df_tactics.player_id.notnull(),['player_id','player_name']].drop_duplicates()\n", 398 | "player4 = df_events.loc[df_events.player_id.notnull(),['player_id','player_name']].drop_duplicates()" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 18, 404 | "metadata": {}, 405 | "outputs": [], 406 | "source": [ 407 | "players = (player1.merge(player2,how='outer',on='player_id',suffixes=['_shot','_lineup'])\n", 408 | " .merge(player3,how='outer',on='player_id')\n", 409 | " .merge(player4,how='outer',on='player_id',suffixes=['_tactics','_events']))" 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 19, 415 | "metadata": {}, 416 | "outputs": [ 417 | { 418 | "data": { 419 | "text/plain": [ 420 | "0" 421 | ] 422 | }, 423 | "execution_count": 19, 424 | "metadata": {}, 425 | "output_type": "execute_result" 426 | } 427 | ], 428 | "source": [ 429 | "# check player names in shots matches events\n", 430 | 
"len(players[((players.player_name_events != players.player_name_shot) &\n", 431 | " (players.player_name_shot.notnull())&\n", 432 | " (players.player_name_events.notnull()))])" 433 | ] 434 | }, 435 | { 436 | "cell_type": "code", 437 | "execution_count": 20, 438 | "metadata": {}, 439 | "outputs": [ 440 | { 441 | "data": { 442 | "text/plain": [ 443 | "0" 444 | ] 445 | }, 446 | "execution_count": 20, 447 | "metadata": {}, 448 | "output_type": "execute_result" 449 | } 450 | ], 451 | "source": [ 452 | "# check player names in lineups matches events\n", 453 | "len(players[((players.player_name_events != players.player_name_lineup) &\n", 454 | " (players.player_name_lineup.notnull())&\n", 455 | " (players.player_name_events.notnull()))])" 456 | ] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": 21, 461 | "metadata": {}, 462 | "outputs": [ 463 | { 464 | "data": { 465 | "text/html": [ 466 | "
\n", 467 | "\n", 480 | "\n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | "
player_idplayer_name_shotplayer_name_lineupplayer_name_tacticsplayer_name_events
\n", 494 | "
" 495 | ], 496 | "text/plain": [ 497 | "Empty DataFrame\n", 498 | "Columns: [player_id, player_name_shot, player_name_lineup, player_name_tactics, player_name_events]\n", 499 | "Index: []" 500 | ] 501 | }, 502 | "execution_count": 21, 503 | "metadata": {}, 504 | "output_type": "execute_result" 505 | } 506 | ], 507 | "source": [ 508 | "players[((players.player_name_events != players.player_name_lineup) &\n", 509 | " (players.player_name_lineup.notnull())&\n", 510 | " (players.player_name_events.notnull()))]" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 22, 516 | "metadata": {}, 517 | "outputs": [ 518 | { 519 | "data": { 520 | "text/plain": [ 521 | "0" 522 | ] 523 | }, 524 | "execution_count": 22, 525 | "metadata": {}, 526 | "output_type": "execute_result" 527 | } 528 | ], 529 | "source": [ 530 | "# check player names in tactics matches events\n", 531 | "len(players[((players.player_name_events != players.player_name_tactics) &\n", 532 | " (players.player_name_tactics.notnull())&\n", 533 | " (players.player_name_events.notnull()))])" 534 | ] 535 | }, 536 | { 537 | "cell_type": "markdown", 538 | "metadata": {}, 539 | "source": [ 540 | "# Are scorelines correct (exclude shoot outs)" 541 | ] 542 | }, 543 | { 544 | "cell_type": "markdown", 545 | "metadata": {}, 546 | "source": [ 547 | "Yes!" 
548 | ] 549 | }, 550 | { 551 | "cell_type": "code", 552 | "execution_count": 23, 553 | "metadata": {}, 554 | "outputs": [], 555 | "source": [ 556 | "team_goals_from_events = df_events[((df_events.outcome_name=='Goal')|\n", 557 | " (df_events.type_name=='Own Goal For'))&(df_events.period!=5)]\n", 558 | "team_goals_from_events = pd.DataFrame(team_goals_from_events.groupby(['match_id','team_name'])\n", 559 | " .id.nunique()).reset_index()\n", 560 | "team_goals_from_events.rename({'id':'number_goals_events'},axis=1,inplace=True)\n", 561 | "teams_home_away = df_match[['match_id','away_team_name','home_team_name']]\n", 562 | "team_goals_from_events = team_goals_from_events.merge(teams_home_away,on='match_id',validate='m:1')\n", 563 | "mask_home = team_goals_from_events.team_name == team_goals_from_events.home_team_name\n", 564 | "team_goals_from_events.loc[mask_home,'team_status'] = 'home_score_events' \n", 565 | "team_goals_from_events.loc[~mask_home,'team_status'] = 'away_score_events'\n", 566 | "team_goals_from_events = team_goals_from_events[['match_id','team_status','number_goals_events']]\n", 567 | "team_goals_from_events = (team_goals_from_events.pivot(index='match_id',\n", 568 | " columns='team_status',\n", 569 | " values='number_goals_events')\n", 570 | " .reset_index())\n", 571 | "team_goals_from_events.replace({np.nan:0},inplace=True)\n", 572 | "df_match = df_match.merge(team_goals_from_events,on='match_id',how='outer')\n", 573 | "df_match.away_score_events.replace({np.nan:0},inplace=True)\n", 574 | "df_match.home_score_events.replace({np.nan:0},inplace=True)" 575 | ] 576 | }, 577 | { 578 | "cell_type": "code", 579 | "execution_count": 24, 580 | "metadata": {}, 581 | "outputs": [ 582 | { 583 | "data": { 584 | "text/html": [ 585 | "
\n", 586 | "\n", 599 | "\n", 600 | " \n", 601 | " \n", 602 | " \n", 603 | " \n", 604 | " \n", 605 | " \n", 606 | " \n", 607 | " \n", 608 | " \n", 609 | " \n", 610 | " \n", 611 | " \n", 612 | " \n", 613 | " \n", 614 | " \n", 615 | " \n", 616 | " \n", 617 | " \n", 618 | " \n", 619 | " \n", 620 | " \n", 621 | " \n", 622 | " \n", 623 | " \n", 624 | " \n", 625 | " \n", 626 | " \n", 627 | " \n", 628 | "
match_idmatch_datekick_offhome_scoreaway_scorelast_updatedmatch_weekcompetition_idcompetition_country_namecompetition_name...home_team_managers_country_nameaway_team_managers_idaway_team_managers_nameaway_team_managers_nicknameaway_team_managers_dobaway_team_managers_country_idaway_team_managers_country_namemetadata_xy_fidelity_versionaway_score_eventshome_score_events
\n", 629 | "

0 rows × 50 columns

\n", 630 | "
" 631 | ], 632 | "text/plain": [ 633 | "Empty DataFrame\n", 634 | "Columns: [match_id, match_date, kick_off, home_score, away_score, last_updated, match_week, competition_id, competition_country_name, competition_name, season_id, season_name, home_team_id, home_team_name, competition_gender, home_team_group, home_team_country_id, home_team_country_name, away_team_id, away_team_name, away_team_group, away_team_country_id, away_team_country_name, metadata_data_version, metadata_shot_fidelity_version, competition_stage_id, competition_stage_name, stadium_id, stadium_name, stadium_country_id, stadium_country_name, referee_id, referee_name, referee_country_id, referee_country_name, home_team_managers_id, home_team_managers_name, home_team_managers_nickname, home_team_managers_dob, home_team_managers_country_id, home_team_managers_country_name, away_team_managers_id, away_team_managers_name, away_team_managers_nickname, away_team_managers_dob, away_team_managers_country_id, away_team_managers_country_name, metadata_xy_fidelity_version, away_score_events, home_score_events]\n", 635 | "Index: []\n", 636 | "\n", 637 | "[0 rows x 50 columns]" 638 | ] 639 | }, 640 | "execution_count": 24, 641 | "metadata": {}, 642 | "output_type": "execute_result" 643 | } 644 | ], 645 | "source": [ 646 | "df_match[df_match.home_score != df_match.home_score_events]" 647 | ] 648 | }, 649 | { 650 | "cell_type": "code", 651 | "execution_count": 25, 652 | "metadata": {}, 653 | "outputs": [ 654 | { 655 | "data": { 656 | "text/html": [ 657 | "
\n", 658 | "\n", 671 | "\n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | "
match_idmatch_datekick_offhome_scoreaway_scorelast_updatedmatch_weekcompetition_idcompetition_country_namecompetition_name...home_team_managers_country_nameaway_team_managers_idaway_team_managers_nameaway_team_managers_nicknameaway_team_managers_dobaway_team_managers_country_idaway_team_managers_country_namemetadata_xy_fidelity_versionaway_score_eventshome_score_events
\n", 701 | "

0 rows × 50 columns

\n", 702 | "
" 703 | ], 704 | "text/plain": [ 705 | "Empty DataFrame\n", 706 | "Columns: [match_id, match_date, kick_off, home_score, away_score, last_updated, match_week, competition_id, competition_country_name, competition_name, season_id, season_name, home_team_id, home_team_name, competition_gender, home_team_group, home_team_country_id, home_team_country_name, away_team_id, away_team_name, away_team_group, away_team_country_id, away_team_country_name, metadata_data_version, metadata_shot_fidelity_version, competition_stage_id, competition_stage_name, stadium_id, stadium_name, stadium_country_id, stadium_country_name, referee_id, referee_name, referee_country_id, referee_country_name, home_team_managers_id, home_team_managers_name, home_team_managers_nickname, home_team_managers_dob, home_team_managers_country_id, home_team_managers_country_name, away_team_managers_id, away_team_managers_name, away_team_managers_nickname, away_team_managers_dob, away_team_managers_country_id, away_team_managers_country_name, metadata_xy_fidelity_version, away_score_events, home_score_events]\n", 707 | "Index: []\n", 708 | "\n", 709 | "[0 rows x 50 columns]" 710 | ] 711 | }, 712 | "execution_count": 25, 713 | "metadata": {}, 714 | "output_type": "execute_result" 715 | } 716 | ], 717 | "source": [ 718 | "df_match[df_match.away_score != df_match.away_score_events]" 719 | ] 720 | }, 721 | { 722 | "cell_type": "markdown", 723 | "metadata": {}, 724 | "source": [ 725 | "# Number of events in each file" 726 | ] 727 | }, 728 | { 729 | "cell_type": "code", 730 | "execution_count": 26, 731 | "metadata": {}, 732 | "outputs": [ 733 | { 734 | "data": { 735 | "text/plain": [ 736 | <<<<<<< HEAD 737 | "count 808.000000\n", 738 | "mean 3585.856436\n", 739 | "std 400.381267\n", 740 | "min 2173.000000\n", 741 | "25% 3330.000000\n", 742 | "50% 3588.000000\n", 743 | "75% 3855.500000\n", 744 | ======= 745 | "count 778.000000\n", 746 | "mean 3595.831620\n", 747 | "std 399.092479\n", 748 | "min 2173.000000\n", 749 
| "25% 3337.250000\n", 750 | "50% 3603.000000\n", 751 | "75% 3866.500000\n", 752 | >>>>>>> 6d5a52dc8abbb526b8433831ca108d0786cbc715 753 | "max 5026.000000\n", 754 | "Name: id, dtype: float64" 755 | ] 756 | }, 757 | "execution_count": 26, 758 | "metadata": {}, 759 | "output_type": "execute_result" 760 | } 761 | ], 762 | "source": [ 763 | "df_events.groupby('match_id')['id'].nunique().describe()" 764 | ] 765 | } 766 | ], 767 | "metadata": { 768 | "kernelspec": { 769 | "display_name": "Python 3", 770 | "language": "python", 771 | "name": "python3" 772 | }, 773 | "language_info": { 774 | "codemirror_mode": { 775 | "name": "ipython", 776 | "version": 3 777 | }, 778 | "file_extension": ".py", 779 | "mimetype": "text/x-python", 780 | "name": "python", 781 | "nbconvert_exporter": "python", 782 | "pygments_lexer": "ipython3", 783 | "version": "3.8.3" 784 | } 785 | }, 786 | "nbformat": 4, 787 | "nbformat_minor": 2 788 | } 789 | -------------------------------------------------------------------------------- /06_xa_map.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "from mplsoccer.pitch import Pitch\n", 12 | "import matplotlib.pyplot as plt\n", 13 | "import os\n", 14 | "import matplotlib.gridspec as gridspec" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "# Load datasets" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 2, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "#DATA_PATH = os.path.join(os.getcwd(),'data')\n", 31 | "#EVENTS_PATH = os.path.join(DATA_PATH,'events')\n", 32 | "#df_events = pd.read_feather(EVENTS_PATH)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "markdown", 37 | "metadata": {}, 38 | "source": [ 39 | "# Get all passes for Samuel Eto" 40 | ] 
41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 3, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "#df_pass = df_events[df_events.type_name=='Pass'].copy()\n", 49 | "#df_pass = df_pass[['id','player_id','player_name','pass_assisted_shot_id',\n", 50 | "# 'x', 'y', 'pass_end_x', 'pass_end_y']].copy()\n", 51 | "#df_pass = df_pass[df_pass.player_id==19298].copy()" 52 | ] 53 | }, 54 | { 55 | "cell_type": "markdown", 56 | "metadata": {}, 57 | "source": [ 58 | "# Get all shots and merge onto shots onto passes for outcomes" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 4, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "#df_shot = df_events[df_events.type_name=='Shot'].dropna(axis=1,how='all').copy()\n", 68 | "#df_shot = df_shot[['id','shot_statsbomb_xg','shot_outcome_name']].copy()\n", 69 | "#df_shot = df_shot.rename({'id':'pass_assisted_shot_id'},axis=1)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 5, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "#df_pass = df_pass.merge(df_shot,on='pass_assisted_shot_id',how='left')\n", 79 | "# add assist column and drop shot outcome\n", 80 | "#df_pass['assist'] = df_pass['shot_outcome_name'] == 'Goal'\n", 81 | "#df_pass.drop('shot_outcome_name',axis=1,inplace=True)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 7, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "df_pass = pd.read_csv('LubalaAssists.csv')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 8, 96 | "metadata": {}, 97 | "outputs": [ 98 | { 99 | "data": { 100 | "text/html": [ 101 | "
\n", 102 | "\n", 115 | "\n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | "
x_pass_starty_pass_startx_pass_endy_pass_endxgassist
093.047.590.147.10.00True
183.139.385.353.10.19False
253.250.940.254.00.00False
346.252.440.543.90.22True
484.137.999.019.30.00False
\n", 175 | "
" 176 | ], 177 | "text/plain": [ 178 | " x_pass_start y_pass_start x_pass_end y_pass_end xg assist\n", 179 | "0 93.0 47.5 90.1 47.1 0.00 True\n", 180 | "1 83.1 39.3 85.3 53.1 0.19 False\n", 181 | "2 53.2 50.9 40.2 54.0 0.00 False\n", 182 | "3 46.2 52.4 40.5 43.9 0.22 True\n", 183 | "4 84.1 37.9 99.0 19.3 0.00 False" 184 | ] 185 | }, 186 | "execution_count": 8, 187 | "metadata": {}, 188 | "output_type": "execute_result" 189 | } 190 | ], 191 | "source": [ 192 | "df_pass.head()" 193 | ] 194 | }, 195 | { 196 | "cell_type": "markdown", 197 | "metadata": {}, 198 | "source": [ 199 | "# Subset the data for the lines and shots (assist/ or other)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "code", 204 | "execution_count": 10, 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [ 208 | "#boolean mask for assists or high xg. Use this to subset data\n", 209 | "mask_line = (df_pass.assist==True)|(df_pass.xg>=0.1)\n", 210 | "df_line = df_pass[mask_line].copy()\n", 211 | "# boolean mask for assists. Use this to subset data\n", 212 | "mask_assist = (mask_line) & (df_pass.assist==True)\n", 213 | "df_assist = df_pass[mask_assist].copy()\n", 214 | "# boolean mask for other passes (no assist/goal). 
Use this to subset data\n", 215 | "mask_other = (mask_line) & (df_pass.assist==False)\n", 216 | "df_other = df_pass[mask_other].copy()" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "metadata": {}, 222 | "source": [ 223 | "# Plot the data" 224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "execution_count": 16, 229 | "metadata": {}, 230 | "outputs": [ 231 | { 232 | "data": { 233 | "image/png": "iVBORw0KGgoAAAANSUhEUgAABLQAAAKjCAYAAAAEWFa6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAgAElEQVR4nOzd2W+deZ7f989zzuG+iRS1L6W99q6lq6trusvT45mxYcADuJFGkBtfxIkTIHAucpWbAIH/BF8kjpEM4ARwLnzRdsdwNjvj7umpXmem9lUq7bsokaK4L+c8uTiUVNVdi6pa5OFDvV4Ylkrkofilamaq8Nb393uK517+QRkAAAAAqIhapwcAAAAAgK9C0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUhqdHgAAAADYmr7zylMb+vV+/sv3N/Tr0Tk2tAAAAACoFBtaAAAAwLpa782pjd4Eo/NsaAEAAABQKYIWAAAAAJUiaAEAAABQKYIWAAAAAJVSPPfyD8pODwEAAABf5gfff7XTIwAP0Q9/9NrX/lwbWgAAAABUSqPTAwAAAMBX8btsdWwWd7fNNuP3spln2wib9fvfrHN9HQ9j29KGFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmCFgAAAACVImgBAAAAUCmNTg8AwMP3nVee6vQIW8rPf/l+p0cAAAA+wYYWAAAAAJViQwtgC7NZ9Lux6QYAPMp+8P1XOz0CfC4bWgAAAABUig0tAAAA4J4f/ui1To8AX8qGFgAAAACVImgBAAAAUCmCFgAAAFABRacHYBMRtAAAAIDNb/vupKu/01OwSQhaAAAAwOZW7056B5LxXZ2ehE1C0AIAAAA2t6HhpLcv6R9KBrZ1eho2AUELAAAA2Ny27Ui6e5PuvmTPoU5PwyYgaAEAAACbWC0Z2p50dyc9PcngcDI03umh6DBBCwAAANi8to23Y1ajJ2k0kkZvcuBEp6eiwxqdHgAAAADgc23fk3T1JkWSlEmtSLpHk5HxZPpmp6ejQwQtAAAAYPPavivp7kqKRortu1L09adcXEi5spj85Z91ejo6RNACAAAANqfewWRwJMXASOpPvJAURVKrJ61mcuSpNG9PpTz1eqenpAPcoQUAAABsTnsOJ13dqT/xQopGV4p6I0VRtH9sdKX+9/6zpG5X51EkaAEAAACb0449Kbbvbm9mfY7i8Rc3cCA2C0ELAAAA2Jw++KsUl08ntc/JF/VGim3jGzsTm4KgBQAAAGxOM5MpJ64ky8uf/fGVpZS3PenwUSRoAQAAAJtW+dHr7UvgP0ur1f44jxw3pwEAAAAdNzjQm2+99Hi2jQxkfn4pf/3Gqdy8dSdprqb5L/9J6v/Jf9M+etjVk6wsJa1Wmv/ynyTN1U6PTgdsyqD1g++/2ukRALYE//
/04fD7CPC7++GPXuv0CMAmVqsV+e53ns7gQF+SZHCwL9955an8+//wehYWllNePZ/V/+G/TXHihRTbxlPevtnezBKzHlmbMmgBAAAAj46Bgb57Meuurq5GxkaHc3lh7Y6s1ZWU7/86ZQfmY/PZ1EHLn+LQCXc3Mfzv39bjny18Mf83snX5Z0sn2XIFHsTqanvTqizLFEVx78e774ff5FJ4AAAAoKMWFpZz+syVFEWRJCmKItdvTOXGxO0OT8Zmtak3tAAAAIBHw5tvn8mtyZl7l8KfOXctpfOFfA5BCwAAANgULl6ayMVLE50egwpw5BAAAACAShG0AAAAAKgUQQsAAACAShG0AAAAgI335LeT/qFOT0FFuRQeAAAA2BiNrhQnXkix+7EUB46lfPy5lLeupbx4Krl4Mmk2Oz0hFSFoAQAAAOuu2PNY6j/4RyknLqW8ej5ZWU7tyNNpdfel2DaePPlSWtcvJOdPJreudnpcNjlBCwAAAFhfja7Uf/CP0vx///eUp9669+7i+HOp/8k/SOv0u0mtntqeQ8nOgynnZ1NeO5dcOJksznVsbDYvQQsAAABYV8WJF9qbWZ+IWUlSnnor5fWLKYZGU96eSMoyZVGm6OtP8diTycETad2eSC6fTa6dT8pWh74DNhtBCwAAAFhXxeiO9jHDz1BePp3iyDNrP0mK8v7flGVSDG9PhrYlR59OOXE9uXYmmb61YbOzOQlaAAAAwLoqpyZSe/aVz/xYse9osryUlOXai+/+bdnuWmklZZGy3kixc28yvivlwkwycSW5filZXtiw74PNo9bpAQAAAICtrfzo9RQ79qc4/tyn3l8cfy7FrgMp725clWXKJEXKFK32j2VZS3kvdrV/LHoGUuw9kjz33eTxbybb9yaFxPEosaEFAAAArK/mapo//B/bTzn81h+3jxnuO5pi14G0Lp9uh6wyKdKOWLnbr4okabU/0Fo7j9gq729zpUyGRpKBoWTf4WTqRvsJifOzHfk22TiCFgAAALDuyqvns/rP/rsUL/1xiu270rp5JeWpt1J09yY9fSm6upOiaL82ZZK78ar9nnwidKXI2tZWLSlX2++r1ZJtO5LhsWRpoR23bt9Kmisb/r2y/gQtAAAAYGOsrqT85f99r0sl9xtVmSTd/cngcNI/mPT2Jd19SXdvip6+pNHV3tAqW0lZpijLlGWz/Ymttfh191dr9CQ79iWjO5P5mWR6Kpmb/sRXo+oELQAAAGBzWJ5PJueTyU+/+16GanQnvYNJX3/S1ZP09ib1rnbsanTdj1tFmTRbSYr267t7k9HxZO5OMjOdrC5t7PfFQydoAQAAQIf84PuvdnqELWUltazUetMqurJa1NMq6mnV6mkWRYoUKcoiRZpptFYysDqfrrvHFakcQQsAAADYErrSSlerleSz781aTZFWamkWtdTK1sYOx0MlaAEAAMAG++GPXuv0CF/Z0FB//ubvfyNdXY2cPnMlb759ptMjfWVHDu/JC88dTdlazc9/+X6uXZ/q9Eh8TbVODwAAAABsfjMz8/nlrz9Ms9nK0SN788JzRzs90ldy7Oj9md98+7SYVXGCFgAAAPBAbkzczi9+9X6azWaOHN6Tb75wPEVRdHqsL/X48f157tkjSZI33vo4Z85e6/BE/K4ELQAAAOCBXb9xOz//5ftZXW3m0GO78r1Xn01vb3enx/pM9XotL7/0eJ55+lCS5PU3TolZW4SgBQAAAHwlNyam89PX3sn8/FK2bx/OH/3B89kxPtLpsT5laLAvf/i953Jg/46srKzmF7/6IGfPX+/0WDwkghYAAADwlU3dns2f/eSN3Ji4nd7e7vz+q8/mxReOpburs8+fq9VqeerJg/njP3whw8MDuXNnPj/+87dy5eqtjs7Fw+UphwAAAMDXsry8mtd+/m6eOHEgj584kMOP7c7e3dvz7vvncv7CjZRluaHz7Nk9lm88cziDg31JkrPnruXtd89mdbW5oXOw/g
QtAAAA4Gsry+SDjy7m4uWbeeG5o9m5Y1u++cLxPHHiQE6eupRzF26k1Wqt29cvimTvnu15/MSBjG4bTJJM35nLG2+ezq3JO+v2deksQQsAAAD4nc3OLuQvfvZu9u8bz1NPHMzQUH9eeP5YnnrqsVy+fDOXLt/MxM3ph/b1to0MZP/+HTmwbzz9/b1JksXF5Xx06lJOn7m64dthbCxBCwAAAHhoLq3Fq317x3Pi+L6MjQ7lyOE9OXJ4TxYWlnJjYjq3Ju9kcnIm03fmHvjXHRzozdjYcLaPDWXHjm0ZWjtWmLRj2qnTl3P+wo00m+u3DcbmIWgBAAAAD93lKzdz+crNDA/158D+Hdm/fzyDA3157ODOPHZwZ5Kk2WxlcXE58wtLWVhYyupqM62yTK0oUq/X0tfbk76+7vT19aTRqH/q119cXM6lKzdz6dJEbk3OdOJbpIMELQAAAGDd3JmZz3sfnM97H5zPyMhAtq9tWW0fG87AQO+9ty+zuLicW5MzmZy8097wmpqJU4WPLkELAAAA2BDT03OZnp7LmbNXk6S9hdXXk77eu1tYtRRFkbJMWq1WFhaWsrDQ3uDypEI+SdACAAAAOqLZbGV2diGzswudHoWKqXV6AAAAAAD4KgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUgQtAAAAACpF0AIAAACgUhqdHgBgo/3g+692egQAAAB+Bza0AAAAAKgUG1rAI+OHP3qt0yMAAADwENjQAgAAAKBSBC0AAAAAKkXQAgAAAKBSBC0AAAAAKkXQAgAAAKBSBC0AAAAAKkXQAgAAAKBSGp0eAAC+rkaSodQylnoaKbKaMpNpZiatrHZ6OAAAYN0IWgBUUm+KHEhXiiTLKbOcMvUkO9LIeJKLWcliyg5PCQAArAdHDgGonEaSA+nKasospExz7f3NJAsps5oyB9LlT20AAGCLErQAqJyh1FIkn3uscDVJsfY6AABg6/Ff+gBUzljqWf7N44S1T/8rbTllxlLfwKkAAICNImgBUDmNFPeOGSZJ6vVkcNunXtNcex0AALD1CFoAVM7q2gXw9xVJoysp7v9rrb72OgAAYOsRtAConMk00/3J7ataLamtRa013Sky+ek9LgAAYIsQtAConJm0UiafeIphkdTqSXdPsvb+cu11AADA1iNoAVA5q0kuZiWNFOlLkXqtlqRIvas7fSnSSJGLWfncpyACAADV1vjylwDA5rOYMmeznKHUMlarpaeoZbWrNxNZzUxaYhYAAGxhghYAlbWaZCqtTHXVksGBpEgy1UhWlzs9GgAAsI4cOQSg+mq1pF60n3LYN9DpaQAAgHUmaAFQfUU9qXUl9UYyMNjpaQAAgHXmyCEA1Vert7ezUia9w52eBgAAWGeCFgDV12i0jx0WRdLvyCEAAGx1jhwCUH21elKvt3/s6k56RS0AANjKBC0Aqq/e1Y5Zd48eDm3r9EQAAMA6ErQAqL560T5yWCuSRj0ZHOn0RAAAwDpyhxYA1Vfrah85LNd+LmgBAMCWJmgBUH31taOGRdH+ee9gUjSScrWzcwEAAOvCkUMAqq/RlaJWS1EUSa1I0agnQ7a0AABgqxK0AKi+Wm3tUvhailqtva01PNrpqQAAgHXiyCEA1dfoWjtymCSloAUAAFucoAVA9dUb7Yh1V5EUQ6P37ogHAAC2FkcOAai+teOGqRVJvdb++56+pNHb6ckAAIB1IGgBUH31Rjti3X3SYVFvv29sR6cnAwAA1o
GgBUDlFfVGUqvf+7G9pVVPhrd3ejQAAGAduEMLgOprNNrHDVOkSO7fnbVN0AIAgK1I0AKg+uqN9jHDlO2aVSRJkdrQaFqdnQwAAFgHjhwCUH31evvurFq9ffSwVrTv1OruSQaGOz0dAADwkAlaAFRe8clL4WtFUjTWAlctGdvT6fEAAICHTNACoPrqjRR3n3BYa29rtX9e86RDAADYgtyhBUDF1dr3Z/UPptixN8XwaDtkla2Ud6ZSpEz51mudHhIAAHiIBC0Aqq2rK8XB4ymGx5La2mZW0o5cI2OpP/N7KRvdaf7bf560mp2dFQCAe4oiGRrsz7Ztgxka7Eu9UUu9VkutVktZlmk2W2k2W5mbX8zt27OZvjOXVqv88l+YR4KgBUCl1f/uf5piZCxFrf5bHyuKWlJPcuL51P/kH6T5b/504wcEACBJ0tfXk507RrJt22BGRwYzMjKQRuO3/xvu87Rardy5M5+p27OZuj2bm7fuZGZmfh0nZjMTtACorGLv4RTHnvnMmPWp13V1J8efS7HnUMqr5zZkNgAA2nbtHM3RI3uye9doiqL41Mfm5hYzdXs2d+7MZWW1mVazlWarTK1WpF6vpV6vZWioP6Mjgxka6su2bYPZtm0wh9c+/+at6Zw5ey2Xr9y0vfWIEbQAqKzay38rqXc92IsbXam9/LfS/D/+l/UdCgCAdHc3cujgrhw+vDuDA31JkmazlWvXJzM5NZPbt2dz+/ZslldWH/jXrNdr2TYymNFtgxkdHcye3WMZ3z6S8e0jWVw6nHPnrufsuWuZX1har2+LTUTQAqCyimPPpqg92AN7i1otOfaNdZ4IAODRVq/X8sxTh3L40O7U6+3/TpubX8yZs1dz7vz1LC8/eMD6Tc1mK7cm7+TW5J17X+vA/h05emRPto0M5onHD+TxE/tz6fLNvPXOmSwtrTyU74nNSdACoLoa3V/t9V0PuM0FAMBXNr59OC+9eCIDA70pyzJXr03mzNmruXZ9al2+XrPZyrnz13Pu/PWMjQ3l6OE92bd3PAf278jOndvy1ttncvHSxLp8bTpP0AKgulaXk66eB3/9ij+lAwB42Or1Wp55+lCOHdmbJLk9PZu/ev1UpqfnNmyGycmZTE7O5N33z+fF549l967RvPzS49m3dzxvvPWxba0tSNACoLLKj99JHn/xgY4dlq1Wyo/f3oCpAAAeHePbh/PNF49ncKAvrVYrH350MR+evJSy7MwF7QsLS/nZL97Locd25RvPHM6+vdszPj5sW2sLErQAqKzWr/996kefTbofYEtrdSWtX//79R8KAOAR8djBnfnmC8dTFEVHtrK+yLnz13P9xlRefP74vW2tkeH+vPv++U6PxkPyYDfpAsAmVF45m/LUWylXvvhJNuXKUvt1V89tyFwAAFvdsaN789KLJ1IURT46eTH/4SdvbZqYddfCwnJ+9ov38vqbH6fVauXxEwfywnNHOz0WD4mgBUClNf/tP0958q2Uy0spW61PfaxstdrvP/lWmv/2n3doQgCAreXE8X157tkjSZK33j6Td98/37Ejhg/i7Llr+cWvPkiz2cyRw3vy0ovHOz0SD4EjhwBUW6uZ5r/50xR7DqX27b+dHH22/TTDlZWUH7+d1q//XcqrVssBAB6Gw4d259mnD6csy/z1G6dy/sKNTo/0QK5dn8prP38v3/29p/PYwV1ZWWnmrXfOdHosfgeCFgBbQnn1XJo/+p87PQYAwJa1b+/2e0f23njrdGVi1l03b93JL371fr7zytM5dnRvlpZX8uFHFzs9Fl+TI4cAAADAF+rr6753Afw7753L2XPXOj3S13JjYjq//quPUpZlnnriYLaPDXV6JL4mQQsAAAD4Qi8+fzxdXY1cuXorJ09d6vQ4v5MrV2/lo5OXUhRFvvniidTr0kgV+acGAAAAfK7HDu7K7l2jWV5eyRtvftzpcR6KDz66kDt35jI02Jennnys0+PwNQhaAHREUcSfhg
EAbHJ9fd157tnDSZI33z6TxaWVDk/0cLRaZf7q9VNptcocP7rX0cMKcik8ABumVqvlwP7xHDm8J2Oj7f9oWF1t5uKliZw+ezXT03MdnhAAgE/65FHDi5cmOj3OQzV1ezYnT13KE48fyDdfPJE/+/EbaTZbnR6LByRoAbAh+vt78t3fezrDQ/1JkrIsUxRFGo16Dh/ancOHdueDjy7k/Q8udHhSAACSZPeu0S131PA3ffDRhezZM5aR4YEcPbInJ09d7vRIPCBnPQBYd93djfyN7zxzL2YlSVEUn3pNWZZ58vGDeeLEgY0eDwCAz3D08J4kyUcnL22Zo4a/qdUq8+5755IkRw7t6ewwfCWCFgDr7vjRfRkc7PvC1xRF0Y5aTxxIT0/XBk0GAMBnGejvza5do2k2Wzl34Xqnx1lX165PZW5uMQMDvdm9a7TT4/CABC0A1lWtVuTQoV0P9NqiKFKr1XL4sd3rPBUAAF/k8OHdKYoily5PZHl5tdPjrLszZ68mSY4ctqVVFYIWAOtqdHQovT3dX+lz9uwZW6dpAAD4MrVaLYcea/+B5Om10LPVnbtwPc1mK7t3jWagv6fT4/AABC0A1lVXo74hnwMAwMOxf994erq7MnV7NlNTs50eZ0MsL6/m0uWbKYoih92lVQmCFgDranW1+ZU/Z+VrfA4AAA/Hgf3jSe4fw3tU3P1+D+zf0eFJeBCCFgDranJqNkvLX+2pONevT63TNAAAfJnRbUNJkhs3bnd4ko01OTWTlZXV9Pf3eEhRBQhaAKyrVquV8+cf7Mk4ZVmm1Spz9ty1dZ4KAIDP0t/XjjlLSyuZX1jq9Dgb7vb0XJJkdNtghyfhywhaAKy7kx9fzvz84he+pizLFEWRk6cuZWFxeYMmAwDgk7athZzbtx+Nu7N+093ve5ugtekJWgCsu6WllfzFz9/L3NxvR62yLJMkRVHk1OnLee+D8xs9HgAAa+5uJk09okHr7vdtQ2vza3R6AAAeDbOzC/n/fvxGHju4M0cO78nwUH+SdtC6dPlmTp+9mlu37nR4SgCAR9s2QStJsm1E0NrsBC0ANszqajOnz1zN6TNX09VVT61Wy8rKalqtstOjAQCQZNvIQJLk9vSjGbRmZxfuXQzf3d3I8vJqp0ficwhaAHTEykozSbPTYwAA8And3e1MsLCw8Xea1uu1PP3kYxkZGcj8/FLee/9cFpe+2tOyH4alpZV0dTXS3SVobWaCFgAAAJCiSGq1WsqyvHfP6Ub6vW8/mV07R+/9fHz7cP7sJ29mdXVj/xC02WwlSer1+oZ+Xb4aQQsAAIBK+cH3X+30CFtaURSb4vd4cLAvf+9Pfq9jX/+P//CFjn1tvpynHAIAAABQKTa0AAAAqIQf/ui1To8AbBI2tAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAACoFEELAAAAgEoRtAAAAAColEanBwBg8yn2Hk7txe+lOPZc0tOXLC2kPPVWWq//JOXVc50eDwAAeMQJWgDcV6un/nf+forHHk/r9Z+k+ZN/nczNJANDqT397dS//1+mPP9Rmv/Pv0hazU5PCwAAPKIcOQTgnvrf+fvJ0EhW//Qfp/Wrf5fMTidlK5mdTutX/y6rf/qPk+HR9usAAAA6RNACIEn7mGHx2ONp/qt/lqwsf/aLVpbT/OE/TXH06dT+5D9P8eRLycC2jR0UAAB45DlyCECSpPbC99J6/SefH7PuWllO6y//Q4rnXk3R358883sp5+4k1y+mdfVscu2C44gAAMC6ErQASJIUx59L88//9QO9tvXuL9P47t9N6+aVFEmK/sHkyFOpH34y5cpKMnktrWsXkytnkrnp9R0cAAB45AhaALT19L
UvgH8Q8zNJoytJmbIsk7JIkSRFUjQayc79qe/Ym/LpbyVzd1LevJLy2vnk2qWktL0FAAD8bgQtANqWFpKBofZF8F+mfyhZXUlSS4pWUpYpy6T9l6RI0Y5b9XoyOJIMDKU4cCJZXUk5dSPlxOXkyrlk/s56fkcAAMAWJWgBkCQpT72V2tPfbj/d8EvUnnkl5ex0inotaRUp00pStoNWmfbPy7S3tlKkKGpJLSkbSbFjT4rte5Invply/k5y82rKaxeSiSvtJyoCAAB8CUELgCRJ640/T/3v/RdffjF8V09q3/qjtCavp2i22ptYRS0p1mJWqx2z7m1tFWWK9uJWilqRlLWUtTJplSn6hpIDgyn2H0+aqylvT6ScuNK+WN72FgAA8DkELQCSJOWVsynPf5T6f/Rfpfmv/qfPjlpdPan/x/91yuWlZHFu7Zhh7t2flRQpavWkbKVslUnWjiO217ba8StFiqJIavWUZWttq6uV1OrJ2K7UxnYljz+fzM+ldfNKcuNSe3sr5cb9ZgAAAJuaoAXAPc3/51+k/nf+fhr/8B+n9fqfp/Xer9oXwPcPpfb0t1N76Q9Tzk6nvPhx0jeQpH1EsB2miiSttfuzihT1ot2z7m5srR1JLMtW1m7ZSlEU7U+r1ZJWK8XaHVxlWSa9/akdOJYcaG9vZWoirYlLyfWLyeJ8J357AACATULQAuC+VjPN/+t/S7HnUGov/kEa//C/bz/9cGkh5am30vxX/zTl1fPt13b1JON7UoztTjE6ngyNJj1997ax7l0MX6u1Q9anwlYrZYqkbKUo0v77Wi1JsRa2akla7bCVsn2kcWxXatt3J4+/mMzdSevWteTG5WTqhru3AADgESNoAfBbyqvn0vw//9cvftHKUnL1XMqr5+4fBuzuS3bsTTG6I+XIeIrh0RTdve2NrVotaZUpiyJpNdeOGq7ds5X21lZbLam1krKWomy1P+duJGu12nd19Q+m1n882X8sWVlOOXUz5eTV9tHEJdtbAACw1QlaADw8ywvJ5dMpL59OsraQ1T+UbN+TYnRHipHxZHg0Rb3RPnrYWrtDK+1jhsWn7skqkqIdt4rW2ms+8STFoli7lKvRnWJ8d4rxncmxb6Scu5NyaiK5dSW5ffPerw8AAGwdghYA62t+JpmfSXnx5P1cNbQt2b47xcj2FCPbk76htcvky7t3x+f+JfDtC+TTWruf627carXufnTtr7WkVqYYGE4xMJTsPZRydTWZvply8kYyeS1ZWtiwb3uzaCQZSi1jqaeRIqspM5lmZtLKaqeHAwCAr0nQAmDjzdxOZm5n7SGJSYpkeCwZ25lieHvKodFkYLB9VPGTC1a1WvvHci1ulWX7aYpFe3Or/aDFov2L1mopGl3J9l3J6M7kyNPJwmzK2zeTW9eS6VvJFn9yYm+KHEhXiiTLKbOcMvUkO9LIeJKLWcniFv89AABgaxK0ANgEyuTOreTOrft5pagl23ak3LY9xfBYMjSSorcduVLU2p+TVopa+whiO46VSau8v+FVFEmrltD6VpIAACAASURBVKJotT/aN5iitz/ZfTBprqa8M5lMTSST15PlxY585+ulkeRAurKa8lObWM0kCynvffxslm1qAQBQOYIWAJtT2UqmridT1+9FrrLeSEbGk23bUwyNpRjalnT3pv0kxLQvkF87tth+dmLZPo1YFu1tr5RJWbTDV63evtNrZDw59OT97a3bE2vbW9U2lFqK5HNj1WqSrrXXTcVTIgEAqBZBC4DqaK6278KavHb/uGJXT7JtPBkaTTG0LcXAcNLoWrs7vrx3KXzRPo+49j93k9cn/tLbn2L3wWT3gWS12d7euj3R3uBaXe7AN/u7GUs9y19ynHA5ZcZSF7QAAKgcQQuAaltZSiYuJxOX70eunr72nVxDoymGRpL+oZS1RtpbW2sXyBdlinLtKOLdM4t3A1CtlmLb2ibYY0+knJ9JOb0WuObupAp3bzVSfGnQaibpWT
ugCQAAVSJoAbD1LC18KnIlSfoGkuGxlINrW1x9Qylqd48iFklRri1z3d/quvvUxfQNpugbSHbtT1ZWUs5OJXcm20cTV1c68R1+qdW1C+CbX/Ca+trrAACgagQtAB4NC3Ptt+sX79/J1T+cDG1LBoZTDI60L4xP+8qtdtS6H7baiqSrq3331vBYsvdIysW5ZGYquTOVzM9s8Df1+SbTzI40svAFwao7RSZcCQ8AQAUJWgA8uubvtN+y9pTEopYMDif9Q8nASIr+wbVL59O+pL59w/z9zy9q7QjW05+M7025uprMTScz0+3I1ezc9tZMWhlP+1/0n5Ws2gcw268DAICqEbQA4K6ylczcbr9lbZOrqLcj18DQ/aOHXT33t7aKtcjVSop6LRkaTTm4Ldl1oH30cW46mZ1OFmY39FtZTXIxKzmQrnSlfQF8M+1jht1r1+JfzIr9LAAAKknQAoAvUjbb21YzU+2fJkmt0Y5cfQPtba7e/hT1Rj61wVUrkt6+pKc3GdvVfkLj3Ew7cM1NJ80vut3q4VhMmbNZzlBqGUs9PSmymjITWc1MWmIWAACVJWgBwFfVWm1fCn9n8t67ykZXO271DSa9/e2QVdTXPlgmtVoyONIOYeWBZGn+fuBaWli3UVeTTKWVKUcLAQDYQgQtAHgYVld+K3KlqyfpHVh76026++4fUezuTbp6k23b29ta8zPJ/Gz7x/LL41Mjubd51VjbvJpM0+YVAACPBEELANbLylL7beaTkas36etvx62evqSru729NTDcfitbyeJCsjSXzM22P/839KbIgXSlSPturOWUqSfZkUbG074ba/ELnm4IAABVJ2gBwEZaWWy/fVJ3T/tJiT19SXd3+7hid08yNJasrraPJy7MJYvzaZStHEhXVlN+ahOrmWQhZRpJDqQrZ7NsUwsAgC1L0AKATltear+tXTyf1JKenrVjiT3tuNXbn5RlhuYXUiwtZXVl8TMvll9N0pX2cUT3ZgEAsFUJWgCw6bTaF8V/8rL4opZ092Ss6Mlyo5H0bGtfTr8w197i+oTllBlLXdACAGDLErTgc/zg+692egSA3zJw+U6a3bWkKFKmSJIUv3lfVlmmvtzK8/uGOzAhAACsv1qnBwAAHlxZL5JWO2C1k9ZnXP7eKtuvAwCALcqGFvyGH/7otU6PQIX09/fkD7/3fHp6uvLxmSt56+0znR6JLay/ryd/+5tPZniplUs3JnPq9JXPfF1fikxk1ZFDAAC2LBtaAL+D+fml/OJX76fZbOXYkb05enhPp0dii2o06vnOK0+lMdqfqTtzOXfms2NWI0mZZEbMAgBgCxO0AH5HtyZn8tdvnEqSPPeNI9m9a7TDE7HVFEWRl196PCMjA5meX8yPT59PvSzSlyL1tdfU097MaqTIxaxk9Yt+QQAAqDhBC+AhuHhpIh98eCFFUeSVl58UtXhoiqLIt7/1ePbsHsvS0kp+/ov3M7u6mrNZzkRWU0/Svxa2JtJ+/+Jn3asFAABbiKAF8JC8/+GFnD5zJfV6La+8/GT27B7r9EhUXK1W5JWXn8i+veNZXl7Ja794L3Pzi0mS1SRTaeV0VvJRlnM6K5lKy2YWAACPBEEL4CF68+0zOXX68lrUeiJ792zv9EhUVDtmPZm9e7ZnaXklP/3Zu7l9e7bTYwEAwKYgaAE8ZG+/czYnT11KrVbLt7/1ePbtFbX4amq1Wr7z7afuHTP8i9fezfT0XKfHAgCATUPQAlgH77x3Lh+dvJharZaXX3oi+/eNd3okKqJer+W7rzyVXbtGs7i4nJ++9k6m74hZAADwSY1ODwCwVb37/vm0yjJPPn4wL7/0eIqiyMVLE50ei02sXq/lO688lZ07tmVhcTl/8do7mZld6PRYAACw6QhaAOvo/Q8upGyVeerJx/Ktb55IURS5cPFGp8diE2o06vnuK09lfHwkCwtL+enP3s2smAUAAJ9J0AJYZx98dDGtsswzTx3KSy8eT6NRy5
mz1zo9FptId3cj3/n2U9m+fTjz80v56c/eydzcYqfHAgCATUvQAtgAH528lLIs8+zTh/PCc8eybWQwb759Oq1W2enR6LDRbYN55eUn0t/fm7n5xfzFa+9kbn6p02MBAMCmJmgBbJCTpy5ncXElLz5/NIcP7c7IyEB++esPs7AgXjyqDj22K89/42jq9VpuTd7JL3/9YRYXlzs9FgAAbHqecgiwgS5cvJEf//TtzM0tZmx0KH/0B89n546RTo/FBqvVirz4/LF884XjqddrOX3mSv78L94RswAA4AEJWgAbbHp6Ln/2kzdz7fpkenq68up3nsmJ4/s7PRYbpK+vJ9/7G9/I4UO702w285d/fTJvvn0mZen4KQAAPChBC6ADVlZW87NfvJ8PPryQoijy7NOH8srLT6TRqHd6NNbRjvGR/NEfPJ+x0aHMzS3mxz9921MvAQDga3CHFkAHvf/hhUzdns1L3zyRfXvHMzzUn1/86oPMzC50ejQeshPH9+WZpw6lKIpcuz6ZX//VyaysrHZ6LAAAqCQbWgAddvXaZH78kzczPT2XoaH+/M3vPZfHDu7s9Fg8JD09XXnl5Sfz7NOHUxRFPvjwQn72i/fFLAAA+B3Y0ALYBGbnFvPjn76VF58/loMHdualF0/k4IGdefOt07a1Kuzwod155ulD6e5qZGVlNX/51ydz9dpkp8cCAIDKE7QANolms5W//OuTuX7jdr7xzOHs3LEtf/Q3X8jJU5fy4cmLabVcGl4VI8MDeeH5o9k+NpwkuXZtMm+8fTrz80sdngwAALYGQQtgk7lw8UauXZvMM08fyuFDu/PkEwdzYP+OvPHW6dyYuN3p8fgC9XotTz1xMMeO7kutVmRhYSlvvXMml6/c6vRoAACwpQhaAJvQ8spqXn/z45y/cCMvPH80I8MD+RvffSYXLt7I2++ezdLSSqdH5Dfs2T2W579xJP39vSnLMh+fvpL3Pjif1dVmp0cDAIAtR9AC2MRuTd7Jn/34zRw/ti9PPn4gBw/szO7dY3n3vXM5e+5ap8cjSV9fT57/xpHs3bM9STJ1ezavv/lxbt+e7fBkAACwdQlaAJtcWZY5eepSLl2eyAvfOJrdu8fy4vPH8tjBnXnv/fOZuDnd6REfSY1GPUeP7MkTJw6k0ahnZWU1731wPqfPXO30aAAAsOUJWgAVMT+/lJ/98v3s27s9zz17JNvHhvP7rz6biZvT+eDDC8LWBrkbsk4c25fu7q4kyaXLN/PWO2eyuLjc4ekAAODRIGgBVMzlK7dy/cbtHDuyN8eP7c2O8ZHsePXZ3Lw5nfeFrXXTaNTv/Z7fDVl+zwEAoDMELYAKWl1t5sOTF/PxmSv3toXGx0fy+68+m8mpmZz6+HIuX7mZsuz0pNXX19edY0f25vCh3enqav9r01YcAAB0lqAFUGGrq818dPJSTp+5mqNH9uT40X0ZGx3Kt7/1RObmF/Pxx1dy7sJ1T9r7GkZGBnL82L4c2DeeWq2WRMgCAIDNQtAC2ALuhq2PT1/JwQM7c+LYvgwO9uW5bxzJk08czKXLE7l4aSI3b93p9KibWndXI/v2bs+BAzuzY3wkSftS/ouXJnLq48uZ8uRCAADYFAQtgC2k2Wzl7LlrOXvuWvbuGcvxY/syvn0kRw7vyZHDezK/sJRLl2/m4qWJ3BZnkrTvxtqzeywH9u/Irp3b7m1jra42c/b8tXx8+krm55c6PCUAAPBJghbAFnXl6mSuXJ3MyMhADuwbz/79OzLQ35sTx/blxLF9mZ1dyMXLE7l46WZmZuY7Pe6GqtVq2b17NAf27cie3aOp1+tJklarzPXrU7l4eSKXr9xyVBMAADYpQQtgi5uensv09Fzeff98xkaHcmD/juzfN57Bwb48+fjBPPn4wUxPz+Xi5Ylcunwzc3OLnR55XdRqRXaMb8uB/Tuyd8/YvQvek/bdWJcuT+Ty5VtZWl7p4JQAAMCDELQAHiGTUzOZnJrJW++cyY
7xkRzYvyP79m7PyMhARkYG8sxTh7KwsJRbkzOZnLyTW5MzuT09m1areo9L7O3pytjYcLaPDWX72HC2bRtMvV679/GpqZlcvHwzly5PZGFhuYOTAgAAX5WgBfCImrg5nYmb03njrdPZtbO9ubR712j6+nqyf19P9u8bT9K+l2vq9uy9wHVr8k6WljbXFlNRJCPDA58KWAMDvb/1utvTc7l85WYuXZrI7BbdRAMAgEeBoAXwiCvLMteuT+Xa9akkydBg3yfC0FCGhwcyvn0449uH733O3NxiJqdmMj+/mPmF5SwsLK29La/bkb1arUhfb3f6+nruvfX3dWdoqD9jo0NpNOqfev3Kymomp2bubZtNTs1kZcWdWAAAsBUIWgB8yszsQmZmF3L+wvUkSVdXPWOj9zefRkcHMzDQ+5kbUEl7o2thYSkLi8uZXwtdiwvLWW22UpattFplWmWZslWmLMvUarUURZFarbj3Y3d3I/2fCFd9vd3p7e3+wrlnZxfubZBNTs5k+s7cQ/+9AQAANgdBC4AvtLLSzPUbU7l+o73BVRTJ8PBARoYH1qJT99q2VPvvu7u7MjjYl8HBvoc6R6tVZmGxvQX2yY2wufnFTE7OuMwdAAAeIYIWAF9JWd5/cuJnqddr944DfnLDqlarfWoLqyjab2VZtre2Wq3235dlVlaaWVhYurfhtbCwnMVFF7cDAABtghYAD1Wz2crs7EJmZxc6PQoAALBF1b78JQAAAACweQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAAFSKoAUAAABApQhaAAAAwP/f3n01x3me+b7+dyMnIjAAYA4KFEVFW5bGo/Ga8VqrdtU+mip/zqma89mTPZYVrcgo5gCCAQCJDHTYBwChYEkUKYBvv8R1VbFkAE30DVMSwZ/u52koFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAA
AAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAoFUELAAAAgFIRtAAAAAAolfaiBwAAAACeTb9958RTfb4//fnUU30+imNDCwAAAIBSsaEFAAAAbKmt3px62ptgFM+GFgAAAAClImgBAAAAUCqCFgAAAAClImgBAAAAUCouhQcAAKAU/vCP7xY9Ak/oaf3a+XukXP7pn//4xD/XhhYAAAAApWJDCwAAgFL5JVsdreLhJlErfi2tPNvT0Kpff6vO9SQ2Y5POhhYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApSJoAQAAAFAqghYAAAAApdJe9AAAAABA6/nDP75b9AhrGs1UavW0rTaSZjOpVFLvqKbZ3pZUK0VPR0EELQAAAKA11RtpX1pNmpU0q0naqkmjmbaVRrJaT627Y+19bDuCFgAAALDhn/75j0WPkCSpJNmRahpJGj/w8er6jwdppPlUJ6MVyJgAAABAy+lIJZX8cMzK+vsr649j+xG0AAAAgJbTnaT2iMfU1h/H9iNoAQAAAC2nmsojjxI21x/H9iNoAQAAAC2nkeYjU1Vl/XFsP4IWAAAA0HKW8uhXsmtffxzbj6AFAAAAtJzVNNePFP6wataOHK7a0NqWBC0AAACg5TSTzKWRapKOZOP4YWX97er6x+Ws7elR23sAAAAAhagneZBGOlJJd9YugG+kmYV8s8HF9iRoAQAAAC2rmWQlzaxsvAWOHAIAAABQMoIWAAAAAKUiaAEAAABQKoIWAAAAAKUiaAEAAABQKoIWAAAAAKUiaAEAAABQKu1FDwAAAABQrVby4vP7MzjYl4WF5Zw+ey2rq7Wix6JFCVoAAABA4d5+63j2ju/ceHvP7qH8+399lnq9UeBUtCpHDgEAAIBC9fV2Ze/4zjSbzY33DQ72Zc/uoQKnopUJWgAAAEChKtW1PFGpVL73/soPPRwELQAAAKBY8/OLmZqe/c77FheXc/fu/YImotW5QwsAAAAoVLOZ/M97X+X1V49tXAr/2RcXs7LiUnh+mKAFAAAAFG5lpZYPPjpb9BiUhCOHAAAAAJSKoAUAAABAqQhaAAAAAJSKoAUAAABAqQhaAAAAAJSKoAUAAABAqQhaAAAAAJSKoAUAAABAqbQXPQAAAABAESpJOlJJd5JqKmmkmaUkq2mmWfBs/DRBCwAAANh22pL0p5pKklqSepJKKulN0kwlc2mkXuiE/BRHDgEAAIBtpZK1mNVIsppsbGM1199u5JvYRWuyoQUAAABsG5UkfalmR5L6+jHD5Xw3bDWytsHVkUpWHD5sSTa0AAAAgG2hLcmOVDOUSlZSWd/Gqq
QnlQykkrZvPbaWpLuYMfkZBC0AAADgmfftY4YPfyTfPWbYl8rGMcNm4tBhC3PkEIAt197els7O9lQrlVSqlVQqlVQrlaRSSbPZTLPRTLPZTKPZTL3eyPLyappNq90AAGyejvVY9TBmVZLvHCb85phhsrL+8Ybjhi1L0ALgiVWr1fT1daenuyPd3V3p7u5Id3dnero7093Vme6etb+2t7c9+pN9z/LyahaXVrL0vR8P37e4uJzFpZUt+KoAAHgWdWftGGGSLCfpydpm1rfVknRlLWi1J1l4atPxuAQtAH6WarWawcHeDA/1Z2ioP8ND/dkx0Jdq9dFr2LVaPcsrq2k21rawHv41SdYWtSob21vtbW3p6urY+JHBvh/9vMvLq5mZmcv0zFym789lZmYuCwvLm/Y1AwBstT/847tFj7BttM8tp9m+fvNSo5m2xdW1b0a/8/1sM5V6M7WejlSazdR6Or/3cVqFoAXAX6lUKhka7Mvw8E/Hq2azmdm5xSwuLn+zRbW8+p1NqqWlldRq9ceeobtrbdvr2z96vvW/+3q709XVkdHR4YyODm/8vO9HrumpWZtcAACsxatGcy1QVSup93SkbXE1lXozzYff5zabSb25FrO6O8SsFiZoAZBk7Z6rsdHhjI+NZGx0JJ2d3/0totls5v6D+Y1YNDMzl5n786nXGz/yGX+ZpeXVLC2vJvfnf/QxPT1dGV4Pbg/D2w9Frpn7c5mYmMrNW1OZmZnbknkBAB7HP/3zH4se4YmcOH4wLx0/mEajkfc/PJubE/eKHuln2z3Ql79784W093RmYmIqn3x2IZWshZHuVFLN2ivnzaSZebdntTxBC2Ab6+3tyvjYSPaO7cyuXTtSrX7z4rezswuZmp59KvHqSS0uLmdxcfk730h9P3LtHBnI0GB/hgb789Lxg1lcXM7EralM3JrK7Tv302i01tcEANDKTp25mmpbNS8+vz9vv3U8H3x0Njdu3i16rEcaHu7PO++8nI5aI5MTU/nL5xeSfPMKh6tpbgQtMascBC2AbWZoqD/7xndmfGwkg9+6n6rRaObO3fuZmLiXiVtTmZtfKnDKJ/f9yFWtVrJ712DGx0YyPrYzvb1dOXpkPEePjKdWq+f2nZncnJjKzYl7WV2tPeKzAwDw5VeX02w0c/zFA3n7rRdz+mxvTp+5WvRYP+rggT158/VjaWtry5Xbd3Lm8wtpb65dAN9MNra0mknmxKzSELQAtoG2tmoO7N+do0fGMzzUv/H+1dVabk1OZ+LWVG5NTj+TQafRaGby9kwmb8/k088vZnCwL3vHRjI+vjPDQ/3ZO74ze8d3pl4/mmvX7+bCpQnHEgEAHuGr01eyWqvl5InDOXH8YIaH+vPhx2ezuvr4d6dulUqlktdeOZJjR/cmSS5dvpW/fHYhaTbTkUq6k1RTSSPNLGRtS0vMKg9BC+AZ1t/fk6OHx3Lo0Gg6O9b+lb+8vJrrN+7k5sRU7ty9n2Zze/22ff/+fO7fn8/ps9fS092ZsbGR7N+7K3v2DOXwodEcPjSaqenZXLw0kWvX7zqSCADwI86dv5GZmfn85q0XMz42kv/z+zfz2ecXW+JerZHhgbzx+nMZGuxLvd7Ip59fyOUrkxsfX0kzay8btL2+F36WCFoAz5hKJRkf25mjR8Yyuuebi9HvTT3IxUsTuX7jbhoNv3EnyeLSSi5dvpVLl2+lv687R46M5/DBPRkZHsjI8EBePXkkl69M5uLlW5kv6RFMAICtdPvOTP7tPz7Nb379YnaO7MjfvP1SJm5N5dPPL2RhYfmpz9PR0Z6TJw7l6JHxJMn8/FLe//BMpm3gP3MELYBnREdHW44d3Zsjh8fS29OVJKnV6rl2/U4uXprIzE+8WiDJ3PxSvvjyUk6dvpL9+3bl2JHxDA8P5IXn9+eF5/fn1uR0vr5wI5O3Z4oeFQCgpSwsLOc//uvzHD0ynpMnDmV8bCSje4Zy+cpkzp6//lTCVmdne44d3Z
vnj+1NR0d7Go1Gzp2/kTPnrrXcCxuxOQQtgJJra6vm2NHxvPj8gXR2rv1rfXZ2IRcv3cqVa5MtdY9BGdTrjVy5ejtXrt7O8FB/jh4Zz4H9uzI2Opyx0eHcuXs/X566nKmp2aJHBQBoKRcvTeTmxL288vLhjftbjxwey42b93L12u1M3p7e9JMCu3buyMEDe3Jg/+60t7clSW7fnsmnX1zM7OzCpj4XrUXQAiipSiU5fHA0Lx0/mJ71jazbd2Zy9ty13L5zv+Dpng3TM3P5+C/n88WXl3L48GheeH5/du8azD/87rXcnLiXL09d8Y0SAMC3LC2t5MOPz+XMuWt58fkDObB/V/bvW/uxslLLzYl7mbw9nXtTD7K4uPLYn7+zsz07R3Zk9+7B7N+7a+P74CS5dWsqZ85fz717DzbzS6JFCVoAJbRv7868/NKhDAz0JlkLL19+dTm37zgOtxVWVms5d/5GLl66sd7kzAAAFw5JREFUlRee35fnj+3L3vGdGR8byZWrt3P6zNUsLD79OyIAAFrV7OxiPvrkXL48dXl9g2pXhgb7N16EJ1k7qjhzfy4LC8tZXFzOwuJyarV6ms1mKpVK2tqq6enpSk9PV3p7OjO4o2/j+9+H5ueXcu36nVy9djuzc4tFfKkURNACKJHduwZz8uXDGRkeSJLMzS3mq9NXcv3G3YIn2x5qtXpOnb6aCxcn8tKLB3Lk8FgOHxrNgf27c/HSRM6cu5aVlVrRYwIAtIylpZWcO389585fz8BAT/aN78rOnQMZGdmR3t6u9PZ2PfqTfEutVs/0zFzuTT3IxMRUpqZdA7FdCVoAJdDX2503XjuW0dG1Vy1cXFrJmTNXc+nKZJpNr1j4tC0vr+bTzy/m/Nc3c+Klgzl4YE+ef25fDh8azemz13L+6xtFjwgA0HJmZxdzZvbaxtsDA73ZMdCb3p7OjU2strZqqpVKms1m6o1GFpdWsri4nMXFlczOLeb+/Xnf/5JE0AJoeceOjufkicNpb2/L6motZ89fz9cXbnq1lhYwv7CUDz8+l3Nf38jJE4cyNjqSV08eyb69O/PxJ+etvQMA/ITZ2QX3kfLEBC2AFtXX25VfvflCdu8aTJJcu34nn35+wZG2FnT//nz+571TGRsdzhuvP5edIzvyv//hjZw6fSXnbGsBAMCmE7QAWtCxI+M5+fLaVtbS0kr+8tmF3Jy4V/RYPMKtyen8f//2SV49eTSHD43mlZNHste2FgAAbDpBC6CF2Moqv9XVej7+y/ncuHk3b35rW+ur01fcrQUAAJtE0AJoEbayni23JqfzL9/a1nK3FgAAbB5BC6Bg7e1teetXL2Tv+M4ktrKeJT+0rfX7v389H31yLjduipUAAPCkqkUPALCd9fV25+9/92r2ju/Myspq3nv/dD746KyY9Yx5uK119drttLe35Z3fvJQTxw8WPRYAAJSWDS2AguzeNZi3f3M8XZ0defBgIX96/1Tm55eKHostsrpaz4cfn8v0zFxePXkkLx0/mB07+vLRJ+dSq9WLHg8AAEpF0AIowLEj43n1laOpViuZuDWVDz46K2psE19fuJkHDxby9lvHs2/vzvT3vZo/vX8qCwvLRY8GAACl4cghwFNUqVTy5uvP5fXXjqVareTMuWv5059PiVnbzO07M/n3//w0s7MLGRzsy+//1+sbr2wJAAA8mqAF8JR0dXbkd397MkcOj6Ver+f9D8/kq1NXih6LgszNL+Xf/vOzTNyaSldXR9797ckcPTJW9FgAAFAKghbAUzDQ35N/+PvXsmvXYBYWl/Mf//1Frt+4W/RYFKxWq+dPfz6Vs+eup1qt5I3Xnsvrrx4teiwAAGh57tAC2GI7dvTm7/72ZLq7OnNv6kH+/P7pLC2vFj0WLeTLU5dz/8F8fvXG8zl2dG/a2tvy8Sfnix4LAABalqAFsIWGBvvy7t+eTFdnR25NTue990+n0WgUPRYt6Nr1O1laWslv3zmRwwdH01
at5sOPz6XZbBY9GgAAtBxHDgG2yMjwQH737ivp6uzIzYl7ee/9U2IWP+nO3fv545++zOpqLQf2787bbx1PtVopeiwAAGg5ghbAFhge7s+7v305HR3tuX7jbv78wZk0GjZteLR7U7P57//5MisrtezbuzNvv3U8lYqoBQAA3yZoAWyyocG+vPvbk+noaM+163fywUdnHBvjsUzPzOW//ueLrKysZu/4zvzm1y9E0wIAgG8IWgCbaMdAb9797cl0drTnxs27+fDjs9GyeBL378/nv//0VVZXa9m/b3d+9cYLRY8EAAAtQ9AC2CT9fd35u789ma6ujkzcmsr7H4pZ/DIzM3P543tfpVar59DBPXnz9eeKHgkAAFqCoAWwCTo62vLbd06ku7szk7en8+cPTjtmyKaYmprN/6xHrSOHx/LCc/uKHgkAAAonaAFsgt/8+ngGBnozc38+771/2gXwbKq79x7kw4/PJUlOvnw4Y6PDBU8EAADFErQAfqFX1gPD8vJq3nv/VOr1RtEj8Qy6OXEvX52+kkqlkt/8+sUM9PcUPRIAABRG0AL4BQ4e2JMXnt+fRqORP39wOgsLy0WPxDPszNlruX7jbjo62vM375xIR0db0SMBAEAhBC2AJzQyPLBxSfenn1/M3XsPCp6I7eCjT85l5v5cBvp78vZbx1OpFD0RAAA8fYIWwBPo7u7MO2+/lLa2ai5cvJlLl28VPRLbRL3eyHt/Pp2l5ZWM7hnOKy8fKXokAAB46gQtgMdUrVbzN2+/lJ7uzty+M5PPvrhU9EhsMwuLy/nzB2fSaDTy/HP7cujgnqJHAgCAp0rQAnhMb75+LCPDA5mfX8r7H55Js+kVDXn67t17kL98diFJ8sZrz2V4qL/giQAA4OkRtAAew97xnTl0cDS1Wj1/ev9UVlZqRY/ENnb5ymQuXLyZtrZq3vrVC6lW/bYOAMD24DtfgJ+ps7M9b7x2LEnyxVeX8+DBQsETQfL5l5fyYHYhAwO9OfHSwaLHAQCAp0LQAviZXn/1WLrX7826eGmi6HEgSdJoNPPxJ+fTbDbzwnP7MjI8UPRIAACw5QQtgJ9h7/jOHNi/O7VaPZ/85XzR48B3TE3P5tzXN1KpVPLrN5939BAAgGee73gBHuH7Rw3nF5YLngj+2qnTVxw9BABg2xC0AB7BUUPKoNFo5qNPzjl6CADAtiBoAfyEbx81/NhRQ1rc9PRczp139BAAgGef73QBfsR3jxpeyoKjhpTAqTOOHgIA8OwTtAB+xInjh7511PBW0ePAz/Lto4fPH9uX/v6eokcCAIBNJ2gB/IC+vu4cOTyaZrOZTz+/UPQ48Fimp+dy+cpkqtVKXn7pUNHjAADAphO0AH7Ayy8dSrVazeWrk5mdXSx6HHhsp85cTb1ez/59uzI81F/0OAAAsKkELYDvGRrsy4H9u1OvN3L6zNWix4EnsrS0kq8v3EySnHz5cLHDAADAJhO0AL7n5ROHkyQXLt7M4uJKscPAL3D2/PWsrNSyZ/dQ9uweKnocAADYNIIWwLfs3jWYsdHhrK7Wcvbc9aLHgV9kdbWes+evJbGlBQDAs0XQAviWk+vbWWfPX8/Kaq3YYWATXLg4kcXF5QwP9Wf/vl1FjwMAAJtC0AJYt3d8Z0ZGBrL4rbuHoOy+fRfcyy8dSqVSKXgiAAD45QQtgCSVSnLyxKEkyZkzV1OvNwqeCDbP2qt1LqS/vyeHD40WPQ4AAPxighZAkvGxkQwM9GZufjGXrkwWPQ5sqmYz+er02pbWC8/tK3gaAAD45dqLHgCgFRw9Mp5k7b6hZrNZ8DTl155kINWMpC3tqaSWZqZSz2wacTNZMW7cvJv5haX09/dkdM9QJm/PFD0SAAA8MRtawLbX39ed0T3DqdXquXLVdtYv1Z1KjqQzu9OeepKFNFNPsjvtOZLOdMcdTkW5dOlWkm8CLgAAlJWgBWx7D/9wf+36nayu1gueptzakxxIR2ppZnE9ZCVJPc
limqmlmQPpsB5ckMtXJtNoNDI+NpLenq6ixwEAgCcmaAHbWltbNYfWL8m+eGmi4GnKbyDVVJIfPVZYS1JZfxxP3/LKaq7fuJtKpZIjh8eKHgcAAJ6YP1EA29qB/bvT2dGee1MPMnN/vuhxSm8kbVnJT99BtpJmRtL2lCbi+x6G28OHRlOtOv4JAEA5CVrAtvbwuKHtrM3RnkoedWizvv44inFvajYz9+fT3d2ZveM7ix4HAACeiKAFbFvDw/0ZHurP8vLaMSx+uVqaj9y9alt/HMV5GHCPuRweAICSErSAbevhH+bXLsoWWDbDVOrpfMT2VWcqmXrkHhdb6eq121ldrWXXrsHs2NFb9DgAAPDYBC1gW2prq2b/vl1JkkuXHTfcLLNppJn86KsYtidprj+O4tTrjVy9djtJcvjgaMHTAADA4xO0gG1pdM9Q2tracm/qQeYXlose55lRS3Itq2lPJT2pbBw/bEvSk0raU8m1rP7oqyDy9Fy7vnbMdnxspOBJAADg8QlawLY0PrZ2GfbEramCJ3n2LKWZS1nJndTSlqR3PWzdSS2XspIl92e1hHtTD7K8vJr+/p4MDPQUPQ4AADwWQQvYlsbGhpMkExOC1laoJZlOIxeymrNZyYWsZjoNm1ktZmJy7e9/W1oAAJSNoAVsOyPDA+nu6sz8/FIezC4UPQ4U5mHQfbixCAAAZSFoAdvO+PjaNorjhmx3k7enU683snNkIF2dHUWPAwAAP5ugBWw7e9ePV92cuFfwJFCser2RO3dnUqlUNo7hAgBAGQhawLbS19udHTv6srJay917D4oeBwr3zbFD92gBAFAeghawrTw8bjg5OZ1m06vtwcOjt6N7hlOtVgqeBgAAfh5BC9hWxh03hO9YXFrJ9Mxc2tvbsmf3UNHjAADAzyJoAdtGpbL2CodJcvv2TMHTQOuYnJxOkuwc2VHwJAAA8PMIWsC2MTDQm/b2tszPL2VltVb0ONAypmfmkiTDQ/0FTwIAAD+PoAVsGw//sD49M1vwJNBaHgatIUELAICSELSAbeOboDVX8CTQWhYXl7O8vJquro709nYVPQ4AADySoAVsGw+3T2YELfgrtrQAACgTQQvYFiqVZHBHX5Jkema+4Gmg9TwMvcODghYAAK1P0AK2hW9fCL/qQnj4Ky6GBwCgTAQtYFtwITz8NEcOAQAoE0EL2BZcCA8/zcXwAACUiaAFbAtDgy6Eh0fZ2NJyjxYAAC1O0AK2hYcbJ7NziwVPAq1rbv2fjz4bWgAAtDhBC9gWurs7kyRLS6sFTwKta3FpOck3/7wAAECrErSAZ153V0cqlUqWllfSbDaLHgda1sPgK2gBANDq2oseAGCr2c6Cn2dpaSWJoAUAPF2dne0Z6O9JW1s1bW1tqVYraTSaqdcbqTcaWVhYyuLiStFj0mIELeCZ903QWi54Emhti+tBq0fQAgC2SGdne4aH+jM01L/x177e7kf+vKXllcxMz2X6/lxmZuYyPT238b0L25OgBTzzbGjBz2NDCwDYCm1t1Rw8sCdHj4z94Ksp12r1PJhdSK1WT73eSKPRSLVaTbVaSXtbWwYGetLd1ZmxsZGMjY1s/Lz5+aVcvHwrV65MZnnF9/rbjaAFPPN6NoKW/4IDP2V1tZZ6vZGOjva0tVVTrzeKHgkAKLGBgZ4cPTKeQwf2pKNjLT/UavXM3J/LzMx8pmfmMj0zm9nZR78SeV9v13e2uoaH+tPX151XXj6cE8cP5sbNu7lwaSJTU7Nb/WXRIgQt4Jn3cNvESjI82tLSSvr6utPd3Zn5+aWixwEASmjP7qG8+ML+7Nk9tPG+u3fv58KlidycuJdG4/FfqGl+YTnzC8u5cfPexvtG9wzl6JHxjI+N5OCBPTl4YE9m7s/n/NfXc/XanU35WmhdghbwzOu2odVSKpVK9u3dmZGRgbRVq1laXs3163cyO/fo/zLH1nsYtHoELQDgMbW3t+XVV47kyKGxJGvbWFev3c
6FSxN58GBh059v8vZMJm/PpLe3K0cOjeXw4dEMDfblrV+9mAP79+STT8+7TP4ZJmgBzzxBq3UcOzKeF1888FeXjp84fjC378zkk0+/FlEKtrTsHi0A4PGN7hnKm68/n97ertTrjZw+ezUXLk6kVqtv+XMvLCznq9NXcvrs1RzYvyevnDycsdHh/N/fv5nPv7yUy1cmt3wGnj5BC3jmtbe1JUlWa7WCJ9neXj5xKMdfOPCDH2s2m9mzeyj/8LvX8p///bltrQKtrq5909m2/s8NAMBP+f5W1tTUbD765Fwh3881Gs1cuTqZW5NTefP157J3fGd+9cbz2b93Vz7+9OssLnrV82dJtegBALZapVpJkjSf4Kw+m2Pv+EiOv3AgzeYP/xpUKmu/Rl1dHfmbd048zdH4noe/RtX1f24AAH7MQH9P/s/v38iRQ2Op1xv54stL+Y///qzw/zi5vLya994/nQ8+OpvlldWMjg7n//7+jezZPVjoXGwuQQt45lXXY8mPxRS23nPH9iX5Jlz9lIH+noyNDm/1SPyIh5e0/pxfKwBg+xoa7Mv/+rtX09fbnanp2fzrv/8l576+kVb6lvva9Tv5l3/9JDcn7qWjoz2/fefl7B3fWfRYbBJBC3jmPfyDuQWtYvT3dWf3rsHHCopHDo9t4UT8lI0NLUELAPgRIyMD+d27r6SrqyMTt6byX3/8ovCtrB/zcFvr6ws309ZWzTu/OZ4D+3cXPRaboKXv0PrDP75b9AjAM+T//X/eKnqEbe1xNn72ju/0e0DBXnv1aF579WjRYwAALWZwsC9/+zcvp6OjPdeu38mHH58rxUmIz764mNXVWl46fjC/fvOF1Gr1TNyaKnosfgEbWgAAAMAj9fd1593fvpzOjvZcv3E3H3x0thQx66FTZ67m9NmrqVYrefut49m9y51aZdaSG1r/9M9/LHoEAAAAYF21Wsk7b7+U7q7OTE5O58OPzxY90hM5dfpqOjvac+zo3rz9m+P5l3/9JMvLq0WPxROwoQUAAAD8pJdePJjBHX2ZnVvMex+c3nghmTL69POLmZycTldnR958/bmix+EJCVoAAADAjxoa6s8Lz+9Ps9nMx5+cS73eKHqkX+zjT89ndbWWveM7XRJfUoIWAAAA8IOq1Up+/ebzqVYrOX/hZu5NzRY90qZYXFzJ519eSpK8/urRdHd1FDwRj0vQAgAAAH7Qt48anjp9ZdM/fyVJZyrZkUqGUs2OVNKZSn7+62M/uctXJnNrciqdnR15w9HD0hG0AAAAgL8yMNCzpUcN25LsSDW9qaSRSlaTNFJJbyrZkWraNvXZftgnf/l64+jh3vGdT+EZ2SyCFgAAAPBXjh3Zm2q1kstXJjf9qGElSX+qaSRZTfLwivnm+tuN9Y9v9abW4tJKvlrfPHvu2N4tfjY2k6AFAAAAfEd7e1sOHli7LP38hZub/vk71o8V/tjOVyNr0avjKRw+vHL1dlZXa9m9azA7Bnq3/PnYHIIWAAAA8B0HD+xOR0d77ty9n9nZhU3//N1Jao94TG39cVutVqvn6rXbSZKjR8aewjOyGQQtAAAA4DuOHhlPkly4OLEln7+aysYxwx/TXH/c03Dx0q0kycEDe9Le/jRu7+KXErQAAACADbt27sjgjr4sLq3k5sS9LXmORpqPTFWV9cc9DQ9mF3Ln7v10dLTn4P7dT+U5+WUELQAAAGDD4UNrx+4uX76VZnNrgtJSkvZHPKZ9/XFPy8VLa9tohw87dlgGghYAAACwYefIQJLkxs2t2c5KktU0148U/rBqHr7i4dPZ0EqSmxP30mg0MjTYl7Y2uaTV+RUCAAAAkiQdHW3p7+9JvV7Pg9n5LXueZpK5NFJN0pFsHD+srL9dXf/408tZSaPRzIPZhVQqlQwN9j3FZ+ZJCFoAAABAkmRosD9JMnN/Plt02nBDPcmDNLKQZqpproesZhbSzIM0Ut/ap/9B0zNzSZKhof4Cnp3H8agjqwAAAM
A2MbwecmbWw85WayZZSTMrG28Va2ZmLjn0zf8PtC4bWgAAAECSbzaTpme27rhhK7OhVR6CFgAAAJDkm82k6ZnZgicpxv37C2k0Gtkx0Oti+BbnVwcAAABIkvT0dCVJ5ueXCnn+3bsG8/yxvdm7d2chz99oNLK4uJJKpZLurs5CZuDncYcWAAAAkCQbW0n1euOpP/fxFw7k5ROHNt6+eu12Pvz43FOfo15fu47ehlZrq7z2mz8Uf+saAAAAPMIf/vHdokcANtE//fMfn/jnyo0AAAAAlIoNLQAAAABKxYYWAAAAAKUiaAEAAABQKoIWAAAAAKUiaAEAAABQKoIWAAAAAKUiaAEAAABQKv8/zvAtRj22TeoAAAAASUVORK5CYII=\n", 234 | "text/plain": [ 235 | "
" 236 | ] 237 | }, 238 | "metadata": {}, 239 | "output_type": "display_data" 240 | } 241 | ], 242 | "source": [ 243 | "pad = 1/72\n", 244 | "figsize1 = 1536/72\n", 245 | "figsize2 = 1125/72\n", 246 | "fig = plt.figure(figsize=(figsize1, figsize2),facecolor='#2f3653') \n", 247 | "gs = gridspec.GridSpec(2, 2, width_ratios=[3.13, 1])\n", 248 | "ax1 = plt.subplot(gs[:, 0])\n", 249 | "ax2 = plt.subplot(gs[0, 1])\n", 250 | "ax3 = plt.subplot(gs[1, 1])\n", 251 | "pitch = Pitch(pitch_type='opta',orientation='vertical',view='half',layout=(1,1),figsize=(10,10),\n", 252 | " pitch_color='#2f3653',line_color='#82868f',goal_type='box',linewidth=2,\n", 253 | " pad_bottom=0.2,pad_top=4)\n", 254 | "pitch.draw(ax1)\n", 255 | "pitch.draw(ax2)\n", 256 | "pitch.draw(ax3)\n", 257 | "#plot lines\n", 258 | "pitch.lines(df_line.x_pass_start,df_line.y_pass_start,df_line.x_pass_end,df_line.y_pass_end,\n", 259 | " lw=9,transparent=True,comet=True,ax=ax1)\n", 260 | "pitch.lines(df_line.x_pass_start,df_line.y_pass_start,df_line.x_pass_end,df_line.y_pass_end,\n", 261 | " lw=9,transparent=True,comet=True,ax=ax2)\n", 262 | "# plot assists\n", 263 | "pitch.plot(df_assist.x_pass_end,df_assist.y_pass_end,\n", 264 | " marker='o', color='None',markersize=12,markerfacecolor='#34afed',\n", 265 | " linestyle='None',markeredgecolor='#34afed',ax=ax1)\n", 266 | "pitch.plot(df_assist.x_pass_end,df_assist.y_pass_end,\n", 267 | " marker='o', color='None',markersize=7,markerfacecolor='#34afed',\n", 268 | " linestyle='None',markeredgecolor='#34afed',ax=ax2)\n", 269 | "# plot other\n", 270 | "pitch.plot(df_other.x_pass_end,df_other.y_pass_end,markerfacecolor='#2f3653',\n", 271 | " marker='o', color='None',markersize=12,zorder=3,\n", 272 | " linestyle='None',markeredgecolor='#34afed',ax=ax1)\n", 273 | "pitch.plot(df_other.x_pass_end,df_other.y_pass_end,markerfacecolor='#2f3653',\n", 274 | " marker='o', color='None',markersize=7,zorder=3,\n", 275 | " linestyle='None',markeredgecolor='#34afed',ax=ax2)\n", 276 | "# 
plot pass start locations\n", 277 | "pitch.plot(df_pass.x_pass_start,df_pass.y_pass_start,\n", 278 | " marker='o', color='#a43967',markersize=10,alpha=0.25,linestyle='None',ax=ax1)\n", 279 | "pitch.plot(df_pass.x_pass_start,df_pass.y_pass_start,\n", 280 | " marker='o', color='#a43967',markersize=10,alpha=0.1,linestyle='None',ax=ax3)" 281 | ] 282 | } 283 | ], 284 | "metadata": { 285 | "kernelspec": { 286 | "display_name": "Python 3", 287 | "language": "python", 288 | "name": "python3" 289 | }, 290 | "language_info": { 291 | "codemirror_mode": { 292 | "name": "ipython", 293 | "version": 3 294 | }, 295 | "file_extension": ".py", 296 | "mimetype": "text/x-python", 297 | "name": "python", 298 | "nbconvert_exporter": "python", 299 | "pygments_lexer": "ipython3", 300 | "version": "3.8.2" 301 | } 302 | }, 303 | "nbformat": 4, 304 | "nbformat_minor": 2 305 | } 306 | -------------------------------------------------------------------------------- /01_statsbomb_json_to_feather.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "import numpy as np\n", 11 | "import glob\n", 12 | "import os" 13 | ] 14 | }, 15 | { 16 | "cell_type": "markdown", 17 | "metadata": {}, 18 | "source": [ 19 | "This notebook takes the StatsBomb json files and turns them into feather files. These are extremely fast to load so good for this prototyping kind of analysis. See: https://medium.com/@steven.p.dye/feather-files-faster-than-the-speed-of-light-d4666ce24387.\n", 20 | "\n", 21 | "They are not really meant for long term storage though. The event files are then combined from all the matches." 
 22 | ] 23 | }, 24 | { 25 | "cell_type": "markdown", 26 | "metadata": {}, 27 | "source": [ 28 | "# Change these paths/ parameters\n", 29 | "You will need to change these paths/ parameters depending on where the StatsBomb open-data is located, how and where you want to save the resulting data, and if you only want the new files to be processed." 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 2, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "# open-data folder is expected in the parent of the current directory. To change if run elsewhere\n", 39 | "STATSBOMB_DATA = os.path.join('..','open-data','data')\n", 40 | "# save files in folder in current directory. To change if want to save elsewhere\n", 41 | "DATA_PATH = os.path.join(os.getcwd(),'data')\n", 42 | "# if true, only processes files that don't already have an event file\n", 43 | "process_new_only = True" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "metadata": {}, 49 | "source": [ 50 | "# Delete event data included in error" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "One event file seems to be added to the statsbomb data in error. See: https://github.com/statsbomb/open-data/issues/13. Deleting it here for consistency." 
58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 3, 63 | "metadata": {}, 64 | "outputs": [ 65 | { 66 | "name": "stdout", 67 | "output_type": "stream", 68 | "text": [ 69 | "../open-data/data/events/7298.json removed\n", 70 | "../open-data/data/lineups/7298.json removed\n" 71 | ] 72 | } 73 | ], 74 | "source": [ 75 | "ERROR_FILES = [os.path.join(STATSBOMB_DATA,'events','7298.json'),\n", 76 | " os.path.join(STATSBOMB_DATA,'lineups','7298.json')]\n", 77 | "for file in ERROR_FILES:\n", 78 | " if os.path.isfile(file):\n", 79 | " os.remove(file)\n", 80 | " print(file,'removed')" 81 | ] 82 | }, 83 | { 84 | "cell_type": "markdown", 85 | "metadata": {}, 86 | "source": [ 87 | "# Setup folders" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "metadata": {}, 93 | "source": [ 94 | "I set up the following folders in a new data directory folder (location set above). These are the places we will save the processed json files, in feather-format.
\n", 95 | "├── data
\n", 96 | "│ ├── events_raw <- Data from the event file
\n", 97 | "│ ├── related_events_raw <- Data with the info on how events are connected.
\n", 98 | "│ ├── shot_freeze_raw <- Data with the individual shot freeze frames
\n", 99 | "│ └── tactics_raw <- Data with the lineup tactics.
" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": 4, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [ 108 | "def make_dir(PATH):\n", 109 | " if os.path.isdir(PATH)==False: os.mkdir(PATH)" 110 | ] 111 | }, 112 | { 113 | "cell_type": "code", 114 | "execution_count": 5, 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "# locations of new folders\n", 119 | "RAW_EVENT_PATH = os.path.join(DATA_PATH,'events_raw')\n", 120 | "RAW_RELATED_PATH = os.path.join(DATA_PATH,'related_events_raw')\n", 121 | "RAW_SHOT_PATH = os.path.join(DATA_PATH,'shot_freeze_raw')\n", 122 | "RAW_TACTICS_PATH = os.path.join(DATA_PATH,'tactics_raw')" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": 6, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "# making directories\n", 132 | "make_dir(DATA_PATH)\n", 133 | "make_dir(RAW_EVENT_PATH)\n", 134 | "make_dir(RAW_RELATED_PATH)\n", 135 | "make_dir(RAW_SHOT_PATH)\n", 136 | "make_dir(RAW_TACTICS_PATH)" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "metadata": {}, 142 | "source": [ 143 | "# Get file paths" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "Retrieve a list of json file paths from which we will extract the infomation." 
151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 7, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "MATCH_PATH = glob.glob(os.path.join(STATSBOMB_DATA,'matches','**','*.json'),recursive=True)\n", 160 | "LINEUP_PATH = glob.glob(os.path.join(STATSBOMB_DATA,'lineups','**','*.json'),recursive=True)\n", 161 | "EVENT_PATH = glob.glob(os.path.join(STATSBOMB_DATA,'events','**','*.json'),recursive=True)\n", 162 | "COMPETITION_PATH = os.path.join(STATSBOMB_DATA,'competitions.json')" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "metadata": {}, 168 | "source": [ 169 | "# Format competition data" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "metadata": {}, 175 | "source": [ 176 | "Get the competition data and save in feather format." 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 8, 182 | "metadata": {}, 183 | "outputs": [ 184 | { 185 | "name": "stdout", 186 | "output_type": "stream", 187 | "text": [ 188 | "Number of competitions in data: 20\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "df_competition = pd.read_json(COMPETITION_PATH,convert_dates=['match_updated','match_available'])\n", 194 | "df_competition.sort_values(['competition_id','season_id'],inplace=True)\n", 195 | "df_competition.reset_index(drop=True,inplace=True)\n", 196 | "print('Number of competitions in data:',len(df_competition))" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": 9, 202 | "metadata": {}, 203 | "outputs": [ 204 | { 205 | "name": "stdout", 206 | "output_type": "stream", 207 | "text": [ 208 | "\n", 209 | "RangeIndex: 20 entries, 0 to 19\n", 210 | "Data columns (total 8 columns):\n", 211 | "competition_id 20 non-null int64\n", 212 | "season_id 20 non-null int64\n", 213 | "country_name 20 non-null object\n", 214 | "competition_name 20 non-null object\n", 215 | "competition_gender 20 non-null object\n", 216 | "season_name 20 non-null object\n", 217 | 
"match_updated 20 non-null datetime64[ns]\n", 218 | "match_available 20 non-null datetime64[ns]\n", 219 | "dtypes: datetime64[ns](2), int64(2), object(4)\n", 220 | "memory usage: 1.4+ KB\n" 221 | ] 222 | } 223 | ], 224 | "source": [ 225 | "# save to feather-format and show info\n", 226 | "df_competition.to_feather(os.path.join(DATA_PATH,'competition'))\n", 227 | "df_competition.info()" 228 | ] 229 | }, 230 | { 231 | "cell_type": "markdown", 232 | "metadata": {}, 233 | "source": [ 234 | "# Format match data" 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "metadata": {}, 240 | "source": [ 241 | "Get the match data and save in feather format." 242 | ] 243 | }, 244 | { 245 | "cell_type": "code", 246 | "execution_count": 10, 247 | "metadata": {}, 248 | "outputs": [ 249 | { 250 | "name": "stdout", 251 | "output_type": "stream", 252 | "text": [ 253 | "Number of match files in data: 20\n", 254 | "Number of matches in data: 778\n" 255 | ] 256 | } 257 | ], 258 | "source": [ 259 | "print('Number of match files in data:',len(MATCH_PATH))\n", 260 | "match_list_dfs = [pd.read_json(file,convert_dates=['match_date','last_updated']) for file in MATCH_PATH]\n", 261 | "df_match = pd.concat(match_list_dfs,sort=False)\n", 262 | "print('Number of matches in data:',len(df_match))" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": 11, 268 | "metadata": {}, 269 | "outputs": [], 270 | "source": [ 271 | "def split_dict_col(df,col):\n", 272 | " '''function to split a dictionary column to seperate columns'''\n", 273 | " # handle missings by filling with an empty dictionary\n", 274 | " df[col] = df[col].apply(lambda x: {} if pd.isna(x) else x)\n", 275 | " # split the non missings and change column names\n", 276 | " df_temp_cols = pd.io.json.json_normalize(df[col]).set_index(df.index)\n", 277 | " col_names = df_temp_cols.columns\n", 278 | " # note add column description to column name if doesn't already contain it\n", 279 | " col_names = 
[(c).replace('.','_') if c[:len(col)]==col else (col+'_'+c).replace('.','_') for c in col_names]\n", 280 | " df[col_names] = df_temp_cols\n", 281 | " # drop old column\n", 282 | " df.drop(col,axis=1,inplace=True)\n", 283 | " return df" 284 | ] 285 | }, 286 | { 287 | "cell_type": "code", 288 | "execution_count": 12, 289 | "metadata": {}, 290 | "outputs": [], 291 | "source": [ 292 | "# loop through the columns that are still dictionary columns and add them as seperate cols to the dataframe\n", 293 | "dictionary_columns = ['competition','season','home_team','away_team','metadata','competition_stage',\n", 294 | " 'stadium','referee']\n", 295 | "for col in dictionary_columns:\n", 296 | " df_match = split_dict_col(df_match,col)\n", 297 | "# convert kickoff to datetime - date + kickoff time\n", 298 | "df_match['kick_off'] = pd.to_datetime(df_match.match_date.astype(str) +' '+ df_match.kick_off)\n", 299 | "# drop one gender column as always equal to the other\n", 300 | "# drop match status as always available\n", 301 | "df_match.drop(['away_team_gender','match_status'],axis=1,inplace=True)\n", 302 | "df_match.rename({'home_team_gender':'competition_gender'},axis=1,inplace=True)\n", 303 | "# manager is a list (len=1) containing a dictionary so lets split into columns\n", 304 | "df_match['home_team_managers'] = df_match.home_team_managers.str[0]\n", 305 | "df_match = split_dict_col(df_match,'home_team_managers')\n", 306 | "df_match['away_team_managers'] = df_match.away_team_managers.str[0]\n", 307 | "df_match = split_dict_col(df_match,'away_team_managers')\n", 308 | "df_match['home_team_managers_dob'] = pd.to_datetime(df_match['home_team_managers_dob'])\n", 309 | "df_match['away_team_managers_dob'] = pd.to_datetime(df_match['away_team_managers_dob'])\n", 310 | "for col in ['competition_id','season_id','home_team_id','competition_stage_id']:\n", 311 | " df_match[col] = df_match[col].astype(np.int64)\n", 312 | "# sort and reset index: ready for exporting to feather\n", 313 | 
"df_match.sort_values('kick_off',inplace=True)\n", 314 | "df_match.reset_index(inplace=True,drop=True)" 315 | ] 316 | }, 317 | { 318 | "cell_type": "code", 319 | "execution_count": 13, 320 | "metadata": {}, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "\n", 327 | "RangeIndex: 778 entries, 0 to 777\n", 328 | "Data columns (total 48 columns):\n", 329 | "match_id 778 non-null int64\n", 330 | "match_date 778 non-null datetime64[ns]\n", 331 | "kick_off 778 non-null datetime64[ns]\n", 332 | "home_score 778 non-null int64\n", 333 | "away_score 778 non-null int64\n", 334 | "last_updated 778 non-null datetime64[ns]\n", 335 | "match_week 778 non-null int64\n", 336 | "competition_id 778 non-null int64\n", 337 | "competition_country_name 778 non-null object\n", 338 | "competition_name 778 non-null object\n", 339 | "season_id 778 non-null int64\n", 340 | "season_name 778 non-null object\n", 341 | "home_team_id 778 non-null int64\n", 342 | "home_team_name 778 non-null object\n", 343 | "competition_gender 778 non-null object\n", 344 | "home_team_group 100 non-null object\n", 345 | "home_team_country_id 777 non-null float64\n", 346 | "home_team_country_name 777 non-null object\n", 347 | "away_team_id 778 non-null int64\n", 348 | "away_team_name 778 non-null object\n", 349 | "away_team_group 100 non-null object\n", 350 | "away_team_country_id 776 non-null float64\n", 351 | "away_team_country_name 776 non-null object\n", 352 | "metadata_data_version 778 non-null object\n", 353 | "metadata_shot_fidelity_version 591 non-null object\n", 354 | "metadata_xy_fidelity_version 501 non-null object\n", 355 | "competition_stage_id 778 non-null int64\n", 356 | "competition_stage_name 778 non-null object\n", 357 | "stadium_id 680 non-null float64\n", 358 | "stadium_name 680 non-null object\n", 359 | "stadium_country_id 549 non-null float64\n", 360 | "stadium_country_name 549 non-null object\n", 361 | "referee_id 732 non-null float64\n", 
362 | "referee_name 732 non-null object\n", 363 | "referee_country_id 325 non-null float64\n", 364 | "referee_country_name 325 non-null object\n", 365 | "home_team_managers_id 520 non-null float64\n", 366 | "home_team_managers_name 520 non-null object\n", 367 | "home_team_managers_nickname 173 non-null object\n", 368 | "home_team_managers_dob 436 non-null datetime64[ns]\n", 369 | "home_team_managers_country_id 520 non-null float64\n", 370 | "home_team_managers_country_name 520 non-null object\n", 371 | "away_team_managers_id 520 non-null float64\n", 372 | "away_team_managers_name 520 non-null object\n", 373 | "away_team_managers_nickname 163 non-null object\n", 374 | "away_team_managers_dob 438 non-null datetime64[ns]\n", 375 | "away_team_managers_country_id 520 non-null float64\n", 376 | "away_team_managers_country_name 520 non-null object\n", 377 | "dtypes: datetime64[ns](5), float64(10), int64(9), object(24)\n", 378 | "memory usage: 291.9+ KB\n" 379 | ] 380 | } 381 | ], 382 | "source": [ 383 | "# save to feather-format and show info\n", 384 | "df_match.to_feather(os.path.join(DATA_PATH,'match'))\n", 385 | "df_match.info()" 386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "# Format lineup data" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "Get the lineup data and save in feather format." 
400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 14, 405 | "metadata": {}, 406 | "outputs": [ 407 | { 408 | "name": "stdout", 409 | "output_type": "stream", 410 | "text": [ 411 | "Number of lineup files in data: 778\n" 412 | ] 413 | } 414 | ], 415 | "source": [ 416 | "print('Number of lineup files in data:',len(LINEUP_PATH))\n", 417 | "# read as dataframe can't use list comprehension to read files as need to create the match_id from the file name\n", 418 | "lineup_list_dfs = []\n", 419 | "for file in LINEUP_PATH:\n", 420 | " df_temp = pd.read_json(file)\n", 421 | " df_temp['match_id'] = os.path.basename(file[:-5])\n", 422 | " lineup_list_dfs.append(df_temp)\n", 423 | "df_lineup = pd.concat(lineup_list_dfs,sort=False)\n", 424 | "df_lineup.reset_index(inplace=True,drop=True)\n", 425 | "# each line has a column named player that contains a list of dictionaries\n", 426 | "# we split into seperate columns and then create a new row for each player using melt\n", 427 | "df_lineup_players = df_lineup.lineup.apply(pd.Series)\n", 428 | "df_lineup = df_lineup.merge(df_lineup_players,left_index=True,right_index=True)\n", 429 | "df_lineup.drop('lineup',axis=1,inplace=True)\n", 430 | "df_lineup = df_lineup.melt(id_vars = ['team_id','team_name','match_id'], value_name = 'player')\n", 431 | "df_lineup.drop('variable',axis=1,inplace=True)\n", 432 | "df_lineup = df_lineup[df_lineup.player.notnull()].copy()\n", 433 | "df_lineup = split_dict_col(df_lineup,'player')\n", 434 | "# turn ids to integers if no missings\n", 435 | "df_lineup['match_id'] = df_lineup.match_id.astype(np.int64)\n", 436 | "df_lineup['player_id'] = df_lineup.player_id.astype(np.int64)\n", 437 | "# sort and reset index: ready for exporting to feather\n", 438 | "df_lineup.sort_values('player_id',inplace=True)\n", 439 | "df_lineup.reset_index(inplace=True,drop=True)" 440 | ] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": 15, 445 | "metadata": {}, 446 | "outputs": [ 447 
| { 448 | "name": "stdout", 449 | "output_type": "stream", 450 | "text": [ 451 | "\n", 452 | "RangeIndex: 21416 entries, 0 to 21415\n", 453 | "Data columns (total 9 columns):\n", 454 | "team_id 21416 non-null int64\n", 455 | "team_name 21416 non-null object\n", 456 | "match_id 21416 non-null int64\n", 457 | "player_id 21416 non-null int64\n", 458 | "player_name 21416 non-null object\n", 459 | "player_nickname 12156 non-null object\n", 460 | "player_jersey_number 21409 non-null float64\n", 461 | "player_country_id 21328 non-null float64\n", 462 | "player_country_name 21328 non-null object\n", 463 | "dtypes: float64(2), int64(3), object(4)\n", 464 | "memory usage: 1.5+ MB\n" 465 | ] 466 | } 467 | ], 468 | "source": [ 469 | "# save to feather-format and show info\n", 470 | "df_lineup.to_feather(os.path.join(DATA_PATH,'lineup'))\n", 471 | "df_lineup.info()" 472 | ] 473 | }, 474 | { 475 | "cell_type": "markdown", 476 | "metadata": {}, 477 | "source": [ 478 | "# Format event data" 479 | ] 480 | }, 481 | { 482 | "cell_type": "markdown", 483 | "metadata": {}, 484 | "source": [ 485 | "Get the event data and save in feather format:\n", 486 | " - an events dataframe\n", 487 | " - a related events dataframe\n", 488 | " - a shot freeze frame dataframe\n", 489 | " - a tactics lineup dataframe\n", 490 | " \n", 491 | "Each match is stored in a separate dataframe" 492 | ] 493 | }, 494 | { 495 | "cell_type": "code", 496 | "execution_count": 16, 497 | "metadata": {}, 498 | "outputs": [], 499 | "source": [ 500 | "def list_dictionary_to_df(df,col,value_name,var_name):\n", 501 | " '''Some columns are a list of dictionaries. 
This turns them into a new dataframe of rows'''\n", 502 | " df = df.loc[df[col].notnull(),['id',col]]\n", 503 | " df.set_index('id',inplace=True)\n", 504 | " df = df[col].apply(pd.Series).copy()\n", 505 | " df.reset_index(inplace=True)\n", 506 | " df = df.melt(id_vars='id',value_name=value_name,var_name=var_name)\n", 507 | " df[var_name] = df[var_name] + 1\n", 508 | " df = df[df[value_name].notnull()].copy()\n", 509 | " df.reset_index(inplace=True,drop=True)\n", 510 | " return df" 511 | ] 512 | }, 513 | { 514 | "cell_type": "code", 515 | "execution_count": 17, 516 | "metadata": {}, 517 | "outputs": [], 518 | "source": [ 519 | "def split_location_cols(df,col,new_cols):\n", 520 | " ''' Location is stored as a list. split into columns'''\n", 521 | " if col in df.columns:\n", 522 | " df[new_cols] = df[col].apply(pd.Series)\n", 523 | " df.drop(col,axis=1,inplace=True)" 524 | ] 525 | }, 526 | { 527 | "cell_type": "code", 528 | "execution_count": 18, 529 | "metadata": {}, 530 | "outputs": [ 531 | { 532 | "name": "stdout", 533 | "output_type": "stream", 534 | "text": [ 535 | "Number of event files in data: 778\n" 536 | ] 537 | } 538 | ], 539 | "source": [ 540 | "print('Number of event files in data:',len(EVENT_PATH))" 541 | ] 542 | }, 543 | { 544 | "cell_type": "code", 545 | "execution_count": 19, 546 | "metadata": {}, 547 | "outputs": [ 548 | { 549 | "name": "stdout", 550 | "output_type": "stream", 551 | "text": [ 552 | "Matches with no event file: []\n", 553 | "Events with no match file: []\n" 554 | ] 555 | } 556 | ], 557 | "source": [ 558 | "EVENT_FILE_NAMES = np.array([os.path.basename(file)[:-5] for file in EVENT_PATH]).astype(int)\n", 559 | "# quick check that all events have matches and vice versa.\n", 560 | "print('Matches with no event file:',list(set(df_match.match_id) - set(EVENT_FILE_NAMES)))\n", 561 | "print('Events with no match file:',list(set(EVENT_FILE_NAMES) - set(df_match.match_id)))" 562 | ] 563 | }, 564 | { 565 | "cell_type": "code", 566 | 
"execution_count": 20, 567 | "metadata": {}, 568 | "outputs": [ 569 | { 570 | "name": "stdout", 571 | "output_type": "stream", 572 | "text": [ 573 | "Event files to process: 1\n" 574 | ] 575 | } 576 | ], 577 | "source": [ 578 | "# if you set process_new_only to True then we will not process event jsons which already have feather files\n", 579 | "if process_new_only:\n", 580 | " event_set = set([os.path.basename(file) for file in glob.glob(os.path.join(RAW_EVENT_PATH,'*'))])\n", 581 | " related_set = set([os.path.basename(file) for file in glob.glob(os.path.join(RAW_RELATED_PATH,'*'))])\n", 582 | " shot_set = set([os.path.basename(file) for file in glob.glob(os.path.join(RAW_SHOT_PATH,'*'))])\n", 583 | " tactics_set = set([os.path.basename(file) for file in glob.glob(os.path.join(RAW_TACTICS_PATH,'*'))])\n", 584 | " to_delete = set.intersection(event_set,related_set,shot_set,tactics_set)\n", 585 | " mask_delete = [False if file in to_delete else True for file in (EVENT_FILE_NAMES).astype(str)]\n", 586 | " EVENT_PATH = np.array(EVENT_PATH)[mask_delete].tolist()\n", 587 | " print('Event files to process:',np.array(mask_delete).sum())" 588 | ] 589 | }, 590 | { 591 | "cell_type": "code", 592 | "execution_count": 21, 593 | "metadata": {}, 594 | "outputs": [], 595 | "source": [ 596 | "def create_event_feather_files(PATH):\n", 597 | " ''' Extracts individual event jsons and loads as four feather-format files: events, related events,\n", 598 | " shot freeze frames, and tactics lineups'''\n", 599 | " # timestamp defaults to today's date so store as a string - feather can't store time objects\n", 600 | " df = pd.read_json(PATH,encoding='utf-8')\n", 601 | " df['timestamp'] = df['timestamp'].dt.time.astype(str)\n", 602 | " \n", 603 | " # get match id\n", 604 | " match_id = int(os.path.basename(PATH)[:-5])\n", 605 | " \n", 606 | " # loop through the columns that are still dictionary columns and add them as seperate cols to the dataframe\n", 607 | " # these are nested dataframes 
in the docs - although dribbled_past/ pressure isn't needed here?\n", 608 | " # also some others are needed: type, possession_team, play_pattern, team, tactics, player, pposition\n", 609 | " dictionary_columns = ['50_50','bad_behaviour','ball_receipt','ball_recovery','block','carry',\n", 610 | " 'clearance','dribble','duel','foul_committed','foul_won','goalkeeper',\n", 611 | " 'half_end','half_start','injury_stoppage','interception',\n", 612 | " 'miscontrol','pass','play_pattern','player','player_off','position',\n", 613 | " 'possession_team','shot','substitution','tactics','team','type',] \n", 614 | " for col in dictionary_columns:\n", 615 | " if col in df.columns:\n", 616 | " df = split_dict_col(df,col)\n", 617 | " \n", 618 | " # sort and reset index: ready for exporting to feather\n", 619 | " df.sort_values(['minute','second','timestamp','possession'],inplace=True)\n", 620 | " df.reset_index(inplace=True,drop=True)\n", 621 | " \n", 622 | " # split location info to x, y and (z for shot) columns and drop old columns\n", 623 | " split_location_cols(df,'location',['x','y'])\n", 624 | " split_location_cols(df,'pass_end_location',['pass_end_x','pass_end_y'])\n", 625 | " split_location_cols(df,'carry_end_location',['carry_end_x','carry_end_y'])\n", 626 | " split_location_cols(df,'shot_end_location',['shot_end_x','shot_end_y','shot_end_z'])\n", 627 | " split_location_cols(df,'goalkeeper_end_location',['goalkeeper_end_x','goalkeeper_end_y'])\n", 628 | " \n", 629 | " # replace weird * character in the type_name for ball receipt\n", 630 | " df['type_name'] = df['type_name'].replace({'Ball Receipt*':'Ball Receipt'})\n", 631 | " \n", 632 | " # create a related events dataframe\n", 633 | " df_related_events = list_dictionary_to_df(df,col='related_events',\n", 634 | " value_name='related_event',var_name='event_related_id')\n", 635 | " # some carries don't have the corresponding events. 
This makes sure all events are linked both ways\n", 636 | " df_related_events.drop('event_related_id',axis=1,inplace=True)\n", 637 | " df_related_events_reverse = df_related_events.rename({'related_event':'id','id':'related_event'},axis=1)\n", 638 | " df_related_events = pd.concat([df_related_events,df_related_events_reverse],sort=False)\n", 639 | " df_related_events.drop_duplicates(inplace=True)\n", 640 | " # and add on the type_names, index for easier lookups of how the events are related\n", 641 | " df_event_type = df[['id','type_name','index']].copy()\n", 642 | " df_related_events = df_related_events.merge(df_event_type,on='id',how='left',validate='m:1')\n", 643 | " df_event_type.rename({'id':'related_event'},axis=1,inplace=True)\n", 644 | " df_related_events = df_related_events.merge(df_event_type,on='related_event',\n", 645 | " how='left',validate='m:1',suffixes=['','_related'])\n", 646 | " df_related_events.rename({'related_event':'id_related'},axis=1,inplace=True)\n", 647 | " \n", 648 | " # create a shot freeze frame dataframe - also splits dictionary of player details into columns\n", 649 | " df_shot_freeze = list_dictionary_to_df(df,col='shot_freeze_frame',\n", 650 | " value_name='player',var_name='event_freeze_id')\n", 651 | " df_shot_freeze = split_dict_col(df_shot_freeze,'player')\n", 652 | " split_location_cols(df_shot_freeze,'player_location',['x','y'])\n", 653 | "\n", 654 | " # create a tactics lineup frame dataframe - also splits dictionary of player details into columns\n", 655 | " df_tactics_lineup = list_dictionary_to_df(df,col='tactics_lineup',\n", 656 | " value_name='player',var_name='event_tactics_id')\n", 657 | " df_tactics_lineup = split_dict_col(df_tactics_lineup,'player')\n", 658 | " \n", 659 | " # drop columns stored as a seperate table \n", 660 | " df.drop(['related_events','shot_freeze_frame','tactics_lineup'],axis=1,inplace=True)\n", 661 | " \n", 662 | " # add match id to dataframes\n", 663 | " df['match_id'] = match_id\n", 664 | " 
df_related_events['match_id'] = match_id\n", 665 | " df_shot_freeze['match_id'] = match_id \n", 666 | " df_tactics_lineup['match_id'] = match_id\n", 667 | " \n", 668 | " # save as feather files\n", 669 | " df.to_feather(os.path.join(RAW_EVENT_PATH,str(match_id)))\n", 670 | " df_related_events.to_feather(os.path.join(RAW_RELATED_PATH,str(match_id)))\n", 671 | " df_shot_freeze.to_feather(os.path.join(RAW_SHOT_PATH,str(match_id)))\n", 672 | " df_tactics_lineup.to_feather(os.path.join(RAW_TACTICS_PATH,str(match_id)))" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 22, 678 | "metadata": {}, 679 | "outputs": [ 680 | { 681 | "name": "stdout", 682 | "output_type": "stream", 683 | "text": [ 684 | "0 2275036.json\n" 685 | ] 686 | } 687 | ], 688 | "source": [ 689 | "# loop through and save all the event jsons as 4 seperate feather-files\n", 690 | "for i, file in enumerate(EVENT_PATH):\n", 691 | " create_event_feather_files(file)\n", 692 | " if i%10 == 0:\n", 693 | " print(i,os.path.basename(file))" 694 | ] 695 | }, 696 | { 697 | "cell_type": "markdown", 698 | "metadata": {}, 699 | "source": [ 700 | "# Combine the raw dataframes and save as a single dataframe" 701 | ] 702 | }, 703 | { 704 | "cell_type": "markdown", 705 | "metadata": {}, 706 | "source": [ 707 | "Combine the event dataframes into a single dataframe for each type:\n", 708 | "- events\n", 709 | "- related_events\n", 710 | "- shot freeze frame\n", 711 | "- tactics\n", 712 | "\n", 713 | "Note that the resulting feather file will be large (3gb+)" 714 | ] 715 | }, 716 | { 717 | "cell_type": "code", 718 | "execution_count": 23, 719 | "metadata": {}, 720 | "outputs": [], 721 | "source": [ 722 | "def combine_single_file(PATH,SAVE_PATH):\n", 723 | " ''' loads individual feather files and combines into a mega feather file'''\n", 724 | " files = glob.glob(os.path.join(PATH,'*'))\n", 725 | " dfs = [pd.read_feather(file) for file in files]\n", 726 | " df = pd.concat(dfs,sort=False)\n", 727 | " if 
'index' in df.columns:\n", 728 | " df.sort_values(['match_id','index'],inplace=True)\n", 729 | " df.reset_index(drop=True,inplace=True)\n", 730 | " print(df.info(verbose=True,null_counts=True))\n", 731 | " df.to_feather(SAVE_PATH)" 732 | ] 733 | }, 734 | { 735 | "cell_type": "code", 736 | "execution_count": 24, 737 | "metadata": {}, 738 | "outputs": [ 739 | { 740 | "name": "stderr", 741 | "output_type": "stream", 742 | "text": [ 743 | "/home/andy/anaconda3/envs/statsbomb-explore/lib/python3.7/site-packages/pyarrow/pandas_compat.py:752: FutureWarning: .labels was deprecated in version 0.24.0. Use .codes instead.\n", 744 | " labels, = index.labels\n" 745 | ] 746 | }, 747 | { 748 | "name": "stdout", 749 | "output_type": "stream", 750 | "text": [ 751 | "\n", 752 | "RangeIndex: 2797557 entries, 0 to 2797556\n", 753 | "Data columns (total 153 columns):\n", 754 | "id 2797557 non-null object\n", 755 | "index 2797557 non-null int64\n", 756 | "period 2797557 non-null int64\n", 757 | "timestamp 2797557 non-null object\n", 758 | "minute 2797557 non-null int64\n", 759 | "second 2797557 non-null int64\n", 760 | "possession 2797557 non-null int64\n", 761 | "duration 2046570 non-null float64\n", 762 | "off_camera 27283 non-null float64\n", 763 | "out 16363 non-null float64\n", 764 | "under_pressure 604676 non-null float64\n", 765 | "counterpress 86916 non-null float64\n", 766 | "ball_receipt_outcome_id 110299 non-null float64\n", 767 | "ball_receipt_outcome_name 110299 non-null object\n", 768 | "ball_recovery_offensive 298 non-null object\n", 769 | "ball_recovery_recovery_failure 6325 non-null object\n", 770 | "block_save_block 176 non-null object\n", 771 | "block_offensive 423 non-null object\n", 772 | "clearance_right_foot 7995 non-null object\n", 773 | "clearance_body_part_id 26528 non-null float64\n", 774 | "clearance_body_part_name 26528 non-null object\n", 775 | "clearance_left_foot 4733 non-null object\n", 776 | "clearance_head 13659 non-null object\n", 777 | 
"clearance_aerial_won 4886 non-null object\n", 778 | "dribble_outcome_id 32587 non-null float64\n", 779 | "dribble_outcome_name 32587 non-null object\n", 780 | "dribble_overrun 1948 non-null object\n", 781 | "duel_type_id 48472 non-null float64\n", 782 | "duel_type_name 48472 non-null object\n", 783 | "duel_outcome_id 30420 non-null float64\n", 784 | "duel_outcome_name 30420 non-null object\n", 785 | "foul_committed_offensive 921 non-null object\n", 786 | "foul_committed_type_id 1397 non-null float64\n", 787 | "foul_committed_type_name 1397 non-null object\n", 788 | "foul_committed_card_id 2492 non-null float64\n", 789 | "foul_committed_card_name 2492 non-null object\n", 790 | "foul_committed_penalty 229 non-null object\n", 791 | "foul_won_defensive 5429 non-null object\n", 792 | "foul_won_penalty 192 non-null object\n", 793 | "goalkeeper_type_id 23560 non-null float64\n", 794 | "goalkeeper_type_name 23560 non-null object\n", 795 | "goalkeeper_position_id 19906 non-null float64\n", 796 | "goalkeeper_position_name 19906 non-null object\n", 797 | "goalkeeper_outcome_id 11203 non-null float64\n", 798 | "goalkeeper_outcome_name 11203 non-null object\n", 799 | "goalkeeper_body_part_id 5730 non-null float64\n", 800 | "goalkeeper_body_part_name 5730 non-null object\n", 801 | "goalkeeper_technique_id 7702 non-null float64\n", 802 | "goalkeeper_technique_name 7702 non-null object\n", 803 | "half_start_late_video_start 32 non-null object\n", 804 | "interception_outcome_id 15212 non-null float64\n", 805 | "interception_outcome_name 15212 non-null object\n", 806 | "miscontrol_aerial_won 723 non-null object\n", 807 | "pass_length 769576 non-null float64\n", 808 | "pass_angle 769576 non-null float64\n", 809 | "pass_recipient_id 716166 non-null float64\n", 810 | "pass_recipient_name 716166 non-null object\n", 811 | "pass_height_id 769576 non-null float64\n", 812 | "pass_height_name 769576 non-null object\n", 813 | "pass_type_id 151071 non-null float64\n", 814 | "pass_type_name 
151071 non-null object\n", 815 | "pass_body_part_id 724127 non-null float64\n", 816 | "pass_body_part_name 724127 non-null object\n", 817 | "pass_outcome_id 163709 non-null float64\n", 818 | "pass_outcome_name 163709 non-null object\n", 819 | "pass_cross 16541 non-null object\n", 820 | "pass_assisted_shot_id 13984 non-null object\n", 821 | "pass_shot_assist 12333 non-null object\n", 822 | "pass_switch 20890 non-null object\n", 823 | "pass_aerial_won 11234 non-null object\n", 824 | "pass_goal_assist 1651 non-null object\n", 825 | "pass_no_touch 507 non-null object\n", 826 | "pass_inswinging 1871 non-null object\n", 827 | "pass_technique_id 8987 non-null float64\n", 828 | "pass_technique_name 8987 non-null object\n", 829 | "pass_cut_back 1444 non-null object\n", 830 | "pass_straight 520 non-null object\n", 831 | "pass_through_ball 5002 non-null object\n", 832 | "pass_outswinging 1594 non-null object\n", 833 | "play_pattern_id 2797557 non-null int64\n", 834 | "play_pattern_name 2797557 non-null object\n", 835 | "player_id 2782289 non-null float64\n", 836 | "player_name 2782289 non-null object\n", 837 | "position_id 2782289 non-null float64\n", 838 | "position_name 2782289 non-null object\n", 839 | "possession_team_id 2797557 non-null int64\n", 840 | "possession_team_name 2797557 non-null object\n", 841 | "shot_statsbomb_xg 19934 non-null float64\n", 842 | "shot_key_pass_id 13984 non-null object\n", 843 | "shot_aerial_won 1209 non-null object\n", 844 | "shot_type_id 19934 non-null float64\n", 845 | "shot_type_name 19934 non-null object\n", 846 | "shot_body_part_id 19934 non-null float64\n", 847 | "shot_body_part_name 19934 non-null object\n", 848 | "shot_technique_id 19934 non-null float64\n", 849 | "shot_technique_name 19934 non-null object\n", 850 | "shot_outcome_id 19934 non-null float64\n", 851 | "shot_outcome_name 19934 non-null object\n", 852 | "shot_first_time 5275 non-null object\n", 853 | "shot_one_on_one 1239 non-null object\n", 854 | "substitution_outcome_id 
4289 non-null float64\n", 855 | "substitution_outcome_name 4289 non-null object\n", 856 | "substitution_replacement_id 4294 non-null float64\n", 857 | "substitution_replacement_name 4294 non-null object\n", 858 | "tactics_formation 2929 non-null float64\n", 859 | "team_id 2797557 non-null int64\n", 860 | "team_name 2797557 non-null object\n", 861 | "type_id 2797557 non-null int64\n", 862 | "type_name 2797557 non-null object\n", 863 | "x 2775055 non-null float64\n", 864 | "y 2775055 non-null float64\n", 865 | "pass_end_x 769576 non-null float64\n", 866 | "pass_end_y 769576 non-null float64\n", 867 | "carry_end_x 637990 non-null float64\n", 868 | "carry_end_y 637990 non-null float64\n", 869 | "shot_end_x 19934 non-null float64\n", 870 | "shot_end_y 19934 non-null float64\n", 871 | "shot_end_z 14422 non-null float64\n", 872 | "goalkeeper_end_x 12306 non-null float64\n", 873 | "goalkeeper_end_y 12306 non-null float64\n", 874 | "match_id 2797557 non-null int64\n", 875 | "50_50_outcome_id 1232 non-null float64\n", 876 | "50_50_outcome_name 1232 non-null object\n", 877 | "bad_behaviour_card_id 545 non-null float64\n", 878 | "bad_behaviour_card_name 545 non-null object\n", 879 | "dribble_nutmeg 1064 non-null object\n", 880 | "foul_committed_advantage 2940 non-null object\n", 881 | "foul_won_advantage 3034 non-null object\n", 882 | "goalkeeper_success_in_play 15 non-null object\n", 883 | "injury_stoppage_in_chain 303 non-null object\n", 884 | "pass_miscommunication 478 non-null object\n", 885 | "pass_backheel 978 non-null object\n", 886 | "block_deflection 849 non-null object\n", 887 | "dribble_no_touch 84 non-null object\n", 888 | "pass_deflected 875 non-null object\n", 889 | "shot_deflected 195 non-null object\n", 890 | "clearance_other 141 non-null object\n", 891 | "shot_open_goal 236 non-null object\n", 892 | "goalkeeper_punched_out 90 non-null object\n", 893 | "shot_redirect 63 non-null object\n", 894 | "goalkeeper_lost_in_play 20 non-null object\n", 895 | 
"goalkeeper_shot_saved_off_target 72 non-null object\n", 896 | "shot_saved_off_target 72 non-null object\n", 897 | "shot_follows_dribble 17 non-null object\n", 898 | "goalkeeper_shot_saved_to_post 54 non-null object\n", 899 | "shot_saved_to_post 49 non-null object\n", 900 | "goalkeeper_lost_out 10 non-null object\n", 901 | "half_end_early_video_end 8 non-null object\n", 902 | "goalkeeper_saved_to_post 2 non-null object\n", 903 | "goalkeeper_success_out 8 non-null object\n", 904 | "player_off_permanent 7 non-null object\n", 905 | "goalkeeper_penalty_saved_to_post 1 non-null object\n", 906 | "shot_kick_off 1 non-null object\n", 907 | "dtypes: float64(49), int64(10), object(94)\n", 908 | "memory usage: 3.2+ GB\n", 909 | "None\n" 910 | ] 911 | } 912 | ], 913 | "source": [ 914 | "combine_single_file(RAW_EVENT_PATH,SAVE_PATH=os.path.join(DATA_PATH,'events'))" 915 | ] 916 | }, 917 | { 918 | "cell_type": "code", 919 | "execution_count": 25, 920 | "metadata": {}, 921 | "outputs": [ 922 | { 923 | "name": "stdout", 924 | "output_type": "stream", 925 | "text": [ 926 | "\n", 927 | "RangeIndex: 5450328 entries, 0 to 5450327\n", 928 | "Data columns (total 7 columns):\n", 929 | "id 5450328 non-null object\n", 930 | "id_related 5450328 non-null object\n", 931 | "type_name 5450328 non-null object\n", 932 | "index 5450328 non-null int64\n", 933 | "type_name_related 5450328 non-null object\n", 934 | "index_related 5450328 non-null int64\n", 935 | "match_id 5450328 non-null int64\n", 936 | "dtypes: int64(3), object(4)\n", 937 | "memory usage: 291.1+ MB\n", 938 | "None\n" 939 | ] 940 | } 941 | ], 942 | "source": [ 943 | "combine_single_file(RAW_RELATED_PATH,SAVE_PATH=os.path.join(DATA_PATH,'related_events'))" 944 | ] 945 | }, 946 | { 947 | "cell_type": "code", 948 | "execution_count": 26, 949 | "metadata": {}, 950 | "outputs": [ 951 | { 952 | "name": "stdout", 953 | "output_type": "stream", 954 | "text": [ 955 | "\n", 956 | "RangeIndex: 244803 entries, 0 to 244802\n", 957 | "Data 
columns (total 10 columns):\n", 958 | "id 244803 non-null object\n", 959 | "event_freeze_id 244803 non-null int64\n", 960 | "player_teammate 244803 non-null bool\n", 961 | "player_id 244803 non-null int64\n", 962 | "player_name 244803 non-null object\n", 963 | "player_position_id 244803 non-null int64\n", 964 | "player_position_name 244803 non-null object\n", 965 | "x 244803 non-null float64\n", 966 | "y 244803 non-null float64\n", 967 | "match_id 244803 non-null int64\n", 968 | "dtypes: bool(1), float64(2), int64(4), object(3)\n", 969 | "memory usage: 17.0+ MB\n", 970 | "None\n" 971 | ] 972 | } 973 | ], 974 | "source": [ 975 | "combine_single_file(RAW_SHOT_PATH,SAVE_PATH=os.path.join(DATA_PATH,'shot_freeze_frame'))" 976 | ] 977 | }, 978 | { 979 | "cell_type": "code", 980 | "execution_count": 27, 981 | "metadata": {}, 982 | "outputs": [ 983 | { 984 | "name": "stdout", 985 | "output_type": "stream", 986 | "text": [ 987 | "\n", 988 | "RangeIndex: 32211 entries, 0 to 32210\n", 989 | "Data columns (total 8 columns):\n", 990 | "id 32211 non-null object\n", 991 | "event_tactics_id 32211 non-null int64\n", 992 | "player_jersey_number 32204 non-null float64\n", 993 | "player_id 32211 non-null int64\n", 994 | "player_name 32211 non-null object\n", 995 | "player_position_id 32211 non-null int64\n", 996 | "player_position_name 32211 non-null object\n", 997 | "match_id 32211 non-null int64\n", 998 | "dtypes: float64(1), int64(4), object(3)\n", 999 | "memory usage: 2.0+ MB\n", 1000 | "None\n" 1001 | ] 1002 | } 1003 | ], 1004 | "source": [ 1005 | "combine_single_file(RAW_TACTICS_PATH,SAVE_PATH=os.path.join(DATA_PATH,'tactics'))" 1006 | ] 1007 | } 1008 | ], 1009 | "metadata": { 1010 | "kernelspec": { 1011 | "display_name": "Python 3", 1012 | "language": "python", 1013 | "name": "python3" 1014 | }, 1015 | "language_info": { 1016 | "codemirror_mode": { 1017 | "name": "ipython", 1018 | "version": 3 1019 | }, 1020 | "file_extension": ".py", 1021 | "mimetype": "text/x-python", 1022 | 
"name": "python", 1023 | "nbconvert_exporter": "python", 1024 | "pygments_lexer": "ipython3", 1025 | "version": "3.7.5" 1026 | } 1027 | }, 1028 | "nbformat": 4, 1029 | "nbformat_minor": 2 1030 | } 1031 | -------------------------------------------------------------------------------- /demo_crawley.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from mplsoccer.pitch import Pitch\n", 10 | "import numpy as np\n", 11 | "import matplotlib.pyplot as plt\n", 12 | "from urllib.request import urlopen\n", 13 | "from PIL import Image\n", 14 | "import numpy as np" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": 2, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "background_color = '#004D98'\n", 24 | "line_color='#d2dde1'\n", 25 | "marker_color = '#dcdf4c'\n", 26 | "figsize = (9,16)\n", 27 | "width, height = figsize\n", 28 | "aspect = width/height" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 3, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "pitch = Pitch(pitch_color=background_color,line_color=line_color,orientation='vertical',pitch_type='opta',\n", 38 | " view='half')\n", 39 | "# going to use this to plot a legend - cheap hack that lines are same color as the background so\n", 40 | "# the lines won't show up\n", 41 | "empty_pitch = Pitch(pitch_color=background_color,line_color=background_color,\n", 42 | " orientation='vertical',pitch_type='opta',view='full')" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 4, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "IMAGE_URL = 'https://upload.wikimedia.org/wikipedia/en/8/8b/Crawley_Town_FC_logo.png'\n", 52 | "crawley_logo = np.array(Image.open(urlopen(IMAGE_URL)))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": 5, 
58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "# for this image the aspect is different so you have to calculate the logo height from its width\n", 62 | "# so that you can get the height and width of the logo axis right\n", 63 | "def calculate_display_height(img,img_display_width,aspect):\n", 64 | " img_height, img_width, _ = img.shape\n", 65 | " img_aspect = img_width/img_height\n", 66 | " img_display_height = img_display_width/img_aspect*aspect\n", 67 | " return img_display_height" 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "# Note that the logo is slightly transparent (alpha = 0.9). I like this better, but you can delete the alpha argument to make the logo opaque." 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 6, 80 | "metadata": { 81 | "scrolled": false 82 | }, 83 | "outputs": [ 84 | { 85 | "data": { 86 | "image/png": "\n", 87 | "text/plain": [ 88 | "
" 89 | ] 90 | }, 91 | "metadata": {}, 92 | "output_type": "display_data" 93 | } 94 | ], 95 | "source": [ 96 | "# setup a figure\n", 97 | "fig = plt.figure(figsize=figsize,facecolor=background_color);\n", 98 | "# The dimensions [left, bottom, width, height] of the new axes.\n", 99 | "# All quantities are in fractions of figure width and height.\n", 100 | "pitch_top_rect = (0,0.5,1,0.45)\n", 101 | "ax_pitch_top = fig.add_axes(pitch_top_rect)\n", 102 | "legend1_rect = (0.2,0.45,0.1,0.05)\n", 103 | "ax_legend1 = fig.add_axes(legend1_rect)\n", 104 | "ax_legend1.axis('off')\n", 105 | "legend2_rect = (0.5,0.45,0.1,0.05) #dimensions for line legend\n", 106 | "ax_legend2 = fig.add_axes(legend2_rect) # ax for legend2\n", 107 | "pitch_bottom_rect = (0,0,1,0.45)\n", 108 | "ax_pitch_bottom = fig.add_axes(pitch_bottom_rect)\n", 109 | "title_rect = (0.02,0.95,0.7,0.05)\n", 110 | "ax_title= fig.add_axes(title_rect)\n", 111 | "ax_title.axis('off')\n", 112 | "logo_display_width = 0.2\n", 113 | "logo_display_height = calculate_display_height(crawley_logo,logo_display_width,aspect)\n", 114 | "logo_rect = (1-logo_display_width,1-logo_display_height,logo_display_width,logo_display_height)\n", 115 | "ax_logo = fig.add_axes(logo_rect)\n", 116 | "ax_logo.axis('off')\n", 117 | "ax_logo.imshow(crawley_logo,alpha=0.9)\n", 118 | "# draw pitches\n", 119 | "pitch.draw(ax=ax_pitch_top)\n", 120 | "pitch.draw(ax=ax_pitch_bottom)\n", 121 | "# draw legend circle\n", 122 | "ax_legend1.scatter(0.5,0.5,c=marker_color,s=100);\n", 123 | "# draw legend line\n", 124 | "empty_pitch.draw(ax=ax_legend2)\n", 125 | "empty_pitch.lines(np.array([20]),np.array([70]),np.array([70]),np.array([20]),\n", 126 | " comet=True,transparent=True,ax=ax_legend2,color=marker_color);\n", 127 | "# add title\n", 128 | "fig.text(0.03,0.98,\"Crawley Town\",verticalalignment='top',horizontalalignment='left',fontsize=50,color=line_color);\n", 129 | "fig.text(0.3,0.47,\"Pass start location\",fontsize=15,color=line_color);\n", 130 | 
"fig.text(0.6,0.47,\"Assist/ high Xg\",fontsize=15,color=line_color);" 131 | ] 132 | } 133 | ], 134 | "metadata": { 135 | "kernelspec": { 136 | "display_name": "Python 3", 137 | "language": "python", 138 | "name": "python3" 139 | }, 140 | "language_info": { 141 | "codemirror_mode": { 142 | "name": "ipython", 143 | "version": 3 144 | }, 145 | "file_extension": ".py", 146 | "mimetype": "text/x-python", 147 | "name": "python", 148 | "nbconvert_exporter": "python", 149 | "pygments_lexer": "ipython3", 150 | "version": "3.8.2" 151 | } 152 | }, 153 | "nbformat": 4, 154 | "nbformat_minor": 4 155 | } 156 | --------------------------------------------------------------------------------