├── tests ├── __init__.py ├── test_core │ ├── __init__.py │ ├── test_definitions.py │ ├── test_code.py │ ├── test_teamsheet.py │ └── conftest.py ├── test_io │ ├── __init__.py │ ├── test_statsbomb.py │ ├── test_utils.py │ ├── conftest.py │ └── test_datasets.py ├── test_utils │ └── __init__.py ├── test_vis │ ├── __init__.py │ ├── test_utils.py │ ├── conftest.py │ ├── test_positions.py │ └── test_pitches.py ├── test_metrics │ ├── __init__.py │ └── test_apen.py ├── test_models │ ├── __init__.py │ ├── conftest.py │ ├── test_geometry.py │ ├── test_base.py │ ├── test_kinematics.py │ └── test_kinetics.py └── test_transforms │ ├── __init__.py │ └── conftest.py ├── floodlight ├── core │ ├── __init__.py │ ├── property.py │ ├── definitions.py │ ├── code.py │ └── teamsheet.py ├── io │ ├── __init__.py │ ├── utils.py │ └── opta.py ├── utils │ ├── __init__.py │ └── types.py ├── vis │ ├── __init__.py │ ├── utils.py │ └── positions.py ├── metrics │ ├── __init__.py │ └── entropy.py ├── models │ ├── __init__.py │ ├── base.py │ └── geometry.py ├── transforms │ └── __init__.py ├── settings.py └── __init__.py ├── docs ├── requirements.txt ├── source │ ├── _img │ │ ├── pitch.png │ │ ├── pitch_positive.png │ │ ├── pitch_standardized.png │ │ ├── positions_example.png │ │ ├── sample_dvm_plot_hex.png │ │ ├── trajectories_example.png │ │ ├── pitch_football_example.png │ │ ├── pitch_handball_example.png │ │ ├── sample_dvm_plot_square.png │ │ ├── savgol_adjusted_example.png │ │ ├── savgol_default_example.png │ │ ├── sample_dvm_plot_hex_mesh.png │ │ ├── tutorial_matchsheets_grid.png │ │ ├── butterworth_adjusted_example.png │ │ ├── butterworth_default_example.png │ │ ├── getting_started_sample_plot.png │ │ ├── savgol_removed_short_example.png │ │ ├── getting_started_avg_positions.png │ │ ├── tutorial_analysis_trajectories.png │ │ ├── tutorial_matchsheets_allgoals.png │ │ ├── tutorial_matchsheets_singlegoal.png │ │ ├── butterworth_removed_short_example.png │ │ ├── 
tutorial_analysis_trajectories_filtered.png │ │ └── tutorial_analysis_trajectories_filtered_zoom.png │ ├── modules │ │ ├── io │ │ │ ├── dfl.rst │ │ │ ├── opta.rst │ │ │ ├── utils.rst │ │ │ ├── tracab.rst │ │ │ ├── kinexon.rst │ │ │ ├── statsbomb.rst │ │ │ ├── sportradar.rst │ │ │ ├── skillcorner.rst │ │ │ ├── statsperform.rst │ │ │ ├── secondspectrum.rst │ │ │ ├── datasets.rst │ │ │ └── io.rst │ │ ├── core │ │ │ ├── xy.rst │ │ │ ├── code.rst │ │ │ ├── pitch.rst │ │ │ ├── events.rst │ │ │ ├── teamsheet.rst │ │ │ ├── property.rst │ │ │ ├── definitions.rst │ │ │ └── core.rst │ │ ├── metrics │ │ │ ├── entropy.rst │ │ │ └── metrics.rst │ │ ├── models │ │ │ ├── space.rst │ │ │ ├── geometry.rst │ │ │ ├── kinetics.rst │ │ │ ├── kinematics.rst │ │ │ └── models.rst │ │ ├── vis │ │ │ ├── utils.rst │ │ │ ├── pitches.rst │ │ │ ├── positions.rst │ │ │ └── vis.rst │ │ ├── transforms │ │ │ ├── filter.rst │ │ │ └── transforms.rst │ │ └── utils │ │ │ ├── types.rst │ │ │ └── utils.rst │ ├── _static │ │ └── css │ │ │ └── custom.css │ ├── conf.py │ ├── index.rst │ ├── compendium │ │ ├── 0_compendium.rst │ │ ├── 5_identifier.rst │ │ ├── 2_design.rst │ │ ├── 3_time.rst │ │ └── 1_data.rst │ └── guides │ │ ├── getting_started.rst │ │ └── tutorial_analysis.rst ├── README.md └── Makefile ├── .data └── toy_dataset │ ├── xy_away_ht1.npy │ ├── xy_away_ht2.npy │ ├── xy_ball_ht1.npy │ ├── xy_ball_ht2.npy │ ├── xy_home_ht1.npy │ ├── xy_home_ht2.npy │ ├── ballstatus_ht1.npy │ ├── ballstatus_ht2.npy │ ├── possession_ht1.npy │ ├── possession_ht2.npy │ ├── events_away_ht1.csv │ ├── events_home_ht2.csv │ ├── events_away_ht2.csv │ └── events_home_ht1.csv ├── setup.cfg ├── .readthedocs.yaml ├── .github ├── workflows │ ├── linting.yaml │ └── build.yaml └── ISSUE_TEMPLATE │ ├── -scroll--documentation--scroll-.md │ ├── -sparkles--feature-request--sparkles-.md │ └── -collision--bug-report--collision-.md ├── .pre-commit-config.yaml ├── LICENSE ├── pyproject.toml ├── .gitignore └── CONTRIBUTING.md 
/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/vis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_core/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_io/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_vis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/models/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /floodlight/transforms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_metrics/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test_transforms/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | sphinx==7.4.7 2 | sphinx-rtd-theme==2.0.0 3 | sphinx-autodoc-typehints==2.3.0 4 | -------------------------------------------------------------------------------- /docs/source/_img/pitch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/pitch.png -------------------------------------------------------------------------------- /.data/toy_dataset/xy_away_ht1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/xy_away_ht1.npy -------------------------------------------------------------------------------- /.data/toy_dataset/xy_away_ht2.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/xy_away_ht2.npy -------------------------------------------------------------------------------- /.data/toy_dataset/xy_ball_ht1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/xy_ball_ht1.npy -------------------------------------------------------------------------------- /.data/toy_dataset/xy_ball_ht2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/xy_ball_ht2.npy -------------------------------------------------------------------------------- /.data/toy_dataset/xy_home_ht1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/xy_home_ht1.npy -------------------------------------------------------------------------------- /.data/toy_dataset/xy_home_ht2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/xy_home_ht2.npy -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | extend-ignore = E203, W503 3 | exclude = 4 | .git 5 | .github 6 | .idea 7 | .virtualenvs 8 | -------------------------------------------------------------------------------- /.data/toy_dataset/ballstatus_ht1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/ballstatus_ht1.npy -------------------------------------------------------------------------------- 
/.data/toy_dataset/ballstatus_ht2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/ballstatus_ht2.npy -------------------------------------------------------------------------------- /.data/toy_dataset/possession_ht1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/possession_ht1.npy -------------------------------------------------------------------------------- /.data/toy_dataset/possession_ht2.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/.data/toy_dataset/possession_ht2.npy -------------------------------------------------------------------------------- /docs/source/_img/pitch_positive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/pitch_positive.png -------------------------------------------------------------------------------- /docs/source/_img/pitch_standardized.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/pitch_standardized.png -------------------------------------------------------------------------------- /docs/source/_img/positions_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/positions_example.png -------------------------------------------------------------------------------- /docs/source/_img/sample_dvm_plot_hex.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/sample_dvm_plot_hex.png -------------------------------------------------------------------------------- /docs/source/_img/trajectories_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/trajectories_example.png -------------------------------------------------------------------------------- /docs/source/_img/pitch_football_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/pitch_football_example.png -------------------------------------------------------------------------------- /docs/source/_img/pitch_handball_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/pitch_handball_example.png -------------------------------------------------------------------------------- /docs/source/_img/sample_dvm_plot_square.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/sample_dvm_plot_square.png -------------------------------------------------------------------------------- /docs/source/_img/savgol_adjusted_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/savgol_adjusted_example.png -------------------------------------------------------------------------------- /docs/source/_img/savgol_default_example.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/savgol_default_example.png -------------------------------------------------------------------------------- /docs/source/_img/sample_dvm_plot_hex_mesh.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/sample_dvm_plot_hex_mesh.png -------------------------------------------------------------------------------- /docs/source/_img/tutorial_matchsheets_grid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/tutorial_matchsheets_grid.png -------------------------------------------------------------------------------- /docs/source/modules/io/dfl.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | floodlight.io.dfl 3 | ================= 4 | 5 | .. 
automodule:: floodlight.io.dfl 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/_img/butterworth_adjusted_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/butterworth_adjusted_example.png -------------------------------------------------------------------------------- /docs/source/_img/butterworth_default_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/butterworth_default_example.png -------------------------------------------------------------------------------- /docs/source/_img/getting_started_sample_plot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/getting_started_sample_plot.png -------------------------------------------------------------------------------- /docs/source/_img/savgol_removed_short_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/savgol_removed_short_example.png -------------------------------------------------------------------------------- /docs/source/modules/core/xy.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | floodlight.core.xy 3 | ================== 4 | 5 | .. automodule:: floodlight.core.xy 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/io/opta.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | floodlight.io.opta 3 | ================== 4 | 5 | .. 
automodule:: floodlight.io.opta 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/_img/getting_started_avg_positions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/getting_started_avg_positions.png -------------------------------------------------------------------------------- /docs/source/_img/tutorial_analysis_trajectories.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/tutorial_analysis_trajectories.png -------------------------------------------------------------------------------- /docs/source/_img/tutorial_matchsheets_allgoals.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/tutorial_matchsheets_allgoals.png -------------------------------------------------------------------------------- /docs/source/_img/tutorial_matchsheets_singlegoal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/tutorial_matchsheets_singlegoal.png -------------------------------------------------------------------------------- /docs/source/modules/io/utils.rst: -------------------------------------------------------------------------------- 1 | =================== 2 | floodlight.io.utils 3 | =================== 4 | 5 | .. 
automodule:: floodlight.io.utils 6 | :members: 7 | -------------------------------------------------------------------------------- /floodlight/utils/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import numpy as np 4 | 5 | 6 | Numeric = Union[int, float, np.number] 7 | """Numeric data types""" 8 | -------------------------------------------------------------------------------- /docs/source/_img/butterworth_removed_short_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/butterworth_removed_short_example.png -------------------------------------------------------------------------------- /docs/source/modules/core/code.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | floodlight.core.code 3 | ==================== 4 | 5 | .. automodule:: floodlight.core.code 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/io/tracab.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | floodlight.io.tracab 3 | ==================== 4 | 5 | .. automodule:: floodlight.io.tracab 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/core/pitch.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | floodlight.core.pitch 3 | ===================== 4 | 5 | .. 
automodule:: floodlight.core.pitch 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/io/kinexon.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | floodlight.io.kinexon 3 | ===================== 4 | 5 | .. automodule:: floodlight.io.kinexon 6 | :members: 7 | -------------------------------------------------------------------------------- /floodlight/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pathlib import Path 3 | 4 | ROOT_DIR = Path(os.path.dirname(os.path.abspath(__file__))).parent 5 | DATA_DIR = ROOT_DIR / ".data" 6 | -------------------------------------------------------------------------------- /docs/source/modules/core/events.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | floodlight.core.events 3 | ====================== 4 | 5 | .. 
automodule:: floodlight.core.events 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/_img/tutorial_analysis_trajectories_filtered.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/tutorial_analysis_trajectories_filtered.png -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | /* table line breaks (for autosummary) */ 2 | .wy-table-responsive table td, 3 | .wy-table-responsive table th { 4 | white-space: normal; 5 | } 6 | -------------------------------------------------------------------------------- /docs/source/modules/io/statsbomb.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | floodlight.io.statsbomb 3 | ======================= 4 | 5 | .. automodule:: floodlight.io.statsbomb 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/metrics/entropy.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | floodlight.metrics.entropy 3 | ========================== 4 | 5 | .. automodule:: floodlight.metrics.entropy 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/models/space.rst: -------------------------------------------------------------------------------- 1 | ======================= 2 | floodlight.models.space 3 | ======================= 4 | 5 | ..
automodule:: floodlight.models.space 6 | :members: 7 | -------------------------------------------------------------------------------- /.data/toy_dataset/events_away_ht1.csv: -------------------------------------------------------------------------------- 1 | ,eID,gameclock,outcome 2 | 0,KickoffWhistle,0.0, 3 | 1,Save,50.0, 4 | 2,Kickoff,140.0, 5 | 3,Pass,140.0,1 6 | 4,Pass,143.0,0 7 | 5,FinalWhistle,202.0, 8 | -------------------------------------------------------------------------------- /docs/source/modules/io/sportradar.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | floodlight.io.sportradar 3 | ======================== 4 | 5 | .. automodule:: floodlight.io.sportradar 6 | :members: 7 | -------------------------------------------------------------------------------- /.data/toy_dataset/events_home_ht2.csv: -------------------------------------------------------------------------------- 1 | ,eID,gameclock,outcome 2 | 0,KickoffWhistle,0.0, 3 | 1,Tackle,25.0,0.0 4 | 2,Foul,25.0, 5 | 3,Handball,64.0, 6 | 4,Goalkick,143.0, 7 | 5,FinalWhistle,148.0, 8 | -------------------------------------------------------------------------------- /docs/source/_img/tutorial_analysis_trajectories_filtered_zoom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/floodlight-sports/floodlight/HEAD/docs/source/_img/tutorial_analysis_trajectories_filtered_zoom.png -------------------------------------------------------------------------------- /docs/source/modules/core/teamsheet.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | floodlight.core.teamsheet 3 | ========================= 4 | 5 | .. 
automodule:: floodlight.core.teamsheet 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/io/skillcorner.rst: -------------------------------------------------------------------------------- 1 | ========================= 2 | floodlight.io.skillcorner 3 | ========================= 4 | 5 | .. automodule:: floodlight.io.skillcorner 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/vis/utils.rst: -------------------------------------------------------------------------------- 1 | ==================== 2 | floodlight.vis.utils 3 | ==================== 4 | 5 | description here 6 | 7 | .. automodule:: floodlight.vis.utils 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/modules/io/statsperform.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | floodlight.io.statsperform 3 | ========================== 4 | 5 | .. automodule:: floodlight.io.statsperform 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/models/geometry.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | floodlight.models.geometry 3 | ========================== 4 | 5 | .. automodule:: floodlight.models.geometry 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/models/kinetics.rst: -------------------------------------------------------------------------------- 1 | ========================== 2 | floodlight.models.kinetics 3 | ========================== 4 | 5 | .. 
automodule:: floodlight.models.kinetics 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/vis/pitches.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | floodlight.vis.pitches 3 | ====================== 4 | 5 | description here 6 | 7 | .. automodule:: floodlight.vis.pitches 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/modules/io/secondspectrum.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | floodlight.io.secondspectrum 3 | ============================ 4 | 5 | .. automodule:: floodlight.io.secondspectrum 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/models/kinematics.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | floodlight.models.kinematics 3 | ============================ 4 | 5 | .. automodule:: floodlight.models.kinematics 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/transforms/filter.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | floodlight.transforms.filter 3 | ============================ 4 | 5 | .. automodule:: floodlight.transforms.filter 6 | :members: 7 | -------------------------------------------------------------------------------- /docs/source/modules/vis/positions.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | floodlight.vis.positions 3 | ======================== 4 | 5 | description here 6 | 7 | .. 
automodule:: floodlight.vis.positions 8 | :members: 9 | -------------------------------------------------------------------------------- /docs/source/modules/utils/types.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | floodlight.utils.types 3 | ====================== 4 | 5 | floodlight specific typing objects 6 | 7 | .. autodata:: floodlight.utils.types.Numeric 8 | :annotation: 9 | -------------------------------------------------------------------------------- /docs/source/modules/utils/utils.rst: -------------------------------------------------------------------------------- 1 | ================ 2 | floodlight.utils 3 | ================ 4 | 5 | A collection of utility functions. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :caption: Submodule Reference 10 | 11 | types 12 | -------------------------------------------------------------------------------- /docs/source/modules/core/property.rst: -------------------------------------------------------------------------------- 1 | ======================== 2 | floodlight.core.property 3 | ======================== 4 | 5 | .. 
automodule:: floodlight.core.property 6 | :members: 7 | :inherited-members: 8 | :exclude-members: BaseProperty 9 | -------------------------------------------------------------------------------- /.data/toy_dataset/events_away_ht2.csv: -------------------------------------------------------------------------------- 1 | ,eID,gameclock,outcome 2 | 0,KickoffWhistle,0.0, 3 | 1,Kickoff,0.0, 4 | 2,Pass,0.0,1 5 | 3,Pass,1.0,1 6 | 4,Pass,1.0,1 7 | 5,Dribbling,5.0,1 8 | 6,Pass,8.0,1 9 | 7,Pass,13.0,1 10 | 8,Pass,18.0,1 11 | 9,Freekick,60.0,0 12 | 10,Penalty,105.0,0 13 | 11,FinalWhistle,148.0, 14 | -------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # Quick Docs Intro 2 | 3 | If you have installed floodlight from source and set up your dev environment via poetry, just run the sphinx build from within this directory to build the docs locally: 4 | 5 | ``` 6 | cd docs 7 | poetry run sphinx-build -b html source build/html 8 | ``` 9 | 10 | You can now check out the docs at `docs/build/html/index.html` 11 | -------------------------------------------------------------------------------- /tests/test_io/test_statsbomb.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from floodlight.io.statsbomb import read_open_event_data_json 4 | 5 | 6 | @pytest.mark.unit 7 | def test_statsbomb_read_events_path_not_exists(filepath_empty) -> None: 8 | 9 | with pytest.raises(FileNotFoundError): 10 | read_open_event_data_json( 11 | filepath_events=filepath_empty, filepath_match=filepath_empty 12 | ) 13 | -------------------------------------------------------------------------------- /.data/toy_dataset/events_home_ht1.csv: -------------------------------------------------------------------------------- 1 | ,eID,gameclock,outcome 2 | 0,KickoffWhistle,0.0, 3 | 1,Kickoff,0.0, 4 | 2,Pass,0.0,1 5 | 3,Dribbling,5.0,1 
6 | 4,Pass,15.0,1 7 | 5,Pass,15.0,1 8 | 6,Dribbling,20.0,1 9 | 7,Pass,30.0,1 10 | 8,Pass,35.0,1 11 | 9,Shot,45.0,0 12 | 10,Corner,80.0,1 13 | 11,Header,85.0,1 14 | 12,Goal,90.0,1 15 | 13,ThrowIn,165.0,1 16 | 14,Pass,168.0,1 17 | 15,Dribbling,172.0,1 18 | 16,FinalWhistle,202.0,875 19 | -------------------------------------------------------------------------------- /docs/source/modules/core/definitions.rst: -------------------------------------------------------------------------------- 1 | .. _definitions target: 2 | 3 | =========================== 4 | floodlight.core.definitions 5 | =========================== 6 | 7 | The following source code defines essential and protected columns for usage in :doc:`Events ` and :doc:`Teamsheet ` objects. 8 | 9 | .. literalinclude:: ../../../../floodlight/core/definitions.py 10 | :language: python 11 | :lines: 6-115 12 | -------------------------------------------------------------------------------- /docs/source/modules/metrics/metrics.rst: -------------------------------------------------------------------------------- 1 | ================== 2 | floodlight.metrics 3 | ================== 4 | 5 | Collection of functions for calculations of performance metrics. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :caption: Submodule Reference 10 | 11 | entropy 12 | 13 | 14 | .. rubric:: Performance Metrics 15 | 16 | .. currentmodule:: floodlight.metrics 17 | .. autosummary:: 18 | :nosignatures: 19 | 20 | entropy.approx_entropy 21 | -------------------------------------------------------------------------------- /docs/source/modules/transforms/transforms.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | floodlight.transforms 3 | ===================== 4 | 5 | Collection of data transformation and processing functions. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :caption: Submodule Reference 10 | 11 | filter 12 | 13 | .. rubric:: Filter 14 | 15 | .. 
currentmodule:: floodlight.transforms.filter 16 | .. autosummary:: 17 | :nosignatures: 18 | 19 | butterworth_lowpass 20 | savgol_lowpass 21 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # Required 2 | version: 2 3 | 4 | # Set the version of Python and other tools you might need 5 | build: 6 | os: ubuntu-22.04 7 | tools: 8 | python: "3.12" 9 | 10 | # Build documentation in the docs/ directory with Sphinx 11 | sphinx: 12 | builder: html 13 | configuration: docs/source/conf.py 14 | fail_on_warning: true 15 | 16 | # Optionally declare the Python requirements required to build your docs 17 | python: 18 | install: 19 | - requirements: docs/requirements.txt 20 | - method: pip 21 | path: . 22 | 23 | formats: [] 24 | -------------------------------------------------------------------------------- /.github/workflows/linting.yaml: -------------------------------------------------------------------------------- 1 | name: linting 2 | 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [] 8 | 9 | jobs: 10 | 11 | flake8-lint: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - name: Check out source repository 15 | uses: actions/checkout@v2 16 | 17 | - name: Set up Python 3.10 18 | uses: actions/setup-python@v1 19 | with: 20 | python-version: "3.10" 21 | 22 | - name: flake8 Lint 23 | uses: py-actions/flake8@v1 24 | with: 25 | ignore: "E203" 26 | max-line-length: "88" 27 | -------------------------------------------------------------------------------- /docs/source/modules/vis/vis.rst: -------------------------------------------------------------------------------- 1 | ============== 2 | floodlight.vis 3 | ============== 4 | 5 | A collection of plotting functions based on the `matplotlib `_ library. 6 | 7 | .. 
toctree:: 8 | :maxdepth: 1 9 | :caption: Submodule Reference 10 | 11 | pitches 12 | positions 13 | utils 14 | 15 | 16 | .. rubric:: Pitches 17 | 18 | .. currentmodule:: floodlight.vis.pitches 19 | .. autosummary:: 20 | :nosignatures: 21 | 22 | plot_football_pitch 23 | plot_handball_pitch 24 | 25 | .. rubric:: Positions 26 | 27 | .. currentmodule:: floodlight.vis.positions 28 | .. autosummary:: 29 | :nosignatures: 30 | 31 | plot_positions 32 | plot_trajectories 33 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tests/test_io/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from floodlight.io.utils import get_and_convert 4 | 5 | 6 | # Test get_and_convert function 7 | @pytest.mark.unit 8 | def test_get_and_convert() -> None: 9 | sample_dict = {"foo": "1"} 10 | 11 | # get 12 | assert get_and_convert(sample_dict, "foo", int) == 1 13 | # convert 14 | assert type(get_and_convert(sample_dict, "foo", int)) is int 15 | # missing entry 16 | assert get_and_convert(sample_dict, "bar", int) is None 17 | # fallback if failed type conversion 18 | assert get_and_convert(sample_dict, "foo", dict) == "1" 19 | # custom default with failed conversion 20 | assert get_and_convert(sample_dict, "bar", int, "default") == "default" 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-scroll--documentation--scroll-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ":scroll: Documentation :scroll:" 3 | about: For typos, obscurities or proposing additions 4 | title: "[DOCS] " 5 | labels: documentation 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Checklist** 11 | - [ ] I've updated to the latest version of floodlight 12 | 13 | 14 | **Purpose** 15 | - [ ] report typo(s) 16 | - [ ] found obscure or ambiguous documentation 17 | - [ ] propose additions I think are helpful 18 | - [ ] something else 19 | 20 | **Pages** 21 | Where did you find typos or obscurities, or where do you think a new addition should be placed? 22 | 23 | **Description** 24 | Please provide a description of your matter. 25 | 26 | **Additional context** 27 | Add any other context about the problem here. 
28 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | default_stages: [commit] 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.6.0 7 | hooks: 8 | - id: trailing-whitespace 9 | - id: end-of-file-fixer 10 | - id: check-yaml 11 | - id: check-added-large-files 12 | - repo: https://github.com/psf/black 13 | rev: 24.8.0 14 | hooks: 15 | - id: black 16 | language_version: python3 17 | - repo: https://github.com/pycqa/flake8 18 | rev: 7.1.1 19 | hooks: 20 | - id: flake8 21 | args: [--max-line-length=88] 22 | - repo: https://github.com/commitizen-tools/commitizen 23 | rev: v3.29.0 24 | hooks: 25 | - id: commitizen 26 | stages: [commit-msg] 27 | -------------------------------------------------------------------------------- /docs/source/modules/io/datasets.rst: -------------------------------------------------------------------------------- 1 | ====================== 2 | floodlight.io.datasets 3 | ====================== 4 | 5 | .. note:: 6 | We cannot guarantee data availability for public data sets, unfortunately. Data from published articles (e.g. the EIGD) should be permanently available and stay static. Public provider data from StatsBomb is available on GitHub, but unversioned and with dynamically changing content. You can find methods to query the current list of games, and we also state the last date that we found the data to be available. 7 | 8 | As public data sets for proprietary sports data are fairly rare, the standard way of accessing data is still via provider raw data files. To load these, we have more than ten parser for different provider formats in the IO submodule! 9 | 10 | 11 | .. 
automodule:: floodlight.io.datasets 12 | :members: 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-sparkles--feature-request--sparkles-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ":sparkles: Feature request :sparkles:" 3 | about: Suggest or propose an idea for this project 4 | title: "[FEAT] " 5 | labels: enhancement 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Checklist** 11 | - [ ] I believe the feature fits the scope of the project 12 | 13 | **Is your feature request related to a problem? Please describe.** 14 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 15 | 16 | **Describe the solution you'd like** 17 | A clear and concise description of what you want to happen. 18 | 19 | **Describe alternatives you've considered** 20 | A clear and concise description of any alternative solutions or features you've considered. 21 | 22 | **Additional context** 23 | Add any other context or screenshots about the feature request here. 
24 | -------------------------------------------------------------------------------- /tests/test_vis/test_utils.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import matplotlib 3 | matplotlib.use('agg') 4 | from matplotlib import pyplot as plt # noqa: 402 5 | 6 | from floodlight.vis.utils import check_axes_given # noqa: 402 7 | 8 | 9 | # Test check_axes_given(func 10 | @pytest.mark.plot 11 | def test_check_axes_given(): 12 | # Arrange 13 | @check_axes_given 14 | def some_function_that_requires_matplotlib_axes(ax: matplotlib.axes = None): 15 | if isinstance(ax, matplotlib.axes.Axes): 16 | return True 17 | else: 18 | return False 19 | 20 | # Act 21 | without_ax_given = some_function_that_requires_matplotlib_axes(ax=None) 22 | with_ax_given = some_function_that_requires_matplotlib_axes(ax=plt.subplots()[1]) 23 | 24 | # Assert 25 | assert without_ax_given 26 | assert with_ax_given 27 | 28 | plt.close() 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/-collision--bug-report--collision-.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ":collision: Bug report :collision:" 3 | about: Create a bug report to help us improve 4 | title: "[BUG] " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Checklist** 11 | - [ ] I've updated to the latest version of floodlight 12 | - [ ] I've checked if a similar issue exists 13 | 14 | **Describe the bug** 15 | A clear and concise description of what the bug is. 16 | 17 | **To Reproduce** 18 | Steps to reproduce the behavior. 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Platform (please complete the following information):** 27 | - OS: [e.g. Ubuntu] 28 | - Python Version [e.g. 
3.8] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | -------------------------------------------------------------------------------- /docs/source/modules/core/core.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | floodlight.core 3 | =============== 4 | 5 | Collection of core data structures. Each class is designed to contain one type of sports data where each individual object contains data for *one* team and *one* time segment. 6 | 7 | .. toctree:: 8 | :maxdepth: 1 9 | :caption: Submodule Reference 10 | 11 | xy 12 | events 13 | pitch 14 | code 15 | teamsheet 16 | property 17 | definitions 18 | 19 | 20 | .. rubric:: Data-level Core Objects 21 | 22 | .. currentmodule:: floodlight.core 23 | .. autosummary:: 24 | :nosignatures: 25 | 26 | xy.XY 27 | events.Events 28 | pitch.Pitch 29 | code.Code 30 | property.DyadicProperty 31 | property.PlayerProperty 32 | property.TeamProperty 33 | 34 | 35 | .. rubric:: Observation-level Core Objects 36 | 37 | .. currentmodule:: floodlight.core 38 | .. 
autosummary:: 39 | :nosignatures: 40 | 41 | teamsheet.Teamsheet 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 floodlight-sports 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /floodlight/vis/utils.py: -------------------------------------------------------------------------------- 1 | from functools import wraps 2 | 3 | import matplotlib.pyplot as plt 4 | 5 | 6 | def check_axes_given(func): 7 | """Decorator function that checks if a matplotlib.axes is given as an argument. 8 | Creates one if not. 9 | 10 | Parameters 11 | ---------- 12 | func: 13 | Function object that needs a matplotlib.axes as an argument. 
If ax == None 14 | an axes is created an passed to the given function object as a keyworded 15 | argument. 16 | 17 | Returns 18 | ------- 19 | func: 20 | Function with matplotlib.axes as additional argument if not specified. 21 | Otherwise the function is returned as it is. 22 | """ 23 | 24 | @wraps(func) 25 | def add_ax(*args, **kwargs): # actual wrapper function that gets args and kwargs 26 | # from the funtion that was passed 27 | # If matplotlib.axes is not given (ax == None) an axes is created. 28 | if not kwargs.get("ax"): 29 | kwargs.pop("ax") # Remove ax from kwargs 30 | ax = plt.subplots()[1] # Create matplotlib.axes 31 | return func(*args, ax=ax, **kwargs) # return function with axes 32 | 33 | # If matplotlib.axes is given nothing changes and the function is returned with 34 | # the given *args and **kwargs 35 | return func(*args, **kwargs) 36 | 37 | return add_ax 38 | -------------------------------------------------------------------------------- /tests/test_vis/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import matplotlib 4 | matplotlib.use('agg') 5 | import matplotlib.pyplot as plt # noqa: 402 6 | 7 | from floodlight.core.xy import XY # noqa: 402 8 | 9 | 10 | @pytest.fixture() 11 | def example_input_plot_football_pitch() -> []: 12 | ax = plt.subplots()[1] 13 | input = [(0, 105), (0, 68), 105, 68, "m", "standard", False, ax] 14 | 15 | return input 16 | 17 | 18 | @pytest.fixture() 19 | def example_input_plot_football_pitch_axis_ticks() -> []: 20 | ax = plt.subplots()[1] 21 | input = [(0, 105), (0, 68), 105, 68, "m", "standard", True, ax] 22 | 23 | return input 24 | 25 | 26 | @pytest.fixture() 27 | def example_input_plot_handball_pitch() -> []: 28 | ax = plt.subplots()[1] 29 | input = [(0, 40), (0, 20), "m", "standard", False, ax] 30 | 31 | return input 32 | 33 | 34 | @pytest.fixture() 35 | def example_input_plot_handball_pitch_axis_ticks() -> []: 36 | ax = 
plt.subplots()[1] 37 | input = [(0, 40), (0, 20), "m", "standard", True, ax] 38 | 39 | return input 40 | 41 | 42 | @pytest.fixture() 43 | def example_xy_object() -> XY: 44 | pos = np.array( 45 | [ 46 | [35, 5, 35, 63, 25, 25, 25, 50], 47 | [45, 10, 45, 55, 35, 20, 35, 45], 48 | [55, 10, 55, 55, 45, 20, 45, 45], 49 | [88.5, 20, 88.5, 30, 88.5, 40, 88.5, 50], 50 | ] 51 | ) 52 | 53 | return XY(pos) 54 | -------------------------------------------------------------------------------- /tests/test_core/test_definitions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from floodlight.core.definitions import essential_events_columns, protected_columns 4 | 5 | 6 | # Test for column specifications - tests only dictionary syntax, so that new added 7 | # columns dont' break existing methods 8 | @pytest.mark.unit 9 | def test_essential_events_columns() -> None: 10 | # Arrange 11 | for key in essential_events_columns: 12 | # Act + Assert 13 | column = essential_events_columns[key] 14 | assert isinstance(column["definition"], str) 15 | assert isinstance(column["dtypes"], list) 16 | 17 | if column["value_range"] is not None: 18 | assert isinstance(column["value_range"], list) 19 | assert len(column["value_range"]) == 2 20 | min_val, max_val = column["value_range"] 21 | assert min_val <= max_val 22 | 23 | 24 | @pytest.mark.unit 25 | def test_protected_columns() -> None: 26 | # Arrange 27 | for key in protected_columns: 28 | # Act + Assert 29 | column = protected_columns[key] 30 | assert isinstance(column["definition"], str) 31 | assert isinstance(column["dtypes"], list) 32 | 33 | if column["value_range"] is not None: 34 | assert isinstance(column["value_range"], list) 35 | assert len(column["value_range"]) == 2 36 | min_val, max_val = column["value_range"] 37 | assert min_val <= max_val 38 | -------------------------------------------------------------------------------- /floodlight/__init__.py: 
-------------------------------------------------------------------------------- 1 | from .core.code import Code 2 | from .core.events import Events 3 | from .core.pitch import Pitch 4 | from .core.xy import XY 5 | from .core.property import DyadicProperty, PlayerProperty, TeamProperty 6 | from .core.teamsheet import Teamsheet 7 | 8 | __all__ = [ 9 | "__version__", 10 | "__doc__", 11 | "Code", 12 | "DyadicProperty", 13 | "Events", 14 | "Pitch", 15 | "PlayerProperty", 16 | "TeamProperty", 17 | "Teamsheet", 18 | "XY", 19 | ] 20 | 21 | __version__ = "1.1.0" 22 | 23 | __doc__ = """ 24 | A high-level, data-driven sports analytics framework 25 | ==================================================== 26 | 27 | **floodlight** is a Python package for streamlined analysis of sports data. It is 28 | designed with a clear focus on scientific computing and built upon popular libraries 29 | such as *numpy* or *pandas*. 30 | 31 | Load, process, and model tracking and event data, codes and other match-related 32 | information from over ten major data providers or public datasets. This package provides 33 | a range of popular analyses and methods such as space control models, metabolic power, 34 | approximate entropy out of the box. Under the hood, the package comes with a set of 35 | standardized data objects to structure and handle sports data, together with a suite of 36 | common processing operations such as transforms or data manipulation methods. 37 | 38 | All implementations run completely provider- and sports-independent, while maintaining 39 | a maximum of flexibility to incorporate as many data flavours as possible. A high-level 40 | interface allows easy access to all standard routines, so that you can stop worrying 41 | about data wrangling and start focussing on the analysis instead! 
42 | """ 43 | -------------------------------------------------------------------------------- /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | branches: [main, develop] 6 | pull_request: 7 | branches: [] 8 | 9 | jobs: 10 | 11 | build-and-test: 12 | runs-on: ${{ matrix.os }} 13 | 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | os: [ubuntu-latest, windows-latest] 18 | python-version: ['3.10', '3.11', '3.12'] 19 | poetry-version: [1.8.3] 20 | 21 | env: 22 | MPLBACKEND: Agg # https://github.com/microsoft/azure-pipelines-tasks/issues/16426 23 | 24 | steps: 25 | - name: Check out source repository 26 | uses: actions/checkout@v2 27 | 28 | - name: Set up Python ${{ matrix.python-version }} 29 | uses: actions/setup-python@v2 30 | with: 31 | python-version: ${{ matrix.python-version }} 32 | 33 | - name: Install poetry 34 | uses: abatilo/actions-poetry@v2.0.0 35 | with: 36 | poetry-version: ${{ matrix.poetry-version }} 37 | 38 | - name: Cache poetry virtualenv 39 | uses: actions/cache@v4 40 | with: 41 | path: ~/.virtualenvs 42 | key: poetry-${{ hashFiles('**/poetry.lock') }} 43 | restore-keys: | 44 | poetry-${{ hashFiles('**/poetry.lock') }} 45 | 46 | - name: Change poetry config to cached virtualenv 47 | run: | 48 | poetry config virtualenvs.in-project false 49 | poetry config virtualenvs.path ~/.virtualenvs 50 | 51 | - name: Install dependencies 52 | run: poetry install 53 | if: steps.cache.outputs.cache-hit != 'true' 54 | 55 | - name: Test with pytest 56 | run: | 57 | poetry run pytest --cov --cov-report xml . 
import datetime
import os
import sys

# -- Path setup --------------------------------------------------------------
# Make the package root importable so autodoc can resolve floodlight.
sys.path.insert(0, os.path.abspath("../.."))

import floodlight  # noqa: 402


# -- Project information -----------------------------------------------------
project = "floodlight"
version = floodlight.__version__
year = datetime.datetime.now().year
author = "Dominik Raabe"
copyright = f"{year}, {author}"

release = f"v{version}"


# -- General configuration ---------------------------------------------------
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.coverage",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx_autodoc_typehints",
]

# NumPy-style docstrings via napoleon.
napoleon_google_docstring = False
napoleon_use_param = False
napoleon_use_ivar = True

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]


# -- Options for HTML output -------------------------------------------------
html_theme = "sphinx_rtd_theme"
html_theme_options = {
    # Fix: "display_version" was removed in sphinx_rtd_theme 2.0 (the project
    # pins sphinx-rtd-theme = "^2.0.0"); the stale key triggers an
    # "unsupported theme option" warning, which breaks builds because
    # .readthedocs.yaml sets `sphinx.fail_on_warning: true`. The replacement
    # option is "version_selector".
    "version_selector": True,
    "collapse_navigation": False,
    "navigation_depth": 2,
    "prev_next_buttons_location": None,
    "style_external_links": True,
    "style_nav_header_background": "#006666",
}

html_static_path = ["_static"]
html_css_files = ["css/custom.css"]
import shutil
import urllib.request
from typing import AnyStr, Any


def extract_zip(filepath: str, target: str, archive_type: str = "zip") -> None:
    """Extracts the content of an archive to disk.

    Parameters
    ----------
    filepath : str
        Path to file.
    target : str
        Target to extract files to.
    archive_type: "zip", optional
        Type of archive, like zip, rar, gzip, etc.
    """
    shutil.unpack_archive(filepath, target, format=archive_type)


def download_from_url(path: str) -> AnyStr:
    """Downloads file from URL.

    Parameters
    ----------
    path : str
        URL path to download data from.

    Returns
    -------
    data: AnyStr
        Raw response body as returned by ``urllib``.
    """
    res = urllib.request.urlopen(path)
    return res.read()


def get_and_convert(dic: dict, key: Any, value_type: type, default: Any = None) -> Any:
    """Performs dictionary get and type conversion simultaneously.

    Parameters
    ----------
    dic: dict
        Dictionary to be queried.
    key: Any
        Key to be looked up.
    value_type: type
        Desired output type the value should be cast into.
    default: Any, optional
        Return value if key is not in dic, defaults to None.

    Returns
    -------
    value: value_type
        Returns the value for key if key is in dic, else default. Tries type
        conversion to `type(value) = value_type`. If type conversion fails,
        e.g. by trying to force something like `float(None)` due to a missing
        dic entry, value is returned in its original data type.
    """
    value = dic.get(key, default)
    try:
        value = value_type(value)
    except (TypeError, ValueError):
        # Conversion failed (e.g. float(None) after a missing entry) —
        # deliberately fall back to the unconverted value.
        pass

    return value
autosummary:: 48 | :nosignatures: 49 | 50 | DistanceModel.distance_covered 51 | DistanceModel.cumulative_distance_covered 52 | VelocityModel.velocity 53 | AccelerationModel.acceleration 54 | 55 | 56 | .. rubric:: Kinetics 57 | 58 | .. currentmodule:: floodlight.models.kinetics 59 | .. autosummary:: 60 | :nosignatures: 61 | 62 | MetabolicPowerModel.metabolic_power 63 | MetabolicPowerModel.cumulative_metabolic_power 64 | MetabolicPowerModel.equivalent_distance 65 | MetabolicPowerModel.cumulative_equivalent_distance 66 | 67 | .. rubric:: Space 68 | 69 | .. currentmodule:: floodlight.models.space 70 | .. autosummary:: 71 | :nosignatures: 72 | 73 | DiscreteVoronoiModel.player_controls 74 | DiscreteVoronoiModel.team_controls 75 | DiscreteVoronoiModel.plot 76 | DiscreteVoronoiModel.plot_mesh 77 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "floodlight" 3 | version = "1.1.0" 4 | description = "A high-level framework for sports data analysis" 5 | authors = ["draabe "] 6 | license = "MIT" 7 | readme = "README.md" 8 | homepage = "https://github.com/floodlight-sports/floodlight" 9 | repository = "https://github.com/floodlight-sports/floodlight" 10 | documentation = "https://floodlight.readthedocs.io/en/latest/" 11 | keywords = ["sports analytics", "sports data analysis", "tracking data"] 12 | classifiers = [ 13 | "Environment :: Console", 14 | "License :: OSI Approved :: MIT License", 15 | "Operating System :: Microsoft :: Windows", 16 | "Operating System :: Unix" 17 | 18 | ] 19 | include = [ 20 | "LICENSE", 21 | "CHANGELOG.md" 22 | ] 23 | 24 | 25 | [tool.poetry.dependencies] 26 | python = ">=3.10, <3.13" 27 | numpy = "^2.1.1" 28 | scipy = "^1.14.1" 29 | pandas = "^2.2.2" 30 | lxml = "^5.3.0" 31 | iso8601 = "^2.1.0" 32 | pytz = "^2024.1" 33 | h5py = "^3.11.0" 34 | matplotlib = "^3.9.2" 35 | 36 | 
[tool.poetry.group.dev.dependencies] 37 | pytest = "^8.3.2" 38 | black = "^24.8.0" 39 | flake8 = "^7.1.1" 40 | pre-commit = "^3.8.0" 41 | commitizen = "^3.29.0" 42 | coverage = "^7.6.1" 43 | pytest-cov = "^5.0.0" 44 | sphinx = "^7.0.0" 45 | sphinx-rtd-theme = "^2.0.0" 46 | sphinx-autodoc-typehints = "^2.3.0" 47 | auto-changelog = "^0.6.0" 48 | 49 | [tool.commitizen] 50 | name = "cz_conventional_commits" 51 | version = "0.0.1" 52 | tag_format = "$version" 53 | 54 | [build-system] 55 | requires = ["poetry-core>=1.0.0"] 56 | build-backend = "poetry.core.masonry.api" 57 | 58 | [tool.black] 59 | line-length = 88 60 | target-version = ['py38'] 61 | include = '\.pyi?$' 62 | exclude = ''' 63 | 64 | ( 65 | /( 66 | \.eggs # exclude a few common directories in the 67 | | \.git # root of the project 68 | | \.hg 69 | | \.mypy_cache 70 | | \.tox 71 | | \.venv 72 | | _build 73 | | buck-out 74 | | build 75 | | dist 76 | )/ 77 | | foo.py # also separately exclude a file named foo.py in 78 | # the root of the project 79 | ) 80 | ''' 81 | 82 | [tool.pytest.ini_options] 83 | markers = [ 84 | "unit: marks unit tests (deselect with '-m \"not unit\"')", 85 | "plot: marks tests creating visualizations (deselect with '-m \"not plot\"')" 86 | ] 87 | -------------------------------------------------------------------------------- /tests/test_vis/test_positions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import matplotlib 3 | matplotlib.use('agg') 4 | import matplotlib.pyplot as plt # noqa: 402 5 | 6 | from floodlight.vis.positions import plot_positions, plot_trajectories # noqa: 402 7 | 8 | 9 | # Test plot_positions( xy, frame: int, ball: bool, ax: matplotlib.axes, **kwargs) 10 | @pytest.mark.plot 11 | def test_plot_positions_return_without_axes(example_xy_object): 12 | # Act 13 | ax = plot_positions(example_xy_object, frame=0, ball=False, ax=None) 14 | # Assert 15 | assert isinstance(ax, matplotlib.axes.Axes) 16 | plt.close() 17 
| 18 | 19 | @pytest.mark.plot 20 | def test_plot_positions_return_with_axes(example_xy_object): 21 | # Arrange 22 | axes = plt.subplots()[1] 23 | # Act 24 | ax = plot_positions(example_xy_object, frame=0, ball=False, ax=axes) 25 | # Assert 26 | assert ax == axes 27 | plt.close() 28 | 29 | 30 | # Test plot_trajectories(xy, frame: int, ball: bool, ax: matplotlib.axes, **kwargs) 31 | @pytest.mark.plot 32 | def test_plot_trajectoriess_return_matplotlib_axes_without_ax(example_xy_object): 33 | # Act 34 | ax = plot_trajectories( 35 | example_xy_object, start_frame=0, end_frame=4, ball=False, ax=None 36 | ) 37 | # Assert 38 | assert isinstance(ax, matplotlib.axes.Axes) 39 | plt.close() 40 | 41 | 42 | @pytest.mark.plot 43 | def test_plot_trajectories_return_matplotlib_axes_with_ax(example_xy_object): 44 | # Arrange 45 | axes = plt.subplots()[1] 46 | # Act 47 | ax = plot_trajectories( 48 | example_xy_object, start_frame=0, end_frame=4, ball=False, ax=axes 49 | ) 50 | # Assert 51 | assert ax == axes 52 | plt.close() 53 | 54 | 55 | @pytest.mark.plot 56 | def test_plot_trajectories_default_color(example_xy_object): 57 | # Act 58 | plot_trajectories( 59 | example_xy_object, start_frame=0, end_frame=4, ball=False, ax=None 60 | ) 61 | # Assert 62 | for line in plt.gca().lines: 63 | assert line.get_color() == "black" 64 | plt.close() 65 | 66 | 67 | @pytest.mark.plot 68 | def test_plot_trajectories_default_color_ball_true(example_xy_object): 69 | # Act 70 | plot_trajectories(example_xy_object, start_frame=0, end_frame=4, ball=True, ax=None) 71 | # Assert 72 | for line in plt.gca().lines: 73 | assert line.get_color() == "grey" 74 | plt.close() 75 | -------------------------------------------------------------------------------- /tests/test_models/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple 2 | 3 | import pytest 4 | import numpy as np 5 | 6 | from floodlight import Pitch, XY 7 | 8 | 9 | @pytest.fixture() 
10 | def example_xy_object_kinetics() -> XY: 11 | xy = XY( 12 | xy=np.array( 13 | ( 14 | (37.586, 10.144, 32.343, 7.752), 15 | (37.694, 10.144, 32.318, 7.731), 16 | (37.803, 10.145, 32.285, 7.708), 17 | ) 18 | ), 19 | framerate=20, 20 | ) 21 | return xy 22 | 23 | 24 | # sample data for testing geometry models 25 | @pytest.fixture() 26 | def example_xy_object_geometry(): 27 | xy = XY(np.array(((1, 1, 2, -2, np.nan, -2), (1.5, np.nan, np.nan, -0, 1, 1)))) 28 | return xy 29 | 30 | 31 | # sample data for testing kinematic models 32 | @pytest.fixture() 33 | def example_xy_object_kinematics(): 34 | xy = XY( 35 | xy=np.array(((0, 0, -1, 1), (0, 1, np.nan, np.nan), (1, 2, 1, -1))), 36 | framerate=20, 37 | ) 38 | return xy 39 | 40 | 41 | @pytest.fixture() 42 | def example_equivalent_slope() -> np.ndarray: 43 | equivalent_slope = np.array(((0, 0.15), (-0.11, 0.2), (0.5, -0.5))) 44 | return equivalent_slope 45 | 46 | 47 | @pytest.fixture() 48 | def example_velocity() -> np.ndarray: 49 | velocity = np.array(((1, 0.1), (2.8, 5), (2.3, 2.3))) 50 | return velocity 51 | 52 | 53 | @pytest.fixture() 54 | def example_acceleration() -> np.ndarray: 55 | acceleration = np.array(((1.8, 4.9), (0.65, 1.1), (-0.5, -2.7))) 56 | return acceleration 57 | 58 | 59 | @pytest.fixture() 60 | def example_equivalent_mass() -> np.ndarray: 61 | equivalent_mass = np.array(((1, 1.011), (1.006, 1.02), (1.118, 1.118))) 62 | return equivalent_mass 63 | 64 | 65 | @pytest.fixture() 66 | def example_pitch_dfl(): 67 | pitch = Pitch.from_template("dfl", length=100, width=50, sport="football") 68 | return pitch 69 | 70 | 71 | @pytest.fixture() 72 | def example_xy_objects_space_control() -> Tuple[XY, XY]: 73 | xy1 = XY( 74 | xy=np.array( 75 | ( 76 | (-30, 0, 0, 0, 0, 10), 77 | (-31, 0, 0, 0, 1, 11), 78 | ) 79 | ), 80 | framerate=20, 81 | ) 82 | xy2 = XY( 83 | xy=np.array( 84 | ( 85 | (30, 0, 0, 0, 4, -10), 86 | (31, 0, np.nan, np.nan, 5, -11), 87 | ) 88 | ), 89 | framerate=20, 90 | ) 91 | 92 | return xy1, xy2 93 | 
-------------------------------------------------------------------------------- /floodlight/models/base.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | from functools import wraps 3 | 4 | from floodlight import Pitch 5 | 6 | 7 | class BaseModel: 8 | """Base class for all models. 9 | 10 | Parameters 11 | ---------- 12 | pitch: Pitch, optional 13 | Some models require pitch information, so the corresponding Pitch object is 14 | handled during initialization. 15 | """ 16 | 17 | def __init__(self, pitch: Pitch = None): 18 | 19 | if pitch is not None: 20 | self.check_pitch(pitch) 21 | self._pitch = pitch 22 | 23 | def __str__(self): 24 | return f"Floodlight {self.__class__.__name__}" 25 | 26 | @property 27 | def is_fitted(self) -> bool: 28 | """Returns ``True`` if all model parameters (those with a trailing underscore) 29 | are fitted (i.e. not None), and ``False`` otherwise.""" 30 | fitted = all( 31 | [ 32 | vars(self)[v] is not None 33 | for v in vars(self) 34 | if (v.endswith("_") and not v.startswith("__")) 35 | ] 36 | ) 37 | 38 | return fitted 39 | 40 | @staticmethod 41 | def check_pitch(pitch: Pitch): 42 | """ 43 | Performs a series of checks on a Pitch object and raises warnings if the pitch 44 | configuration may affect computation results. 45 | 46 | Parameters 47 | ---------- 48 | pitch: Pitch 49 | Pitch object the checks are performed on. 50 | """ 51 | # check if metrical system 52 | if not pitch.is_metrical: 53 | warnings.warn( 54 | "Model initialized with non-metrical pitch. Results may be distorted, " 55 | "use at your own risk.", 56 | category=RuntimeWarning, 57 | ) 58 | 59 | 60 | def requires_fit(func): 61 | """Decorator function for Model-based class-methods that require a previous call to 62 | that model's fit()-method. 
Raises a ValueError if fit() has not been called yet.""" 63 | 64 | @wraps(func) 65 | def wrapper(*args, **kwargs): 66 | model = args[0] 67 | if not model.is_fitted: 68 | raise ValueError( 69 | f"Not all model parameters have been calculated yet. Try " 70 | f"running {model.__class__.__name__}.fit() before calling " 71 | f"this method" 72 | ) 73 | return func(*args, **kwargs) 74 | 75 | return wrapper 76 | -------------------------------------------------------------------------------- /docs/source/compendium/0_compendium.rst: -------------------------------------------------------------------------------- 1 | ===================== 2 | About this Compendium 3 | ===================== 4 | 5 | .. TIP:: 6 | If you would like to use this package without worrying about all the fiddly details, check out the guides and API references. However, if you want to take a deep dive into sports data analysis and the engine room of this package, look no further! 7 | 8 | 9 | Match and performance data from all kinds of sports has seen a remarkable upsurge over the past decade. Better technology and professionalization has led clubs across countries and leagues to collect massive amounts of data and monitor almost every performance related aspect within their organization. Regardless of your background - professional sports, science, industry, hobby developer or fan - sports data analysis is exciting! 10 | 11 | Unfortunately, sports data analysis also comes with a range of pitfalls. Proprietary data, dozens of different data providers (each having their own specific data format), a lack of definitions (what's a "pass" or "shot" anyway?), little (but increasing!) open source algorithms, a huge selection of proposed models with sometimes uncertain validity and so on and so forth. 12 | 13 | The *floodlight* package aims to provide a high-level framework that tries to help with some of these issues in team sports. 
In a nutshell, the basic idea is to create a set of streamlined data structures and standard functionality to make your life working with sports data a lot easier. While we're aiming at an intuitive and easy-to-use high-level interface, there's still plenty of complexity unfolding under the hood. Creating basic, flexible data structures that can hold almost any information about sports play - independent of the data provider - is a challenge that asks for a rather technical reflection of the matter. 14 | 15 | And that's where this compendium comes into play. The idea of this read is to provide you with an in-depth discussion of core concepts, challenges and caveats when working with team sports data experienced by the core development team over the past years. We additionally demonstrate how each of these issues has influenced design choices, how certain problems are tackled in our implementation and which conventions we've established in an attempt to tackle the complexity inherent to the task. We hope this document is of help if you want to know more about sports data in general or as an in-depth guide to the package if you want to start developing with us! 16 | 17 | Let's get started! 
18 | -------------------------------------------------------------------------------- /tests/test_models/test_geometry.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from floodlight.models.geometry import CentroidModel 5 | 6 | 7 | # Test fit function of CentroidModel with different xIDs excluded 8 | @pytest.mark.unit 9 | def test_centroid_model_fit(example_xy_object_geometry) -> None: 10 | # Arrange 11 | xy = example_xy_object_geometry 12 | 13 | # Act 14 | model = CentroidModel() 15 | model.fit(xy) 16 | centroid1 = model._centroid_ 17 | model.fit(xy, exclude_xIDs=[0]) 18 | centroid2 = model._centroid_ 19 | model.fit(xy, exclude_xIDs=[0, 1]) 20 | centroid3 = model._centroid_ 21 | 22 | # Assert 23 | assert np.array_equal(centroid1, np.array(((1.5, -1), (1.25, 0.5)))) 24 | assert np.array_equal(centroid2, np.array(((2, -2), (1, 0.5)))) 25 | assert np.array_equal( 26 | centroid3, 27 | np.array(((np.nan, -2), (1, 1))), 28 | equal_nan=True, 29 | ) 30 | 31 | 32 | # Test centroid function of CentroidModel 33 | @pytest.mark.unit 34 | def test_centroid(example_xy_object_geometry) -> None: 35 | # Arrange 36 | xy = example_xy_object_geometry 37 | 38 | # Act 39 | model = CentroidModel() 40 | model.fit(xy) 41 | centroid = model.centroid() 42 | 43 | # Assert 44 | assert np.array_equal(centroid, np.array(((1.5, -1), (1.25, 0.5)))) 45 | 46 | 47 | # Test centroid_distance function of CentroidModel 48 | @pytest.mark.unit 49 | def test_centroid_distance(example_xy_object_geometry) -> None: 50 | # Arrange 51 | xy = example_xy_object_geometry 52 | 53 | # Act 54 | model = CentroidModel() 55 | model.fit(xy) 56 | distance = model.centroid_distance(xy) 57 | 58 | # Assert 59 | assert np.array_equal( 60 | np.round(distance, 3), 61 | np.array(((2.062, 1.118, np.nan), (np.nan, np.nan, 0.559))), 62 | equal_nan=True, 63 | ) 64 | 65 | 66 | # Test stretch_index function of CentroidModel 67 | @pytest.mark.unit 68 
| def test_stretch_index(example_xy_object_geometry) -> None: 69 | # Arrange 70 | xy = example_xy_object_geometry 71 | xy.framerate = 20 72 | 73 | # Act 74 | model = CentroidModel() 75 | model.fit(xy) 76 | stretch_index1 = model.stretch_index(xy) 77 | stretch_index2 = model.stretch_index(xy, axis="x") 78 | stretch_index3 = model.stretch_index(xy, axis="y") 79 | 80 | # Assert 81 | assert np.array_equal(np.round(stretch_index1, 3), np.array((1.59, 0.559))) 82 | assert np.array_equal(np.round(stretch_index2, 3), np.array((0.5, 0.25))) 83 | assert np.array_equal(np.round(stretch_index3, 3), np.array((1.333, 0.5))) 84 | assert stretch_index1.framerate == 20 85 | -------------------------------------------------------------------------------- /tests/test_transforms/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | from floodlight import XY 4 | 5 | 6 | @pytest.fixture() 7 | def example_sequence(): 8 | seq = np.array( 9 | [np.nan, np.nan, -5.07, -2.7, -3, np.nan, np.nan, 1.53, 27.13, None, 30.06] 10 | ) 11 | return seq 12 | 13 | 14 | @pytest.fixture() 15 | def example_sequence_empty(): 16 | seq = np.empty(()) 17 | return seq 18 | 19 | 20 | @pytest.fixture() 21 | def example_sequence_two_dimensional(): 22 | seq = np.array([[0, 1, 2], [3, 4, 5]]) 23 | return seq 24 | 25 | 26 | @pytest.fixture() 27 | def example_sequence_full(): 28 | seq = np.array([-5.07, -2.7, 1.53, 27.13, 30.06]) 29 | return seq 30 | 31 | 32 | @pytest.fixture() 33 | def example_sequence_nan(): 34 | seq = np.array([np.nan, np.nan, np.nan, np.nan, np.nan]) 35 | return seq 36 | 37 | 38 | @pytest.fixture() 39 | def example_xy_filter(): 40 | 41 | xy = XY( 42 | np.array( 43 | [ 44 | [np.nan, -8.66, np.nan, 1], 45 | [np.nan, -6.29, np.nan, 2], 46 | [-5.07, -4.31, np.nan, 3], 47 | [-2.7, -1.95, np.nan, 4], 48 | [np.nan, -0.13, np.nan, 5], 49 | [np.nan, 2.31, np.nan, 6], 50 | [1.53, 3.74, np.nan, 7], 51 | [5.13, 6.53, np.nan, 
8], 52 | [7.02, 8.07, np.nan, 9], 53 | [9.48, 10.53, np.nan, 8], 54 | [10.09, np.nan, np.nan, 7], 55 | [12.31, np.nan, np.nan, 6], 56 | [13.22, np.nan, np.nan, 5], 57 | [14.88, 14.88, np.nan, 4], 58 | [16.23, 17.05, np.nan, 3], 59 | [17.06, 18.37, np.nan, 2], 60 | [18.56, 19.27, np.nan, 1], 61 | [20.32, 20.46, np.nan, 2], 62 | [21.7, 22.61, np.nan, 3], 63 | [23.11, 23.54, np.nan, 4], 64 | [24.23, 25.25, np.nan, 5], 65 | [25.74, 25.95, np.nan, 6], 66 | [27.13, 28.06, np.nan, 7], 67 | [None, 29.55, np.nan, 8], 68 | [30.06, np.nan, np.nan, 9], 69 | ] 70 | ), 71 | framerate=20, 72 | ) 73 | 74 | return xy 75 | 76 | 77 | @pytest.fixture() 78 | def example_xy_filter_short(): 79 | xy = XY(np.array([[23.11, 23.54, np.nan], [30.06, np.nan, np.nan]]), framerate=20) 80 | return xy 81 | 82 | 83 | @pytest.fixture() 84 | def example_xy_filter_one_frame(): 85 | xy = XY(np.array((0, 1, np.nan)), framerate=20) 86 | return xy 87 | 88 | 89 | @pytest.fixture() 90 | def example_xy_filter_empty(): 91 | xy = XY(np.array(()), framerate=20) 92 | return xy 93 | -------------------------------------------------------------------------------- /tests/test_models/test_base.py: -------------------------------------------------------------------------------- 1 | import warnings 2 | 3 | import pytest 4 | 5 | from floodlight import Pitch 6 | from floodlight.models.base import BaseModel, requires_fit 7 | 8 | 9 | # Test BaseModel dunder-methods 10 | @pytest.mark.unit 11 | def test_base_model_dunders() -> None: 12 | # Arrange 13 | pitch = Pitch((0, 40), (0, 20), unit="m", boundaries="fixed") 14 | model = BaseModel(pitch) 15 | 16 | # Assert 17 | assert hasattr(model, "_pitch") 18 | assert model.__str__() == "Floodlight BaseModel" 19 | 20 | 21 | # Test is_fitted @property 22 | @pytest.mark.unit 23 | def test_is_fitted_property() -> None: 24 | # Arrange 25 | model1 = BaseModel() 26 | model2 = BaseModel() 27 | model3 = BaseModel() 28 | 29 | # Act 30 | # Model 1 is fitted 31 | model1._model_parameter1_ = 0 
32 | model1.model_parameter2_ = 1 33 | model1.__not_a_model_parameter__ = 2 34 | model1._not_a_model_parameter = 3 35 | model1.not_a_model_parameter = None 36 | # Model 2 is not fully fitted 37 | model2._model_parameter1_ = None 38 | model2.model_parameter2_ = 0 39 | model2.__not_a_model_parameter__ = 1 40 | model2._not_a_model_parameter = 2 41 | model2.not_a_model_parameter = None 42 | # Model 3 has no model parameters 43 | model3.__not_a_model_parameter__ = 1 44 | model3._not_a_model_parameter = 2 45 | model3.not_a_model_parameter = None 46 | 47 | # Assert 48 | assert model1.is_fitted 49 | assert not model2.is_fitted 50 | assert model3.is_fitted 51 | 52 | 53 | # Test check_pitch @staticmethod 54 | @pytest.mark.unit 55 | def test_check_pitch_staticmethod() -> None: 56 | # Arrange 57 | pitch1 = Pitch((0, 40), (0, 20), unit="m", boundaries="fixed") 58 | pitch2 = Pitch((0, 40), (0, 20), unit="percent", boundaries="fixed") 59 | 60 | # Assert 61 | # raise NO warning 62 | with warnings.catch_warnings(): 63 | warnings.simplefilter("error") 64 | BaseModel.check_pitch(pitch1) 65 | # raise warning 66 | with pytest.warns(RuntimeWarning): 67 | BaseModel.check_pitch(pitch2) 68 | 69 | 70 | # Test requires_fit decorator 71 | @pytest.mark.unit 72 | def test_requires_fit_decorator_function() -> None: 73 | # Arrange 74 | class MockModel(BaseModel): 75 | def __init__(self): 76 | super().__init__() 77 | self._fitted_parameter_ = None 78 | 79 | def fit(self, arg): 80 | self._fitted_parameter_ = arg 81 | 82 | @requires_fit 83 | def get_fitted_parameter(self): 84 | return self._fitted_parameter_ 85 | 86 | # Act 87 | model1 = MockModel() 88 | model2 = MockModel() 89 | arg = 1 90 | model2.fit(arg) 91 | 92 | # Assert 93 | with pytest.raises(ValueError): 94 | model1.get_fitted_parameter() 95 | assert model2.get_fitted_parameter() == arg 96 | -------------------------------------------------------------------------------- /tests/test_metrics/test_apen.py: 
-------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from floodlight.metrics.entropy import approx_entropy 5 | 6 | 7 | # Test approx_entropy function 8 | @pytest.mark.unit 9 | def test_approx_entropy_logistic_map(check_vals=[0, 0.23, 0.45], precision=0.1) -> None: 10 | """Check results for a logistic map according to Pincus (1991). 11 | 12 | References 13 | ---------- 14 | Pincus, S. M. (1991). Approximate entropy as a measure of system complexity. 15 | Proceedings of the National Academy of Sciences, 88(6), 2297-2301. 16 | 17 | Parameters 18 | ---------- 19 | check_vals: Values from Pincus (1991) 20 | precision: allowed deviation from test values. 21 | 22 | """ 23 | 24 | def logistic_map(N, R, x_0=np.random.uniform(0.0, 1.0)): 25 | """Generates a signal from a logistic map system. 26 | 27 | Pincus(1991) equation [12] 28 | x_{i+1} = R x_i (1 - x_i) 29 | 30 | """ 31 | x = np.zeros(N) 32 | x[0] = x_0 33 | for i in range(N - 1): 34 | x[i + 1] = R * x[i] * (1 - x[i]) 35 | return x 36 | 37 | # Using the values from Pincus (1991) 38 | R = [3.5, 3.6, 3.8] # Parameters for logistic map 39 | N = [300, 1000, 3000] # length of signal 40 | apens_logistic = np.zeros(len(R) * len(N)) 41 | counter = 0 42 | # iterate through combinations of R and N 43 | for r in R: 44 | log_map = logistic_map(np.max(N), r, x_0=0.5) 45 | for n in N: 46 | apens_logistic[counter] = approx_entropy(log_map[0:n], 2, 0.025) 47 | counter += 1 48 | apens_logistic.shape = (3, 3) 49 | 50 | assert np.all(np.abs(np.mean(apens_logistic, axis=1) - check_vals) < precision) 51 | 52 | 53 | @pytest.mark.unit 54 | def test_approx_entropy_series_5(precision=0.1, reps=100) -> None: 55 | """Check results according to Pincus et al. (1991). 56 | 57 | Pincus, S. M., Gladstone, I. M., & Ehrenkranz, R. A. (1991). A regularity statistic 58 | for medical data analysis. Journal of clinical monitoring, 7(4), 335-345. 
# signal consists of the repeating pattern [1, 2, 1, 3]
autosummary:: 50 | :nosignatures: 51 | 52 | read_position_data_csv 53 | create_links_from_meta_data 54 | get_meta_data 55 | get_column_names_from_csv 56 | 57 | .. rubric:: Opta 58 | 59 | .. currentmodule:: floodlight.io.opta 60 | .. autosummary:: 61 | :nosignatures: 62 | 63 | read_event_data_xml 64 | get_opta_feedtype 65 | 66 | .. rubric:: Second Spectrum 67 | 68 | .. currentmodule:: floodlight.io.secondspectrum 69 | .. autosummary:: 70 | :nosignatures: 71 | 72 | read_position_data_jsonl 73 | read_event_data_jsonl 74 | read_teamsheets_from_meta_json 75 | 76 | .. rubric:: Skillcorner 77 | 78 | .. currentmodule:: floodlight.io.skillcorner 79 | .. autosummary:: 80 | :nosignatures: 81 | 82 | read_position_data_json 83 | 84 | .. rubric:: Sportradar 85 | 86 | .. currentmodule:: floodlight.io.sportradar 87 | .. autosummary:: 88 | :nosignatures: 89 | 90 | read_event_data_json 91 | 92 | .. rubric:: StatsBomb 93 | 94 | .. currentmodule:: floodlight.io.statsbomb 95 | .. autosummary:: 96 | :nosignatures: 97 | 98 | read_open_event_data_json 99 | read_teamsheets_from_open_event_data_json 100 | 101 | .. rubric:: StatsPerform 102 | 103 | .. currentmodule:: floodlight.io.statsperform 104 | .. autosummary:: 105 | :nosignatures: 106 | 107 | read_position_data_txt 108 | read_open_position_data_csv 109 | read_position_data_from_url 110 | read_event_data_xml 111 | read_open_event_data_csv 112 | read_event_data_from_url 113 | read_teamsheets_from_position_data_txt 114 | read_teamsheets_from_event_data_xml 115 | read_teamsheets_from_open_data_csv 116 | 117 | .. rubric:: Tracab (ChyronHego) 118 | 119 | .. currentmodule:: floodlight.io.tracab 120 | .. 
autosummary:: 121 | :nosignatures: 122 | 123 | read_position_data_dat 124 | read_teamsheets_from_dat 125 | read_teamsheets_from_meta_json 126 | -------------------------------------------------------------------------------- /tests/test_core/test_code.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | from floodlight.core.code import Code 5 | 6 | 7 | # Test dunder methods 8 | @pytest.mark.unit 9 | def test_dunder_methods(example_code: Code, example_code_int: Code) -> None: 10 | example_code[1] = "R" 11 | code_eq_R = [False, True, False, False, False, False, False, False, False, False] 12 | code_ne_R = [True, False, True, True, True, True, True, True, True, True] 13 | 14 | # __len__ 15 | assert len(example_code) == 10 16 | # __getitem__ and __setitem__ 17 | assert example_code[0] == "A" 18 | assert example_code[1] == "R" 19 | # __eq__ and __ne__ 20 | assert ((example_code == "R") == code_eq_R).all() 21 | assert ((example_code != "R") == code_ne_R).all() 22 | # __gt__, __lt__, __ge__, and __le__ 23 | assert ((example_code_int < 1) == [True, False, False, False]).all() 24 | assert ((example_code_int > 2) == [False, False, False, True]).all() 25 | assert ((example_code_int <= 1) == [True, True, False, False]).all() 26 | assert ((example_code_int >= 2) == [False, False, True, True]).all() 27 | 28 | 29 | # Test def token property 30 | @pytest.mark.unit 31 | def test_token(example_code: Code) -> None: 32 | # Act 33 | token = example_code.token 34 | 35 | # Assert 36 | assert token == ["A", "H"] 37 | 38 | 39 | # Test def find_sequences(return_type) method 40 | @pytest.mark.unit 41 | def test_find_sequences( 42 | example_code: Code, example_code_int: Code, example_code_empty 43 | ) -> None: 44 | # literal token 45 | assert example_code.find_sequences() == {"A": [(0, 5)], "H": [(5, 10)]} 46 | assert example_code.find_sequences(return_type="list") == [ 47 | (0, 5, "A"), 48 | (5, 10, "H"), 49 | ] 50 | # numeric 
token and single occurrences 51 | assert example_code_int.find_sequences() == { 52 | 0: [(0, 1)], 53 | 1: [(1, 2)], 54 | 2: [(2, 3)], 55 | 3: [(3, 4)], 56 | } 57 | assert example_code_int.find_sequences(return_type="list") == [ 58 | (0, 1, 0), 59 | (1, 2, 1), 60 | (2, 3, 2), 61 | (3, 4, 3), 62 | ] 63 | # empty code 64 | assert example_code_empty.find_sequences() == {} 65 | assert example_code_empty.find_sequences(return_type="list") == [] 66 | 67 | 68 | # Test def slice(startframe, endframe, inplace) method 69 | @pytest.mark.unit 70 | def test_slice(example_code: Code) -> None: 71 | # copy 72 | code = example_code 73 | code_deep_copy = code.slice() 74 | assert code is not code_deep_copy 75 | assert code.code is not code_deep_copy.code 76 | assert (code.code == code_deep_copy.code).all() 77 | 78 | # slicing 79 | code_short = code.slice(endframe=3) 80 | code_mid = code.slice(startframe=4, endframe=6) 81 | code_none = code.slice(startframe=1, endframe=1) 82 | assert (code_short.code == ["A", "A", "A"]).all() 83 | assert (code_mid.code == ["A", "H"]).all() 84 | assert code_none.code.size == 0 85 | assert code.slice() 86 | 87 | # inplace 88 | code.slice(startframe=8, inplace=True) 89 | assert (code.code == ["H", "H"]).all() 90 | -------------------------------------------------------------------------------- /tests/test_vis/test_pitches.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import matplotlib 4 | matplotlib.use('agg') 5 | import matplotlib.pyplot as plt # noqa: 402 6 | 7 | from floodlight.vis.pitches import plot_handball_pitch, plot_football_pitch # noqa: 402 8 | 9 | 10 | # Test def plot_*_pitch( 11 | # xlim: Tuple[Numeric, Numeric], 12 | # ylim: Tuple[Numeric, Numeric], 13 | # (length: Numeric), 14 | # (width: Numeric), 15 | # unit: str, 16 | # color_scheme: str, 17 | # show_axis_ticks: bool, 18 | # ax: matplotlib.axes, 19 | # **kwargs,) -> matplotib.axes 20 | # Test return 21 | 
# football 22 | 23 | 24 | @pytest.mark.plot 25 | def test_plot_football_pitch_return_matplotlib_axes( 26 | example_input_plot_football_pitch, 27 | ) -> None: 28 | # Act 29 | ax = plot_football_pitch(*example_input_plot_football_pitch) 30 | # Assert 31 | assert isinstance(ax, matplotlib.axes.Axes) 32 | plt.close() 33 | 34 | 35 | # handball 36 | @pytest.mark.plot 37 | def test_plot_handball_pitch_return_matplotlib_axes( 38 | example_input_plot_handball_pitch, 39 | ) -> None: 40 | # Act 41 | ax = plot_handball_pitch(*example_input_plot_handball_pitch) 42 | # Assert 43 | assert isinstance(ax, matplotlib.axes.Axes) 44 | plt.close() 45 | 46 | 47 | # Test ticks 48 | # football 49 | @pytest.mark.plot 50 | def test_plot_football_pitch_show_axis_ticks_default( 51 | example_input_plot_football_pitch, 52 | ) -> None: 53 | # Act 54 | ax = plot_football_pitch(*example_input_plot_football_pitch) 55 | # Assert 56 | assert ax.get_xticks() == [] 57 | assert ax.get_yticks() == [] 58 | plt.close() 59 | 60 | 61 | @pytest.mark.plot 62 | def test_plot_football_pitch_show_axis_ticks_True( 63 | example_input_plot_football_pitch_axis_ticks, 64 | ) -> None: 65 | # Act 66 | ax = plot_football_pitch(*example_input_plot_football_pitch_axis_ticks) 67 | # Assert 68 | assert np.array_equal( 69 | np.array(ax.get_xticks()), np.array([-20, 0, 20, 40, 60, 80, 100, 120]) 70 | ) 71 | assert np.array_equal( 72 | np.array(ax.get_yticks()), 73 | np.array([-10.0, 0.0, 10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0]), 74 | ) 75 | plt.close() 76 | 77 | 78 | # handball 79 | @pytest.mark.plot 80 | def test_plot_handball_pitch_show_axis_ticks_default( 81 | example_input_plot_handball_pitch, 82 | ) -> None: 83 | # Act 84 | ax = plot_handball_pitch(*example_input_plot_handball_pitch) 85 | # Assert 86 | assert ax.get_xticks() == [] 87 | assert ax.get_yticks() == [] 88 | plt.close() 89 | 90 | 91 | @pytest.mark.plot 92 | def test_plot_handball_pitch_show_axis_ticks_True( 93 | 
example_input_plot_handball_pitch_axis_ticks, 94 | ) -> None: 95 | # Act 96 | ax = plot_handball_pitch(*example_input_plot_handball_pitch_axis_ticks) 97 | # Assert 98 | assert np.array_equal( 99 | np.array(ax.get_xticks()), np.array([-5, 0, 5, 10, 15, 20, 25, 30, 35, 40, 45]) 100 | ) 101 | assert np.array_equal( 102 | np.array(ax.get_yticks()), 103 | np.array([-2.5, 0, 2.5, 5, 7.5, 10, 12.5, 15, 17.5, 20, 22.5]), 104 | ) 105 | plt.close() 106 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python,pycharm 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,pycharm 3 | 4 | 5 | ### Data that is not the Toy Dataset ### 6 | !.data/ 7 | .data/* 8 | !.data/toy_dataset/ 9 | !.data/toy_dataset/* 10 | 11 | 12 | ### PyCharm ### 13 | .idea/ 14 | 15 | 16 | ### Python ### 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | share/python-wheels/ 40 | *.egg-info/ 41 | .installed.cfg 42 | *.egg 43 | MANIFEST 44 | 45 | # PyInstaller 46 | # Usually these files are written by a python script from a template 47 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
48 | *.manifest 49 | *.spec 50 | 51 | # Installer logs 52 | pip-log.txt 53 | pip-delete-this-directory.txt 54 | 55 | # Unit test / coverage reports 56 | htmlcov/ 57 | .tox/ 58 | .nox/ 59 | .coverage 60 | .coverage.* 61 | .cache 62 | nosetests.xml 63 | coverage.xml 64 | *.cover 65 | *.py,cover 66 | .hypothesis/ 67 | .pytest_cache/ 68 | cover/ 69 | 70 | # Translations 71 | *.mo 72 | *.pot 73 | 74 | # Django stuff: 75 | *.log 76 | local_settings.py 77 | db.sqlite3 78 | db.sqlite3-journal 79 | 80 | # Flask stuff: 81 | instance/ 82 | .webassets-cache 83 | 84 | # Scrapy stuff: 85 | .scrapy 86 | 87 | # Sphinx documentation 88 | docs/_build/ 89 | 90 | # PyBuilder 91 | .pybuilder/ 92 | target/ 93 | 94 | # Jupyter Notebook 95 | .ipynb_checkpoints 96 | 97 | # IPython 98 | profile_default/ 99 | ipython_config.py 100 | 101 | # pyenv 102 | # For a library or package, you might want to ignore these files since the code is 103 | # intended to run in multiple environments; otherwise, check them in: 104 | .python-version 105 | 106 | # pipenv 107 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 108 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 109 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 110 | # install all needed dependencies. 111 | #Pipfile.lock 112 | 113 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # End of https://www.toptal.com/developers/gitignore/api/python,pycharm 157 | 158 | # Mac OS 159 | .DS_Store 160 | -------------------------------------------------------------------------------- /floodlight/metrics/entropy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.typing as npt 3 | 4 | 5 | def approx_entropy(sig: npt.NDArray, m: int = 2, r: float = 0.5) -> float: 6 | """Calculates the Approximate Entropy ApEn(m,r) of sig according to Pincus (1991). 7 | [1]_ 8 | 9 | Parameters 10 | ---------- 11 | sig: np.array 12 | A time-series as np.ndarray with a single dimension (sig.ndim == 1). 13 | m: int, optional 14 | Comparison length of runs. Typically, m in {2,3}. Defaults to 2. 15 | r: float, optional 16 | Filtering level. Defaults to 0.5. 17 | 18 | Returns 19 | ------- 20 | ApEn: float 21 | The Approximate Entropy of sig. 22 | 23 | Notes 24 | ----- 25 | Time-series must be taken at equally spaced time points. Lower bound according to 26 | Pincus, Gladstone, Ehrenkranz (1991) is 50 time points [2]_. The filtering level r 27 | should be at least three times larger in magnitude as the noise. 28 | 29 | Rule of thumb: 0.1-0.25 of data STD. 30 | 31 | References 32 | ---------- 33 | .. 
[1] `Pincus, S. M. (1991). Approximate entropy as a measure of system complexity. 34 | Proceedings of the National Academy of Sciences, 88(6), 2297-2301. 35 | `_ 36 | .. [2] `Pincus, S. M., Gladstone, I. M., & Ehrenkranz, R. A. (1991). A regularity 37 | statistic for medical data analysis. Journal of clinical monitoring, 7(4), 38 | 335-345. 39 | `_ 40 | """ 41 | 42 | # sanity checks 43 | if type(sig) is not np.ndarray: 44 | raise TypeError(f"sig should be Numpy.ndarray, got {type(sig)}.") 45 | if sig.ndim != 1: 46 | raise TypeError(f"sig should have only a single dimension, got {sig.ndim}") 47 | if np.any(np.isnan(sig)): 48 | raise ValueError("Signal cannot contain Numpy.NaNs.") 49 | 50 | N = len(sig) 51 | 52 | def phi_m(m_): 53 | """Small helper function which calculates the sample entropy. 54 | 55 | Parameters 56 | ---------- 57 | m: comparison length 58 | 59 | Returns 60 | ------- 61 | Phi: sample entropy 62 | """ 63 | no_parts = N - m_ + 1 64 | x_i_s = np.zeros((no_parts, m_)) 65 | # determine reference patterns for chosen segment lengths 66 | for i in range(no_parts): 67 | x_i_s[i, :] = sig[i : (i + m_)] 68 | # placeholder for to determine pattern regularity 69 | c_i_m_r_s = np.zeros(no_parts) 70 | # iterate through all comparisons 71 | for i in range(no_parts): 72 | # determine the maximum distance between current reference pattern 73 | # and the remaining patterns 74 | d_i_j = np.max(np.abs(x_i_s - x_i_s[i, :]), axis=1) 75 | # Sum maximum distances across reference patterns 76 | c_i_m_r_s[i] = np.sum(d_i_j <= r) 77 | # calculate entropy 78 | return np.sum(np.log(c_i_m_r_s)) / no_parts - np.log(no_parts) 79 | 80 | # calculates the approximate entropy as the difference 81 | # between the entropies with two different consecutive segment 82 | # lengths. 83 | ap = phi_m(m) - phi_m(m + 1) 84 | # clamp minimum ap value to zero. 
85 | if ap < np.finfo("float64").eps: 86 | ap = 0.0 87 | return ap 88 | -------------------------------------------------------------------------------- /tests/test_core/test_teamsheet.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | 3 | import pytest 4 | import pandas as pd 5 | 6 | from floodlight.core.teamsheet import Teamsheet 7 | from floodlight.core.definitions import protected_columns 8 | 9 | 10 | @pytest.mark.unit 11 | def test_teamsheet_getter(example_teamsheet_data: pd.DataFrame) -> None: 12 | data = Teamsheet(example_teamsheet_data) 13 | pIDs = data["pID"] 14 | assert pd.Series.equals(pIDs, pd.Series([1, 2, 3])) 15 | 16 | 17 | @pytest.mark.unit 18 | def test_teamsheet_setter(example_teamsheet_data: pd.DataFrame) -> None: 19 | data = Teamsheet(example_teamsheet_data) 20 | data["pID"] = [2, 3, 4] 21 | assert pd.Series.equals(data["pID"], pd.Series([2, 3, 4])) 22 | 23 | 24 | @pytest.mark.unit 25 | def test_column_properties() -> None: 26 | # Arrange df with different columns 27 | columns = ["name", "pID"] 28 | df = pd.DataFrame(columns=columns) 29 | 30 | # trigger post_init 31 | with pytest.raises(ValueError): 32 | teamsheet = Teamsheet(teamsheet=df) 33 | 34 | # add missing column 35 | columns.append("player") 36 | df = pd.DataFrame(columns=columns) 37 | teamsheet = Teamsheet(teamsheet=df) 38 | 39 | # Assert column properties 40 | assert teamsheet.essential == ["player"] 41 | assert teamsheet.protected == ["pID"] 42 | assert teamsheet.custom == ["name"] 43 | assert teamsheet.essential_missing == [] 44 | assert len(teamsheet.protected_missing) > 3 45 | 46 | 47 | @pytest.mark.unit 48 | def test_protected_missing( 49 | example_teamsheet_data: pd.DataFrame, 50 | ) -> None: 51 | data = Teamsheet(example_teamsheet_data) 52 | missing_protected_columns = data.protected_missing 53 | total_num_protected = len(protected_columns) 54 | 55 | assert len(missing_protected_columns) == total_num_protected - 
2 56 | 57 | 58 | @pytest.mark.unit 59 | def test_protected_invalid( 60 | example_teamsheet_data: pd.DataFrame, 61 | ) -> None: 62 | data = Teamsheet(example_teamsheet_data) 63 | data.teamsheet.at[1, "jID"] = -1 64 | invalid_protected_columns = data.protected_invalid 65 | assert invalid_protected_columns == ["jID"] 66 | 67 | 68 | @pytest.mark.unit 69 | def test_column_values_in_range( 70 | example_teamsheet_data: pd.DataFrame, 71 | ) -> None: 72 | # Arrange 73 | data = Teamsheet(example_teamsheet_data) 74 | jID_in_range = data.column_values_in_range("jID", protected_columns) 75 | 76 | assert jID_in_range 77 | 78 | 79 | @pytest.mark.unit 80 | def test_get_links(example_teamsheet_data) -> None: 81 | data = Teamsheet(example_teamsheet_data) 82 | 83 | # trigger checks 84 | with pytest.raises(ValueError): 85 | data.get_links("xID", "pID") 86 | with pytest.raises(ValueError): 87 | data.get_links("pID", "xID") 88 | with pytest.raises(ValueError): 89 | data.get_links("position", "pID") 90 | 91 | links = data.get_links("pID", "jID") 92 | assert links == {1: 1, 2: 13, 3: 99} 93 | 94 | 95 | @pytest.mark.unit 96 | def test_add_xIDs(example_teamsheet_data) -> None: 97 | data = Teamsheet(example_teamsheet_data) 98 | data.add_xIDs() 99 | assert all(data.teamsheet["xID"].values == [0, 1, 2]) 100 | 101 | 102 | @pytest.mark.unit 103 | def test_add_xIDs_overwrite(example_teamsheet_data) -> None: 104 | example_teamsheet_data["xID"] = [2, 0, 1] 105 | original_data = Teamsheet(example_teamsheet_data) 106 | new_data = Teamsheet(deepcopy(example_teamsheet_data)) 107 | new_data.add_xIDs() 108 | assert all(original_data.teamsheet["xID"].values == [2, 0, 1]) 109 | assert all(new_data.teamsheet["xID"].values == [0, 1, 2]) 110 | -------------------------------------------------------------------------------- /docs/source/compendium/5_identifier.rst: -------------------------------------------------------------------------------- 1 | ================================ 2 | Identifier - Linking 
Below is a list of IDs whose names have a special meaning within our package.
mID
    match identifier - Unique number or string for match identification.
@dataclass
class BaseProperty:
    """Base container for one continuous property fragment, wrapping a raw
    numpy array together with a descriptive name and an optional framerate.

    Parameters
    ----------
    property: np.ndarray
        Array holding the property values.
    name: str
        Name of the encoded property.
    framerate: int, optional
        Temporal resolution of data in frames per second/Hertz.
    """

    property: np.ndarray
    name: str
    framerate: int = None

    def __str__(self):
        return f"Floodlight {type(self).__name__} object encoding '{self.name}'"

    def __len__(self):
        return len(self.property)

    def __getitem__(self, key):
        return self.property[key]

    def __setitem__(self, key, value):
        self.property[key] = value

    @classmethod
    def _slice_new(cls, sliced_property, name, framerate):
        # build a fresh instance of the calling (sub)class around sliced data
        return cls(property=sliced_property, name=name, framerate=framerate)

    def slice(
        self, startframe: int = None, endframe: int = None, inplace: bool = False
    ):
        """Return copy of object with sliced property. Mimics numpy's array slicing.

        Parameters
        ----------
        startframe : int, optional
            Start of slice. Defaults to beginning of segment.
        endframe : int, optional
            End of slice (endframe is excluded). Defaults to end of segment.
        inplace: bool, optional
            If set to ``False`` (default), a new object is returned, otherwise the
            operation is performed in place on the called object.

        Returns
        -------
        property_sliced: Union[cls, None]
        """
        window = self.property[startframe:endframe].copy()

        if inplace:
            self.property = window
            return None

        return self._slice_new(
            sliced_property=window,
            name=deepcopy(self.name),
            framerate=deepcopy(self.framerate),
        )


@dataclass
class TeamProperty(BaseProperty):
    """Fragment of one continuous team property. Core class of floodlight.

    Parameters
    ----------
    property: np.ndarray
        A 1-dimensional array of properties of shape (T), where T is the number of
        total frames.
    name: str
        Name of the property (e.g. 'stretch_index').
    framerate: int, optional
        Temporal resolution of data in frames per second/Hertz.
    """


@dataclass
class PlayerProperty(BaseProperty):
    """Fragment of one continuous property per player. Core class of floodlight.

    Parameters
    ----------
    property: np.ndarray
        A 2-dimensional array of properties of shape (T, N), where T is the number of
        total frames and N is the number of players.
    name: str
        Name of the property (e.g. 'speed').
    framerate: int, optional
        Temporal resolution of data in frames per second/Hertz.
    """


@dataclass
class DyadicProperty(BaseProperty):
    """Fragment of one continuous property per player dyad. Core class of floodlight.

    Parameters
    ----------
    property: np.ndarray
        A 3-dimensional array of properties of shape (T, N_1, N_2), where T is the
        number of total frames and {N_1, N_2} are the number of players between dyads
        are formed. For example, the item at (1, 2, 3) encodes the relation from player
        with xID=2 to player with xID=3 at frame 1. Note that players could be in the
        same team (intra-team relations, in this case N_1 = N_2) or opposing teams
        (inter-team relations).
    name: str
        Name of the property (e.g. 'distance').
    framerate: int, optional
        Temporal resolution of data in frames per second/Hertz.
    """
Note that players could be in the 113 | same team (intra-team relations, in this case N_1 = N_2) or opposing teams 114 | (inter-team relations). 115 | name: str 116 | Name of the property (e.g. 'distance'). 117 | framerate: int, optional 118 | Temporal resolution of data in frames per second/Hertz. 119 | """ 120 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Contributing to floodlight 2 | ========================== 3 | 4 | Thank you for considering to help with this package! We warmly welcome all kinds of contributions that fit our scope, irrespective of size. 5 | 6 | Please also feel encouraged to open an issue (or PR) if you found a bug, have general feedback or if you are unsure whether your new feature would be a good addition. 7 | 8 | If all or some of this is new to you, and you would prefer a detailed step-by-step explanation of how to contribute, check out our extended contributing manual in the [documentation](https://floodlight.readthedocs.io). 9 | 10 | 11 | Development 12 | ----------- 13 | 14 | We use *poetry* for development and dependency management, which is based on the static `pyproject.toml` file and replaces `setup.py`. With *poetry*, setting up your local dev environment is rather straightforward: 15 | 16 | 1. Fork and Clone this repository 17 | 2. Install poetry if you have not done so yet 18 | 3. Use poetry to create a virtualenv with all necessary dependencies in the correct versions. To do so, simply run: 19 | 20 | ``` 21 | poetry install 22 | ``` 23 | 24 | Once you've finished your contribution, please send a Pull Request to merge either into `develop` (if you have an independent contribution) or a related feature branch. We would like to kindly encourage you to include appropriate unit tests and documentation into your contribution. 
All tests and necessary mock data are stored in the `tests/` folder. We use *pytest* for testing, and you can run the entire test suite with
"""Column definition schemata for floodlight's DataFrame-backed core objects.

Each entry maps a column name to a dict with its human-readable ``definition``,
the accepted ``dtypes``, and an optional inclusive ``value_range``
(``None`` when any value is allowed).
"""
import numpy as np
import datetime


# fmt: off
essential_events_columns = {
    "eID": {
        "definition": "Event ID - unique number or string that identifies the event "
                      "type. The resulting system can either be provider specific, or "
                      "customary. However, a link between eID and the event "
                      "definitions/descriptions should be available",
        "dtypes": [str, int],
        "value_range": None,
    },
    "gameclock": {
        "definition": "Elapsed time relative to segment start in seconds",
        "dtypes": [float],
        "value_range": [0, np.inf]
    }
}

essential_teamsheet_columns = {
    "player": {
        "definition": "Name of a player. May be abbreviated or contain aliases.",
        "dtypes": [str],
        "value_range": None,
    }
}


protected_columns = {
    "pID": {
        "definition": "Player ID - unique number or string for player identification ",
        "dtypes": [str, int],
        "value_range": None
    },
    "jID": {
        "definition": "Jersey ID - a players jersey number in a single observation",
        "dtypes": [int],
        "value_range": [0, np.inf]
    },
    "xID": {
        # NOTE: trailing space added so the adjacent literals don't concatenate
        # into "teamfor"
        "definition": "Index ID - a players index in the list of all players of a "
                      "team for a given match. This is primarily used for locating "
                      "players data in XY objects, but can also be helpful iterating "
                      "or displaying all players of a team",
        "dtypes": [int],
        "value_range": [0, np.inf]
    },
    "tID": {
        "definition": "Team ID - unique number or string for team identification ",
        "dtypes": [str, int],
        "value_range": None
    },
    "mID": {
        "definition": "Match ID - unique number or string for match identification ",
        "dtypes": [str, int],
        "value_range": None
    },
    "cID": {
        "definition": "Competition ID - unique number or string for competition (e.g. "
                      "league or cup) identification",
        "dtypes": [str, int],
        "value_range": None
    },
    "frameclock": {
        # NOTE: trailing space added so the adjacent literals don't concatenate
        # into "certainframerate"
        "definition": "Elapsed time relative to segment start in frames given a "
                      "certain framerate.",
        "dtypes": [int],
        "value_range": [0, np.inf]
    },
    "timestamp": {
        "definition": "Datetime timestamp. Should be aware and carry a pytz timezone",
        "dtypes": [datetime.datetime],
        "value_range": None
    },
    "minute": {
        "definition": "Minute of the segment when an event took place",
        "dtypes": [int],
        "value_range": [0, np.inf]
    },
    "second": {
        "definition": "Second of the minute of the segment when an event took place",
        "dtypes": [int],
        "value_range": [0, np.inf]
    },
    "outcome": {
        "definition": "Result of an event as included by many data providers. "
                      "Positive/Successful is 1, Negative/Unsuccessful is 0",
        "dtypes": [int],
        "value_range": [0, 1],
    },
    "at_x": {
        "definition": "The x position (longitudinal) where an event took place or "
                      "originated from",
        "dtypes": [float],
        "value_range": None
    },
    "at_y": {
        "definition": "The y position (lateral) where an event took place or "
                      "originated from",
        "dtypes": [float],
        "value_range": None
    },
    "to_x": {
        "definition": "The x position (longitudinal) where an event ended",
        "dtypes": [float],
        "value_range": None
    },
    "to_y": {
        "definition": "The y position (lateral) where an event ended",
        "dtypes": [float],
        "value_range": None
    },
    "started": {
        # NOTE: trailing space added so the adjacent literals don't concatenate
        # into "asan"
        "definition": "Boolean indicating whether a player started the observation "
                      "as an active player.",
        "dtypes": [bool],
        "value_range": None
    }
}

# fmt:on
np.array( 10 | [ 11 | [ 12 | [30.416, 15.463], 13 | [32.848, 10.999], 14 | [34.93, 2.903], 15 | [35.271, 16.948], 16 | [33.492, 6.2], 17 | [32.273, 9.547], 18 | [37.354, 9.504], 19 | [np.nan, np.nan], 20 | ], 21 | [ 22 | [30.416, 15.463], 23 | [32.848, 10.999], 24 | [34.93, 2.903], 25 | [35.271, 16.948], 26 | [33.492, 6.2], 27 | [32.273, 9.547], 28 | [37.354, 9.504], 29 | [np.nan, np.nan], 30 | ], 31 | [ 32 | [30.416, 15.463], 33 | [32.848, 10.999], 34 | [34.93, 2.903], 35 | [35.271, 16.948], 36 | [33.492, 6.2], 37 | [32.273, 9.547], 38 | [37.354, 9.504], 39 | [np.nan, np.nan], 40 | ], 41 | [ 42 | [30.416, 15.463], 43 | [32.848, 10.999], 44 | [34.93, 2.903], 45 | [35.271, 16.948], 46 | [33.492, 6.2], 47 | [32.273, 9.547], 48 | [37.354, 9.504], 49 | [np.nan, np.nan], 50 | ], 51 | [ 52 | [30.416, 15.463], 53 | [32.848, 10.999], 54 | [34.93, 2.903], 55 | [35.271, 16.948], 56 | [33.492, 6.2], 57 | [32.273, 9.547], 58 | [37.354, 9.504], 59 | [np.nan, np.nan], 60 | ], 61 | ] 62 | ) 63 | 64 | return data 65 | 66 | 67 | # EIGD mock data (first 5 frames of first 8 players of team A 68 | # from match_name="48dcd3", segment="00-06-00" in (transformed) floodlight format 69 | @pytest.fixture() 70 | def eigd_sample_data_floodlight_shape() -> np.ndarray: 71 | data = np.array( 72 | [ 73 | [ 74 | 30.416, 75 | 15.463, 76 | 32.848, 77 | 10.999, 78 | 34.93, 79 | 2.903, 80 | 35.271, 81 | 16.948, 82 | 33.492, 83 | 6.2, 84 | 32.273, 85 | 9.547, 86 | 37.354, 87 | 9.504, 88 | np.nan, 89 | np.nan, 90 | ], 91 | [ 92 | 30.416, 93 | 15.463, 94 | 32.848, 95 | 10.999, 96 | 34.93, 97 | 2.903, 98 | 35.271, 99 | 16.948, 100 | 33.492, 101 | 6.2, 102 | 32.273, 103 | 9.547, 104 | 37.354, 105 | 9.504, 106 | np.nan, 107 | np.nan, 108 | ], 109 | [ 110 | 30.416, 111 | 15.463, 112 | 32.848, 113 | 10.999, 114 | 34.93, 115 | 2.903, 116 | 35.271, 117 | 16.948, 118 | 33.492, 119 | 6.2, 120 | 32.273, 121 | 9.547, 122 | 37.354, 123 | 9.504, 124 | np.nan, 125 | np.nan, 126 | ], 127 | [ 128 | 30.416, 129 | 15.463, 
130 | 32.848, 131 | 10.999, 132 | 34.93, 133 | 2.903, 134 | 35.271, 135 | 16.948, 136 | 33.492, 137 | 6.2, 138 | 32.273, 139 | 9.547, 140 | 37.354, 141 | 9.504, 142 | np.nan, 143 | np.nan, 144 | ], 145 | [ 146 | 30.416, 147 | 15.463, 148 | 32.848, 149 | 10.999, 150 | 34.93, 151 | 2.903, 152 | 35.271, 153 | 16.948, 154 | 33.492, 155 | 6.2, 156 | 32.273, 157 | 9.547, 158 | 37.354, 159 | 9.504, 160 | np.nan, 161 | np.nan, 162 | ], 163 | ] 164 | ) 165 | 166 | return data 167 | 168 | 169 | @pytest.fixture() 170 | def filepath_empty() -> str: 171 | path = ".data\\EMPTY" 172 | return path 173 | -------------------------------------------------------------------------------- /tests/test_models/test_kinematics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from floodlight.models.kinematics import DistanceModel, VelocityModel, AccelerationModel 5 | 6 | 7 | # Differences in the kinematic models can be calculated via central or backward 8 | # difference methode. This is specified in the respective models .fit()-method. 9 | # As this has no impact on the calculations in the other class methods, only the 10 | # .fit()-methods are tested with both difference methods. The other class methods are 11 | # tested with the default difference methode (i.e., 'central'). 
def _assert_rounded(actual, expected) -> None:
    # compare to three decimals, treating NaNs at the segment edges as equal
    assert np.array_equal(np.round(actual, 3), np.array(expected), equal_nan=True)


@pytest.mark.unit
def test_distance_model_fit_difference_central(example_xy_object_kinematics) -> None:
    model = DistanceModel()
    model.fit(example_xy_object_kinematics)
    _assert_rounded(
        model._distance_euclidean_,
        ((1, np.nan), (1.118, 1.414), (1.414, np.nan)),
    )


@pytest.mark.unit
def test_distance_model_fit_difference_backward(example_xy_object_kinematics) -> None:
    model = DistanceModel()
    model.fit(example_xy_object_kinematics, difference="backward")
    _assert_rounded(
        model._distance_euclidean_,
        ((0, 0), (1, np.nan), (1.414, np.nan)),
    )


@pytest.mark.unit
def test_distance_covered(example_xy_object_kinematics) -> None:
    model = DistanceModel()
    model.fit(example_xy_object_kinematics)
    _assert_rounded(
        model.distance_covered(),
        ((1, np.nan), (1.118, 1.414), (1.414, np.nan)),
    )


@pytest.mark.unit
def test_cumulative_distance_covered(example_xy_object_kinematics) -> None:
    model = DistanceModel()
    model.fit(example_xy_object_kinematics)
    _assert_rounded(
        model.cumulative_distance_covered(),
        ((1, 0), (2.118, 1.414), (3.532, 1.414)),
    )


@pytest.mark.unit
def test_velocity_model_fit_difference_central(example_xy_object_kinematics) -> None:
    model = VelocityModel()
    model.fit(example_xy_object_kinematics)
    _assert_rounded(
        model._velocity_,
        ((20, np.nan), (22.361, 28.284), (28.284, np.nan)),
    )


@pytest.mark.unit
def test_velocity_model_fit_difference_backward(example_xy_object_kinematics) -> None:
    model = VelocityModel()
    model.fit(example_xy_object_kinematics, difference="backward")
    _assert_rounded(
        model._velocity_,
        ((0, 0), (20, np.nan), (28.284, np.nan)),
    )


@pytest.mark.unit
def test_velocity(example_xy_object_kinematics) -> None:
    model = VelocityModel()
    model.fit(example_xy_object_kinematics)
    _assert_rounded(
        model.velocity(),
        ((20, np.nan), (22.361, 28.284), (28.284, np.nan)),
    )


@pytest.mark.unit
def test_acceleration_model_difference_central(example_xy_object_kinematics) -> None:
    model = AccelerationModel()
    model.fit(example_xy_object_kinematics)
    _assert_rounded(
        model._acceleration_,
        ((47.214, np.nan), (82.843, np.nan), (118.472, np.nan)),
    )
xy = example_xy_object_kinematics 162 | 163 | # Act 164 | acc_model = AccelerationModel() 165 | acc_model.fit(xy, difference="backward") 166 | acceleration = acc_model._acceleration_ 167 | 168 | # Assert 169 | assert np.array_equal( 170 | np.round(acceleration, 3), 171 | np.array(((0, 0), (400, np.nan), (165.685, np.nan))), 172 | equal_nan=True, 173 | ) 174 | 175 | 176 | @pytest.mark.unit 177 | def test_acceleration(example_xy_object_kinematics) -> None: 178 | # Arrange 179 | xy = example_xy_object_kinematics 180 | 181 | # Act 182 | acc_model = AccelerationModel() 183 | acc_model.fit(xy) 184 | acceleration = acc_model.acceleration() 185 | 186 | # Assert 187 | assert np.array_equal( 188 | np.round(acceleration, 3), 189 | np.array(((47.214, np.nan), (82.843, np.nan), (118.472, np.nan))), 190 | equal_nan=True, 191 | ) 192 | -------------------------------------------------------------------------------- /tests/test_io/test_datasets.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from floodlight.io.datasets import EIGDDataset 5 | from floodlight.io.datasets import StatsBombOpenDataset 6 | from floodlight import Events 7 | from floodlight.core.teamsheet import Teamsheet 8 | 9 | 10 | # Test _transform staticmethod from EIGDDataset 11 | @pytest.mark.unit 12 | def test_eigd_transform( 13 | eigd_sample_data_h5_shape, eigd_sample_data_floodlight_shape 14 | ) -> None: 15 | # transform data in raw format 16 | data_transformed = EIGDDataset._transform(eigd_sample_data_h5_shape) 17 | 18 | assert np.array_equal( 19 | data_transformed, eigd_sample_data_floodlight_shape, equal_nan=True 20 | ) 21 | 22 | 23 | # Test get method from StatsBombDataset 24 | @pytest.mark.unit 25 | def test_statsbomb_get() -> None: 26 | 27 | dataset = StatsBombOpenDataset() 28 | events, teamsheets = dataset.get( 29 | "Champions League", 30 | "2004/2005", 31 | "AC Milan vs. 
Liverpool", 32 | ) 33 | assert isinstance(events["HT1"]["Home"], Events) 34 | assert isinstance(events["HT4"]["Away"], Events) 35 | assert isinstance(teamsheets["Home"], Teamsheet) 36 | assert isinstance(teamsheets["Away"], Teamsheet) 37 | 38 | 39 | # Test get_teamsheet method from StatsBombDataset 40 | @pytest.mark.unit 41 | def test_statsbomb_get_teamsheet() -> None: 42 | 43 | dataset = StatsBombOpenDataset() 44 | teamsheets = dataset.get_teamsheets( 45 | "Champions League", 46 | "2004/2005", 47 | "AC Milan vs. Liverpool", 48 | ) 49 | assert isinstance(teamsheets["Home"], Teamsheet) 50 | assert isinstance(teamsheets["Away"], Teamsheet) 51 | assert teamsheets["Home"].teamsheet.at[0, "team_name"] == "AC Milan" 52 | assert teamsheets["Away"].teamsheet.at[0, "team_name"] == "Liverpool" 53 | assert len(teamsheets["Home"].teamsheet) == 14 54 | assert len(teamsheets["Away"].teamsheet) == 14 55 | assert len(teamsheets["Home"].teamsheet.columns) == 6 56 | assert len(teamsheets["Away"].teamsheet.columns) == 6 57 | 58 | 59 | # Test passing custom home_teamsheet to get method 60 | @pytest.mark.unit 61 | def test_statsbomb_get_pass_custom_home_teamsheet() -> None: 62 | 63 | # get teamsheets 64 | dataset = StatsBombOpenDataset() 65 | teamsheets = dataset.get_teamsheets( 66 | "Champions League", 67 | "2004/2005", 68 | "AC Milan vs. Liverpool", 69 | ) 70 | 71 | # customize home teamsheet 72 | teamsheets["Home"].teamsheet.at[0, "player"] = "Dida" # custom entry 73 | teamsheets["Home"].teamsheet.at[0, "pID"] = 999999 # custom entry 74 | teamsheets["Home"]["custom_col"] = 99 # custom column passed to function 75 | teamsheets["Away"]["my_col"] = 99 # custom column but not passed to function 76 | 77 | # call get function with custom teamsheet 78 | events, teamsheets = dataset.get( 79 | "Champions League", 80 | "2004/2005", 81 | "AC Milan vs. 
Liverpool", 82 | teamsheet_home=teamsheets["Home"], 83 | ) 84 | 85 | assert teamsheets["Home"].teamsheet.at[0, "player"] == "Dida" 86 | assert teamsheets["Home"].teamsheet.at[0, "pID"] == 999999 87 | assert "custom_col" in teamsheets["Home"].teamsheet.columns 88 | assert "my_col" not in teamsheets["Away"].teamsheet.columns 89 | 90 | 91 | # Test passing custom away_teamsheet to get method 92 | @pytest.mark.unit 93 | def test_statsbomb_get_pass_custom_away_teamsheet() -> None: 94 | # get teamsheets 95 | dataset = StatsBombOpenDataset() 96 | teamsheets = dataset.get_teamsheets( 97 | "Champions League", 98 | "2004/2005", 99 | "AC Milan vs. Liverpool", 100 | ) 101 | 102 | # customize home teamsheet 103 | teamsheets["Home"].teamsheet.at[0, "player"] = "Dida" # custom entry but not passed 104 | teamsheets["Home"].teamsheet.at[0, "pID"] = 999999 # custom entry but not passed 105 | teamsheets["Home"]["custom_col"] = 99 # custom column but not passed to function 106 | teamsheets["Away"]["my_col"] = 99 # custom column passed to function 107 | 108 | # call get function with custom teamsheet 109 | events, teamsheets = dataset.get( 110 | "Champions League", 111 | "2004/2005", 112 | "AC Milan vs. Liverpool", 113 | teamsheet_away=teamsheets["Away"], 114 | ) 115 | 116 | assert teamsheets["Home"].teamsheet.at[0, "player"] != "Dida" 117 | assert teamsheets["Home"].teamsheet.at[0, "pID"] != 999999 118 | assert "custom_col" not in teamsheets["Home"].teamsheet.columns 119 | assert "my_col" in teamsheets["Away"].teamsheet.columns 120 | 121 | 122 | # Test passing custom away_teamsheet to get method 123 | @pytest.mark.unit 124 | def test_statsbomb_get_pass_custom_teamsheets() -> None: 125 | # get teamsheets 126 | dataset = StatsBombOpenDataset() 127 | teamsheets = dataset.get_teamsheets( 128 | "Champions League", 129 | "2004/2005", 130 | "AC Milan vs. 
Liverpool", 131 | ) 132 | 133 | # customize home teamsheet 134 | teamsheets["Home"].teamsheet.at[0, "player"] = "Dida" # custom entry 135 | teamsheets["Home"].teamsheet.at[0, "pID"] = 999999 # custom entry 136 | teamsheets["Home"]["custom_col"] = 99 # custom column but not passed to function 137 | teamsheets["Away"]["my_col"] = 99 # custom column passed to function 138 | 139 | # call get function with custom teamsheet 140 | events, teamsheets = dataset.get( 141 | "Champions League", 142 | "2004/2005", 143 | "AC Milan vs. Liverpool", 144 | teamsheet_home=teamsheets["Home"], 145 | teamsheet_away=teamsheets["Away"], 146 | ) 147 | 148 | assert teamsheets["Home"].teamsheet.at[0, "player"] == "Dida" 149 | assert teamsheets["Home"].teamsheet.at[0, "pID"] == 999999 150 | assert "custom_col" in teamsheets["Home"].teamsheet.columns 151 | assert "my_col" in teamsheets["Away"].teamsheet.columns 152 | -------------------------------------------------------------------------------- /docs/source/compendium/2_design.rst: -------------------------------------------------------------------------------- 1 | ====== 2 | Design 3 | ====== 4 | 5 | 6 | At this point, let's summarize a few design principles that we decided build our package around. These are problem-specific and on top of any general software design principles. During implementation, we've naturally encountered questions on how to solve *this* or how to incorporate *that* special case. Often then, half a dozen possible solutions come to mind, none of which appears to be clearly right or wrong. If you've decided to contribute and find yourself in this situation, these principles hopefully come in handy. 7 | 8 | The scope of this package is another reason we explicitly formalized these principles. Designing a high-level framework bears the natural risk of mutating into a jack of all trades that does everything, but nothing really well. 
There are plenty of use cases in which our package might come in handy, and even more possible ways to extend it. Yet, each of these cases could require a different implementation focus, and possibly conflicting ones.
Submodules that solve a sport-specific problem
33 | 34 | To handle event properties beyond those two descriptors, we do not enforce any definitions or mappings. Instead, we provide a short list of *protected columns* that are unambiguous to standardize, such as "at_x" and "at_y" to include event locations. You may use the object as you like, but any method that requires this information then checks if the respective column names are available, and throws and error if not (soft constraints). 35 | 36 | 5. **Sensible Defaults** 37 | 38 | We aim to design any functionality as little opinionated as possible. There should be a general way to do things, with neutral outputs that do not take a subjective perspective. 39 | 40 | However, we are fully aware that sports data processing is full of choices. Many algorithms or standard procedures can be done this way or that way, which require selecting a default behavior. In these cases, we intend to choose a sensible default that is independent of any data flavours or personal preferences. In case multiple options are available, we inform the user about our choice and include handling processing alternatives via function parameters. 41 | 42 | We also try to not exclude any potential personal preferences or use cases by requirements that are to strict, or function calls that are narrower in their scope as their name and description promises. 43 | 44 | 6. **Intuitive High-Level Interfaces** 45 | 46 | Data processing is complex, and we aim to tackle this complexity with our approach. Thus, all low-level interfaces may bear a complexity that is familiar to users with a data science background. However, we also acknowledge that the package may be used by less experienced users. These users may not have the experience to establish all steps of a full processing pipelines all the way from raw data to final results. 47 | 48 | In this case, our framework might help set up an environment that takes away the worries of data handling and lets the user focus on analysis. 
To enable such an approach, all high-level functionality should have intuitive and easy to use interfaces. 49 | -------------------------------------------------------------------------------- /floodlight/core/code.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from dataclasses import dataclass 3 | from typing import Any, Dict, List, Union 4 | 5 | import numpy as np 6 | 7 | 8 | @dataclass 9 | class Code: 10 | """Fragment of continuous signal encoding one game state. Core class of floodlight. 11 | 12 | Parameters 13 | ---------- 14 | code: np.ndarray 15 | One-dimensional array with codes describing a sequence of play. 16 | name: str 17 | Name of encoded game state (e.g. 'possession'). 18 | definitions: dict, optional 19 | Dictionary of the form {token: definition} where each code category is defined 20 | or explained. 21 | framerate: int, optional 22 | Temporal resolution of data in frames per second/Hertz. 23 | 24 | Attributes 25 | ---------- 26 | token: list 27 | A list of all tokens used in game code, in ascending order. 
28 | 29 | """ 30 | 31 | code: np.ndarray 32 | name: str 33 | definitions: dict = None 34 | framerate: int = None 35 | 36 | def __str__(self): 37 | return f"Floodlight Code object encoding '{self.name}'" 38 | 39 | def __len__(self): 40 | return len(self.code) 41 | 42 | def __getitem__(self, key): 43 | return self.code[key] 44 | 45 | def __setitem__(self, key, value): 46 | self.code[key] = value 47 | 48 | def __eq__(self, other): 49 | return self.code == other 50 | 51 | def __ne__(self, other): 52 | return self.code != other 53 | 54 | def __gt__(self, other): 55 | return self.code > other 56 | 57 | def __lt__(self, other): 58 | return self.code < other 59 | 60 | def __ge__(self, other): 61 | return self.code >= other 62 | 63 | def __le__(self, other): 64 | return self.code <= other 65 | 66 | @property 67 | def token(self) -> list: 68 | """A list of all tokens used in game code, in ascending order.""" 69 | token = list(np.unique(self.code)) 70 | token.sort() 71 | 72 | return token 73 | 74 | def find_sequences( 75 | self, return_type: str = "dict" 76 | ) -> Union[Dict[Any, tuple], List[tuple]]: 77 | """Finds all sequences of consecutive appearances for each token and returns 78 | their start and end indices. 79 | 80 | Parameters 81 | ---------- 82 | return_type: {'dict', 'list'}, default='dict' 83 | Specifies type of the returned sequences object. 84 | 85 | 86 | Returns 87 | ------- 88 | sequences: Union[Dict[Any, tuple], List[tuple]] 89 | If ``return_type`` is 'dict', returns a dictionary of the form 90 | ``{token: [(sequence_start_idx, sequence_end_idx)]}``. 91 | If ``return_type`` is 'list', returns a list of the form 92 | ``[(sequence_start_idx, sequence_end_idx, token)]`` ordered by the 93 | respective sequence start indices. 
94 | 95 | 96 | Examples 97 | -------- 98 | >>> import numpy as np 99 | >>> from floodlight import Code 100 | 101 | >>> code = Code(code=np.array([1, 1, 2, 1, 1, 3, 1, 1]), name="intensity") 102 | >>> code.find_sequences() 103 | {1: [(0, 2), (3, 5), (6, 8)], 2: [(2, 3)], 3: [(5, 6)]} 104 | 105 | >>> code = Code(code=np.array(['A', 'A', 'H', 'H', 'H', 'H', 'A', 'A', 'A']), 106 | ... name="possession") 107 | >>> code.find_sequences(return_type="list") 108 | [(0, 2, 'A'), (2, 6, 'H'), (6, 9, 'A')] 109 | """ 110 | if return_type not in ["dict", "list"]: 111 | raise ValueError( 112 | f"Expected return_type to be one of ['list', 'dict'], got {return_type}" 113 | ) 114 | 115 | # get all token for token-wise query 116 | all_token = self.token 117 | # get change points for each token 118 | # NOTE: as np.diff can't be called on non-numerical token, a token-wise approach 119 | # is necessary, where self == token produces a boolean array 120 | change_points = { 121 | token: np.where(np.diff(self == token, prepend=np.nan, append=np.nan))[0] 122 | for token in all_token 123 | } 124 | # determine token-wise ranges of respective sequence lengths 125 | ranges = {token: range(len(change_points[token]) - 1) for token in all_token} 126 | # create token-wise dictionary of sequence start- and end-points 127 | sequences = { 128 | token: [ 129 | ( 130 | change_points[token][i], 131 | change_points[token][i + 1], 132 | ) 133 | for i in ranges[token] 134 | if self[change_points[token][i]] == token 135 | ] 136 | for token in all_token 137 | } 138 | 139 | # combine all sequences in list and sort if desired 140 | if return_type == "list": 141 | sequences_list = [] 142 | for token in all_token: 143 | sequences_list.extend( 144 | [(sequence[0], sequence[1], token) for sequence in sequences[token]] 145 | ) 146 | sequences = sorted(sequences_list) 147 | 148 | return sequences 149 | 150 | def slice( 151 | self, startframe: int = None, endframe: int = None, inplace: bool = False 152 | ): 153 | 
"""Return copy of object with sliced code. Mimics numpy's array slicing. 154 | 155 | Parameters 156 | ---------- 157 | startframe : int, optional 158 | Start of slice. Defaults to beginning of segment. 159 | endframe : int, optional 160 | End of slice (endframe is excluded). Defaults to end of segment. 161 | inplace: bool, optional 162 | If set to ``False`` (default), a new object is returned, otherwise the 163 | operation is performed in place on the called object. 164 | 165 | Returns 166 | ------- 167 | code_sliced: Union[Code, None] 168 | """ 169 | sliced_data = self.code[startframe:endframe].copy() 170 | code_sliced = None 171 | 172 | if inplace: 173 | self.code = sliced_data 174 | else: 175 | code_sliced = Code( 176 | code=sliced_data, 177 | name=deepcopy(self.name), 178 | definitions=deepcopy(self.definitions), 179 | framerate=deepcopy(self.framerate), 180 | ) 181 | 182 | return code_sliced 183 | -------------------------------------------------------------------------------- /tests/test_core/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from floodlight.core.code import Code 6 | from floodlight.core.pitch import Pitch 7 | from floodlight.core.xy import XY 8 | 9 | 10 | # Sample data for easy creation of core objects 11 | # XY 12 | @pytest.fixture() 13 | def example_xy_data_pos_int() -> np.ndarray: 14 | positions = np.array([[1, 2, 3, 4], [5, 6, 7, 8]]) 15 | return positions 16 | 17 | 18 | @pytest.fixture() 19 | def example_xy_data_none() -> np.ndarray: 20 | positions = np.array([[None, None, None, None], [None, None, None, None]]) 21 | return positions 22 | 23 | 24 | @pytest.fixture() 25 | def example_xy_data_negative() -> np.ndarray: 26 | positions = np.array([[-1, -2, -3, -4], [-5, -6, -7, -8]]) 27 | return positions 28 | 29 | 30 | @pytest.fixture() 31 | def example_xy_data_float() -> np.ndarray: 32 | positions = np.array( 33 | [ 34 | 
[1.0, 2.3333, 0.00000000000000001, 99999999999999999], 35 | [2.32843476297480273847, 6.0, 7.5, 8], 36 | ] 37 | ) 38 | return positions 39 | 40 | 41 | @pytest.fixture() 42 | def example_xy_data_float_with_nans() -> np.ndarray: 43 | positions = np.array( 44 | [ 45 | [1.0, np.nan, 0.00000000000000001, 99999999999999999], 46 | [2.32843476297480273847, 6, np.nan, np.nan], 47 | ] 48 | ) 49 | return positions 50 | 51 | 52 | @pytest.fixture() 53 | def example_xy_object() -> XY: 54 | pos = np.array( 55 | [ 56 | [35, 5, 35, 63, 25, 25, 25, 50], 57 | [45, 10, 45, 55, 35, 20, 35, 45], 58 | [55, 10, 55, 55, 45, 20, 45, 45], 59 | [88.5, 20, 88.5, 30, 88.5, 40, 88.5, 50], 60 | ] 61 | ) 62 | 63 | return XY(pos) 64 | 65 | 66 | @pytest.fixture() 67 | def example_xy_data_string() -> np.ndarray: 68 | positions = np.array([["1", "2", "3", "4"], ["5", "6", "7", "8"]]) 69 | return positions 70 | 71 | 72 | # Code 73 | @pytest.fixture() 74 | def example_code() -> Code: 75 | array = np.array(["A"] * 5 + ["H"] * 5) 76 | name = "possession" 77 | definitions = {"H": "Home", "A": "Away"} 78 | framerate = 10 79 | code = Code(code=array, name=name, definitions=definitions, framerate=framerate) 80 | 81 | return code 82 | 83 | 84 | @pytest.fixture() 85 | def example_code_int() -> Code: 86 | array = np.array([0, 1, 2, 3]) 87 | name = "intensity" 88 | definitions = {0: "None", 1: "Low", 2: "Medium", 3: "High"} 89 | framerate = 4 90 | code = Code(code=array, name=name, definitions=definitions, framerate=framerate) 91 | 92 | return code 93 | 94 | 95 | @pytest.fixture() 96 | def example_code_empty() -> Code: 97 | array = np.array([]) 98 | name = "empty" 99 | code = Code(code=array, name=name) 100 | 101 | return code 102 | 103 | 104 | # Events 105 | @pytest.fixture() 106 | def example_events_data_minimal() -> pd.DataFrame: 107 | data = { 108 | "eID": [1, 2], 109 | "gameclock": [1.1, 2.2], 110 | } 111 | return pd.DataFrame(data) 112 | 113 | 114 | @pytest.fixture() 115 | def 
example_events_data_minimal_none() -> pd.DataFrame: 116 | data = { 117 | "eID": [None, 2], 118 | "gameclock": [1.1, None], 119 | } 120 | return pd.DataFrame(data) 121 | 122 | 123 | @pytest.fixture() 124 | def example_events_data_minimal_missing_essential() -> pd.DataFrame: 125 | data = { 126 | "eID": [1, 2], 127 | "outcome": [0, 1], 128 | } 129 | return pd.DataFrame(data) 130 | 131 | 132 | @pytest.fixture() 133 | def example_events_data_minimal_invalid_essential() -> pd.DataFrame: 134 | data = { 135 | "eID": [1, 2], 136 | "gameclock": [-1.1, 2.2], 137 | } 138 | return pd.DataFrame(data) 139 | 140 | 141 | @pytest.fixture() 142 | def example_events_data_invalid_protected() -> pd.DataFrame: 143 | data = { 144 | "eID": [1, 2], 145 | "gameclock": [1.1, 2.2], 146 | "jID": [10, -11], 147 | } 148 | return pd.DataFrame(data) 149 | 150 | 151 | @pytest.fixture() 152 | def example_events_data_with_outcome_none() -> pd.DataFrame: 153 | data = { 154 | "eID": [1, 2, 2, 4, 1], 155 | "gameclock": [1.1412, 2.4122, 5.213, 11.214, 21.12552], 156 | "outcome": [0, 1, None, 0, None], 157 | } 158 | return pd.DataFrame(data) 159 | 160 | 161 | @pytest.fixture() 162 | def example_events_data_xy() -> pd.DataFrame: 163 | data = { 164 | "eID": [0, 0], 165 | "gameclock": [0.1, 0.2], 166 | "at_x": [1, 3], 167 | "at_y": [2, 4], 168 | } 169 | return pd.DataFrame(data) 170 | 171 | 172 | @pytest.fixture() 173 | def example_events_data_xy_none() -> pd.DataFrame: 174 | data = { 175 | "eID": [0, 0], 176 | "gameclock": [0.1, 0.2], 177 | "at_x": [np.nan, np.nan], 178 | "at_y": [np.nan, np.nan], 179 | } 180 | return pd.DataFrame(data) 181 | 182 | 183 | @pytest.fixture() 184 | def example_events_data_frameclock() -> pd.DataFrame: 185 | data = { 186 | "eID": [1, 2], 187 | "gameclock": [0.1, 0.2], 188 | "frameclock": [12.4, 16.7], 189 | } 190 | return pd.DataFrame(data) 191 | 192 | 193 | @pytest.fixture() 194 | def example_events_data_frameclock_none() -> pd.DataFrame: 195 | data = { 196 | "eID": ["1", "2"], 
197 | "gameclock": [0.1, 0.2], 198 | "frameclock": [None, 16.7], 199 | } 200 | return pd.DataFrame(data) 201 | 202 | 203 | @pytest.fixture() 204 | def example_events_data_frameclock_unsorted() -> pd.DataFrame: 205 | data = { 206 | "eID": [1, 2, 3], 207 | "gameclock": [1.3, 0.1, 0.2], 208 | "frameclock": [21.6, 12.4, 16.7], 209 | } 210 | return pd.DataFrame(data) 211 | 212 | 213 | # Pitch 214 | @pytest.fixture() 215 | def example_pitch_football() -> Pitch: 216 | football_pitch = Pitch( 217 | xlim=(0, 105), ylim=(0, 68), unit="m", boundaries="fixed", sport="football" 218 | ) 219 | 220 | return football_pitch 221 | 222 | 223 | @pytest.fixture() 224 | def example_pitch_handball() -> Pitch: 225 | handball_pitch = Pitch( 226 | xlim=(0, 40), ylim=(0, 20), unit="m", boundaries="fixed", sport="handball" 227 | ) 228 | 229 | return handball_pitch 230 | 231 | 232 | # Teamsheet 233 | @pytest.fixture() 234 | def example_teamsheet_data() -> pd.DataFrame: 235 | 236 | data = pd.DataFrame( 237 | { 238 | "player": ["player 1", "player 2", "player 3"], 239 | "pID": [1, 2, 3], 240 | "jID": [1, 13, 99], 241 | "position": ["GK", "SUB", "SUB"], 242 | } 243 | ) 244 | 245 | return data 246 | -------------------------------------------------------------------------------- /tests/test_models/test_kinetics.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import numpy as np 3 | 4 | from floodlight.models.kinetics import MetabolicPowerModel 5 | 6 | 7 | @pytest.mark.unit 8 | def test_calc_es(example_velocity, example_acceleration) -> None: 9 | # Arrange 10 | velocity = example_velocity 11 | acceleration = example_acceleration 12 | 13 | # Act 14 | equivalent_slope = MetabolicPowerModel._calc_es(velocity, acceleration) 15 | 16 | # Assert 17 | assert np.array_equal( 18 | np.round(equivalent_slope, 3), 19 | np.array(((0.184, 0.5), (0.069, 0.122), (-0.049, -0.273))), 20 | ) 21 | 22 | 23 | @pytest.mark.unit 24 | def 
test_calc_em(example_equivalent_slope) -> None: 25 | # Arrange 26 | equivalent_slope = example_equivalent_slope 27 | 28 | # Act 29 | equivalent_mass = MetabolicPowerModel._calc_em(equivalent_slope) 30 | 31 | # Assert 32 | assert np.array_equal( 33 | np.round(equivalent_mass, 3), 34 | np.array(((1, 1.011), (1.006, 1.02), (1.118, 1.118))), 35 | ) 36 | 37 | 38 | @pytest.mark.unit 39 | def test_calc_v_trans(example_equivalent_slope) -> None: 40 | # Arrange 41 | equivalent_slope = example_equivalent_slope 42 | 43 | # Act 44 | v_trans = MetabolicPowerModel._calc_v_trans(equivalent_slope) 45 | 46 | # Assert 47 | assert np.array_equal( 48 | np.round(v_trans, 3), np.array(((2.27, 1.704), (2.285, 1.434), (1.044, 9.717))) 49 | ) 50 | 51 | 52 | @pytest.mark.unit 53 | def test_is_running(example_velocity, example_equivalent_slope) -> None: 54 | # Arrange 55 | velocity = example_velocity 56 | equivalent_slope = example_equivalent_slope 57 | 58 | # Act 59 | is_running = MetabolicPowerModel._is_running(velocity, equivalent_slope) 60 | 61 | # Assert 62 | assert np.array_equal( 63 | is_running, np.array(((False, False), (True, True), (True, False))) 64 | ) 65 | 66 | 67 | @pytest.mark.unit 68 | def test_get_interpolation_matrix(example_equivalent_slope) -> None: 69 | # Arrange 70 | equivalent_slope = example_equivalent_slope 71 | 72 | # Act 73 | W = MetabolicPowerModel._get_interpolation_weight_matrix(equivalent_slope) 74 | 75 | # Assert 76 | assert np.array_equal( 77 | np.round(W, 3), 78 | np.array( 79 | ( 80 | ([0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0.5, 0.5, 0, 0]), 81 | ([0, 0.1, 0.9, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0]), 82 | ([0, 0, 0, 0, 0, 0, 0, 1], [1, 0, 0, 0, 0, 0, 0, 0]), 83 | ) 84 | ), 85 | ) 86 | 87 | 88 | @pytest.mark.unit 89 | def test_calc_ecw( 90 | example_equivalent_slope, example_velocity, example_equivalent_mass 91 | ) -> None: 92 | # Arrange 93 | equivalent_slope = example_equivalent_slope 94 | velocity = example_velocity 95 | equivalent_mass = 
example_equivalent_mass 96 | 97 | # Act 98 | ecw = MetabolicPowerModel._calc_ecw(equivalent_slope, velocity, equivalent_mass) 99 | 100 | # Assert 101 | assert np.array_equal( 102 | np.round(ecw, 3), np.array(((2.02, 8.962), (6.746, 1043.807), (992.779, 3.013))) 103 | ) 104 | 105 | 106 | @pytest.mark.unit 107 | def test_calc_ecr(example_equivalent_slope, example_equivalent_mass) -> None: 108 | # Arrange 109 | equivalent_slope = example_equivalent_slope 110 | equivalent_mass = example_equivalent_mass 111 | 112 | # Act 113 | ecr = MetabolicPowerModel._calc_ecr(equivalent_slope, equivalent_mass) 114 | 115 | # Assert 116 | assert np.array_equal( 117 | np.round(ecr, 3), np.array(((3.6, 7.988), (1.79, 9.708), (22.625, 4.668))) 118 | ) 119 | 120 | 121 | @pytest.mark.unit 122 | def test_calc_ecl( 123 | example_equivalent_slope, example_velocity, example_equivalent_mass 124 | ) -> None: 125 | # Arrange 126 | equivalent_slope = example_equivalent_slope 127 | velocity = example_velocity 128 | equivalent_mass = example_equivalent_mass 129 | 130 | # Act 131 | ecl = MetabolicPowerModel._calc_ecl(equivalent_slope, velocity, equivalent_mass) 132 | 133 | # Assert 134 | assert np.array_equal( 135 | np.round(ecl, 3), np.array(((2.02, 8.962), (1.79, 9.708), (22.625, 3.013))) 136 | ) 137 | 138 | 139 | @pytest.mark.unit 140 | def test_calc_metabolic_power( 141 | example_equivalent_slope, 142 | example_velocity, 143 | example_equivalent_mass, 144 | ) -> None: 145 | # Arrange 146 | equivalent_slope = example_equivalent_slope 147 | velocity = example_velocity 148 | equivalent_mass = example_equivalent_mass 149 | framerate = 20 150 | 151 | # Act 152 | metabolic_power = MetabolicPowerModel._calc_metabolic_power( 153 | equivalent_slope, velocity, equivalent_mass, framerate 154 | ) 155 | 156 | # Assert 157 | assert np.array_equal( 158 | np.round(metabolic_power, 3), 159 | np.array(((2.020, 0.896), (5.011, 48.540), (52.038, 6.931))), 160 | ) 161 | 162 | 163 | @pytest.mark.unit 164 | def 
test_metabolic_power(example_pitch_dfl, example_xy_object_kinetics) -> None: 165 | # Arrange 166 | xy = example_xy_object_kinetics 167 | 168 | # Act 169 | metp_model = MetabolicPowerModel() 170 | metp_model.fit(xy) 171 | metabolic_power = metp_model.metabolic_power() 172 | 173 | # Assert 174 | assert np.array_equal( 175 | np.round(metabolic_power, 3), 176 | np.array(((9.177, 4.452), (9.306, 4.988), (9.439, 5.570))), 177 | ) 178 | 179 | 180 | @pytest.mark.unit 181 | def test_cumulative_metabolic_power( 182 | example_pitch_dfl, example_xy_object_kinetics 183 | ) -> None: 184 | # Arrange 185 | xy = example_xy_object_kinetics 186 | 187 | # Act 188 | metp_model = MetabolicPowerModel() 189 | metp_model.fit(xy) 190 | cumulative_metabolic_power = metp_model.cumulative_metabolic_power() 191 | 192 | # Assert 193 | assert np.array_equal( 194 | np.round(cumulative_metabolic_power, 3), 195 | np.array(((0.459, 0.223), (0.924, 0.472), (1.396, 0.751))), 196 | ) 197 | 198 | 199 | @pytest.mark.unit 200 | def test_equivalent_distance(example_pitch_dfl, example_xy_object_kinetics) -> None: 201 | # Arrange 202 | xy = example_xy_object_kinetics 203 | 204 | # Act 205 | metp_model = MetabolicPowerModel() 206 | metp_model.fit(xy) 207 | equivalent_distance = metp_model.equivalent_distance() 208 | 209 | # Assert 210 | assert np.array_equal( 211 | np.round(equivalent_distance, 3), 212 | np.array(((2.549, 1.237), (2.585, 1.386), (2.622, 1.547))), 213 | ) 214 | 215 | 216 | @pytest.mark.unit 217 | def test_cumulative_equivalent_distance( 218 | example_pitch_dfl, example_xy_object_kinetics 219 | ) -> None: 220 | # Arrange 221 | xy = example_xy_object_kinetics 222 | 223 | # Act 224 | metp_model = MetabolicPowerModel() 225 | metp_model.fit(xy) 226 | cumulative_equivalent_distance = metp_model.cumulative_equivalent_distance() 227 | 228 | # Assert 229 | assert np.array_equal( 230 | np.round(cumulative_equivalent_distance, 3), 231 | np.array(((0.127, 0.062), (0.257, 0.131), (0.388, 0.208))), 232 | ) 
233 | -------------------------------------------------------------------------------- /docs/source/compendium/3_time.rst: -------------------------------------------------------------------------------- 1 | ============================ 2 | Time - Framerates and Clocks 3 | ============================ 4 | 5 | Sports data naturally capture phenomena unfolding through time. That's not too big of a problem per se, but a few challenges arise managing the time dimension of the data. Plus, a lack of naming conventions leads to sometimes confusing references to parts of sports play (e.g. some call it half times, some periods). So we've decided to keep naming of things consistent. Let's go through this top-down: 6 | 7 | 8 | Observation-level 9 | ================= 10 | 11 | Given all the data for a single observation (such as a match or practice session), we need to deal with the different parts of this event (such as halftimes or different exercises within the session). We call these different parts *segments*. 12 | 13 | A segment can be any part of an observation, but it should be a distinct part of an observation in terms of its temporal dimension. Examples are half times (e.g. in football), thirds (e.g. in hockey), quarters (e.g. in basketball), drills (e.g. in a practice session), overtime (e.g. after draws in regular time), or runs (e.g. in an experiment). That really depends on the observation. 14 | 15 | The key advantage of this separation is an sports-independent handling of the different parts of an observation. Additionally, we leverage these segments to separate the portions of data. Each data-level object is a fragment of data for exactly one segment, and treated independent of the other segments. Only at the observation level are segments tied together. Here, they are identified by the ``segments`` attribute that lists all segments and which can be used for indexing. 16 | 17 | In terms of indexing, there are a few naming conventions we use throughout the code. 
As an example, for matches that are organized in half times, we use ``segments = ["HT1", "HT2"]``. This list is extended with ``["HT3", "HT4"]`` if we go into overtime. 18 | 19 | Data-level 20 | ========== 21 | 22 | Let's move on to the data-level handling of time. There's a major distinction to be made here between *frame-based* objects such as :doc:`XY <../modules/core/xy>` and :doc:`Code <../modules/core/code>` and *list-based* objects such as :doc:`Events <../modules/core/events>`. 23 | 24 | Frame-based objects represent consistent signals that run through an entire segment, such as player positions. Each frame refers to all data at one particular point in time. All these objects are based on *numpy*\'s ``ndarray``\s, where **the first dimenion always encodes the time dimension**\. 25 | 26 | List-based objects are collections of events that can happen at any time (and any place). They are based on *pandas*\' ``DataFrame``\s. 27 | 28 | Both categories are treated differently in the time dimension (see below), but there is one thing that unites them: **All time references are always relative to the respective segment** (with timestamps being the exception to the rule). 29 | 30 | 31 | List-based 32 | ---------- 33 | 34 | As events occur irregularly throughout a segment, each event needs to carry information as to when it took place. There are multiple ways to do it, for example by timestamps or the time on the scoreboard (e.g. '35:12'). Accordingly, you will find ``{"timestamp", "minute", "second"}`` in the list of protected columns. However, these are merely added for convenience, the single time-identifier we rely on is the ``gameclock``\, which measures the elapsed time since the segment started in seconds. 35 | 36 | The reason for this choice is that time on the scoreboard is problematic due to stoppage time. In some sports such as football, the referee deliberatly adds a couple of minutes to the playing time. 
Quite commonly, this is denoted as something like '45:00+01:32', but *it does not have to be*, sometimes it's just '46:32'. That's not an ideal format for implemenation, and we refuse to adapt the entire code to deal with this. Irrespective of first half extra time, the second half will start again at '45:00', leading to potentially ambiguous timestamps such as '46:32'. 37 | 38 | Timestamps are problematic due to their absolute nature. They're great for parsing, or link and sync data-level objects. But they're really not helpful for finding out when an event happend during the game unless you know the timestamp of the segment start. Unfortunately, that's not always the case, as some data providers do not provide timestamps. Plus, the vast majority of data manipulation algorithms rely on relative rather than absolute timings, so we wan't to avoid the hassle of computing timedeltas all over the place. 39 | 40 | .. TIP:: 41 | We use the built-in ``datetime`` objects to handle timestamps, but only if they are *aware*, i.e. timezone information is provided. Some providers use local time-zones when coding these, others always use UTC. So *unaware* timezones can quickly become a problem. For timezone handling, we use the *pytz* package. 42 | 43 | 44 | Frame-based 45 | ----------- 46 | 47 | Frame-based objects depend on a ``framerate``\, which is an attribute in the respective classes. The frame rate denotes the number of frames per second, and typically ranges from one up to a hundred for tracking data. It is important to know for every analysis that is time-sensitive. Each frame thus has a frame number, which can be used for indexing. 48 | 49 | We decided that the frame number suffices as the *only* reference for time in frame-based objects. There are no timestamps or other time information attached to single frames. Instead, the timestamp of a frame is *implicitly* encoded by its index position in the array. 
For example, tracking data stored in a :doc:`XY <../modules/core/xy>` object is encoded as a big array where rows are frames, and columns are players' *x*\- and *y*\-coordinates. 50 | 51 | .. TIP:: 52 | You can rely on (and need to take care of) frame-based objects spanning the entire segment at a given frame rate. If your segment is 10 seconds long, and you have a frame rate of 25, there should be a total of 250 frames. Missing data such as skipped frames during data acquisition or players missing half the segment due to substitutions are instead replaced by *numpy*'s ``np.nan``\s. 53 | 54 | You might argue that this is not very Pythonic, but we found that it leads to much leaner objects and lets us use the full power of *numpy*, such as indexing or slicing! It's rather straightforward in this format to manipulate xy or code data at once, and fast when vectorizing manipulations over the whole time dimension. 55 | 56 | 57 | Handling 58 | ======== 59 | 60 | Of course we try to support any usage of whatever time-information-identification that you prefer. For many purposes, it's easier to use the scoreboard clock (e.g. for printing out stuff) or timestamps (e.g. for linking stuff). Internally, however, we rely on the ``gameclock`` as much as possible. This is due to the robustness reasons given above. But the real deal is the case of joint manipulation of frame-based and list-based objects! 61 | 62 | Within a segment (e.g. relative to its start), list-based objects can be time-identified with the ``gameclock``, and frame-based objects with index positions (plus a framerate). To join the former with the latter, let's introduce another clock, the ``frameclock``\. Whereas the ``gameclock`` measures elapsed times in seconds, the ``frameclock`` measures elapsed time in frames for a given frame rate. So it's really just ``frameclock = int(gameclock * framerate)``\, but it's a helpful link to get all objects on the same page (or clock, if you like).
import matplotlib

from floodlight.vis.utils import check_axes_given


@check_axes_given
def plot_positions(
    xy, frame: int, ball: bool, ax: matplotlib.axes, **kwargs
) -> matplotlib.axes:
    """Scatter plots positions for a given frame of an XY object on a
    matplotlib.axes.

    Parameters
    ----------
    xy: floodlight.core.xy.XY
        XY object containing spatiotemporal data to be plotted.
    frame: int
        Number of frame to be plotted.
    ball: bool
        Boolean indicating whether this object is storing ball data. If set to False
        marker="o", else marker=".".
    ax: matplotlib.axes
        Axes from matplotlib library on which the positions are plotted.
    kwargs:
        Optional keyworded arguments e.g. {'color', 'zorder', 'marker'}
        which can be used for the plot functions from matplotlib. The kwargs are
        only passed to the plot functions of matplotlib.

    Returns
    -------
    axes: matplotlib.axes
        Axes from matplotlib library on which the positions are plotted.

    Notes
    -----
    The kwargs are only passed to the plot functions of matplotlib. To customize
    the plots have a look at the matplotlib documentation. For example, in order
    to modify the color of the points, pass a color name or rgb-value to the
    keyworded argument 'color'. The same principle applies to other kwargs like
    'zorder' and 'marker'.

    .. _positions-plot-label:

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> from floodlight.core.xy import XY
    >>> from floodlight.core.pitch import Pitch
    >>> from floodlight.vis.positions import plot_positions

    >>> # positions
    >>> pos = np.array(
    >>>     [[35,5,35,63,25,25,25,50],
    >>>     [45,10,45,55,35,20,35,45],
    >>>     [55,10,55,55,45,20,45,45],
    >>>     [88.5,20,88.5,30,88.5,40,88.5,50]])
    >>> # create XY object
    >>> xy_pos = XY(pos)
    >>> # create Pitch object
    >>> football_pitch = Pitch(xlim=(0,105), ylim=(0, 68), unit="m", sport="football")
    >>> # create matplotlib.axes
    >>> ax = plt.subplots()[1]
    >>> # plot football pitch on ax
    >>> football_pitch.plot(ax=ax)
    >>> # plot positions on ax
    >>> plot_positions(xy=xy_pos, frame=0, ball=False, ax=ax)
    >>> plt.show()

    .. image:: ../../_img/positions_example.png

    """
    # kwargs which are used to configure the plot with default values; the
    # defaults depend on whether ball data is plotted. All other kwargs are
    # just getting passed to the scatter() method.
    marker = kwargs.pop("marker", "o" if not ball else ".")
    color = kwargs.pop("color", "black" if not ball else "grey")
    zorder = kwargs.pop("zorder", 1)

    # plot the positions of the requested frame. NOTE: a previous version
    # duplicated this call in identical `if not ball` / `elif ball` branches;
    # the `ball` flag only affects the defaults above, so one call suffices.
    ax.scatter(
        x=xy.x[frame],
        y=xy.y[frame],
        marker=marker,
        color=color,
        zorder=zorder,
        **kwargs,
    )

    return ax


@check_axes_given
def plot_trajectories(
    xy,
    start_frame: int,
    end_frame: int,
    ball: bool,
    ax: matplotlib.axes,
    **kwargs,
) -> matplotlib.axes:
    """Draws the trajectories of an XY object from a given time interval on a
    matplotlib.axes.

    Parameters
    ----------
    xy: floodlight.core.xy.XY
        XY object containing spatiotemporal data to be plotted.
    start_frame: int
        Starting frame of time interval to be plotted.
    end_frame: int
        Closing frame of time interval to be plotted.
    ball: bool
        Boolean indicating whether this object is storing ball data. If set to False
        marker="o", else marker=".".
    ax: matplotlib.axes
        Axes from matplotlib library on which the trajectories are drawn.
    kwargs:
        Optional keyworded arguments e.g. {'linewidth', 'zorder', 'linestyle', 'alpha'}
        which can be used for the plot functions from matplotlib. The kwargs are only
        passed to all the plot functions of matplotlib.

    Returns
    -------
    axes: matplotlib.axes
        Axes from matplotlib library on which the trajectories are drawn.

    Notes
    -----
    The kwargs are only passed to the plot functions of matplotlib. To customize
    the plots have a look at the matplotlib documentation. For example, in order
    to modify the color of the lines, pass a color name or rgb-value to the
    keyworded argument 'color'. The same principle applies to other kwargs like
    'zorder' and 'linestyle'.

    .. _trajectories-plot-label:

    Examples
    --------
    >>> import matplotlib.pyplot as plt
    >>> import numpy as np
    >>> from floodlight.core.xy import XY
    >>> from floodlight.core.pitch import Pitch
    >>> from floodlight.vis.positions import plot_trajectories

    >>> # positions
    >>> pos = np.array(
    >>>     [[35,5,35,63,25,25,25,50],
    >>>     [45,10,45,55,35,20,35,45],
    >>>     [55,10,55,55,45,20,45,45],
    >>>     [88.5,20,88.5,30,88.5,40,88.5,50]])
    >>> # create XY object
    >>> xy_pos = XY(pos)
    >>> # create matplotlib.axes
    >>> ax = plt.subplots()[1]
    >>> # create Pitch object
    >>> football_pitch = Pitch(xlim=(0,105), ylim=(0, 68), unit="m", sport="football")
    >>> # plot football pitch on ax
    >>> football_pitch.plot(ax=ax)
    >>> # plot positions on ax
    >>> plot_trajectories(xy=xy_pos, start_frame=0, end_frame=4, ball=False, ax=ax)
    >>> plt.show()

    .. image:: ../../_img/trajectories_example.png

    """
    # kwargs which are used to configure the plot with default values.
    # All other kwargs are just getting passed to the plot() method.
    color = kwargs.pop("color", "black" if not ball else "grey")
    zorder = kwargs.pop("zorder", 1)
    # ball trajectories are thinner per default
    linewidth = kwargs.pop("linewidth", 1 if not ball else 0.5)

    # iterating over every object (for instance players) in the XY.xy array and plot
    # the trajectories for the given range of frames
    for i in range(xy.N):
        x = xy.x[start_frame:end_frame, i]
        y = xy.y[start_frame:end_frame, i]
        ax.plot(x, y, color=color, zorder=zorder, linewidth=linewidth, **kwargs)

    return ax
code-block:: python 37 | 38 | from floodlight.io.tracab import read_position_data_dat 39 | from floodlight.io.opta import read_event_data_xml 40 | 41 | filepath_dat = 42 | filepath_meta = 43 | filepath_f24 = 44 | 45 | ( 46 | xy_objects, 47 | possession_objects, 48 | ballstatus_objects, 49 | teamsheets, 50 | pitch_xy 51 | ) = read_position_data_dat(filepath_dat, filepath_meta) 52 | 53 | events_objects, pitch_events = read_event_data_xml(filepath_f24) 54 | 55 | 56 | The data returned by both parsers is stored in nested dictionaries because the number of match segments can differ (due to overtime). We can unpack some of them to be more explicit: 57 | 58 | .. code-block:: python 59 | 60 | xy_home_ht1 = xy_objects["HT1"]["Home"] 61 | xy_away_ht1 = xy_objects["HT1"]["Away"] 62 | possession_ht1 = possession_objects["HT1"] 63 | ballstatus_ht1 = ballstatus_objects["HT1"] 64 | 65 | events_home_ht1 = events_objects["HT1"]["Home"] 66 | events_away_ht1 = events_objects["HT1"]["Away"] 67 | 68 | 69 | Sample Data 70 | ----------- 71 | 72 | An alternative to proprietary provider data are public datasets. We provide classes to access some of these datasets in the :doc:`datasets <../modules/io/datasets>` submodule. We've also included a small dataset of synthetic match data for instructional and testing purposes. Load them by running: 73 | 74 | .. 
code-block:: python 75 | 76 | from floodlight.io.datasets import ToyDataset 77 | 78 | dataset = ToyDataset() 79 | 80 | ( 81 | xy_home_ht1, 82 | xy_away_ht1, 83 | xy_ball_ht1, 84 | events_home_ht1, 85 | events_away_ht1, 86 | possession_ht1, 87 | ballstatus_ht1, 88 | ) = dataset.get(segment="HT1") 89 | 90 | ( 91 | xy_home_ht2, 92 | xy_away_ht2, 93 | xy_ball_ht2, 94 | events_home_ht2, 95 | events_away_ht2, 96 | possession_ht2, 97 | ballstatus_ht2, 98 | ) = dataset.get(segment="HT2") 99 | 100 | pitch = dataset.get_pitch() 101 | 102 | 103 | Note that the sample data is already projected to the same pitch, so there are no separate objects for tracking data and events. 104 | 105 | Data Manipulation 106 | ================= 107 | 108 | We proceed with the data queried from the ToyDataset, but if you've loaded provider data, the steps are actually the same. 109 | 110 | At this point, you've got a whole bunch of core objects for both teams and both halftimes. Each core class stores a different kind of sports data, such as tracking data, event data, or codes: 111 | 112 | .. code-block:: python 113 | 114 | print(xy_home_ht1) 115 | # Floodlight XY object of shape (100, 22) 116 | print(events_home_ht1) 117 | # Floodlight Events object of shape (17, 4) 118 | print(possession_ht1) 119 | # Floodlight Code object encoding 'possession' 120 | print(pitch) 121 | # Floodlight Pitch object with axes x = (-52.5, 52.5) / y = (-34, 34) (flexible) in [m] 122 | 123 | Now that we have some objects loaded, let's manipulate them. Below are just a few examples, for all methods check out the respective class methods in the :doc:`core <../modules/core/core>` module reference. 124 | 125 | .. 
code-block:: python 126 | 127 | # rotate position data 180 degrees (counter-clockwise) 128 | xy_home_ht1.rotate(180) 129 | # show only x coordinates 130 | print(xy_home_ht1.x) 131 | # show points of 3rd player (xID=3) 132 | xy_home_ht1.player(3) 133 | # slice position data to first 100 frames 134 | xy_home_ht1.slice(startframe=0, endframe=100, inplace=True) 135 | 136 | # print coordinates of pitch middle 137 | print(pitch.center) 138 | 139 | # add "frameclock" column to events object 140 | events_away_ht1.add_frameclock(5) 141 | # show all "Pass" events within first 800 frames 142 | events_away_ht1.select(conditions=[("eID", "Pass"), ("frameclock", (0, 800))]) 143 | 144 | # check what's stored in code object 145 | print(possession_ht1.definitions) 146 | # slice ball possession code to first 10 frames 147 | possession_ht1.slice(startframe=0, endframe=10, inplace=True) 148 | 149 | 150 | Plotting 151 | ======== 152 | 153 | All plotting is based on the `matplotlib `_ library, and also follows the matplotlib syntax. All low-level plotting functionality can be accessed via the :doc:`vis <../modules/vis/vis>` module, but some core objects have a ``.plot()``-method which is a convenience wrapper for plotting. 154 | 155 | Plotting functions and methods accept an ``ax`` argument, which is an matplotlib.axes on which the plot is created (and create one if none is given). This allows to plot in the same fashion as is known from matplotlib: 156 | 157 | .. code-block:: python 158 | 159 | import matplotlib.pyplot as plt 160 | 161 | # create a matplotlib plot 162 | fig, ax = plt.subplots() 163 | # plot the pitch 164 | pitch.plot(ax=ax) 165 | # plot the players for the first time-step 166 | xy_home_ht2.plot(t=0, color='black', ax=ax) 167 | xy_away_ht2.plot(t=0, color='yellow', ax=ax) 168 | xy_ball_ht2.plot(t=0, ball=True, ax=ax) 169 | 170 | .. 
image:: ../_img/getting_started_sample_plot.png 171 | 172 | Example: Average Positions 173 | ========================== 174 | 175 | To put everything together, let's look at a quick example where we calculate the average positions of the home team - depending on them having ball possession or not. 176 | 177 | .. code-block:: python 178 | 179 | import numpy as np 180 | from floodlight import XY 181 | 182 | # index XY object based on Code object and take the mean along the first axis (time dimension) 183 | avg_in_pos = np.mean(xy_home_ht2[possession_ht2 == 1], axis=0) 184 | # create a new dummy XY object with a single frame 185 | avg_in_pos = XY(avg_in_pos.reshape(1, -1)) 186 | # the same with non-possession frames 187 | avg_out_of_pos = np.mean(xy_home_ht2[possession_ht2 == 2], axis=0) 188 | avg_out_of_pos = XY(avg_out_of_pos.reshape(1, -1)) 189 | 190 | # create subplots and plot data 191 | fig, axs = plt.subplots(1, 2) 192 | 193 | pitch.plot(ax=axs[0], color_scheme='bw') 194 | axs[0].set_title("Avg. Positions in Possession") 195 | avg_in_pos.plot(t=0, ax=axs[0]) 196 | 197 | pitch.plot(ax=axs[1], color_scheme='bw') 198 | axs[1].set_title("Avg. Positions out of Possession") 199 | avg_out_of_pos.plot(t=0, ax=axs[1]) 200 | 201 | .. image:: ../_img/getting_started_avg_positions.png 202 | 203 | 204 | Next Steps 205 | ========== 206 | 207 | Once you are familiar with loading and handling core data structures, make sure to check out the module reference for advanced computations involving these object. For example, the :doc:`transforms <../modules/transforms/transforms>` module contains data transformation functions, whereas the :doc:`models <../modules/models/models>` module contains data models. The tutorials provided in the documentation are another starting point to learn more about data analysis with floodlight! 
import warnings

import numpy as np

from floodlight import XY
from floodlight.core.property import TeamProperty, PlayerProperty
from floodlight.models.base import BaseModel, requires_fit


class CentroidModel(BaseModel):
    """Computations based on the geometric center of all players, commonly referred to
    as a team's *centroid*.

    Upon calling the :func:`~CentroidModel.fit`-method, this model calculates a team's
    centroid. The following calculations can subsequently be queried by calling the
    corresponding methods:

    - Centroid [1]_ --> :func:`~CentroidModel.centroid`
    - Centroid Distance --> :func:`~CentroidModel.centroid_distance`
    - Stretch Index [2]_ --> :func:`~CentroidModel.stretch_index`

    Notes
    -----
    Team centroids are computed as the arithmetic mean of all player positions (based on
    *numpy*'s nanmean function). For a fixed point in time and :math:`N` players with
    corresponding positions :math:`x_1, \\dots, x_N \\in \\mathbb{R}^2`, the centroid is
    calculated as

    .. math::
        C = \\frac{1}{N} \\sum_i^N x_i.

    Examples
    --------
    >>> import numpy as np
    >>> from floodlight import XY
    >>> from floodlight.models.geometry import CentroidModel

    >>> xy = XY(np.array(((1, 1, 2, -2), (1.5, np.nan, np.nan, -0))))
    >>> cm = CentroidModel()
    >>> cm.fit(xy)
    >>> cm.centroid()
    XY(xy=array([[ 1.5, -0.5],
           [ 1.5,  0. ]]), framerate=None, direction=None)
    >>> cm.stretch_index(xy)
    TeamProperty(property=array([1.5811388, nan]), name='stretch_index', framerate=None)
    >>> cm.stretch_index(xy, axis='x')
    TeamProperty(property=array([0.5, 0.]), name='stretch_index', framerate=None)

    References
    ----------
        .. [1] Sampaio, J., & Maçãs, V. (2012). Measuring tactical behaviour in
            football. International Journal of Sports Medicine, 33(05), 395-401.
        .. [2] Bourbousson, J., Sève, C., & McGarry, T. (2010). Space-time coordination
            dynamics in basketball: Part 2. The interaction between the two teams.
            Journal of Sports Sciences, 28(3), 349-358.
    """

    def __init__(self):
        super().__init__()
        # model parameter: XY object of team centroids, set by fit()
        self._centroid_ = None

    def fit(self, xy: XY, exclude_xIDs: list = None):
        """Fit the model to the given data and calculate team centroids.

        Parameters
        ----------
        xy: XY
            Player spatiotemporal data for which the centroid is calculated.
        exclude_xIDs: list, optional
            A list of xIDs to be excluded from computation. This can be useful if one
            would like, for example, to exclude goalkeepers from analysis.
        """
        if not exclude_xIDs:
            exclude_xIDs = []
        # boolean for column inclusion, initialize to True for all columns
        include = np.full((xy.N * 2), True)

        # exclude columns according to exclude_xIDs (each xID owns two columns,
        # its x- and y-coordinate)
        for xID in exclude_xIDs:
            if xID not in range(0, xy.N):
                raise ValueError(
                    f"Expected entries of exclude_xIDs to be in range 0 to {xy.N}, "
                    f"got {xID}."
                )
            exclude_start = xID * 2
            exclude_end = exclude_start + 2
            include[exclude_start:exclude_end] = False

        with warnings.catch_warnings():
            # supress warnings caused by empty slices (frames where all included
            # players are NaN)
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            # calculate centroid: reshape included columns to (T, N', 2) and
            # average over the player dimension
            centroids = np.nanmean(xy.xy[:, include].reshape((len(xy), -1, 2)), axis=1)

        # wrap as XY object
        self._centroid_ = XY(
            xy=centroids, framerate=xy.framerate, direction=xy.direction
        )

    @requires_fit
    def centroid(self) -> XY:
        """Returns the team centroid positions as computed by the fit method.

        Returns
        -------
        centroid: XY
            An XY object of shape (T, 2), where T is the total number of frames. The two
            columns contain the centroids' x- and y-coordinates, respectively.
        """
        return self._centroid_

    @requires_fit
    def centroid_distance(self, xy: XY, axis: str = None) -> PlayerProperty:
        """Calculates the Euclidean distance of each player to the fitted centroids.

        Parameters
        ----------
        xy: XY
            Player spatiotemporal data for which the distances to the fitted centroids
            are calculated.
        axis: {None, 'x', 'y'}, optional
            Optional argument that restricts distance calculation to either the x- or
            y-dimension of the data. If set to None (default), distances are calculated
            in both dimensions.

        Returns
        -------
        centroid_distance: PlayerProperty
            A PlayerProperty object of shape (T, N), where T is the total number of
            frames. Each column contains the distances to the team centroid of the
            player with corresponding xID.
        """
        # check matching lengths
        T = len(self._centroid_)
        if len(xy) != T:
            raise ValueError(
                f"Length of xy ({len(xy)}) does not match length of fitted centroids "
                f"({T})."
            )
        if axis not in (None, "x", "y"):
            raise ValueError(
                f"Expected axis to be one of (None, 'x', 'y'), got {axis}."
            )

        # centroid coordinates as (T, 1) columns so they broadcast against the
        # (T, N) player coordinate arrays
        cx = self._centroid_.x.reshape(-1, 1)
        cy = self._centroid_.y.reshape(-1, 1)

        # vectorized distance computation on the specified axis (replaces a
        # per-frame cdist loop; NaN coordinates propagate to NaN distances)
        if axis is None:
            distances = np.hypot(xy.x - cx, xy.y - cy)
        elif axis == "x":
            distances = np.abs(xy.x - cx)
        else:  # axis == "y"
            distances = np.abs(xy.y - cy)

        # wrap as PlayerProperty
        centroid_distance = PlayerProperty(
            property=distances,
            name="centroid_distance",
            framerate=xy.framerate,
        )

        return centroid_distance

    @requires_fit
    def stretch_index(self, xy: XY, axis: str = None) -> TeamProperty:
        """Calculates the *Stretch Index*, i.e., the mean distance of all players to the
        team centroid.

        Parameters
        ----------
        xy: XY
            Player spatiotemporal data for which the stretch index is calculated.
        axis: {None, 'x', 'y'}, optional
            Optional argument that restricts stretch index calculation to either the x-
            or y-dimension of the data. If set to None (default), the stretch index is
            calculated in both dimensions.

        Returns
        -------
        stretch_index: TeamProperty
            A TeamProperty object of length T, where T is the total number of
            frames. Each entry contains the stretch index of that particular frame.
        """
        # get player distances from centroid
        centroid_distances = self.centroid_distance(xy=xy, axis=axis)

        with warnings.catch_warnings():
            # supress warnings caused by empty slices
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            # calculate stretch index as mean distance over the player dimension
            stretch_index = np.nanmean(centroid_distances.property, axis=1)

        # wrap as TeamProperty object
        stretch_index = TeamProperty(
            property=stretch_index, name="stretch_index", framerate=xy.framerate
        )

        return stretch_index
The ``pitch`` variable is a Pitch object that contains information regarding the pitch specification and coordinate system our data live in. 23 | 24 | 25 | Data Preparation 26 | ================ 27 | 28 | Before we jump into the analysis, we will have to do some pre-processing. Tracking data can generally be of very varying quality and contain artefacts such as rapid jumps in player positions. Such jumps can be problematic, depending on the analysis. Although the EIGD data is of good general quality, we will perform a filtering step by applying a lowpass Butterworth filter to smooth the trajectories and eliminate major jumps. As we want to calculate physical performance metrics later on, this will prevent our results from containing super-human abilities due to measurement errors. 29 | 30 | To demonstrate the effect of the filter, we first create a small plot showing all players' trajectories for the first 5000 frames. This is also a nice eyeball test to check if our data generally looks alright before we do any calculations! 31 | 32 | 33 | .. code-block:: python 34 | 35 | import matplotlib.pyplot as plt 36 | 37 | # create a matplotlib plot 38 | fig, ax = plt.subplots() 39 | ax.set_title('Home Team Trajectories') 40 | 41 | # plot the pitch 42 | pitch.plot(ax=ax) 43 | 44 | # plot the home team trajectories for the first 5000 frames in black 45 | home.plot((1, 5000), plot_type='trajectories', color='black', ax=ax) 46 | 47 | .. image:: ../_img/tutorial_analysis_trajectories.png 48 | 49 | This looks alright! But one can see a few rough edges here and there that are likely due to measurement precision rather than the actual player movement. Thus, let's apply the filter and plot the data again in a different color onto the same axes. This will create an overlay and we can inspect how the filter smooths the data. 50 | 51 | ..
code-block:: python 52 | 53 | from floodlight.transforms.filter import butterworth_lowpass 54 | 55 | # filter the data and plot again in red 56 | home = butterworth_lowpass(home) 57 | home.plot((1, 5000), plot_type='trajectories', color='red', ax=ax) 58 | 59 | .. image:: ../_img/tutorial_analysis_trajectories_filtered.png 60 | 61 | These lines look almost identical. The filter (with default parameters) is a very gentle smoother, but we can see the effect if we zoom in a little: 62 | 63 | .. image:: ../_img/tutorial_analysis_trajectories_filtered_zoom.png 64 | 65 | That almost looks like art, but anyhow, our data is now ready for processing! 66 | 67 | 68 | Data Models 69 | =========== 70 | 71 | Let's try out some of the models that are part of the ``floodlight.models`` submodule. Each model is a separate class, so let's import them (and numpy, which we will need later on). To save our results, we also create a dictionary and add a list containing player "names". 72 | 73 | .. code-block:: python 74 | 75 | import numpy as np 76 | 77 | from floodlight.models.kinematics import DistanceModel, VelocityModel 78 | from floodlight.models.kinetics import MetabolicPowerModel 79 | from floodlight.models.geometry import CentroidModel 80 | 81 | # dictionary for computed metrics 82 | metrics = {"Name": [f"Player {n}" for n in range(home.N)]} 83 | 84 | 85 | Now it's time to do the number crunching! Luckily, our data is scaled in meters (given by the ``pitch.unit`` attribute), so we don't need to worry about unit systems and can start processing. 86 | 87 | Using the data models always follows the same routine, which is inspired by the great scikit-learn package! Each model has a ``.fit()``-method which needs to be called after instantiation. This method does the heavy lifting, and once it's done, we can use the other class methods to query some results! For example: 88 | 89 | ..
code-block:: python 90 | 91 | # create - fit - query 92 | model1 = DistanceModel() 93 | model1.fit(home) 94 | distances = model1.cumulative_distance_covered() 95 | metrics["Total Dist. [m]"] = distances[-1] 96 | 97 | This one calculates players' covered distances. After running the ``.fit()``-method, we could query the ``.distance_covered()``-method which returns a PlayerProperty object containing the frame-wise covered distance for each player and each time-frame. We want to check out what they've run in total over those five minutes, so let's instead query the ``.cumulative_distance_covered()``-method, which returns a PlayerProperty containing cumulated distances. At last, we take the last row of that object, which is the total distance covered, and save it in our ``metrics`` dictionary. 98 | 99 | We can do the same trick with player velocities, but this time, let's check out the maximum value to see the player's top speeds: 100 | 101 | .. code-block:: python 102 | 103 | model2 = VelocityModel() 104 | model2.fit(home) 105 | speeds = model2.velocity() 106 | metrics["Top Speed [m/s]"] = np.nanmax(speeds, axis=0) 107 | 108 | Alright, let's check out some more advanced computations. The general syntax stays the same, so again we instantiate the model, call the ``.fit()``-method, query the desired result and save it in the ``metrics`` dictionary. Let's try this and calculate the metabolic power (an advanced biomechanical model that approximates internal physiological load during non-linear activities such as handball) as well as each player's distance to his team's centroid: 109 | 110 | .. code-block:: python 111 | 112 | model3 = MetabolicPowerModel() 113 | model3.fit(home) 114 | metabolic_power = model3.cumulative_metabolic_power() 115 | metrics["Metabolic Work"] = metabolic_power[-1] 116 | 117 | model4 = CentroidModel() 118 | model4.fit(home) 119 | centroid_distance = model4.centroid_distance(home) 120 | metrics["Avg. Centroid Dist. 
[m]"] = np.nanmean(centroid_distance, axis=0) 121 | 122 | That's it, let's check out the results! 123 | 124 | Results 125 | ======= 126 | 127 | We can use pandas to brush and display the results. So let's create a DataFrame, round the values to three decimal places, and show the first ten players of the data frame. 128 | 129 | .. code-block:: python 130 | 131 | import pandas as pd 132 | 133 | # create and show DataFrame 134 | metrics = pd.DataFrame(metrics) 135 | metrics = metrics.round(3) 136 | print(metrics.head(10).to_string()) 137 | 138 | Here's the (formatted) result you should get: 139 | 140 | ======= ================= ================ ================= ========================= 141 | Subject Total Dist. [m] Top Speed [m/s] Metabolic Work Avg. Centroid Dist. [m] 142 | ======= ================= ================ ================= ========================= 143 | P 0 394.095 6.941 1669.19 5.805 144 | P 1 371.544 5.948 1536.22 3.965 145 | P 2 321.057 6.413 1461.03 9.409 146 | P 3 350.462 7.067 1488.61 9.39 147 | P 4 182.469 6.406 773.093 4.92 148 | P 5 371.928 5.606 1645.02 3.412 149 | P 6 211.308 3.181 746.941 10.623 150 | P 7 216.569 6.058 958.511 4.584 151 | P 8 0 nan 0 nan 152 | P 9 0 nan 0 nan 153 | ======= ================= ================ ================= ========================= 154 | 155 | And that's it! If you inspect the data a little closer, you'll find there are some players where all values are either NaN or 0. Those are the substitutes that did not play in the short snippet we've investigated. Feel free to expand the code and loop over the entire EIGD dataset to compare performances between teams and samples! 
156 | -------------------------------------------------------------------------------- /docs/source/compendium/1_data.rst: -------------------------------------------------------------------------------- 1 | ============================== 2 | Data - Structuring Information 3 | ============================== 4 | 5 | The core idea behind this package is to provide streamlined data structures that can hold the various information contained in sports data. This is realized by a set of Python classes, each of which is handling exactly one type of information. But before we dig deeper into implementation details we start with a closer look at what we're dealing with: the data. 6 | 7 | We found that sports data from across major providers typically differs drastically in format and shape. In general, there are three main data types that can be identified: tracking or position data, event data, and video. Each of these three data sources has its own history, and their appearances have changed over the years. Let's have a quick look at each one of them. 8 | 9 | 10 | Provider Data 11 | ============= 12 | 13 | **Event data** has its root in so called hand annotations: back in the days analysts came up with annotation schemes to manually track actions over the course of the game on their notepads. This task is now primarily in the hands of companies that supply sports organizations with packages of entire leagues being annotated - match by match, and still mostly by hand. 14 | 15 | As far as we know, there hardly exist any conventions in this domain and consequently, each provider has its own scheme in which they record the games. These schemes differ dramatically in terms of scope, depth, accuracy, and the used definitions for game actions. As the conceptualizations differ, so do implementations and data formats. 
And as fine-grained descriptions of match play become complex rather quickly, the logic used when storing this data in XML- or JSON-files is usually fundamentally different across providers. 16 | 17 | **Video** is perhaps the most important data source in the everyday life of a professional match analyst. We won't touch this topic right now, as video is not yet supported in our package. 18 | 19 | **Tracking data** has not been around for long, but is gaining popularity quickly. It's just fascinating seeing those dots move around the screen, and modelling possibilities seem endless. Not all "tracking data" are the same though, and their final shape differs in terms of the data acquisition technique. Roughly speaking, the most prominent sources are GPS sensors, multi camera tracking or (local) sensor-based tracking. The acquisition has a strong influence on the eventual data in terms of quality, spatial and temporal resolution, as well as the actual data format. It can be represented as latitudes and longitudes or coordinates in a reference system, frame rates change from one up to a hundred Hertz, sometimes additional body sensor data such as accelerations or impacts are mixed in, and then, there's the big issue of missing and inaccurate data. 20 | 21 | Although these are the three main data sources, sometimes there's even more data collected for a match! Looking at recent tracking data, they often come with frame-by-frame **contextual tags** regarding ball possession or ball status, i.e., information whether the play is underway or interrupted. Additionally, professional sports teams still have their own analysts who cover every match **manually coding** phases of interesting play for effective post-game video analysis. Plus, as of recently, advances in deep learning have produced first results on generating player actions from video or tracking data, or tracking data from moving, single-camera setups. 
22 | 23 | 24 | The Challenge 25 | ============= 26 | 27 | The sheer amount of data, its different types, formats and heterogeneity are a natural cause of complexity for the analysis process. If you've ever worked with any one of these data sources, you most probably have encountered some challenges during processing. The task becomes even more complex when integrating multiple of these data sources. Event- and tracking data are still typically out of sync due to timing errors in event data acquisition. Differing frame rates or coordinate systems are another hurdle to take in multimodal analyses. 28 | 29 | Another major challenge is analyses that contain more than a single match. In this case, match meta-information comes into play: lineups, scorelines, standings, location, weather, and so on. Linking information across different matches requires keeping track of player and/or team IDs, substitutions, and so on. Depending on the provider, there exist extensive databases with elaborate ID systems that can identify every player, club or referee, including their personal and career information. Unfortunately, they (naturally) change across providers. 30 | 31 | Last but not least, the events under observation (we collectively call these *observations*, such as matches, training drills or study experiments) can also differ in shape. Even a typical match can sometimes have overtime periods, or penalty shootouts, which changes requirements to the deployed algorithms. But especially data from training sessions or experimental setups can vary strongly in terms of duration, pitch dimensions, or the number of involved players. 32 | 33 | To sum up, sports data analysis is awesome, but it can quickly become complicated and rather tedious on the implementation level looking at these challenges. All this complication ultimately leads to massive overhead effort needed for data parsing, pre-processing and wrangling. 
Furthermore, the formal incompatibility of different data sources is a noticeable hindrance on unfolding the data's full potential. There's a good reason why, to our knowledge, hardly any applications or scientific publications exist that combine two of the aforementioned data sources (with a few exceptions). 34 | 35 | 36 | Core Objects 37 | ============ 38 | 39 | As stated before, the aim of this package is to tackle some of these challenges. The starting point is to formalize the logic behind team sport data and systematically break down inherent complexity into stand-alone data structures by abstraction and generalization. Most importantly, the desired data classes should be independent from any data provider or source. They should also be performant, clear and intuitive to use and allow a clean interface to data loading and processing. That way, any data processing is attached to the data objects and effectively decoupled from any provider specifics. 40 | 41 | To realize this idea, we've attempted to break down all that information you can extract from team sports data and come up with systemization that translates smoothly to a object-oriented implementation. Generally speaking, we introduce core data objects on three levels: 42 | 43 | 1. **Data level objects** store raw data such as player positions, events, or the used coordinate system. These are essentially independent data fragments that in itself do not carry any further information of where they come from. Instead, they are pure data structures with methods concerned with data manipulation: spatial or temporal transforms, clipping and slicing, modifications, visualizations, and so on. Each fragment (and thus object instance) only stores data for one *team* (such as the home and away team) and temporal *segment* (such as a halftime). 44 | 45 | 2. **Observation level objects** are concerned with bundling and enriching data level objects into "meta" data structures. 
Each observation, such as a match or training drill, can contain a number of data level objects for each segment and team. An observation-level object contains all respective data-level objects and further incorporates objects regarding match or player information. 46 | 47 | 3. **Analysis level objects** contain analysis-related objects such as performance metrics or high-level models of match play. 48 | 49 | On the following pages we discuss a range of topics that are directly linked to the creation and handling of these core data structures, such as handling spatial and temporal data, identities, and so on. 50 | 51 | 52 | But... why? 53 | =========== 54 | 55 | Before we proceed, a quick personal note on the necessity of this package. At this point you might be rightfully asking yourself: Why do we need another package that introduces its own data structures and ways of dealing with certain problems? And what's the purpose of trying to integrate all different data sources and fit them into a single framework? Especially since there already exist packages that aim to solve certain parts of that pipeline? 56 | 57 | Our answer is - although we love those packages out there - that we did not find a solution that did fit our needs. Available packages are either tightly connected to a certain data format/provider, adapt to the subtleties of a 58 | particular sport, or solve *one* particular problem. Ultimately, this means that each of these isolated solutions has their own interface. And this still left us with the core problem discussed on this page: connecting all those, partly incompatible, interfaces. 59 | 60 | We felt that as long as there is no underlying, high-level framework, each and every use case again and again needs its own implementation. 
def get_opta_feedtype(filepath: Union[str, Path]) -> Union[str, None]:
    """Tries to extract the feed type from Opta's XML feed.

    This function assumes that the file follows Opta's format of producing feeds.
    Thus it should have a "PRODUCTION HEADER" comment at the top of the file so that on
    line 6 it reads something like ``production module: Opta::Feed::XML::Soccer::F24``.

    Parameters
    ----------
    filepath : Union[str, Path]
        Full path to Opta XML file.

    Returns
    -------
    feedtype: str or None
        Returns the type of the feed as a string in case it finds it, e.g. 'F24',
        and `None` otherwise.
    """
    # default covers files shorter than seven lines, where the loop below ends
    # without ever reaching the production-module line (previously this raised
    # an UnboundLocalError instead of returning None as documented)
    feedtype = None

    with open(str(filepath), "r") as f:
        # iterate through first lines instead of loading entire file to RAM
        for i, line in enumerate(f):
            # search for production module at zero-based line index 6,
            # i.e. the seventh physical line of the file
            if i == 6:
                production_tags = line.strip().split(":")
                if production_tags[0] == "production module":
                    # feed type is the last colon-separated tag, e.g. "F24"
                    feedtype = production_tags[-1]
                break

    return feedtype
def read_event_data_xml(
    filepath: Union[str, Path]
) -> Tuple[Dict[str, Dict[str, Events]], Pitch]:
    """Parse Opta's f24 feed (containing match events) and extract event data and pitch
    information.

    This function provides a high-level access to the particular f24 feed and will
    return event objects for both teams. The number of segments is inferred from the
    data, yet data for each segment is stored in a separate object.

    Parameters
    ----------
    filepath: str or pathlib.Path
        Full path to the XML feed.

    Returns
    -------
    data_objects: Tuple[Dict[str, Dict[str, Events]], Pitch]
        Tuple of (nested) floodlight core objects with shape (events_objects,
        pitch).

        ``events_objects`` is a nested dictionary containing ``Events`` objects for
        each team and segment of the form ``events_objects[segment][team] = Events``.
        For a typical league match with two halves and teams this dictionary looks like:
        ``{'HT1': {'Home': Events, 'Away': Events}, 'HT2': {'Home': Events, 'Away':
        Events}}``.

        ``pitch`` is a ``Pitch`` object corresponding to the data.

    Raises
    ------
    ValueError
        If the file is not recognized as an Opta F24 feed.

    Notes
    -----
    Opta's format of handling event data information involves an elaborate use of so
    called qualifiers, which attach additional information to certain events. There
    also exist a number of mappings that define which qualifiers may be attached to
    which kind of events. Parsing this information involves quite a bit of logic and is
    planned to be included in further releases. As of now, qualifier information is
    parsed as a string in the `qualifier` column of the returned DataFrame and can be
    transformed to a dict of the form `{qualifier_id: value}`.
    """
    # check feed type
    if get_opta_feedtype(filepath) != "F24":
        raise ValueError(f"Not an Opta F24 feed: {filepath}")

    # load xml tree into memory
    tree = etree.parse(str(filepath))
    root = tree.getroot()

    # 1. parse match info
    matchinfo = root.xpath("Game")[0].attrib
    teams = ["Home", "Away"]
    # map provider team IDs to the generic "Home"/"Away" labels used throughout
    tID_link = {
        int(matchinfo["home_team_id"]): "Home",
        int(matchinfo["away_team_id"]): "Away",
    }
    # number of played periods is inferred from the "period_._start" keys present
    # in the match info attributes (e.g. period_1_start, period_2_start, ...)
    number_of_periods = len(list(filter(re.compile("period_._start").match, matchinfo)))
    segments = [f"HT{period}" for period in range(1, number_of_periods + 1)]

    # 2. parse events
    # bins
    columns = [
        "eID",
        "gameclock",
        "pID",
        "outcome",
        "timestamp",
        "minute",
        "second",
        "at_x",
        "at_y",
        "qualifier",
    ]

    # per-team, per-segment column lists that are later turned into DataFrames
    event_lists = {
        team: {segment: {col: [] for col in columns} for segment in segments}
        for team in teams
    }
    directions = {team: {} for team in teams}
    dir_link = {"Left to Right": "lr", "Right to Left": "rl"}
    # minute offset at which each period's clock starts (standard league format)
    segment_offsets = {1: 0, 2: 45, 3: 90, 4: 105}
    kickoffs = {}

    # read kickoff events for times and playing direction
    # (NOTE: kickoff times can also be directly found in matchinfo, although the
    # explicit kickoff-event timestamps appear to be more accurate)
    for event in root.xpath("Game/Event[@type_id='32']"):
        # get team and segment information
        period = get_and_convert(event.attrib, "period_id", int)
        segment = "HT" + str(period)
        tID = get_and_convert(event.attrib, "team_id", int)
        team = tID_link[tID]
        # read kickoff times (ISO 8601 timestamps, normalized to UTC)
        kickoff_timestring = get_and_convert(event.attrib, "timestamp", str)
        kickoff_datetime = iso8601.parse_date(
            kickoff_timestring, default_timezone=pytz.utc
        )
        kickoffs[segment] = kickoff_datetime
        # read playing direction (qualifier 127 holds e.g. "Left to Right")
        direction_qualifier = event.xpath("Q[@qualifier_id='127']")
        if len(direction_qualifier) > 0:
            value = get_and_convert(direction_qualifier[0], "value", str)
            direction = dir_link.get(value)
        else:
            direction = None
        directions[team][segment] = direction
        # cut event from tree to prevent double parsing
        # event.getparent().remove(event)

    # loop over all events (kickoff events included)
    for event in root.xpath("Game/Event"):
        # get team and segment information
        period = get_and_convert(event.attrib, "period_id", int)
        segment = "HT" + str(period)
        tID = get_and_convert(event.attrib, "team_id", int)
        team = tID_link[tID]
        # skip match-unrelated events (period_id outside 1-5)
        if period not in range(1, 6):
            continue

        # identifier and outcome:
        eID = get_and_convert(event.attrib, "type_id", int)
        # skip unwanted events (type 30 = end of period)
        if eID in [30]:
            continue
        pID = get_and_convert(event.attrib, "player_id", int)
        outcome = get_and_convert(event.attrib, "outcome", int)
        event_lists[team][segment]["eID"].append(eID)
        event_lists[team][segment]["pID"].append(pID)
        event_lists[team][segment]["outcome"].append(outcome)

        # absolute and relative time
        event_timestring = get_and_convert(event.attrib, "timestamp", str)
        minute = get_and_convert(event.attrib, "min", int)
        # transform minute to be relative to current segment
        minute -= segment_offsets[period]
        second = get_and_convert(event.attrib, "sec", int)
        timestamp = iso8601.parse_date(event_timestring, default_timezone=pytz.utc)
        # gameclock = seconds elapsed since the segment's kickoff event
        delta = timestamp - kickoffs[segment]
        gameclock = delta.total_seconds()
        # re-adjust pre-kick-off events (e.g. substitutions) to 00:00
        gameclock = max(gameclock, 0.0)
        event_lists[team][segment]["timestamp"].append(timestamp)
        event_lists[team][segment]["minute"].append(minute)
        event_lists[team][segment]["second"].append(second)
        event_lists[team][segment]["gameclock"].append(gameclock)

        # location
        at_x = get_and_convert(event.attrib, "x", float)
        at_y = get_and_convert(event.attrib, "y", float)
        event_lists[team][segment]["at_x"].append(at_x)
        event_lists[team][segment]["at_y"].append(at_y)

        # qualifier: stored as the string form of a {qualifier_id: value} dict
        qual_dict = {}
        for qualifier in event.iterchildren():
            qual_id = int(qualifier.attrib["qualifier_id"])
            qual_value = qualifier.attrib.get("value")
            qual_dict[qual_id] = qual_value
        event_lists[team][segment]["qualifier"].append(str(qual_dict))

    # create objects
    events_objects = {}
    for segment in segments:
        events_objects[segment] = {}
        for team in ["Home", "Away"]:
            events_objects[segment][team] = Events(
                events=pd.DataFrame(data=event_lists[team][segment]),
                direction=directions[team][segment],
            )
    pitch = Pitch.from_template("opta", sport="football")

    # pack objects
    data_objects = (events_objects, pitch)

    return data_objects
@dataclass
class Teamsheet:
    """Teamsheet storing player information. Core class of floodlight.

    Teamsheet data is stored in a `pandas` ``DataFrame``, where each row stores one
    player with their different properties organized in columns. Columns may contain
    any relevant information. A `"player"` column is required for instantiation
    to identify a player, and some particular column names are protected (see Notes).

    Parameters
    ----------
    teamsheet: pd.DataFrame
        DataFrame containing rows of players and columns of respective properties.

    Attributes
    ----------
    essential: list
        List of essential columns available for stored players.
    protected: list
        List of protected columns available for stored players.
    custom: list
        List of custom (i.e. non-essential and non-protected) columns available for
        stored players.
    essential_missing: list
        List of missing essential columns.
    essential_invalid: list
        List of essential columns that violate the definitions.
    protected_missing: list
        List of missing protected columns.
    protected_invalid: list
        List of protected columns that violate the definitions.

    Notes
    -----
    Teamsheet data, particularly information available for players, may vary across
    data providers. To accommodate all data flavours, any column name or data type is
    permissible. However, one `essential` column is required (`"player"`). Other column
    names are `protected`. Using these names assumes that data stored in these columns
    follows conventions in terms of data types and value ranges. These are required for
    methods working with protected columns to assure correct calculations. Definitions
    for `essential` and `protected` columns can be found in
    :ref:`floodlight.core.definitions `.

    """

    # inner DataFrame: one row per player, one column per property
    teamsheet: pd.DataFrame

    def __post_init__(self):
        # check for missing essential columns
        missing_columns = self.essential_missing
        if missing_columns:
            raise ValueError(
                f"Floodlight Teamsheet object is missing the essential "
                f"column(s) {missing_columns}!"
            )

        # warn if value ranges are violated
        incorrect_columns = self.essential_invalid
        if incorrect_columns:
            for col in incorrect_columns:
                warnings.warn(
                    f"The '{col}' column does not match the defined value range (from "
                    f"floodlight.core.definitions). This may lead to unexpected "
                    f"behavior of methods using this column."
                )

    def __str__(self):
        return f"Floodlight Teamsheet object of shape {self.teamsheet.shape}"

    def __len__(self):
        return len(self.teamsheet)

    def __getitem__(self, key):
        return self.teamsheet[key]

    def __setitem__(self, key, value):
        self.teamsheet[key] = value

    @property
    def essential(self):
        # columns present in the teamsheet that are defined as essential
        essential = [
            col for col in self.teamsheet.columns if col in essential_teamsheet_columns
        ]

        return essential

    @property
    def protected(self):
        # columns present in the teamsheet that are defined as protected
        protected = [col for col in self.teamsheet.columns if col in protected_columns]
        return protected

    @property
    def custom(self):
        # columns that are neither essential nor protected
        custom = [
            col
            for col in self.teamsheet.columns
            if col not in essential_teamsheet_columns and col not in protected_columns
        ]

        return custom

    @property
    def essential_missing(self):
        # essential columns defined but absent from the teamsheet
        missing_columns = [
            col for col in essential_teamsheet_columns if col not in self.essential
        ]

        return missing_columns

    @property
    def essential_invalid(self):
        # essential columns present but violating their defined value range
        invalid_columns = [
            col
            for col in self.essential
            if not self.column_values_in_range(col, essential_teamsheet_columns)
        ]

        return invalid_columns

    @property
    def protected_missing(self):
        # protected columns defined but absent from the teamsheet
        missing_columns = [
            col for col in protected_columns if col not in self.protected
        ]

        return missing_columns

    @property
    def protected_invalid(self):
        # protected columns present but violating their defined value range
        invalid_columns = [
            col
            for col in self.protected
            if not self.column_values_in_range(col, protected_columns)
        ]

        return invalid_columns

    def column_values_in_range(self, col: str, definitions: Dict[str, Dict]) -> bool:
        """Check if values for a single column of the inner teamsheet DataFrame are in
        correct range using the specifications from
        :ref:`floodlight.core.definitions `.

        Parameters
        ----------
        col: str
            Column name of the inner teamsheet DataFrame to be checked
        definitions: Dict
            Dictionary (from floodlight.core.definitions) containing specifications for
            the columns to be checked.

            The definitions need to contain an entry for the column to be checked and
            this entry needs to contain information about the value range in the form:
            ``definitions[col][value_range] = (min, max)``.

        Returns
        -------
        bool
            True if the checks for value range pass and False otherwise
        """
        # skip if value range is not defined
        if definitions[col]["value_range"] is None:
            return True

        # skip values that are None or NaN
        col_nan_free = self.teamsheet[col].dropna()

        # retrieve value range from definitions
        min_val, max_val = definitions[col]["value_range"]

        # every remaining value must lie within [min_val, max_val]
        in_range = (min_val <= col_nan_free) & (col_nan_free <= max_val)

        return bool(in_range.all())

    def get_links(self, keys: str, values: str) -> dict:
        """Creates dictionary of links between two columns of the teamsheet as specified
        by `keys` and `values`.

        Parameters
        ----------
        keys : str
            Column of teamsheet used as keys in links dictionary.
        values : str
            Column of teamsheet used as values in links dictionary.

        Returns
        -------
        links : dict
            Dictionary of links between columns specified by `keys` and `values`
            argument.

        Raises
        ------
        ValueError
            If either column is missing from the teamsheet, or if the `keys` column
            contains duplicate entries (which would make the mapping ambiguous).
        """
        # checks
        if keys not in self.teamsheet.columns:
            raise ValueError(f"No '{keys}' column in teamsheet.")
        if values not in self.teamsheet.columns:
            raise ValueError(f"No '{values}' column in teamsheet.")
        if not self.teamsheet[keys].is_unique:
            raise ValueError(
                f"Cannot construct dictionary with unambiguous assignments"
                f" as '{keys}' column has duplicate entries."
            )
        # bin
        links = {}

        # loop through players
        for idx in self.teamsheet.index:
            # add key-value pairs to links dict:
            links[self.teamsheet.at[idx, keys]] = self.teamsheet.at[idx, values]

        return links

    def add_xIDs(self):
        """Adds the column "xID" as an increasing index over all players.


        The player index identifier ("xID") is used to enforce an order to the players
        within a team. This identifier is primarily used for locating players in
        respective XY objects, but can also be helpful iterating over or displaying all
        players of a team. This function assigns the "xID" as an increasing index that
        counts over all players in the inner teamsheet DataFrame, starting at 0
        and ending at N_players - 1. Any existing entries for "xID" are overwritten by
        this function.
        """
        # list(range(...)) replaces the redundant [i for i in range(...)] idiom
        self.teamsheet["xID"] = list(range(len(self.teamsheet)))