├── .gitignore ├── LICENSE.md ├── NEWS.md ├── README.md ├── examples └── predict_matches.ipynb ├── impectPy ├── __init__.py ├── access_token.py ├── config.py ├── events.py ├── helpers.py ├── impect.py ├── iteration_averages.py ├── iterations.py ├── match_info.py ├── matches.py ├── matchsums.py ├── player_profile_scores.py ├── player_scores.py ├── set_pieces.py ├── squad_coefficients.py ├── squad_ratings.py ├── squad_scores.py └── xml.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/windows,macos,pycharm+all,python,flask 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows,macos,pycharm+all,python,flask 3 | 4 | ### Flask ### 5 | instance/* 6 | !instance/.gitignore 7 | .webassets-cache 8 | .env 9 | 10 | ### Flask.Python Stack ### 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # poetry 107 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 108 | # This is especially recommended for binary packages to ensure reproducibility, and is more 109 | # commonly ignored for libraries. 110 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 111 | #poetry.lock 112 | 113 | # pdm 114 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
115 | #pdm.lock 116 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 117 | # in version control. 118 | # https://pdm.fming.dev/#use-with-ide 119 | .pdm.toml 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .venv 133 | venv-testpypi 134 | venv-pypi 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | 165 | # PyCharm 166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 168 | # and can be added to the global gitignore or merged into this file. For a more nuclear 169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
170 | #.idea/ 171 | 172 | ### macOS ### 173 | # General 174 | .DS_Store 175 | .AppleDouble 176 | .LSOverride 177 | 178 | # Icon must end with two \r 179 | Icon 180 | 181 | 182 | # Thumbnails 183 | ._* 184 | 185 | # Files that might appear in the root of a volume 186 | .DocumentRevisions-V100 187 | .fseventsd 188 | .Spotlight-V100 189 | .TemporaryItems 190 | .Trashes 191 | .VolumeIcon.icns 192 | .com.apple.timemachine.donotpresent 193 | 194 | # Directories potentially created on remote AFP share 195 | .AppleDB 196 | .AppleDesktop 197 | Network Trash Folder 198 | Temporary Items 199 | .apdisk 200 | 201 | ### macOS Patch ### 202 | # iCloud generated files 203 | *.icloud 204 | 205 | ### PyCharm+all ### 206 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 207 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 208 | 209 | # User-specific stuff 210 | .idea/**/workspace.xml 211 | .idea/**/tasks.xml 212 | .idea/**/usage.statistics.xml 213 | .idea/**/dictionaries 214 | .idea/**/shelf 215 | 216 | # AWS User-specific 217 | .idea/**/aws.xml 218 | 219 | # Generated files 220 | .idea/**/contentModel.xml 221 | 222 | # Sensitive or high-churn files 223 | .idea/**/dataSources/ 224 | .idea/**/dataSources.ids 225 | .idea/**/dataSources.local.xml 226 | .idea/**/sqlDataSources.xml 227 | .idea/**/dynamic.xml 228 | .idea/**/uiDesigner.xml 229 | .idea/**/dbnavigator.xml 230 | 231 | # Gradle 232 | .idea/**/gradle.xml 233 | .idea/**/libraries 234 | 235 | # Gradle and Maven with auto-import 236 | # When using Gradle or Maven with auto-import, you should exclude module files, 237 | # since they will be recreated, and may cause churn. Uncomment if using 238 | # auto-import. 
239 | # .idea/artifacts 240 | # .idea/compiler.xml 241 | # .idea/jarRepositories.xml 242 | # .idea/modules.xml 243 | # .idea/*.iml 244 | # .idea/modules 245 | # *.iml 246 | # *.ipr 247 | 248 | # CMake 249 | cmake-build-*/ 250 | 251 | # Mongo Explorer plugin 252 | .idea/**/mongoSettings.xml 253 | 254 | # File-based project format 255 | *.iws 256 | 257 | # IntelliJ 258 | out/ 259 | 260 | # mpeltonen/sbt-idea plugin 261 | .idea_modules/ 262 | 263 | # JIRA plugin 264 | atlassian-ide-plugin.xml 265 | 266 | # Cursive Clojure plugin 267 | .idea/replstate.xml 268 | 269 | # SonarLint plugin 270 | .idea/sonarlint/ 271 | 272 | # Crashlytics plugin (for Android Studio and IntelliJ) 273 | com_crashlytics_export_strings.xml 274 | crashlytics.properties 275 | crashlytics-build.properties 276 | fabric.properties 277 | 278 | # Editor-based Rest Client 279 | .idea/httpRequests 280 | 281 | # Android studio 3.1+ serialized cache file 282 | .idea/caches/build_file_checksums.ser 283 | 284 | ### PyCharm+all Patch ### 285 | # Ignore everything but code style settings and run configurations 286 | # that are supposed to be shared within teams. 287 | 288 | .idea/* 289 | 290 | !.idea/codeStyles 291 | !.idea/runConfigurations 292 | 293 | ### Python ### 294 | # Byte-compiled / optimized / DLL files 295 | 296 | # C extensions 297 | 298 | # Distribution / packaging 299 | 300 | # PyInstaller 301 | # Usually these files are written by a python script from a template 302 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
303 | 304 | # Installer logs 305 | 306 | # Unit test / coverage reports 307 | 308 | # Translations 309 | 310 | # Django stuff: 311 | 312 | # Flask stuff: 313 | 314 | # Scrapy stuff: 315 | 316 | # Sphinx documentation 317 | 318 | # PyBuilder 319 | 320 | # Jupyter Notebook 321 | 322 | # IPython 323 | 324 | # pyenv 325 | # For a library or package, you might want to ignore these files since the code is 326 | # intended to run in multiple environments; otherwise, check them in: 327 | # .python-version 328 | 329 | # pipenv 330 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 331 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 332 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 333 | # install all needed dependencies. 334 | 335 | # poetry 336 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 337 | # This is especially recommended for binary packages to ensure reproducibility, and is more 338 | # commonly ignored for libraries. 339 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 340 | 341 | # pdm 342 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 343 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 344 | # in version control. 345 | # https://pdm.fming.dev/#use-with-ide 346 | 347 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 348 | 349 | # Celery stuff 350 | 351 | # SageMath parsed files 352 | 353 | # Environments 354 | 355 | # Spyder project settings 356 | 357 | # Rope project settings 358 | 359 | # mkdocs documentation 360 | 361 | # mypy 362 | 363 | # Pyre type checker 364 | 365 | # pytype static type analyzer 366 | 367 | # Cython debug symbols 368 | 369 | # PyCharm 370 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 371 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 372 | # and can be added to the global gitignore or merged into this file. For a more nuclear 373 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 374 | 375 | ### Python Patch ### 376 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 377 | poetry.toml 378 | 379 | # ruff 380 | .ruff_cache/ 381 | 382 | # LSP config files 383 | pyrightconfig.json 384 | 385 | ### Windows ### 386 | # Windows thumbnail cache files 387 | Thumbs.db 388 | Thumbs.db:encryptable 389 | ehthumbs.db 390 | ehthumbs_vista.db 391 | 392 | # Dump file 393 | *.stackdump 394 | 395 | # Folder config file 396 | [Dd]esktop.ini 397 | 398 | # Recycle Bin used on file shares 399 | $RECYCLE.BIN/ 400 | 401 | # Windows Installer files 402 | *.cab 403 | *.msi 404 | *.msix 405 | *.msm 406 | *.msp 407 | 408 | # Windows shortcuts 409 | *.lnk 410 | 411 | # End of https://www.toptal.com/developers/gitignore/api/windows,macos,pycharm+all,python,flask 412 | 413 | # Tests 414 | tests/ 415 | 416 | # Maintenance checklist 417 | maintenance_checklist.md -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 impectPy authors 4 | 5 | Permission is hereby granted, free of charge, 
to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # impectPy 2.5.2 2 | 3 | ## Minor Changes 4 | * fix bugs in the following functions that occurred if the coaches endpoint returned coaches for the iteration but the match does not have any coaches: 5 | * `getEvents()` 6 | * `getPlayerMatchSums()` 7 | * `getSquadMatchSums()` 8 | * `getPlayerMatchScores()` 9 | * `getSquadMatchScores()` 10 | 11 | # impectPy 2.5.1 12 | 13 | ## Minor Changes 14 | * fix bugs in the following functions that occurred if either the coaches endpoint returned no coaches or the coaches endpoint was blacklisted for the user: 15 | * `getEvents()` 16 | * `getPlayerMatchSums()` 17 | * `getSquadMatchSums()` 18 | * `getPlayerMatchScores()` 19 | * `getSquadMatchScores()` 20 | 21 | # impectPy 2.5.0 22 | 23 | ## Major Changes 24 | * Use new endpoints to drastically improve performance of `getPlayerMatchScores()` and `getPlayerIterationScores()`. The argument `positions` is no longer required. If it is not supplied the function defaults to the new endpoints and returns all unique player-position-squad combinations. 
25 | * Add coaches to the following functions: 26 | * `getEvents()` 27 | * `getPlayerMatchSums()` 28 | * `getSquadMatchSums()` 29 | * `getPlayerMatchScores()` 30 | * `getSquadMatchScores()` 31 | * Add function `getSquadCoefficients()` to retrieve detailed model coefficients to enable match predictions 32 | 33 | ## Minor Changes 34 | * Fix error in `getPlayerIterationAverages()` regarding type conversions 35 | * Use `NA` as fill value instead of 0 for score related functions 36 | * Minor fixes to enable PyPi submission 37 | * Improve error handling 38 | 39 | # impectPy 2.4.5 40 | 41 | ## Minor Changes 42 | * fix bug in `getPlayerIterationAverages()` function 43 | 44 | # impectPy 2.4.4 45 | 46 | ## Major Changes 47 | * Rename function `generateSportsCodeXML()` to `generateXML()` 48 | * Add proper xml structure to the `generateXML()` function for Python versions >= 3.9 49 | * Significantly improve customization options for new `generateXML()` function with new function arguments 50 | * `kpis`: Customize KPIs included 51 | * `labels`: Customize labels included 52 | * `codeTag`: Customize code tag selection 53 | * `labelSorting`: Enable/Disable label sorting 54 | 55 | ## Minor Changes 56 | * fix bug in `getEvents()` that prevented the column `duelPlayerName` from being populated correctly 57 | 58 | # impectPy 2.4.3 59 | 60 | ## Minor Changes 61 | * Add FIFA Country Name to the following functions 62 | * `getIterations()` 63 | * `getPlayerMatchsums()` 64 | * `getPlayerIterationAverages()` 65 | * `getPlayerMatchScores()` 66 | * `getPlayerIterationScores()` 67 | * `getPlayerProfileScores()` 68 | * Fix bug in `getStartingPositions()` that resulted from players not having a shirt number assigned 69 | 70 | # impectPy 2.4.2 71 | 72 | ## Minor Changes 73 | * Improvements to `getSubstitutions()` to handle matches where one team did not substitute any players 74 | * Significant performance improvements to `getPlayerIterationAverages()` 75 | 76 | # impectPy 2.4.1 77 | 78 | ## Minor 
Changes 79 | * Fix error in `getEvents()` that prevented set piece data from properly being joined to event data 80 | * Fix error in `getSubstitutions()` & `getStartingPositions()` that caused an error when players switched shirt numbers 81 | 82 | # impectPy 2.4.0 83 | 84 | ## Major Changes 85 | * Add function `getFormations()` to retrieve squad formations on match level 86 | * Add function `getStartingPositions()` to retrieve squad starting positions on match level 87 | * Add function `getSubstitutions()` to retrieve squad substitutions on match level 88 | 89 | ## Minor changes 90 | * Add IMPECT class to enable object-oriented API usage and improve performance 91 | * Add new arguments to `getSportsCodeXML()` to enable more customization options for the generated XML: 92 | * Disable sequencing 93 | * Disable KPI buckets 94 | 95 | # impectPy 2.3.1 96 | 97 | ## Major Changes 98 | * Add function `getSquadRatings()` to retrieve squad ratings 99 | 100 | ## Minor changes 101 | * Add attribute `inferredSetPiece` to `getEvents()` function 102 | * Add ID mappings to other providers (HeimSpiel, SkillCorner, Wyscout) to several functions 103 | * Fix bug in `getSquadMatchScores()` that occurred if the home team did not have a player at the given position 104 | 105 | # impectPy 2.3.0 106 | 107 | ## Major changes 108 | * Add new `getSetPieces()` function 109 | * Add set piece data to `getEvents()` 110 | * Add arguments to `getEvents()` function that control the addition of KPIs and set piece data to the events dataframe 111 | 112 | ## Minor changes 113 | * Fix error in `getEvents()` for matches without any tagged duels 114 | * Use raw string notation when using regex to clean column names 115 | * Add EventId to XML generation 116 | * Fix error in `getPlayerIterationScores()`, `getPlayerIterationScores()` & `getPlayerProfileScores()` when no records are returned for given combination of match/iteration and position 117 | 118 | # impectPy 2.2.0 119 | 120 | ## Major changes 121 | * 
add new functions to query the new customer API endpoints that provide ratios & scores 122 | 123 | ## Minor changes 124 | * switch from German country name to FIFA country name 125 | * Update to readme structure 126 | 127 | # impectPy 2.1.0 128 | 129 | ## Major changes 130 | * add new attributes from dataVersion V4 to `getEvents()` 131 | 132 | ## Minor changes 133 | * add some of the new dataVersion V4 attributes to `generateSportsCodeXML()` 134 | * fix labels of periods in `generateSportsCodeXML()` to better support MatchTracker integration 135 | 136 | # impectPy 2.0.6 137 | 138 | ## Minor changes 139 | * add new label to player phase of xml export: team 140 | 141 | # impectPy 2.0.4 142 | 143 | # impectPy 2.0.5 144 | 145 | ## Minor changes 146 | * add more player master data to `getPlayerMatchsums()` and `getPlayerIterationAverages()` 147 | * fix issue with several functions that occurred with pandas version 2.1 or newer 148 | * fix minor consistency issue in code for `generateSportsCodeXML()` 149 | * edit naming of kickoff events in `generateSportsCodeXML()` to properly support SBG MatchTracker 150 | 151 | # impectPy 2.0.4 152 | 153 | ## Minor changes 154 | * fix bug in `getSquadMatchsums()` and `getPlayerMatchsums()` caused by duplicates 155 | * fix bug in `getMatches()` function caused by addition of wyscoutIds 156 | * 157 | * improve error handling for functions that use match ids as input 158 | * improve error handling for `getMatches()` function 159 | * add `playDuration` on player level to `getSquadMatchsums()`, `getPlayerMatchsums()`, `getPlayerIterationAverages()` and `getSquadIterationAverages()` 160 | * fix bug in `getEvents()`, `getSquadMatchsums()`, `getPlayerMatchsums()`, `getPlayerIterationAverages()` and `getSquadIterationAverages()` that was caused by the addition of several new keys to the KPI endpoint 161 | 162 | # impectPy 2.0.3 163 | 164 | ## Minor changes 165 | * fix bug in `getEvents()` function caused by querying data for multiple 
iterations of the same competition 166 | 167 | # impectPy 2.0.2 168 | 169 | ## Minor changes 170 | * fix bug in `getPlayerIterationAverages()` function caused by user access rights 171 | * fix bug in `getIterations()` function caused by addition of wyscoutIds 172 | * fix bug in `getMatches()` function caused by addition of wyscoutIds 173 | 174 | # impectPy 2.0.1 175 | 176 | ## Minor changes 177 | * fix bug in `getSquadIterationAverages()` function 178 | * fix bug in `getEvents()` function 179 | * fix bug in `generateSportsCodeXML()` function 180 | * fix bug in `getPlayerMatchsums()` function 181 | * add sorting by id to `getIterations()` function 182 | * add sorting by id to `getMatches()` function 183 | * fix function argument name in readMe 184 | 185 | # impectPy 2.0.0 186 | 187 | ## Major changes 188 | * Modify package to support the IMPECT API V5 instead of V4 189 | * Add `getPlayerIterationAverages()` function 190 | * Add `getSquadIterationAverages()` function 191 | 192 | ## Minor changes 193 | * Fix error in readme sample code 194 | * raise exception for wrong `matches` argument input type in several functions 195 | 196 | # impectPy 1.0.3 197 | 198 | ## Minor changes 199 | * fix bug in `generateSportsCodeXML()` that did not filter out events of action type 'NO_VIDEO_AVAILABLE', 'FINAL_WHISTLE' or 'REFEREE_INTERCEPTION' correctly 200 | * fix bug in `generateSportsCodeXML()` that caused certain kickoffs to be missing 201 | 202 | # impectPy 1.0.2 203 | 204 | ## Minor changes 205 | * add features and KPIs to `generateSportsCodeXML()` function, finalize initial built for IMPECT portals 206 | 207 | # impectPy 1.0.1 208 | 209 | ## Minor improvements and bug fixes 210 | * Fix issue in `getAccessToken()` with certain characters in password 211 | 212 | # impectPy 1.0.0 213 | 214 | ## Major changes 215 | * Release package 216 | 217 | ## Minor changes 218 | * implement retry on HTTP response codes other than 200 219 | 220 | # impectPy 0.1.1 221 | 222 | ## Minor 
improvements and bug fixes 223 | * renamed `generateXML()` to `generateSportsCodeXML()` 224 | * Minor bug fixes in `generateSportsCodeXML()` 225 | 226 | # impectPy 0.1 227 | 228 | ## Major changes 229 | * Added basic package build 230 | * Added `getAccessToken()` function 231 | * Added `getCompetitions()` function 232 | * Added `getMatchplan()` function 233 | * Added `getEventData()` function 234 | * Added `getMatchsums()` function 235 | 236 | ## Minor improvements and bug fixes 237 | * Added a `NEWS.md` file to track changes to the package 238 | * Added `README.md` 239 | * Added `LICENSE.md` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # impectPy ImpectPy Logo 2 | 3 | A package provided by: Impect GmbH 4 | 5 | Version: v2.5.2 6 | 7 | **Updated: October 20th 2025** 8 | 9 | --- 10 | 11 | **Supported API Version: V5** 12 | For older versions, please see list below: 13 | 14 | - API V4: https://github.com/ImpectAPI/impectPy/tree/v1.0.3 15 | - API V3: not supported by this package 16 | 17 | --- 18 | 19 | ## Introduction 20 | 21 | The goal of the impectPy package is to provide an easy way for Impect 22 | Customers to access data from the customer API. This API includes basic 23 | information about competitions, competition iterations, and matches as 24 | well as event data and aggregated scorings per player and position on 25 | match and season level. 26 | 27 | ## Installation 28 | 29 | You can install the latest version of impectPy from PyPi with: 30 | 31 | ```cmd 32 | pip install impectPy 33 | ``` 34 | 35 | You can also install it from [GitHub](https://github.com/) with: 36 | 37 | ```cmd 38 | pip install git+https://github.com/ImpectAPI/impectPy.git@v2.5.2 39 | ``` 40 | 41 | ## Usage 42 | 43 | ### Getting started 44 | 45 | Before accessing any data via our API, you will need to request a bearer 46 | token for authorization. 
You can get this authorization token using the 47 | following code snippet: 48 | 49 | ```python 50 | import impectPy as ip 51 | import pandas as pd 52 | 53 | # define login credentials 54 | username = "yourUsername" 55 | password = "yourPassword" 56 | 57 | # get access token 58 | token = ip.getAccessToken(username=username, password=password) 59 | ``` 60 | 61 | This access token is a requirement to use any of the functions that 62 | request data from the API. We recommend first getting a list of 63 | competition iterations that are enabled for your account. 64 | 65 | ### Retrieve Basic Information 66 | 67 | ```python 68 | # get list of iterations 69 | iterations = ip.getIterations(token=token) 70 | 71 | # print iterations to console 72 | iterations 73 | ``` 74 | 75 | If any iteration you expected to see is not listed, please contact 76 | your sales representative. Now let’s assume you are interested in data 77 | for 2022/23 season of the 1. Bundesliga (iteration = 518). The following 78 | snippet gets you a list of matches for this iteration: 79 | 80 | ```python 81 | # get matches for iteration 82 | matchplan = ip.getMatches(iteration=518, token=token) 83 | 84 | # print matches to console 85 | matchplan 86 | ``` 87 | 88 | The column `available` denotes whether a given match has been tagged by Impect 89 | and the data is available to you. 90 | 91 | ### Retrieve Match Level Data 92 | 93 | Let's assume you are interested in the FC Bayern München vs Borussia Dortmund game 94 | from April 1st 2023 (matchId = 84344) and want to retrieve event level data as well 95 | as team formation, starting position and substitution data. As the function allows 96 | for multiple games to be requested at once, we need to wrap the matchId into a list. 
97 | Hence, to request data for this game, run the following code snippet: 98 | 99 | ```python 100 | # define matches to get event data for 101 | matches = [84344] 102 | 103 | # get event data for matches 104 | events = ip.getEvents( 105 | matches=matches, 106 | token=token, 107 | include_kpis=True, 108 | include_set_pieces=True 109 | ) 110 | 111 | # get match info 112 | formations = ip.getFormations(matches, token) 113 | substitutions = ip.getSubstitutions(matches, token) 114 | starting_positions = ip.getStartingPositions(matches, token) 115 | 116 | # print first few rows from events dataframe to console 117 | events.head() 118 | ``` 119 | 120 | You can access the aggregated scores per player and position or per 121 | squad for this match in a similar way. You can also find more detailed data 122 | around set piece situations within our API. 123 | Also, we provide you with IMPECT scores and ratios that you might know from our 124 | Scouting and Analysis portals. On player level, these are calculated across 125 | positions which is why you have to supply the function with a list of positions 126 | you want to retrieve data for: 127 | 128 | ```python 129 | # define matches to get further data for 130 | matches = [84344] 131 | 132 | # get set piece data including KPI aggregates 133 | setPieces = ip.getSetPieces(matches=matches, token=token) 134 | 135 | # get kpi matchsums for match per player and position 136 | playerMatchsums = ip.getPlayerMatchsums(matches=matches, token=token) 137 | 138 | # get kpi matchsums for match per squad 139 | squadMatchsums = ip.getSquadMatchsums(matches=matches, token=token) 140 | 141 | # define positions to get scores aggregated by 142 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 143 | 144 | # get player scores and ratios for match and positions per player 145 | playerMatchScores = ip.getPlayerMatchScores( 146 | matches=matches, 147 | positions=positions, 148 | token=token 149 | ) 150 | 151 | # get squad scores 
and ratios for match per squad 152 | squadMatchScores = ip.getSquadMatchScores(matches=matches, token=token) 153 | ``` 154 | 155 | In case you wish to retrieve data for multiple matches, we suggest using 156 | the following method to do so in order to minimize the amount of 157 | requests sent to the API. Let’s also get the event data for the RB 158 | Leipzig vs FSV Mainz 05 game (matchId = 84350) from the same day: 159 | 160 | ```python 161 | # define list of matches 162 | matches = [84344, 84350] 163 | 164 | # apply getEvents function to a set of matchIds 165 | events = ip.getEvents( 166 | matches=matches, 167 | token=token, 168 | include_kpis=True, 169 | include_set_pieces=True 170 | ) 171 | 172 | # get set piece data including KPI aggregates 173 | setPieces = ip.getSetPieces(matches=matches, token=token) 174 | 175 | # get matchsums for matches per player and position 176 | playerMatchsums = ip.getPlayerMatchsums(matches=matches, token=token) 177 | 178 | # get matchsums for matches per squad 179 | squadMatchsums = ip.getSquadMatchsums(matches=matches, token=token) 180 | 181 | # define positions to get scores aggregated by 182 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 183 | 184 | # get player scores and ratios for match and positions per player 185 | playerMatchScores = ip.getPlayerMatchScores( 186 | matches=matches, 187 | token=token, 188 | positions=positions # optional 189 | ) 190 | 191 | # get squad scores and ratios for match per squad 192 | squadMatchScores = ip.getSquadMatchScores(matches=matches, token=token) 193 | ``` 194 | 195 | ### Retrieve Iteration Level Data 196 | 197 | Starting from API version V5, we also offer an endpoint to get KPI average values 198 | per iteration on player as well as squad level. These averages are calculated by 199 | dividing the kpi sum of all individual matches by the sum of matchShares the player 200 | accumulated at a given position. 
On a team level we divide the score by the 201 | amount of matches played by the team. 202 | Also, we provide you with IMPECT scores and ratios that you might know from our 203 | Scouting and Analysis portals. On player level, these are calculated across 204 | positions which is why you have to supply the function with a list of positions 205 | you want to retrieve data for. 206 | Let's assume you were interested in wing backs in the 2022/2023 Bundesliga season, 207 | then you could use this code snippet: 208 | 209 | ```python 210 | # define iteration ID 211 | iteration = 518 212 | 213 | # define positions to get scores aggregated by 214 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 215 | 216 | # get player kpi averages for iteration 217 | playerIterationAverages = ip.getPlayerIterationAverages( 218 | iteration=iteration, 219 | token=token 220 | ) 221 | 222 | # get squad kpi averages for iteration 223 | squadIterationAverages = ip.getSquadIterationAverages( 224 | iteration=iteration, 225 | token=token 226 | ) 227 | 228 | # get player scores and ratios for iteration and positions 229 | playerIterationScores = ip.getPlayerIterationScores( 230 | iteration=iteration, 231 | token=token, 232 | positions=positions # optional 233 | ) 234 | 235 | # get squad scores and ratios for iteration 236 | squadIterationScores = ip.getSquadIterationScores( 237 | iteration=iteration, 238 | token=token 239 | ) 240 | ``` 241 | 242 | The squad rating values that you can find on the league ranking in the Scouting portal can 243 | also be retrieved from the API. In addition, we also provide you with the more detailed squad 244 | coefficients that can be used to make match predictions. See [this example script](https://github.com/ImpectAPI/impectPy/blob/release/examples/predict_matches.ipynb) 245 | for further details. 
246 | 247 | ```python 248 | # get squad rating for iteration 249 | squadRatings = ip.getSquadRatings(iteration=iteration, token=token) 250 | 251 | # get squad coefficients for iteration 252 | squadCoefficients = ip.getSquadCoefficients(iteration=iteration, token=token) 253 | ``` 254 | 255 | You can now also retrieve the positional profile scores for players via our API. This 256 | includes profiles that you created through the scouting portal. The function requires a 257 | positional input that determines which matchShares to consider when computing the scores. 258 | In the below example, all matchShares that a player played as either a left back or a right 259 | back are included for profile score calculation. 260 | 261 | ```python 262 | # define iteration ID 263 | iteration = 518 264 | 265 | # define positions to get scores aggregated by 266 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 267 | 268 | # get player profile scores 269 | playerProfileScores = ip.getPlayerProfileScores( 270 | iteration=iteration, 271 | positions=positions, 272 | token=token 273 | ) 274 | ``` 275 | 276 | Please keep in mind that Impect enforces a rate limit of 10 requests per second 277 | per user. A token bucket logic has been implemented to restrict the amount of API 278 | calls made on the client side already. The rate limit is read from the first limit 279 | policy sent back by the API, so if this limit increases over time, this package will 280 | act accordingly. 281 | 282 | ### SportsCodeXML 283 | 284 | It is also possible to convert a dataframe containing event data into an XML file, 285 | that can be imported into Videotools such as FOCUS. 
The XML can be customized to a certain 286 | degree using the following input variables: 287 | * `codeTag`: Customize code tag selection (Choose what goes into the `code` tag) 288 | * `labels`: Customize labels included (provide a list of labels to be included) 289 | * `kpis`: Customize KPIs included (provide a list of KPIs to be included) 290 | * `labelSorting`: Enable/Disable label sorting (Labels and KPIs are usually prefixed with a sorting number (e.g. `01 | `) or the word `KPI: ` to enable easier filtering in your video tool.) 291 | * `sequencing`: Disable sequencing (A sequence of `RECEPTION > DRIBBLE > PASS` is split into 3 instances: `RECEPTION`, `DRIBBLE`, `PASS`) 292 | * `buckets`: Disable Label/KPI buckets (e.g. conversion from value `0.1` to bucket `[0,1[`) 293 | 294 | To see a full list of available codeTags, labels, KPIs and allowed combinations of these, 295 | please see the beginning of the [function definition](https://github.com/ImpectAPI/impectPy/blob/release/impectPy/xml.py). 296 | 297 | Please make sure to only retrieve event data for 298 | one game at a time.
Let's use the Bayern vs Dortmund game from earlier as an example: 299 | 300 | ```python 301 | # define matchId 302 | matches = [84344] 303 | 304 | # get event data for matchId 305 | events = ip.getEvents(matches=matches, token=token) 306 | 307 | # define lead and lag time in seconds 308 | lead = 3 309 | lag = 3 310 | 311 | # define period start offsets from video start in seconds 312 | p1Start = 16 # first half kickoff happens after 16 seconds in your video file 313 | p2Start = 48 * 60 + 53 # second half kickoff happens after 48 minutes and 53 seconds in your video file 314 | p3Start = 0 # set to timestamp of the kickoff of the first half of extra time 315 | p4Start = 0 # set to timestamp of the kickoff of the second half of extra time 316 | p5Start = 0 # set to timestamp of the first penalty of the penalty shootout 317 | 318 | # generate xml 319 | xml_tree = ip.generateXML( 320 | events=events, 321 | lead=lead, 322 | lag=lag, 323 | p1Start=p1Start, 324 | p2Start=p2Start, 325 | p3Start=p3Start, 326 | p4Start=p4Start, 327 | p5Start=p5Start, 328 | codeTag="playerName", # Use the playerName for the Code Tag 329 | labels=["action", "opponents"], # defaults to None to include all available labels 330 | kpis=["BYPASSED_OPPONENTS", "BYPASSED_DEFENDERS"], # defaults to None to include all available KPIs 331 | labelSorting=False, # Disable sorting prefixes 332 | sequencing=False, # Disable merging of consecutive events by the same player into one sequence 333 | buckets=False # Use precise KPI and label values instead of predefined buckets 334 | ) 335 | 336 | # write to xml file 337 | with open(f"match{matches[0]}_" 338 | # add home team name 339 | f"{events.homeSquadName.unique().tolist()[0].replace(' ', '_')}" 340 | f"_vs_" 341 | # add away team name 342 | f"{events.awaySquadName.unique().tolist()[0].replace(' ', '_')}" 343 | f".xml", 344 | "wb") as file: 345 | xml_tree.write(file, 346 | xml_declaration=True, 347 | encoding='utf-8', 348 | method="xml") 349 | ``` 350 | 351 |
## Object-Oriented Package Version 352 | 353 | Since version 2.4.0, there is another way to call the familiar functions in a more object-oriented way. 354 | An object of the class "Impect" can be used to query the API. This new object offers a slightly enhanced 355 | performance and stores your token as an object attribute. This means you no longer have to include it in 356 | every function call. This new IMPECT object can be used as shown in the example below: 357 | 358 | ```python 359 | from impectPy import Impect 360 | 361 | # define login credentials 362 | username = "yourUsername" 363 | password = "yourPassword" 364 | 365 | # create Impect instance and login 366 | api = Impect() 367 | api.login(username, password) 368 | 369 | # define iteration ID 370 | iteration = 518 371 | 372 | # define matchId 373 | matches = [84344] 374 | 375 | # define positions to get scores/profiles aggregated by 376 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 377 | 378 | # get iterations 379 | iterations = api.getIterations() 380 | 381 | # get squad ratings 382 | ratings = api.getSquadRatings(iteration=iteration) 383 | 384 | # get squad coefficients 385 | coefficients = api.getSquadCoefficients(iteration=iteration) 386 | 387 | # get matches 388 | matchplan = api.getMatches(iteration=iteration) 389 | 390 | # get match info 391 | formations = api.getFormations(matches=matches) 392 | substitutions = api.getSubstitutions(matches=matches) 393 | startingPositions = api.getStartingPositions(matches=matches) 394 | 395 | # get match events 396 | events = api.getEvents(matches=matches, include_kpis=False, include_set_pieces=False) 397 | 398 | # get set pieces 399 | set_pieces = api.getSetPieces(matches=matches) 400 | 401 | # get player iteration averages 402 | playerIterationAverages = api.getPlayerIterationAverages(iteration=iteration) 403 | 404 | # get player matchsums 405 | playerMatchsums = api.getPlayerMatchsums(matches=matches) 406 | 407 | # get squad iteration 
averages 408 | squadIterationAverages = api.getSquadIterationAverages(iteration=iteration) 409 | 410 | # get squad matchsums 411 | squadMatchsums = api.getSquadMatchsums(matches=matches) 412 | 413 | # get player match scores 414 | playerMatchScores = api.getPlayerMatchScores(matches=matches, positions=positions) # specific positions 415 | playerMatchScoresAll = api.getPlayerMatchScores(matches=matches) # all positions 416 | 417 | # get squad match scores 418 | squadMatchScores = api.getSquadMatchScores(matches=matches) 419 | 420 | # get player iteration scores 421 | playerIterationScores = api.getPlayerIterationScores(iteration=iteration, positions=positions) # specific positions 422 | playerIterationScoresAll = api.getPlayerIterationScores(iteration=iteration) # all positions 423 | 424 | # get squad iteration scores 425 | squadIterationScores = api.getSquadIterationScores(iteration=iteration) 426 | 427 | # get player profile scores 428 | playerProfileScores = api.getPlayerProfileScores(iteration=iteration, positions=positions) 429 | ``` 430 | 431 | ## Final Notes 432 | 433 | Further documentation on the data and explanations of variables can be 434 | found in our [Glossary](https://glossary.impect.com/). 
-------------------------------------------------------------------------------- /examples/predict_matches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# load packages\n", 10 | "import impectPy\n", 11 | "import numpy as np\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# Set login credentials\n", 22 | "username = \"yourUsername\"\n", 23 | "password = \"yourPassword\"\n", 24 | "\n", 25 | "# create Impect instance and login\n", 26 | "api = impectPy.Impect()\n", 27 | "api.login(username=username, password=password)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# set iterationId\n", 37 | "iteration = 1385\n", 38 | "\n", 39 | "# fetch matches for iteration\n", 40 | "matches = api.getMatches(iteration=iteration)\n", 41 | "\n", 42 | "# show matches dataframe\n", 43 | "matches.head()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# fetch prediction model coefficients\n", 53 | "coefficients = api.getSquadCoefficients(iteration=iteration)\n", 54 | "\n", 55 | "# show coefficients\n", 56 | "coefficients.head()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# prepare columns for merging\n", 66 | "matches[\"date\"] = pd.to_datetime(matches[\"scheduledDate\"]).dt.tz_localize(None).dt.normalize()\n", 67 | "matches[\"homeSquadId\"] = matches[\"homeSquadId\"].astype(\"int64\")\n", 68 | "matches[\"awaySquadId\"] = matches[\"awaySquadId\"].astype(\"int64\")\n", 69 | "coefficients[\"date\"] = 
pd.to_datetime(coefficients[\"date\"]).dt.normalize()\n", 70 | "coefficients[\"squadId\"] = coefficients[\"squadId\"].astype(\"int64\")\n", 71 | "\n", 72 | "# sort by date\n", 73 | "matches = matches.sort_values(\"date\")\n", 74 | "coefficients = coefficients.sort_values(\"date\")\n", 75 | "\n", 76 | "# merge competition-specific coefficients using the most recent date\n", 77 | "matches = pd.merge_asof(\n", 78 | " matches,\n", 79 | " coefficients[\n", 80 | " [\"date\", \"interceptCoefficient\", \"homeCoefficient\", \"competitionCoefficient\"]\n", 81 | " ].drop_duplicates(\"date\"),\n", 82 | " on=\"date\",\n", 83 | " direction=\"backward\"\n", 84 | ")\n", 85 | "\n", 86 | "# merge squad-specific coefficients using the most recent date\n", 87 | "def get_squad_coeffs(row, coeff_df, squad_id_col) -> pd.Series:\n", 88 | " squad_id = row[squad_id_col]\n", 89 | " match_date = row[\"date\"]\n", 90 | " squad_coeffs = coeff_df[(coeff_df[\"squadId\"] == squad_id) & (coeff_df[\"date\"] <= match_date)]\n", 91 | " if len(squad_coeffs) > 0:\n", 92 | " latest = squad_coeffs.sort_values(\"date\").iloc[-1]\n", 93 | " return pd.Series({\"attack\": latest[\"attackCoefficient\"], \"defense\": latest[\"defenseCoefficient\"]})\n", 94 | " return pd.Series({\"attack\": None, \"defense\": None})\n", 95 | "\n", 96 | "# merge homeSquad coefficients\n", 97 | "home_coeffs = matches.apply(lambda row: get_squad_coeffs(row, coefficients, \"homeSquadId\"), axis=1)\n", 98 | "matches[\"attackCoefficientHome\"] = home_coeffs[\"attack\"]\n", 99 | "matches[\"defenseCoefficientHome\"] = home_coeffs[\"defense\"]\n", 100 | "\n", 101 | "# merge awaySquad coefficients\n", 102 | "away_coeffs = matches.apply(lambda row: get_squad_coeffs(row, coefficients, \"awaySquadId\"), axis=1)\n", 103 | "matches[\"attackCoefficientAway\"] = away_coeffs[\"attack\"]\n", 104 | "matches[\"defenseCoefficientAway\"] = away_coeffs[\"defense\"]\n", 105 | "\n", 106 | "# show new matches dataframe\n", 107 | "matches.head()" 108 | ] 
109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# compute predictions\n", 117 | "matches[\"predHome\"] = np.exp(\n", 118 | " matches[\"interceptCoefficient\"] +\n", 119 | " matches[\"homeCoefficient\"] +\n", 120 | " matches[\"competitionCoefficient\"] +\n", 121 | " matches[\"attackCoefficientHome\"] +\n", 122 | " matches[\"defenseCoefficientAway\"]\n", 123 | ")\n", 124 | "matches[\"predAway\"] = np.exp(\n", 125 | " matches[\"interceptCoefficient\"] +\n", 126 | " matches[\"competitionCoefficient\"] +\n", 127 | " matches[\"attackCoefficientAway\"] +\n", 128 | " matches[\"defenseCoefficientHome\"]\n", 129 | ")\n", 130 | "\n", 131 | "# show matches including predictions\n", 132 | "matches.head()" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "venv", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.11.7" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 4 157 | } 158 | -------------------------------------------------------------------------------- /impectPy/__init__.py: -------------------------------------------------------------------------------- 1 | # define version attribute 2 | __version__ = "2.5.2" 3 | 4 | # import modules 5 | from .access_token import getAccessToken 6 | from .iterations import getIterations 7 | from .matches import getMatches 8 | from .events import getEvents 9 | from .matchsums import getPlayerMatchsums, getSquadMatchsums 10 | from .iteration_averages import getPlayerIterationAverages, getSquadIterationAverages 11 | from .player_scores import getPlayerMatchScores, getPlayerIterationScores 12 | from 
.squad_scores import getSquadMatchScores, getSquadIterationScores 13 | from .player_profile_scores import getPlayerProfileScores 14 | from .xml import generateXML 15 | from .set_pieces import getSetPieces 16 | from .squad_ratings import getSquadRatings 17 | from .squad_coefficients import getSquadCoefficients 18 | from .match_info import getFormations, getSubstitutions, getStartingPositions 19 | from .config import Config as Config 20 | from .impect import Impect as Impect -------------------------------------------------------------------------------- /impectPy/access_token.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import urllib 3 | import requests 4 | from impectPy.helpers import RateLimitedAPI 5 | 6 | ###### 7 | # 8 | # This function returns an access token for the external API 9 | # 10 | ###### 11 | 12 | 13 | # define function 14 | def getAccessToken(username: str, password: str, session: requests.Session = requests.Session()) -> str: 15 | 16 | # create an instance of RateLimitedAPI 17 | connection = RateLimitedAPI(session) 18 | 19 | return getAccessTokenFromUrl(username, password, connection, "https://login.impect.com/auth/realms/production/protocol/openid-connect/token") 20 | 21 | def getAccessTokenFromUrl(username: str, password: str, connection: RateLimitedAPI, token_url: str) -> str: 22 | 23 | # define request parameters 24 | login = 'client_id=api&grant_type=password&username=' + urllib.parse.quote( 25 | username) + '&password=' + urllib.parse.quote(password) 26 | 27 | # define request headers 28 | connection.session.headers.update({"body": login, "Content-Type": "application/x-www-form-urlencoded"}) 29 | 30 | # request access token 31 | response = connection.make_api_request(url=token_url, method="POST", data=login) 32 | 33 | # remove headers again 34 | connection.session.headers.clear() 35 | 36 | # get access token from response and return it 37 | token = response.json()["access_token"] 
38 | return token -------------------------------------------------------------------------------- /impectPy/config.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | def __init__(self, host: str = 'https://api.impect.com', oidc_token_endpoint: str = 'https://login.impect.com/auth/realms/production/protocol/openid-connect/token'): 3 | self.HOST = host 4 | self.OIDC_TOKEN_ENDPOINT = oidc_token_endpoint -------------------------------------------------------------------------------- /impectPy/events.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import numpy as np 3 | import pandas as pd 4 | import requests 5 | import re 6 | from impectPy.helpers import RateLimitedAPI, ForbiddenError 7 | from .matches import getMatchesFromHost 8 | from .iterations import getIterationsFromHost 9 | 10 | ###### 11 | # 12 | # This function returns a pandas dataframe that contains all events for a 13 | # given match 14 | # 15 | ###### 16 | 17 | 18 | def getEvents( 19 | matches: list, token: str, include_kpis: bool = True, 20 | include_set_pieces: bool = True, session: requests.Session = requests.Session() 21 | ) -> pd.DataFrame: 22 | 23 | # create an instance of RateLimitedAPI 24 | connection = RateLimitedAPI(session) 25 | 26 | # construct header with access token 27 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 28 | 29 | return getEventsFromHost(matches, include_kpis, include_set_pieces, connection, "https://api.impect.com") 30 | 31 | # define function 32 | def getEventsFromHost( 33 | matches: list, include_kpis: bool, include_set_pieces: bool, connection: RateLimitedAPI, host: str 34 | ) -> pd.DataFrame: 35 | 36 | # check input for matches argument 37 | if not isinstance(matches, list): 38 | raise Exception("Argument 'matches' must be a list of integers.") 39 | 40 | # get match info 41 | match_data = pd.concat( 42 | map(lambda match: 
connection.make_api_request_limited( 43 | url=f"{host}/v5/customerapi/matches/{match}", 44 | method="GET" 45 | ).process_response( 46 | endpoint="Match Info" 47 | ), 48 | matches), 49 | ignore_index=True) 50 | 51 | # filter for matches that are unavailable 52 | fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() 53 | 54 | # drop matches that are unavailable from list of matches 55 | matches = [match for match in matches if match not in fail_matches] 56 | 57 | # raise exception if no matches remaining or report removed matches 58 | if len(fail_matches) > 0: 59 | if len(matches) == 0: 60 | raise Exception("All supplied matches are unavailable. Execution stopped.") 61 | else: 62 | print(f"The following matches are not available yet and were ignored:\n{fail_matches}") 63 | 64 | # extract iterationIds 65 | iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) 66 | 67 | # get match events 68 | events = pd.concat( 69 | map(lambda match: connection.make_api_request_limited( 70 | url=f"{host}/v5/customerapi/matches/{match}/events", 71 | method="GET" 72 | ).process_response( 73 | endpoint="Events" 74 | ).assign( 75 | matchId=match 76 | ), 77 | matches), 78 | ignore_index=True) 79 | 80 | # account for matches without dribbles, duels or opponents tagged 81 | attributes = [ 82 | "dribbleDistance", 83 | "dribbleType", 84 | "dribbleResult", 85 | "dribblePlayerId", 86 | "duelDuelType", 87 | "duelPlayerId", 88 | "opponentCoordinatesX", 89 | "opponentCoordinatesY", 90 | "opponentAdjCoordinatesX", 91 | "opponentAdjCoordinatesY" 92 | ] 93 | 94 | # add attribute if it doesn't exist in df 95 | for attribute in attributes: 96 | if attribute not in events.columns: 97 | events[attribute] = np.nan 98 | 99 | # get players 100 | players = pd.concat( 101 | map(lambda iteration: connection.make_api_request_limited( 102 | url=f"{host}/v5/customerapi/iterations/{iteration}/players", 103 | method="GET" 104 | 
).process_response( 105 | endpoint="Players" 106 | ), 107 | iterations), 108 | ignore_index=True)[["id", "commonname"]].drop_duplicates() 109 | 110 | # get squads 111 | squads = pd.concat( 112 | map(lambda iteration: connection.make_api_request_limited( 113 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 114 | method="GET" 115 | ).process_response( 116 | endpoint="Squads" 117 | ), 118 | iterations), 119 | ignore_index=True)[["id", "name"]].drop_duplicates() 120 | 121 | # get coaches 122 | coaches_blacklisted = False 123 | try: 124 | coaches = pd.concat( 125 | map(lambda iteration: connection.make_api_request_limited( 126 | url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", 127 | method="GET" 128 | ).process_response( 129 | endpoint="Coaches", 130 | raise_exception=False 131 | ), 132 | iterations), 133 | ignore_index=True)[["id", "name"]].drop_duplicates() 134 | except KeyError: 135 | # no coaches found, create empty df 136 | coaches = pd.DataFrame(columns=["id", "name"]) 137 | except ForbiddenError: 138 | coaches_blacklisted = True 139 | 140 | # get matches 141 | matchplan = pd.concat( 142 | map(lambda iteration: getMatchesFromHost( 143 | iteration=iteration, 144 | connection=connection, 145 | host=host 146 | ), 147 | iterations), 148 | ignore_index=True) 149 | 150 | # get iterations 151 | iterations = getIterationsFromHost(connection=connection, host=host) 152 | 153 | if include_kpis: 154 | # get event scorings 155 | scorings = pd.concat( 156 | map(lambda match: connection.make_api_request_limited( 157 | url=f"{host}/v5/customerapi/matches/{match}/event-kpis", 158 | method="GET" 159 | ).process_response( 160 | endpoint="Scorings" 161 | ), 162 | matches), 163 | ignore_index=True) 164 | 165 | # get kpis 166 | kpis = connection.make_api_request_limited( 167 | url=f"{host}/v5/customerapi/kpis/event", 168 | method="GET" 169 | ).process_response( 170 | endpoint="EventKPIs" 171 | )[["id", "name"]] 172 | 173 | if include_set_pieces: 174 | # get set 
piece data 175 | set_pieces = pd.concat( 176 | map(lambda match: connection.make_api_request_limited( 177 | url=f"{host}/v5/customerapi/matches/{match}/set-pieces", 178 | method="GET" 179 | ).process_response( 180 | endpoint="Set-Pieces" 181 | ), 182 | matches), 183 | ignore_index=True 184 | ).rename( 185 | columns={"id": "setPieceId"} 186 | ).explode("setPieceSubPhase", ignore_index=True) 187 | 188 | # unpack setPieceSubPhase column 189 | set_pieces = pd.concat( 190 | [ 191 | set_pieces.drop(columns=["setPieceSubPhase"]), 192 | pd.json_normalize(set_pieces["setPieceSubPhase"]).add_prefix("setPieceSubPhase.") 193 | ], 194 | axis=1 195 | ).rename(columns=lambda x: re.sub(r"\.(.)", lambda y: y.group(1).upper(), x)) 196 | 197 | # fix potential typing issues 198 | events.pressingPlayerId = events.pressingPlayerId.astype("Int64") 199 | events.fouledPlayerId = events.fouledPlayerId.astype("Int64") 200 | events.passReceiverPlayerId = events.passReceiverPlayerId.astype("Int64") 201 | events.duelPlayerId = events.duelPlayerId.astype("Int64") 202 | events.fouledPlayerId = events.fouledPlayerId.astype("Int64") 203 | if include_set_pieces: 204 | set_pieces.setPieceSubPhaseMainEventPlayerId = set_pieces.setPieceSubPhaseMainEventPlayerId.astype("Int64") 205 | set_pieces.setPieceSubPhaseFirstTouchPlayerId = set_pieces.setPieceSubPhaseFirstTouchPlayerId.astype("Int64") 206 | set_pieces.setPieceSubPhaseSecondTouchPlayerId = set_pieces.setPieceSubPhaseSecondTouchPlayerId.astype("Int64") 207 | 208 | # start merging dfs 209 | 210 | # merge events with secondary data 211 | events = events.merge( 212 | squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}), 213 | left_on="squadId", 214 | right_on="squadId", 215 | how="left", 216 | suffixes=("", "_home") 217 | ).merge( 218 | squads[["id", "name"]].rename(columns={"id": "squadId", "name": "currentAttackingSquadName"}), 219 | left_on="currentAttackingSquadId", 220 | right_on="squadId", 221 | how="left", 222 | 
suffixes=("", "_away") 223 | ).merge( 224 | players[["id", "commonname"]].rename(columns={"id": "playerId", "commonname": "playerName"}), 225 | left_on="playerId", 226 | right_on="playerId", 227 | how="left", 228 | suffixes=("", "_right") 229 | ).merge( 230 | players[["id", "commonname"]].rename( 231 | columns={"id": "pressingPlayerId", "commonname": "pressingPlayerName"}), 232 | left_on="pressingPlayerId", 233 | right_on="pressingPlayerId", 234 | how="left", 235 | suffixes=("", "_right") 236 | ).merge( 237 | players[["id", "commonname"]].rename(columns={"id": "fouledPlayerId", "commonname": "fouledPlayerName"}), 238 | left_on="fouledPlayerId", 239 | right_on="fouledPlayerId", 240 | how="left", 241 | suffixes=("", "_right") 242 | ).merge( 243 | players[["id", "commonname"]].rename(columns={"id": "duelPlayerId", "commonname": "duelPlayerName"}), 244 | left_on="duelPlayerId", 245 | right_on="duelPlayerId", 246 | how="left", 247 | suffixes=("", "_right") 248 | ).merge( 249 | players[["id", "commonname"]].rename( 250 | columns={"id": "passReceiverPlayerId", "commonname": "passReceiverPlayerName"}), 251 | left_on="passReceiverPlayerId", 252 | right_on="passReceiverPlayerId", 253 | how="left", 254 | suffixes=("", "_right") 255 | ).merge( 256 | players[["id", "commonname"]].rename( 257 | columns={"id": "dribbleOpponentPlayerId", "commonname": "dribbleOpponentPlayerName"}), 258 | left_on="dribblePlayerId", 259 | right_on="dribbleOpponentPlayerId", 260 | how="left", 261 | suffixes=("", "_right") 262 | ).merge( 263 | matchplan, 264 | left_on="matchId", 265 | right_on="id", 266 | how="left", 267 | suffixes=("", "_right") 268 | ).merge( 269 | match_data[["id", "squadHomeCoachId", "squadAwayCoachId"]].rename( 270 | columns={"squadHomeCoachId": "homeSquadCoachId", "squadAwayCoachId": "awaySquadCoachId"}), 271 | left_on="matchId", 272 | right_on="id", 273 | how="left", 274 | suffixes=("", "_right") 275 | ).merge( 276 | iterations, 277 | left_on="iterationId", 278 | right_on="id", 
279 | how="left", 280 | suffixes=("", "_right") 281 | ) 282 | 283 | if not coaches_blacklisted: 284 | 285 | # convert coachId to integer if it is None 286 | events["homeSquadCoachId"] = events["homeSquadCoachId"].astype("Int64") 287 | events["awaySquadCoachId"] = events["awaySquadCoachId"].astype("Int64") 288 | events = events.merge( 289 | coaches[["id", "name"]].rename(columns={"id": "homeCoachId", "name": "homeCoachName"}), 290 | left_on="homeSquadCoachId", 291 | right_on="homeCoachId", 292 | how="left", 293 | suffixes=("", "_right") 294 | ).merge( 295 | coaches[["id", "name"]].rename(columns={"id": "awayCoachId", "name": "awayCoachName"}), 296 | left_on="awaySquadCoachId", 297 | right_on="awayCoachId", 298 | how="left", 299 | suffixes=("", "_right") 300 | ) 301 | 302 | if include_kpis: 303 | # unnest scorings and full join with kpi list to ensure all kpis are present 304 | scorings = scorings.merge(kpis, left_on="kpiId", right_on="id", how="outer") \ 305 | .sort_values("kpiId") \ 306 | .drop("kpiId", axis=1) \ 307 | .fillna({"eventId": "", "position": "", "playerId": ""}) \ 308 | .pivot_table(index=["eventId", "position", "playerId"], columns="name", values="value", aggfunc="sum", 309 | fill_value=None) \ 310 | .reset_index() \ 311 | .loc[lambda df: df["eventId"].notna()] 312 | 313 | # Replace empty strings with None in the eventId and playerId column 314 | scorings["eventId"] = scorings["eventId"].mask(scorings["eventId"] == "", None) 315 | scorings["playerId"] = scorings["playerId"].mask(scorings["playerId"] == "", None) 316 | events["playerId"] = events["playerId"].mask(events["playerId"] == "", None) 317 | 318 | # Convert column eventId from float to int 319 | scorings["eventId"] = scorings["eventId"].astype(pd.Int64Dtype()) 320 | scorings["playerId"] = scorings["playerId"].astype(pd.Int64Dtype()) 321 | events["playerId"] = events["playerId"].astype(pd.Int64Dtype()) 322 | 323 | # merge events and scorings 324 | events = events.merge(scorings, 325 | 
left_on=["playerPosition", "playerId", "id"], 326 | right_on=["position", "playerId", "eventId"], 327 | how="left", 328 | suffixes=("", "_scorings")) 329 | 330 | if include_set_pieces: 331 | events = events.merge( 332 | set_pieces, 333 | left_on=["setPieceId", "setPieceSubPhaseId"], 334 | right_on=["setPieceId", "setPieceSubPhaseId"], 335 | how="left", 336 | suffixes=("", "_right") 337 | ).merge( 338 | players[["id", "commonname"]].rename( 339 | columns={ 340 | "id": "setPieceSubPhaseMainEventPlayerId", 341 | "commonname": "setPieceSubPhaseMainEventPlayerName" 342 | } 343 | ), 344 | left_on="setPieceSubPhaseMainEventPlayerId", 345 | right_on="setPieceSubPhaseMainEventPlayerId", 346 | how="left", 347 | suffixes=("", "_right") 348 | ).merge( 349 | players[["id", "commonname"]].rename( 350 | columns={ 351 | "id": "setPieceSubPhasePassReceiverId", 352 | "commonname": "setPieceSubPhasePassReceiverName" 353 | } 354 | ), 355 | left_on="setPieceSubPhasePassReceiverId", 356 | right_on="setPieceSubPhasePassReceiverId", 357 | how="left", 358 | suffixes=("", "_right") 359 | ).merge( 360 | players[["id", "commonname"]].rename( 361 | columns={ 362 | "id": "setPieceSubPhaseFirstTouchPlayerId", 363 | "commonname": "setPieceSubPhaseFirstTouchPlayerName" 364 | } 365 | ), 366 | left_on="setPieceSubPhaseFirstTouchPlayerId", 367 | right_on="setPieceSubPhaseFirstTouchPlayerId", 368 | how="left", 369 | suffixes=("", "_right") 370 | ).merge( 371 | players[["id", "commonname"]].rename( 372 | columns={ 373 | "id": "setPieceSubPhaseSecondTouchPlayerId", 374 | "commonname": "setPieceSubPhaseSecondTouchPlayerName" 375 | } 376 | ), 377 | left_on="setPieceSubPhaseSecondTouchPlayerId", 378 | right_on="setPieceSubPhaseSecondTouchPlayerId", 379 | how="left", 380 | suffixes=("", "_right") 381 | ) 382 | 383 | # rename some columns 384 | events = events.rename(columns={ 385 | "currentAttackingSquadId": "attackingSquadId", 386 | "currentAttackingSquadName": "attackingSquadName", 387 | "duelDuelType": 
"duelType", 388 | "scheduledDate": "dateTime", 389 | "gameTimeGameTime": "gameTime", 390 | "gameTimeGameTimeInSec": "gameTimeInSec", 391 | "eventId": "eventId_scorings", 392 | "id": "eventId", 393 | "index": "eventNumber", 394 | "phaseIndex": "setPiecePhaseIndex", 395 | "setPieceMainEvent": "setPieceSubPhaseMainEvent", 396 | }) 397 | 398 | # define desired column order 399 | event_cols = [ 400 | "matchId", 401 | "dateTime", 402 | "competitionId", 403 | "competitionName", 404 | "competitionType", 405 | "iterationId", 406 | "season", 407 | "matchDayIndex", 408 | "matchDayName", 409 | "homeSquadId", 410 | "homeSquadName", 411 | "homeSquadCountryId", 412 | "homeSquadCountryName", 413 | "homeCoachId", 414 | "homeCoachName", 415 | "homeSquadType", 416 | "awaySquadId", 417 | "awaySquadName", 418 | "awaySquadCountryId", 419 | "awaySquadCountryName", 420 | "awaySquadType", 421 | "awayCoachId", 422 | "awayCoachName", 423 | "eventId", 424 | "eventNumber", 425 | "sequenceIndex", 426 | "periodId", 427 | "gameTime", 428 | "gameTimeInSec", 429 | "duration", 430 | "squadId", 431 | "squadName", 432 | "attackingSquadId", 433 | "attackingSquadName", 434 | "phase", 435 | "playerId", 436 | "playerName", 437 | "playerPosition", 438 | "playerPositionSide", 439 | "actionType", 440 | "action", 441 | "bodyPart", 442 | "bodyPartExtended", 443 | "previousPassHeight", 444 | "result", 445 | "startCoordinatesX", 446 | "startCoordinatesY", 447 | "startAdjCoordinatesX", 448 | "startAdjCoordinatesY", 449 | "startPackingZone", 450 | "startPitchPosition", 451 | "startLane", 452 | "endCoordinatesX", 453 | "endCoordinatesY", 454 | "endAdjCoordinatesX", 455 | "endAdjCoordinatesY", 456 | "endPackingZone", 457 | "endPitchPosition", 458 | "endLane", 459 | "opponents", 460 | "pressure", 461 | "distanceToGoal", 462 | "pxTTeam", 463 | "pxTOpponent", 464 | "pressingPlayerId", 465 | "pressingPlayerName", 466 | "distanceToOpponent", 467 | "opponentCoordinatesX", 468 | "opponentCoordinatesY", 469 | 
"opponentAdjCoordinatesX", 470 | "opponentAdjCoordinatesY", 471 | "passReceiverType", 472 | "passReceiverPlayerId", 473 | "passReceiverPlayerName", 474 | "passDistance", 475 | "passAngle", 476 | "dribbleDistance", 477 | "dribbleType", 478 | "dribbleResult", 479 | "dribbleOpponentPlayerId", 480 | "dribbleOpponentPlayerName", 481 | "shotDistance", 482 | "shotAngle", 483 | "shotTargetPointY", 484 | "shotTargetPointZ", 485 | "shotWoodwork", 486 | "shotGkCoordinatesX", 487 | "shotGkCoordinatesY", 488 | "shotGkAdjCoordinatesX", 489 | "shotGkAdjCoordinatesY", 490 | "shotGkDivePointY", 491 | "shotGkDivePointZ", 492 | "duelType", 493 | "duelPlayerId", 494 | "duelPlayerName", 495 | "fouledPlayerId", 496 | "fouledPlayerName", 497 | "formationTeam", 498 | "formationOpponent", 499 | "inferredSetPiece", 500 | ] 501 | 502 | set_piece_cols = [ 503 | "setPieceId", 504 | "setPiecePhaseIndex", 505 | "setPieceCategory", 506 | "adjSetPieceCategory", 507 | "setPieceExecutionType", 508 | "setPieceSubPhaseId", 509 | "setPieceSubPhaseIndex", 510 | "setPieceSubPhaseStartZone", 511 | "setPieceSubPhaseCornerEndZone", 512 | "setPieceSubPhaseCornerType", 513 | "setPieceSubPhaseFreeKickEndZone", 514 | "setPieceSubPhaseFreeKickType", 515 | "setPieceSubPhaseMainEvent", 516 | "setPieceSubPhaseMainEventPlayerId", 517 | "setPieceSubPhaseMainEventPlayerName", 518 | "setPieceSubPhaseMainEventOutcome", 519 | "setPieceSubPhasePassReceiverId", 520 | "setPieceSubPhasePassReceiverName", 521 | "setPieceSubPhaseFirstTouchPlayerId", 522 | "setPieceSubPhaseFirstTouchPlayerName", 523 | "setPieceSubPhaseFirstTouchWon", 524 | "setPieceSubPhaseIndirectHeader", 525 | "setPieceSubPhaseSecondTouchPlayerId", 526 | "setPieceSubPhaseSecondTouchPlayerName", 527 | "setPieceSubPhaseSecondTouchWon", 528 | ] 529 | 530 | # add columns that might not exist in previous data versions 531 | for col in event_cols: 532 | if col not in events.columns: 533 | events[col] = np.nan 534 | 535 | # create order 536 | order = event_cols 537 
| 538 | if include_set_pieces: 539 | # add kpis 540 | order = order + set_piece_cols 541 | 542 | if include_kpis: 543 | # get list of kpi columns 544 | kpi_cols = kpis["name"].tolist() 545 | 546 | # add kpis 547 | order = order + kpi_cols 548 | 549 | if coaches_blacklisted: 550 | order = [col for col in order if col not in ["homeCoachId", "homeCoachName", "awayCoachId", "awayCoachName"]] 551 | 552 | # reorder data 553 | events = events[order] 554 | 555 | # reorder rows 556 | events = events.sort_values(["matchId", "eventNumber"]) 557 | 558 | # return events 559 | return events -------------------------------------------------------------------------------- /impectPy/helpers.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import numpy as np 3 | import requests 4 | import time 5 | import pandas as pd 6 | import re 7 | from typing import Optional, Dict, Any 8 | import math 9 | 10 | 11 | ###### 12 | # 13 | # This class creates an object to handle rate-limited API requests 14 | # 15 | ###### 16 | 17 | 18 | class ForbiddenError(Exception): 19 | """Raised when the API returns a 403 Forbidden response.""" 20 | pass 21 | 22 | 23 | class RateLimitedAPI: 24 | def __init__(self, session: Optional[requests.Session] = None): 25 | """ 26 | Initializes a RateLimitedAPI object. 27 | 28 | Args: 29 | session (requests.Session): The session object to use for the API calls. 30 | """ 31 | self.session = session or requests.Session() # use the provided session or create a new session 32 | self.bucket = None # TokenBucket object to manage rate limit tokens 33 | 34 | # make a rate-limited API request 35 | def make_api_request_limited( 36 | self, url: str, method: str, data: Optional[Dict[str, str]] = None 37 | ) -> requests.Response: 38 | """ 39 | Executes an API call while applying the rate limit. 40 | 41 | Returns: 42 | requests.Response: The response returned by the API. 
43 | """ 44 | 45 | # check if bucket is not initialized 46 | if not self.bucket: 47 | # make an initial API call to get rate limit information 48 | response = self.make_api_request(url=url, method=method, data=data) 49 | 50 | # get rate limit policy 51 | policy = response.headers["RateLimit-Policy"] 52 | 53 | # extract maximum requests using regex 54 | capacity = int(re.sub(";.*", "", policy)) 55 | 56 | # extract time window using regex 57 | interval = int(re.sub(".*w=(\\d+).*", "\\1", policy)) 58 | 59 | # create TokenBucket 60 | self.bucket = TokenBucket( 61 | capacity=capacity, 62 | refill_after=interval, 63 | remaining=int(response.headers["RateLimit-Remaining"]) 64 | ) 65 | 66 | # return response 67 | return response 68 | 69 | # check if a token is available 70 | if self.bucket.isTokenAvailable(): 71 | # get API response 72 | response = self.make_api_request(url=url, method=method, data=data) 73 | 74 | # consume a token 75 | self.bucket.consumeToken() 76 | else: 77 | # wait for refill 78 | time.sleep( 79 | math.ceil( 80 | self.bucket.refill_after * 100 - ( 81 | time.time() - self.bucket.last_refill_time 82 | ) * 100 83 | ) / 100 84 | ) 85 | 86 | # call function again 87 | response = self.make_api_request_limited(url=url, method=method, data=data) 88 | 89 | # return response 90 | return response 91 | 92 | def make_api_request( 93 | self, url: str, method: str, data: Optional[Dict[str, Any]] = None, 94 | max_retries: int = 3, retry_delay: int = 1 95 | ) -> requests.Response: 96 | """ 97 | Executes an API call. 98 | 99 | Returns: 100 | requests.Response: The response returned by the API. 
101 | """ 102 | # try API call 103 | for i in range(max_retries): 104 | response = self.session.request(method=method, url=url, data=data) 105 | 106 | # check status code and return if 200 107 | if response.status_code == 200: 108 | # return response 109 | return response 110 | # check status code and retry if 429 111 | elif response.status_code == 429: 112 | print(f"Received status code {response.status_code} " 113 | f"({response.json().get('message', 'Rate Limit Exceeded')})" 114 | f", retrying in {retry_delay} seconds...") 115 | time.sleep(retry_delay) 116 | # check status code and terminate if 401 or 403 117 | elif response.status_code == 401: 118 | raise Exception(f"Received status code {response.status_code} " 119 | f"(You do not have API access.)\n" 120 | f"Request-ID: {response.headers['x-request-id']} " 121 | f"(Make sure to include this in any support request.)") 122 | elif response.status_code == 403: 123 | raise ForbiddenError(f"Received status code {response.status_code} " 124 | f"(You do not have access to this resource.)\n" 125 | f"Request-ID: {response.headers['x-request-id']} " 126 | f"(Make sure to include this in any support request.)") 127 | # check status code and terminate if other error 128 | else: 129 | raise Exception(f"Received status code {response.status_code} " 130 | f"({response.json().get('message', 'Unknown error')})\n" 131 | f"Request-ID: {response.headers['x-request-id']} " 132 | f"(Make sure to include this in any support request.)") 133 | 134 | 135 | ###### 136 | # 137 | # This class creates a token bucket that handles the rate limit returned by the API accordingly 138 | # 139 | ###### 140 | 141 | 142 | class TokenBucket: 143 | def __init__(self, capacity: int, refill_after: int = 1, remaining: int = 0): 144 | """ 145 | Initializes a TokenBucket object. 146 | 147 | Args: 148 | capacity (int): The maximum number of tokens the bucket can hold. 
149 | refill_after (int): The time period (in seconds) after which the bucket is refilled. 150 | remaining (int): The amount of tokens remaining at the moment of initialization. 151 | """ 152 | self.capacity = capacity # maximum number of tokens the bucket can hold 153 | self.refill_after = refill_after # time period (in seconds) after which the bucket is refilled 154 | self.tokens = remaining # number of tokens remaining at time of bucket creation 155 | self.last_refill_time = time.time() # time of the last token refill 156 | 157 | def addTokens(self): 158 | """ 159 | Refills the token bucket if the refill time has elapsed. 160 | """ 161 | now = time.time() # current time 162 | elapsed_time = now - self.last_refill_time # time elapsed since the last token refill 163 | if elapsed_time > self.refill_after: 164 | self.tokens = self.capacity # refill the bucket to its maximum capacity 165 | self.last_refill_time = now # update the last refill time to the current time 166 | 167 | def isTokenAvailable(self): 168 | """ 169 | Checks if at least one token is available in the bucket. 170 | 171 | Returns: 172 | bool: True if a token is available, False otherwise. 173 | """ 174 | self.addTokens() # ensure the token bucket is up-to-date 175 | return self.tokens >= 1 # return True if there is at least one token, False otherwise 176 | 177 | def consumeToken(self): 178 | """ 179 | Consumes a token from the bucket if available. 180 | 181 | Returns: 182 | bool: True if a token was consumed successfully, False otherwise. 
183 | """ 184 | if not self.isTokenAvailable(): # if no token is available, return False 185 | return False 186 | self.tokens -= 1 # decrement the token count by 1 187 | return True # return True to indicate successful token consumption 188 | 189 | 190 | ###### 191 | # 192 | # This function converts the response from an API call to a pandas dataframe, flattens it and fixes the column names 193 | # 194 | ###### 195 | 196 | 197 | def process_response(self: requests.Response, endpoint: str, raise_exception: bool = True) -> pd.DataFrame: 198 | # validate and get data from response 199 | result = validate_response(response=self, endpoint=endpoint, raise_exception=raise_exception) 200 | 201 | # convert to df 202 | result = pd.json_normalize(result) 203 | 204 | # fix column names using regex 205 | result = result.rename(columns=lambda x: re.sub(r"\.(.)", lambda y: y.group(1).upper(), x)) 206 | 207 | # return result 208 | return result 209 | 210 | 211 | # attach method to requests module 212 | requests.Response.process_response = process_response 213 | 214 | 215 | ###### 216 | # 217 | # This function unnests the idMappings key from an API response 218 | # 219 | ###### 220 | 221 | 222 | def unnest_mappings_dict(mapping_dict: dict) -> dict: 223 | # iterate over entry and unnest idMappings 224 | for entry in mapping_dict: 225 | # iterate over mappings 226 | for mapping in entry["idMappings"]: 227 | # get mapping data 228 | for provider, mapping_id in mapping.items(): 229 | # add mapping as key on iteration level 230 | entry[provider + "Id"] = mapping_id 231 | 232 | # return result 233 | return mapping_dict 234 | 235 | 236 | ###### 237 | # 238 | # This function unnests the idMappings key from a dataframe 239 | # 240 | ###### 241 | 242 | 243 | def unnest_mappings_df(df: pd.DataFrame, mapping_col: str) -> pd.DataFrame: 244 | # create empty df to store mappings 245 | df_mappings = pd.DataFrame(columns=["wyscoutId", "heimSpielId", "skillCornerId"]) 246 | 247 | # iterate over entry 
and unnest idMappings 248 | for index, entry in df.iterrows(): 249 | # iterate over mappings 250 | for mapping in entry[mapping_col]: 251 | # get mapping data 252 | for provider, mapping_ids in mapping.items(): 253 | # fix provider name 254 | if provider == "heim_spiel": 255 | provider = "heimSpiel" 256 | elif provider == "skill_corner": 257 | provider = "skillCorner" 258 | elif provider == "wyscout": 259 | pass 260 | else: 261 | raise Exception(f"Unknown provider: {provider}") 262 | 263 | # check if mapping is a dict with at least one entry 264 | if isinstance(mapping_ids, list): 265 | if len(mapping_ids) > 0: 266 | # add first mapping as key on iteration level 267 | df_mappings.loc[index, provider + "Id"] = mapping_ids[0] 268 | else: 269 | df_mappings.loc[index, provider + "Id"] = np.nan 270 | 271 | # merge with original df 272 | df = pd.concat([df, df_mappings], axis=1, ignore_index=False) 273 | 274 | # return result 275 | return df 276 | 277 | 278 | # define function to validate JSON response and return data 279 | def validate_response(response: requests.Response, endpoint: str, raise_exception: bool = True) -> dict: 280 | # get data from response 281 | data = response.json()["data"] 282 | 283 | # check if response contains data 284 | if len(data) == 0 and raise_exception: 285 | # raise exception 286 | raise Exception(f"The {endpoint} endpoint returned no data/ an empty list.") 287 | else: 288 | # return data 289 | return data -------------------------------------------------------------------------------- /impectPy/impect.py: -------------------------------------------------------------------------------- 1 | from impectPy.config import Config 2 | from .helpers import RateLimitedAPI 3 | from .access_token import getAccessTokenFromUrl 4 | from .iterations import getIterationsFromHost 5 | from .matches import getMatchesFromHost 6 | from .events import getEventsFromHost 7 | from .matchsums import getPlayerMatchsumsFromHost, getSquadMatchsumsFromHost 8 | from 
.iteration_averages import getPlayerIterationAveragesFromHost, getSquadIterationAveragesFromHost 9 | from .player_scores import getPlayerMatchScoresFromHost, getPlayerIterationScoresFromHost 10 | from .squad_scores import getSquadMatchScoresFromHost, getSquadIterationScoresFromHost 11 | from .player_profile_scores import getPlayerProfileScoresFromHost 12 | from .xml import generateXML 13 | from .set_pieces import getSetPiecesFromHost 14 | from .squad_ratings import getSquadRatingsFromHost 15 | from .squad_coefficients import getSquadCoefficientsFromHost 16 | from .match_info import getFormationsFromHost, getSubstitutionsFromHost, getStartingPositionsFromHost 17 | import pandas as pd 18 | from xml.etree import ElementTree as ET 19 | 20 | 21 | class Impect: 22 | def __init__(self, config: Config = Config(), connection: RateLimitedAPI = RateLimitedAPI()): 23 | self.__config = config 24 | self.connection = connection 25 | 26 | # login with username and password 27 | def login(self, username: str, password: str) -> str: 28 | self.__token = getAccessTokenFromUrl(username, password, self.connection, self.__config.OIDC_TOKEN_ENDPOINT) 29 | self.connection.session.headers.update({"Authorization": f"Bearer {self.__token}"}) 30 | return self.__token 31 | 32 | # use the given token for all calls of the instance 33 | def init(self, token: str): 34 | self.__token = token 35 | self.connection.session.headers.update({"Authorization": f"Bearer {self.__token}"}) 36 | 37 | def getIterations(self) -> pd.DataFrame: 38 | return getIterationsFromHost( 39 | self.connection, self.__config.HOST 40 | ) 41 | 42 | def getMatches(self, iteration: int) -> pd.DataFrame: 43 | return getMatchesFromHost( 44 | iteration, self.connection, self.__config.HOST 45 | ) 46 | 47 | def getEvents(self, matches: list, include_kpis: bool = True, include_set_pieces: bool = True) -> pd.DataFrame: 48 | return getEventsFromHost( 49 | matches, include_kpis, include_set_pieces, self.connection, self.__config.HOST 50 | 
) 51 | 52 | def getPlayerMatchsums(self, matches: list) -> pd.DataFrame: 53 | return getPlayerMatchsumsFromHost( 54 | matches, self.connection, self.__config.HOST 55 | ) 56 | 57 | def getSquadMatchsums(self, matches: list, ) -> pd.DataFrame: 58 | return getSquadMatchsumsFromHost( 59 | matches, self.connection, self.__config.HOST 60 | ) 61 | 62 | def getPlayerIterationAverages(self, iteration: int) -> pd.DataFrame: 63 | return getPlayerIterationAveragesFromHost( 64 | iteration, self.connection, self.__config.HOST 65 | ) 66 | 67 | def getSquadIterationAverages(self, iteration: int) -> pd.DataFrame: 68 | return getSquadIterationAveragesFromHost( 69 | iteration, self.connection, self.__config.HOST 70 | ) 71 | 72 | def getPlayerMatchScores(self, matches: list, positions: list = None) -> pd.DataFrame: 73 | return getPlayerMatchScoresFromHost( 74 | matches, self.connection, self.__config.HOST, positions 75 | ) 76 | 77 | def getPlayerIterationScores(self, iteration: int, positions: list = None) -> pd.DataFrame: 78 | return getPlayerIterationScoresFromHost( 79 | iteration, self.connection, self.__config.HOST, positions 80 | ) 81 | 82 | def getSquadMatchScores(self, matches: list) -> pd.DataFrame: 83 | return getSquadMatchScoresFromHost( 84 | matches, self.connection, self.__config.HOST 85 | ) 86 | 87 | def getSquadIterationScores(self, iteration: int) -> pd.DataFrame: 88 | return getSquadIterationScoresFromHost( 89 | iteration, self.connection, self.__config.HOST 90 | ) 91 | 92 | def getPlayerProfileScores(self, iteration: int, positions: list) -> pd.DataFrame: 93 | return getPlayerProfileScoresFromHost( 94 | iteration, positions, self.connection, self.__config.HOST 95 | ) 96 | 97 | def getSetPieces(self, matches: list) -> pd.DataFrame: 98 | return getSetPiecesFromHost( 99 | matches, self.connection, self.__config.HOST 100 | ) 101 | 102 | def getSquadRatings(self, iteration: int) -> pd.DataFrame: 103 | return getSquadRatingsFromHost( 104 | iteration, self.connection, 
self.__config.HOST 105 | ) 106 | 107 | def getSquadCoefficients(self, iteration: int) -> pd.DataFrame: 108 | return getSquadCoefficientsFromHost( 109 | iteration, self.connection, self.__config.HOST 110 | ) 111 | 112 | def getFormations(self, matches: list) -> pd.DataFrame: 113 | return getFormationsFromHost( 114 | matches, self.connection, self.__config.HOST 115 | ) 116 | 117 | def getSubstitutions(self, matches: list) -> pd.DataFrame: 118 | return getSubstitutionsFromHost( 119 | matches, self.connection, self.__config.HOST 120 | ) 121 | 122 | def getStartingPositions(self, matches: list) -> pd.DataFrame: 123 | return getStartingPositionsFromHost( 124 | matches, self.connection, self.__config.HOST 125 | ) 126 | 127 | @staticmethod 128 | def generateXML( 129 | events: pd.DataFrame, 130 | lead: int, 131 | lag: int, 132 | p1Start: int, 133 | p2Start: int, 134 | p3Start: int, 135 | p4Start: int, 136 | p5Start: int 137 | ) -> ET.ElementTree: 138 | return generateXML(events, lead, lag, p1Start, p2Start, p3Start, p4Start, p5Start) -------------------------------------------------------------------------------- /impectPy/iteration_averages.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import pandas as pd 3 | import requests 4 | from impectPy.helpers import RateLimitedAPI, unnest_mappings_df 5 | from .iterations import getIterationsFromHost 6 | 7 | ###### 8 | # 9 | # This function returns a pandas dataframe that contains all kpis for a 10 | # given iteration aggregated per player and position 11 | # 12 | ###### 13 | 14 | 15 | def getPlayerIterationAverages( 16 | iteration: int, token: str, session: requests.Session = requests.Session() 17 | ) -> pd.DataFrame: 18 | 19 | # create an instance of RateLimitedAPI 20 | connection = RateLimitedAPI(session) 21 | 22 | # construct header with access token 23 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 24 | 25 | return 
getPlayerIterationAveragesFromHost(iteration, connection, "https://api.impect.com")

def getPlayerIterationAveragesFromHost(
        iteration: int, connection: RateLimitedAPI, host: str
) -> pd.DataFrame:
    """
    Returns all KPI averages of an iteration, aggregated per player and position.

    Args:
        iteration (int): The iteration id to query.
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per (squad, player, position) with one column per KPI.

    Raises:
        Exception: If iteration is not an integer or an endpoint returns no data.
    """

    # check input for matches argument
    # NOTE(review): "vor" in the message below is a typo for "for" (left as-is here)
    if not isinstance(iteration, int):
        raise Exception("Input vor iteration argument must be an integer")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )

    # get squadIds (only squads the user has access to)
    squad_ids = squads[squads.access].id.to_list()

    # get players
    players = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/players",
        method="GET"
    ).process_response(
        endpoint="Players"
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest mappings
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get kpis
    kpis = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/kpis",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )[["id", "name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    # NOTE(review): endpoint label "KPIs" looks copy-pasted - this is the countries endpoint
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )

    # create empty df to store averages
    averages = pd.DataFrame()

    # iterate over squads
    for squad_id in squad_ids:

        # get player iteration averages per squad
        averages_raw = connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/"
                f"squads/{squad_id}/player-kpis",
            method="GET"
        ).process_response(
            endpoint="PlayerAverages"
        ).assign(
            iterationId=iteration,
            squadId=squad_id
        )

        # unnest scorings
        averages_raw = averages_raw.explode("kpis").reset_index(drop=True)

        # unnest dictionary in kpis column
        averages_raw = pd.concat(
            [averages_raw.drop(["kpis"], axis=1), pd.json_normalize(averages_raw["kpis"])],
            axis=1
        )

        # merge with kpis to ensure all kpis are present
        # (outer join adds a row per KPI that has no scoring for this squad)
        averages_raw = averages_raw.merge(
            kpis,
            left_on="kpiId",
            right_on="id",
            how="outer",
            suffixes=("", "_right")
        )

        # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
        if len(averages_raw["name"][averages_raw["name"].isnull()]) > 0:
            averages_raw["name"] = averages_raw["name"].fillna("-1")

        # get KPIs without a scoring (rows created only by the outer join above)
        mask = (
            averages_raw.iterationId.isnull()
            & averages_raw.squadId.isnull()
            & averages_raw.playerId.isnull()
            & averages_raw.position.isnull()
        )

        # fill join cols with placeholder so the pivot below keeps these KPI columns
        averages_raw.loc[mask] = averages_raw.loc[mask].fillna(-1)

        # get matchShares
        match_shares_raw = averages_raw[
            ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"]].drop_duplicates()

        # pivot kpi values
        averages_raw = pd.pivot_table(
            averages_raw,
            values="value",
            index=["iterationId", "squadId", "playerId", "position"],
            columns="name",
            aggfunc="sum",
            fill_value=0,
            dropna=False,
            observed=True,
        ).reset_index()

        # drop "-1" column (placeholder KPI name introduced above)
        if "-1" in averages_raw.columns:
            averages_raw.drop(["-1"], inplace=True, axis=1)

        # drop -1 rows (placeholder rows introduced above)
        averages_raw = averages_raw[
            ~(averages_raw.iterationId == -1)
            & ~(averages_raw.squadId == -1)
            & ~(averages_raw.playerId == -1)
            & ~(averages_raw.position == -1)
        ]

        # merge with playDuration and matchShare
        averages_raw = averages_raw.merge(
            match_shares_raw,
            left_on=["iterationId", "squadId", "playerId", "position"],
            right_on=["iterationId", "squadId", "playerId", "position"],
            how="inner",
            suffixes=("", "_right")
        )

        averages = pd.concat([averages, averages_raw], axis=0)

    # merge with other data (iteration meta, squad names, player master data, countries)
    averages = averages.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # remove NA rows
    averages = averages[averages.iterationId.notnull()]

    # fix column types (nullable Int64 keeps NA-capable integer ids)
    averages["iterationId"] = averages["iterationId"].astype("Int64")
    averages["squadId"] = averages["squadId"].astype("Int64")
    averages["playerId"] = averages["playerId"].astype("Int64")
    averages["wyscoutId"] = averages["wyscoutId"].astype("Int64")
    averages["heimSpielId"] = averages["heimSpielId"].astype("Int64")
    averages["skillCornerId"] = averages["skillCornerId"].astype("Int64")

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "squadName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "position",
        "matchShare",
        "playDuration"
    ]

    # add kpiNames to order
    order = order + kpis.name.to_list()

    # select columns
    averages = averages[order]

    # return result
    return averages


######
#
# This function returns a pandas dataframe that contains all kpis for a
# given iteration aggregated per squad
#
######
# NOTE(review): the default session is created once at definition time and is
# shared by all callers that rely on the default - confirm this is intended
def getSquadIterationAverages(
        iteration: int, token: str, session: requests.Session = requests.Session()
) -> pd.DataFrame:

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadIterationAveragesFromHost(iteration, connection, "https://api.impect.com")

def getSquadIterationAveragesFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Returns all KPI averages of an iteration, aggregated per squad.

    Args:
        iteration (int): The iteration id to query.
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per squad with one column per KPI.

    Raises:
        Exception: If iteration is not an integer or an endpoint returns no data.
    """

    # check input for matches argument
    # NOTE(review): "vor" in the message below is a typo for "for" (left as-is here)
    if not isinstance(iteration, int):
        raise Exception("Input vor iteration argument must be an integer")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )[["id", "name", "idMappings"]]

    # unnest mappings
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squad iteration averages
    averages_raw = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squad-kpis",
        method="GET"
    ).process_response(
        endpoint="SquadAverages"
    ).assign(iterationId=iteration)

    # get kpis
    kpis = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/kpis",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )[["id", "name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get matches played
    matches = averages_raw[["squadId", "matches"]].drop_duplicates()

    # unnest scorings
    averages = averages_raw.explode("kpis").reset_index(drop=True)

    # unnest dictionary in kpis column
    averages = pd.concat(
        [averages.drop(["kpis"], axis=1), pd.json_normalize(averages["kpis"])],
        axis=1
    )

    # merge with kpis to ensure all kpis are present
    averages = averages.merge(
        kpis,
        left_on="kpiId",
        right_on="id",
        how="outer",
        suffixes=("", "_right")
    )

    # pivot kpi values
    averages = pd.pivot_table(
        averages,
        values="value",
        index=["iterationId", "squadId"],
        columns="name",
        aggfunc="sum",
        fill_value=0,
        dropna=False
    ).reset_index()

    # inner join with matches played
    averages = pd.merge(
        averages,
        matches,
        left_on="squadId",
        right_on="squadId",
        how="inner",
        suffixes=("", "_right")
    )

    # merge with other data (iteration meta and squad names/id mappings)
    averages = averages.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    )

    # remove NA rows
    averages = averages[averages.iterationId.notnull()]

    # fix column types (nullable Int64 keeps NA-capable integer ids)
    averages["squadId"] = averages["squadId"].astype("Int64")
    averages["matches"] = averages["matches"].astype("Int64")
    averages["iterationId"] = averages["iterationId"].astype("Int64")
    averages["wyscoutId"] = averages["wyscoutId"].astype("Int64")
    averages["heimSpielId"] = averages["heimSpielId"].astype("Int64")
    averages["skillCornerId"] = averages["skillCornerId"].astype("Int64")

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "matches"
    ]

    # add kpiNames to order
    order = order + kpis.name.to_list()

    # select columns
    averages = averages[order]

    # return result
    return averages
--------------------------------------------------------------------------------
/impectPy/iterations.py:
--------------------------------------------------------------------------------
# load packages
import pandas as pd
import re
import requests
from impectPy.helpers import RateLimitedAPI, unnest_mappings_dict, validate_response

######
#
# This function returns a dataframe containing all competitionIterations available to the user
#
######


def getIterations(token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header
with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getIterationsFromHost(connection, "https://api.impect.com")

# define function
def getIterationsFromHost(connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Returns all competition iterations available to the user.

    Args:
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per iteration, sorted by id, including competition
            meta data, country names, and external provider ids.
    """

    # request competition iteration information from API
    response = connection.make_api_request_limited(
        f"{host}/v5/customerapi/iterations/",
        method="GET"
    )

    # get data from response
    data = validate_response(response, "Iterations")

    # unnest nested IdMapping column
    data = unnest_mappings_dict(data)

    # convert to pandas dataframe
    df = pd.json_normalize(data)

    # drop idMappings column
    df = df.drop("idMappings", axis = 1)

    # fix column names using regex (e.g. "a.b" / "a_b" -> "aB")
    df = df.rename(columns=lambda x: re.sub("[\._](.)", lambda y: y.group(1).upper(), x))

    # keep first entry for skillcorner, heimspiel and wyscout data
    df.skillCornerId = df.skillCornerId.apply(lambda x: x[0] if x else None)
    df.heimSpielId = df.heimSpielId.apply(lambda x: x[0] if x else None)
    df.wyscoutId = df.wyscoutId.apply(lambda x: x[0] if x else None)

    # get country data
    # NOTE(review): endpoint label "KPIs" looks copy-pasted - this is the countries endpoint
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )

    df = df.merge(
        countries[["id", "fifaName"]].rename(
            columns={"id": "competitionCountryId", "fifaName": "competitionCountryName"}
        ),
        how="left",
        on="competitionCountryId"
    )

    # sort iterations
    df = df.sort_values(by="id")

    # define column order
    order = [
        "id",
        "competitionId",
        "competitionName",
        "season",
        "competitionType",
        "competitionCountryId",
        "competitionCountryName",
        "competitionGender",
        "dataVersion",
        "lastChangeTimestamp",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
    ]

    # select columns
    df = df[order]

    # return dataframe
    return df
--------------------------------------------------------------------------------
/impectPy/match_info.py:
--------------------------------------------------------------------------------
# load packages
import numpy as np
import pandas as pd
import requests
from impectPy.helpers import RateLimitedAPI
from .matches import getMatchesFromHost
from .iterations import getIterationsFromHost
import re


######
#
# This function returns a pandas dataframe that contains all events for a
# given match
#
######


# NOTE(review): the default session is created once at definition time and is
# shared by all callers that rely on the default - confirm this is intended
def getFormations(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getFormationsFromHost(matches, connection, "https://api.impect.com")


# define function
def getFormationsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Returns the formations used by both squads over the course of the given matches.

    Args:
        matches (list): List of match ids (integers).
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per (match, squad, formation spell), sorted by
            matchId, squadId and gameTimeInSec.

    Raises:
        Exception: If matches is not a list or all supplied matches are unavailable.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    # NOTE(review): endpoint label "Iterations" looks copy-pasted - this is the matches endpoint
    matches = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable (no KPI calculation yet)
    fail_matches = matches[matches.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = matches[~matches.id.isin(fail_matches)]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds
    iterations = list(matches[matches.lastCalculationDate.notnull()].iterationId.unique())

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # extract formations (one frame per side, normalized to squadId/squadFormations)
    formations_home = matches[["id", "squadHomeId", "squadHomeFormations"]].rename(
        columns={"squadHomeFormations": "squadFormations", "squadHomeId": "squadId"}
    )
    formations_away = matches[["id", "squadAwayId", "squadAwayFormations"]].rename(
        columns={"squadAwayFormations": "squadFormations", "squadAwayId": "squadId"}
    )

    # concat dfs
    formations = pd.concat([formations_home, formations_away], axis=0).reset_index(drop=True)

    # unnest formations column
    formations = formations.explode("squadFormations").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    formations = formations.join(pd.json_normalize(formations["squadFormations"]))

    # drop the original column
    formations.drop(columns=["squadFormations"], inplace=True)

    # start merging dfs

    # merge formations with squads
    formations = formations.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    )

    # merge with matches info
    formations = formations.merge(
        matchplan[[
            "id", "skillCornerId", "heimSpielId", "wyscoutId", "matchDayIndex",
            "matchDayName", "scheduledDate", "lastCalculationDate", "iterationId"
        ]],
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    formations = formations.merge(
        iterations[["id", "competitionName", "competitionId", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    formations = formations.rename(columns={
        "id": "matchId",
        "scheduledDate": "dateTime"
    })

    # define desired column order
    cols = [
        "matchId",
        "dateTime",
        "competitionId",
        "competitionName",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "gameTime",
        "gameTimeInSec",
        "formation"
    ]

    # reorder data
    formations = formations[cols]

    # reorder rows
    formations = formations.sort_values(["matchId", "squadId", "gameTimeInSec"])

    # return formations
    return formations


######
#
# This function returns a pandas dataframe that contains all substitutions for a
# given match
#
######


def getSubstitutions(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSubstitutionsFromHost(matches, connection, "https://api.impect.com")


# define function
def getSubstitutionsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return one row per substitution (player on/off, position, game time) for the given matches."""
    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    matches = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable (no KPI calculation yet)
    fail_matches = matches[matches.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = matches[~matches.id.isin(fail_matches)]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds
    iterations = list(matches[matches.lastCalculationDate.notnull()].iterationId.unique())

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get players
    players = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/players",
            method="GET"
        ).process_response(
            endpoint="Players"
        ),
            iterations),
        ignore_index=True)[["id", "commonname"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # extract shirt numbers for both sides under a common column name
    shirt_numbers_home = matches[["id", "squadHomeId", "squadHomePlayers"]].rename(
        columns={"squadHomePlayers": "players", "squadHomeId": "squadId"}
    )
    shirt_numbers_away = matches[["id", "squadAwayId", "squadAwayPlayers"]].rename(
        columns={"squadAwayPlayers": "players", "squadAwayId": "squadId"}
    )

    # concat dfs
    shirt_numbers = pd.concat([shirt_numbers_home, shirt_numbers_away], axis=0).reset_index(drop=True)

    # unnest players column
    shirt_numbers = shirt_numbers.explode("players").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    shirt_numbers = pd.concat(
        [
            shirt_numbers.drop(columns=["players"]),
            pd.json_normalize(shirt_numbers["players"]).rename(columns={"id": "playerId"})
        ],
        axis=1
    )

    # extract substitutions for both sides under a common column name
    substitutions_home = matches[["id", "squadHomeId", "squadHomeSubstitutions"]].rename(
        columns={"squadHomeSubstitutions": "squadSubstitutions", "squadHomeId": "squadId"}
    )
    substitutions_away = matches[["id", "squadAwayId", "squadAwaySubstitutions"]].rename(
        columns={"squadAwaySubstitutions": "squadSubstitutions", "squadAwayId": "squadId"}
    )

    # concat dfs
    substitutions = pd.concat([substitutions_home, substitutions_away], axis=0).reset_index(drop=True)

    # unnest substitutions column
    substitutions = substitutions.explode("squadSubstitutions").reset_index(drop=True)

    # drop empty row that occurs if one team did not substitute
    substitutions = substitutions[substitutions.squadSubstitutions.notnull()].reset_index(drop=True)

    # normalize the JSON structure into separate columns
    substitutions = substitutions.join(pd.json_normalize(substitutions["squadSubstitutions"]))

    # drop the original column
    substitutions.drop(columns=["squadSubstitutions"], inplace=True)

    # fix potential typing issues (exchangedPlayerId can be missing -> nullable int)
    substitutions.exchangedPlayerId = substitutions.exchangedPlayerId.astype("Int64")

    # start merging dfs

    # merge substitutions with squads
    substitutions = substitutions.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_x")
    )

    # merge substitutions with shirt numbers (once for the player coming on, once for the one going off)
    substitutions = substitutions.merge(
        shirt_numbers,
        left_on=["playerId", "squadId", "id"],
        right_on=["playerId", "squadId", "id"],
        how="left",
        suffixes=("", "_x")
    ).merge(
        shirt_numbers.rename(
            columns={"playerId": "exchangedPlayerId", "shirtNumber": "exchangedShirtNumber"}
        ),
        left_on=["exchangedPlayerId", "squadId", "id"],
        right_on=["exchangedPlayerId", "squadId", "id"],
        how="left",
        suffixes=("", "_x")
    )

    # merge substitutions with players (names for both players involved)
    substitutions = substitutions.merge(
        players[["id", "commonname"]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={"commonname": "exchangedPlayerName"}
        ),
        left_on="exchangedPlayerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with matches info
    substitutions = substitutions.merge(
        matchplan[[
            "id", "skillCornerId", "heimSpielId", "wyscoutId", "matchDayIndex",
            "matchDayName", "scheduledDate", "lastCalculationDate", "iterationId"
        ]],
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    substitutions = substitutions.merge(
        iterations[["id", "competitionName", "competitionId", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    substitutions = substitutions.rename(columns={
        "id": "matchId",
        "positionSide": "toPositionSide",
        "scheduledDate": "dateTime",
        "gameTime.gameTime": "gameTime",
        "gameTime.gameTimeInSec": "gameTimeInSec"
    })

    # fix column types (nullable ints for shirt numbers)
    substitutions["shirtNumber"] = substitutions["shirtNumber"].astype("Int64")
    substitutions["exchangedShirtNumber"] = substitutions["exchangedShirtNumber"].astype("Int64")

    # define desired column order
    cols = [
        "matchId",
        "dateTime",
        "competitionId",
        "competitionName",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "gameTime",
        "gameTimeInSec",
        "substitutionType",
        "playerId",
        "playerName",
        "shirtNumber",
        "fromPosition",
        "fromPositionSide",
        "toPosition",
        "toPositionSide",
        "exchangedPlayerId",
        "exchangedPlayerName",
        "exchangedShirtNumber",
    ]

    # reorder data
    substitutions = substitutions[cols]

    # reorder rows
    substitutions = substitutions.sort_values(["matchId", "squadId", "gameTimeInSec", "playerId"])

    # return substitutions
    return substitutions


######
#
# This function returns a pandas dataframe that contains the starting formations for a
# given match
#
######


def getStartingPositions(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getStartingPositionsFromHost(matches, connection, "https://api.impect.com")


# define function
def getStartingPositionsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return one row per starting player (position, side, shirt number) for the given matches."""
    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    matches = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable
    fail_matches = matches[matches.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = matches[~matches.id.isin(fail_matches)]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds
    iterations = list(matches[matches.lastCalculationDate.notnull()].iterationId.unique())

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get players
    players = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/players",
            method="GET"
        ).process_response(
            endpoint="Players"
        ),
            iterations),
        ignore_index=True)[["id", "commonname"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # extract shirt numbers for both sides under a common column name
    shirt_numbers_home = matches[["id", "squadHomeId", "squadHomePlayers"]].rename(
        columns={"squadHomePlayers": "players", "squadHomeId": "squadId"}
    )
    shirt_numbers_away = matches[["id", "squadAwayId", "squadAwayPlayers"]].rename(
        columns={"squadAwayPlayers": "players", "squadAwayId": "squadId"}
    )

    # concat dfs
    shirt_numbers = pd.concat([shirt_numbers_home, shirt_numbers_away], axis=0).reset_index(drop=True)

    # unnest players column
    shirt_numbers = shirt_numbers.explode("players").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    shirt_numbers = pd.concat(
        [
            shirt_numbers.drop(columns=["players"]),
            pd.json_normalize(shirt_numbers["players"]).rename(columns={"id": "playerId"})
        ],
        axis=1
    )

    # extract starting_positions for both sides under a common column name
    starting_positions_home = matches[["id", "squadHomeId", "squadHomeStartingPositions"]].rename(
        columns={"squadHomeStartingPositions": "squadStartingPositions", "squadHomeId": "squadId"}
    )
    starting_positions_away = matches[["id", "squadAwayId", "squadAwayStartingPositions"]].rename(
        columns={"squadAwayStartingPositions": "squadStartingPositions", "squadAwayId": "squadId"}
    )

    # concat dfs
    starting_positions = pd.concat([starting_positions_home, starting_positions_away], axis=0).reset_index(drop=True)

    # unnest starting positions column
    starting_positions = starting_positions.explode("squadStartingPositions").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    starting_positions = starting_positions.join(pd.json_normalize(starting_positions["squadStartingPositions"]))

    # drop the original column
    starting_positions.drop(columns=["squadStartingPositions"], inplace=True)

    # start merging dfs

    # merge starting positions with shirt numbers
    starting_positions = starting_positions.merge(
        shirt_numbers,
        left_on=["playerId", "squadId", "id"],
        right_on=["playerId", "squadId", "id"],
        how="left",
        suffixes=("", "_x")
    )

    # merge starting positions with squads
    starting_positions = starting_positions.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_x")
    )

    # merge starting positions with players
    starting_positions = starting_positions.merge(
        players[["id", "commonname"]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with matches info
    starting_positions = starting_positions.merge(
        matchplan[[
            "id", "skillCornerId", "heimSpielId", "wyscoutId", "matchDayIndex",
            "matchDayName", "scheduledDate", "lastCalculationDate", "iterationId"
        ]],
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    starting_positions = starting_positions.merge(
        iterations[["id", "competitionName", "competitionId", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    starting_positions = starting_positions.rename(columns={
        "id": "matchId",
        "scheduledDate": "dateTime",
    })

    # fix column types; warn about players without a shirt number before casting
    missing_shirt_numbers = starting_positions["shirtNumber"].isnull()
    if missing_shirt_numbers.any():
        print("Warning: The following players are missing a shirt number and will be set to None:")
        print(starting_positions[missing_shirt_numbers][["matchId", "squadName", "playerName"]].to_string(index=False))
    starting_positions["shirtNumber"] = starting_positions["shirtNumber"].astype("Int64")

    # define desired column order
    cols = [
        "matchId",
        "dateTime",
        "competitionId",
        "competitionName",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "playerId",
        "playerName",
        "shirtNumber",
        "position",
        "positionSide"
    ]

    # reorder data
    starting_positions = starting_positions[cols]

    # reorder rows
    starting_positions = starting_positions.sort_values(["matchId", "squadId", "playerId"])

    # return starting positions
    return starting_positions


# ---- impectPy/matches.py ----

import pandas as pd
import re
import requests
from impectPy.helpers import RateLimitedAPI, unnest_mappings_dict, validate_response

######
#
# This function returns a dataframe with basic information
# for all matches for a given set of parameters
#
######


def getMatches(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getMatchesFromHost(iteration, connection, "https://api.impect.com")

# define function
def getMatchesFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return the match plan for an iteration, enriched with squad and country data."""

    # get match data
    matches = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/"
            f"{iteration}/matches",
        method="GET"
    )

    # get data from response
    matches = validate_response(response=matches, endpoint="Matches")

    # get squads data
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/"
            f"{iteration}/squads",
        method="GET"
    )

    # get data from response
from response 45 | squads = validate_response(response=squads, endpoint="Squads") 46 | 47 | # get country data 48 | countries = connection.make_api_request_limited( 49 | url=f"{host}/v5/customerapi/countries", 50 | method="GET" 51 | ) 52 | 53 | # get data from response 54 | countries = validate_response(response=countries, endpoint="Countries") 55 | 56 | # convert to df and clean 57 | matches = clean_df(matches) 58 | squads = clean_df(squads) 59 | countries = pd.DataFrame(countries) 60 | 61 | # merge matches with squads 62 | matches = matches.merge(squads, 63 | left_on="homeSquadId", 64 | right_on="id", 65 | suffixes=("", "_home")) 66 | matches = matches.rename(columns={ 67 | "name": "homeSquadName", 68 | "type": "homeSquadType", 69 | "skillCornerId_home": "homeSquadSkillCornerId", 70 | "heimSpielId_home": "homeSquadHeimSpielId", 71 | "wyscoutId_home": "homeSquadWyscoutId", 72 | "countryId": "homeSquadCountryId" 73 | }) 74 | matches = matches.merge(squads, 75 | left_on="awaySquadId", 76 | right_on="id", 77 | suffixes=("", "_away")) 78 | matches = matches.rename(columns={ 79 | "name": "awaySquadName", 80 | "type": "awaySquadType", 81 | "skillCornerId_away": "awaySquadSkillCornerId", 82 | "heimSpielId_away": "awaySquadHeimSpielId", 83 | "wyscoutId_away": "awaySquadWyscoutId", 84 | "countryId": "awaySquadCountryId" 85 | }) 86 | 87 | # merge with countries 88 | matches = matches.merge( 89 | countries, 90 | left_on="homeSquadCountryId", 91 | right_on="id", 92 | suffixes=("", "_right") 93 | ) 94 | matches = matches.rename(columns={"fifaName": "homeSquadCountryName"}) 95 | 96 | matches = matches.merge( 97 | countries, 98 | left_on="awaySquadCountryId", 99 | right_on="id", 100 | suffixes=("", "_right") 101 | ) 102 | matches = matches.rename(columns={"fifaName": "awaySquadCountryName"}) 103 | 104 | # reorder columns 105 | matches = matches.loc[:, [ 106 | 'id', 107 | 'skillCornerId', 108 | 'heimSpielId', 109 | 'wyscoutId', 110 | 'iterationId', 111 | 'matchDayIndex', 112 | 
'matchDayName', 113 | 'homeSquadId', 114 | 'homeSquadName', 115 | 'homeSquadType', 116 | 'homeSquadCountryId', 117 | 'homeSquadCountryName', 118 | 'homeSquadSkillCornerId', 119 | 'homeSquadHeimSpielId', 120 | 'homeSquadWyscoutId', 121 | 'awaySquadId', 122 | 'awaySquadName', 123 | 'awaySquadType', 124 | 'awaySquadCountryId', 125 | 'awaySquadCountryName', 126 | 'awaySquadSkillCornerId', 127 | 'awaySquadHeimSpielId', 128 | 'awaySquadWyscoutId', 129 | 'scheduledDate', 130 | 'lastCalculationDate', 131 | 'available' 132 | ]] 133 | 134 | # reorder matches 135 | matches = matches.sort_values(by=["matchDayIndex", "id"]) 136 | 137 | # sort matches 138 | matches = matches.sort_values(by="id") 139 | 140 | # return matches 141 | return matches 142 | 143 | 144 | # define function to clean df 145 | def clean_df(data: dict) -> pd.DataFrame: 146 | 147 | # unnest nested idMapping key 148 | data = unnest_mappings_dict(data) 149 | 150 | # convert to df 151 | df = pd.json_normalize(data) 152 | 153 | # fix column names using regex 154 | df = df.rename(columns=lambda x: re.sub("[\._](.)", lambda y: y.group(1).upper(), x)) 155 | 156 | # drop idMappings column 157 | df = df.drop("idMappings", axis=1) 158 | 159 | # keep first entry for skillcorner and heimspiel data 160 | df.skillCornerId = df.skillCornerId.apply(lambda x: x[0] if x else None) 161 | df.heimSpielId = df.heimSpielId.apply(lambda x: x[0] if x else None) 162 | df.wyscoutId = df.wyscoutId.apply(lambda x: x[0] if x else None) 163 | 164 | return df -------------------------------------------------------------------------------- /impectPy/matchsums.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import pandas as pd 3 | import requests 4 | from impectPy.helpers import RateLimitedAPI, unnest_mappings_df, ForbiddenError 5 | from .matches import getMatchesFromHost 6 | from .iterations import getIterationsFromHost 7 | 8 | ###### 9 | # 10 | # This function returns a pandas 
dataframe that contains all kpis for a 11 | # given match aggregated per player and position 12 | # 13 | ###### 14 | 15 | 16 | def getPlayerMatchsums(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame: 17 | 18 | # create an instance of RateLimitedAPI 19 | connection = RateLimitedAPI(session) 20 | 21 | # construct header with access token 22 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 23 | 24 | return getPlayerMatchsumsFromHost(matches, connection, "https://api.impect.com") 25 | 26 | def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame: 27 | 28 | # check input for matches argument 29 | if not isinstance(matches, list): 30 | raise Exception("Argument 'matches' must be a list of integers.") 31 | 32 | # get match info 33 | match_data = pd.concat( 34 | map(lambda match: connection.make_api_request_limited( 35 | url=f"{host}/v5/customerapi/matches/{match}", 36 | method="GET" 37 | ).process_response( 38 | endpoint="Match Info" 39 | ), 40 | matches), 41 | ignore_index=True) 42 | 43 | # filter for matches that are unavailable 44 | fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() 45 | 46 | # drop matches that are unavailable from list of matches 47 | matches = [match for match in matches if match not in fail_matches] 48 | 49 | # raise warnings 50 | if len(fail_matches) > 0: 51 | if len(matches) == 0: 52 | raise Exception("All supplied matches are unavailable. 
Execution stopped.") 53 | else: 54 | print(f"The following matches are not available yet and were ignored:\n{fail_matches}") 55 | 56 | # extract iterationIds 57 | iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) 58 | 59 | # get player match sums 60 | matchsums_raw = pd.concat( 61 | map(lambda match: connection.make_api_request_limited( 62 | url=f"{host}/v5/customerapi/matches/{match}/player-kpis", 63 | method="GET" 64 | ).process_response( 65 | endpoint="PlayerMatchsums" 66 | ).assign( 67 | matchId=match 68 | ), 69 | matches), 70 | ignore_index=True) 71 | 72 | # get players 73 | players = pd.concat( 74 | map( 75 | lambda iteration: connection.make_api_request_limited( 76 | url=f"{host}/v5/customerapi/iterations/{iteration}/players", 77 | method="GET" 78 | ).process_response( 79 | endpoint="Players" 80 | ), 81 | iterations), 82 | ignore_index=True 83 | )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]] 84 | 85 | # only keep first country id for each player 86 | country_series = players["countryIds"].explode().groupby(level=0).first() 87 | players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64") 88 | players = players.rename(columns={"countryIds": "countryId"}) 89 | 90 | # unnest mappings 91 | players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() 92 | 93 | # get squads 94 | squads = pd.concat( 95 | map(lambda iteration: connection.make_api_request_limited( 96 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 97 | method="GET" 98 | ).process_response( 99 | endpoint="Squads" 100 | ), 101 | iterations), 102 | ignore_index=True)[["id", "name"]].drop_duplicates() 103 | 104 | # get coaches 105 | coaches_blacklisted = False 106 | try: 107 | coaches = pd.concat( 108 | map(lambda iteration: connection.make_api_request_limited( 109 | 
url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", 110 | method="GET" 111 | ).process_response( 112 | endpoint="Coaches", 113 | raise_exception=False 114 | ), 115 | iterations), 116 | ignore_index=True)[["id", "name"]].drop_duplicates() 117 | except KeyError: 118 | # no coaches found, create empty df 119 | coaches = pd.DataFrame(columns=["id", "name"]) 120 | except ForbiddenError: 121 | coaches_blacklisted = True 122 | 123 | # get kpis 124 | kpis = connection.make_api_request_limited( 125 | url=f"{host}/v5/customerapi/kpis", 126 | method="GET" 127 | ).process_response( 128 | endpoint="KPIs" 129 | )[["id", "name"]] 130 | 131 | # get matches 132 | matchplan = pd.concat( 133 | map(lambda iteration: getMatchesFromHost( 134 | iteration=iteration, 135 | connection=connection, 136 | host=host 137 | ), 138 | iterations), 139 | ignore_index=True) 140 | 141 | # get iterations 142 | iterations = getIterationsFromHost(connection=connection, host=host) 143 | 144 | # get country data 145 | countries = connection.make_api_request_limited( 146 | url=f"{host}/v5/customerapi/countries", 147 | method="GET" 148 | ).process_response( 149 | endpoint="KPIs" 150 | ) 151 | 152 | # create empty df to store matchsums 153 | matchsums = pd.DataFrame() 154 | 155 | # manipulate matchsums 156 | 157 | # iterate over matches 158 | for i in range(len(matchsums_raw)): 159 | 160 | # iterate over sides 161 | for side in ["squadHomePlayers", "squadAwayPlayers"]: 162 | # get data for index 163 | temp = matchsums_raw[side].loc[i] 164 | 165 | # convert to pandas df 166 | temp = pd.DataFrame(temp).assign( 167 | matchId=matchsums_raw.matchId.loc[i], 168 | squadId=matchsums_raw[side.replace("Players", "Id")].loc[i] 169 | ) 170 | 171 | # extract matchshares 172 | matchshares = temp[["matchId", "squadId", "id", "position", "matchShare", "playDuration"]].drop_duplicates() 173 | 174 | # explode kpis column 175 | temp = temp.explode("kpis") 176 | 177 | # unnest dictionary in kpis column 178 | temp = 
pd.concat( 179 | [temp.drop(["kpis"], axis=1), temp["kpis"].apply(pd.Series)], 180 | axis=1 181 | ) 182 | 183 | # merge with kpis to ensure all kpis are present 184 | temp = pd.merge( 185 | temp, 186 | kpis, 187 | left_on="kpiId", 188 | right_on="id", 189 | how="outer", 190 | suffixes=("", "_right") 191 | ) 192 | 193 | # pivot data 194 | temp = pd.pivot_table( 195 | temp, 196 | values="value", 197 | index=["matchId", "squadId", "id", "position"], 198 | columns="name", 199 | aggfunc="sum", 200 | fill_value=0, 201 | dropna=False 202 | ).reset_index() 203 | 204 | # inner join with matchshares 205 | temp = pd.merge( 206 | temp, 207 | matchshares, 208 | left_on=["matchId", "squadId", "id", "position"], 209 | right_on=["matchId", "squadId", "id", "position"], 210 | how="inner", 211 | suffixes=("", "_right") 212 | ) 213 | 214 | # append to matchsums 215 | matchsums = pd.concat([matchsums, temp]) 216 | 217 | # merge with other data 218 | matchsums = matchsums.merge( 219 | matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]], 220 | left_on="matchId", 221 | right_on="id", 222 | how="left", 223 | suffixes=("", "_right") 224 | ).merge( 225 | pd.concat([ 226 | match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), 227 | match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) 228 | ], ignore_index=True), 229 | left_on=["matchId", "squadId"], 230 | right_on=["id", "squadId"], 231 | how="left", 232 | suffixes=("", "_right") 233 | ).merge( 234 | iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], 235 | left_on="iterationId", 236 | right_on="id", 237 | how="left", 238 | suffixes=("", "_right") 239 | ).merge( 240 | squads[["id", "name"]].rename( 241 | columns={"id": "squadId", "name": "squadName"} 242 | ), 243 | left_on="squadId", 244 | right_on="squadId", 245 | how="left", 246 | 
suffixes=("", "_right") 247 | ).merge( 248 | players[[ 249 | "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname", 250 | "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg" 251 | ]].rename( 252 | columns={"commonname": "playerName"} 253 | ), 254 | left_on="id", 255 | right_on="id", 256 | how="left", 257 | suffixes=("", "_right") 258 | ).merge( 259 | countries.rename(columns={"fifaName": "playerCountry"}), 260 | left_on="countryId", 261 | right_on="id", 262 | how="left", 263 | suffixes=("", "_right") 264 | ) 265 | 266 | if not coaches_blacklisted: 267 | matchsums["coachId"] = matchsums["coachId"].astype("Int64") 268 | matchsums = matchsums.merge( 269 | coaches[["id", "name"]].rename( 270 | columns={"id": "coachId", "name": "coachName"} 271 | ), 272 | left_on="coachId", 273 | right_on="coachId", 274 | how="left", 275 | suffixes=("", "_right") 276 | ) 277 | 278 | # rename some columns 279 | matchsums = matchsums.rename(columns={ 280 | "scheduledDate": "dateTime", 281 | "id": "playerId" 282 | }) 283 | 284 | # define column order 285 | order = [ 286 | "matchId", 287 | "dateTime", 288 | "competitionName", 289 | "competitionId", 290 | "competitionType", 291 | "iterationId", 292 | "season", 293 | "matchDayIndex", 294 | "matchDayName", 295 | "squadId", 296 | "squadName", 297 | "coachId", 298 | "coachName", 299 | "playerId", 300 | "wyscoutId", 301 | "heimSpielId", 302 | "skillCornerId", 303 | "playerName", 304 | "firstname", 305 | "lastname", 306 | "birthdate", 307 | "birthplace", 308 | "playerCountry", 309 | "leg", 310 | "position", 311 | "matchShare", 312 | "playDuration" 313 | ] 314 | 315 | # add kpiNames to order 316 | order += kpis['name'].to_list() 317 | 318 | # check if coaches are blacklisted 319 | if coaches_blacklisted: 320 | order = [col for col in order if col not in ["coachId", "coachName"]] 321 | 322 | # select columns 323 | matchsums = matchsums[order] 324 | 325 | # fix some column types 326 | matchsums["matchId"] = 
######
#
# This function returns a pandas dataframe that contains all kpis for a
# given match aggregated per squad
#
######


def getSquadMatchsums(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """Return per-squad KPI aggregates for the given matches from the public API host.

    Args:
        matches: List of match ids to aggregate.
        token: Bearer token used to authorize every request.
        session: Optional requests session to reuse connections.

    Returns:
        DataFrame with one row per (match, squad) and one column per KPI.
    """

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadMatchsumsFromHost(matches, connection, "https://api.impect.com")


def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return per-squad KPI aggregates for the given matches from an arbitrary host.

    Args:
        matches: List of match ids to aggregate.
        connection: Rate-limited API client with auth headers already attached.
        host: Base URL of the API host.

    Returns:
        DataFrame with one row per (match, squad), enriched with match plan,
        iteration, squad and (if accessible) coach metadata.

    Raises:
        Exception: If ``matches`` is not a list or no supplied match is available.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        # fixed typo in error message ("vor" -> "for")
        raise Exception("Input for matches argument must be a list of integers")

    # get match info
    match_data = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Match Info"
        ),
            matches),
        ignore_index=True)

    # matches without a lastCalculationDate have not been processed yet
    fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = [match for match in matches if match not in fail_matches]

    # raise warnings
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds of all available matches
    iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique())

    # get squad match sums
    matchsums_raw = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}/squad-kpis",
            method="GET"
        ).process_response(
            endpoint="SquadMatchsums"
        ).assign(
            matchId=match
        ),
            matches),
        ignore_index=True)

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name", "idMappings"]]

    # get coaches; the endpoint may be blacklisted for some customers
    coaches_blacklisted = False
    try:
        coaches = pd.concat(
            map(lambda iteration: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/coaches",
                method="GET"
            ).process_response(
                endpoint="Coaches",
                raise_exception=False
            ),
                iterations),
            ignore_index=True)[["id", "name"]].drop_duplicates()
    except KeyError:
        # no coaches found, create empty df
        coaches = pd.DataFrame(columns=["id", "name"])
    except ForbiddenError:
        coaches_blacklisted = True

    # unnest provider id mappings (wyscout, heimSpiel, skillCorner)
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get kpis
    kpis = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/kpis",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )[["id", "name"]]

    # get match plan for all involved iterations
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # create empty df to store matchsums
    matchsums = pd.DataFrame()

    # iterate over matches
    for i in range(len(matchsums_raw)):

        # iterate over sides
        for side in ["squadHomeKpis", "squadAwayKpis"]:
            # get data for index
            temp = matchsums_raw[side].loc[i]

            # convert to pandas df, tagging the owning match and squad
            temp = pd.DataFrame(temp).assign(
                matchId=matchsums_raw.matchId.loc[i],
                squadId=matchsums_raw[side.replace("Kpis", "Id")].loc[i]
            )

            # outer-merge with kpis so KPIs without values still become columns
            temp = temp.merge(
                kpis,
                left_on="kpiId",
                right_on="id",
                how="outer",
                # fixed: was ("", "right"); now consistent with every other merge
                suffixes=("", "_right")
            )

            # pivot data to one row per (match, squad), one column per KPI
            temp = pd.pivot_table(
                temp,
                values="value",
                index=["matchId", "squadId"],
                columns="name",
                aggfunc="sum",
                fill_value=0,
                dropna=False
            ).reset_index()

            # append to matchsums
            matchsums = pd.concat([matchsums, temp])

    # merge with other data
    matchsums = matchsums.merge(
        matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]],
        left_on="matchId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        # stack home/away coach assignments into one (match, squad) -> coach table
        pd.concat([
            match_data[["id", "squadHomeId", "squadHomeCoachId"]].rename(
                columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}),
            match_data[["id", "squadAwayId", "squadAwayCoachId"]].rename(
                columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"})
        ], ignore_index=True),
        left_on=["matchId", "squadId"],
        right_on=["id", "squadId"],
        how="left",
        suffixes=("", "_right")
    ).merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        # fixed: was ("", "_home"); now consistent with every other merge
        suffixes=("", "_right")
    )

    if not coaches_blacklisted:
        matchsums["coachId"] = matchsums["coachId"].astype("Int64")
        matchsums = matchsums.merge(
            coaches[["id", "name"]].rename(
                columns={"id": "coachId", "name": "coachName"}
            ),
            left_on="coachId",
            right_on="coachId",
            how="left",
            suffixes=("", "_right")
        )

    # rename some columns
    matchsums = matchsums.rename(columns={
        "scheduledDate": "dateTime"
    })

    # define column order
    order = [
        "matchId",
        "dateTime",
        "competitionName",
        "competitionId",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "coachId",
        "coachName"
    ]

    # add kpiNames to order
    order += kpis['name'].to_list()

    # drop rows produced only by the outer KPI merge (no real match/squad)
    matchsums = matchsums[
        (matchsums.matchId.notnull()) &
        (matchsums.squadId.notnull())
    ]

    # reset index; fixed: drop=True avoids leaking a stale "index" column
    matchsums = matchsums.reset_index(drop=True)

    # check if coaches are blacklisted
    if coaches_blacklisted:
        order = [col for col in order if col not in ["coachId", "coachName"]]

    # select & order columns
    matchsums = matchsums[order]

    # fix some column types
    matchsums["matchId"] = matchsums["matchId"].astype("Int64")
    matchsums["competitionId"] = matchsums["competitionId"].astype("Int64")
    matchsums["iterationId"] = matchsums["iterationId"].astype("Int64")
    matchsums["matchDayIndex"] = matchsums["matchDayIndex"].astype("Int64")
    matchsums["squadId"] = matchsums["squadId"].astype("Int64")
    matchsums["wyscoutId"] = matchsums["wyscoutId"].astype("Int64")
    matchsums["heimSpielId"] = matchsums["heimSpielId"].astype("Int64")
    matchsums["skillCornerId"] = matchsums["skillCornerId"].astype("Int64")

    # return data
    return matchsums
def getPlayerProfileScoresFromHost(
    iteration: int, positions: list, connection: RateLimitedAPI, host: str
) -> pd.DataFrame:
    """Return per-player profile scores for one iteration and position set.

    Args:
        iteration: Iteration (season/competition) id to query.
        positions: Positions to aggregate over; must be a subset of
            ``allowed_positions``.
        connection: Rate-limited API client with auth headers already attached.
        host: Base URL of the API host.

    Returns:
        DataFrame with one row per (iteration, squad, player, positions) and
        one column per profile score, enriched with player/squad metadata.

    Raises:
        Exception: On invalid arguments or when no player played at the given
            positions in the entire iteration.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Input for iteration argument must be an integer")

    # check input for positions argument
    if not isinstance(positions, list):
        raise Exception("Input for positions argument must be a list")

    # check if the input positions are valid
    invalid_positions = [position for position in positions if position not in allowed_positions]
    if len(invalid_positions) > 0:
        raise Exception(
            f"Invalid position(s): {', '.join(invalid_positions)}."
            f"\nChoose one or more of: {', '.join(allowed_positions)}"
        )

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )

    # only squads the customer has access to can be queried
    squad_ids = squads[squads.access].id.to_list()

    # compile position string for the URL
    position_string = ",".join(positions)

    # get player profile scores per squad
    profile_scores_raw = pd.concat(
        map(lambda squadId: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/"
                f"squads/{squadId}/positions/{position_string}/player-profile-scores",
            method="GET"
        ).process_response(
            endpoint="PlayerIterationScores",
            raise_exception=False
        ).assign(
            iterationId=iteration,
            squadId=squadId,
            positions=position_string
        ),
            squad_ids),
        ignore_index=True)

    # raise exception if no player played at given positions in entire iteration
    if len(profile_scores_raw) == 0:
        raise Exception(f"No players played at given position in iteration {iteration}.")

    # print squads without players at given position
    error_list = [str(squadId) for squadId in squad_ids if squadId not in profile_scores_raw.squadId.to_list()]
    if len(error_list) > 0:
        print(f"No players played at positions {positions} for iteration {iteration} for following squads:\n\t{', '.join(error_list)}")

    # get players
    players = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/players",
        method="GET"
    ).process_response(
        endpoint="Players"
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest provider id mappings (wyscout, heimSpiel, skillCorner)
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get the list of profile score names
    scores = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/player-profiles",
        method="GET"
    ).process_response(
        endpoint="playerProfiles"
    )[["name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        # fixed: endpoint label was mistakenly "KPIs" (copy-paste);
        # "Countries" matches the identical request in player_scores.py
        endpoint="Countries"
    )

    # unnest scorings
    profile_scores = profile_scores_raw.explode("profileScores").reset_index(drop=True)

    # unnest dictionary in profileScores column
    profile_scores = pd.concat(
        [profile_scores.drop(["profileScores"], axis=1), pd.json_normalize(profile_scores["profileScores"])],
        axis=1
    )

    # outer-merge with the score list so all profiles become columns
    profile_scores = profile_scores.merge(
        scores,
        left_on="profileName",
        right_on="name",
        how="outer",
        suffixes=("", "_right")
    )

    # keep matchShares aside; the pivot below would otherwise lose them
    match_shares = profile_scores[
        ["iterationId", "squadId", "playerId", "positions", "playDuration", "matchShare"]].drop_duplicates()

    # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
    if len(profile_scores["name"][profile_scores["name"].isnull()]) > 0:
        profile_scores["name"] = profile_scores["name"].fillna("-1")

    # pivot score values to one row per (iteration, squad, player, positions)
    profile_scores = pd.pivot_table(
        profile_scores,
        values="value",
        index=["iterationId", "squadId", "playerId", "positions"],
        columns="name",
        aggfunc="sum",
        fill_value=0,
        dropna=False
    ).reset_index()

    # drop the placeholder "-1" column again
    if "-1" in profile_scores.columns:
        profile_scores.drop(["-1"], inplace=True, axis=1)

    # merge with playDuration and matchShare
    profile_scores = profile_scores.merge(
        match_shares,
        left_on=["iterationId", "squadId", "playerId", "positions"],
        right_on=["iterationId", "squadId", "playerId", "positions"],
        how="inner",
        suffixes=("", "_right")
    )

    # merge with other data
    profile_scores = profile_scores.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # remove rows produced only by the outer merge
    profile_scores = profile_scores[profile_scores.iterationId.notnull()]

    # fix column types
    profile_scores["squadId"] = profile_scores["squadId"].astype("Int64")
    profile_scores["playerId"] = profile_scores["playerId"].astype("Int64")
    profile_scores["iterationId"] = profile_scores["iterationId"].astype("Int64")
    profile_scores["wyscoutId"] = profile_scores["wyscoutId"].astype("Int64")
    profile_scores["heimSpielId"] = profile_scores["heimSpielId"].astype("Int64")
    profile_scores["skillCornerId"] = profile_scores["skillCornerId"].astype("Int64")

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "squadName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "positions",
        "matchShare",
        "playDuration"
    ]

    # add score names to order
    order = order + scores.name.to_list()

    # select columns
    profile_scores = profile_scores[order]

    # return result
    return profile_scores
# closed set of valid position codes accepted by the player-scores endpoints
allowed_positions = [
    "GOALKEEPER",
    "LEFT_WINGBACK_DEFENDER",
    "RIGHT_WINGBACK_DEFENDER",
    "CENTRAL_DEFENDER",
    "DEFENSE_MIDFIELD",
    "CENTRAL_MIDFIELD",
    "ATTACKING_MIDFIELD",
    "LEFT_WINGER",
    "RIGHT_WINGER",
    "CENTER_FORWARD"
]

######
#
# This function returns a pandas dataframe that contains all scores for a
# given match and a given set of positions aggregated per player
#
######


def getPlayerMatchScores(
        matches: list, token: str, positions: list = None, session: requests.Session = requests.Session()
) -> pd.DataFrame:
    """Return per-player score aggregates for the given matches from the public host.

    Wraps ``getPlayerMatchScoresFromHost`` with a rate-limited client
    authorized via the bearer ``token``.
    """

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getPlayerMatchScoresFromHost(matches, connection, "https://api.impect.com", positions)

def getPlayerMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: str, positions: list = None) -> pd.DataFrame:
    """Return per-player score aggregates for the given matches from an arbitrary host.

    Args:
        matches: List of match ids to aggregate.
        connection: Rate-limited API client with auth headers already attached.
        host: Base URL of the API host.
        positions: Optional subset of ``allowed_positions``; if given, scores
            are aggregated over that combined position set instead of per
            single position.

    Returns:
        DataFrame with one row per player appearance (keyed by match, squad,
        player and position(s)), one column per score, enriched with match,
        iteration, squad, coach and player metadata.

    Raises:
        Exception: On invalid arguments, when all matches are unavailable, or
            when no player played at the given positions in any match.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # check input for positions argument
    if not isinstance(positions, list) and positions is not None:
        raise Exception("Input for positions argument must be a list")

    # check if the input positions are valid
    if positions is not None:
        invalid_positions = [position for position in positions if position not in allowed_positions]
        if len(invalid_positions) > 0:
            raise Exception(
                f"Invalid position(s): {', '.join(invalid_positions)}."
                f"\nChoose one or more of: {', '.join(allowed_positions)}"
            )

    # get match info
    match_data = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Match Info"
        ),
            matches),
        ignore_index=True)

    # matches without a lastCalculationDate have not been processed yet
    fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = [match for match in matches if match not in fail_matches]

    # raise warnings
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds of all available matches
    iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique())

    # get player scores
    if positions is None:
        # query positions at once
        scores_raw = pd.concat(
            map(lambda match: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/matches/{match}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerMatchScores"
            ).assign(
                matchId=match,
            ),
                matches),
            ignore_index=True)
    else:

        # compile list of positions
        position_string = ",".join(positions)

        # query positions individually
        scores_raw = pd.concat(
            map(lambda match: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/matches/{match}/positions/{position_string}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerMatchScores"
            ).assign(
                matchId=match,
                positions=position_string
            ),
                matches),
            ignore_index=True)

    # get players for all involved iterations
    players = pd.concat(
        map(
            lambda iteration: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/players",
                method="GET"
            ).process_response(
                endpoint="Players"
            ),
            iterations),
        ignore_index=True
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest provider id mappings (wyscout, heimSpiel, skillCorner)
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get coaches; the endpoint may be blacklisted for some customers
    coaches_blacklisted = False
    try:
        coaches = pd.concat(
            map(lambda iteration: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/coaches",
                method="GET"
            ).process_response(
                endpoint="Coaches",
                raise_exception=False
            ),
                iterations),
            ignore_index=True)[["id", "name"]].drop_duplicates()
    except KeyError:
        # no coaches found, create empty df
        coaches = pd.DataFrame(columns=["id", "name"])
    except ForbiddenError:
        coaches_blacklisted = True

    # get the list of player score names
    scores = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/player-scores",
        method="GET"
    ).process_response(
        endpoint="PlayerScores"
    )[["id", "name"]]

    # get match plan for all involved iterations
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="Countries"
    )

    # create empty df to store player scores
    player_scores = pd.DataFrame()

    # manipulate player_scores

    # iterate over matches
    for i in range(len(scores_raw)):

        # create empty df to store per match scores
        match_player_scores = pd.DataFrame()

        # iterate over sides
        for side in ["squadHomePlayers", "squadAwayPlayers"]:

            # get data for index
            temp = scores_raw[side].loc[i]

            # check if any records for side at given position
            if len(temp) == 0:
                continue

            # convert to pandas df; the two branches differ in whether a
            # single "position" or the combined "positions" string is carried
            if positions is None:
                temp = pd.DataFrame(temp).assign(
                    matchId=scores_raw.matchId.loc[i],
                    squadId=scores_raw[side.replace("Players", "Id")].loc[i],
                )

                # extract matchshares before pivoting would lose them
                matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration", "position"]].drop_duplicates()

            else:
                temp = pd.DataFrame(temp).assign(
                    matchId=scores_raw.matchId.loc[i],
                    squadId=scores_raw[side.replace("Players", "Id")].loc[i],
                    positions=scores_raw.positions.loc[i]
                )

                # extract matchshares before pivoting would lose them
                matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration"]].drop_duplicates().assign(
                    positions=position_string
                )

            # explode scores column to one row per score entry
            temp = temp.explode("playerScores")

            # unnest dictionary in playerScores column
            temp = pd.concat(
                [temp.drop(["playerScores"], axis=1), temp["playerScores"].apply(pd.Series)],
                axis=1
            )

            # outer-merge with the score list so all scores become columns
            temp = pd.merge(
                temp,
                scores,
                left_on="playerScoreId",
                right_on="id",
                how="outer",
                suffixes=("", "_right")
            )

            # pivot data; index key depends on the positions branch
            if positions is None:
                temp = pd.pivot_table(
                    temp,
                    values="value",
                    index=["matchId", "squadId", "position", "id"],
                    columns="name",
                    aggfunc="sum",
                    fill_value=0,
                    dropna=False
                ).reset_index()

                # inner join with matchshares
                temp = pd.merge(
                    temp,
                    matchshares,
                    left_on=["matchId", "squadId", "id", "position"],
                    right_on=["matchId", "squadId", "id", "position"],
                    how="inner",
                    suffixes=("", "_right")
                )
            else:
                temp = pd.pivot_table(
                    temp,
                    values="value",
                    index=["matchId", "squadId", "positions", "id"],
                    columns="name",
                    aggfunc="sum",
                    fill_value=0,
                    dropna=False
                ).reset_index()

                # inner join with matchshares
                temp = pd.merge(
                    temp,
                    matchshares,
                    left_on=["matchId", "squadId", "id", "positions"],
                    right_on=["matchId", "squadId", "id", "positions"],
                    how="inner",
                    suffixes=("", "_right")
                )

            # append to match_player_scores
            match_player_scores = pd.concat([match_player_scores, temp])

        # check if any records for match at given position
        if len(match_player_scores) == 0:
            print(f"No players played at given position in match {scores_raw.loc[i].matchId}")

        # append to player_scores
        player_scores = pd.concat([player_scores, match_player_scores])

    # check if any records for any match at given position
    if len(player_scores) == 0:
        raise Exception("No players played at given positions for any given match. Execution stopped.")

    # merge with other data
    player_scores = player_scores.merge(
        matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]],
        left_on="matchId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        # stack home/away coach assignments into one (match, squad) -> coach table
        pd.concat([
            match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}),
            match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"})
        ], ignore_index=True),
        left_on=["matchId", "squadId"],
        right_on=["id", "squadId"],
        how="left",
        suffixes=("", "_right")
    ).merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    if not coaches_blacklisted:
        player_scores["coachId"] = player_scores["coachId"].astype("Int64")
        player_scores = player_scores.merge(
            coaches[["id", "name"]].rename(
                columns={"id": "coachId", "name": "coachName"}
            ),
            left_on="coachId",
            right_on="coachId",
            how="left",
            suffixes=("", "_right")
        )

    # rename some columns
    player_scores = player_scores.rename(columns={
        "scheduledDate": "dateTime",
        "id": "playerId"
    })

    # define column order
    order = [
        "matchId",
        "dateTime",
        "competitionName",
        "competitionId",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "coachId",
        "coachName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "positions" if positions is not None else "position",
        "matchShare",
        "playDuration",
    ]

    # add score names to order
    order += scores["name"].to_list()

    # check if coaches are blacklisted
    if coaches_blacklisted:
        order = [col for col in order if col not in ["coachId", "coachName"]]

    # select columns
    player_scores = player_scores[order]

    # fix some column types
    player_scores["matchId"] = player_scores["matchId"].astype("Int64")
    player_scores["squadId"] = player_scores["squadId"].astype("Int64")
    player_scores["playerId"] = player_scores["playerId"].astype("Int64")
    player_scores["wyscoutId"] = player_scores["wyscoutId"].astype("Int64")
    player_scores["heimSpielId"] = player_scores["heimSpielId"].astype("Int64")
    player_scores["skillCornerId"] = player_scores["skillCornerId"].astype("Int64")

    # return data
    return player_scores


######
#
# This function returns a pandas dataframe that contains all scores for a
# given iteration and a given set of positions aggregated per player
#
######
iteration: int, token: str, positions: list = None, session: requests.Session = requests.Session() 449 | ) -> pd.DataFrame: 450 | 451 | # create an instance of RateLimitedAPI 452 | connection = RateLimitedAPI(session) 453 | 454 | # construct header with access token 455 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 456 | 457 | return getPlayerIterationScoresFromHost(iteration, connection, "https://api.impect.com", positions) 458 | 459 | def getPlayerIterationScoresFromHost( 460 | iteration: int, connection: RateLimitedAPI, host: str, positions: list = None 461 | ) -> pd.DataFrame: 462 | 463 | # check input for iteration argument 464 | if not isinstance(iteration, int): 465 | raise Exception("Input for iteration argument must be an integer") 466 | 467 | # check input for positions argument 468 | if not isinstance(positions, list) and positions is not None: 469 | raise Exception("Input for positions argument must be a list") 470 | 471 | # check if the input positions are valid 472 | if positions is not None: 473 | invalid_positions = [position for position in positions if position not in allowed_positions] 474 | if len(invalid_positions) > 0: 475 | raise Exception( 476 | f"Invalid position(s): {', '.join(invalid_positions)}." 
def getPlayerIterationScoresFromHost(
        iteration: int, connection: RateLimitedAPI, host: str, positions: list = None
) -> pd.DataFrame:
    """
    Return a dataframe with all player scores for a given iteration,
    aggregated per player and position (or per supplied position set).

    Args:
        iteration: iteration id to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.
        positions: optional list of position codes; must be a subset of
            ``allowed_positions``. If given, scores are fetched per
            position-combination endpoint instead of per player-position.

    Returns:
        pd.DataFrame: one row per player (and position grouping) with squad,
        competition, player master data and one column per score name.

    Raises:
        Exception: on invalid arguments, invalid positions, or when no player
            played at the given positions in the entire iteration.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Input for iteration argument must be an integer")

    # check input for positions argument
    if not isinstance(positions, list) and positions is not None:
        raise Exception("Input for positions argument must be a list")

    # check if the input positions are valid
    if positions is not None:
        invalid_positions = [position for position in positions if position not in allowed_positions]
        if len(invalid_positions) > 0:
            raise Exception(
                f"Invalid position(s): {', '.join(invalid_positions)}."
                f"\nChoose one or more of: {', '.join(allowed_positions)}"
            )

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )

    # get squadIds (only squads the customer has access to)
    squad_ids = squads[squads.access].id.to_list()

    # get player iteration averages per squad
    if positions is None:

        scores_raw = pd.concat(
            map(lambda squadId: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/"
                    f"squads/{squadId}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerIterationScores",
                raise_exception=False
            ).assign(
                iterationId=iteration,
                squadId=squadId
            ),
                squad_ids),
            ignore_index=True)

    else:

        # compile position string (comma-separated, as expected by the endpoint)
        position_string = ",".join(positions)

        scores_raw = pd.concat(
            map(lambda squadId: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/"
                    f"squads/{squadId}/positions/{position_string}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerIterationScores",
                raise_exception=False
            ).assign(
                iterationId=iteration,
                squadId=squadId,
                positions=position_string
            ),
                squad_ids),
            ignore_index=True)

    # raise exception if no player played at given positions in entire iteration
    if len(scores_raw) == 0:
        raise Exception(f"No players played at given position in iteration {iteration}.")

    # print squads without players at given position
    error_list = [str(squadId) for squadId in squad_ids if squadId not in scores_raw.squadId.to_list()]
    if len(error_list) > 0:
        print(f"No players played at positions {positions} for iteration {iteration} for following squads:\n\t{', '.join(error_list)}")

    # get players
    players = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/players",
        method="GET"
    ).process_response(
        endpoint="Players"
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest mappings (wyscoutId / heimSpielId / skillCornerId)
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get scores (master list of score id -> name)
    scores = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/player-scores",
        method="GET"
    ).process_response(
        endpoint="playerScores"
    )[["id", "name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    # NOTE(review): endpoint label "KPIs" looks like a copy-paste slip — this
    # call fetches countries; consider renaming the label.
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )

    # unnest scorings (one row per player-score entry)
    averages = scores_raw.explode("playerScores").reset_index(drop=True)

    # unnest dictionary in kpis column
    averages = pd.concat(
        [averages.drop(["playerScores"], axis=1), pd.json_normalize(averages["playerScores"])],
        axis=1
    )

    # merge with player scores to ensure all kpis are present
    # (outer merge also creates rows for scores nobody recorded)
    averages = averages.merge(
        scores,
        left_on="playerScoreId",
        right_on="id",
        how="outer",
        suffixes=("", "_right")
    )

    # get matchShares (kept aside because the pivot below drops these columns)
    if positions is None:
        match_shares = averages[
            ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"]
        ].drop_duplicates()

        # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
        if len(averages["name"][averages["name"].isnull()]) > 0:
            averages["name"] = averages["name"].fillna("-1")

        # pivot kpi values (one column per score name)
        averages = pd.pivot_table(
            averages,
            values="value",
            index=["iterationId", "squadId", "playerId", "position"],
            columns="name",
            aggfunc="sum",
            fill_value=0,
            dropna=False
        ).reset_index()

        # drop "-1" column (the sentinel introduced above)
        if "-1" in averages.columns:
            averages.drop(["-1"], inplace=True, axis=1)

        # merge with playDuration and matchShare
        averages = averages.merge(
            match_shares,
            left_on=["iterationId", "squadId", "playerId", "position"],
            right_on=["iterationId", "squadId", "playerId", "position"],
            how="inner",
            suffixes=("", "_right")
        )
    else:
        # same flow as above, but grouped by the "positions" string instead of
        # the single "position" column
        match_shares = averages[
            ["iterationId", "squadId", "playerId", "positions", "playDuration", "matchShare"]
        ].drop_duplicates()

        # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
        if len(averages["name"][averages["name"].isnull()]) > 0:
            averages["name"] = averages["name"].fillna("-1")

        # pivot kpi values
        averages = pd.pivot_table(
            averages,
            values="value",
            index=["iterationId", "squadId", "playerId", "positions"],
            columns="name",
            aggfunc="sum",
            fill_value=0,
            dropna=False
        ).reset_index()

        # drop "-1" column
        if "-1" in averages.columns:
            averages.drop(["-1"], inplace=True, axis=1)

        # merge with playDuration and matchShare
        averages = averages.merge(
            match_shares,
            left_on=["iterationId", "squadId", "playerId", "positions"],
            right_on=["iterationId", "squadId", "playerId", "positions"],
            how="inner",
            suffixes=("", "_right")
        )

    # merge with other data (competition, squad, player master data, country)
    averages = averages.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # remove NA rows (artifacts of the earlier outer merge)
    averages = averages[averages.iterationId.notnull()]

    # fix column types
    averages["squadId"] = averages["squadId"].astype(int)
    averages["playerId"] = averages["playerId"].astype(int)
    averages["iterationId"] = averages["iterationId"].astype(int)

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "squadName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "positions" if positions is not None else "position",
        "matchShare",
        "playDuration"
    ]

    # add kpiNames to order
    order = order + scores.name.to_list()

    # select columns
    averages = averages[order]

    # fix some column types (nullable Int64 so missing external ids survive)
    averages["squadId"] = averages["squadId"].astype("Int64")
    averages["playerId"] = averages["playerId"].astype("Int64")
    averages["wyscoutId"] = averages["wyscoutId"].astype("Int64")
    averages["heimSpielId"] = averages["heimSpielId"].astype("Int64")
    averages["skillCornerId"] = averages["skillCornerId"].astype("Int64")

    # return result
    return averages
def getSetPieces(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all set pieces for the given matches from the
    production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSetPiecesFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    client = RateLimitedAPI(session)

    # authenticate every request of this session
    client.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSetPiecesFromHost(matches, client, "https://api.impect.com")
def getSetPiecesFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Return a dataframe with all set pieces (one row per set-piece sub-phase)
    for the given matches.

    Args:
        matches: list of match ids to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.

    Returns:
        pd.DataFrame: set-piece sub-phases enriched with match, competition,
        squad and player master data, ordered by match and phase index.

    Raises:
        Exception: if ``matches`` is not a list, or if none of the supplied
            matches is available yet.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    # NOTE(review): endpoint label "Iterations" for a per-match info call looks
    # like a copy-paste slip — other modules label this "Match Info"; confirm.
    iterations = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable (no lastCalculationDate yet)
    fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = [match for match in matches if match not in fail_matches]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds (the variable is reused; from here on it is a list of ids)
    iterations = list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique())

    # get players
    players = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/players",
            method="GET"
        ).process_response(
            endpoint="Players"
        ),
            iterations),
        ignore_index=True)[["id", "commonname"]].drop_duplicates()

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations (reused again: now the full iterations dataframe)
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get set piece data (one row per sub-phase after explode)
    set_pieces = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}/set-pieces",
            method="GET"
        ).process_response(
            endpoint="Set-Pieces"
        ),
            matches),
        ignore_index=True
    ).rename(
        columns={"id": "setPieceId"}
    ).explode("setPieceSubPhase", ignore_index=True)

    # unpack setPieceSubPhase column; the rename turns "a.b" into camelCase "aB"
    set_pieces = pd.concat(
        [
            set_pieces.drop(columns=["setPieceSubPhase"]),
            pd.json_normalize(set_pieces["setPieceSubPhase"]).add_prefix("setPieceSubPhase.")
        ],
        axis=1
    ).rename(columns=lambda x: re.sub(r"\.(.)", lambda y: y.group(1).upper(), x))

    # fix typing (nullable Int64: these player ids can be missing)
    set_pieces.setPieceSubPhaseMainEventPlayerId = set_pieces.setPieceSubPhaseMainEventPlayerId.astype("Int64")
    set_pieces.setPieceSubPhaseFirstTouchPlayerId = set_pieces.setPieceSubPhaseFirstTouchPlayerId.astype("Int64")
    set_pieces.setPieceSubPhaseSecondTouchPlayerId = set_pieces.setPieceSubPhaseSecondTouchPlayerId.astype("Int64")

    # start merging dfs

    # merge events with matches
    set_pieces = set_pieces.merge(
        matchplan,
        left_on="matchId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    set_pieces = set_pieces.merge(
        iterations,
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # determine defending squad (the squad that is not attacking)
    set_pieces["defendingSquadId"] = set_pieces.apply(
        lambda row: row.homeSquadId if row.squadId == row.awaySquadId else row.awaySquadId,
        axis=1
    )

    # merge events with squads
    set_pieces = set_pieces.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "attackingSquadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    ).merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "defendingSquadName"}),
        left_on="defendingSquadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_away")
    )

    # merge events with players (one merge per player role)
    set_pieces = set_pieces.merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhaseMainEventPlayerId",
                "commonname": "setPieceSubPhaseMainEventPlayerName"
            }
        ),
        left_on="setPieceSubPhaseMainEventPlayerId",
        right_on="setPieceSubPhaseMainEventPlayerId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhasePassReceiverId",
                "commonname": "setPieceSubPhasePassReceiverName"
            }
        ),
        left_on="setPieceSubPhasePassReceiverId",
        right_on="setPieceSubPhasePassReceiverId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhaseFirstTouchPlayerId",
                "commonname": "setPieceSubPhaseFirstTouchPlayerName"
            }
        ),
        left_on="setPieceSubPhaseFirstTouchPlayerId",
        right_on="setPieceSubPhaseFirstTouchPlayerId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhaseSecondTouchPlayerId",
                "commonname": "setPieceSubPhaseSecondTouchPlayerName"
            }
        ),
        left_on="setPieceSubPhaseSecondTouchPlayerId",
        right_on="setPieceSubPhaseSecondTouchPlayerId",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    set_pieces = set_pieces.rename(columns={
        "scheduledDate": "dateTime",
        "squadId": "attackingSquadId",
        "phaseIndex": "setPiecePhaseIndex",
        "setPieceSubPhaseAggregatesSHOT_XG": "setPieceSubPhase_SHOT_XG",
        "setPieceSubPhaseAggregatesPACKING_XG": "setPieceSubPhase_PACKING_XG",
        "setPieceSubPhaseAggregatesPOSTSHOT_XG": "setPieceSubPhase_POSTSHOT_XG",
        "setPieceSubPhaseAggregatesSHOT_AT_GOAL_NUMBER": "setPieceSubPhase_SHOT_AT_GOAL_NUMBER",
        "setPieceSubPhaseAggregatesGOALS": "setPieceSubPhase_GOALS",
        "setPieceSubPhaseAggregatesPXT_POSITIVE": "setPieceSubPhase_PXT_POSITIVE",
        "setPieceSubPhaseAggregatesBYPASSED_OPPONENTS": "setPieceSubPhase_BYPASSED_OPPONENTS",
        "setPieceSubPhaseAggregatesBYPASSED_DEFENDERS": "setPieceSubPhase_BYPASSED_DEFENDERS"
    })

    # define desired column order
    order = [
        "matchId",
        "dateTime",
        "competitionName",
        "competitionId",
        "competitionType",
        "iterationId",
        "season",
        "attackingSquadId",
        "attackingSquadName",
        "defendingSquadId",
        "defendingSquadName",
        "setPieceId",
        "setPiecePhaseIndex",
        "setPieceCategory",
        "adjSetPieceCategory",
        "setPieceExecutionType",
        "setPieceSubPhaseId",
        "setPieceSubPhaseIndex",
        "setPieceSubPhaseStartZone",
        "setPieceSubPhaseCornerEndZone",
        "setPieceSubPhaseCornerType",
        "setPieceSubPhaseFreeKickEndZone",
        "setPieceSubPhaseFreeKickType",
        "setPieceSubPhaseMainEventPlayerId",
        "setPieceSubPhaseMainEventPlayerName",
        "setPieceSubPhaseMainEventOutcome",
        "setPieceSubPhasePassReceiverId",
        "setPieceSubPhasePassReceiverName",
        "setPieceSubPhaseFirstTouchPlayerId",
        "setPieceSubPhaseFirstTouchPlayerName",
        "setPieceSubPhaseFirstTouchWon",
        "setPieceSubPhaseIndirectHeader",
        "setPieceSubPhaseSecondTouchPlayerId",
        "setPieceSubPhaseSecondTouchPlayerName",
        "setPieceSubPhaseSecondTouchWon",
        "setPieceSubPhase_SHOT_XG",
        "setPieceSubPhase_PACKING_XG",
        "setPieceSubPhase_POSTSHOT_XG",
        "setPieceSubPhase_SHOT_AT_GOAL_NUMBER",
        "setPieceSubPhase_GOALS",
        "setPieceSubPhase_PXT_POSITIVE",
        "setPieceSubPhase_BYPASSED_OPPONENTS",
        "setPieceSubPhase_BYPASSED_DEFENDERS",
    ]

    # reorder data
    set_pieces = set_pieces[order]

    # reorder rows
    set_pieces = set_pieces.sort_values(["matchId", "setPiecePhaseIndex"])

    # return set pieces
    return set_pieces
def getSquadCoefficients(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all squad prediction-model coefficients for a
    given iteration from the production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSquadCoefficientsFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    api = RateLimitedAPI(session)

    # authenticate every request of this session
    api.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadCoefficientsFromHost(iteration, api, "https://api.impect.com")
def getSquadCoefficientsFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Return a dataframe with the squad prediction-model coefficients
    (intercept, home, competition, attack, defense) per date and squad
    for a given iteration.

    Args:
        iteration: iteration id to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.

    Returns:
        pd.DataFrame: one row per (date, squad), sorted by date and squadId.

    Raises:
        Exception: if ``iteration`` is not an integer or does not exist.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Argument 'iteration' must be an integer.")

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # raise exception if provided iteration id doesn't exist
    if iteration not in list(iterations.id):
        raise Exception("The supplied iteration id does not exist. Execution stopped.")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )[["id", "name", "idMappings"]]

    # unnest mappings (wyscoutId / heimSpielId / skillCornerId)
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squad coefficients
    coefficients_raw = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/predictions/model-coefficients",
        method="GET"
    ).process_response(
        endpoint="Squad Coefficients"
    )

    # extract JSON from the column (entries per date, each with squad values)
    nested_data = coefficients_raw["entries"][0]

    # flatten coefficients into one record per (date, squad)
    coefficients = []
    for entry in nested_data:
        date = entry["date"]
        for squad in entry["squads"]:
            coefficients.append({
                "iterationId": iteration,
                "date": date,
                "interceptCoefficient": entry["competition"]["intercept"],
                "homeCoefficient": entry["competition"]["home"],
                "competitionCoefficient": entry["competition"]["comp"],
                "squadId": squad["id"],
                "attackCoefficient": squad["att"],
                "defenseCoefficient": squad["def"]
            })

    # convert to df
    coefficients = pd.DataFrame(coefficients)

    # merge with competition info
    coefficients = coefficients.merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season", "competitionGender"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge coefficients with squads
    coefficients = coefficients.merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    )

    # fix some column types (nullable Int64 so missing external ids survive)
    coefficients["iterationId"] = coefficients["iterationId"].astype("Int64")
    coefficients["competitionId"] = coefficients["competitionId"].astype("Int64")
    coefficients["squadId"] = coefficients["squadId"].astype("Int64")
    coefficients["wyscoutId"] = coefficients["wyscoutId"].astype("Int64")
    coefficients["heimSpielId"] = coefficients["heimSpielId"].astype("Int64")
    coefficients["skillCornerId"] = coefficients["skillCornerId"].astype("Int64")

    # define desired column order
    order = [
        "iterationId",
        "competitionId",
        "competitionName",
        "competitionType",
        "season",
        "competitionGender",
        "interceptCoefficient",
        "homeCoefficient",
        "competitionCoefficient",
        "date",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "attackCoefficient",
        "defenseCoefficient",
    ]

    # reorder data
    coefficients = coefficients[order]

    # reorder rows
    coefficients = coefficients.sort_values(["date", "squadId"])

    # return coefficients
    return coefficients
def getSquadRatings(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all squad ratings for a given iteration from the
    production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSquadRatingsFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    client = RateLimitedAPI(session)

    # authenticate every request of this session
    client.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadRatingsFromHost(iteration, client, "https://api.impect.com")
def getSquadRatingsFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Return a dataframe with the squad rating values per date and squad for a
    given iteration.

    Args:
        iteration: iteration id to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.

    Returns:
        pd.DataFrame: one row per (date, squad), sorted by date and squadId.

    Raises:
        Exception: if ``iteration`` is not an integer or does not exist.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Argument 'iteration' must be an integer.")

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # raise exception if provided iteration id doesn't exist
    if iteration not in list(iterations.id):
        raise Exception("The supplied iteration id does not exist. Execution stopped.")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )[["id", "name", "idMappings"]]

    # unnest mappings (wyscoutId / heimSpielId / skillCornerId)
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squad ratings
    ratings_raw = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads/ratings",
        method="GET"
    ).process_response(
        endpoint="Squad Ratings"
    )

    # extract JSON from the column (entries per date, each with squad ratings)
    nested_data = ratings_raw["squadRatingsEntries"][0]

    # flatten ratings into one record per (date, squad)
    ratings = []
    for entry in nested_data:
        date = entry["date"]
        for squad in entry["squadRatings"]:
            ratings.append({
                "date": date,
                "squadId": squad["squadId"],
                "value": squad["value"]
            })

    # convert to df
    ratings = pd.DataFrame(ratings)

    # add iteration id
    ratings["iterationId"] = iteration

    # merge with competition info
    ratings = ratings.merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season", "competitionGender"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge ratings with squads
    ratings = ratings.merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    )

    # fix some column types (nullable Int64 so missing external ids survive)
    ratings["iterationId"] = ratings["iterationId"].astype("Int64")
    ratings["competitionId"] = ratings["competitionId"].astype("Int64")
    ratings["squadId"] = ratings["squadId"].astype("Int64")
    ratings["wyscoutId"] = ratings["wyscoutId"].astype("Int64")
    ratings["heimSpielId"] = ratings["heimSpielId"].astype("Int64")
    ratings["skillCornerId"] = ratings["skillCornerId"].astype("Int64")

    # define desired column order
    order = [
        "iterationId",
        "competitionId",
        "competitionName",
        "competitionType",
        "season",
        "competitionGender",
        "date",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "value"
    ]

    # reorder data
    ratings = ratings[order]

    # reorder rows
    ratings = ratings.sort_values(["date", "squadId"])

    # return ratings
    return ratings
def getSquadMatchScores(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all squad-level scores for the given matches from
    the production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSquadMatchScoresFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    api = RateLimitedAPI(session)

    # authenticate every request of this session
    api.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadMatchScoresFromHost(matches, api, "https://api.impect.com")
Execution stopped.") 53 | else: 54 | print(f"The following matches are not available yet and were ignored:\n{fail_matches}") 55 | 56 | # extract iterationIds 57 | iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) 58 | 59 | # get squad scores 60 | scores_raw = pd.concat( 61 | map(lambda match: connection.make_api_request_limited( 62 | url=f"{host}/v5/customerapi/matches/{match}/squad-scores", 63 | method="GET" 64 | ).process_response( 65 | endpoint="SquadMatchScores" 66 | ).assign( 67 | matchId=match 68 | ), 69 | matches), 70 | ignore_index=True) 71 | 72 | # get squads 73 | squads = pd.concat( 74 | map(lambda iteration: connection.make_api_request_limited( 75 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 76 | method="GET" 77 | ).process_response( 78 | endpoint="Squads" 79 | ), 80 | iterations), 81 | ignore_index=True)[["id", "name", "idMappings"]] 82 | 83 | # get coaches 84 | coaches_blacklisted = False 85 | try: 86 | coaches = pd.concat( 87 | map(lambda iteration: connection.make_api_request_limited( 88 | url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", 89 | method="GET" 90 | ).process_response( 91 | endpoint="Coaches", 92 | raise_exception=False 93 | ), 94 | iterations), 95 | ignore_index=True)[["id", "name"]].drop_duplicates() 96 | except KeyError: 97 | # no coaches found, create empty df 98 | coaches = pd.DataFrame(columns=["id", "name"]) 99 | except ForbiddenError: 100 | coaches_blacklisted = True 101 | 102 | # unnest mappings 103 | squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() 104 | 105 | # get squad scores 106 | scores = connection.make_api_request_limited( 107 | url=f"{host}/v5/customerapi/squad-scores", 108 | method="GET" 109 | ).process_response( 110 | endpoint="PlayerScores" 111 | )[["id", "name"]] 112 | 113 | # get matches 114 | matchplan = pd.concat( 115 | map(lambda iteration: getMatchesFromHost( 116 | iteration=iteration, 117 | 
connection=connection, 118 | host=host 119 | ), 120 | iterations), 121 | ignore_index=True) 122 | 123 | # get iterations 124 | iterations = getIterationsFromHost(connection=connection, host=host) 125 | 126 | # create empty df to store squad scores 127 | squad_scores = pd.DataFrame() 128 | 129 | # manipulate squad scores 130 | 131 | # iterate over matches 132 | for i in range(len(scores_raw)): 133 | 134 | # iterate over sides 135 | for side in ["squadHomeSquadScores", "squadAwaySquadScores"]: 136 | # get data for index 137 | temp = scores_raw[side].loc[i] 138 | 139 | # convert to pandas df 140 | temp = pd.DataFrame(temp).assign( 141 | matchId=scores_raw.matchId.loc[i], 142 | squadId=scores_raw[side.replace("SquadScores", "Id")].loc[i] 143 | ) 144 | 145 | # merge with squad scores to ensure all scores are present 146 | temp = pd.merge( 147 | temp, 148 | scores, 149 | left_on="squadScoreId", 150 | right_on="id", 151 | how="outer", 152 | suffixes=("", "_right") 153 | ) 154 | 155 | # pivot data 156 | temp = pd.pivot_table( 157 | temp, 158 | values="value", 159 | index=["matchId", "squadId"], 160 | columns="name", 161 | aggfunc="sum", 162 | fill_value=0, 163 | dropna=False 164 | ).reset_index() 165 | 166 | # append to player_scores 167 | squad_scores = pd.concat([squad_scores, temp]) 168 | 169 | # merge with other data 170 | squad_scores = squad_scores.merge( 171 | matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]], 172 | left_on="matchId", 173 | right_on="id", 174 | how="left", 175 | suffixes=("", "_right") 176 | ).merge( 177 | pd.concat([ 178 | match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), 179 | match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) 180 | ], ignore_index=True), 181 | left_on=["matchId", "squadId"], 182 | right_on=["id", "squadId"], 183 | how="left", 184 | suffixes=("", 
"_right") 185 | ).merge( 186 | iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], 187 | left_on="iterationId", 188 | right_on="id", 189 | how="left", 190 | suffixes=("", "_right") 191 | ).merge( 192 | squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename( 193 | columns={"id": "squadId", "name": "squadName"} 194 | ), 195 | left_on="squadId", 196 | right_on="squadId", 197 | how="left", 198 | suffixes=("", "_right") 199 | ) 200 | 201 | if not coaches_blacklisted: 202 | squad_scores["coachId"] = squad_scores["coachId"].astype("Int64") 203 | squad_scores = squad_scores.merge( 204 | coaches[["id", "name"]].rename( 205 | columns={"id": "coachId", "name": "coachName"} 206 | ), 207 | left_on="coachId", 208 | right_on="coachId", 209 | how="left", 210 | suffixes=("", "_right") 211 | ) 212 | 213 | # rename some columns 214 | squad_scores = squad_scores.rename(columns={ 215 | "scheduledDate": "dateTime" 216 | }) 217 | 218 | # define column order 219 | order = [ 220 | "matchId", 221 | "dateTime", 222 | "competitionName", 223 | "competitionId", 224 | "competitionType", 225 | "iterationId", 226 | "season", 227 | "matchDayIndex", 228 | "matchDayName", 229 | "squadId", 230 | "wyscoutId", 231 | "heimSpielId", 232 | "skillCornerId", 233 | "squadName", 234 | "coachId", 235 | "coachName" 236 | ] 237 | 238 | # check if coaches are blacklisted 239 | if coaches_blacklisted: 240 | order = [col for col in order if col not in ["coachId", "coachName"]] 241 | 242 | # add scoreNames to order 243 | order += scores["name"].to_list() 244 | 245 | # select columns 246 | squad_scores = squad_scores[order] 247 | 248 | # fix some column types 249 | squad_scores["matchId"] = squad_scores["matchId"].astype("Int64") 250 | squad_scores["competitionId"] = squad_scores["competitionId"].astype("Int64") 251 | squad_scores["iterationId"] = squad_scores["iterationId"].astype("Int64") 252 | squad_scores["matchDayIndex"] = 
squad_scores["matchDayIndex"].astype("Int64") 253 | squad_scores["squadId"] = squad_scores["squadId"].astype("Int64") 254 | squad_scores["wyscoutId"] = squad_scores["wyscoutId"].astype("Int64") 255 | squad_scores["heimSpielId"] = squad_scores["heimSpielId"].astype("Int64") 256 | squad_scores["skillCornerId"] = squad_scores["skillCornerId"].astype("Int64") 257 | 258 | # return data 259 | return squad_scores 260 | 261 | ###### 262 | # 263 | # This function returns a pandas dataframe that contains all scores for a 264 | # given iteration aggregated per squad 265 | # 266 | ###### 267 | 268 | 269 | def getSquadIterationScores(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame: 270 | 271 | # create an instance of RateLimitedAPI 272 | connection = RateLimitedAPI(session) 273 | 274 | # construct header with access token 275 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 276 | 277 | return getSquadIterationScoresFromHost(iteration, connection, "https://api.impect.com") 278 | 279 | def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame: 280 | 281 | # check input for matches argument 282 | if not isinstance(iteration, int): 283 | raise Exception("Input for iteration argument must be an integer") 284 | 285 | # get squads 286 | squads = connection.make_api_request_limited( 287 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 288 | method="GET" 289 | ).process_response( 290 | endpoint="Squads" 291 | )[["id", "name", "idMappings"]] 292 | 293 | # unnest mappings 294 | squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() 295 | 296 | # get squad iteration averages 297 | scores_raw = connection.make_api_request_limited( 298 | url=f"{host}/v5/customerapi/iterations/{iteration}/squad-scores", 299 | method="GET" 300 | ).process_response( 301 | endpoint="SquadIterationScores" 302 | ).assign(iterationId=iteration) 
303 | 304 | # get scores 305 | scores_definitions = connection.make_api_request_limited( 306 | url=f"{host}/v5/customerapi/squad-scores", 307 | method="GET" 308 | ).process_response( 309 | endpoint="scoreDefinitions" 310 | )[["id", "name"]] 311 | 312 | # get iterations 313 | iterations = getIterationsFromHost(connection=connection, host=host) 314 | 315 | # get matches played 316 | matches = scores_raw[["squadId", "matches"]].drop_duplicates() 317 | 318 | # unnest scores 319 | scores = scores_raw.explode("squadScores").reset_index(drop=True) 320 | 321 | # unnest dictionary in kpis column 322 | scores = pd.concat( 323 | [scores.drop(["squadScores"], axis=1), pd.json_normalize(scores["squadScores"])], 324 | axis=1 325 | ) 326 | 327 | # merge with kpis to ensure all kpis are present 328 | scores = scores.merge( 329 | scores_definitions, 330 | left_on="squadScoreId", 331 | right_on="id", 332 | how="outer", 333 | suffixes=("", "_right") 334 | ) 335 | 336 | # pivot kpi values 337 | scores = pd.pivot_table( 338 | scores, 339 | values="value", 340 | index=["iterationId", "squadId"], 341 | columns="name", 342 | aggfunc="sum", 343 | fill_value=0, 344 | dropna=False 345 | ).reset_index() 346 | 347 | # inner join with matches played 348 | scores = pd.merge( 349 | scores, 350 | matches, 351 | left_on="squadId", 352 | right_on="squadId", 353 | how="inner", 354 | suffixes=("", "_right") 355 | ) 356 | 357 | # merge with other data 358 | scores = scores.merge( 359 | iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], 360 | left_on="iterationId", 361 | right_on="id", 362 | how="left", 363 | suffixes=("", "_right") 364 | ).merge( 365 | squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename( 366 | columns={"id": "squadId", "name": "squadName"} 367 | ), 368 | left_on="squadId", 369 | right_on="squadId", 370 | how="left", 371 | suffixes=("", "_right") 372 | ) 373 | 374 | # remove NA rows 375 | averages = 
scores[scores.iterationId.notnull()] 376 | 377 | # fix column types 378 | averages["matches"] = averages["matches"].astype("Int64") 379 | averages["iterationId"] = averages["iterationId"].astype("Int64") 380 | averages["squadId"] = averages["squadId"].astype("Int64") 381 | averages["wyscoutId"] = averages["wyscoutId"].astype("Int64") 382 | averages["heimSpielId"] = averages["heimSpielId"].astype("Int64") 383 | averages["skillCornerId"] = averages["skillCornerId"].astype("Int64") 384 | 385 | # define column order 386 | order = [ 387 | "iterationId", 388 | "competitionName", 389 | "season", 390 | "squadId", 391 | "wyscoutId", 392 | "heimSpielId", 393 | "skillCornerId", 394 | "squadName", 395 | "matches" 396 | ] 397 | 398 | # add scoreNames to order 399 | order = order + scores_definitions.name.to_list() 400 | 401 | # select columns 402 | averages = averages[order] 403 | 404 | # return result 405 | return averages -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), "README.md")) as f: 5 | README = f.read() 6 | 7 | setup( 8 | # Needed to silence warnings (and to be a worthwhile package) 9 | name="impectPy", 10 | url="https://github.com/ImpectAPI/impectPy", 11 | author="Impect", 12 | author_email="info@impect.com", 13 | # Needed to actually package something 14 | packages=["impectPy"], 15 | # Needed for dependencies 16 | install_requires=["requests>=2.24.0", 17 | "pandas>=2.0.0", 18 | "numpy>=1.24.2,<2.0"], 19 | # *strongly* suggested for sharing 20 | version="2.5.2", 21 | # The license can be anything you like 22 | license="MIT", 23 | description="A Python package to facilitate interaction with the Impect customer API", 24 | long_description=README, 25 | long_description_content_type="text/markdown", 26 | ) 
--------------------------------------------------------------------------------