├── .gitignore ├── LICENSE.md ├── NEWS.md ├── README.md ├── examples └── predict_matches.ipynb ├── impectPy ├── __init__.py ├── access_token.py ├── config.py ├── events.py ├── helpers.py ├── impect.py ├── iteration_averages.py ├── iterations.py ├── match_info.py ├── matches.py ├── matchsums.py ├── player_profile_scores.py ├── player_scores.py ├── set_pieces.py ├── squad_coefficients.py ├── squad_ratings.py ├── squad_scores.py └── xml.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/windows,macos,pycharm+all,python,flask 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=windows,macos,pycharm+all,python,flask 3 | 4 | ### Flask ### 5 | instance/* 6 | !instance/.gitignore 7 | .webassets-cache 8 | .env 9 | 10 | ### Flask.Python Stack ### 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | .pybuilder/ 85 | target/ 86 | 87 | # Jupyter Notebook 88 | .ipynb_checkpoints 89 | 90 | # IPython 91 | profile_default/ 92 | ipython_config.py 93 | 94 | # pyenv 95 | # For a library or package, you might want to ignore these files since the code is 96 | # intended to run in multiple environments; otherwise, check them in: 97 | # .python-version 98 | 99 | # pipenv 100 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 101 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 102 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 103 | # install all needed dependencies. 104 | #Pipfile.lock 105 | 106 | # poetry 107 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 108 | # This is especially recommended for binary packages to ensure reproducibility, and is more 109 | # commonly ignored for libraries. 110 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 111 | #poetry.lock 112 | 113 | # pdm 114 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
115 | #pdm.lock 116 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 117 | # in version control. 118 | # https://pdm.fming.dev/#use-with-ide 119 | .pdm.toml 120 | 121 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 122 | __pypackages__/ 123 | 124 | # Celery stuff 125 | celerybeat-schedule 126 | celerybeat.pid 127 | 128 | # SageMath parsed files 129 | *.sage.py 130 | 131 | # Environments 132 | .venv 133 | venv-testpypi 134 | venv-pypi 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | 165 | # PyCharm 166 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 167 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 168 | # and can be added to the global gitignore or merged into this file. For a more nuclear 169 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
170 | #.idea/ 171 | 172 | ### macOS ### 173 | # General 174 | .DS_Store 175 | .AppleDouble 176 | .LSOverride 177 | 178 | # Icon must end with two \r 179 | Icon 180 | 181 | 182 | # Thumbnails 183 | ._* 184 | 185 | # Files that might appear in the root of a volume 186 | .DocumentRevisions-V100 187 | .fseventsd 188 | .Spotlight-V100 189 | .TemporaryItems 190 | .Trashes 191 | .VolumeIcon.icns 192 | .com.apple.timemachine.donotpresent 193 | 194 | # Directories potentially created on remote AFP share 195 | .AppleDB 196 | .AppleDesktop 197 | Network Trash Folder 198 | Temporary Items 199 | .apdisk 200 | 201 | ### macOS Patch ### 202 | # iCloud generated files 203 | *.icloud 204 | 205 | ### PyCharm+all ### 206 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 207 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 208 | 209 | # User-specific stuff 210 | .idea/**/workspace.xml 211 | .idea/**/tasks.xml 212 | .idea/**/usage.statistics.xml 213 | .idea/**/dictionaries 214 | .idea/**/shelf 215 | 216 | # AWS User-specific 217 | .idea/**/aws.xml 218 | 219 | # Generated files 220 | .idea/**/contentModel.xml 221 | 222 | # Sensitive or high-churn files 223 | .idea/**/dataSources/ 224 | .idea/**/dataSources.ids 225 | .idea/**/dataSources.local.xml 226 | .idea/**/sqlDataSources.xml 227 | .idea/**/dynamic.xml 228 | .idea/**/uiDesigner.xml 229 | .idea/**/dbnavigator.xml 230 | 231 | # Gradle 232 | .idea/**/gradle.xml 233 | .idea/**/libraries 234 | 235 | # Gradle and Maven with auto-import 236 | # When using Gradle or Maven with auto-import, you should exclude module files, 237 | # since they will be recreated, and may cause churn. Uncomment if using 238 | # auto-import. 
239 | # .idea/artifacts 240 | # .idea/compiler.xml 241 | # .idea/jarRepositories.xml 242 | # .idea/modules.xml 243 | # .idea/*.iml 244 | # .idea/modules 245 | # *.iml 246 | # *.ipr 247 | 248 | # CMake 249 | cmake-build-*/ 250 | 251 | # Mongo Explorer plugin 252 | .idea/**/mongoSettings.xml 253 | 254 | # File-based project format 255 | *.iws 256 | 257 | # IntelliJ 258 | out/ 259 | 260 | # mpeltonen/sbt-idea plugin 261 | .idea_modules/ 262 | 263 | # JIRA plugin 264 | atlassian-ide-plugin.xml 265 | 266 | # Cursive Clojure plugin 267 | .idea/replstate.xml 268 | 269 | # SonarLint plugin 270 | .idea/sonarlint/ 271 | 272 | # Crashlytics plugin (for Android Studio and IntelliJ) 273 | com_crashlytics_export_strings.xml 274 | crashlytics.properties 275 | crashlytics-build.properties 276 | fabric.properties 277 | 278 | # Editor-based Rest Client 279 | .idea/httpRequests 280 | 281 | # Android studio 3.1+ serialized cache file 282 | .idea/caches/build_file_checksums.ser 283 | 284 | ### PyCharm+all Patch ### 285 | # Ignore everything but code style settings and run configurations 286 | # that are supposed to be shared within teams. 287 | 288 | .idea/* 289 | 290 | !.idea/codeStyles 291 | !.idea/runConfigurations 292 | 293 | ### Python ### 294 | # Byte-compiled / optimized / DLL files 295 | 296 | # C extensions 297 | 298 | # Distribution / packaging 299 | 300 | # PyInstaller 301 | # Usually these files are written by a python script from a template 302 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
303 | 304 | # Installer logs 305 | 306 | # Unit test / coverage reports 307 | 308 | # Translations 309 | 310 | # Django stuff: 311 | 312 | # Flask stuff: 313 | 314 | # Scrapy stuff: 315 | 316 | # Sphinx documentation 317 | 318 | # PyBuilder 319 | 320 | # Jupyter Notebook 321 | 322 | # IPython 323 | 324 | # pyenv 325 | # For a library or package, you might want to ignore these files since the code is 326 | # intended to run in multiple environments; otherwise, check them in: 327 | # .python-version 328 | 329 | # pipenv 330 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 331 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 332 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 333 | # install all needed dependencies. 334 | 335 | # poetry 336 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 337 | # This is especially recommended for binary packages to ensure reproducibility, and is more 338 | # commonly ignored for libraries. 339 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 340 | 341 | # pdm 342 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 343 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 344 | # in version control. 345 | # https://pdm.fming.dev/#use-with-ide 346 | 347 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 348 | 349 | # Celery stuff 350 | 351 | # SageMath parsed files 352 | 353 | # Environments 354 | 355 | # Spyder project settings 356 | 357 | # Rope project settings 358 | 359 | # mkdocs documentation 360 | 361 | # mypy 362 | 363 | # Pyre type checker 364 | 365 | # pytype static type analyzer 366 | 367 | # Cython debug symbols 368 | 369 | # PyCharm 370 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 371 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 372 | # and can be added to the global gitignore or merged into this file. For a more nuclear 373 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 374 | 375 | ### Python Patch ### 376 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 377 | poetry.toml 378 | 379 | # ruff 380 | .ruff_cache/ 381 | 382 | # LSP config files 383 | pyrightconfig.json 384 | 385 | ### Windows ### 386 | # Windows thumbnail cache files 387 | Thumbs.db 388 | Thumbs.db:encryptable 389 | ehthumbs.db 390 | ehthumbs_vista.db 391 | 392 | # Dump file 393 | *.stackdump 394 | 395 | # Folder config file 396 | [Dd]esktop.ini 397 | 398 | # Recycle Bin used on file shares 399 | $RECYCLE.BIN/ 400 | 401 | # Windows Installer files 402 | *.cab 403 | *.msi 404 | *.msix 405 | *.msm 406 | *.msp 407 | 408 | # Windows shortcuts 409 | *.lnk 410 | 411 | # End of https://www.toptal.com/developers/gitignore/api/windows,macos,pycharm+all,python,flask 412 | 413 | # Tests 414 | tests/ 415 | 416 | # Maintenance checklist 417 | maintenance_checklist.md -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # MIT License 2 | 3 | Copyright (c) 2023 impectPy authors 4 | 5 | Permission is hereby granted, free of charge, 
to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | # impectPy 2.5.2 2 | 3 | ## Minor Changes 4 | * fix bugs in the following functions that occurred if the coaches endpoint returned coaches for the iteration but the match does not have any coaches: 5 | * `getEvents()` 6 | * `getPlayerMatchSums()` 7 | * `getSquadMatchSums()` 8 | * `getPlayerMatchScores()` 9 | * `getSquadMatchScores()` 10 | 11 | # impectPy 2.5.1 12 | 13 | ## Minor Changes 14 | * fix bugs in the following functions that occurred if either the coaches endpoint returned no coaches or the coaches endpoint was blacklisted for the user: 15 | * `getEvents()` 16 | * `getPlayerMatchSums()` 17 | * `getSquadMatchSums()` 18 | * `getPlayerMatchScores()` 19 | * `getSquadMatchScores()` 20 | 21 | # impectPy 2.5.0 22 | 23 | ## Major Changes 24 | * Use new endpoints to drastically improve performance of `getPlayerMatchScores()` and `getPlayerIterationScores()`. The argument `positions` is no longer required. If it is not supplied the function defaults to the new endpoints and returns all unique player-position-squad combinations. 
25 | * Add coaches to the following functions: 26 | * `getEvents()` 27 | * `getPlayerMatchSums()` 28 | * `getSquadMatchSums()` 29 | * `getPlayerMatchScores()` 30 | * `getSquadMatchScores()` 31 | * Add function `getSquadCoefficients()` to retrieve detailed model coefficients to enable match predictions 32 | 33 | ## Minor Changes 34 | * Fix error in `getPlayerIterationAverages()` regarding type conversions 35 | * Use `NA` as fill value instead of 0 for score related functions 36 | * Minor fixes to enable PyPi submission 37 | * Improve error handling 38 | 39 | # impectPy 2.4.5 40 | 41 | ## Minor Changes 42 | * fix bug in `getPlayerIterationAverages()` function 43 | 44 | # impectPy 2.4.4 45 | 46 | ## Major Changes 47 | * Rename function `generateSportsCodeXML()` to `generateXML()` 48 | * Add proper xml structure to the `generateXML()` function for Python versions >= 3.9 49 | * Significantly improve customization options for new `generateXML()` function with new function arguments 50 | * `kpis`: Customize KPIs included 51 | * `labels`: Customize labels included 52 | * `codeTag`: Customize code tag selection 53 | * `labelSorting`: Enable/Disable label sorting 54 | 55 | ## Minor Changes 56 | * fix bug in `getEvents()` that prevented the column `duelPlayerName` from being populated correctly 57 | 58 | # impectPy 2.4.3 59 | 60 | ## Minor Changes 61 | * Add FIFA Country Name to the following functions 62 | * `getIterations()` 63 | * `getPlayerMatchsums()` 64 | * `getPlayerIterationAverages()` 65 | * `getPlayerMatchScores()` 66 | * `getPlayerIterationScores()` 67 | * `getPlayerProfileScores()` 68 | * Fix bug in `getStartingPositions()` that resulted from players not having a shirt number assigned 69 | 70 | # impectPy 2.4.2 71 | 72 | ## Minor Changes 73 | * Improvements to `getSubstitutions()` to handle matches where one team did not substitute any players 74 | * Significant performance improvements to `getPlayerIterationAverages()` 75 | 76 | # impectPy 2.4.1 77 | 78 | ## Minor 
Changes 79 | * Fix error in `getEvents()` that prevented set piece data from properly being joined to event data 80 | * Fix error in `getSubstitutions()` & `getStartingPositions()` that caused an error when players switched shirt numbers 81 | 82 | # impectPy 2.4.0 83 | 84 | ## Major Changes 85 | * Add function `getFormations()` to retrieve squad formations on match level 86 | * Add function `getStartingPositions()` to retrieve squad starting positions on match level 87 | * Add function `getSubstitutions()` to retrieve squad substitutions on match level 88 | 89 | ## Minor changes 90 | * Add IMPECT class to enable object-oriented API usage and improve performance 91 | * Add new arguments to `getSportsCodeXML()` to enable more customization options for the generated XML: 92 | * Disable sequencing 93 | * Disable KPI buckets 94 | 95 | # impectPy 2.3.1 96 | 97 | ## Major Changes 98 | * Add function `getSquadRatings()` to retrieve squad ratings 99 | 100 | ## Minor changes 101 | * Add attribute `inferredSetPiece` to `getEvents()` function 102 | * Add ID mappings to other providers (HeimSpiel, SkillCorner, Wyscout) to several functions 103 | * Fix bug in `getSquadMatchScores()` that occurred if the home team did not have a player at the given position 104 | 105 | # impectPy 2.3.0 106 | 107 | ## Major changes 108 | * Add new `getSetPieces()` function 109 | * Add set piece data to `getEvents()` 110 | * Add arguments to `getEvents()` function that control the addition of KPIs and set piece data to the events dataframe 111 | 112 | ## Minor changes 113 | * Fix error in `getEvents()` for matches without any tagged duels 114 | * Use raw string notation when using regex to clean column names 115 | * Add EventId to XML generation 116 | * Fix error in `getPlayerIterationScores()`, `getPlayerIterationScores()` & `getPlayerProfileScores()` when no records are returned for given combination of match/iteration and position 117 | 118 | # impectPy 2.2.0 119 | 120 | ## Major changes 121 | * 
add new functions to query the new customer API endpoints that provide ratios & scores 122 | 123 | ## Minor changes 124 | * switch from German country name to FIFA country name 125 | * Update to readme structure 126 | 127 | # impectPy 2.1.0 128 | 129 | ## Major changes 130 | * add new attributes from dataVersion V4 to `getEvents()` 131 | 132 | ## Minor changes 133 | * add some of the new dataVersion V4 attributes to `generateSportsCodeXML()` 134 | * fix labels of periods in `generateSportsCodeXML()` to better support MatchTracker integration 135 | 136 | # impectPy 2.0.6 137 | 138 | ## Minor changes 139 | * add new label to player phase of xml export: team 140 | 141 | # impectPy 2.0.4 142 | 143 | # impectPy 2.0.5 144 | 145 | ## Minor changes 146 | * add more player master data to `getPlayerMatchsums()` and `getPlayerIterationAverages()` 147 | * fix issue with several functions that occurred with pandas version 2.1 or newer 148 | * fix minor consistency issue in code for `generateSportsCodeXML()` 149 | * edit naming of kickoff events in `generateSportsCodeXML()` to properly support SBG MatchTracker 150 | 151 | # impectPy 2.0.4 152 | 153 | ## Minor changes 154 | * fix bug in `getSquadMatchsums()` and `getPlayerMatchsums()` caused by duplicates 155 | * fix bug in `getMatches()` function caused by addition of wyscoutIds 156 | * 157 | * improve error handling for functions that use match ids as input 158 | * improve error handling for `getMatches()` function 159 | * add `playDuration` on player level to `getSquadMatchsums()`, `getPlayerMatchsums()`, `getPlayerIterationAverages()` and `getSquadIterationAverages()` 160 | * fix bug in `getEvents()`, `getSquadMatchsums()`, `getPlayerMatchsums()`, `getPlayerIterationAverages()` and `getSquadIterationAverages()` that was caused by the addition of several new keys to the KPI endpoint 161 | 162 | # impectPy 2.0.3 163 | 164 | ## Minor changes 165 | * fix bug in `getEvents()` function caused by querying data for multiple 
iterations of the same competition 166 | 167 | # impectPy 2.0.2 168 | 169 | ## Minor changes 170 | * fix bug in `getPlayerIterationAverages()` function caused by user access rights 171 | * fix bug in `getIterations()` function caused by addition of wyscoutIds 172 | * fix bug in `getMatches()` function caused by addition of wyscoutIds 173 | 174 | # impectPy 2.0.1 175 | 176 | ## Minor changes 177 | * fix bug in `getSquadIterationAverages()` function 178 | * fix bug in `getEvents()` function 179 | * fix bug in `generateSportsCodeXML()` function 180 | * fix bug in `getPlayerMatchsums()` function 181 | * add sorting by id to `getIterations()` function 182 | * add sorting by id to `getMatches()` function 183 | * fix function argument name in readMe 184 | 185 | # impectPy 2.0.0 186 | 187 | ## Major changes 188 | * Modify package to support the IMPECT API V5 instead of V4 189 | * Add `getPlayerIterationAverages()` function 190 | * Add `getSquadIterationAverages()` function 191 | 192 | ## Minor changes 193 | * Fix error in readme sample code 194 | * raise exception for wrong `matches` argument input type in several functions 195 | 196 | # impectPy 1.0.3 197 | 198 | ## Minor changes 199 | * fix bug in `generateSportsCodeXML()` that did not filter out events of action type 'NO_VIDEO_AVAILABLE', 'FINAL_WHISTLE' or 'REFEREE_INTERCEPTION' correctly 200 | * fix bug in `generateSportsCodeXML()` that caused certain kickoffs to be missing 201 | 202 | # impectPy 1.0.2 203 | 204 | ## Minor changes 205 | * add features and KPIs to `generateSportsCodeXML()` function, finalize initial built for IMPECT portals 206 | 207 | # impectPy 1.0.1 208 | 209 | ## Minor improvements and bug fixes 210 | * Fix issue in `getAccessToken()` with certain characters in password 211 | 212 | # impectPy 1.0.0 213 | 214 | ## Major changes 215 | * Release package 216 | 217 | ## Minor changes 218 | * implement retry on HTTP response codes other than 200 219 | 220 | # impectPy 0.1.1 221 | 222 | ## Minor 
improvements and bug fixes 223 | * renamed `generateXML()` to `generateSportsCodeXML()` 224 | * Minor bug fixes in `generateSportsCodeXML()` 225 | 226 | # impectPy 0.1 227 | 228 | ## Major changes 229 | * Added basic package build 230 | * Added `getAccessToken()` function 231 | * Added `getCompetitions()` function 232 | * Added `getMatchplan()` function 233 | * Added `getEventData()` function 234 | * Added `getMatchsums()` function 235 | 236 | ## Minor improvements and bug fixes 237 | * Added a `NEWS.md` file to track changes to the package 238 | * Added `README.md` 239 | * Added `LICENSE.md` -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # impectPy ImpectPy Logo 2 | 3 | A package provided by: Impect GmbH 4 | 5 | Version: v2.5.2 6 | 7 | **Updated: October 20th 2025** 8 | 9 | --- 10 | 11 | **Supported API Version: V5** 12 | For older versions, please see list below: 13 | 14 | - API V4: https://github.com/ImpectAPI/impectPy/tree/v1.0.3 15 | - API V3: not supported by this package 16 | 17 | --- 18 | 19 | ## Introduction 20 | 21 | The goal of the impectPy package is to provide an easy way for Impect 22 | Customers to access data from the customer API. This API includes basic 23 | information about competitions, competition iterations, and matches as 24 | well as event data and aggregated scorings per player and position on 25 | match and season level. 26 | 27 | ## Installation 28 | 29 | You can install the latest version of impectPy from PyPi with: 30 | 31 | ```cmd 32 | pip install impectPy 33 | ``` 34 | 35 | You can also install it from [GitHub](https://github.com/) with: 36 | 37 | ```cmd 38 | pip install git+https://github.com/ImpectAPI/impectPy.git@v2.5.2 39 | ``` 40 | 41 | ## Usage 42 | 43 | ### Getting started 44 | 45 | Before accessing any data via our API, you will need to request a bearer 46 | token for authorization. 
You can get this authorization token using the 47 | following code snippet: 48 | 49 | ```python 50 | import impectPy as ip 51 | import pandas as pd 52 | 53 | # define login credentials 54 | username = "yourUsername" 55 | password = "yourPassword" 56 | 57 | # get access token 58 | token = ip.getAccessToken(username=username, password=password) 59 | ``` 60 | 61 | This access token is a requirement to use any of the functions that 62 | request data from the API. We recommend first getting a list of 63 | competition iterations that are enabled for your account. 64 | 65 | ### Retrieve Basic Information 66 | 67 | ```python 68 | # get list of iterations 69 | iterations = ip.getIterations(token=token) 70 | 71 | # print iterations to console 72 | iterations 73 | ``` 74 | 75 | If any iteration you expected to see is not listed, please contact 76 | your sales representative. Now let’s assume you are interested in data 77 | for 2022/23 season of the 1. Bundesliga (iteration = 518). The following 78 | snippet gets you a list of matches for this iteration: 79 | 80 | ```python 81 | # get matches for iteration 82 | matchplan = ip.getMatches(iteration=518, token=token) 83 | 84 | # print matches to console 85 | matchplan 86 | ``` 87 | 88 | The column `available` denotes whether a given match has been tagged by Impect 89 | and the data is available to you. 90 | 91 | ### Retrieve Match Level Data 92 | 93 | Let's assume you are interested in the FC Bayern München vs Borussia Dortmund game 94 | from April 1st 2023 (matchId = 84344) and want to retrieve event level data as well 95 | as team formation, starting position and substitution data. As the function allows 96 | for multiple games to be requested at once, we need to wrap the matchId into a list. 
97 | Hence, to request data for this game, run the following code snippet: 98 | 99 | ```python 100 | # define matches to get event data for 101 | matches = [84344] 102 | 103 | # get event data for matches 104 | events = ip.getEvents( 105 | matches=matches, 106 | token=token, 107 | include_kpis=True, 108 | include_set_pieces=True 109 | ) 110 | 111 | # get match info 112 | formations = ip.getFormations(matches, token) 113 | substitutions = ip.getSubstitutions(matches, token) 114 | starting_positions = ip.getStartingPositions(matches, token) 115 | 116 | # print first few rows from events dataframe to console 117 | events.head() 118 | ``` 119 | 120 | You can access the aggregated scores per player and position or per 121 | squad for this match in a similar way. You can also find more detailed data 122 | around set piece situations within our API. 123 | Also, we provide you with IMPECT scores and ratios that you might know from our 124 | Scouting and Analysis portals. On player level, these are calculated across 125 | positions which is why you have to supply the function with a list of positions 126 | you want to retrieve data for: 127 | 128 | ```python 129 | # define matches to get further data for 130 | matches = [84344] 131 | 132 | # get set piece data including KPI aggregates 133 | setPieces = ip.getSetPieces(matches=matches, token=token) 134 | 135 | # get kpi matchsums for match per player and position 136 | playerMatchsums = ip.getPlayerMatchsums(matches=matches, token=token) 137 | 138 | # get kpi matchsums for match per squad 139 | squadMatchsums = ip.getSquadMatchsums(matches=matches, token=token) 140 | 141 | # define positions to get scores aggregated by 142 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 143 | 144 | # get player scores and ratios for match and positions per player 145 | playerMatchScores = ip.getPlayerMatchScores( 146 | matches=matches, 147 | positions=positions, 148 | token=token 149 | ) 150 | 151 | # get squad scores 
and ratios for match per squad 152 | squadMatchScores = ip.getSquadMatchScores(matches=matches, token=token) 153 | ``` 154 | 155 | In case you wish to retrieve data for multiple matches, we suggest using 156 | the following method to do so in order to minimize the amount of 157 | requests sent to the API. Let’s also get the event data for the RB 158 | Leipzig vs FSV Mainz 05 game (matchId = 84350) from the same day: 159 | 160 | ```python 161 | # define list of matches 162 | matches = [84344, 84350] 163 | 164 | # apply getEvents function to a set of matchIds 165 | events = ip.getEvents( 166 | matches=matches, 167 | token=token, 168 | include_kpis=True, 169 | include_set_pieces=True 170 | ) 171 | 172 | # get set piece data including KPI aggregates 173 | setPieces = ip.getSetPieces(matches=matches, token=token) 174 | 175 | # get matchsums for matches per player and position 176 | playerMatchsums = ip.getPlayerMatchsums(matches=matches, token=token) 177 | 178 | # get matchsums for matches per squad 179 | squadMatchsums = ip.getSquadMatchsums(matches=matches, token=token) 180 | 181 | # define positions to get scores aggregated by 182 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 183 | 184 | # get player scores and ratios for match and positions per player 185 | playerMatchScores = ip.getPlayerMatchScores( 186 | matches=matches, 187 | token=token, 188 | positions=positions # optional 189 | ) 190 | 191 | # get squad scores and ratios for match per squad 192 | squadMatchScores = ip.getSquadMatchScores(matches=matches, token=token) 193 | ``` 194 | 195 | ### Retrieve Iteration Level Data 196 | 197 | Starting from API version V5, we also offer an endpoint to get KPI average values 198 | per iteration on player as well as squad level. These averages are calculated by 199 | dividing the kpi sum of all individual matches by the sum of matchShares the player 200 | accumulated at a given position. 
On a team level we divide the score by the 201 | amount of matches played by the team. 202 | Also, we provide you with IMPECT scores and ratios that you might know from our 203 | Scouting and Analysis portals. On player level, these are calculated across 204 | positions which is why you have to supply the function with a list of positions 205 | you want to retrieve data for. 206 | Let's assume you were interested in wing backs in the 2022/2023 Bundesliga season, 207 | then you could use this code snippet: 208 | 209 | ```python 210 | # define iteration ID 211 | iteration = 518 212 | 213 | # define positions to get scores aggregated by 214 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 215 | 216 | # get player kpi averages for iteration 217 | playerIterationAverages = ip.getPlayerIterationAverages( 218 | iteration=iteration, 219 | token=token 220 | ) 221 | 222 | # get squad kpi averages for iteration 223 | squadIterationAverages = ip.getSquadIterationAverages( 224 | iteration=iteration, 225 | token=token 226 | ) 227 | 228 | # get player scores and ratios for iteration and positions 229 | playerIterationScores = ip.getPlayerIterationScores( 230 | iteration=iteration, 231 | token=token, 232 | positions=positions # optional 233 | ) 234 | 235 | # get squad scores and ratios for iteration 236 | squadIterationScores = ip.getSquadIterationScores( 237 | iteration=iteration, 238 | token=token 239 | ) 240 | ``` 241 | 242 | The squad rating values that you can find on the league ranking in the Scouting portal can 243 | also be retrieved from the API. In addition, we also provide you with the more detailed squad 244 | coefficients that can be used to make match predictions. See [this example script](https://github.com/ImpectAPI/impectPy/blob/release/examples/predict_matches.ipynb) 245 | for further details. 
246 | 247 | ```python 248 | # get squad rating for iteration 249 | squadRatings = ip.getSquadRatings(iteration=iteration, token=token) 250 | 251 | # get squad coefficients for iteration 252 | squadCoefficients = ip.getSquadCoefficients(iteration=iteration, token=token) 253 | ``` 254 | 255 | You can now also retrieve the positional profile scores for players via our API. This 256 | includes profiles that you created through the scouting portal. The function requires a 257 | positional input that determines which matchShares to consider when computing the scores. 258 | In the below example, all matchShares that a player played as either a left back or a right 259 | back are included for profile score calculation. 260 | 261 | ```python 262 | # define iteration ID 263 | iteration = 518 264 | 265 | # define positions to get scores aggregated by 266 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 267 | 268 | # get player profile scores 269 | playerProfileScores = ip.getPlayerProfileScores( 270 | iteration=iteration, 271 | positions=positions, 272 | token=token 273 | ) 274 | ``` 275 | 276 | Please keep in mind that Impect enforces a rate limit of 10 requests per second 277 | per user. A token bucket logic has been implemented to restrict the amount of API 278 | calls made on the client side already. The rate limit is read from the first limit 279 | policy sent back by the API, so if this limit increases over time, this package will 280 | act accordingly. 281 | 282 | ### SportsCodeXML 283 | 284 | It is also possible to convert a dataframe containing event data into an XML file, 285 | that can be imported into Videotools such as FOCUS. 
The XML can be customized to a certain 286 | degree using the following input variables: 287 | * `codeTag`: Customize code tag selection (Choose what goes into the `code` tag) 288 | * `labels`: Customize labels included (provide a list of labels to be included) 289 | * `kpis`: Customize KPIs included (provide a list of KPIs to be included) 290 | * `labelSorting`: Enable/Disable label sorting (Labels and KPIs are usually prefixed with a sorting number (e.g. `01 | `) or the word `KPI: ` to enable easier filtering in your video tool.) 291 | * `sequencing`: Disable sequencing (A sequence of `RECEPTION > DRIBBLE > PASS` is split into 3 instances: `RECEPTION`, `DRIBBLE`, `PASS`) 292 | * `buckets`: Disable Label/KPI buckets (e.g. conversion from value `0.1` to bucket `[0,1[`) 293 | 294 | To see a full list of available codeTags, labels, KPIs and allowed combinations of these, 295 | please see the beginning of the [function definition](https://github.com/ImpectAPI/impectPy/blob/release/impectPy/xml.py). 296 | 297 | Please make sure to only retrieve event data for 298 | one game at a time.
Let's use the Bayern vs Dortmund game from earlier as an example: 299 | 300 | ```python 301 | # define matchId 302 | matches = [84344] 303 | 304 | # get event data for matchId 305 | events = ip.getEvents(matches=matches, token=token) 306 | 307 | # define lead and lag time in seconds 308 | lead = 3 309 | lag = 3 310 | 311 | # define period start offsets from video start in seconds 312 | p1Start = 16 # first half kickoff happens after 16 seconds in your video file 313 | p2Start = 48 * 60 + 53 # second half kickoff happens after 48 minutes and 53 seconds in your video file 314 | p3Start = 0 # set to timestamp of the kickoff of the first half of extra time 315 | p4Start = 0 # set to timestamp of the kickoff of the second half of extra time 316 | p5Start = 0 # set to timestamp of the first penalty of the penalty shootout 317 | 318 | # generate xml 319 | xml_tree = ip.generateXML( 320 | events=events, 321 | lead=lead, 322 | lag=lag, 323 | p1Start=p1Start, 324 | p2Start=p2Start, 325 | p3Start=p3Start, 326 | p4Start=p4Start, 327 | p5Start=p5Start, 328 | codeTag="playerName", # Use the playerName for the Code Tag 329 | labels=["action", "opponents"], # defaults to None to include all available labels 330 | kpis=["BYPASSED_OPPONENTS", "BYPASSED_DEFENDERS"], # defaults to None to include all available KPIs 331 | labelSorting=False, # Disable sorting prefixes 332 | sequencing=False, # Disable merging of consecutive events by the same player into one sequence 333 | buckets=False # Use precise KPI and label values instead of predefined buckets 334 | ) 335 | 336 | # write to xml file 337 | with open(f"match{matches[0]}_" 338 | # add home team name 339 | f"{events.homeSquadName.unique().tolist()[0].replace(' ', '_')}" 340 | f"_vs_" 341 | # add away team name 342 | f"{events.awaySquadName.unique().tolist()[0].replace(' ', '_')}" 343 | f".xml", 344 | "wb") as file: 345 | xml_tree.write(file, 346 | xml_declaration=True, 347 | encoding='utf-8', 348 | method="xml") 349 | ``` 350 | 351 |
## Object-Oriented Package Version 352 | 353 | Since version 2.4.0, there is another way to call the familiar functions in a more object-oriented way. 354 | An object of the class "Impect" can be used to query the API. This new object offers a slightly enhanced 355 | performance and stores your token as an object attribute. This means you no longer have to include it in 356 | every function call. This new IMPECT object can be used as shown in the example below: 357 | 358 | ```python 359 | from impectPy import Impect 360 | 361 | # define login credentials 362 | username = "yourUsername" 363 | password = "yourPassword" 364 | 365 | # create Impect instance and login 366 | api = Impect() 367 | api.login(username, password) 368 | 369 | # define iteration ID 370 | iteration = 518 371 | 372 | # define matchId 373 | matches = [84344] 374 | 375 | # define positions to get scores/profiles aggregated by 376 | positions = ["LEFT_WINGBACK_DEFENDER", "RIGHT_WINGBACK_DEFENDER"] 377 | 378 | # get iterations 379 | iterations = api.getIterations() 380 | 381 | # get squad ratings 382 | ratings = api.getSquadRatings(iteration=iteration) 383 | 384 | # get squad coefficients 385 | coefficients = api.getSquadCoefficients(iteration=iteration) 386 | 387 | # get matches 388 | matchplan = api.getMatches(iteration=iteration) 389 | 390 | # get match info 391 | formations = api.getFormations(matches=matches) 392 | substitutions = api.getSubstitutions(matches=matches) 393 | startingPositions = api.getStartingPositions(matches=matches) 394 | 395 | # get match events 396 | events = api.getEvents(matches=matches, include_kpis=False, include_set_pieces=False) 397 | 398 | # get set pieces 399 | set_pieces = api.getSetPieces(matches=matches) 400 | 401 | # get player iteration averages 402 | playerIterationAverages = api.getPlayerIterationAverages(iteration=iteration) 403 | 404 | # get player matchsums 405 | playerMatchsums = api.getPlayerMatchsums(matches=matches) 406 | 407 | # get squad iteration 
averages 408 | squadIterationAverages = api.getSquadIterationAverages(iteration=iteration) 409 | 410 | # get squad matchsums 411 | squadMatchsums = api.getSquadMatchsums(matches=matches) 412 | 413 | # get player match scores 414 | playerMatchScores = api.getPlayerMatchScores(matches=matches, positions=positions) # specific positions 415 | playerMatchScoresAll = api.getPlayerMatchScores(matches=matches) # all positions 416 | 417 | # get squad match scores 418 | squadMatchScores = api.getSquadMatchScores(matches=matches) 419 | 420 | # get player iteration scores 421 | playerIterationScores = api.getPlayerIterationScores(iteration=iteration, positions=positions) # specific positions 422 | playerIterationScoresAll = api.getPlayerIterationScores(iteration=iteration) # all positions 423 | 424 | # get squad iteration scores 425 | squadIterationScores = api.getSquadIterationScores(iteration=iteration) 426 | 427 | # get player profile scores 428 | playerProfileScores = api.getPlayerProfileScores(iteration=iteration, positions=positions) 429 | ``` 430 | 431 | ## Final Notes 432 | 433 | Further documentation on the data and explanations of variables can be 434 | found in our [Glossary](https://glossary.impect.com/). 
-------------------------------------------------------------------------------- /examples/predict_matches.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# load packages\n", 10 | "import impectPy\n", 11 | "import numpy as np\n", 12 | "import pandas as pd" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# Set login credentials\n", 22 | "username = \"yourUsername\"\n", 23 | "password = \"yourPassword\"\n", 24 | "\n", 25 | "# create Impect instance and login\n", 26 | "api = impectPy.Impect()\n", 27 | "api.login(username=username, password=password)" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": null, 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "# set iterationId\n", 37 | "iteration = 1385\n", 38 | "\n", 39 | "# fetch matches for iteration\n", 40 | "matches = api.getMatches(iteration=iteration)\n", 41 | "\n", 42 | "# show matches dataframe\n", 43 | "matches.head()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "# fetch prediction model coefficients\n", 53 | "coefficients = api.getSquadCoefficients(iteration=iteration)\n", 54 | "\n", 55 | "# show coefficients\n", 56 | "coefficients.head()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "# prepare columns for merging\n", 66 | "matches[\"date\"] = pd.to_datetime(matches[\"scheduledDate\"]).dt.tz_localize(None).dt.normalize()\n", 67 | "matches[\"homeSquadId\"] = matches[\"homeSquadId\"].astype(\"int64\")\n", 68 | "matches[\"awaySquadId\"] = matches[\"awaySquadId\"].astype(\"int64\")\n", 69 | "coefficients[\"date\"] = 
pd.to_datetime(coefficients[\"date\"]).dt.normalize()\n", 70 | "coefficients[\"squadId\"] = coefficients[\"squadId\"].astype(\"int64\")\n", 71 | "\n", 72 | "# sort by date\n", 73 | "matches = matches.sort_values(\"date\")\n", 74 | "coefficients = coefficients.sort_values(\"date\")\n", 75 | "\n", 76 | "# merge competition-specific coefficients using the most recent date\n", 77 | "matches = pd.merge_asof(\n", 78 | " matches,\n", 79 | " coefficients[\n", 80 | " [\"date\", \"interceptCoefficient\", \"homeCoefficient\", \"competitionCoefficient\"]\n", 81 | " ].drop_duplicates(\"date\"),\n", 82 | " on=\"date\",\n", 83 | " direction=\"backward\"\n", 84 | ")\n", 85 | "\n", 86 | "# merge squad-specific coefficients using the most recent date\n", 87 | "def get_squad_coeffs(row, coeff_df, squad_id_col) -> pd.Series:\n", 88 | " squad_id = row[squad_id_col]\n", 89 | " match_date = row[\"date\"]\n", 90 | " squad_coeffs = coeff_df[(coeff_df[\"squadId\"] == squad_id) & (coeff_df[\"date\"] <= match_date)]\n", 91 | " if len(squad_coeffs) > 0:\n", 92 | " latest = squad_coeffs.sort_values(\"date\").iloc[-1]\n", 93 | " return pd.Series({\"attack\": latest[\"attackCoefficient\"], \"defense\": latest[\"defenseCoefficient\"]})\n", 94 | " return pd.Series({\"attack\": None, \"defense\": None})\n", 95 | "\n", 96 | "# merge homeSquad coefficients\n", 97 | "home_coeffs = matches.apply(lambda row: get_squad_coeffs(row, coefficients, \"homeSquadId\"), axis=1)\n", 98 | "matches[\"attackCoefficientHome\"] = home_coeffs[\"attack\"]\n", 99 | "matches[\"defenseCoefficientHome\"] = home_coeffs[\"defense\"]\n", 100 | "\n", 101 | "# merge awaySquad coefficients\n", 102 | "away_coeffs = matches.apply(lambda row: get_squad_coeffs(row, coefficients, \"awaySquadId\"), axis=1)\n", 103 | "matches[\"attackCoefficientAway\"] = away_coeffs[\"attack\"]\n", 104 | "matches[\"defenseCoefficientAway\"] = away_coeffs[\"defense\"]\n", 105 | "\n", 106 | "# show new matches dataframe\n", 107 | "matches.head()" 108 | ] 
109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "# compute predictions\n", 117 | "matches[\"predHome\"] = np.exp(\n", 118 | " matches[\"interceptCoefficient\"] +\n", 119 | " matches[\"homeCoefficient\"] +\n", 120 | " matches[\"competitionCoefficient\"] +\n", 121 | " matches[\"attackCoefficientHome\"] +\n", 122 | " matches[\"defenseCoefficientAway\"]\n", 123 | ")\n", 124 | "matches[\"predAway\"] = np.exp(\n", 125 | " matches[\"interceptCoefficient\"] +\n", 126 | " matches[\"competitionCoefficient\"] +\n", 127 | " matches[\"attackCoefficientAway\"] +\n", 128 | " matches[\"defenseCoefficientHome\"]\n", 129 | ")\n", 130 | "\n", 131 | "# show matches including predictions\n", 132 | "matches.head()" 133 | ] 134 | } 135 | ], 136 | "metadata": { 137 | "kernelspec": { 138 | "display_name": "venv", 139 | "language": "python", 140 | "name": "python3" 141 | }, 142 | "language_info": { 143 | "codemirror_mode": { 144 | "name": "ipython", 145 | "version": 3 146 | }, 147 | "file_extension": ".py", 148 | "mimetype": "text/x-python", 149 | "name": "python", 150 | "nbconvert_exporter": "python", 151 | "pygments_lexer": "ipython3", 152 | "version": "3.11.7" 153 | } 154 | }, 155 | "nbformat": 4, 156 | "nbformat_minor": 4 157 | } 158 | -------------------------------------------------------------------------------- /impectPy/__init__.py: -------------------------------------------------------------------------------- 1 | # define version attribute 2 | __version__ = "2.5.2" 3 | 4 | # import modules 5 | from .access_token import getAccessToken 6 | from .iterations import getIterations 7 | from .matches import getMatches 8 | from .events import getEvents 9 | from .matchsums import getPlayerMatchsums, getSquadMatchsums 10 | from .iteration_averages import getPlayerIterationAverages, getSquadIterationAverages 11 | from .player_scores import getPlayerMatchScores, getPlayerIterationScores 12 | from 
.squad_scores import getSquadMatchScores, getSquadIterationScores 13 | from .player_profile_scores import getPlayerProfileScores 14 | from .xml import generateXML 15 | from .set_pieces import getSetPieces 16 | from .squad_ratings import getSquadRatings 17 | from .squad_coefficients import getSquadCoefficients 18 | from .match_info import getFormations, getSubstitutions, getStartingPositions 19 | from .config import Config as Config 20 | from .impect import Impect as Impect -------------------------------------------------------------------------------- /impectPy/access_token.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import urllib 3 | import requests 4 | from impectPy.helpers import RateLimitedAPI 5 | 6 | ###### 7 | # 8 | # This function returns an access token for the external API 9 | # 10 | ###### 11 | 12 | 13 | # define function 14 | def getAccessToken(username: str, password: str, session: requests.Session = requests.Session()) -> str: 15 | 16 | # create an instance of RateLimitedAPI 17 | connection = RateLimitedAPI(session) 18 | 19 | return getAccessTokenFromUrl(username, password, connection, "https://login.impect.com/auth/realms/production/protocol/openid-connect/token") 20 | 21 | def getAccessTokenFromUrl(username: str, password: str, connection: RateLimitedAPI, token_url: str) -> str: 22 | 23 | # define request parameters 24 | login = 'client_id=api&grant_type=password&username=' + urllib.parse.quote( 25 | username) + '&password=' + urllib.parse.quote(password) 26 | 27 | # define request headers 28 | connection.session.headers.update({"body": login, "Content-Type": "application/x-www-form-urlencoded"}) 29 | 30 | # request access token 31 | response = connection.make_api_request(url=token_url, method="POST", data=login) 32 | 33 | # remove headers again 34 | connection.session.headers.clear() 35 | 36 | # get access token from response and return it 37 | token = response.json()["access_token"] 
38 | return token -------------------------------------------------------------------------------- /impectPy/config.py: -------------------------------------------------------------------------------- 1 | class Config(object): 2 | def __init__(self, host: str = 'https://api.impect.com', oidc_token_endpoint: str = 'https://login.impect.com/auth/realms/production/protocol/openid-connect/token'): 3 | self.HOST = host 4 | self.OIDC_TOKEN_ENDPOINT = oidc_token_endpoint -------------------------------------------------------------------------------- /impectPy/events.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import numpy as np 3 | import pandas as pd 4 | import requests 5 | import re 6 | from impectPy.helpers import RateLimitedAPI, ForbiddenError 7 | from .matches import getMatchesFromHost 8 | from .iterations import getIterationsFromHost 9 | 10 | ###### 11 | # 12 | # This function returns a pandas dataframe that contains all events for a 13 | # given match 14 | # 15 | ###### 16 | 17 | 18 | def getEvents( 19 | matches: list, token: str, include_kpis: bool = True, 20 | include_set_pieces: bool = True, session: requests.Session = requests.Session() 21 | ) -> pd.DataFrame: 22 | 23 | # create an instance of RateLimitedAPI 24 | connection = RateLimitedAPI(session) 25 | 26 | # construct header with access token 27 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 28 | 29 | return getEventsFromHost(matches, include_kpis, include_set_pieces, connection, "https://api.impect.com") 30 | 31 | # define function 32 | def getEventsFromHost( 33 | matches: list, include_kpis: bool, include_set_pieces: bool, connection: RateLimitedAPI, host: str 34 | ) -> pd.DataFrame: 35 | 36 | # check input for matches argument 37 | if not isinstance(matches, list): 38 | raise Exception("Argument 'matches' must be a list of integers.") 39 | 40 | # get match info 41 | match_data = pd.concat( 42 | map(lambda match: 
connection.make_api_request_limited( 43 | url=f"{host}/v5/customerapi/matches/{match}", 44 | method="GET" 45 | ).process_response( 46 | endpoint="Match Info" 47 | ), 48 | matches), 49 | ignore_index=True) 50 | 51 | # filter for matches that are unavailable 52 | fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() 53 | 54 | # drop matches that are unavailable from list of matches 55 | matches = [match for match in matches if match not in fail_matches] 56 | 57 | # raise exception if no matches remaining or report removed matches 58 | if len(fail_matches) > 0: 59 | if len(matches) == 0: 60 | raise Exception("All supplied matches are unavailable. Execution stopped.") 61 | else: 62 | print(f"The following matches are not available yet and were ignored:\n{fail_matches}") 63 | 64 | # extract iterationIds 65 | iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) 66 | 67 | # get match events 68 | events = pd.concat( 69 | map(lambda match: connection.make_api_request_limited( 70 | url=f"{host}/v5/customerapi/matches/{match}/events", 71 | method="GET" 72 | ).process_response( 73 | endpoint="Events" 74 | ).assign( 75 | matchId=match 76 | ), 77 | matches), 78 | ignore_index=True) 79 | 80 | # account for matches without dribbles, duels or opponents tagged 81 | attributes = [ 82 | "dribbleDistance", 83 | "dribbleType", 84 | "dribbleResult", 85 | "dribblePlayerId", 86 | "duelDuelType", 87 | "duelPlayerId", 88 | "opponentCoordinatesX", 89 | "opponentCoordinatesY", 90 | "opponentAdjCoordinatesX", 91 | "opponentAdjCoordinatesY" 92 | ] 93 | 94 | # add attribute if it doesn't exist in df 95 | for attribute in attributes: 96 | if attribute not in events.columns: 97 | events[attribute] = np.nan 98 | 99 | # get players 100 | players = pd.concat( 101 | map(lambda iteration: connection.make_api_request_limited( 102 | url=f"{host}/v5/customerapi/iterations/{iteration}/players", 103 | method="GET" 104 | 
).process_response( 105 | endpoint="Players" 106 | ), 107 | iterations), 108 | ignore_index=True)[["id", "commonname"]].drop_duplicates() 109 | 110 | # get squads 111 | squads = pd.concat( 112 | map(lambda iteration: connection.make_api_request_limited( 113 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 114 | method="GET" 115 | ).process_response( 116 | endpoint="Squads" 117 | ), 118 | iterations), 119 | ignore_index=True)[["id", "name"]].drop_duplicates() 120 | 121 | # get coaches 122 | coaches_blacklisted = False 123 | try: 124 | coaches = pd.concat( 125 | map(lambda iteration: connection.make_api_request_limited( 126 | url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", 127 | method="GET" 128 | ).process_response( 129 | endpoint="Coaches", 130 | raise_exception=False 131 | ), 132 | iterations), 133 | ignore_index=True)[["id", "name"]].drop_duplicates() 134 | except KeyError: 135 | # no coaches found, create empty df 136 | coaches = pd.DataFrame(columns=["id", "name"]) 137 | except ForbiddenError: 138 | coaches_blacklisted = True 139 | 140 | # get matches 141 | matchplan = pd.concat( 142 | map(lambda iteration: getMatchesFromHost( 143 | iteration=iteration, 144 | connection=connection, 145 | host=host 146 | ), 147 | iterations), 148 | ignore_index=True) 149 | 150 | # get iterations 151 | iterations = getIterationsFromHost(connection=connection, host=host) 152 | 153 | if include_kpis: 154 | # get event scorings 155 | scorings = pd.concat( 156 | map(lambda match: connection.make_api_request_limited( 157 | url=f"{host}/v5/customerapi/matches/{match}/event-kpis", 158 | method="GET" 159 | ).process_response( 160 | endpoint="Scorings" 161 | ), 162 | matches), 163 | ignore_index=True) 164 | 165 | # get kpis 166 | kpis = connection.make_api_request_limited( 167 | url=f"{host}/v5/customerapi/kpis/event", 168 | method="GET" 169 | ).process_response( 170 | endpoint="EventKPIs" 171 | )[["id", "name"]] 172 | 173 | if include_set_pieces: 174 | # get set 
piece data 175 | set_pieces = pd.concat( 176 | map(lambda match: connection.make_api_request_limited( 177 | url=f"{host}/v5/customerapi/matches/{match}/set-pieces", 178 | method="GET" 179 | ).process_response( 180 | endpoint="Set-Pieces" 181 | ), 182 | matches), 183 | ignore_index=True 184 | ).rename( 185 | columns={"id": "setPieceId"} 186 | ).explode("setPieceSubPhase", ignore_index=True) 187 | 188 | # unpack setPieceSubPhase column 189 | set_pieces = pd.concat( 190 | [ 191 | set_pieces.drop(columns=["setPieceSubPhase"]), 192 | pd.json_normalize(set_pieces["setPieceSubPhase"]).add_prefix("setPieceSubPhase.") 193 | ], 194 | axis=1 195 | ).rename(columns=lambda x: re.sub(r"\.(.)", lambda y: y.group(1).upper(), x)) 196 | 197 | # fix potential typing issues 198 | events.pressingPlayerId = events.pressingPlayerId.astype("Int64") 199 | events.fouledPlayerId = events.fouledPlayerId.astype("Int64") 200 | events.passReceiverPlayerId = events.passReceiverPlayerId.astype("Int64") 201 | events.duelPlayerId = events.duelPlayerId.astype("Int64") 202 | events.fouledPlayerId = events.fouledPlayerId.astype("Int64") 203 | if include_set_pieces: 204 | set_pieces.setPieceSubPhaseMainEventPlayerId = set_pieces.setPieceSubPhaseMainEventPlayerId.astype("Int64") 205 | set_pieces.setPieceSubPhaseFirstTouchPlayerId = set_pieces.setPieceSubPhaseFirstTouchPlayerId.astype("Int64") 206 | set_pieces.setPieceSubPhaseSecondTouchPlayerId = set_pieces.setPieceSubPhaseSecondTouchPlayerId.astype("Int64") 207 | 208 | # start merging dfs 209 | 210 | # merge events with secondary data 211 | events = events.merge( 212 | squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}), 213 | left_on="squadId", 214 | right_on="squadId", 215 | how="left", 216 | suffixes=("", "_home") 217 | ).merge( 218 | squads[["id", "name"]].rename(columns={"id": "squadId", "name": "currentAttackingSquadName"}), 219 | left_on="currentAttackingSquadId", 220 | right_on="squadId", 221 | how="left", 222 | 
suffixes=("", "_away") 223 | ).merge( 224 | players[["id", "commonname"]].rename(columns={"id": "playerId", "commonname": "playerName"}), 225 | left_on="playerId", 226 | right_on="playerId", 227 | how="left", 228 | suffixes=("", "_right") 229 | ).merge( 230 | players[["id", "commonname"]].rename( 231 | columns={"id": "pressingPlayerId", "commonname": "pressingPlayerName"}), 232 | left_on="pressingPlayerId", 233 | right_on="pressingPlayerId", 234 | how="left", 235 | suffixes=("", "_right") 236 | ).merge( 237 | players[["id", "commonname"]].rename(columns={"id": "fouledPlayerId", "commonname": "fouledPlayerName"}), 238 | left_on="fouledPlayerId", 239 | right_on="fouledPlayerId", 240 | how="left", 241 | suffixes=("", "_right") 242 | ).merge( 243 | players[["id", "commonname"]].rename(columns={"id": "duelPlayerId", "commonname": "duelPlayerName"}), 244 | left_on="duelPlayerId", 245 | right_on="duelPlayerId", 246 | how="left", 247 | suffixes=("", "_right") 248 | ).merge( 249 | players[["id", "commonname"]].rename( 250 | columns={"id": "passReceiverPlayerId", "commonname": "passReceiverPlayerName"}), 251 | left_on="passReceiverPlayerId", 252 | right_on="passReceiverPlayerId", 253 | how="left", 254 | suffixes=("", "_right") 255 | ).merge( 256 | players[["id", "commonname"]].rename( 257 | columns={"id": "dribbleOpponentPlayerId", "commonname": "dribbleOpponentPlayerName"}), 258 | left_on="dribblePlayerId", 259 | right_on="dribbleOpponentPlayerId", 260 | how="left", 261 | suffixes=("", "_right") 262 | ).merge( 263 | matchplan, 264 | left_on="matchId", 265 | right_on="id", 266 | how="left", 267 | suffixes=("", "_right") 268 | ).merge( 269 | match_data[["id", "squadHomeCoachId", "squadAwayCoachId"]].rename( 270 | columns={"squadHomeCoachId": "homeSquadCoachId", "squadAwayCoachId": "awaySquadCoachId"}), 271 | left_on="matchId", 272 | right_on="id", 273 | how="left", 274 | suffixes=("", "_right") 275 | ).merge( 276 | iterations, 277 | left_on="iterationId", 278 | right_on="id", 
279 | how="left", 280 | suffixes=("", "_right") 281 | ) 282 | 283 | if not coaches_blacklisted: 284 | 285 | # convert coachId to integer if it is None 286 | events["homeSquadCoachId"] = events["homeSquadCoachId"].astype("Int64") 287 | events["awaySquadCoachId"] = events["awaySquadCoachId"].astype("Int64") 288 | events = events.merge( 289 | coaches[["id", "name"]].rename(columns={"id": "homeCoachId", "name": "homeCoachName"}), 290 | left_on="homeSquadCoachId", 291 | right_on="homeCoachId", 292 | how="left", 293 | suffixes=("", "_right") 294 | ).merge( 295 | coaches[["id", "name"]].rename(columns={"id": "awayCoachId", "name": "awayCoachName"}), 296 | left_on="awaySquadCoachId", 297 | right_on="awayCoachId", 298 | how="left", 299 | suffixes=("", "_right") 300 | ) 301 | 302 | if include_kpis: 303 | # unnest scorings and full join with kpi list to ensure all kpis are present 304 | scorings = scorings.merge(kpis, left_on="kpiId", right_on="id", how="outer") \ 305 | .sort_values("kpiId") \ 306 | .drop("kpiId", axis=1) \ 307 | .fillna({"eventId": "", "position": "", "playerId": ""}) \ 308 | .pivot_table(index=["eventId", "position", "playerId"], columns="name", values="value", aggfunc="sum", 309 | fill_value=None) \ 310 | .reset_index() \ 311 | .loc[lambda df: df["eventId"].notna()] 312 | 313 | # Replace empty strings with None in the eventId and playerId column 314 | scorings["eventId"] = scorings["eventId"].mask(scorings["eventId"] == "", None) 315 | scorings["playerId"] = scorings["playerId"].mask(scorings["playerId"] == "", None) 316 | events["playerId"] = events["playerId"].mask(events["playerId"] == "", None) 317 | 318 | # Convert column eventId from float to int 319 | scorings["eventId"] = scorings["eventId"].astype(pd.Int64Dtype()) 320 | scorings["playerId"] = scorings["playerId"].astype(pd.Int64Dtype()) 321 | events["playerId"] = events["playerId"].astype(pd.Int64Dtype()) 322 | 323 | # merge events and scorings 324 | events = events.merge(scorings, 325 | 
left_on=["playerPosition", "playerId", "id"], 326 | right_on=["position", "playerId", "eventId"], 327 | how="left", 328 | suffixes=("", "_scorings")) 329 | 330 | if include_set_pieces: 331 | events = events.merge( 332 | set_pieces, 333 | left_on=["setPieceId", "setPieceSubPhaseId"], 334 | right_on=["setPieceId", "setPieceSubPhaseId"], 335 | how="left", 336 | suffixes=("", "_right") 337 | ).merge( 338 | players[["id", "commonname"]].rename( 339 | columns={ 340 | "id": "setPieceSubPhaseMainEventPlayerId", 341 | "commonname": "setPieceSubPhaseMainEventPlayerName" 342 | } 343 | ), 344 | left_on="setPieceSubPhaseMainEventPlayerId", 345 | right_on="setPieceSubPhaseMainEventPlayerId", 346 | how="left", 347 | suffixes=("", "_right") 348 | ).merge( 349 | players[["id", "commonname"]].rename( 350 | columns={ 351 | "id": "setPieceSubPhasePassReceiverId", 352 | "commonname": "setPieceSubPhasePassReceiverName" 353 | } 354 | ), 355 | left_on="setPieceSubPhasePassReceiverId", 356 | right_on="setPieceSubPhasePassReceiverId", 357 | how="left", 358 | suffixes=("", "_right") 359 | ).merge( 360 | players[["id", "commonname"]].rename( 361 | columns={ 362 | "id": "setPieceSubPhaseFirstTouchPlayerId", 363 | "commonname": "setPieceSubPhaseFirstTouchPlayerName" 364 | } 365 | ), 366 | left_on="setPieceSubPhaseFirstTouchPlayerId", 367 | right_on="setPieceSubPhaseFirstTouchPlayerId", 368 | how="left", 369 | suffixes=("", "_right") 370 | ).merge( 371 | players[["id", "commonname"]].rename( 372 | columns={ 373 | "id": "setPieceSubPhaseSecondTouchPlayerId", 374 | "commonname": "setPieceSubPhaseSecondTouchPlayerName" 375 | } 376 | ), 377 | left_on="setPieceSubPhaseSecondTouchPlayerId", 378 | right_on="setPieceSubPhaseSecondTouchPlayerId", 379 | how="left", 380 | suffixes=("", "_right") 381 | ) 382 | 383 | # rename some columns 384 | events = events.rename(columns={ 385 | "currentAttackingSquadId": "attackingSquadId", 386 | "currentAttackingSquadName": "attackingSquadName", 387 | "duelDuelType": 
"duelType", 388 | "scheduledDate": "dateTime", 389 | "gameTimeGameTime": "gameTime", 390 | "gameTimeGameTimeInSec": "gameTimeInSec", 391 | "eventId": "eventId_scorings", 392 | "id": "eventId", 393 | "index": "eventNumber", 394 | "phaseIndex": "setPiecePhaseIndex", 395 | "setPieceMainEvent": "setPieceSubPhaseMainEvent", 396 | }) 397 | 398 | # define desired column order 399 | event_cols = [ 400 | "matchId", 401 | "dateTime", 402 | "competitionId", 403 | "competitionName", 404 | "competitionType", 405 | "iterationId", 406 | "season", 407 | "matchDayIndex", 408 | "matchDayName", 409 | "homeSquadId", 410 | "homeSquadName", 411 | "homeSquadCountryId", 412 | "homeSquadCountryName", 413 | "homeCoachId", 414 | "homeCoachName", 415 | "homeSquadType", 416 | "awaySquadId", 417 | "awaySquadName", 418 | "awaySquadCountryId", 419 | "awaySquadCountryName", 420 | "awaySquadType", 421 | "awayCoachId", 422 | "awayCoachName", 423 | "eventId", 424 | "eventNumber", 425 | "sequenceIndex", 426 | "periodId", 427 | "gameTime", 428 | "gameTimeInSec", 429 | "duration", 430 | "squadId", 431 | "squadName", 432 | "attackingSquadId", 433 | "attackingSquadName", 434 | "phase", 435 | "playerId", 436 | "playerName", 437 | "playerPosition", 438 | "playerPositionSide", 439 | "actionType", 440 | "action", 441 | "bodyPart", 442 | "bodyPartExtended", 443 | "previousPassHeight", 444 | "result", 445 | "startCoordinatesX", 446 | "startCoordinatesY", 447 | "startAdjCoordinatesX", 448 | "startAdjCoordinatesY", 449 | "startPackingZone", 450 | "startPitchPosition", 451 | "startLane", 452 | "endCoordinatesX", 453 | "endCoordinatesY", 454 | "endAdjCoordinatesX", 455 | "endAdjCoordinatesY", 456 | "endPackingZone", 457 | "endPitchPosition", 458 | "endLane", 459 | "opponents", 460 | "pressure", 461 | "distanceToGoal", 462 | "pxTTeam", 463 | "pxTOpponent", 464 | "pressingPlayerId", 465 | "pressingPlayerName", 466 | "distanceToOpponent", 467 | "opponentCoordinatesX", 468 | "opponentCoordinatesY", 469 | 
"opponentAdjCoordinatesX", 470 | "opponentAdjCoordinatesY", 471 | "passReceiverType", 472 | "passReceiverPlayerId", 473 | "passReceiverPlayerName", 474 | "passDistance", 475 | "passAngle", 476 | "dribbleDistance", 477 | "dribbleType", 478 | "dribbleResult", 479 | "dribbleOpponentPlayerId", 480 | "dribbleOpponentPlayerName", 481 | "shotDistance", 482 | "shotAngle", 483 | "shotTargetPointY", 484 | "shotTargetPointZ", 485 | "shotWoodwork", 486 | "shotGkCoordinatesX", 487 | "shotGkCoordinatesY", 488 | "shotGkAdjCoordinatesX", 489 | "shotGkAdjCoordinatesY", 490 | "shotGkDivePointY", 491 | "shotGkDivePointZ", 492 | "duelType", 493 | "duelPlayerId", 494 | "duelPlayerName", 495 | "fouledPlayerId", 496 | "fouledPlayerName", 497 | "formationTeam", 498 | "formationOpponent", 499 | "inferredSetPiece", 500 | ] 501 | 502 | set_piece_cols = [ 503 | "setPieceId", 504 | "setPiecePhaseIndex", 505 | "setPieceCategory", 506 | "adjSetPieceCategory", 507 | "setPieceExecutionType", 508 | "setPieceSubPhaseId", 509 | "setPieceSubPhaseIndex", 510 | "setPieceSubPhaseStartZone", 511 | "setPieceSubPhaseCornerEndZone", 512 | "setPieceSubPhaseCornerType", 513 | "setPieceSubPhaseFreeKickEndZone", 514 | "setPieceSubPhaseFreeKickType", 515 | "setPieceSubPhaseMainEvent", 516 | "setPieceSubPhaseMainEventPlayerId", 517 | "setPieceSubPhaseMainEventPlayerName", 518 | "setPieceSubPhaseMainEventOutcome", 519 | "setPieceSubPhasePassReceiverId", 520 | "setPieceSubPhasePassReceiverName", 521 | "setPieceSubPhaseFirstTouchPlayerId", 522 | "setPieceSubPhaseFirstTouchPlayerName", 523 | "setPieceSubPhaseFirstTouchWon", 524 | "setPieceSubPhaseIndirectHeader", 525 | "setPieceSubPhaseSecondTouchPlayerId", 526 | "setPieceSubPhaseSecondTouchPlayerName", 527 | "setPieceSubPhaseSecondTouchWon", 528 | ] 529 | 530 | # add columns that might not exist in previous data versions 531 | for col in event_cols: 532 | if col not in events.columns: 533 | events[col] = np.nan 534 | 535 | # create order 536 | order = event_cols 537 
| 538 | if include_set_pieces: 539 | # add kpis 540 | order = order + set_piece_cols 541 | 542 | if include_kpis: 543 | # get list of kpi columns 544 | kpi_cols = kpis["name"].tolist() 545 | 546 | # add kpis 547 | order = order + kpi_cols 548 | 549 | if coaches_blacklisted: 550 | order = [col for col in order if col not in ["homeCoachId", "homeCoachName", "awayCoachId", "awayCoachName"]] 551 | 552 | # reorder data 553 | events = events[order] 554 | 555 | # reorder rows 556 | events = events.sort_values(["matchId", "eventNumber"]) 557 | 558 | # return events 559 | return events -------------------------------------------------------------------------------- /impectPy/helpers.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import numpy as np 3 | import requests 4 | import time 5 | import pandas as pd 6 | import re 7 | from typing import Optional, Dict, Any 8 | import math 9 | 10 | 11 | ###### 12 | # 13 | # This class creates an object to handle rate-limited API requests 14 | # 15 | ###### 16 | 17 | 18 | class ForbiddenError(Exception): 19 | """Raised when the API returns a 403 Forbidden response.""" 20 | pass 21 | 22 | 23 | class RateLimitedAPI: 24 | def __init__(self, session: Optional[requests.Session] = None): 25 | """ 26 | Initializes a RateLimitedAPI object. 27 | 28 | Args: 29 | session (requests.Session): The session object to use for the API calls. 30 | """ 31 | self.session = session or requests.Session() # use the provided session or create a new session 32 | self.bucket = None # TokenBucket object to manage rate limit tokens 33 | 34 | # make a rate-limited API request 35 | def make_api_request_limited( 36 | self, url: str, method: str, data: Optional[Dict[str, str]] = None 37 | ) -> requests.Response: 38 | """ 39 | Executes an API call while applying the rate limit. 40 | 41 | Returns: 42 | requests.Response: The response returned by the API. 
43 | """ 44 | 45 | # check if bucket is not initialized 46 | if not self.bucket: 47 | # make an initial API call to get rate limit information 48 | response = self.make_api_request(url=url, method=method, data=data) 49 | 50 | # get rate limit policy 51 | policy = response.headers["RateLimit-Policy"] 52 | 53 | # extract maximum requests using regex 54 | capacity = int(re.sub(";.*", "", policy)) 55 | 56 | # extract time window using regex 57 | interval = int(re.sub(".*w=(\\d+).*", "\\1", policy)) 58 | 59 | # create TokenBucket 60 | self.bucket = TokenBucket( 61 | capacity=capacity, 62 | refill_after=interval, 63 | remaining=int(response.headers["RateLimit-Remaining"]) 64 | ) 65 | 66 | # return response 67 | return response 68 | 69 | # check if a token is available 70 | if self.bucket.isTokenAvailable(): 71 | # get API response 72 | response = self.make_api_request(url=url, method=method, data=data) 73 | 74 | # consume a token 75 | self.bucket.consumeToken() 76 | else: 77 | # wait for refill 78 | time.sleep( 79 | math.ceil( 80 | self.bucket.refill_after * 100 - ( 81 | time.time() - self.bucket.last_refill_time 82 | ) * 100 83 | ) / 100 84 | ) 85 | 86 | # call function again 87 | response = self.make_api_request_limited(url=url, method=method, data=data) 88 | 89 | # return response 90 | return response 91 | 92 | def make_api_request( 93 | self, url: str, method: str, data: Optional[Dict[str, Any]] = None, 94 | max_retries: int = 3, retry_delay: int = 1 95 | ) -> requests.Response: 96 | """ 97 | Executes an API call. 98 | 99 | Returns: 100 | requests.Response: The response returned by the API. 
101 | """ 102 | # try API call 103 | for i in range(max_retries): 104 | response = self.session.request(method=method, url=url, data=data) 105 | 106 | # check status code and return if 200 107 | if response.status_code == 200: 108 | # return response 109 | return response 110 | # check status code and retry if 429 111 | elif response.status_code == 429: 112 | print(f"Received status code {response.status_code} " 113 | f"({response.json().get('message', 'Rate Limit Exceeded')})" 114 | f", retrying in {retry_delay} seconds...") 115 | time.sleep(retry_delay) 116 | # check status code and terminate if 401 or 403 117 | elif response.status_code == 401: 118 | raise Exception(f"Received status code {response.status_code} " 119 | f"(You do not have API access.)\n" 120 | f"Request-ID: {response.headers['x-request-id']} " 121 | f"(Make sure to include this in any support request.)") 122 | elif response.status_code == 403: 123 | raise ForbiddenError(f"Received status code {response.status_code} " 124 | f"(You do not have access to this resource.)\n" 125 | f"Request-ID: {response.headers['x-request-id']} " 126 | f"(Make sure to include this in any support request.)") 127 | # check status code and terminate if other error 128 | else: 129 | raise Exception(f"Received status code {response.status_code} " 130 | f"({response.json().get('message', 'Unknown error')})\n" 131 | f"Request-ID: {response.headers['x-request-id']} " 132 | f"(Make sure to include this in any support request.)") 133 | 134 | 135 | ###### 136 | # 137 | # This class creates a token bucket that handles the rate limit returned by the API accordingly 138 | # 139 | ###### 140 | 141 | 142 | class TokenBucket: 143 | def __init__(self, capacity: int, refill_after: int = 1, remaining: int = 0): 144 | """ 145 | Initializes a TokenBucket object. 146 | 147 | Args: 148 | capacity (int): The maximum number of tokens the bucket can hold. 
149 | refill_after (int): The time period (in seconds) after which the bucket is refilled. 150 | remaining (int): The amount of tokens remaining at the moment of initialization. 151 | """ 152 | self.capacity = capacity # maximum number of tokens the bucket can hold 153 | self.refill_after = refill_after # time period (in seconds) after which the bucket is refilled 154 | self.tokens = remaining # number of tokens remaining at time of bucket creation 155 | self.last_refill_time = time.time() # time of the last token refill 156 | 157 | def addTokens(self): 158 | """ 159 | Refills the token bucket if the refill time has elapsed. 160 | """ 161 | now = time.time() # current time 162 | elapsed_time = now - self.last_refill_time # time elapsed since the last token refill 163 | if elapsed_time > self.refill_after: 164 | self.tokens = self.capacity # refill the bucket to its maximum capacity 165 | self.last_refill_time = now # update the last refill time to the current time 166 | 167 | def isTokenAvailable(self): 168 | """ 169 | Checks if at least one token is available in the bucket. 170 | 171 | Returns: 172 | bool: True if a token is available, False otherwise. 173 | """ 174 | self.addTokens() # ensure the token bucket is up-to-date 175 | return self.tokens >= 1 # return True if there is at least one token, False otherwise 176 | 177 | def consumeToken(self): 178 | """ 179 | Consumes a token from the bucket if available. 180 | 181 | Returns: 182 | bool: True if a token was consumed successfully, False otherwise. 
183 | """ 184 | if not self.isTokenAvailable(): # if no token is available, return False 185 | return False 186 | self.tokens -= 1 # decrement the token count by 1 187 | return True # return True to indicate successful token consumption 188 | 189 | 190 | ###### 191 | # 192 | # This function converts the response from an API call to a pandas dataframe, flattens it and fixes the column names 193 | # 194 | ###### 195 | 196 | 197 | def process_response(self: requests.Response, endpoint: str, raise_exception: bool = True) -> pd.DataFrame: 198 | # validate and get data from response 199 | result = validate_response(response=self, endpoint=endpoint, raise_exception=raise_exception) 200 | 201 | # convert to df 202 | result = pd.json_normalize(result) 203 | 204 | # fix column names using regex 205 | result = result.rename(columns=lambda x: re.sub(r"\.(.)", lambda y: y.group(1).upper(), x)) 206 | 207 | # return result 208 | return result 209 | 210 | 211 | # attach method to requests module 212 | requests.Response.process_response = process_response 213 | 214 | 215 | ###### 216 | # 217 | # This function unnests the idMappings key from an API response 218 | # 219 | ###### 220 | 221 | 222 | def unnest_mappings_dict(mapping_dict: dict) -> dict: 223 | # iterate over entry and unnest idMappings 224 | for entry in mapping_dict: 225 | # iterate over mappings 226 | for mapping in entry["idMappings"]: 227 | # get mapping data 228 | for provider, mapping_id in mapping.items(): 229 | # add mapping as key on iteration level 230 | entry[provider + "Id"] = mapping_id 231 | 232 | # return result 233 | return mapping_dict 234 | 235 | 236 | ###### 237 | # 238 | # This function unnests the idMappings key from a dataframe 239 | # 240 | ###### 241 | 242 | 243 | def unnest_mappings_df(df: pd.DataFrame, mapping_col: str) -> pd.DataFrame: 244 | # create empty df to store mappings 245 | df_mappings = pd.DataFrame(columns=["wyscoutId", "heimSpielId", "skillCornerId"]) 246 | 247 | # iterate over entry 
and unnest idMappings 248 | for index, entry in df.iterrows(): 249 | # iterate over mappings 250 | for mapping in entry[mapping_col]: 251 | # get mapping data 252 | for provider, mapping_ids in mapping.items(): 253 | # fix provider name 254 | if provider == "heim_spiel": 255 | provider = "heimSpiel" 256 | elif provider == "skill_corner": 257 | provider = "skillCorner" 258 | elif provider == "wyscout": 259 | pass 260 | else: 261 | raise Exception(f"Unknown provider: {provider}") 262 | 263 | # check if mapping is a dict with at least one entry 264 | if isinstance(mapping_ids, list): 265 | if len(mapping_ids) > 0: 266 | # add first mapping as key on iteration level 267 | df_mappings.loc[index, provider + "Id"] = mapping_ids[0] 268 | else: 269 | df_mappings.loc[index, provider + "Id"] = np.nan 270 | 271 | # merge with original df 272 | df = pd.concat([df, df_mappings], axis=1, ignore_index=False) 273 | 274 | # return result 275 | return df 276 | 277 | 278 | # define function to validate JSON response and return data 279 | def validate_response(response: requests.Response, endpoint: str, raise_exception: bool = True) -> dict: 280 | # get data from response 281 | data = response.json()["data"] 282 | 283 | # check if response contains data 284 | if len(data) == 0 and raise_exception: 285 | # raise exception 286 | raise Exception(f"The {endpoint} endpoint returned no data/ an empty list.") 287 | else: 288 | # return data 289 | return data -------------------------------------------------------------------------------- /impectPy/impect.py: -------------------------------------------------------------------------------- 1 | from impectPy.config import Config 2 | from .helpers import RateLimitedAPI 3 | from .access_token import getAccessTokenFromUrl 4 | from .iterations import getIterationsFromHost 5 | from .matches import getMatchesFromHost 6 | from .events import getEventsFromHost 7 | from .matchsums import getPlayerMatchsumsFromHost, getSquadMatchsumsFromHost 8 | from 
.iteration_averages import getPlayerIterationAveragesFromHost, getSquadIterationAveragesFromHost 9 | from .player_scores import getPlayerMatchScoresFromHost, getPlayerIterationScoresFromHost 10 | from .squad_scores import getSquadMatchScoresFromHost, getSquadIterationScoresFromHost 11 | from .player_profile_scores import getPlayerProfileScoresFromHost 12 | from .xml import generateXML 13 | from .set_pieces import getSetPiecesFromHost 14 | from .squad_ratings import getSquadRatingsFromHost 15 | from .squad_coefficients import getSquadCoefficientsFromHost 16 | from .match_info import getFormationsFromHost, getSubstitutionsFromHost, getStartingPositionsFromHost 17 | import pandas as pd 18 | from xml.etree import ElementTree as ET 19 | 20 | 21 | class Impect: 22 | def __init__(self, config: Config = Config(), connection: RateLimitedAPI = RateLimitedAPI()): 23 | self.__config = config 24 | self.connection = connection 25 | 26 | # login with username and password 27 | def login(self, username: str, password: str) -> str: 28 | self.__token = getAccessTokenFromUrl(username, password, self.connection, self.__config.OIDC_TOKEN_ENDPOINT) 29 | self.connection.session.headers.update({"Authorization": f"Bearer {self.__token}"}) 30 | return self.__token 31 | 32 | # use the given token for all calls of the instance 33 | def init(self, token: str): 34 | self.__token = token 35 | self.connection.session.headers.update({"Authorization": f"Bearer {self.__token}"}) 36 | 37 | def getIterations(self) -> pd.DataFrame: 38 | return getIterationsFromHost( 39 | self.connection, self.__config.HOST 40 | ) 41 | 42 | def getMatches(self, iteration: int) -> pd.DataFrame: 43 | return getMatchesFromHost( 44 | iteration, self.connection, self.__config.HOST 45 | ) 46 | 47 | def getEvents(self, matches: list, include_kpis: bool = True, include_set_pieces: bool = True) -> pd.DataFrame: 48 | return getEventsFromHost( 49 | matches, include_kpis, include_set_pieces, self.connection, self.__config.HOST 50 | 
) 51 | 52 | def getPlayerMatchsums(self, matches: list) -> pd.DataFrame: 53 | return getPlayerMatchsumsFromHost( 54 | matches, self.connection, self.__config.HOST 55 | ) 56 | 57 | def getSquadMatchsums(self, matches: list, ) -> pd.DataFrame: 58 | return getSquadMatchsumsFromHost( 59 | matches, self.connection, self.__config.HOST 60 | ) 61 | 62 | def getPlayerIterationAverages(self, iteration: int) -> pd.DataFrame: 63 | return getPlayerIterationAveragesFromHost( 64 | iteration, self.connection, self.__config.HOST 65 | ) 66 | 67 | def getSquadIterationAverages(self, iteration: int) -> pd.DataFrame: 68 | return getSquadIterationAveragesFromHost( 69 | iteration, self.connection, self.__config.HOST 70 | ) 71 | 72 | def getPlayerMatchScores(self, matches: list, positions: list = None) -> pd.DataFrame: 73 | return getPlayerMatchScoresFromHost( 74 | matches, self.connection, self.__config.HOST, positions 75 | ) 76 | 77 | def getPlayerIterationScores(self, iteration: int, positions: list = None) -> pd.DataFrame: 78 | return getPlayerIterationScoresFromHost( 79 | iteration, self.connection, self.__config.HOST, positions 80 | ) 81 | 82 | def getSquadMatchScores(self, matches: list) -> pd.DataFrame: 83 | return getSquadMatchScoresFromHost( 84 | matches, self.connection, self.__config.HOST 85 | ) 86 | 87 | def getSquadIterationScores(self, iteration: int) -> pd.DataFrame: 88 | return getSquadIterationScoresFromHost( 89 | iteration, self.connection, self.__config.HOST 90 | ) 91 | 92 | def getPlayerProfileScores(self, iteration: int, positions: list) -> pd.DataFrame: 93 | return getPlayerProfileScoresFromHost( 94 | iteration, positions, self.connection, self.__config.HOST 95 | ) 96 | 97 | def getSetPieces(self, matches: list) -> pd.DataFrame: 98 | return getSetPiecesFromHost( 99 | matches, self.connection, self.__config.HOST 100 | ) 101 | 102 | def getSquadRatings(self, iteration: int) -> pd.DataFrame: 103 | return getSquadRatingsFromHost( 104 | iteration, self.connection, 
self.__config.HOST 105 | ) 106 | 107 | def getSquadCoefficients(self, iteration: int) -> pd.DataFrame: 108 | return getSquadCoefficientsFromHost( 109 | iteration, self.connection, self.__config.HOST 110 | ) 111 | 112 | def getFormations(self, matches: list) -> pd.DataFrame: 113 | return getFormationsFromHost( 114 | matches, self.connection, self.__config.HOST 115 | ) 116 | 117 | def getSubstitutions(self, matches: list) -> pd.DataFrame: 118 | return getSubstitutionsFromHost( 119 | matches, self.connection, self.__config.HOST 120 | ) 121 | 122 | def getStartingPositions(self, matches: list) -> pd.DataFrame: 123 | return getStartingPositionsFromHost( 124 | matches, self.connection, self.__config.HOST 125 | ) 126 | 127 | @staticmethod 128 | def generateXML( 129 | events: pd.DataFrame, 130 | lead: int, 131 | lag: int, 132 | p1Start: int, 133 | p2Start: int, 134 | p3Start: int, 135 | p4Start: int, 136 | p5Start: int 137 | ) -> ET.ElementTree: 138 | return generateXML(events, lead, lag, p1Start, p2Start, p3Start, p4Start, p5Start) -------------------------------------------------------------------------------- /impectPy/iteration_averages.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import pandas as pd 3 | import requests 4 | from impectPy.helpers import RateLimitedAPI, unnest_mappings_df 5 | from .iterations import getIterationsFromHost 6 | 7 | ###### 8 | # 9 | # This function returns a pandas dataframe that contains all kpis for a 10 | # given iteration aggregated per player and position 11 | # 12 | ###### 13 | 14 | 15 | def getPlayerIterationAverages( 16 | iteration: int, token: str, session: requests.Session = requests.Session() 17 | ) -> pd.DataFrame: 18 | 19 | # create an instance of RateLimitedAPI 20 | connection = RateLimitedAPI(session) 21 | 22 | # construct header with access token 23 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 24 | 25 | return 
getPlayerIterationAveragesFromHost(iteration, connection, "https://api.impect.com")

def getPlayerIterationAveragesFromHost(
        iteration: int, connection: RateLimitedAPI, host: str
) -> pd.DataFrame:
    """
    Returns all KPI averages of an iteration, aggregated per player and position.

    Args:
        iteration (int): The iteration id to query.
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per (squad, player, position) with one column per KPI.

    Raises:
        Exception: If iteration is not an integer or an endpoint returns no data.
    """

    # check input for matches argument
    # NOTE(review): "vor" in the message below is a typo for "for" (left as-is here)
    if not isinstance(iteration, int):
        raise Exception("Input vor iteration argument must be an integer")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )

    # get squadIds (only squads the user has access to)
    squad_ids = squads[squads.access].id.to_list()

    # get players
    players = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/players",
        method="GET"
    ).process_response(
        endpoint="Players"
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest mappings
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get kpis
    kpis = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/kpis",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )[["id", "name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    # NOTE(review): endpoint label "KPIs" looks copy-pasted - this is the countries endpoint
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )

    # create empty df to store averages
    averages = pd.DataFrame()

    # iterate over squads
    for squad_id in squad_ids:

        # get player iteration averages per squad
        averages_raw = connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/"
                f"squads/{squad_id}/player-kpis",
            method="GET"
        ).process_response(
            endpoint="PlayerAverages"
        ).assign(
            iterationId=iteration,
            squadId=squad_id
        )

        # unnest scorings
        averages_raw = averages_raw.explode("kpis").reset_index(drop=True)

        # unnest dictionary in kpis column
        averages_raw = pd.concat(
            [averages_raw.drop(["kpis"], axis=1), pd.json_normalize(averages_raw["kpis"])],
            axis=1
        )

        # merge with kpis to ensure all kpis are present
        # (outer join adds a row per KPI that has no scoring for this squad)
        averages_raw = averages_raw.merge(
            kpis,
            left_on="kpiId",
            right_on="id",
            how="outer",
            suffixes=("", "_right")
        )

        # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
        if len(averages_raw["name"][averages_raw["name"].isnull()]) > 0:
            averages_raw["name"] = averages_raw["name"].fillna("-1")

        # get KPIs without a scoring (rows created only by the outer join above)
        mask = (
            averages_raw.iterationId.isnull()
            & averages_raw.squadId.isnull()
            & averages_raw.playerId.isnull()
            & averages_raw.position.isnull()
        )

        # fill join cols with placeholder so the pivot below keeps these KPI columns
        averages_raw.loc[mask] = averages_raw.loc[mask].fillna(-1)

        # get matchShares
        match_shares_raw = averages_raw[
            ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"]].drop_duplicates()

        # pivot kpi values
        averages_raw = pd.pivot_table(
            averages_raw,
            values="value",
            index=["iterationId", "squadId", "playerId", "position"],
            columns="name",
            aggfunc="sum",
            fill_value=0,
            dropna=False,
            observed=True,
        ).reset_index()

        # drop "-1" column (placeholder KPI name introduced above)
        if "-1" in averages_raw.columns:
            averages_raw.drop(["-1"], inplace=True, axis=1)

        # drop -1 rows (placeholder rows introduced above)
        averages_raw = averages_raw[
            ~(averages_raw.iterationId == -1)
            & ~(averages_raw.squadId == -1)
            & ~(averages_raw.playerId == -1)
            & ~(averages_raw.position == -1)
        ]

        # merge with playDuration and matchShare
        averages_raw = averages_raw.merge(
            match_shares_raw,
            left_on=["iterationId", "squadId", "playerId", "position"],
            right_on=["iterationId", "squadId", "playerId", "position"],
            how="inner",
            suffixes=("", "_right")
        )

        averages = pd.concat([averages, averages_raw], axis=0)

    # merge with other data (iteration meta, squad names, player master data, countries)
    averages = averages.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # remove NA rows
    averages = averages[averages.iterationId.notnull()]

    # fix column types (nullable Int64 keeps NA-capable integer ids)
    averages["iterationId"] = averages["iterationId"].astype("Int64")
    averages["squadId"] = averages["squadId"].astype("Int64")
    averages["playerId"] = averages["playerId"].astype("Int64")
    averages["wyscoutId"] = averages["wyscoutId"].astype("Int64")
    averages["heimSpielId"] = averages["heimSpielId"].astype("Int64")
    averages["skillCornerId"] = averages["skillCornerId"].astype("Int64")

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "squadName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "position",
        "matchShare",
        "playDuration"
    ]

    # add kpiNames to order
    order = order + kpis.name.to_list()

    # select columns
    averages = averages[order]

    # return result
    return averages


######
#
# This function returns a pandas dataframe that contains all kpis for a
# given iteration aggregated per squad
#
######
# NOTE(review): the default session is created once at definition time and is
# shared by all callers that rely on the default - confirm this is intended
def getSquadIterationAverages(
        iteration: int, token: str, session: requests.Session = requests.Session()
) -> pd.DataFrame:

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadIterationAveragesFromHost(iteration, connection, "https://api.impect.com")

def getSquadIterationAveragesFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Returns all KPI averages of an iteration, aggregated per squad.

    Args:
        iteration (int): The iteration id to query.
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per squad with one column per KPI.

    Raises:
        Exception: If iteration is not an integer or an endpoint returns no data.
    """

    # check input for matches argument
    # NOTE(review): "vor" in the message below is a typo for "for" (left as-is here)
    if not isinstance(iteration, int):
        raise Exception("Input vor iteration argument must be an integer")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )[["id", "name", "idMappings"]]

    # unnest mappings
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squad iteration averages
    averages_raw = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squad-kpis",
        method="GET"
    ).process_response(
        endpoint="SquadAverages"
    ).assign(iterationId=iteration)

    # get kpis
    kpis = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/kpis",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )[["id", "name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get matches played
    matches = averages_raw[["squadId", "matches"]].drop_duplicates()

    # unnest scorings
    averages = averages_raw.explode("kpis").reset_index(drop=True)

    # unnest dictionary in kpis column
    averages = pd.concat(
        [averages.drop(["kpis"], axis=1), pd.json_normalize(averages["kpis"])],
        axis=1
    )

    # merge with kpis to ensure all kpis are present
    averages = averages.merge(
        kpis,
        left_on="kpiId",
        right_on="id",
        how="outer",
        suffixes=("", "_right")
    )

    # pivot kpi values
    averages = pd.pivot_table(
        averages,
        values="value",
        index=["iterationId", "squadId"],
        columns="name",
        aggfunc="sum",
        fill_value=0,
        dropna=False
    ).reset_index()

    # inner join with matches played
    averages = pd.merge(
        averages,
        matches,
        left_on="squadId",
        right_on="squadId",
        how="inner",
        suffixes=("", "_right")
    )

    # merge with other data (iteration meta and squad names/id mappings)
    averages = averages.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    )

    # remove NA rows
    averages = averages[averages.iterationId.notnull()]

    # fix column types (nullable Int64 keeps NA-capable integer ids)
    averages["squadId"] = averages["squadId"].astype("Int64")
    averages["matches"] = averages["matches"].astype("Int64")
    averages["iterationId"] = averages["iterationId"].astype("Int64")
    averages["wyscoutId"] = averages["wyscoutId"].astype("Int64")
    averages["heimSpielId"] = averages["heimSpielId"].astype("Int64")
    averages["skillCornerId"] = averages["skillCornerId"].astype("Int64")

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "matches"
    ]

    # add kpiNames to order
    order = order + kpis.name.to_list()

    # select columns
    averages = averages[order]

    # return result
    return averages
--------------------------------------------------------------------------------
/impectPy/iterations.py:
--------------------------------------------------------------------------------
# load packages
import pandas as pd
import re
import requests
from impectPy.helpers import RateLimitedAPI, unnest_mappings_dict, validate_response

######
#
# This function returns a dataframe containing all competitionIterations available to the user
#
######


def getIterations(token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header
with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getIterationsFromHost(connection, "https://api.impect.com")

# define function
def getIterationsFromHost(connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Returns all competition iterations available to the user.

    Args:
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per iteration, sorted by id, including competition
            meta data, country names, and external provider ids.
    """

    # request competition iteration information from API
    response = connection.make_api_request_limited(
        f"{host}/v5/customerapi/iterations/",
        method="GET"
    )

    # get data from response
    data = validate_response(response, "Iterations")

    # unnest nested IdMapping column
    data = unnest_mappings_dict(data)

    # convert to pandas dataframe
    df = pd.json_normalize(data)

    # drop idMappings column
    df = df.drop("idMappings", axis = 1)

    # fix column names using regex (e.g. "a.b" / "a_b" -> "aB")
    df = df.rename(columns=lambda x: re.sub("[\._](.)", lambda y: y.group(1).upper(), x))

    # keep first entry for skillcorner, heimspiel and wyscout data
    df.skillCornerId = df.skillCornerId.apply(lambda x: x[0] if x else None)
    df.heimSpielId = df.heimSpielId.apply(lambda x: x[0] if x else None)
    df.wyscoutId = df.wyscoutId.apply(lambda x: x[0] if x else None)

    # get country data
    # NOTE(review): endpoint label "KPIs" looks copy-pasted - this is the countries endpoint
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )

    df = df.merge(
        countries[["id", "fifaName"]].rename(
            columns={"id": "competitionCountryId", "fifaName": "competitionCountryName"}
        ),
        how="left",
        on="competitionCountryId"
    )

    # sort iterations
    df = df.sort_values(by="id")

    # define column order
    order = [
        "id",
        "competitionId",
        "competitionName",
        "season",
        "competitionType",
        "competitionCountryId",
        "competitionCountryName",
        "competitionGender",
        "dataVersion",
        "lastChangeTimestamp",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
    ]

    # select columns
    df = df[order]

    # return dataframe
    return df
--------------------------------------------------------------------------------
/impectPy/match_info.py:
--------------------------------------------------------------------------------
# load packages
import numpy as np
import pandas as pd
import requests
from impectPy.helpers import RateLimitedAPI
from .matches import getMatchesFromHost
from .iterations import getIterationsFromHost
import re


######
#
# This function returns a pandas dataframe that contains all events for a
# given match
#
######


# NOTE(review): the default session is created once at definition time and is
# shared by all callers that rely on the default - confirm this is intended
def getFormations(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getFormationsFromHost(matches, connection, "https://api.impect.com")


# define function
def getFormationsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Returns the formations used by both squads over the course of the given matches.

    Args:
        matches (list): List of match ids (integers).
        connection (RateLimitedAPI): Authenticated, rate-limited API connection.
        host (str): Base URL of the API host.

    Returns:
        pd.DataFrame: One row per (match, squad, formation spell), sorted by
            matchId, squadId and gameTimeInSec.

    Raises:
        Exception: If matches is not a list or all supplied matches are unavailable.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    # NOTE(review): endpoint label "Iterations" looks copy-pasted - this is the matches endpoint
    matches = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable (no KPI calculation yet)
    fail_matches = matches[matches.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = matches[~matches.id.isin(fail_matches)]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds
    iterations = list(matches[matches.lastCalculationDate.notnull()].iterationId.unique())

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # extract formations (one frame per side, normalized to squadId/squadFormations)
    formations_home = matches[["id", "squadHomeId", "squadHomeFormations"]].rename(
        columns={"squadHomeFormations": "squadFormations", "squadHomeId": "squadId"}
    )
    formations_away = matches[["id", "squadAwayId", "squadAwayFormations"]].rename(
        columns={"squadAwayFormations": "squadFormations", "squadAwayId": "squadId"}
    )

    # concat dfs
    formations = pd.concat([formations_home, formations_away], axis=0).reset_index(drop=True)

    # unnest formations column
    formations = formations.explode("squadFormations").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    formations = formations.join(pd.json_normalize(formations["squadFormations"]))

    # drop the original column
    formations.drop(columns=["squadFormations"], inplace=True)

    # start merging dfs

    # merge formations with squads
    formations = formations.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    )

    # merge with matches info
    formations = formations.merge(
        matchplan[[
            "id", "skillCornerId", "heimSpielId", "wyscoutId", "matchDayIndex",
            "matchDayName", "scheduledDate", "lastCalculationDate", "iterationId"
        ]],
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    formations = formations.merge(
        iterations[["id", "competitionName", "competitionId", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    formations = formations.rename(columns={
        "id": "matchId",
        "scheduledDate": "dateTime"
    })

    # define desired column order
    cols = [
        "matchId",
        "dateTime",
        "competitionId",
        "competitionName",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "gameTime",
        "gameTimeInSec",
        "formation"
    ]

    # reorder data
    formations = formations[cols]

    # reorder rows
    formations = formations.sort_values(["matchId", "squadId", "gameTimeInSec"])

    # return formations
    return formations


######
#
# This function returns a pandas dataframe that contains all substitutions for a
# given match
#
######


def getSubstitutions(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSubstitutionsFromHost(matches, connection, "https://api.impect.com")


# define function
def getSubstitutionsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return one row per substitution (player on/off, position, game time) for the given matches."""
    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    matches = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable (no KPI calculation yet)
    fail_matches = matches[matches.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = matches[~matches.id.isin(fail_matches)]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds
    iterations = list(matches[matches.lastCalculationDate.notnull()].iterationId.unique())

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get players
    players = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/players",
            method="GET"
        ).process_response(
            endpoint="Players"
        ),
            iterations),
        ignore_index=True)[["id", "commonname"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # extract shirt numbers for both sides under a common column name
    shirt_numbers_home = matches[["id", "squadHomeId", "squadHomePlayers"]].rename(
        columns={"squadHomePlayers": "players", "squadHomeId": "squadId"}
    )
    shirt_numbers_away = matches[["id", "squadAwayId", "squadAwayPlayers"]].rename(
        columns={"squadAwayPlayers": "players", "squadAwayId": "squadId"}
    )

    # concat dfs
    shirt_numbers = pd.concat([shirt_numbers_home, shirt_numbers_away], axis=0).reset_index(drop=True)

    # unnest players column
    shirt_numbers = shirt_numbers.explode("players").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    shirt_numbers = pd.concat(
        [
            shirt_numbers.drop(columns=["players"]),
            pd.json_normalize(shirt_numbers["players"]).rename(columns={"id": "playerId"})
        ],
        axis=1
    )

    # extract substitutions for both sides under a common column name
    substitutions_home = matches[["id", "squadHomeId", "squadHomeSubstitutions"]].rename(
        columns={"squadHomeSubstitutions": "squadSubstitutions", "squadHomeId": "squadId"}
    )
    substitutions_away = matches[["id", "squadAwayId", "squadAwaySubstitutions"]].rename(
        columns={"squadAwaySubstitutions": "squadSubstitutions", "squadAwayId": "squadId"}
    )

    # concat dfs
    substitutions = pd.concat([substitutions_home, substitutions_away], axis=0).reset_index(drop=True)

    # unnest substitutions column
    substitutions = substitutions.explode("squadSubstitutions").reset_index(drop=True)

    # drop empty row that occurs if one team did not substitute
    substitutions = substitutions[substitutions.squadSubstitutions.notnull()].reset_index(drop=True)

    # normalize the JSON structure into separate columns
    substitutions = substitutions.join(pd.json_normalize(substitutions["squadSubstitutions"]))

    # drop the original column
    substitutions.drop(columns=["squadSubstitutions"], inplace=True)

    # fix potential typing issues (exchangedPlayerId can be missing -> nullable int)
    substitutions.exchangedPlayerId = substitutions.exchangedPlayerId.astype("Int64")

    # start merging dfs

    # merge substitutions with squads
    substitutions = substitutions.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_x")
    )

    # merge substitutions with shirt numbers (once for the player coming on, once for the one going off)
    substitutions = substitutions.merge(
        shirt_numbers,
        left_on=["playerId", "squadId", "id"],
        right_on=["playerId", "squadId", "id"],
        how="left",
        suffixes=("", "_x")
    ).merge(
        shirt_numbers.rename(
            columns={"playerId": "exchangedPlayerId", "shirtNumber": "exchangedShirtNumber"}
        ),
        left_on=["exchangedPlayerId", "squadId", "id"],
        right_on=["exchangedPlayerId", "squadId", "id"],
        how="left",
        suffixes=("", "_x")
    )

    # merge substitutions with players (names for both players involved)
    substitutions = substitutions.merge(
        players[["id", "commonname"]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={"commonname": "exchangedPlayerName"}
        ),
        left_on="exchangedPlayerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with matches info
    substitutions = substitutions.merge(
        matchplan[[
            "id", "skillCornerId", "heimSpielId", "wyscoutId", "matchDayIndex",
            "matchDayName", "scheduledDate", "lastCalculationDate", "iterationId"
        ]],
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    substitutions = substitutions.merge(
        iterations[["id", "competitionName", "competitionId", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    substitutions = substitutions.rename(columns={
        "id": "matchId",
        "positionSide": "toPositionSide",
        "scheduledDate": "dateTime",
        "gameTime.gameTime": "gameTime",
        "gameTime.gameTimeInSec": "gameTimeInSec"
    })

    # fix column types (nullable ints for shirt numbers)
    substitutions["shirtNumber"] = substitutions["shirtNumber"].astype("Int64")
    substitutions["exchangedShirtNumber"] = substitutions["exchangedShirtNumber"].astype("Int64")

    # define desired column order
    cols = [
        "matchId",
        "dateTime",
        "competitionId",
        "competitionName",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "gameTime",
        "gameTimeInSec",
        "substitutionType",
        "playerId",
        "playerName",
        "shirtNumber",
        "fromPosition",
        "fromPositionSide",
        "toPosition",
        "toPositionSide",
        "exchangedPlayerId",
        "exchangedPlayerName",
        "exchangedShirtNumber",
    ]

    # reorder data
    substitutions = substitutions[cols]

    # reorder rows
    substitutions = substitutions.sort_values(["matchId", "squadId", "gameTimeInSec", "playerId"])

    # return substitutions
    return substitutions


######
#
# This function returns a pandas dataframe that contains the starting formations for a
# given match
#
######


def getStartingPositions(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getStartingPositionsFromHost(matches, connection, "https://api.impect.com")


# define function
def getStartingPositionsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return one row per starting player (position, side, shirt number) for the given matches."""
    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    matches = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable
    fail_matches = matches[matches.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = matches[~matches.id.isin(fail_matches)]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds
    iterations = list(matches[matches.lastCalculationDate.notnull()].iterationId.unique())

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get players
    players = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/players",
            method="GET"
        ).process_response(
            endpoint="Players"
        ),
            iterations),
        ignore_index=True)[["id", "commonname"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # extract shirt numbers for both sides under a common column name
    shirt_numbers_home = matches[["id", "squadHomeId", "squadHomePlayers"]].rename(
        columns={"squadHomePlayers": "players", "squadHomeId": "squadId"}
    )
    shirt_numbers_away = matches[["id", "squadAwayId", "squadAwayPlayers"]].rename(
        columns={"squadAwayPlayers": "players", "squadAwayId": "squadId"}
    )

    # concat dfs
    shirt_numbers = pd.concat([shirt_numbers_home, shirt_numbers_away], axis=0).reset_index(drop=True)

    # unnest players column
    shirt_numbers = shirt_numbers.explode("players").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    shirt_numbers = pd.concat(
        [
            shirt_numbers.drop(columns=["players"]),
            pd.json_normalize(shirt_numbers["players"]).rename(columns={"id": "playerId"})
        ],
        axis=1
    )

    # extract starting_positions for both sides under a common column name
    starting_positions_home = matches[["id", "squadHomeId", "squadHomeStartingPositions"]].rename(
        columns={"squadHomeStartingPositions": "squadStartingPositions", "squadHomeId": "squadId"}
    )
    starting_positions_away = matches[["id", "squadAwayId", "squadAwayStartingPositions"]].rename(
        columns={"squadAwayStartingPositions": "squadStartingPositions", "squadAwayId": "squadId"}
    )

    # concat dfs
    starting_positions = pd.concat([starting_positions_home, starting_positions_away], axis=0).reset_index(drop=True)

    # unnest starting positions column
    starting_positions = starting_positions.explode("squadStartingPositions").reset_index(drop=True)

    # normalize the JSON structure into separate columns
    starting_positions = starting_positions.join(pd.json_normalize(starting_positions["squadStartingPositions"]))

    # drop the original column
    starting_positions.drop(columns=["squadStartingPositions"], inplace=True)

    # start merging dfs

    # merge starting positions with shirt numbers
    starting_positions = starting_positions.merge(
        shirt_numbers,
        left_on=["playerId", "squadId", "id"],
        right_on=["playerId", "squadId", "id"],
        how="left",
        suffixes=("", "_x")
    )

    # merge starting positions with squads
    starting_positions = starting_positions.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "squadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_x")
    )

    # merge starting positions with players
    starting_positions = starting_positions.merge(
        players[["id", "commonname"]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with matches info
    starting_positions = starting_positions.merge(
        matchplan[[
            "id", "skillCornerId", "heimSpielId", "wyscoutId", "matchDayIndex",
            "matchDayName", "scheduledDate", "lastCalculationDate", "iterationId"
        ]],
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    starting_positions = starting_positions.merge(
        iterations[["id", "competitionName", "competitionId", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    starting_positions = starting_positions.rename(columns={
        "id": "matchId",
        "scheduledDate": "dateTime",
    })

    # fix column types; warn about players without a shirt number before casting
    missing_shirt_numbers = starting_positions["shirtNumber"].isnull()
    if missing_shirt_numbers.any():
        print("Warning: The following players are missing a shirt number and will be set to None:")
        print(starting_positions[missing_shirt_numbers][["matchId", "squadName", "playerName"]].to_string(index=False))
    starting_positions["shirtNumber"] = starting_positions["shirtNumber"].astype("Int64")

    # define desired column order
    cols = [
        "matchId",
        "dateTime",
        "competitionId",
        "competitionName",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "playerId",
        "playerName",
        "shirtNumber",
        "position",
        "positionSide"
    ]

    # reorder data
    starting_positions = starting_positions[cols]

    # reorder rows
    starting_positions = starting_positions.sort_values(["matchId", "squadId", "playerId"])

    # return starting positions
    return starting_positions


# ---- impectPy/matches.py ----

import pandas as pd
import re
import requests
from impectPy.helpers import RateLimitedAPI, unnest_mappings_dict, validate_response

######
#
# This function returns a dataframe with basic information
# for all matches for a given set of parameters
#
######


def getMatches(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getMatchesFromHost(iteration, connection, "https://api.impect.com")

# define function
def getMatchesFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return the match plan for an iteration, enriched with squad and country data."""

    # get match data
    matches = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/"
            f"{iteration}/matches",
        method="GET"
    )

    # get data from response
    matches = validate_response(response=matches, endpoint="Matches")

    # get squads data
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/"
            f"{iteration}/squads",
        method="GET"
    )

    # get data from response
from response 45 | squads = validate_response(response=squads, endpoint="Squads") 46 | 47 | # get country data 48 | countries = connection.make_api_request_limited( 49 | url=f"{host}/v5/customerapi/countries", 50 | method="GET" 51 | ) 52 | 53 | # get data from response 54 | countries = validate_response(response=countries, endpoint="Countries") 55 | 56 | # convert to df and clean 57 | matches = clean_df(matches) 58 | squads = clean_df(squads) 59 | countries = pd.DataFrame(countries) 60 | 61 | # merge matches with squads 62 | matches = matches.merge(squads, 63 | left_on="homeSquadId", 64 | right_on="id", 65 | suffixes=("", "_home")) 66 | matches = matches.rename(columns={ 67 | "name": "homeSquadName", 68 | "type": "homeSquadType", 69 | "skillCornerId_home": "homeSquadSkillCornerId", 70 | "heimSpielId_home": "homeSquadHeimSpielId", 71 | "wyscoutId_home": "homeSquadWyscoutId", 72 | "countryId": "homeSquadCountryId" 73 | }) 74 | matches = matches.merge(squads, 75 | left_on="awaySquadId", 76 | right_on="id", 77 | suffixes=("", "_away")) 78 | matches = matches.rename(columns={ 79 | "name": "awaySquadName", 80 | "type": "awaySquadType", 81 | "skillCornerId_away": "awaySquadSkillCornerId", 82 | "heimSpielId_away": "awaySquadHeimSpielId", 83 | "wyscoutId_away": "awaySquadWyscoutId", 84 | "countryId": "awaySquadCountryId" 85 | }) 86 | 87 | # merge with countries 88 | matches = matches.merge( 89 | countries, 90 | left_on="homeSquadCountryId", 91 | right_on="id", 92 | suffixes=("", "_right") 93 | ) 94 | matches = matches.rename(columns={"fifaName": "homeSquadCountryName"}) 95 | 96 | matches = matches.merge( 97 | countries, 98 | left_on="awaySquadCountryId", 99 | right_on="id", 100 | suffixes=("", "_right") 101 | ) 102 | matches = matches.rename(columns={"fifaName": "awaySquadCountryName"}) 103 | 104 | # reorder columns 105 | matches = matches.loc[:, [ 106 | 'id', 107 | 'skillCornerId', 108 | 'heimSpielId', 109 | 'wyscoutId', 110 | 'iterationId', 111 | 'matchDayIndex', 112 | 
'matchDayName', 113 | 'homeSquadId', 114 | 'homeSquadName', 115 | 'homeSquadType', 116 | 'homeSquadCountryId', 117 | 'homeSquadCountryName', 118 | 'homeSquadSkillCornerId', 119 | 'homeSquadHeimSpielId', 120 | 'homeSquadWyscoutId', 121 | 'awaySquadId', 122 | 'awaySquadName', 123 | 'awaySquadType', 124 | 'awaySquadCountryId', 125 | 'awaySquadCountryName', 126 | 'awaySquadSkillCornerId', 127 | 'awaySquadHeimSpielId', 128 | 'awaySquadWyscoutId', 129 | 'scheduledDate', 130 | 'lastCalculationDate', 131 | 'available' 132 | ]] 133 | 134 | # reorder matches 135 | matches = matches.sort_values(by=["matchDayIndex", "id"]) 136 | 137 | # sort matches 138 | matches = matches.sort_values(by="id") 139 | 140 | # return matches 141 | return matches 142 | 143 | 144 | # define function to clean df 145 | def clean_df(data: dict) -> pd.DataFrame: 146 | 147 | # unnest nested idMapping key 148 | data = unnest_mappings_dict(data) 149 | 150 | # convert to df 151 | df = pd.json_normalize(data) 152 | 153 | # fix column names using regex 154 | df = df.rename(columns=lambda x: re.sub("[\._](.)", lambda y: y.group(1).upper(), x)) 155 | 156 | # drop idMappings column 157 | df = df.drop("idMappings", axis=1) 158 | 159 | # keep first entry for skillcorner and heimspiel data 160 | df.skillCornerId = df.skillCornerId.apply(lambda x: x[0] if x else None) 161 | df.heimSpielId = df.heimSpielId.apply(lambda x: x[0] if x else None) 162 | df.wyscoutId = df.wyscoutId.apply(lambda x: x[0] if x else None) 163 | 164 | return df -------------------------------------------------------------------------------- /impectPy/matchsums.py: -------------------------------------------------------------------------------- 1 | # load packages 2 | import pandas as pd 3 | import requests 4 | from impectPy.helpers import RateLimitedAPI, unnest_mappings_df, ForbiddenError 5 | from .matches import getMatchesFromHost 6 | from .iterations import getIterationsFromHost 7 | 8 | ###### 9 | # 10 | # This function returns a pandas 
dataframe that contains all kpis for a 11 | # given match aggregated per player and position 12 | # 13 | ###### 14 | 15 | 16 | def getPlayerMatchsums(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame: 17 | 18 | # create an instance of RateLimitedAPI 19 | connection = RateLimitedAPI(session) 20 | 21 | # construct header with access token 22 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 23 | 24 | return getPlayerMatchsumsFromHost(matches, connection, "https://api.impect.com") 25 | 26 | def getPlayerMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame: 27 | 28 | # check input for matches argument 29 | if not isinstance(matches, list): 30 | raise Exception("Argument 'matches' must be a list of integers.") 31 | 32 | # get match info 33 | match_data = pd.concat( 34 | map(lambda match: connection.make_api_request_limited( 35 | url=f"{host}/v5/customerapi/matches/{match}", 36 | method="GET" 37 | ).process_response( 38 | endpoint="Match Info" 39 | ), 40 | matches), 41 | ignore_index=True) 42 | 43 | # filter for matches that are unavailable 44 | fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list() 45 | 46 | # drop matches that are unavailable from list of matches 47 | matches = [match for match in matches if match not in fail_matches] 48 | 49 | # raise warnings 50 | if len(fail_matches) > 0: 51 | if len(matches) == 0: 52 | raise Exception("All supplied matches are unavailable. 
Execution stopped.") 53 | else: 54 | print(f"The following matches are not available yet and were ignored:\n{fail_matches}") 55 | 56 | # extract iterationIds 57 | iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) 58 | 59 | # get player match sums 60 | matchsums_raw = pd.concat( 61 | map(lambda match: connection.make_api_request_limited( 62 | url=f"{host}/v5/customerapi/matches/{match}/player-kpis", 63 | method="GET" 64 | ).process_response( 65 | endpoint="PlayerMatchsums" 66 | ).assign( 67 | matchId=match 68 | ), 69 | matches), 70 | ignore_index=True) 71 | 72 | # get players 73 | players = pd.concat( 74 | map( 75 | lambda iteration: connection.make_api_request_limited( 76 | url=f"{host}/v5/customerapi/iterations/{iteration}/players", 77 | method="GET" 78 | ).process_response( 79 | endpoint="Players" 80 | ), 81 | iterations), 82 | ignore_index=True 83 | )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]] 84 | 85 | # only keep first country id for each player 86 | country_series = players["countryIds"].explode().groupby(level=0).first() 87 | players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64") 88 | players = players.rename(columns={"countryIds": "countryId"}) 89 | 90 | # unnest mappings 91 | players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() 92 | 93 | # get squads 94 | squads = pd.concat( 95 | map(lambda iteration: connection.make_api_request_limited( 96 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 97 | method="GET" 98 | ).process_response( 99 | endpoint="Squads" 100 | ), 101 | iterations), 102 | ignore_index=True)[["id", "name"]].drop_duplicates() 103 | 104 | # get coaches 105 | coaches_blacklisted = False 106 | try: 107 | coaches = pd.concat( 108 | map(lambda iteration: connection.make_api_request_limited( 109 | 
url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", 110 | method="GET" 111 | ).process_response( 112 | endpoint="Coaches", 113 | raise_exception=False 114 | ), 115 | iterations), 116 | ignore_index=True)[["id", "name"]].drop_duplicates() 117 | except KeyError: 118 | # no coaches found, create empty df 119 | coaches = pd.DataFrame(columns=["id", "name"]) 120 | except ForbiddenError: 121 | coaches_blacklisted = True 122 | 123 | # get kpis 124 | kpis = connection.make_api_request_limited( 125 | url=f"{host}/v5/customerapi/kpis", 126 | method="GET" 127 | ).process_response( 128 | endpoint="KPIs" 129 | )[["id", "name"]] 130 | 131 | # get matches 132 | matchplan = pd.concat( 133 | map(lambda iteration: getMatchesFromHost( 134 | iteration=iteration, 135 | connection=connection, 136 | host=host 137 | ), 138 | iterations), 139 | ignore_index=True) 140 | 141 | # get iterations 142 | iterations = getIterationsFromHost(connection=connection, host=host) 143 | 144 | # get country data 145 | countries = connection.make_api_request_limited( 146 | url=f"{host}/v5/customerapi/countries", 147 | method="GET" 148 | ).process_response( 149 | endpoint="KPIs" 150 | ) 151 | 152 | # create empty df to store matchsums 153 | matchsums = pd.DataFrame() 154 | 155 | # manipulate matchsums 156 | 157 | # iterate over matches 158 | for i in range(len(matchsums_raw)): 159 | 160 | # iterate over sides 161 | for side in ["squadHomePlayers", "squadAwayPlayers"]: 162 | # get data for index 163 | temp = matchsums_raw[side].loc[i] 164 | 165 | # convert to pandas df 166 | temp = pd.DataFrame(temp).assign( 167 | matchId=matchsums_raw.matchId.loc[i], 168 | squadId=matchsums_raw[side.replace("Players", "Id")].loc[i] 169 | ) 170 | 171 | # extract matchshares 172 | matchshares = temp[["matchId", "squadId", "id", "position", "matchShare", "playDuration"]].drop_duplicates() 173 | 174 | # explode kpis column 175 | temp = temp.explode("kpis") 176 | 177 | # unnest dictionary in kpis column 178 | temp = 
pd.concat( 179 | [temp.drop(["kpis"], axis=1), temp["kpis"].apply(pd.Series)], 180 | axis=1 181 | ) 182 | 183 | # merge with kpis to ensure all kpis are present 184 | temp = pd.merge( 185 | temp, 186 | kpis, 187 | left_on="kpiId", 188 | right_on="id", 189 | how="outer", 190 | suffixes=("", "_right") 191 | ) 192 | 193 | # pivot data 194 | temp = pd.pivot_table( 195 | temp, 196 | values="value", 197 | index=["matchId", "squadId", "id", "position"], 198 | columns="name", 199 | aggfunc="sum", 200 | fill_value=0, 201 | dropna=False 202 | ).reset_index() 203 | 204 | # inner join with matchshares 205 | temp = pd.merge( 206 | temp, 207 | matchshares, 208 | left_on=["matchId", "squadId", "id", "position"], 209 | right_on=["matchId", "squadId", "id", "position"], 210 | how="inner", 211 | suffixes=("", "_right") 212 | ) 213 | 214 | # append to matchsums 215 | matchsums = pd.concat([matchsums, temp]) 216 | 217 | # merge with other data 218 | matchsums = matchsums.merge( 219 | matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]], 220 | left_on="matchId", 221 | right_on="id", 222 | how="left", 223 | suffixes=("", "_right") 224 | ).merge( 225 | pd.concat([ 226 | match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), 227 | match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) 228 | ], ignore_index=True), 229 | left_on=["matchId", "squadId"], 230 | right_on=["id", "squadId"], 231 | how="left", 232 | suffixes=("", "_right") 233 | ).merge( 234 | iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], 235 | left_on="iterationId", 236 | right_on="id", 237 | how="left", 238 | suffixes=("", "_right") 239 | ).merge( 240 | squads[["id", "name"]].rename( 241 | columns={"id": "squadId", "name": "squadName"} 242 | ), 243 | left_on="squadId", 244 | right_on="squadId", 245 | how="left", 246 | 
suffixes=("", "_right") 247 | ).merge( 248 | players[[ 249 | "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname", 250 | "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg" 251 | ]].rename( 252 | columns={"commonname": "playerName"} 253 | ), 254 | left_on="id", 255 | right_on="id", 256 | how="left", 257 | suffixes=("", "_right") 258 | ).merge( 259 | countries.rename(columns={"fifaName": "playerCountry"}), 260 | left_on="countryId", 261 | right_on="id", 262 | how="left", 263 | suffixes=("", "_right") 264 | ) 265 | 266 | if not coaches_blacklisted: 267 | matchsums["coachId"] = matchsums["coachId"].astype("Int64") 268 | matchsums = matchsums.merge( 269 | coaches[["id", "name"]].rename( 270 | columns={"id": "coachId", "name": "coachName"} 271 | ), 272 | left_on="coachId", 273 | right_on="coachId", 274 | how="left", 275 | suffixes=("", "_right") 276 | ) 277 | 278 | # rename some columns 279 | matchsums = matchsums.rename(columns={ 280 | "scheduledDate": "dateTime", 281 | "id": "playerId" 282 | }) 283 | 284 | # define column order 285 | order = [ 286 | "matchId", 287 | "dateTime", 288 | "competitionName", 289 | "competitionId", 290 | "competitionType", 291 | "iterationId", 292 | "season", 293 | "matchDayIndex", 294 | "matchDayName", 295 | "squadId", 296 | "squadName", 297 | "coachId", 298 | "coachName", 299 | "playerId", 300 | "wyscoutId", 301 | "heimSpielId", 302 | "skillCornerId", 303 | "playerName", 304 | "firstname", 305 | "lastname", 306 | "birthdate", 307 | "birthplace", 308 | "playerCountry", 309 | "leg", 310 | "position", 311 | "matchShare", 312 | "playDuration" 313 | ] 314 | 315 | # add kpiNames to order 316 | order += kpis['name'].to_list() 317 | 318 | # check if coaches are blacklisted 319 | if coaches_blacklisted: 320 | order = [col for col in order if col not in ["coachId", "coachName"]] 321 | 322 | # select columns 323 | matchsums = matchsums[order] 324 | 325 | # fix some column types 326 | matchsums["matchId"] = 
######
#
# This function returns a pandas dataframe that contains all kpis for a
# given match aggregated per squad
#
######


def getSquadMatchsums(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """Return per-squad KPI aggregates for the given matches from the public API host.

    Args:
        matches: List of match ids to aggregate.
        token: Bearer token used to authorize every request.
        session: Optional requests session to reuse connections.

    Returns:
        DataFrame with one row per (match, squad) and one column per KPI.
    """

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadMatchsumsFromHost(matches, connection, "https://api.impect.com")


def getSquadMatchsumsFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """Return per-squad KPI aggregates for the given matches from an arbitrary host.

    Args:
        matches: List of match ids to aggregate.
        connection: Rate-limited API client with auth headers already attached.
        host: Base URL of the API host.

    Returns:
        DataFrame with one row per (match, squad), enriched with match plan,
        iteration, squad and (if accessible) coach metadata.

    Raises:
        Exception: If ``matches`` is not a list or no supplied match is available.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        # fixed typo in error message ("vor" -> "for")
        raise Exception("Input for matches argument must be a list of integers")

    # get match info
    match_data = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Match Info"
        ),
            matches),
        ignore_index=True)

    # matches without a lastCalculationDate have not been processed yet
    fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = [match for match in matches if match not in fail_matches]

    # raise warnings
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds of all available matches
    iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique())

    # get squad match sums
    matchsums_raw = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}/squad-kpis",
            method="GET"
        ).process_response(
            endpoint="SquadMatchsums"
        ).assign(
            matchId=match
        ),
            matches),
        ignore_index=True)

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name", "idMappings"]]

    # get coaches; the endpoint may be blacklisted for some customers
    coaches_blacklisted = False
    try:
        coaches = pd.concat(
            map(lambda iteration: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/coaches",
                method="GET"
            ).process_response(
                endpoint="Coaches",
                raise_exception=False
            ),
                iterations),
            ignore_index=True)[["id", "name"]].drop_duplicates()
    except KeyError:
        # no coaches found, create empty df
        coaches = pd.DataFrame(columns=["id", "name"])
    except ForbiddenError:
        coaches_blacklisted = True

    # unnest provider id mappings (wyscout, heimSpiel, skillCorner)
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get kpis
    kpis = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/kpis",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )[["id", "name"]]

    # get match plan for all involved iterations
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # create empty df to store matchsums
    matchsums = pd.DataFrame()

    # iterate over matches
    for i in range(len(matchsums_raw)):

        # iterate over sides
        for side in ["squadHomeKpis", "squadAwayKpis"]:
            # get data for index
            temp = matchsums_raw[side].loc[i]

            # convert to pandas df, tagging the owning match and squad
            temp = pd.DataFrame(temp).assign(
                matchId=matchsums_raw.matchId.loc[i],
                squadId=matchsums_raw[side.replace("Kpis", "Id")].loc[i]
            )

            # outer-merge with kpis so KPIs without values still become columns
            temp = temp.merge(
                kpis,
                left_on="kpiId",
                right_on="id",
                how="outer",
                # fixed: was ("", "right"); now consistent with every other merge
                suffixes=("", "_right")
            )

            # pivot data to one row per (match, squad), one column per KPI
            temp = pd.pivot_table(
                temp,
                values="value",
                index=["matchId", "squadId"],
                columns="name",
                aggfunc="sum",
                fill_value=0,
                dropna=False
            ).reset_index()

            # append to matchsums
            matchsums = pd.concat([matchsums, temp])

    # merge with other data
    matchsums = matchsums.merge(
        matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]],
        left_on="matchId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        # stack home/away coach assignments into one (match, squad) -> coach table
        pd.concat([
            match_data[["id", "squadHomeId", "squadHomeCoachId"]].rename(
                columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}),
            match_data[["id", "squadAwayId", "squadAwayCoachId"]].rename(
                columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"})
        ], ignore_index=True),
        left_on=["matchId", "squadId"],
        right_on=["id", "squadId"],
        how="left",
        suffixes=("", "_right")
    ).merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        # fixed: was ("", "_home"); now consistent with every other merge
        suffixes=("", "_right")
    )

    if not coaches_blacklisted:
        matchsums["coachId"] = matchsums["coachId"].astype("Int64")
        matchsums = matchsums.merge(
            coaches[["id", "name"]].rename(
                columns={"id": "coachId", "name": "coachName"}
            ),
            left_on="coachId",
            right_on="coachId",
            how="left",
            suffixes=("", "_right")
        )

    # rename some columns
    matchsums = matchsums.rename(columns={
        "scheduledDate": "dateTime"
    })

    # define column order
    order = [
        "matchId",
        "dateTime",
        "competitionName",
        "competitionId",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "coachId",
        "coachName"
    ]

    # add kpiNames to order
    order += kpis['name'].to_list()

    # drop rows produced only by the outer KPI merge (no real match/squad)
    matchsums = matchsums[
        (matchsums.matchId.notnull()) &
        (matchsums.squadId.notnull())
    ]

    # reset index; fixed: drop=True avoids leaking a stale "index" column
    matchsums = matchsums.reset_index(drop=True)

    # check if coaches are blacklisted
    if coaches_blacklisted:
        order = [col for col in order if col not in ["coachId", "coachName"]]

    # select & order columns
    matchsums = matchsums[order]

    # fix some column types
    matchsums["matchId"] = matchsums["matchId"].astype("Int64")
    matchsums["competitionId"] = matchsums["competitionId"].astype("Int64")
    matchsums["iterationId"] = matchsums["iterationId"].astype("Int64")
    matchsums["matchDayIndex"] = matchsums["matchDayIndex"].astype("Int64")
    matchsums["squadId"] = matchsums["squadId"].astype("Int64")
    matchsums["wyscoutId"] = matchsums["wyscoutId"].astype("Int64")
    matchsums["heimSpielId"] = matchsums["heimSpielId"].astype("Int64")
    matchsums["skillCornerId"] = matchsums["skillCornerId"].astype("Int64")

    # return data
    return matchsums
def getPlayerProfileScoresFromHost(
    iteration: int, positions: list, connection: RateLimitedAPI, host: str
) -> pd.DataFrame:
    """Return per-player profile scores for one iteration and position set.

    Args:
        iteration: Iteration (season/competition) id to query.
        positions: Positions to aggregate over; must be a subset of
            ``allowed_positions``.
        connection: Rate-limited API client with auth headers already attached.
        host: Base URL of the API host.

    Returns:
        DataFrame with one row per (iteration, squad, player, positions) and
        one column per profile score, enriched with player/squad metadata.

    Raises:
        Exception: On invalid arguments or when no player played at the given
            positions in the entire iteration.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Input for iteration argument must be an integer")

    # check input for positions argument
    if not isinstance(positions, list):
        raise Exception("Input for positions argument must be a list")

    # check if the input positions are valid
    invalid_positions = [position for position in positions if position not in allowed_positions]
    if len(invalid_positions) > 0:
        raise Exception(
            f"Invalid position(s): {', '.join(invalid_positions)}."
            f"\nChoose one or more of: {', '.join(allowed_positions)}"
        )

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )

    # only squads the customer has access to can be queried
    squad_ids = squads[squads.access].id.to_list()

    # compile position string for the URL
    position_string = ",".join(positions)

    # get player profile scores per squad
    profile_scores_raw = pd.concat(
        map(lambda squadId: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/"
                f"squads/{squadId}/positions/{position_string}/player-profile-scores",
            method="GET"
        ).process_response(
            endpoint="PlayerIterationScores",
            raise_exception=False
        ).assign(
            iterationId=iteration,
            squadId=squadId,
            positions=position_string
        ),
            squad_ids),
        ignore_index=True)

    # raise exception if no player played at given positions in entire iteration
    if len(profile_scores_raw) == 0:
        raise Exception(f"No players played at given position in iteration {iteration}.")

    # print squads without players at given position
    error_list = [str(squadId) for squadId in squad_ids if squadId not in profile_scores_raw.squadId.to_list()]
    if len(error_list) > 0:
        print(f"No players played at positions {positions} for iteration {iteration} for following squads:\n\t{', '.join(error_list)}")

    # get players
    players = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/players",
        method="GET"
    ).process_response(
        endpoint="Players"
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest provider id mappings (wyscout, heimSpiel, skillCorner)
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get the list of profile score names
    scores = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/player-profiles",
        method="GET"
    ).process_response(
        endpoint="playerProfiles"
    )[["name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        # fixed: endpoint label was mistakenly "KPIs" (copy-paste);
        # "Countries" matches the identical request in player_scores.py
        endpoint="Countries"
    )

    # unnest scorings
    profile_scores = profile_scores_raw.explode("profileScores").reset_index(drop=True)

    # unnest dictionary in profileScores column
    profile_scores = pd.concat(
        [profile_scores.drop(["profileScores"], axis=1), pd.json_normalize(profile_scores["profileScores"])],
        axis=1
    )

    # outer-merge with the score list so all profiles become columns
    profile_scores = profile_scores.merge(
        scores,
        left_on="profileName",
        right_on="name",
        how="outer",
        suffixes=("", "_right")
    )

    # keep matchShares aside; the pivot below would otherwise lose them
    match_shares = profile_scores[
        ["iterationId", "squadId", "playerId", "positions", "playDuration", "matchShare"]].drop_duplicates()

    # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
    if len(profile_scores["name"][profile_scores["name"].isnull()]) > 0:
        profile_scores["name"] = profile_scores["name"].fillna("-1")

    # pivot score values to one row per (iteration, squad, player, positions)
    profile_scores = pd.pivot_table(
        profile_scores,
        values="value",
        index=["iterationId", "squadId", "playerId", "positions"],
        columns="name",
        aggfunc="sum",
        fill_value=0,
        dropna=False
    ).reset_index()

    # drop the placeholder "-1" column again
    if "-1" in profile_scores.columns:
        profile_scores.drop(["-1"], inplace=True, axis=1)

    # merge with playDuration and matchShare
    profile_scores = profile_scores.merge(
        match_shares,
        left_on=["iterationId", "squadId", "playerId", "positions"],
        right_on=["iterationId", "squadId", "playerId", "positions"],
        how="inner",
        suffixes=("", "_right")
    )

    # merge with other data
    profile_scores = profile_scores.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # remove rows produced only by the outer merge
    profile_scores = profile_scores[profile_scores.iterationId.notnull()]

    # fix column types
    profile_scores["squadId"] = profile_scores["squadId"].astype("Int64")
    profile_scores["playerId"] = profile_scores["playerId"].astype("Int64")
    profile_scores["iterationId"] = profile_scores["iterationId"].astype("Int64")
    profile_scores["wyscoutId"] = profile_scores["wyscoutId"].astype("Int64")
    profile_scores["heimSpielId"] = profile_scores["heimSpielId"].astype("Int64")
    profile_scores["skillCornerId"] = profile_scores["skillCornerId"].astype("Int64")

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "squadName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "positions",
        "matchShare",
        "playDuration"
    ]

    # add score names to order
    order = order + scores.name.to_list()

    # select columns
    profile_scores = profile_scores[order]

    # return result
    return profile_scores
# closed set of valid position codes accepted by the player-scores endpoints
allowed_positions = [
    "GOALKEEPER",
    "LEFT_WINGBACK_DEFENDER",
    "RIGHT_WINGBACK_DEFENDER",
    "CENTRAL_DEFENDER",
    "DEFENSE_MIDFIELD",
    "CENTRAL_MIDFIELD",
    "ATTACKING_MIDFIELD",
    "LEFT_WINGER",
    "RIGHT_WINGER",
    "CENTER_FORWARD"
]

######
#
# This function returns a pandas dataframe that contains all scores for a
# given match and a given set of positions aggregated per player
#
######


def getPlayerMatchScores(
        matches: list, token: str, positions: list = None, session: requests.Session = requests.Session()
) -> pd.DataFrame:
    """Return per-player score aggregates for the given matches from the public host.

    Wraps ``getPlayerMatchScoresFromHost`` with a rate-limited client
    authorized via the bearer ``token``.
    """

    # create an instance of RateLimitedAPI
    connection = RateLimitedAPI(session)

    # construct header with access token
    connection.session.headers.update({"Authorization": f"Bearer {token}"})

    return getPlayerMatchScoresFromHost(matches, connection, "https://api.impect.com", positions)

def getPlayerMatchScoresFromHost(matches: list, connection: RateLimitedAPI, host: str, positions: list = None) -> pd.DataFrame:
    """Return per-player score aggregates for the given matches from an arbitrary host.

    Args:
        matches: List of match ids to aggregate.
        connection: Rate-limited API client with auth headers already attached.
        host: Base URL of the API host.
        positions: Optional subset of ``allowed_positions``; if given, scores
            are aggregated over that combined position set instead of per
            single position.

    Returns:
        DataFrame with one row per player appearance (keyed by match, squad,
        player and position(s)), one column per score, enriched with match,
        iteration, squad, coach and player metadata.

    Raises:
        Exception: On invalid arguments, when all matches are unavailable, or
            when no player played at the given positions in any match.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # check input for positions argument
    if not isinstance(positions, list) and positions is not None:
        raise Exception("Input for positions argument must be a list")

    # check if the input positions are valid
    if positions is not None:
        invalid_positions = [position for position in positions if position not in allowed_positions]
        if len(invalid_positions) > 0:
            raise Exception(
                f"Invalid position(s): {', '.join(invalid_positions)}."
                f"\nChoose one or more of: {', '.join(allowed_positions)}"
            )

    # get match info
    match_data = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Match Info"
        ),
            matches),
        ignore_index=True)

    # matches without a lastCalculationDate have not been processed yet
    fail_matches = match_data[match_data.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = [match for match in matches if match not in fail_matches]

    # raise warnings
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds of all available matches
    iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique())

    # get player scores
    if positions is None:
        # query positions at once
        scores_raw = pd.concat(
            map(lambda match: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/matches/{match}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerMatchScores"
            ).assign(
                matchId=match,
            ),
                matches),
            ignore_index=True)
    else:

        # compile list of positions
        position_string = ",".join(positions)

        # query positions individually
        scores_raw = pd.concat(
            map(lambda match: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/matches/{match}/positions/{position_string}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerMatchScores"
            ).assign(
                matchId=match,
                positions=position_string
            ),
                matches),
            ignore_index=True)

    # get players for all involved iterations
    players = pd.concat(
        map(
            lambda iteration: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/players",
                method="GET"
            ).process_response(
                endpoint="Players"
            ),
            iterations),
        ignore_index=True
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest provider id mappings (wyscout, heimSpiel, skillCorner)
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get coaches; the endpoint may be blacklisted for some customers
    coaches_blacklisted = False
    try:
        coaches = pd.concat(
            map(lambda iteration: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/coaches",
                method="GET"
            ).process_response(
                endpoint="Coaches",
                raise_exception=False
            ),
                iterations),
            ignore_index=True)[["id", "name"]].drop_duplicates()
    except KeyError:
        # no coaches found, create empty df
        coaches = pd.DataFrame(columns=["id", "name"])
    except ForbiddenError:
        coaches_blacklisted = True

    # get the list of player score names
    scores = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/player-scores",
        method="GET"
    ).process_response(
        endpoint="PlayerScores"
    )[["id", "name"]]

    # get match plan for all involved iterations
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="Countries"
    )

    # create empty df to store player scores
    player_scores = pd.DataFrame()

    # manipulate player_scores

    # iterate over matches
    for i in range(len(scores_raw)):

        # create empty df to store per match scores
        match_player_scores = pd.DataFrame()

        # iterate over sides
        for side in ["squadHomePlayers", "squadAwayPlayers"]:

            # get data for index
            temp = scores_raw[side].loc[i]

            # check if any records for side at given position
            if len(temp) == 0:
                continue

            # convert to pandas df; the two branches differ in whether a
            # single "position" or the combined "positions" string is carried
            if positions is None:
                temp = pd.DataFrame(temp).assign(
                    matchId=scores_raw.matchId.loc[i],
                    squadId=scores_raw[side.replace("Players", "Id")].loc[i],
                )

                # extract matchshares before pivoting would lose them
                matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration", "position"]].drop_duplicates()

            else:
                temp = pd.DataFrame(temp).assign(
                    matchId=scores_raw.matchId.loc[i],
                    squadId=scores_raw[side.replace("Players", "Id")].loc[i],
                    positions=scores_raw.positions.loc[i]
                )

                # extract matchshares before pivoting would lose them
                matchshares = temp[["matchId", "squadId", "id", "matchShare", "playDuration"]].drop_duplicates().assign(
                    positions=position_string
                )

            # explode scores column to one row per score entry
            temp = temp.explode("playerScores")

            # unnest dictionary in playerScores column
            temp = pd.concat(
                [temp.drop(["playerScores"], axis=1), temp["playerScores"].apply(pd.Series)],
                axis=1
            )

            # outer-merge with the score list so all scores become columns
            temp = pd.merge(
                temp,
                scores,
                left_on="playerScoreId",
                right_on="id",
                how="outer",
                suffixes=("", "_right")
            )

            # pivot data; index key depends on the positions branch
            if positions is None:
                temp = pd.pivot_table(
                    temp,
                    values="value",
                    index=["matchId", "squadId", "position", "id"],
                    columns="name",
                    aggfunc="sum",
                    fill_value=0,
                    dropna=False
                ).reset_index()

                # inner join with matchshares
                temp = pd.merge(
                    temp,
                    matchshares,
                    left_on=["matchId", "squadId", "id", "position"],
                    right_on=["matchId", "squadId", "id", "position"],
                    how="inner",
                    suffixes=("", "_right")
                )
            else:
                temp = pd.pivot_table(
                    temp,
                    values="value",
                    index=["matchId", "squadId", "positions", "id"],
                    columns="name",
                    aggfunc="sum",
                    fill_value=0,
                    dropna=False
                ).reset_index()

                # inner join with matchshares
                temp = pd.merge(
                    temp,
                    matchshares,
                    left_on=["matchId", "squadId", "id", "positions"],
                    right_on=["matchId", "squadId", "id", "positions"],
                    how="inner",
                    suffixes=("", "_right")
                )

            # append to match_player_scores
            match_player_scores = pd.concat([match_player_scores, temp])

        # check if any records for match at given position
        if len(match_player_scores) == 0:
            print(f"No players played at given position in match {scores_raw.loc[i].matchId}")

        # append to player_scores
        player_scores = pd.concat([player_scores, match_player_scores])

    # check if any records for any match at given position
    if len(player_scores) == 0:
        raise Exception("No players played at given positions for any given match. Execution stopped.")

    # merge with other data
    player_scores = player_scores.merge(
        matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]],
        left_on="matchId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        # stack home/away coach assignments into one (match, squad) -> coach table
        pd.concat([
            match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}),
            match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"})
        ], ignore_index=True),
        left_on=["matchId", "squadId"],
        right_on=["id", "squadId"],
        how="left",
        suffixes=("", "_right")
    ).merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="id",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    if not coaches_blacklisted:
        player_scores["coachId"] = player_scores["coachId"].astype("Int64")
        player_scores = player_scores.merge(
            coaches[["id", "name"]].rename(
                columns={"id": "coachId", "name": "coachName"}
            ),
            left_on="coachId",
            right_on="coachId",
            how="left",
            suffixes=("", "_right")
        )

    # rename some columns
    player_scores = player_scores.rename(columns={
        "scheduledDate": "dateTime",
        "id": "playerId"
    })

    # define column order
    order = [
        "matchId",
        "dateTime",
        "competitionName",
        "competitionId",
        "competitionType",
        "iterationId",
        "season",
        "matchDayIndex",
        "matchDayName",
        "squadId",
        "squadName",
        "coachId",
        "coachName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "positions" if positions is not None else "position",
        "matchShare",
        "playDuration",
    ]

    # add score names to order
    order += scores["name"].to_list()

    # check if coaches are blacklisted
    if coaches_blacklisted:
        order = [col for col in order if col not in ["coachId", "coachName"]]

    # select columns
    player_scores = player_scores[order]

    # fix some column types
    player_scores["matchId"] = player_scores["matchId"].astype("Int64")
    player_scores["squadId"] = player_scores["squadId"].astype("Int64")
    player_scores["playerId"] = player_scores["playerId"].astype("Int64")
    player_scores["wyscoutId"] = player_scores["wyscoutId"].astype("Int64")
    player_scores["heimSpielId"] = player_scores["heimSpielId"].astype("Int64")
    player_scores["skillCornerId"] = player_scores["skillCornerId"].astype("Int64")

    # return data
    return player_scores


######
#
# This function returns a pandas dataframe that contains all scores for a
# given iteration and a given set of positions aggregated per player
#
######
iteration: int, token: str, positions: list = None, session: requests.Session = requests.Session() 449 | ) -> pd.DataFrame: 450 | 451 | # create an instance of RateLimitedAPI 452 | connection = RateLimitedAPI(session) 453 | 454 | # construct header with access token 455 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 456 | 457 | return getPlayerIterationScoresFromHost(iteration, connection, "https://api.impect.com", positions) 458 | 459 | def getPlayerIterationScoresFromHost( 460 | iteration: int, connection: RateLimitedAPI, host: str, positions: list = None 461 | ) -> pd.DataFrame: 462 | 463 | # check input for iteration argument 464 | if not isinstance(iteration, int): 465 | raise Exception("Input for iteration argument must be an integer") 466 | 467 | # check input for positions argument 468 | if not isinstance(positions, list) and positions is not None: 469 | raise Exception("Input for positions argument must be a list") 470 | 471 | # check if the input positions are valid 472 | if positions is not None: 473 | invalid_positions = [position for position in positions if position not in allowed_positions] 474 | if len(invalid_positions) > 0: 475 | raise Exception( 476 | f"Invalid position(s): {', '.join(invalid_positions)}." 
def getPlayerIterationScoresFromHost(
        iteration: int, connection: RateLimitedAPI, host: str, positions: list = None
) -> pd.DataFrame:
    """
    Return a dataframe with all player scores for a given iteration,
    aggregated per player and position (or per supplied position set).

    Args:
        iteration: iteration id to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.
        positions: optional list of position codes; must be a subset of
            ``allowed_positions``. If given, scores are fetched per
            position-combination endpoint instead of per player-position.

    Returns:
        pd.DataFrame: one row per player (and position grouping) with squad,
        competition, player master data and one column per score name.

    Raises:
        Exception: on invalid arguments, invalid positions, or when no player
            played at the given positions in the entire iteration.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Input for iteration argument must be an integer")

    # check input for positions argument
    if not isinstance(positions, list) and positions is not None:
        raise Exception("Input for positions argument must be a list")

    # check if the input positions are valid
    if positions is not None:
        invalid_positions = [position for position in positions if position not in allowed_positions]
        if len(invalid_positions) > 0:
            raise Exception(
                f"Invalid position(s): {', '.join(invalid_positions)}."
                f"\nChoose one or more of: {', '.join(allowed_positions)}"
            )

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )

    # get squadIds (only squads the customer has access to)
    squad_ids = squads[squads.access].id.to_list()

    # get player iteration averages per squad
    if positions is None:

        scores_raw = pd.concat(
            map(lambda squadId: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/"
                    f"squads/{squadId}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerIterationScores",
                raise_exception=False
            ).assign(
                iterationId=iteration,
                squadId=squadId
            ),
                squad_ids),
            ignore_index=True)

    else:

        # compile position string (comma-separated, as expected by the endpoint)
        position_string = ",".join(positions)

        scores_raw = pd.concat(
            map(lambda squadId: connection.make_api_request_limited(
                url=f"{host}/v5/customerapi/iterations/{iteration}/"
                    f"squads/{squadId}/positions/{position_string}/player-scores",
                method="GET"
            ).process_response(
                endpoint="PlayerIterationScores",
                raise_exception=False
            ).assign(
                iterationId=iteration,
                squadId=squadId,
                positions=position_string
            ),
                squad_ids),
            ignore_index=True)

    # raise exception if no player played at given positions in entire iteration
    if len(scores_raw) == 0:
        raise Exception(f"No players played at given position in iteration {iteration}.")

    # print squads without players at given position
    error_list = [str(squadId) for squadId in squad_ids if squadId not in scores_raw.squadId.to_list()]
    if len(error_list) > 0:
        print(f"No players played at positions {positions} for iteration {iteration} for following squads:\n\t{', '.join(error_list)}")

    # get players
    players = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/players",
        method="GET"
    ).process_response(
        endpoint="Players"
    )[["id", "commonname", "firstname", "lastname", "birthdate", "birthplace", "leg", "countryIds", "idMappings"]]

    # only keep first country id for each player
    country_series = players["countryIds"].explode().groupby(level=0).first()
    players["countryIds"] = players.index.to_series().map(country_series).astype("float").astype("Int64")
    players = players.rename(columns={"countryIds": "countryId"})

    # unnest mappings (wyscoutId / heimSpielId / skillCornerId)
    players = unnest_mappings_df(players, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get scores (master list of score id -> name)
    scores = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/player-scores",
        method="GET"
    ).process_response(
        endpoint="playerScores"
    )[["id", "name"]]

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get country data
    # NOTE(review): endpoint label "KPIs" looks like a copy-paste slip — this
    # call fetches countries; consider renaming the label.
    countries = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/countries",
        method="GET"
    ).process_response(
        endpoint="KPIs"
    )

    # unnest scorings (one row per player-score entry)
    averages = scores_raw.explode("playerScores").reset_index(drop=True)

    # unnest dictionary in kpis column
    averages = pd.concat(
        [averages.drop(["playerScores"], axis=1), pd.json_normalize(averages["playerScores"])],
        axis=1
    )

    # merge with player scores to ensure all kpis are present
    # (outer merge also creates rows for scores nobody recorded)
    averages = averages.merge(
        scores,
        left_on="playerScoreId",
        right_on="id",
        how="outer",
        suffixes=("", "_right")
    )

    # get matchShares (kept aside because the pivot below drops these columns)
    if positions is None:
        match_shares = averages[
            ["iterationId", "squadId", "playerId", "position", "playDuration", "matchShare"]
        ].drop_duplicates()

        # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
        if len(averages["name"][averages["name"].isnull()]) > 0:
            averages["name"] = averages["name"].fillna("-1")

        # pivot kpi values (one column per score name)
        averages = pd.pivot_table(
            averages,
            values="value",
            index=["iterationId", "squadId", "playerId", "position"],
            columns="name",
            aggfunc="sum",
            fill_value=0,
            dropna=False
        ).reset_index()

        # drop "-1" column (the sentinel introduced above)
        if "-1" in averages.columns:
            averages.drop(["-1"], inplace=True, axis=1)

        # merge with playDuration and matchShare
        averages = averages.merge(
            match_shares,
            left_on=["iterationId", "squadId", "playerId", "position"],
            right_on=["iterationId", "squadId", "playerId", "position"],
            how="inner",
            suffixes=("", "_right")
        )
    else:
        # same flow as above, but grouped by the "positions" string instead of
        # the single "position" column
        match_shares = averages[
            ["iterationId", "squadId", "playerId", "positions", "playDuration", "matchShare"]
        ].drop_duplicates()

        # fill missing values in the "name" column with a default value to ensure players without scorings don't get lost
        if len(averages["name"][averages["name"].isnull()]) > 0:
            averages["name"] = averages["name"].fillna("-1")

        # pivot kpi values
        averages = pd.pivot_table(
            averages,
            values="value",
            index=["iterationId", "squadId", "playerId", "positions"],
            columns="name",
            aggfunc="sum",
            fill_value=0,
            dropna=False
        ).reset_index()

        # drop "-1" column
        if "-1" in averages.columns:
            averages.drop(["-1"], inplace=True, axis=1)

        # merge with playDuration and matchShare
        averages = averages.merge(
            match_shares,
            left_on=["iterationId", "squadId", "playerId", "positions"],
            right_on=["iterationId", "squadId", "playerId", "positions"],
            how="inner",
            suffixes=("", "_right")
        )

    # merge with other data (competition, squad, player master data, country)
    averages = averages.merge(
        iterations[["id", "competitionName", "season"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        squads[["id", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[[
            "id", "wyscoutId", "heimSpielId", "skillCornerId", "commonname",
            "firstname", "lastname", "birthdate", "birthplace", "countryId", "leg"
        ]].rename(
            columns={"commonname": "playerName"}
        ),
        left_on="playerId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    ).merge(
        countries.rename(columns={"fifaName": "playerCountry"}),
        left_on="countryId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # remove NA rows (artifacts of the earlier outer merge)
    averages = averages[averages.iterationId.notnull()]

    # fix column types
    averages["squadId"] = averages["squadId"].astype(int)
    averages["playerId"] = averages["playerId"].astype(int)
    averages["iterationId"] = averages["iterationId"].astype(int)

    # define column order
    order = [
        "iterationId",
        "competitionName",
        "season",
        "squadId",
        "squadName",
        "playerId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "playerName",
        "firstname",
        "lastname",
        "birthdate",
        "birthplace",
        "playerCountry",
        "leg",
        "positions" if positions is not None else "position",
        "matchShare",
        "playDuration"
    ]

    # add kpiNames to order
    order = order + scores.name.to_list()

    # select columns
    averages = averages[order]

    # fix some column types (nullable Int64 so missing external ids survive)
    averages["squadId"] = averages["squadId"].astype("Int64")
    averages["playerId"] = averages["playerId"].astype("Int64")
    averages["wyscoutId"] = averages["wyscoutId"].astype("Int64")
    averages["heimSpielId"] = averages["heimSpielId"].astype("Int64")
    averages["skillCornerId"] = averages["skillCornerId"].astype("Int64")

    # return result
    return averages
def getSetPieces(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all set pieces for the given matches from the
    production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSetPiecesFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    client = RateLimitedAPI(session)

    # authenticate every request of this session
    client.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSetPiecesFromHost(matches, client, "https://api.impect.com")
def getSetPiecesFromHost(matches: list, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Return a dataframe with all set pieces (one row per set-piece sub-phase)
    for the given matches.

    Args:
        matches: list of match ids to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.

    Returns:
        pd.DataFrame: set-piece sub-phases enriched with match, competition,
        squad and player master data, ordered by match and phase index.

    Raises:
        Exception: if ``matches`` is not a list, or if none of the supplied
            matches is available yet.
    """

    # check input for matches argument
    if not isinstance(matches, list):
        raise Exception("Argument 'matches' must be a list of integers.")

    # get match info
    # NOTE(review): endpoint label "Iterations" for a per-match info call looks
    # like a copy-paste slip — other modules label this "Match Info"; confirm.
    iterations = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}",
            method="GET"
        ).process_response(
            endpoint="Iterations"
        ),
            matches),
        ignore_index=True)

    # filter for matches that are unavailable (no lastCalculationDate yet)
    fail_matches = iterations[iterations.lastCalculationDate.isnull()].id.drop_duplicates().to_list()

    # drop matches that are unavailable from list of matches
    matches = [match for match in matches if match not in fail_matches]

    # raise exception if no matches remaining or report removed matches
    if len(fail_matches) > 0:
        if len(matches) == 0:
            raise Exception("All supplied matches are unavailable. Execution stopped.")
        else:
            print(f"The following matches are not available yet and were ignored:\n{fail_matches}")

    # extract iterationIds (the variable is reused; from here on it is a list of ids)
    iterations = list(iterations[iterations.lastCalculationDate.notnull()].iterationId.unique())

    # get players
    players = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/players",
            method="GET"
        ).process_response(
            endpoint="Players"
        ),
            iterations),
        ignore_index=True)[["id", "commonname"]].drop_duplicates()

    # get squads
    squads = pd.concat(
        map(lambda iteration: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
            method="GET"
        ).process_response(
            endpoint="Squads"
        ),
            iterations),
        ignore_index=True)[["id", "name"]].drop_duplicates()

    # get matches
    matchplan = pd.concat(
        map(lambda iteration: getMatchesFromHost(
            iteration=iteration,
            connection=connection,
            host=host
        ),
            iterations),
        ignore_index=True)

    # get iterations (reused again: now the full iterations dataframe)
    iterations = getIterationsFromHost(connection=connection, host=host)

    # get set piece data (one row per sub-phase after explode)
    set_pieces = pd.concat(
        map(lambda match: connection.make_api_request_limited(
            url=f"{host}/v5/customerapi/matches/{match}/set-pieces",
            method="GET"
        ).process_response(
            endpoint="Set-Pieces"
        ),
            matches),
        ignore_index=True
    ).rename(
        columns={"id": "setPieceId"}
    ).explode("setPieceSubPhase", ignore_index=True)

    # unpack setPieceSubPhase column; the rename turns "a.b" into camelCase "aB"
    set_pieces = pd.concat(
        [
            set_pieces.drop(columns=["setPieceSubPhase"]),
            pd.json_normalize(set_pieces["setPieceSubPhase"]).add_prefix("setPieceSubPhase.")
        ],
        axis=1
    ).rename(columns=lambda x: re.sub(r"\.(.)", lambda y: y.group(1).upper(), x))

    # fix typing (nullable Int64: these player ids can be missing)
    set_pieces.setPieceSubPhaseMainEventPlayerId = set_pieces.setPieceSubPhaseMainEventPlayerId.astype("Int64")
    set_pieces.setPieceSubPhaseFirstTouchPlayerId = set_pieces.setPieceSubPhaseFirstTouchPlayerId.astype("Int64")
    set_pieces.setPieceSubPhaseSecondTouchPlayerId = set_pieces.setPieceSubPhaseSecondTouchPlayerId.astype("Int64")

    # start merging dfs

    # merge events with matches
    set_pieces = set_pieces.merge(
        matchplan,
        left_on="matchId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge with competition info
    set_pieces = set_pieces.merge(
        iterations,
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # determine defending squad (the squad that is not attacking)
    set_pieces["defendingSquadId"] = set_pieces.apply(
        lambda row: row.homeSquadId if row.squadId == row.awaySquadId else row.awaySquadId,
        axis=1
    )

    # merge events with squads
    set_pieces = set_pieces.merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "attackingSquadName"}),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    ).merge(
        squads[["id", "name"]].rename(columns={"id": "squadId", "name": "defendingSquadName"}),
        left_on="defendingSquadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_away")
    )

    # merge events with players (one merge per player role)
    set_pieces = set_pieces.merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhaseMainEventPlayerId",
                "commonname": "setPieceSubPhaseMainEventPlayerName"
            }
        ),
        left_on="setPieceSubPhaseMainEventPlayerId",
        right_on="setPieceSubPhaseMainEventPlayerId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhasePassReceiverId",
                "commonname": "setPieceSubPhasePassReceiverName"
            }
        ),
        left_on="setPieceSubPhasePassReceiverId",
        right_on="setPieceSubPhasePassReceiverId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhaseFirstTouchPlayerId",
                "commonname": "setPieceSubPhaseFirstTouchPlayerName"
            }
        ),
        left_on="setPieceSubPhaseFirstTouchPlayerId",
        right_on="setPieceSubPhaseFirstTouchPlayerId",
        how="left",
        suffixes=("", "_right")
    ).merge(
        players[["id", "commonname"]].rename(
            columns={
                "id": "setPieceSubPhaseSecondTouchPlayerId",
                "commonname": "setPieceSubPhaseSecondTouchPlayerName"
            }
        ),
        left_on="setPieceSubPhaseSecondTouchPlayerId",
        right_on="setPieceSubPhaseSecondTouchPlayerId",
        how="left",
        suffixes=("", "_right")
    )

    # rename some columns
    set_pieces = set_pieces.rename(columns={
        "scheduledDate": "dateTime",
        "squadId": "attackingSquadId",
        "phaseIndex": "setPiecePhaseIndex",
        "setPieceSubPhaseAggregatesSHOT_XG": "setPieceSubPhase_SHOT_XG",
        "setPieceSubPhaseAggregatesPACKING_XG": "setPieceSubPhase_PACKING_XG",
        "setPieceSubPhaseAggregatesPOSTSHOT_XG": "setPieceSubPhase_POSTSHOT_XG",
        "setPieceSubPhaseAggregatesSHOT_AT_GOAL_NUMBER": "setPieceSubPhase_SHOT_AT_GOAL_NUMBER",
        "setPieceSubPhaseAggregatesGOALS": "setPieceSubPhase_GOALS",
        "setPieceSubPhaseAggregatesPXT_POSITIVE": "setPieceSubPhase_PXT_POSITIVE",
        "setPieceSubPhaseAggregatesBYPASSED_OPPONENTS": "setPieceSubPhase_BYPASSED_OPPONENTS",
        "setPieceSubPhaseAggregatesBYPASSED_DEFENDERS": "setPieceSubPhase_BYPASSED_DEFENDERS"
    })

    # define desired column order
    order = [
        "matchId",
        "dateTime",
        "competitionName",
        "competitionId",
        "competitionType",
        "iterationId",
        "season",
        "attackingSquadId",
        "attackingSquadName",
        "defendingSquadId",
        "defendingSquadName",
        "setPieceId",
        "setPiecePhaseIndex",
        "setPieceCategory",
        "adjSetPieceCategory",
        "setPieceExecutionType",
        "setPieceSubPhaseId",
        "setPieceSubPhaseIndex",
        "setPieceSubPhaseStartZone",
        "setPieceSubPhaseCornerEndZone",
        "setPieceSubPhaseCornerType",
        "setPieceSubPhaseFreeKickEndZone",
        "setPieceSubPhaseFreeKickType",
        "setPieceSubPhaseMainEventPlayerId",
        "setPieceSubPhaseMainEventPlayerName",
        "setPieceSubPhaseMainEventOutcome",
        "setPieceSubPhasePassReceiverId",
        "setPieceSubPhasePassReceiverName",
        "setPieceSubPhaseFirstTouchPlayerId",
        "setPieceSubPhaseFirstTouchPlayerName",
        "setPieceSubPhaseFirstTouchWon",
        "setPieceSubPhaseIndirectHeader",
        "setPieceSubPhaseSecondTouchPlayerId",
        "setPieceSubPhaseSecondTouchPlayerName",
        "setPieceSubPhaseSecondTouchWon",
        "setPieceSubPhase_SHOT_XG",
        "setPieceSubPhase_PACKING_XG",
        "setPieceSubPhase_POSTSHOT_XG",
        "setPieceSubPhase_SHOT_AT_GOAL_NUMBER",
        "setPieceSubPhase_GOALS",
        "setPieceSubPhase_PXT_POSITIVE",
        "setPieceSubPhase_BYPASSED_OPPONENTS",
        "setPieceSubPhase_BYPASSED_DEFENDERS",
    ]

    # reorder data
    set_pieces = set_pieces[order]

    # reorder rows
    set_pieces = set_pieces.sort_values(["matchId", "setPiecePhaseIndex"])

    # return set pieces
    return set_pieces
def getSquadCoefficients(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all squad prediction-model coefficients for a
    given iteration from the production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSquadCoefficientsFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    api = RateLimitedAPI(session)

    # authenticate every request of this session
    api.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadCoefficientsFromHost(iteration, api, "https://api.impect.com")
def getSquadCoefficientsFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Return a dataframe with the squad prediction-model coefficients
    (intercept, home, competition, attack, defense) per date and squad
    for a given iteration.

    Args:
        iteration: iteration id to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.

    Returns:
        pd.DataFrame: one row per (date, squad), sorted by date and squadId.

    Raises:
        Exception: if ``iteration`` is not an integer or does not exist.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Argument 'iteration' must be an integer.")

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # raise exception if provided iteration id doesn't exist
    if iteration not in list(iterations.id):
        raise Exception("The supplied iteration id does not exist. Execution stopped.")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )[["id", "name", "idMappings"]]

    # unnest mappings (wyscoutId / heimSpielId / skillCornerId)
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squad coefficients
    coefficients_raw = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/predictions/model-coefficients",
        method="GET"
    ).process_response(
        endpoint="Squad Coefficients"
    )

    # extract JSON from the column (entries per date, each with squad values)
    nested_data = coefficients_raw["entries"][0]

    # flatten coefficients into one record per (date, squad)
    coefficients = []
    for entry in nested_data:
        date = entry["date"]
        for squad in entry["squads"]:
            coefficients.append({
                "iterationId": iteration,
                "date": date,
                "interceptCoefficient": entry["competition"]["intercept"],
                "homeCoefficient": entry["competition"]["home"],
                "competitionCoefficient": entry["competition"]["comp"],
                "squadId": squad["id"],
                "attackCoefficient": squad["att"],
                "defenseCoefficient": squad["def"]
            })

    # convert to df
    coefficients = pd.DataFrame(coefficients)

    # merge with competition info
    coefficients = coefficients.merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season", "competitionGender"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge coefficients with squads
    coefficients = coefficients.merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    )

    # fix some column types (nullable Int64 so missing external ids survive)
    coefficients["iterationId"] = coefficients["iterationId"].astype("Int64")
    coefficients["competitionId"] = coefficients["competitionId"].astype("Int64")
    coefficients["squadId"] = coefficients["squadId"].astype("Int64")
    coefficients["wyscoutId"] = coefficients["wyscoutId"].astype("Int64")
    coefficients["heimSpielId"] = coefficients["heimSpielId"].astype("Int64")
    coefficients["skillCornerId"] = coefficients["skillCornerId"].astype("Int64")

    # define desired column order
    order = [
        "iterationId",
        "competitionId",
        "competitionName",
        "competitionType",
        "season",
        "competitionGender",
        "interceptCoefficient",
        "homeCoefficient",
        "competitionCoefficient",
        "date",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "attackCoefficient",
        "defenseCoefficient",
    ]

    # reorder data
    coefficients = coefficients[order]

    # reorder rows
    coefficients = coefficients.sort_values(["date", "squadId"])

    # return coefficients
    return coefficients
def getSquadRatings(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all squad ratings for a given iteration from the
    production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSquadRatingsFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    client = RateLimitedAPI(session)

    # authenticate every request of this session
    client.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadRatingsFromHost(iteration, client, "https://api.impect.com")
def getSquadRatingsFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame:
    """
    Return a dataframe with the squad rating values per date and squad for a
    given iteration.

    Args:
        iteration: iteration id to query.
        connection: rate-limited API wrapper with auth headers already set.
        host: API base URL.

    Returns:
        pd.DataFrame: one row per (date, squad), sorted by date and squadId.

    Raises:
        Exception: if ``iteration`` is not an integer or does not exist.
    """

    # check input for iteration argument
    if not isinstance(iteration, int):
        raise Exception("Argument 'iteration' must be an integer.")

    # get iterations
    iterations = getIterationsFromHost(connection=connection, host=host)

    # raise exception if provided iteration id doesn't exist
    if iteration not in list(iterations.id):
        raise Exception("The supplied iteration id does not exist. Execution stopped.")

    # get squads
    squads = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads",
        method="GET"
    ).process_response(
        endpoint="Squads"
    )[["id", "name", "idMappings"]]

    # unnest mappings (wyscoutId / heimSpielId / skillCornerId)
    squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates()

    # get squad ratings
    ratings_raw = connection.make_api_request_limited(
        url=f"{host}/v5/customerapi/iterations/{iteration}/squads/ratings",
        method="GET"
    ).process_response(
        endpoint="Squad Ratings"
    )

    # extract JSON from the column (entries per date, each with squad ratings)
    nested_data = ratings_raw["squadRatingsEntries"][0]

    # flatten ratings into one record per (date, squad)
    ratings = []
    for entry in nested_data:
        date = entry["date"]
        for squad in entry["squadRatings"]:
            ratings.append({
                "date": date,
                "squadId": squad["squadId"],
                "value": squad["value"]
            })

    # convert to df
    ratings = pd.DataFrame(ratings)

    # add iteration id
    ratings["iterationId"] = iteration

    # merge with competition info
    ratings = ratings.merge(
        iterations[["id", "competitionId", "competitionName", "competitionType", "season", "competitionGender"]],
        left_on="iterationId",
        right_on="id",
        how="left",
        suffixes=("", "_right")
    )

    # merge ratings with squads
    ratings = ratings.merge(
        squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename(
            columns={"id": "squadId", "name": "squadName"}
        ),
        left_on="squadId",
        right_on="squadId",
        how="left",
        suffixes=("", "_home")
    )

    # fix some column types (nullable Int64 so missing external ids survive)
    ratings["iterationId"] = ratings["iterationId"].astype("Int64")
    ratings["competitionId"] = ratings["competitionId"].astype("Int64")
    ratings["squadId"] = ratings["squadId"].astype("Int64")
    ratings["wyscoutId"] = ratings["wyscoutId"].astype("Int64")
    ratings["heimSpielId"] = ratings["heimSpielId"].astype("Int64")
    ratings["skillCornerId"] = ratings["skillCornerId"].astype("Int64")

    # define desired column order
    order = [
        "iterationId",
        "competitionId",
        "competitionName",
        "competitionType",
        "season",
        "competitionGender",
        "date",
        "squadId",
        "wyscoutId",
        "heimSpielId",
        "skillCornerId",
        "squadName",
        "value"
    ]

    # reorder data
    ratings = ratings[order]

    # reorder rows
    ratings = ratings.sort_values(["date", "squadId"])

    # return ratings
    return ratings
def getSquadMatchScores(matches: list, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame:
    """
    Return a dataframe with all squad-level scores for the given matches from
    the production IMPECT API.

    Thin wrapper: wires the bearer token into a rate-limited session and
    delegates to getSquadMatchScoresFromHost against the production host.
    """

    # wrap the caller's session in the shared rate limiter
    api = RateLimitedAPI(session)

    # authenticate every request of this session
    api.session.headers.update({"Authorization": f"Bearer {token}"})

    return getSquadMatchScoresFromHost(matches, api, "https://api.impect.com")
Execution stopped.") 53 | else: 54 | print(f"The following matches are not available yet and were ignored:\n{fail_matches}") 55 | 56 | # extract iterationIds 57 | iterations = list(match_data[match_data.lastCalculationDate.notnull()].iterationId.unique()) 58 | 59 | # get squad scores 60 | scores_raw = pd.concat( 61 | map(lambda match: connection.make_api_request_limited( 62 | url=f"{host}/v5/customerapi/matches/{match}/squad-scores", 63 | method="GET" 64 | ).process_response( 65 | endpoint="SquadMatchScores" 66 | ).assign( 67 | matchId=match 68 | ), 69 | matches), 70 | ignore_index=True) 71 | 72 | # get squads 73 | squads = pd.concat( 74 | map(lambda iteration: connection.make_api_request_limited( 75 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 76 | method="GET" 77 | ).process_response( 78 | endpoint="Squads" 79 | ), 80 | iterations), 81 | ignore_index=True)[["id", "name", "idMappings"]] 82 | 83 | # get coaches 84 | coaches_blacklisted = False 85 | try: 86 | coaches = pd.concat( 87 | map(lambda iteration: connection.make_api_request_limited( 88 | url=f"{host}/v5/customerapi/iterations/{iteration}/coaches", 89 | method="GET" 90 | ).process_response( 91 | endpoint="Coaches", 92 | raise_exception=False 93 | ), 94 | iterations), 95 | ignore_index=True)[["id", "name"]].drop_duplicates() 96 | except KeyError: 97 | # no coaches found, create empty df 98 | coaches = pd.DataFrame(columns=["id", "name"]) 99 | except ForbiddenError: 100 | coaches_blacklisted = True 101 | 102 | # unnest mappings 103 | squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() 104 | 105 | # get squad scores 106 | scores = connection.make_api_request_limited( 107 | url=f"{host}/v5/customerapi/squad-scores", 108 | method="GET" 109 | ).process_response( 110 | endpoint="PlayerScores" 111 | )[["id", "name"]] 112 | 113 | # get matches 114 | matchplan = pd.concat( 115 | map(lambda iteration: getMatchesFromHost( 116 | iteration=iteration, 117 | 
connection=connection, 118 | host=host 119 | ), 120 | iterations), 121 | ignore_index=True) 122 | 123 | # get iterations 124 | iterations = getIterationsFromHost(connection=connection, host=host) 125 | 126 | # create empty df to store squad scores 127 | squad_scores = pd.DataFrame() 128 | 129 | # manipulate squad scores 130 | 131 | # iterate over matches 132 | for i in range(len(scores_raw)): 133 | 134 | # iterate over sides 135 | for side in ["squadHomeSquadScores", "squadAwaySquadScores"]: 136 | # get data for index 137 | temp = scores_raw[side].loc[i] 138 | 139 | # convert to pandas df 140 | temp = pd.DataFrame(temp).assign( 141 | matchId=scores_raw.matchId.loc[i], 142 | squadId=scores_raw[side.replace("SquadScores", "Id")].loc[i] 143 | ) 144 | 145 | # merge with squad scores to ensure all scores are present 146 | temp = pd.merge( 147 | temp, 148 | scores, 149 | left_on="squadScoreId", 150 | right_on="id", 151 | how="outer", 152 | suffixes=("", "_right") 153 | ) 154 | 155 | # pivot data 156 | temp = pd.pivot_table( 157 | temp, 158 | values="value", 159 | index=["matchId", "squadId"], 160 | columns="name", 161 | aggfunc="sum", 162 | fill_value=0, 163 | dropna=False 164 | ).reset_index() 165 | 166 | # append to player_scores 167 | squad_scores = pd.concat([squad_scores, temp]) 168 | 169 | # merge with other data 170 | squad_scores = squad_scores.merge( 171 | matchplan[["id", "scheduledDate", "matchDayIndex", "matchDayName", "iterationId"]], 172 | left_on="matchId", 173 | right_on="id", 174 | how="left", 175 | suffixes=("", "_right") 176 | ).merge( 177 | pd.concat([ 178 | match_data[["id","squadHomeId", "squadHomeCoachId"]].rename(columns={"squadHomeId": "squadId", "squadHomeCoachId": "coachId"}), 179 | match_data[["id","squadAwayId", "squadAwayCoachId"]].rename(columns={"squadAwayId": "squadId", "squadAwayCoachId": "coachId"}) 180 | ], ignore_index=True), 181 | left_on=["matchId", "squadId"], 182 | right_on=["id", "squadId"], 183 | how="left", 184 | suffixes=("", 
"_right") 185 | ).merge( 186 | iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], 187 | left_on="iterationId", 188 | right_on="id", 189 | how="left", 190 | suffixes=("", "_right") 191 | ).merge( 192 | squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename( 193 | columns={"id": "squadId", "name": "squadName"} 194 | ), 195 | left_on="squadId", 196 | right_on="squadId", 197 | how="left", 198 | suffixes=("", "_right") 199 | ) 200 | 201 | if not coaches_blacklisted: 202 | squad_scores["coachId"] = squad_scores["coachId"].astype("Int64") 203 | squad_scores = squad_scores.merge( 204 | coaches[["id", "name"]].rename( 205 | columns={"id": "coachId", "name": "coachName"} 206 | ), 207 | left_on="coachId", 208 | right_on="coachId", 209 | how="left", 210 | suffixes=("", "_right") 211 | ) 212 | 213 | # rename some columns 214 | squad_scores = squad_scores.rename(columns={ 215 | "scheduledDate": "dateTime" 216 | }) 217 | 218 | # define column order 219 | order = [ 220 | "matchId", 221 | "dateTime", 222 | "competitionName", 223 | "competitionId", 224 | "competitionType", 225 | "iterationId", 226 | "season", 227 | "matchDayIndex", 228 | "matchDayName", 229 | "squadId", 230 | "wyscoutId", 231 | "heimSpielId", 232 | "skillCornerId", 233 | "squadName", 234 | "coachId", 235 | "coachName" 236 | ] 237 | 238 | # check if coaches are blacklisted 239 | if coaches_blacklisted: 240 | order = [col for col in order if col not in ["coachId", "coachName"]] 241 | 242 | # add scoreNames to order 243 | order += scores["name"].to_list() 244 | 245 | # select columns 246 | squad_scores = squad_scores[order] 247 | 248 | # fix some column types 249 | squad_scores["matchId"] = squad_scores["matchId"].astype("Int64") 250 | squad_scores["competitionId"] = squad_scores["competitionId"].astype("Int64") 251 | squad_scores["iterationId"] = squad_scores["iterationId"].astype("Int64") 252 | squad_scores["matchDayIndex"] = 
squad_scores["matchDayIndex"].astype("Int64") 253 | squad_scores["squadId"] = squad_scores["squadId"].astype("Int64") 254 | squad_scores["wyscoutId"] = squad_scores["wyscoutId"].astype("Int64") 255 | squad_scores["heimSpielId"] = squad_scores["heimSpielId"].astype("Int64") 256 | squad_scores["skillCornerId"] = squad_scores["skillCornerId"].astype("Int64") 257 | 258 | # return data 259 | return squad_scores 260 | 261 | ###### 262 | # 263 | # This function returns a pandas dataframe that contains all scores for a 264 | # given iteration aggregated per squad 265 | # 266 | ###### 267 | 268 | 269 | def getSquadIterationScores(iteration: int, token: str, session: requests.Session = requests.Session()) -> pd.DataFrame: 270 | 271 | # create an instance of RateLimitedAPI 272 | connection = RateLimitedAPI(session) 273 | 274 | # construct header with access token 275 | connection.session.headers.update({"Authorization": f"Bearer {token}"}) 276 | 277 | return getSquadIterationScoresFromHost(iteration, connection, "https://api.impect.com") 278 | 279 | def getSquadIterationScoresFromHost(iteration: int, connection: RateLimitedAPI, host: str) -> pd.DataFrame: 280 | 281 | # check input for matches argument 282 | if not isinstance(iteration, int): 283 | raise Exception("Input for iteration argument must be an integer") 284 | 285 | # get squads 286 | squads = connection.make_api_request_limited( 287 | url=f"{host}/v5/customerapi/iterations/{iteration}/squads", 288 | method="GET" 289 | ).process_response( 290 | endpoint="Squads" 291 | )[["id", "name", "idMappings"]] 292 | 293 | # unnest mappings 294 | squads = unnest_mappings_df(squads, "idMappings").drop(["idMappings"], axis=1).drop_duplicates() 295 | 296 | # get squad iteration averages 297 | scores_raw = connection.make_api_request_limited( 298 | url=f"{host}/v5/customerapi/iterations/{iteration}/squad-scores", 299 | method="GET" 300 | ).process_response( 301 | endpoint="SquadIterationScores" 302 | ).assign(iterationId=iteration) 
303 | 304 | # get scores 305 | scores_definitions = connection.make_api_request_limited( 306 | url=f"{host}/v5/customerapi/squad-scores", 307 | method="GET" 308 | ).process_response( 309 | endpoint="scoreDefinitions" 310 | )[["id", "name"]] 311 | 312 | # get iterations 313 | iterations = getIterationsFromHost(connection=connection, host=host) 314 | 315 | # get matches played 316 | matches = scores_raw[["squadId", "matches"]].drop_duplicates() 317 | 318 | # unnest scores 319 | scores = scores_raw.explode("squadScores").reset_index(drop=True) 320 | 321 | # unnest dictionary in kpis column 322 | scores = pd.concat( 323 | [scores.drop(["squadScores"], axis=1), pd.json_normalize(scores["squadScores"])], 324 | axis=1 325 | ) 326 | 327 | # merge with kpis to ensure all kpis are present 328 | scores = scores.merge( 329 | scores_definitions, 330 | left_on="squadScoreId", 331 | right_on="id", 332 | how="outer", 333 | suffixes=("", "_right") 334 | ) 335 | 336 | # pivot kpi values 337 | scores = pd.pivot_table( 338 | scores, 339 | values="value", 340 | index=["iterationId", "squadId"], 341 | columns="name", 342 | aggfunc="sum", 343 | fill_value=0, 344 | dropna=False 345 | ).reset_index() 346 | 347 | # inner join with matches played 348 | scores = pd.merge( 349 | scores, 350 | matches, 351 | left_on="squadId", 352 | right_on="squadId", 353 | how="inner", 354 | suffixes=("", "_right") 355 | ) 356 | 357 | # merge with other data 358 | scores = scores.merge( 359 | iterations[["id", "competitionId", "competitionName", "competitionType", "season"]], 360 | left_on="iterationId", 361 | right_on="id", 362 | how="left", 363 | suffixes=("", "_right") 364 | ).merge( 365 | squads[["id", "wyscoutId", "heimSpielId", "skillCornerId", "name"]].rename( 366 | columns={"id": "squadId", "name": "squadName"} 367 | ), 368 | left_on="squadId", 369 | right_on="squadId", 370 | how="left", 371 | suffixes=("", "_right") 372 | ) 373 | 374 | # remove NA rows 375 | averages = 
scores[scores.iterationId.notnull()] 376 | 377 | # fix column types 378 | averages["matches"] = averages["matches"].astype("Int64") 379 | averages["iterationId"] = averages["iterationId"].astype("Int64") 380 | averages["squadId"] = averages["squadId"].astype("Int64") 381 | averages["wyscoutId"] = averages["wyscoutId"].astype("Int64") 382 | averages["heimSpielId"] = averages["heimSpielId"].astype("Int64") 383 | averages["skillCornerId"] = averages["skillCornerId"].astype("Int64") 384 | 385 | # define column order 386 | order = [ 387 | "iterationId", 388 | "competitionName", 389 | "season", 390 | "squadId", 391 | "wyscoutId", 392 | "heimSpielId", 393 | "skillCornerId", 394 | "squadName", 395 | "matches" 396 | ] 397 | 398 | # add scoreNames to order 399 | order = order + scores_definitions.name.to_list() 400 | 401 | # select columns 402 | averages = averages[order] 403 | 404 | # return result 405 | return averages -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup 3 | 4 | with open(os.path.join(os.path.abspath(os.path.dirname(__file__)), "README.md")) as f: 5 | README = f.read() 6 | 7 | setup( 8 | # Needed to silence warnings (and to be a worthwhile package) 9 | name="impectPy", 10 | url="https://github.com/ImpectAPI/impectPy", 11 | author="Impect", 12 | author_email="info@impect.com", 13 | # Needed to actually package something 14 | packages=["impectPy"], 15 | # Needed for dependencies 16 | install_requires=["requests>=2.24.0", 17 | "pandas>=2.0.0", 18 | "numpy>=1.24.2,<2.0"], 19 | # *strongly* suggested for sharing 20 | version="2.5.2", 21 | # The license can be anything you like 22 | license="MIT", 23 | description="A Python package to facilitate interaction with the Impect customer API", 24 | long_description=README, 25 | long_description_content_type="text/markdown", 26 | ) 
--------------------------------------------------------------------------------