├── .dbt ├── .gitignore └── profiles.yml ├── .devcontainer.json ├── .env.sample ├── .github ├── .gitattributes └── workflows │ └── main.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── Makefile ├── README.md ├── dbt ├── README.md ├── analysis │ └── .gitkeep ├── data │ └── .gitkeep ├── dbt_project.yml ├── macros │ ├── .gitkeep │ ├── test_between_0_and_1.sql │ └── test_not_negative.sql ├── models │ └── staging │ │ ├── league.sql │ │ ├── match.sql │ │ ├── player.sql │ │ ├── schema.yml │ │ ├── season.sql │ │ ├── shot.sql │ │ └── team.sql ├── snapshots │ └── .gitkeep └── tests │ └── .gitkeep ├── docker-compose.yml ├── docs ├── Gemfile ├── Gemfile.lock ├── _config.yml ├── _data │ ├── sidebars │ │ └── home_sidebar.yml │ └── topnav.yml ├── _site │ ├── Gemfile │ ├── Gemfile.lock │ ├── app.html │ ├── assets │ │ ├── css │ │ │ ├── bootstrap.min.css │ │ │ ├── boxshadowproperties.css │ │ │ ├── customstyles.css │ │ │ ├── font-awesome.min.css │ │ │ ├── fonts │ │ │ │ ├── FontAwesome.otf │ │ │ │ ├── fontawesome-webfont.eot │ │ │ │ ├── fontawesome-webfont.svg │ │ │ │ ├── fontawesome-webfont.ttf │ │ │ │ ├── fontawesome-webfont.woff │ │ │ │ └── fontawesome-webfont.woff2 │ │ │ ├── modern-business.css │ │ │ ├── printstyles.css │ │ │ ├── syntax.css │ │ │ ├── theme-blue.css │ │ │ └── theme-green.css │ │ ├── fonts │ │ │ ├── FontAwesome.otf │ │ │ ├── fontawesome-webfont.eot │ │ │ ├── fontawesome-webfont.svg │ │ │ ├── fontawesome-webfont.ttf │ │ │ ├── fontawesome-webfont.woff │ │ │ ├── glyphicons-halflings-regular.eot │ │ │ ├── glyphicons-halflings-regular.svg │ │ │ ├── glyphicons-halflings-regular.ttf │ │ │ ├── glyphicons-halflings-regular.woff │ │ │ └── glyphicons-halflings-regular.woff2 │ │ ├── images │ │ │ ├── colab.svg │ │ │ ├── company_logo.png │ │ │ ├── company_logo_big.png │ │ │ ├── favicon.ico │ │ │ └── workflowarrow.png │ │ └── js │ │ │ ├── customscripts.js │ │ │ ├── jekyll-search.js │ │ │ ├── jquery.ba-throttle-debounce.min.js │ │ │ ├── jquery.navgoco.min.js │ 
│ │ ├── jquery.shuffle.min.js │ │ │ └── toc.js │ ├── cli.html │ ├── db.html │ ├── feed.xml │ ├── index.html │ ├── sidebar.json │ ├── sitemap.xml │ └── understat.html ├── app.html ├── cli.html ├── db.html ├── feed.xml ├── index.html ├── sidebar.json ├── sitemap.xml └── understat.html ├── nbs ├── .gitattributes ├── cli.ipynb ├── db.ipynb ├── index.ipynb └── understat.ipynb ├── settings.ini ├── setup.py └── understatdb ├── __init__.py ├── _nbdev.py ├── cli.py ├── db.py └── understat.py /.dbt/.gitignore: -------------------------------------------------------------------------------- 1 | .user.yml 2 | -------------------------------------------------------------------------------- /.dbt/profiles.yml: -------------------------------------------------------------------------------- 1 | default: 2 | target: default 3 | outputs: 4 | default: 5 | type: "postgres" 6 | host: "{{ env_var('DB_HOST') }}" 7 | user: "{{ env_var('DB_USER') }}" 8 | pass: "{{ env_var('DB_PASS') }}" 9 | port: "{{ env_var('DB_PORT') | as_number }}" 10 | dbname: "{{ env_var('DB_NAME') }}" 11 | schema: public 12 | threads: 4 13 | -------------------------------------------------------------------------------- /.devcontainer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "nbdev_template-codespaces", 3 | "dockerComposeFile": "docker-compose.yml", 4 | "service": "watcher", 5 | "settings": {"terminal.integrated.shell.linux": "/bin/bash"}, 6 | "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], 7 | "forwardPorts": [4000, 8080], 8 | "appPort": [4000, 8080], 9 | "extensions": ["ms-python.python", 10 | "ms-azuretools.vscode-docker"], 11 | "runServices": ["notebook", "jekyll", "watcher", "db"], 12 | "postStartCommand": "pip install -e ." 
13 | } 14 | -------------------------------------------------------------------------------- /.env.sample: -------------------------------------------------------------------------------- 1 | # Default database config (see docker-compose.yml) 2 | DB_HOST=localhost 3 | DB_USER=postgres 4 | DB_PASS=postgres 5 | DB_NAME=understat 6 | DB_PORT=7865 7 | -------------------------------------------------------------------------------- /.github/.gitattributes: -------------------------------------------------------------------------------- 1 | ../**/*.ipynb linguist-language=python 2 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: [push, pull_request] 3 | jobs: 4 | build: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - uses: actions/checkout@v1 8 | - uses: actions/setup-python@v1 9 | with: 10 | python-version: '3.6' 11 | architecture: 'x64' 12 | - name: Install the library 13 | run: | 14 | pip install nbdev jupyter 15 | pip install -e . 16 | - name: Read all notebooks 17 | run: | 18 | nbdev_read_nbs 19 | - name: Check if all notebooks are cleaned 20 | run: | 21 | echo "Check we are starting with clean git checkout" 22 | if [ -n "$(git status -uno -s)" ]; then echo "git status is not clean"; false; fi 23 | echo "Trying to strip out notebooks" 24 | nbdev_clean_nbs 25 | echo "Check that strip out was unnecessary" 26 | git status -s # display the status to see which nbs need cleaning up 27 | if [ -n "$(git status -uno -s)" ]; then echo -e "!!! Detected unstripped out notebooks\n!!!Remember to run nbdev_install_git_hooks"; false; fi 28 | - name: Check if there is no diff library/notebooks 29 | run: | 30 | if [ -n "$(nbdev_diff_nbs)" ]; then echo -e "!!! 
Detected difference between the notebooks and the library"; false; fi 31 | - name: Run tests 32 | run: | 33 | nbdev_test_nbs 34 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .pg_data 2 | 3 | # Added by nbdev 4 | *.bak 5 | .gitattributes 6 | .last_checked 7 | .gitconfig 8 | *.bak 9 | *.log 10 | *~ 11 | ~* 12 | _tmp* 13 | tmp* 14 | tags 15 | 16 | # Byte-compiled / optimized / DLL files 17 | __pycache__/ 18 | *.py[cod] 19 | *$py.class 20 | 21 | # C extensions 22 | *.so 23 | 24 | # Distribution / packaging 25 | .Python 26 | env/ 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | *.egg-info/ 40 | .installed.cfg 41 | *.egg 42 | 43 | # PyInstaller 44 | # Usually these files are written by a python script from a template 45 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
46 | *.manifest 47 | *.spec 48 | 49 | # Installer logs 50 | pip-log.txt 51 | pip-delete-this-directory.txt 52 | 53 | # Unit test / coverage reports 54 | htmlcov/ 55 | .tox/ 56 | .coverage 57 | .coverage.* 58 | .cache 59 | nosetests.xml 60 | coverage.xml 61 | *.cover 62 | .hypothesis/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | 72 | # Flask stuff: 73 | instance/ 74 | .webassets-cache 75 | 76 | # Scrapy stuff: 77 | .scrapy 78 | 79 | # Sphinx documentation 80 | docs/_build/ 81 | 82 | # PyBuilder 83 | target/ 84 | 85 | # Jupyter Notebook 86 | .ipynb_checkpoints 87 | 88 | # pyenv 89 | .python-version 90 | 91 | # celery beat schedule file 92 | celerybeat-schedule 93 | 94 | # SageMath parsed files 95 | *.sage.py 96 | 97 | # dotenv 98 | .env 99 | 100 | # virtualenv 101 | .venv 102 | venv/ 103 | ENV/ 104 | 105 | # Spyder project settings 106 | .spyderproject 107 | .spyproject 108 | 109 | # Rope project settings 110 | .ropeproject 111 | 112 | # mkdocs documentation 113 | /site 114 | 115 | # mypy 116 | .mypy_cache/ 117 | 118 | .vscode 119 | *.swp 120 | 121 | # osx generated files 122 | .DS_Store 123 | .DS_Store? 124 | .Trashes 125 | ehthumbs.db 126 | Thumbs.db 127 | .idea 128 | 129 | # pytest 130 | .pytest_cache 131 | 132 | # tools/trust-doc-nbs 133 | docs_src/.last_checked 134 | 135 | # symlinks to fastai 136 | docs_src/fastai 137 | tools/fastai 138 | 139 | # link checker 140 | checklink/cookies.txt 141 | 142 | # .gitconfig is now autogenerated 143 | .gitconfig 144 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to contribute 2 | 3 | ## How to get started 4 | 5 | Before anything else, please install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). 
After cloning the repository, run the following command inside it: 6 | ``` 7 | nbdev_install_git_hooks 8 | ``` 9 | 10 | ## Did you find a bug? 11 | 12 | * Ensure the bug was not already reported by searching on GitHub under Issues. 13 | * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring. 14 | * Be sure to add the complete error messages. 15 | 16 | #### Did you write a patch that fixes a bug? 17 | 18 | * Open a new GitHub pull request with the patch. 19 | * Ensure that your PR includes a test that fails without your patch, and passes with it. 20 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable. 21 | 22 | ## PR submission guidelines 23 | 24 | * Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needed to keep each PR focused. 25 | * Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs and they will most likely get rejected. 26 | * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can. 27 | * Do not turn an already submitted PR into your development playground. If, after you submitted a PR, you discover that more work is needed - close the PR, do the required work and then submit a new PR. Otherwise each of your commits requires attention from maintainers of the project. 28 | * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. 
In the exceptional case where you realize it'll take many many commits to complete the requests, then it's probably best to close the PR, do the work and then submit it again. Use common sense where you'd choose one way over another. 29 | 30 | ## Do you want to contribute to the documentation? 31 | 32 | * Docs are automatically created from the notebooks in the nbs folder. 33 | 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2021 Ben Torvaney 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in the 5 | Software without restriction, including without limitation the rights to use, copy, 6 | modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 7 | and to permit persons to whom the Software is furnished to do so, subject to the 8 | following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 15 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 17 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include settings.ini 2 | include LICENSE 3 | include CONTRIBUTING.md 4 | include README.md 5 | recursive-exclude * __pycache__ 6 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .ONESHELL: 2 | SHELL := /bin/bash 3 | SRC = $(wildcard nbs/*.ipynb) 4 | PYTHON_VENV ?= venv 5 | 6 | # nbdev commands 7 | 8 | all: understatdb docs 9 | 10 | understatdb: $(SRC) 11 | nbdev_build_lib 12 | touch understatdb 13 | 14 | sync: 15 | nbdev_update_lib 16 | 17 | docs_serve: docs 18 | cd docs && bundle exec jekyll serve 19 | 20 | docs: $(SRC) 21 | nbdev_build_docs 22 | touch docs 23 | 24 | test: 25 | nbdev_test_nbs 26 | 27 | release: pypi conda_release 28 | nbdev_bump_version 29 | 30 | conda_release: 31 | fastrelease_conda_package 32 | 33 | pypi: dist 34 | twine upload --repository pypi dist/* 35 | 36 | dist: clean 37 | python setup.py sdist bdist_wheel 38 | 39 | clean: 40 | rm -rf dist 41 | 42 | env: 43 | python -m venv $(PYTHON_VENV) 44 | $(PYTHON_VENV)/bin/pip install --upgrade pip 45 | $(PYTHON_VENV)/bin/pip install -e .[dev] 46 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Understat DB 2 | > Create a database using data from Understat. 3 | 4 | 5 | Understat DB is a project to scrape data from [Understat](https://understat.com) and store it in a Postgres database. It aims to be a useful companion or starting point for projects using football data. 6 | 7 | ## Usage 8 | 9 | The simplest way to get started is to populate a local database with `docker-compose`. 
10 | 11 | First, clone the repository: 12 | 13 | ```bash 14 | git clone https://github.com/Torvaney/understat-db.git 15 | cd understat-db 16 | ``` 17 | 18 | Then, set up the local environment 19 | 20 | ```bash 21 | make env # Create a virtualenv and install the project & dependencies 22 | source venv/bin/activate # Activate the virtualenv 23 | cp .env.sample .env # Copy default environment vars to .env 24 | ``` 25 | 26 | Run the database 27 | 28 | ```bash 29 | docker-compose up -d db # Start a postgres database within a docker container 30 | understat-db migrate # Create base database tables 31 | ``` 32 | 33 | Finally, import the data you want 34 | 35 | ```bash 36 | understat-db ingest --leagues EPL --seasons 2020 37 | ``` 38 | 39 | ## Requirements 40 | 41 | To run this project you will need: 42 | 43 | * Python 3.6+ 44 | * Docker 45 | 46 | 47 | ## Contributing 48 | 49 | Pull requests are encouraged! For major changes, please open an issue first to discuss what you would like to change. 50 | 51 | ## License 52 | 53 | [MIT](https://choosealicense.com/licenses/mit/) 54 | -------------------------------------------------------------------------------- /dbt/README.md: -------------------------------------------------------------------------------- 1 | Welcome to your new dbt project! 
2 | 3 | ### Using the starter project 4 | 5 | Try running the following commands: 6 | - dbt run 7 | - dbt test 8 | 9 | 10 | ### Resources: 11 | - Learn more about dbt [in the docs](https://docs.getdbt.com/docs/introduction) 12 | - Check out [Discourse](https://discourse.getdbt.com/) for commonly asked questions and answers 13 | - Join the [chat](http://slack.getdbt.com/) on Slack for live discussions and support 14 | - Find [dbt events](https://events.getdbt.com) near you 15 | - Check out [the blog](https://blog.getdbt.com/) for the latest news on dbt's development and best practices 16 | -------------------------------------------------------------------------------- /dbt/analysis/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/dbt/analysis/.gitkeep -------------------------------------------------------------------------------- /dbt/data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/dbt/data/.gitkeep -------------------------------------------------------------------------------- /dbt/dbt_project.yml: -------------------------------------------------------------------------------- 1 | name: 'understat_dbt' 2 | version: '1.0.0' 3 | config-version: 2 4 | 5 | profile: 'default' 6 | 7 | source-paths: ["models"] 8 | analysis-paths: ["analysis"] 9 | test-paths: ["tests"] 10 | data-paths: ["data"] 11 | macro-paths: ["macros"] 12 | snapshot-paths: ["snapshots"] 13 | 14 | target-path: "target" 15 | clean-targets: 16 | - "target" 17 | - "dbt_modules" 18 | -------------------------------------------------------------------------------- /dbt/macros/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/dbt/macros/.gitkeep -------------------------------------------------------------------------------- /dbt/macros/test_between_0_and_1.sql: -------------------------------------------------------------------------------- 1 | {% macro test_between_0_and_1(model, column_name) %} 2 | 3 | with validation as ( 4 | 5 | select 6 | {{ column_name }} as percent_field 7 | from {{ model }} 8 | 9 | ), 10 | 11 | validation_errors as ( 12 | 13 | select 14 | percent_field 15 | from validation 16 | where (percent_field < 0 or percent_field > 1) 17 | -- allow null values since this test can be combined with 18 | -- a not_null test 19 | and (percent_field is not null) 20 | 21 | ) 22 | 23 | select count(*) 24 | from validation_errors 25 | 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /dbt/macros/test_not_negative.sql: -------------------------------------------------------------------------------- 1 | {% macro test_not_negative(model, column_name) %} 2 | 3 | with validation as ( 4 | 5 | select 6 | {{ column_name }} as not_negative_field 7 | from {{ model }} 8 | 9 | ), 10 | 11 | validation_errors as ( 12 | 13 | select 14 | not_negative_field 15 | from validation 16 | where (not_negative_field < 0) 17 | -- allow null values since this test can be combined with 18 | -- a not_null test 19 | and (not_negative_field is not null) 20 | 21 | ) 22 | 23 | select count(*) 24 | from validation_errors 25 | 26 | {% endmacro %} 27 | -------------------------------------------------------------------------------- /dbt/models/staging/league.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config(materialized='table') 3 | }} 4 | 5 | select * from "base_league" 6 | -------------------------------------------------------------------------------- /dbt/models/staging/match.sql: 
-------------------------------------------------------------------------------- 1 | {{ 2 | config(materialized='table') 3 | }} 4 | 5 | with 6 | 7 | unnested as ( 8 | 9 | select 10 | league_id, 11 | season_id, 12 | json_array_elements(json) as json 13 | from base_matches 14 | 15 | ), 16 | 17 | match as ( 18 | 19 | select 20 | league_id, 21 | season_id, 22 | (json ->> 'id')::int as id, 23 | (json ->> 'datetime')::timestamp as kickoff, 24 | (json ->> 'isResult')::bool as is_result, 25 | (json -> 'h' ->> 'id')::int as home_team_id, 26 | (json -> 'a' ->> 'id')::int as away_team_id, 27 | (json -> 'goals' ->> 'h')::int as home_goals, 28 | (json -> 'goals' ->> 'a')::int as away_goals, 29 | (json -> 'xG' ->> 'h')::float as home_xg, 30 | (json -> 'xG' ->> 'a')::float as away_xg, 31 | (json -> 'forecast' ->> 'w')::float as forecast_h, 32 | (json -> 'forecast' ->> 'd')::float as forecast_d, 33 | (json -> 'forecast' ->> 'l')::float as forecast_a 34 | from unnested 35 | 36 | ) 37 | 38 | select * from match 39 | -------------------------------------------------------------------------------- /dbt/models/staging/player.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config(materialized='table') 3 | }} 4 | 5 | with 6 | 7 | 8 | shots_json as ( 9 | 10 | -- Home shots 11 | select 12 | match_id, 13 | json_array_elements(json -> 'h') as json 14 | from base_shots 15 | 16 | union all 17 | 18 | -- Away shots 19 | select 20 | match_id, 21 | json_array_elements(json -> 'a') as json 22 | from base_shots 23 | 24 | ), 25 | 26 | 27 | base_players as ( 28 | 29 | select 30 | (json ->> 'player_id')::int as id, 31 | (json ->> 'player') as name 32 | from shots_json 33 | 34 | ), 35 | 36 | 37 | players as ( 38 | 39 | select distinct on (id) 40 | id, 41 | name 42 | from base_players 43 | 44 | ) 45 | 46 | select * from players order by id 47 | -------------------------------------------------------------------------------- /dbt/models/staging/schema.yml: 
-------------------------------------------------------------------------------- 1 | 2 | version: 2 3 | 4 | models: 5 | 6 | - name: match 7 | description: "Understat match data" 8 | columns: 9 | - name: id 10 | description: "The Match ID provided by Understat (primary key)" 11 | tests: 12 | - unique 13 | - not_null 14 | - name: kickoff 15 | tests: 16 | - not_null 17 | - name: is_result 18 | description: "Whether or not the match has been completed, with match data available" 19 | tests: 20 | - not_null 21 | - name: home_team_id 22 | tests: 23 | - not_null 24 | - relationships: 25 | to: ref('team') 26 | field: id 27 | - name: away_team_id 28 | tests: 29 | - not_null 30 | - relationships: 31 | to: ref('team') 32 | field: id 33 | - name: home_goals 34 | tests: 35 | - not_negative 36 | - name: away_goals 37 | tests: 38 | - not_negative 39 | - name: home_xg 40 | tests: 41 | - not_negative 42 | - name: away_xg 43 | tests: 44 | - not_negative 45 | - name: forecast_h 46 | tests: 47 | - between_0_and_1 48 | - name: forecast_d 49 | tests: 50 | - between_0_and_1 51 | - name: forecast_a 52 | tests: 53 | - between_0_and_1 54 | 55 | - name: league 56 | description: "Understat league data" 57 | columns: 58 | - name: id 59 | description: "The League ID (primary key)" 60 | tests: 61 | - unique 62 | - not_null 63 | - name: name 64 | 65 | - name: season 66 | description: "Understat season data" 67 | columns: 68 | - name: id 69 | description: "The Season ID (primary key)" 70 | tests: 71 | - unique 72 | - not_null 73 | - name: name 74 | 75 | - name: player 76 | description: "Understat player data" 77 | columns: 78 | - name: id 79 | description: "The Player ID provided by Understat (primary key)" 80 | tests: 81 | - unique 82 | - not_null 83 | - name: name 84 | 85 | - name: team 86 | description: "Understat team data" 87 | columns: 88 | - name: id 89 | description: "The Team ID provided by Understat (primary key)" 90 | tests: 91 | - unique 92 | - not_null 93 | - name: title 94 | 
description: "The team name" 95 | tests: 96 | - not_null 97 | - name: short_title 98 | description: "A short version of the team name" 99 | tests: 100 | - not_null 101 | 102 | - name: shot 103 | description: "Understat shot data" 104 | columns: 105 | - name: id 106 | description: "The Shot ID provided by Understat (primary key)" 107 | tests: 108 | - unique 109 | - not_null 110 | - name: match_id 111 | tests: 112 | - not_null 113 | - relationships: 114 | to: ref('match') 115 | field: id 116 | - name: minute 117 | - name: x 118 | description: "x-coordinate of the shot (along the touchline, 1 = opponent's goal)" 119 | tests: 120 | - between_0_and_1 121 | - name: y 122 | description: "y-coordinate of the shot (along the goal line)" 123 | tests: 124 | - between_0_and_1 125 | - name: xg 126 | description: "Expected Goals (xG) per Understat's model" 127 | tests: 128 | - between_0_and_1 129 | - name: body_part 130 | description: "Body part with which the shot was taken" 131 | - name: result 132 | description: "Shot outcome" 133 | - name: situation 134 | description: "The type of play in which the shot was taken" 135 | - name: previous_action 136 | description: "The type of action preceding the shot" 137 | - name: player_id 138 | tests: 139 | - not_null 140 | - relationships: 141 | to: ref('player') 142 | field: id 143 | - name: is_home 144 | - name: team_id 145 | tests: 146 | - not_null 147 | - relationships: 148 | to: ref('team') 149 | field: id 150 | - name: opponent_id 151 | tests: 152 | - not_null 153 | - relationships: 154 | to: ref('team') 155 | field: id 156 | -------------------------------------------------------------------------------- /dbt/models/staging/season.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config(materialized='table') 3 | }} 4 | 5 | select * from "base_season" 6 | -------------------------------------------------------------------------------- /dbt/models/staging/shot.sql: 
-------------------------------------------------------------------------------- 1 | {{ 2 | config(materialized='table') 3 | }} 4 | 5 | with 6 | 7 | 8 | matches as ( 9 | 10 | select * from {{ ref('match') }} 11 | 12 | ), 13 | 14 | 15 | shots_json as ( 16 | 17 | -- Home shots 18 | select 19 | match_id, 20 | json_array_elements(json -> 'h') as json 21 | from base_shots 22 | 23 | union all 24 | 25 | -- Away shots 26 | select 27 | match_id, 28 | json_array_elements(json -> 'a') as json 29 | from base_shots 30 | 31 | ), 32 | 33 | 34 | base_shots as ( 35 | 36 | select 37 | (json ->> 'id')::int as id, 38 | match_id as match_id, 39 | (json ->> 'minute')::int as minute, 40 | (json ->> 'X')::float as x, 41 | (json ->> 'Y')::float as y, 42 | (json ->> 'xG')::float as xg, 43 | (json ->> 'shotType') as body_part, 44 | (json ->> 'result') as result, 45 | (json ->> 'situation') as situation, 46 | (json ->> 'lastAction') as previous_action, 47 | (json ->> 'player_id')::int as player_id, 48 | (json ->> 'h_a') = 'h' as is_home 49 | from shots_json 50 | 51 | ), 52 | 53 | 54 | -- Join back to the matches table to get the shooting team ID 55 | shots as ( 56 | 57 | select 58 | base_shots.*, 59 | case 60 | when is_home then home_team_id 61 | when not is_home then away_team_id 62 | end as team_id, 63 | case 64 | when is_home then away_team_id 65 | when not is_home then home_team_id 66 | end as opponent_id 67 | from base_shots 68 | left join matches 69 | on matches.id = base_shots.match_id 70 | 71 | ) 72 | 73 | select * from shots order by id 74 | -------------------------------------------------------------------------------- /dbt/models/staging/team.sql: -------------------------------------------------------------------------------- 1 | {{ 2 | config(materialized='table') 3 | }} 4 | 5 | with 6 | 7 | unnested as ( 8 | 9 | select 10 | league_id, 11 | season_id, 12 | json_array_elements(json) as json 13 | from base_matches 14 | 15 | ), 16 | 17 | 18 | home_teams as ( 19 | 20 | select 21 | 
(json -> 'h' ->> 'id')::int as id, 22 | (json -> 'h' ->> 'title') as title, 23 | (json -> 'h' ->> 'short_title') as short_title 24 | from unnested 25 | 26 | ), 27 | 28 | 29 | away_teams as ( 30 | 31 | select 32 | (json -> 'a' ->> 'id')::int as id, 33 | (json -> 'a' ->> 'title') as title, 34 | (json -> 'a' ->> 'short_title') as short_title 35 | from unnested 36 | 37 | ), 38 | 39 | 40 | home_and_away_teams as ( 41 | 42 | select * from home_teams 43 | union all 44 | select * from away_teams 45 | 46 | ), 47 | 48 | 49 | teams as ( 50 | 51 | select distinct on (id) 52 | * 53 | from home_and_away_teams 54 | order by id 55 | 56 | ) 57 | 58 | 59 | select * from teams 60 | -------------------------------------------------------------------------------- /dbt/snapshots/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/dbt/snapshots/.gitkeep -------------------------------------------------------------------------------- /dbt/tests/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/dbt/tests/.gitkeep -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | 4 | fastai: &fastai 5 | restart: unless-stopped 6 | working_dir: /data 7 | image: fastai/codespaces 8 | logging: 9 | driver: json-file 10 | options: 11 | max-size: 50m 12 | stdin_open: true 13 | tty: true 14 | volumes: 15 | - .:/data/ 16 | 17 | notebook: 18 | <<: *fastai 19 | command: bash -c "pip install -e . 
&& jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port=8080 --NotebookApp.token='' --NotebookApp.password='' --notebook-dir nbs" 20 | ports: 21 | - "8080:8080" 22 | 23 | watcher: 24 | <<: *fastai 25 | command: watchmedo shell-command --command nbdev_build_docs --pattern *.ipynb --recursive --drop 26 | network_mode: host # for GitHub Codespaces https://github.com/features/codespaces/ 27 | 28 | jekyll: 29 | <<: *fastai 30 | ports: 31 | - "4000:4000" 32 | command: > 33 | bash -c "pip install -e . 34 | && nbdev_build_docs && cd docs 35 | && bundle i 36 | && chmod -R u+rwx . && bundle exec jekyll serve --host 0.0.0.0" 37 | 38 | db: 39 | image: postgres:12.5 # Make sure we use v12+ for CTE inlining 40 | environment: 41 | POSTGRES_USER: postgres 42 | POSTGRES_PASSWORD: postgres 43 | POSTGRES_DB: understat 44 | volumes: 45 | - ./.pg_data:/var/lib/postgresql/data 46 | ports: 47 | - 7865:5432 48 | -------------------------------------------------------------------------------- /docs/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem 'github-pages', group: :jekyll_plugins 4 | 5 | # Added at 2019-11-25 10:11:40 -0800 by jhoward: 6 | gem "nokogiri", "< 1.11.1" 7 | gem "jekyll", ">= 3.7" 8 | gem "kramdown", ">= 2.3.1" 9 | gem "jekyll-remote-theme" 10 | 11 | gem "webrick", "~> 1.7" 12 | -------------------------------------------------------------------------------- /docs/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (6.0.3.6) 5 | concurrent-ruby (~> 1.0, >= 1.0.2) 6 | i18n (>= 0.7, < 2) 7 | minitest (~> 5.1) 8 | tzinfo (~> 1.1) 9 | zeitwerk (~> 2.2, >= 2.2.2) 10 | addressable (2.7.0) 11 | public_suffix (>= 2.0.2, < 5.0) 12 | coffee-script (2.4.1) 13 | coffee-script-source 14 | execjs 15 | coffee-script-source (1.11.1) 16 | colorator (1.1.0) 17 | commonmarker 
(0.17.13) 18 | ruby-enum (~> 0.5) 19 | concurrent-ruby (1.1.8) 20 | dnsruby (1.61.5) 21 | simpleidn (~> 0.1) 22 | em-websocket (0.5.2) 23 | eventmachine (>= 0.12.9) 24 | http_parser.rb (~> 0.6.0) 25 | ethon (0.12.0) 26 | ffi (>= 1.3.0) 27 | eventmachine (1.2.7) 28 | execjs (2.7.0) 29 | faraday (1.3.0) 30 | faraday-net_http (~> 1.0) 31 | multipart-post (>= 1.2, < 3) 32 | ruby2_keywords 33 | faraday-net_http (1.0.1) 34 | ffi (1.15.0) 35 | forwardable-extended (2.6.0) 36 | gemoji (3.0.1) 37 | github-pages (214) 38 | github-pages-health-check (= 1.17.0) 39 | jekyll (= 3.9.0) 40 | jekyll-avatar (= 0.7.0) 41 | jekyll-coffeescript (= 1.1.1) 42 | jekyll-commonmark-ghpages (= 0.1.6) 43 | jekyll-default-layout (= 0.1.4) 44 | jekyll-feed (= 0.15.1) 45 | jekyll-gist (= 1.5.0) 46 | jekyll-github-metadata (= 2.13.0) 47 | jekyll-mentions (= 1.6.0) 48 | jekyll-optional-front-matter (= 0.3.2) 49 | jekyll-paginate (= 1.1.0) 50 | jekyll-readme-index (= 0.3.0) 51 | jekyll-redirect-from (= 0.16.0) 52 | jekyll-relative-links (= 0.6.1) 53 | jekyll-remote-theme (= 0.4.3) 54 | jekyll-sass-converter (= 1.5.2) 55 | jekyll-seo-tag (= 2.7.1) 56 | jekyll-sitemap (= 1.4.0) 57 | jekyll-swiss (= 1.0.0) 58 | jekyll-theme-architect (= 0.1.1) 59 | jekyll-theme-cayman (= 0.1.1) 60 | jekyll-theme-dinky (= 0.1.1) 61 | jekyll-theme-hacker (= 0.1.2) 62 | jekyll-theme-leap-day (= 0.1.1) 63 | jekyll-theme-merlot (= 0.1.1) 64 | jekyll-theme-midnight (= 0.1.1) 65 | jekyll-theme-minimal (= 0.1.1) 66 | jekyll-theme-modernist (= 0.1.1) 67 | jekyll-theme-primer (= 0.5.4) 68 | jekyll-theme-slate (= 0.1.1) 69 | jekyll-theme-tactile (= 0.1.1) 70 | jekyll-theme-time-machine (= 0.1.1) 71 | jekyll-titles-from-headings (= 0.5.3) 72 | jemoji (= 0.12.0) 73 | kramdown (= 2.3.1) 74 | kramdown-parser-gfm (= 1.1.0) 75 | liquid (= 4.0.3) 76 | mercenary (~> 0.3) 77 | minima (= 2.5.1) 78 | nokogiri (>= 1.10.4, < 2.0) 79 | rouge (= 3.26.0) 80 | terminal-table (~> 1.4) 81 | github-pages-health-check (1.17.0) 82 | addressable (~> 
2.3) 83 | dnsruby (~> 1.60) 84 | octokit (~> 4.0) 85 | public_suffix (>= 2.0.2, < 5.0) 86 | typhoeus (~> 1.3) 87 | html-pipeline (2.14.0) 88 | activesupport (>= 2) 89 | nokogiri (>= 1.4) 90 | http_parser.rb (0.6.0) 91 | i18n (0.9.5) 92 | concurrent-ruby (~> 1.0) 93 | jekyll (3.9.0) 94 | addressable (~> 2.4) 95 | colorator (~> 1.0) 96 | em-websocket (~> 0.5) 97 | i18n (~> 0.7) 98 | jekyll-sass-converter (~> 1.0) 99 | jekyll-watch (~> 2.0) 100 | kramdown (>= 1.17, < 3) 101 | liquid (~> 4.0) 102 | mercenary (~> 0.3.3) 103 | pathutil (~> 0.9) 104 | rouge (>= 1.7, < 4) 105 | safe_yaml (~> 1.0) 106 | jekyll-avatar (0.7.0) 107 | jekyll (>= 3.0, < 5.0) 108 | jekyll-coffeescript (1.1.1) 109 | coffee-script (~> 2.2) 110 | coffee-script-source (~> 1.11.1) 111 | jekyll-commonmark (1.3.1) 112 | commonmarker (~> 0.14) 113 | jekyll (>= 3.7, < 5.0) 114 | jekyll-commonmark-ghpages (0.1.6) 115 | commonmarker (~> 0.17.6) 116 | jekyll-commonmark (~> 1.2) 117 | rouge (>= 2.0, < 4.0) 118 | jekyll-default-layout (0.1.4) 119 | jekyll (~> 3.0) 120 | jekyll-feed (0.15.1) 121 | jekyll (>= 3.7, < 5.0) 122 | jekyll-gist (1.5.0) 123 | octokit (~> 4.2) 124 | jekyll-github-metadata (2.13.0) 125 | jekyll (>= 3.4, < 5.0) 126 | octokit (~> 4.0, != 4.4.0) 127 | jekyll-mentions (1.6.0) 128 | html-pipeline (~> 2.3) 129 | jekyll (>= 3.7, < 5.0) 130 | jekyll-optional-front-matter (0.3.2) 131 | jekyll (>= 3.0, < 5.0) 132 | jekyll-paginate (1.1.0) 133 | jekyll-readme-index (0.3.0) 134 | jekyll (>= 3.0, < 5.0) 135 | jekyll-redirect-from (0.16.0) 136 | jekyll (>= 3.3, < 5.0) 137 | jekyll-relative-links (0.6.1) 138 | jekyll (>= 3.3, < 5.0) 139 | jekyll-remote-theme (0.4.3) 140 | addressable (~> 2.0) 141 | jekyll (>= 3.5, < 5.0) 142 | jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) 143 | rubyzip (>= 1.3.0, < 3.0) 144 | jekyll-sass-converter (1.5.2) 145 | sass (~> 3.4) 146 | jekyll-seo-tag (2.7.1) 147 | jekyll (>= 3.8, < 5.0) 148 | jekyll-sitemap (1.4.0) 149 | jekyll (>= 3.7, < 5.0) 150 | jekyll-swiss 
(1.0.0) 151 | jekyll-theme-architect (0.1.1) 152 | jekyll (~> 3.5) 153 | jekyll-seo-tag (~> 2.0) 154 | jekyll-theme-cayman (0.1.1) 155 | jekyll (~> 3.5) 156 | jekyll-seo-tag (~> 2.0) 157 | jekyll-theme-dinky (0.1.1) 158 | jekyll (~> 3.5) 159 | jekyll-seo-tag (~> 2.0) 160 | jekyll-theme-hacker (0.1.2) 161 | jekyll (> 3.5, < 5.0) 162 | jekyll-seo-tag (~> 2.0) 163 | jekyll-theme-leap-day (0.1.1) 164 | jekyll (~> 3.5) 165 | jekyll-seo-tag (~> 2.0) 166 | jekyll-theme-merlot (0.1.1) 167 | jekyll (~> 3.5) 168 | jekyll-seo-tag (~> 2.0) 169 | jekyll-theme-midnight (0.1.1) 170 | jekyll (~> 3.5) 171 | jekyll-seo-tag (~> 2.0) 172 | jekyll-theme-minimal (0.1.1) 173 | jekyll (~> 3.5) 174 | jekyll-seo-tag (~> 2.0) 175 | jekyll-theme-modernist (0.1.1) 176 | jekyll (~> 3.5) 177 | jekyll-seo-tag (~> 2.0) 178 | jekyll-theme-primer (0.5.4) 179 | jekyll (> 3.5, < 5.0) 180 | jekyll-github-metadata (~> 2.9) 181 | jekyll-seo-tag (~> 2.0) 182 | jekyll-theme-slate (0.1.1) 183 | jekyll (~> 3.5) 184 | jekyll-seo-tag (~> 2.0) 185 | jekyll-theme-tactile (0.1.1) 186 | jekyll (~> 3.5) 187 | jekyll-seo-tag (~> 2.0) 188 | jekyll-theme-time-machine (0.1.1) 189 | jekyll (~> 3.5) 190 | jekyll-seo-tag (~> 2.0) 191 | jekyll-titles-from-headings (0.5.3) 192 | jekyll (>= 3.3, < 5.0) 193 | jekyll-watch (2.2.1) 194 | listen (~> 3.0) 195 | jemoji (0.12.0) 196 | gemoji (~> 3.0) 197 | html-pipeline (~> 2.2) 198 | jekyll (>= 3.0, < 5.0) 199 | kramdown (2.3.1) 200 | rexml 201 | kramdown-parser-gfm (1.1.0) 202 | kramdown (~> 2.0) 203 | liquid (4.0.3) 204 | listen (3.5.1) 205 | rb-fsevent (~> 0.10, >= 0.10.3) 206 | rb-inotify (~> 0.9, >= 0.9.10) 207 | mercenary (0.3.6) 208 | mini_portile2 (2.5.0) 209 | minima (2.5.1) 210 | jekyll (>= 3.5, < 5.0) 211 | jekyll-feed (~> 0.9) 212 | jekyll-seo-tag (~> 2.1) 213 | minitest (5.14.4) 214 | multipart-post (2.1.1) 215 | nokogiri (1.11.0) 216 | mini_portile2 (~> 2.5.0) 217 | racc (~> 1.4) 218 | octokit (4.20.0) 219 | faraday (>= 0.9) 220 | sawyer (~> 0.8.0, >= 0.5.3) 221 | 
pathutil (0.16.2) 222 | forwardable-extended (~> 2.6) 223 | public_suffix (4.0.6) 224 | racc (1.5.2) 225 | rb-fsevent (0.10.4) 226 | rb-inotify (0.10.1) 227 | ffi (~> 1.0) 228 | rexml (3.2.5) 229 | rouge (3.26.0) 230 | ruby-enum (0.9.0) 231 | i18n 232 | ruby2_keywords (0.0.4) 233 | rubyzip (2.3.0) 234 | safe_yaml (1.0.5) 235 | sass (3.7.4) 236 | sass-listen (~> 4.0.0) 237 | sass-listen (4.0.0) 238 | rb-fsevent (~> 0.9, >= 0.9.4) 239 | rb-inotify (~> 0.9, >= 0.9.7) 240 | sawyer (0.8.2) 241 | addressable (>= 2.3.5) 242 | faraday (> 0.8, < 2.0) 243 | simpleidn (0.2.1) 244 | unf (~> 0.1.4) 245 | terminal-table (1.8.0) 246 | unicode-display_width (~> 1.1, >= 1.1.1) 247 | thread_safe (0.3.6) 248 | typhoeus (1.4.0) 249 | ethon (>= 0.9.0) 250 | tzinfo (1.2.9) 251 | thread_safe (~> 0.1) 252 | unf (0.1.4) 253 | unf_ext 254 | unf_ext (0.0.7.7) 255 | unicode-display_width (1.7.0) 256 | webrick (1.7.0) 257 | zeitwerk (2.4.2) 258 | 259 | PLATFORMS 260 | ruby 261 | 262 | DEPENDENCIES 263 | github-pages 264 | jekyll (>= 3.7) 265 | jekyll-remote-theme 266 | kramdown (>= 2.3.1) 267 | nokogiri (< 1.11.1) 268 | webrick (~> 1.7) 269 | 270 | BUNDLED WITH 271 | 2.2.17 272 | -------------------------------------------------------------------------------- /docs/_config.yml: -------------------------------------------------------------------------------- 1 | repository: torvaney/understatdb 2 | output: web 3 | topnav_title: understatdb 4 | site_title: understatdb 5 | company_name: Ben Torvaney # Required arg :( 6 | description: An extendable project for creating a database of soccer data 7 | # Set to false to disable KaTeX math 8 | use_math: true 9 | # Add Google analytics id if you have one and want to use it here 10 | google_analytics: 11 | # See http://nbdev.fast.ai/search for help with adding Search 12 | google_search: 13 | 14 | host: 127.0.0.1 15 | # the preview server used. Leave as is. 16 | port: 4000 17 | # the port where the preview is rendered. 
18 | 19 | exclude: 20 | - .idea/ 21 | - .gitignore 22 | - vendor 23 | 24 | exclude: [vendor] 25 | 26 | highlighter: rouge 27 | markdown: kramdown 28 | kramdown: 29 | input: GFM 30 | auto_ids: true 31 | hard_wrap: false 32 | syntax_highlighter: rouge 33 | 34 | collections: 35 | tooltips: 36 | output: false 37 | 38 | defaults: 39 | - 40 | scope: 41 | path: "" 42 | type: "pages" 43 | values: 44 | layout: "page" 45 | comments: true 46 | search: true 47 | sidebar: home_sidebar 48 | topnav: topnav 49 | - 50 | scope: 51 | path: "" 52 | type: "tooltips" 53 | values: 54 | layout: "page" 55 | comments: true 56 | search: true 57 | tooltip: true 58 | 59 | sidebars: 60 | - home_sidebar 61 | 62 | plugins: 63 | - jekyll-remote-theme 64 | 65 | remote_theme: fastai/nbdev-jekyll-theme 66 | baseurl: /understatdb/ -------------------------------------------------------------------------------- /docs/_data/sidebars/home_sidebar.yml: -------------------------------------------------------------------------------- 1 | 2 | ################################################# 3 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! 
### 4 | ################################################# 5 | # Instead edit ../../sidebar.json 6 | entries: 7 | - folders: 8 | - folderitems: 9 | - output: web,pdf 10 | title: Overview 11 | url: / 12 | - output: web,pdf 13 | title: Understat-DB CLI 14 | url: cli.html 15 | - output: web,pdf 16 | title: Database 17 | url: db.html 18 | - output: web,pdf 19 | title: Understat 20 | url: understat.html 21 | output: web 22 | title: understatdb 23 | output: web 24 | title: Sidebar 25 | -------------------------------------------------------------------------------- /docs/_data/topnav.yml: -------------------------------------------------------------------------------- 1 | topnav: 2 | - title: Topnav 3 | items: 4 | - title: github 5 | external_url: https://github.com/torvaney/understatdb/tree/master/ 6 | 7 | #Topnav dropdowns 8 | topnav_dropdowns: 9 | - title: Topnav dropdowns 10 | folders: -------------------------------------------------------------------------------- /docs/_site/Gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gem 'github-pages', group: :jekyll_plugins 4 | 5 | # Added at 2019-11-25 10:11:40 -0800 by jhoward: 6 | gem "nokogiri", "< 1.11.1" 7 | gem "jekyll", ">= 3.7" 8 | gem "kramdown", ">= 2.3.1" 9 | gem "jekyll-remote-theme" 10 | 11 | gem "webrick", "~> 1.7" 12 | -------------------------------------------------------------------------------- /docs/_site/Gemfile.lock: -------------------------------------------------------------------------------- 1 | GEM 2 | remote: https://rubygems.org/ 3 | specs: 4 | activesupport (6.0.3.6) 5 | concurrent-ruby (~> 1.0, >= 1.0.2) 6 | i18n (>= 0.7, < 2) 7 | minitest (~> 5.1) 8 | tzinfo (~> 1.1) 9 | zeitwerk (~> 2.2, >= 2.2.2) 10 | addressable (2.7.0) 11 | public_suffix (>= 2.0.2, < 5.0) 12 | coffee-script (2.4.1) 13 | coffee-script-source 14 | execjs 15 | coffee-script-source (1.11.1) 16 | colorator (1.1.0) 17 | commonmarker (0.17.13) 18 
| ruby-enum (~> 0.5) 19 | concurrent-ruby (1.1.8) 20 | dnsruby (1.61.5) 21 | simpleidn (~> 0.1) 22 | em-websocket (0.5.2) 23 | eventmachine (>= 0.12.9) 24 | http_parser.rb (~> 0.6.0) 25 | ethon (0.12.0) 26 | ffi (>= 1.3.0) 27 | eventmachine (1.2.7) 28 | execjs (2.7.0) 29 | faraday (1.3.0) 30 | faraday-net_http (~> 1.0) 31 | multipart-post (>= 1.2, < 3) 32 | ruby2_keywords 33 | faraday-net_http (1.0.1) 34 | ffi (1.15.0) 35 | forwardable-extended (2.6.0) 36 | gemoji (3.0.1) 37 | github-pages (214) 38 | github-pages-health-check (= 1.17.0) 39 | jekyll (= 3.9.0) 40 | jekyll-avatar (= 0.7.0) 41 | jekyll-coffeescript (= 1.1.1) 42 | jekyll-commonmark-ghpages (= 0.1.6) 43 | jekyll-default-layout (= 0.1.4) 44 | jekyll-feed (= 0.15.1) 45 | jekyll-gist (= 1.5.0) 46 | jekyll-github-metadata (= 2.13.0) 47 | jekyll-mentions (= 1.6.0) 48 | jekyll-optional-front-matter (= 0.3.2) 49 | jekyll-paginate (= 1.1.0) 50 | jekyll-readme-index (= 0.3.0) 51 | jekyll-redirect-from (= 0.16.0) 52 | jekyll-relative-links (= 0.6.1) 53 | jekyll-remote-theme (= 0.4.3) 54 | jekyll-sass-converter (= 1.5.2) 55 | jekyll-seo-tag (= 2.7.1) 56 | jekyll-sitemap (= 1.4.0) 57 | jekyll-swiss (= 1.0.0) 58 | jekyll-theme-architect (= 0.1.1) 59 | jekyll-theme-cayman (= 0.1.1) 60 | jekyll-theme-dinky (= 0.1.1) 61 | jekyll-theme-hacker (= 0.1.2) 62 | jekyll-theme-leap-day (= 0.1.1) 63 | jekyll-theme-merlot (= 0.1.1) 64 | jekyll-theme-midnight (= 0.1.1) 65 | jekyll-theme-minimal (= 0.1.1) 66 | jekyll-theme-modernist (= 0.1.1) 67 | jekyll-theme-primer (= 0.5.4) 68 | jekyll-theme-slate (= 0.1.1) 69 | jekyll-theme-tactile (= 0.1.1) 70 | jekyll-theme-time-machine (= 0.1.1) 71 | jekyll-titles-from-headings (= 0.5.3) 72 | jemoji (= 0.12.0) 73 | kramdown (= 2.3.1) 74 | kramdown-parser-gfm (= 1.1.0) 75 | liquid (= 4.0.3) 76 | mercenary (~> 0.3) 77 | minima (= 2.5.1) 78 | nokogiri (>= 1.10.4, < 2.0) 79 | rouge (= 3.26.0) 80 | terminal-table (~> 1.4) 81 | github-pages-health-check (1.17.0) 82 | addressable (~> 2.3) 83 | 
dnsruby (~> 1.60) 84 | octokit (~> 4.0) 85 | public_suffix (>= 2.0.2, < 5.0) 86 | typhoeus (~> 1.3) 87 | html-pipeline (2.14.0) 88 | activesupport (>= 2) 89 | nokogiri (>= 1.4) 90 | http_parser.rb (0.6.0) 91 | i18n (0.9.5) 92 | concurrent-ruby (~> 1.0) 93 | jekyll (3.9.0) 94 | addressable (~> 2.4) 95 | colorator (~> 1.0) 96 | em-websocket (~> 0.5) 97 | i18n (~> 0.7) 98 | jekyll-sass-converter (~> 1.0) 99 | jekyll-watch (~> 2.0) 100 | kramdown (>= 1.17, < 3) 101 | liquid (~> 4.0) 102 | mercenary (~> 0.3.3) 103 | pathutil (~> 0.9) 104 | rouge (>= 1.7, < 4) 105 | safe_yaml (~> 1.0) 106 | jekyll-avatar (0.7.0) 107 | jekyll (>= 3.0, < 5.0) 108 | jekyll-coffeescript (1.1.1) 109 | coffee-script (~> 2.2) 110 | coffee-script-source (~> 1.11.1) 111 | jekyll-commonmark (1.3.1) 112 | commonmarker (~> 0.14) 113 | jekyll (>= 3.7, < 5.0) 114 | jekyll-commonmark-ghpages (0.1.6) 115 | commonmarker (~> 0.17.6) 116 | jekyll-commonmark (~> 1.2) 117 | rouge (>= 2.0, < 4.0) 118 | jekyll-default-layout (0.1.4) 119 | jekyll (~> 3.0) 120 | jekyll-feed (0.15.1) 121 | jekyll (>= 3.7, < 5.0) 122 | jekyll-gist (1.5.0) 123 | octokit (~> 4.2) 124 | jekyll-github-metadata (2.13.0) 125 | jekyll (>= 3.4, < 5.0) 126 | octokit (~> 4.0, != 4.4.0) 127 | jekyll-mentions (1.6.0) 128 | html-pipeline (~> 2.3) 129 | jekyll (>= 3.7, < 5.0) 130 | jekyll-optional-front-matter (0.3.2) 131 | jekyll (>= 3.0, < 5.0) 132 | jekyll-paginate (1.1.0) 133 | jekyll-readme-index (0.3.0) 134 | jekyll (>= 3.0, < 5.0) 135 | jekyll-redirect-from (0.16.0) 136 | jekyll (>= 3.3, < 5.0) 137 | jekyll-relative-links (0.6.1) 138 | jekyll (>= 3.3, < 5.0) 139 | jekyll-remote-theme (0.4.3) 140 | addressable (~> 2.0) 141 | jekyll (>= 3.5, < 5.0) 142 | jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0) 143 | rubyzip (>= 1.3.0, < 3.0) 144 | jekyll-sass-converter (1.5.2) 145 | sass (~> 3.4) 146 | jekyll-seo-tag (2.7.1) 147 | jekyll (>= 3.8, < 5.0) 148 | jekyll-sitemap (1.4.0) 149 | jekyll (>= 3.7, < 5.0) 150 | jekyll-swiss (1.0.0) 151 | 
jekyll-theme-architect (0.1.1) 152 | jekyll (~> 3.5) 153 | jekyll-seo-tag (~> 2.0) 154 | jekyll-theme-cayman (0.1.1) 155 | jekyll (~> 3.5) 156 | jekyll-seo-tag (~> 2.0) 157 | jekyll-theme-dinky (0.1.1) 158 | jekyll (~> 3.5) 159 | jekyll-seo-tag (~> 2.0) 160 | jekyll-theme-hacker (0.1.2) 161 | jekyll (> 3.5, < 5.0) 162 | jekyll-seo-tag (~> 2.0) 163 | jekyll-theme-leap-day (0.1.1) 164 | jekyll (~> 3.5) 165 | jekyll-seo-tag (~> 2.0) 166 | jekyll-theme-merlot (0.1.1) 167 | jekyll (~> 3.5) 168 | jekyll-seo-tag (~> 2.0) 169 | jekyll-theme-midnight (0.1.1) 170 | jekyll (~> 3.5) 171 | jekyll-seo-tag (~> 2.0) 172 | jekyll-theme-minimal (0.1.1) 173 | jekyll (~> 3.5) 174 | jekyll-seo-tag (~> 2.0) 175 | jekyll-theme-modernist (0.1.1) 176 | jekyll (~> 3.5) 177 | jekyll-seo-tag (~> 2.0) 178 | jekyll-theme-primer (0.5.4) 179 | jekyll (> 3.5, < 5.0) 180 | jekyll-github-metadata (~> 2.9) 181 | jekyll-seo-tag (~> 2.0) 182 | jekyll-theme-slate (0.1.1) 183 | jekyll (~> 3.5) 184 | jekyll-seo-tag (~> 2.0) 185 | jekyll-theme-tactile (0.1.1) 186 | jekyll (~> 3.5) 187 | jekyll-seo-tag (~> 2.0) 188 | jekyll-theme-time-machine (0.1.1) 189 | jekyll (~> 3.5) 190 | jekyll-seo-tag (~> 2.0) 191 | jekyll-titles-from-headings (0.5.3) 192 | jekyll (>= 3.3, < 5.0) 193 | jekyll-watch (2.2.1) 194 | listen (~> 3.0) 195 | jemoji (0.12.0) 196 | gemoji (~> 3.0) 197 | html-pipeline (~> 2.2) 198 | jekyll (>= 3.0, < 5.0) 199 | kramdown (2.3.1) 200 | rexml 201 | kramdown-parser-gfm (1.1.0) 202 | kramdown (~> 2.0) 203 | liquid (4.0.3) 204 | listen (3.5.1) 205 | rb-fsevent (~> 0.10, >= 0.10.3) 206 | rb-inotify (~> 0.9, >= 0.9.10) 207 | mercenary (0.3.6) 208 | mini_portile2 (2.5.0) 209 | minima (2.5.1) 210 | jekyll (>= 3.5, < 5.0) 211 | jekyll-feed (~> 0.9) 212 | jekyll-seo-tag (~> 2.1) 213 | minitest (5.14.4) 214 | multipart-post (2.1.1) 215 | nokogiri (1.11.0) 216 | mini_portile2 (~> 2.5.0) 217 | racc (~> 1.4) 218 | octokit (4.20.0) 219 | faraday (>= 0.9) 220 | sawyer (~> 0.8.0, >= 0.5.3) 221 | pathutil 
(0.16.2) 222 | forwardable-extended (~> 2.6) 223 | public_suffix (4.0.6) 224 | racc (1.5.2) 225 | rb-fsevent (0.10.4) 226 | rb-inotify (0.10.1) 227 | ffi (~> 1.0) 228 | rexml (3.2.5) 229 | rouge (3.26.0) 230 | ruby-enum (0.9.0) 231 | i18n 232 | ruby2_keywords (0.0.4) 233 | rubyzip (2.3.0) 234 | safe_yaml (1.0.5) 235 | sass (3.7.4) 236 | sass-listen (~> 4.0.0) 237 | sass-listen (4.0.0) 238 | rb-fsevent (~> 0.9, >= 0.9.4) 239 | rb-inotify (~> 0.9, >= 0.9.7) 240 | sawyer (0.8.2) 241 | addressable (>= 2.3.5) 242 | faraday (> 0.8, < 2.0) 243 | simpleidn (0.2.1) 244 | unf (~> 0.1.4) 245 | terminal-table (1.8.0) 246 | unicode-display_width (~> 1.1, >= 1.1.1) 247 | thread_safe (0.3.6) 248 | typhoeus (1.4.0) 249 | ethon (>= 0.9.0) 250 | tzinfo (1.2.9) 251 | thread_safe (~> 0.1) 252 | unf (0.1.4) 253 | unf_ext 254 | unf_ext (0.0.7.7) 255 | unicode-display_width (1.7.0) 256 | webrick (1.7.0) 257 | zeitwerk (2.4.2) 258 | 259 | PLATFORMS 260 | ruby 261 | 262 | DEPENDENCIES 263 | github-pages 264 | jekyll (>= 3.7) 265 | jekyll-remote-theme 266 | kramdown (>= 2.3.1) 267 | nokogiri (< 1.11.1) 268 | webrick (~> 1.7) 269 | 270 | BUNDLED WITH 271 | 2.2.17 272 | -------------------------------------------------------------------------------- /docs/_site/app.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Understat-DB CLI | understatdb 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 115 | 120 | 131 | 132 | 133 | 134 | 135 | 136 | 174 | 175 | 176 |
177 |
178 | 179 |
180 | 181 | 182 | 183 |
184 | 185 | 186 | 187 | 235 | 236 | 237 | 238 | 239 |
240 | 241 | 242 | 243 | 244 |
245 |
246 | 247 |

Understat-DB CLI

248 | 249 | 250 |
251 | 252 | 253 | 254 |
255 | 256 | 257 |
CLI for creating a soccer database with data from understat.com
258 | 259 | 260 | 261 | 262 | 263 | 280 | 281 |
282 | 283 | 284 | 285 | 286 | 287 | 288 | 297 | 298 |
299 | 300 | 301 | 302 |
303 | 304 |
305 | 306 | 307 |
308 |
309 |

Initialise the app with Typer

310 | 311 |
312 |
313 |
314 | 315 | 316 |
317 | 318 |
319 | 320 | 321 |
322 |
323 |

Hello, world

An example method

324 | 325 |
326 |
327 |
328 | 329 | 330 |
331 | 332 |
333 |
334 | 335 |
336 | 337 | 338 |
339 |

hello[source]

hello(name:str)

340 |
341 | 342 |
343 | 344 |
345 | 346 |
347 |
348 | 349 |
350 | 351 | 352 | 353 | 354 |
355 | 356 |
357 | 358 | 359 |
360 |
361 |

Test the app using Typer's built-in CLI runner

362 | 363 |
364 |
365 |
366 | 367 | 368 |
369 |
370 | 371 |
372 |
373 |
import typer.testing
374 | 
375 | runner = typer.testing.CliRunner()
376 | result = runner.invoke(app, ['World'])
377 | assert result.exit_code == 0
378 | assert 'Hello, World!' in result.output
379 | 
380 | 381 |
382 |
383 |
384 | 385 |
386 | 387 | 388 |
389 |
390 |

Migrate

Migrating the database

391 | 392 |
393 |
394 |
395 | 396 | 397 |
398 | 399 |
400 |
401 | 402 |
403 | 404 | 405 |
406 |

migrate[source]

migrate(interactive:bool=True)

407 |
408 | 409 |
410 | 411 |
412 | 413 |
414 |
415 | 416 |
417 | 418 | 419 | 420 | 421 |
422 | 423 |
424 | 425 | 426 | 427 | 428 |
429 |
430 | 431 |
432 |
433 |
dotenv.load_dotenv()
434 | 
435 | 436 |
437 |
438 |
439 | 440 |
441 |
442 | 443 |
444 | 445 | 446 | 447 |
448 |
True
449 |
450 | 451 |
452 | 453 |
454 |
455 | 456 |
457 | 458 | 459 |
460 |
461 |
462 |

Finally, make the app executable from the command line

463 | 464 |
465 |
466 |
467 | 468 | 469 |
470 | 471 |
472 | 473 | 474 |
475 | 476 | 477 | 478 | 479 |
480 | 481 |
482 | 483 |
484 | 485 | 486 | 487 |
488 |
489 | 494 |
495 |
496 | 497 | 498 |
499 | 500 |
501 | 502 |
503 | 504 |
505 | 506 | 507 | 508 | 509 | -------------------------------------------------------------------------------- /docs/_site/assets/css/boxshadowproperties.css: -------------------------------------------------------------------------------- 1 | /* box-shadow fonts return errors with prince, so extracting here to put in web output only */ 2 | 3 | #search-demo-container ul#results-container { 4 | box-shadow: 2px 3px 2px #dedede; 5 | } 6 | 7 | 8 | hr.shaded { 9 | box-shadow: inset 0 6px 6px -6px rgba(0,0,0,0.5); 10 | } 11 | 12 | .videoThumbs img { 13 | box-shadow: 2px 2px 1px #f0f0f0; 14 | } 15 | 16 | .box { 17 | box-shadow: 2px 2px 4px #dedede; 18 | } 19 | 20 | @media (max-width: 1200px) { 21 | .navbar-collapse { 22 | box-shadow: inset 0 1px 0 rgba(255,255,255,0.1); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /docs/_site/assets/css/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/css/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /docs/_site/assets/css/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/css/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_site/assets/css/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/css/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- 
/docs/_site/assets/css/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/css/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_site/assets/css/fonts/fontawesome-webfont.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/css/fonts/fontawesome-webfont.woff2 -------------------------------------------------------------------------------- /docs/_site/assets/css/modern-business.css: -------------------------------------------------------------------------------- 1 | /*! 2 | * Start Bootstrap - Modern Business HTML Template (http://startbootstrap.com) 3 | * Code licensed under the Apache License v2.0. 4 | * For details, see http://www.apache.org/licenses/LICENSE-2.0. 
5 | */ 6 | 7 | /* Global Styles */ 8 | 9 | html, 10 | body { 11 | height: 100%; 12 | } 13 | 14 | .img-portfolio { 15 | margin-bottom: 30px; 16 | } 17 | 18 | .img-hover:hover { 19 | opacity: 0.8; 20 | } 21 | 22 | /* Home Page Carousel */ 23 | 24 | header.carousel { 25 | height: 50%; 26 | } 27 | 28 | header.carousel .item, 29 | header.carousel .item.active, 30 | header.carousel .carousel-inner { 31 | height: 100%; 32 | } 33 | 34 | header.carousel .fill { 35 | width: 100%; 36 | height: 100%; 37 | background-position: center; 38 | background-size: cover; 39 | } 40 | 41 | /* 404 Page Styles */ 42 | 43 | .error-404 { 44 | font-size: 100px; 45 | } 46 | 47 | /* Pricing Page Styles */ 48 | 49 | .price { 50 | display: block; 51 | font-size: 50px; 52 | line-height: 50px; 53 | } 54 | 55 | .price sup { 56 | top: -20px; 57 | left: 2px; 58 | font-size: 20px; 59 | } 60 | 61 | .period { 62 | display: block; 63 | font-style: italic; 64 | } 65 | 66 | /* Footer Styles */ 67 | 68 | footer { 69 | margin: 50px 0; 70 | } 71 | 72 | /* Responsive Styles */ 73 | 74 | @media(max-width:991px) { 75 | .client-img, 76 | .img-related { 77 | margin-bottom: 30px; 78 | } 79 | } 80 | 81 | @media(max-width:767px) { 82 | .img-portfolio { 83 | margin-bottom: 15px; 84 | } 85 | 86 | header.carousel .carousel { 87 | height: 70%; 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /docs/_site/assets/css/printstyles.css: -------------------------------------------------------------------------------- 1 | 2 | /*body.print .container {max-width: 650px;}*/ 3 | 4 | body { 5 | font-size:14px; 6 | } 7 | .nav ul li a {border-top:0px; background-color:transparent; color: #808080; } 8 | #navig a[href] {color: #595959 !important;} 9 | table .table {max-width:650px;} 10 | 11 | #navig li.sectionHead {font-weight: bold; font-size: 18px; color: #595959 !important; } 12 | #navig li {font-weight: normal; } 13 | 14 | #navig a[href]::after { content: leader(".") 
target-counter(attr(href), page); } 15 | 16 | a[href]::after { 17 | content: " (page " target-counter(attr(href), page) ")" 18 | } 19 | 20 | a[href^="http:"]::after, a[href^="https:"]::after { 21 | content: ""; 22 | } 23 | 24 | a[href] { 25 | color: blue !important; 26 | } 27 | a[href*="mailto"]::after, a[data-toggle="tooltip"]::after, a[href].noCrossRef::after { 28 | content: ""; 29 | } 30 | 31 | 32 | @page { 33 | margin: 60pt 90pt 60pt 90pt; 34 | font-family: sans-serif; 35 | font-style:none; 36 | color: gray; 37 | 38 | } 39 | 40 | .printTitle { 41 | line-height:30pt; 42 | font-size:27pt; 43 | font-weight: bold; 44 | letter-spacing: -.5px; 45 | margin-bottom:25px; 46 | } 47 | 48 | .printSubtitle { 49 | font-size: 19pt; 50 | color: #cccccc !important; 51 | font-family: "Grotesque MT Light"; 52 | line-height: 22pt; 53 | letter-spacing: -.5px; 54 | margin-bottom:20px; 55 | } 56 | .printTitleArea hr { 57 | color: #999999 !important; 58 | height: 2px; 59 | width: 100%; 60 | } 61 | 62 | .printTitleImage { 63 | max-width:300px; 64 | margin-bottom:200px; 65 | } 66 | 67 | 68 | .printTitleImage { 69 | max-width: 250px; 70 | } 71 | 72 | #navig { 73 | /*page-break-before: always;*/ 74 | } 75 | 76 | .copyrightBoilerplate { 77 | page-break-before:always; 78 | font-size:14px; 79 | } 80 | 81 | .lastGeneratedDate { 82 | font-style: italic; 83 | font-size:14px; 84 | color: gray; 85 | } 86 | 87 | .alert a { 88 | text-decoration: none !important; 89 | } 90 | 91 | 92 | body.title { page: title } 93 | 94 | @page title { 95 | @top-left { 96 | content: " "; 97 | } 98 | @top-right { 99 | content: " " 100 | } 101 | @bottom-right { 102 | content: " "; 103 | } 104 | @bottom-left { 105 | content: " "; 106 | } 107 | } 108 | 109 | body.frontmatter { page: frontmatter } 110 | body.frontmatter {counter-reset: page 1} 111 | 112 | 113 | @page frontmatter { 114 | @top-left { 115 | content: prince-script(guideName); 116 | } 117 | @top-right { 118 | content: prince-script(datestamp); 119 | } 120 | 
@bottom-right { 121 | content: counter(page, lower-roman); 122 | } 123 | @bottom-left { 124 | content: "youremail@domain.com"; } 125 | } 126 | 127 | body.first_page {counter-reset: page 1} 128 | 129 | h1 { string-set: doctitle content() } 130 | 131 | @page { 132 | @top-left { 133 | content: string(doctitle); 134 | font-size: 11px; 135 | font-style: italic; 136 | } 137 | @top-right { 138 | content: prince-script(datestamp); 139 | font-size: 11px; 140 | } 141 | 142 | @bottom-right { 143 | content: "Page " counter(page); 144 | font-size: 11px; 145 | } 146 | @bottom-left { 147 | content: prince-script(guideName); 148 | font-size: 11px; 149 | } 150 | } 151 | .alert { 152 | background-color: #fafafa !important; 153 | border-color: #dedede !important; 154 | color: black; 155 | } 156 | 157 | pre { 158 | background-color: #fafafa; 159 | } 160 | -------------------------------------------------------------------------------- /docs/_site/assets/css/syntax.css: -------------------------------------------------------------------------------- 1 | .highlight { background: #ffffff; } 2 | .highlight .c { color: #999988; font-style: italic } /* Comment */ 3 | .highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */ 4 | .highlight .k { font-weight: bold } /* Keyword */ 5 | .highlight .o { font-weight: bold } /* Operator */ 6 | .highlight .cm { color: #999988; font-style: italic } /* Comment.Multiline */ 7 | .highlight .cp { color: #999999; font-weight: bold } /* Comment.Preproc */ 8 | .highlight .c1 { color: #999988; font-style: italic } /* Comment.Single */ 9 | .highlight .cs { color: #999999; font-weight: bold; font-style: italic } /* Comment.Special */ 10 | .highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */ 11 | .highlight .gd .x { color: #000000; background-color: #ffaaaa } /* Generic.Deleted.Specific */ 12 | .highlight .ge { font-style: italic } /* Generic.Emph */ 13 | .highlight .gr { color: #aa0000 } /* Generic.Error */ 14 | 
.highlight .gh { color: #999999 } /* Generic.Heading */ 15 | .highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */ 16 | .highlight .gi .x { color: #000000; background-color: #aaffaa } /* Generic.Inserted.Specific */ 17 | .highlight .go { color: #888888 } /* Generic.Output */ 18 | .highlight .gp { color: #555555 } /* Generic.Prompt */ 19 | .highlight .gs { font-weight: bold } /* Generic.Strong */ 20 | .highlight .gu { color: #aaaaaa } /* Generic.Subheading */ 21 | .highlight .gt { color: #aa0000 } /* Generic.Traceback */ 22 | .highlight .kc { font-weight: bold } /* Keyword.Constant */ 23 | .highlight .kd { font-weight: bold } /* Keyword.Declaration */ 24 | .highlight .kp { font-weight: bold } /* Keyword.Pseudo */ 25 | .highlight .kr { font-weight: bold } /* Keyword.Reserved */ 26 | .highlight .kt { color: #445588; font-weight: bold } /* Keyword.Type */ 27 | .highlight .m { color: #009999 } /* Literal.Number */ 28 | .highlight .s { color: #d14 } /* Literal.String */ 29 | .highlight .na { color: #008080 } /* Name.Attribute */ 30 | .highlight .nb { color: #0086B3 } /* Name.Builtin */ 31 | .highlight .nc { color: #445588; font-weight: bold } /* Name.Class */ 32 | .highlight .no { color: #008080 } /* Name.Constant */ 33 | .highlight .ni { color: #800080 } /* Name.Entity */ 34 | .highlight .ne { color: #990000; font-weight: bold } /* Name.Exception */ 35 | .highlight .nf { color: #990000; font-weight: bold } /* Name.Function */ 36 | .highlight .nn { color: #555555 } /* Name.Namespace */ 37 | .highlight .nt { color: #000080 } /* Name.Tag */ 38 | .highlight .nv { color: #008080 } /* Name.Variable */ 39 | .highlight .ow { font-weight: bold } /* Operator.Word */ 40 | .highlight .w { color: #bbbbbb } /* Text.Whitespace */ 41 | .highlight .mf { color: #009999 } /* Literal.Number.Float */ 42 | .highlight .mh { color: #009999 } /* Literal.Number.Hex */ 43 | .highlight .mi { color: #009999 } /* Literal.Number.Integer */ 44 | .highlight .mo { color: 
#009999 } /* Literal.Number.Oct */ 45 | .highlight .sb { color: #d14 } /* Literal.String.Backtick */ 46 | .highlight .sc { color: #d14 } /* Literal.String.Char */ 47 | .highlight .sd { color: #d14 } /* Literal.String.Doc */ 48 | .highlight .s2 { color: #d14 } /* Literal.String.Double */ 49 | .highlight .se { color: #d14 } /* Literal.String.Escape */ 50 | .highlight .sh { color: #d14 } /* Literal.String.Heredoc */ 51 | .highlight .si { color: #d14 } /* Literal.String.Interpol */ 52 | .highlight .sx { color: #d14 } /* Literal.String.Other */ 53 | .highlight .sr { color: #009926 } /* Literal.String.Regex */ 54 | .highlight .s1 { color: #d14 } /* Literal.String.Single */ 55 | .highlight .ss { color: #990073 } /* Literal.String.Symbol */ 56 | .highlight .bp { color: #999999 } /* Name.Builtin.Pseudo */ 57 | .highlight .vc { color: #008080 } /* Name.Variable.Class */ 58 | .highlight .vg { color: #008080 } /* Name.Variable.Global */ 59 | .highlight .vi { color: #008080 } /* Name.Variable.Instance */ 60 | .highlight .il { color: #009999 } /* Literal.Number.Integer.Long */ -------------------------------------------------------------------------------- /docs/_site/assets/css/theme-blue.css: -------------------------------------------------------------------------------- 1 | .summary { 2 | color: #808080; 3 | border-left: 5px solid #ED1951; 4 | font-size:16px; 5 | } 6 | 7 | 8 | h3 {color: #000000; } 9 | h4 {color: #000000; } 10 | 11 | .nav-tabs > li.active > a, .nav-tabs > li.active > a:hover, .nav-tabs > li.active > a:focus { 12 | background-color: #248ec2; 13 | color: white; 14 | } 15 | 16 | .nav > li.active > a { 17 | background-color: #347DBE; 18 | } 19 | 20 | .nav > li > a:hover { 21 | background-color: #248ec2; 22 | } 23 | 24 | div.navbar-collapse .dropdown-menu > li > a:hover { 25 | background-color: #347DBE; 26 | } 27 | 28 | .nav li.thirdlevel > a { 29 | background-color: #FAFAFA !important; 30 | color: #248EC2; 31 | font-weight: bold; 32 | } 33 | 34 | 
a[data-toggle="tooltip"] { 35 | color: #649345; 36 | font-style: italic; 37 | cursor: default; 38 | } 39 | 40 | .navbar-inverse { 41 | background-color: #347DBE; 42 | border-color: #015CAE; 43 | } 44 | .navbar-inverse .navbar-nav>li>a, .navbar-inverse .navbar-brand { 45 | color: white; 46 | } 47 | 48 | .navbar-inverse .navbar-nav>li>a:hover, a.fa.fa-home.fa-lg.navbar-brand:hover { 49 | color: #f0f0f0; 50 | } 51 | 52 | a.navbar-brand:hover { 53 | color: #f0f0f0; 54 | } 55 | 56 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus { 57 | color: #015CAE; 58 | } 59 | 60 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus { 61 | background-color: #015CAE; 62 | color: #ffffff; 63 | } 64 | 65 | .navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form { 66 | border-color: #248ec2 !important; 67 | } 68 | 69 | .btn-primary { 70 | color: #ffffff; 71 | background-color: #347DBE; 72 | border-color: #347DBE; 73 | } 74 | 75 | .navbar-inverse .navbar-nav > .active > a, .navbar-inverse .navbar-nav > .active > a:hover, .navbar-inverse .navbar-nav > .active > a:focus { 76 | background-color: #347DBE; 77 | } 78 | 79 | .btn-primary:hover, 80 | .btn-primary:focus, 81 | .btn-primary:active, 82 | .btn-primary.active, 83 | .open .dropdown-toggle.btn-primary { 84 | background-color: #248ec2; 85 | border-color: #347DBE; 86 | } 87 | 88 | .printTitle { 89 | color: #015CAE !important; 90 | } 91 | 92 | body.print h1 {color: #015CAE !important; font-size:28px !important;} 93 | body.print h2 {color: #595959 !important; font-size:20px !important;} 94 | body.print h3 {color: #E50E51 !important; font-size:14px !important;} 95 | body.print h4 {color: #679DCE !important; font-size:14px; font-style: italic !important;} 96 | 97 | .anchorjs-link:hover { 98 | color: #216f9b; 99 | } 100 | 101 | div.sidebarTitle { 102 | color: #015CAE; 103 
| } 104 | 105 | li.sidebarTitle { 106 | margin-top:20px; 107 | font-weight:normal; 108 | font-size:130%; 109 | color: #ED1951; 110 | margin-bottom:10px; 111 | margin-left: 5px; 112 | 113 | } 114 | 115 | .navbar-inverse .navbar-toggle:focus, .navbar-inverse .navbar-toggle:hover { 116 | background-color: #015CAE; 117 | } 118 | 119 | .navbar-inverse .navbar-toggle { 120 | border-color: #015CAE; 121 | } 122 | -------------------------------------------------------------------------------- /docs/_site/assets/css/theme-green.css: -------------------------------------------------------------------------------- 1 | .summary { 2 | color: #808080; 3 | border-left: 5px solid #E50E51; 4 | font-size:16px; 5 | } 6 | 7 | 8 | h3 {color: #E50E51; } 9 | h4 {color: #808080; } 10 | 11 | .nav-tabs > li.active > a, .nav-tabs > li.active > a:hover, .nav-tabs > li.active > a:focus { 12 | background-color: #248ec2; 13 | color: white; 14 | } 15 | 16 | .nav > li.active > a { 17 | background-color: #72ac4a; 18 | } 19 | 20 | .nav > li > a:hover { 21 | background-color: #72ac4a; 22 | } 23 | 24 | div.navbar-collapse .dropdown-menu > li > a:hover { 25 | background-color: #72ac4a; 26 | } 27 | 28 | .navbar-inverse .navbar-nav>li>a, .navbar-inverse .navbar-brand { 29 | color: white; 30 | } 31 | 32 | .navbar-inverse .navbar-nav>li>a:hover, a.fa.fa-home.fa-lg.navbar-brand:hover { 33 | color: #f0f0f0; 34 | } 35 | 36 | .nav li.thirdlevel > a { 37 | background-color: #FAFAFA !important; 38 | color: #72ac4a; 39 | font-weight: bold; 40 | } 41 | 42 | a[data-toggle="tooltip"] { 43 | color: #649345; 44 | font-style: italic; 45 | cursor: default; 46 | } 47 | 48 | .navbar-inverse { 49 | background-color: #72ac4a; 50 | border-color: #5b893c; 51 | } 52 | 53 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > a:hover, .navbar-inverse .navbar-nav > .open > a:focus { 54 | color: #5b893c; 55 | } 56 | 57 | .navbar-inverse .navbar-nav > .open > a, .navbar-inverse .navbar-nav > .open > 
a:hover, .navbar-inverse .navbar-nav > .open > a:focus { 58 | background-color: #5b893c; 59 | color: #ffffff; 60 | } 61 | 62 | /* not sure if using this ...*/ 63 | .navbar-inverse .navbar-collapse, .navbar-inverse .navbar-form { 64 | border-color: #72ac4a !important; 65 | } 66 | 67 | .btn-primary { 68 | color: #ffffff; 69 | background-color: #5b893c; 70 | border-color: #5b893c; 71 | } 72 | 73 | .btn-primary:hover, 74 | .btn-primary:focus, 75 | .btn-primary:active, 76 | .btn-primary.active, 77 | .open .dropdown-toggle.btn-primary { 78 | background-color: #72ac4a; 79 | border-color: #5b893c; 80 | } 81 | 82 | .printTitle { 83 | color: #5b893c !important; 84 | } 85 | 86 | body.print h1 {color: #5b893c !important; font-size:28px;} 87 | body.print h2 {color: #595959 !important; font-size:24px;} 88 | body.print h3 {color: #E50E51 !important; font-size:14px;} 89 | body.print h4 {color: #679DCE !important; font-size:14px; font-style: italic;} 90 | 91 | .anchorjs-link:hover { 92 | color: #4f7233; 93 | } 94 | 95 | div.sidebarTitle { 96 | color: #E50E51; 97 | } 98 | 99 | li.sidebarTitle { 100 | margin-top:20px; 101 | font-weight:normal; 102 | font-size:130%; 103 | color: #ED1951; 104 | margin-bottom:10px; 105 | margin-left: 5px; 106 | } 107 | 108 | .navbar-inverse .navbar-toggle:focus, .navbar-inverse .navbar-toggle:hover { 109 | background-color: #E50E51; 110 | } 111 | -------------------------------------------------------------------------------- /docs/_site/assets/fonts/FontAwesome.otf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/FontAwesome.otf -------------------------------------------------------------------------------- /docs/_site/assets/fonts/fontawesome-webfont.eot: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/fontawesome-webfont.eot -------------------------------------------------------------------------------- /docs/_site/assets/fonts/fontawesome-webfont.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/fontawesome-webfont.ttf -------------------------------------------------------------------------------- /docs/_site/assets/fonts/fontawesome-webfont.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/fontawesome-webfont.woff -------------------------------------------------------------------------------- /docs/_site/assets/fonts/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /docs/_site/assets/fonts/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /docs/_site/assets/fonts/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/glyphicons-halflings-regular.woff 
-------------------------------------------------------------------------------- /docs/_site/assets/fonts/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/fonts/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /docs/_site/assets/images/colab.svg: -------------------------------------------------------------------------------- 1 | Open in ColabOpen in Colab 2 | -------------------------------------------------------------------------------- /docs/_site/assets/images/company_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/images/company_logo.png -------------------------------------------------------------------------------- /docs/_site/assets/images/company_logo_big.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/images/company_logo_big.png -------------------------------------------------------------------------------- /docs/_site/assets/images/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/images/favicon.ico -------------------------------------------------------------------------------- /docs/_site/assets/images/workflowarrow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Torvaney/understat-db/1e46544bca23f3551736f62125d33f7499382832/docs/_site/assets/images/workflowarrow.png 
-------------------------------------------------------------------------------- /docs/_site/assets/js/customscripts.js: -------------------------------------------------------------------------------- 1 | $('#mysidebar').height($(".nav").height()); 2 | 3 | 4 | $( document ).ready(function() { 5 | 6 | //this script says, if the height of the viewport is greater than 800px, then insert affix class, which makes the nav bar float in a fixed 7 | // position as your scroll. if you have a lot of nav items, this height may not work for you. 8 | var h = $(window).height(); 9 | //console.log (h); 10 | if (h > 800) { 11 | $( "#mysidebar" ).attr("class", "nav affix"); 12 | } 13 | /** 14 | * AnchorJS 15 | */ 16 | anchors.add('h2,h3,h4,h5'); 17 | 18 | }); 19 | 20 | // needed for nav tabs on pages. See Formatting > Nav tabs for more details. 21 | // script from http://stackoverflow.com/questions/10523433/how-do-i-keep-the-current-tab-active-with-twitter-bootstrap-after-a-page-reload 22 | $(function() { 23 | var json, tabsState; 24 | $('a[data-toggle="pill"], a[data-toggle="tab"]').on('shown.bs.tab', function(e) { 25 | var href, json, parentId, tabsState; 26 | 27 | tabsState = localStorage.getItem("tabs-state"); 28 | json = JSON.parse(tabsState || "{}"); 29 | parentId = $(e.target).parents("ul.nav.nav-pills, ul.nav.nav-tabs").attr("id"); 30 | href = $(e.target).attr('href'); 31 | json[parentId] = href; 32 | 33 | return localStorage.setItem("tabs-state", JSON.stringify(json)); 34 | }); 35 | 36 | tabsState = localStorage.getItem("tabs-state"); 37 | json = JSON.parse(tabsState || "{}"); 38 | 39 | $.each(json, function(containerId, href) { 40 | return $("#" + containerId + " a[href=" + href + "]").tab('show'); 41 | }); 42 | 43 | $("ul.nav.nav-pills, ul.nav.nav-tabs").each(function() { 44 | var $this = $(this); 45 | if (!json[$this.attr("id")]) { 46 | return $this.find("a[data-toggle=tab]:first, a[data-toggle=pill]:first").tab("show"); 47 | } 48 | }); 49 | }); 50 | 
-------------------------------------------------------------------------------- /docs/_site/assets/js/jekyll-search.js: -------------------------------------------------------------------------------- 1 | !function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a="function"==typeof require&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);throw new Error("Cannot find module '"+o+"'")}var f=n[o]={exports:{}};t[o][0].call(f.exports,function(e){var n=t[o][1][e];return s(n?n:e)},f,f.exports,e,t,n,r)}return n[o].exports}for(var i="function"==typeof require&&require,o=0;o=0}var self=this;self.matches=function(string,crit){return"string"!=typeof string?!1:(string=string.trim(),doMatch(string,crit))}}module.exports=new LiteralSearchStrategy},{}],4:[function(require,module){module.exports=function(){function findMatches(store,crit,strategy){for(var data=store.get(),i=0;i{title}',noResultsText:"No results found",limit:10,fuzzy:!1};self.init=function(_opt){validateOptions(_opt),assignOptions(_opt),isJSON(opt.dataSource)?initWithJSON(opt.dataSource):initWithURL(opt.dataSource)}}var Searcher=require("./Searcher"),Templater=require("./Templater"),Store=require("./Store"),JSONLoader=require("./JSONLoader"),searcher=new Searcher,templater=new Templater,store=new Store,jsonLoader=new JSONLoader;window.SimpleJekyllSearch=new SimpleJekyllSearch}(window,document)},{"./JSONLoader":1,"./Searcher":4,"./Store":5,"./Templater":6}]},{},[7]); 2 | -------------------------------------------------------------------------------- /docs/_site/assets/js/jquery.ba-throttle-debounce.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery throttle / debounce - v1.1 - 3/7/2010 3 | * http://benalman.com/projects/jquery-throttle-debounce-plugin/ 4 | * 5 | * Copyright (c) 2010 "Cowboy" Ben Alman 6 | * Dual licensed under the MIT and GPL licenses. 
7 | * http://benalman.com/about/license/ 8 | */ 9 | (function(b,c){var $=b.jQuery||b.Cowboy||(b.Cowboy={}),a;$.throttle=a=function(e,f,j,i){var h,d=0;if(typeof f!=="boolean"){i=j;j=f;f=c}function g(){var o=this,m=+new Date()-d,n=arguments;function l(){d=+new Date();j.apply(o,n)}function k(){h=c}if(i&&!h){l()}h&&clearTimeout(h);if(i===c&&m>e){l()}else{if(f!==true){h=setTimeout(i?k:l,i===c?e-m:e)}}}if($.guid){g.guid=j.guid=j.guid||$.guid++}return g};$.debounce=function(d,e,f){return f===c?a(d,e,false):a(d,f,e!==false)}})(this); -------------------------------------------------------------------------------- /docs/_site/assets/js/jquery.navgoco.min.js: -------------------------------------------------------------------------------- 1 | /* 2 | * jQuery Navgoco Menus Plugin v0.2.1 (2014-04-11) 3 | * https://github.com/tefra/navgoco 4 | * 5 | * Copyright (c) 2014 Chris T (@tefra) 6 | * BSD - https://github.com/tefra/navgoco/blob/master/LICENSE-BSD 7 | */ 8 | !function(a){"use strict";var b=function(b,c,d){return this.el=b,this.$el=a(b),this.options=c,this.uuid=this.$el.attr("id")?this.$el.attr("id"):d,this.state={},this.init(),this};b.prototype={init:function(){var b=this;b._load(),b.$el.find("ul").each(function(c){var d=a(this);d.attr("data-index",c),b.options.save&&b.state.hasOwnProperty(c)?(d.parent().addClass(b.options.openClass),d.show()):d.parent().hasClass(b.options.openClass)?(d.show(),b.state[c]=1):d.hide()});var c=a("").prepend(b.options.caretHtml),d=b.$el.find("li > a");b._trigger(c,!1),b._trigger(d,!0),b.$el.find("li:has(ul) > a").prepend(c)},_trigger:function(b,c){var d=this;b.on("click",function(b){b.stopPropagation();var e=c?a(this).next():a(this).parent().next(),f=!1;if(c){var g=a(this).attr("href");f=void 0===g||""===g||"#"===g}if(e=e.length>0?e:!1,d.options.onClickBefore.call(this,b,e),!c||e&&f)b.preventDefault(),d._toggle(e,e.is(":hidden")),d._save();else if(d.options.accordion){var 
h=d.state=d._parents(a(this));d.$el.find("ul").filter(":visible").each(function(){var b=a(this),c=b.attr("data-index");h.hasOwnProperty(c)||d._toggle(b,!1)}),d._save()}d.options.onClickAfter.call(this,b,e)})},_toggle:function(b,c){var d=this,e=b.attr("data-index"),f=b.parent();if(d.options.onToggleBefore.call(this,b,c),c){if(f.addClass(d.options.openClass),b.slideDown(d.options.slide),d.state[e]=1,d.options.accordion){var g=d.state=d._parents(b);g[e]=d.state[e]=1,d.$el.find("ul").filter(":visible").each(function(){var b=a(this),c=b.attr("data-index");g.hasOwnProperty(c)||d._toggle(b,!1)})}}else f.removeClass(d.options.openClass),b.slideUp(d.options.slide),d.state[e]=0;d.options.onToggleAfter.call(this,b,c)},_parents:function(b,c){var d={},e=b.parent(),f=e.parents("ul");return f.each(function(){var b=a(this),e=b.attr("data-index");return e?void(d[e]=c?b:1):!1}),d},_save:function(){if(this.options.save){var b={};for(var d in this.state)1===this.state[d]&&(b[d]=1);c[this.uuid]=this.state=b,a.cookie(this.options.cookie.name,JSON.stringify(c),this.options.cookie)}},_load:function(){if(this.options.save){if(null===c){var b=a.cookie(this.options.cookie.name);c=b?JSON.parse(b):{}}this.state=c.hasOwnProperty(this.uuid)?c[this.uuid]:{}}},toggle:function(b){var c=this,d=arguments.length;if(1>=d)c.$el.find("ul").each(function(){var d=a(this);c._toggle(d,b)});else{var e,f={},g=Array.prototype.slice.call(arguments,1);d--;for(var h=0;d>h;h++){e=g[h];var i=c.$el.find('ul[data-index="'+e+'"]').first();if(i&&(f[e]=i,b)){var j=c._parents(i,!0);for(var k in j)f.hasOwnProperty(k)||(f[k]=j[k])}}for(e in f)c._toggle(f[e],b)}c._save()},destroy:function(){a.removeData(this.$el),this.$el.find("li:has(ul) > a").unbind("click"),this.$el.find("li:has(ul) > a > span").unbind("click")}},a.fn.navgoco=function(c){if("string"==typeof c&&"_"!==c.charAt(0)&&"init"!==c)var d=!0,e=Array.prototype.slice.call(arguments,1);else c=a.extend({},a.fn.navgoco.defaults,c||{}),a.cookie||(c.save=!1);return 
this.each(function(f){var g=a(this),h=g.data("navgoco");h||(h=new b(this,d?a.fn.navgoco.defaults:c,f),g.data("navgoco",h)),d&&h[c].apply(h,e)})};var c=null;a.fn.navgoco.defaults={caretHtml:"",accordion:!1,openClass:"open",save:!0,cookie:{name:"navgoco",expires:!1,path:"/"},slide:{duration:400,easing:"swing"},onClickBefore:a.noop,onClickAfter:a.noop,onToggleBefore:a.noop,onToggleAfter:a.noop}}(jQuery); -------------------------------------------------------------------------------- /docs/_site/assets/js/toc.js: -------------------------------------------------------------------------------- 1 | // https://github.com/ghiculescu/jekyll-table-of-contents 2 | // this library modified by fastai to: 3 | // - update the location.href with the correct anchor when a toc item is clicked on 4 | (function($){ 5 | $.fn.toc = function(options) { 6 | var defaults = { 7 | noBackToTopLinks: false, 8 | title: '', 9 | minimumHeaders: 3, 10 | headers: 'h1, h2, h3, h4', 11 | listType: 'ol', // values: [ol|ul] 12 | showEffect: 'show', // values: [show|slideDown|fadeIn|none] 13 | showSpeed: 'slow' // set to 0 to deactivate effect 14 | }, 15 | settings = $.extend(defaults, options); 16 | 17 | var headers = $(settings.headers).filter(function() { 18 | // get all headers with an ID 19 | var previousSiblingName = $(this).prev().attr( "name" ); 20 | if (!this.id && previousSiblingName) { 21 | this.id = $(this).attr( "id", previousSiblingName.replace(/\./g, "-") ); 22 | } 23 | return this.id; 24 | }), output = $(this); 25 | if (!headers.length || headers.length < settings.minimumHeaders || !output.length) { 26 | return; 27 | } 28 | 29 | if (0 === settings.showSpeed) { 30 | settings.showEffect = 'none'; 31 | } 32 | 33 | var render = { 34 | show: function() { output.hide().html(html).show(settings.showSpeed); }, 35 | slideDown: function() { output.hide().html(html).slideDown(settings.showSpeed); }, 36 | fadeIn: function() { output.hide().html(html).fadeIn(settings.showSpeed); }, 37 | none: 
function() { output.html(html); } 38 | }; 39 | 40 | var get_level = function(ele) { return parseInt(ele.nodeName.replace("H", ""), 10); } 41 | var highest_level = headers.map(function(_, ele) { return get_level(ele); }).get().sort()[0]; 42 | //var return_to_top = ''; 43 | // other nice icons that can be used instead: glyphicon-upload glyphicon-hand-up glyphicon-chevron-up glyphicon-menu-up glyphicon-triangle-top 44 | var level = get_level(headers[0]), 45 | this_level, 46 | html = settings.title + " <"+settings.listType+">"; 47 | headers.on('click', function() { 48 | if (!settings.noBackToTopLinks) { 49 | var pos = $(window).scrollTop(); 50 | window.location.hash = this.id; 51 | $(window).scrollTop(pos); 52 | } 53 | }) 54 | .addClass('clickable-header') 55 | .each(function(_, header) { 56 | base_url = window.location.href; 57 | base_url = base_url.replace(/#.*$/, ""); 58 | this_level = get_level(header); 59 | //if (!settings.noBackToTopLinks && this_level > 1) { 60 | // $(header).addClass('top-level-header').before(return_to_top); 61 | //} 62 | txt = header.textContent.split('¶')[0].split(/\[(test|source)\]/)[0]; 63 | if (!txt) {return;} 64 | if (this_level === level) // same level as before; same indenting 65 | html += "
  • " + txt + ""; 66 | else if (this_level <= level){ // higher level than before; end parent ol 67 | for(i = this_level; i < level; i++) { 68 | html += "
  • " 69 | } 70 | html += "
  • " + txt + ""; 71 | } 72 | else if (this_level > level) { // lower level than before; expand the previous to contain a ol 73 | for(i = this_level; i > level; i--) { 74 | html += "<"+settings.listType+">"+((i-level == 2) ? "
  • " : "
  • ") 75 | } 76 | html += "" + txt + ""; 77 | } 78 | level = this_level; // update for the next one 79 | }); 80 | html += ""; 81 | if (!settings.noBackToTopLinks) { 82 | $(document).on('click', '.back-to-top', function() { 83 | $(window).scrollTop(0); 84 | window.location.hash = ''; 85 | }); 86 | } 87 | 88 | render[settings.showEffect](); 89 | }; 90 | })(jQuery); 91 | -------------------------------------------------------------------------------- /docs/_site/db.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Database | understatdb 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 115 | 120 | 131 | 132 | 133 | 134 | 135 | 136 | 174 | 175 | 176 |
    177 |
    178 | 179 |
    180 | 181 | 182 | 183 |
    184 | 185 | 186 | 187 | 235 | 236 | 237 | 238 | 239 |
    240 | 241 | 242 | 243 | 244 |
    245 |
    246 | 247 |

    Database

    248 | 249 | 250 |
    251 | 252 | 253 | 254 |
    255 | 256 | 257 |
    Database schema and so on
    258 | 259 | 260 | 261 | 262 | 263 | 280 | 281 |
    282 | 283 | 284 | 285 | 286 | 287 | 288 | 297 | 298 |
    299 | 300 | 301 | 302 |
    303 | 304 |
    305 | 306 | 307 | 308 | 309 |
    310 | 311 |
    312 | 313 | 314 | 315 | 316 |
    317 | 318 |
    319 |
    320 | 321 |
    322 | 323 | 324 |
    325 |

    evolve_ignore[source]

    evolve_ignore(registry=['base_base_model'])

    326 |
    327 | 328 |
    329 | 330 |
    331 | 332 |
    333 |
    334 | 335 |
    336 | 337 | 338 | 339 | 340 |
    341 | 342 |
    343 |
    344 | 345 |
    346 | 347 | 348 |
    349 |

    prefixed_snake_case[source]

    prefixed_snake_case(prefix)

    350 |
    351 | 352 |
    353 | 354 |
    355 | 356 |
    357 |
    358 | 359 |
    360 | 361 | 362 | 363 | 364 |
    365 | 366 |
    367 |
    368 | 369 |
    370 | 371 | 372 |
    373 |

    class BaseModel[source]

    BaseModel(*args, **kwargs) :: Model

    374 |
    375 |

    A model for base (json) data from Understat

    376 | 377 |
    378 | 379 |
    380 | 381 |
    382 |
    383 | 384 |
    385 | 386 | 387 | 388 | 389 |
    390 | 391 |
    392 | 393 | 394 | 395 | 396 |
    397 | 398 |
    399 |
    400 | 401 |
    402 | 403 | 404 |
    405 |

    class League[source]

    League(*args, **kwargs) :: BaseModel

    406 |
    407 |

    A model for base (json) data from Understat

    408 | 409 |
    410 | 411 |
    412 | 413 |
    414 |
    415 | 416 |
    417 | 418 | 419 | 420 | 421 |
    422 | 423 |
    424 | 425 | 426 | 427 | 428 |
    429 | 430 |
    431 |
    432 | 433 |
    434 | 435 | 436 |
    437 |

    class Season[source]

    Season(*args, **kwargs) :: BaseModel

    438 |
    439 |

    A model for base (json) data from Understat

    440 | 441 |
    442 | 443 |
    444 | 445 |
    446 |
    447 | 448 |
    449 | 450 | 451 | 452 | 453 |
    454 | 455 |
    456 | 457 | 458 | 459 | 460 |
    461 | 462 |
    463 |
    464 | 465 |
    466 | 467 | 468 |
    469 |

    class Matches[source]

    Matches(*args, **kwargs) :: BaseModel

    470 |
    471 |

    A model for base (json) data from Understat

    472 | 473 |
    474 | 475 |
    476 | 477 |
    478 |
    479 | 480 |
    481 | 482 | 483 | 484 | 485 |
    486 | 487 |
    488 | 489 | 490 | 491 | 492 |
    493 | 494 |
    495 |
    496 | 497 |
    498 | 499 | 500 |
    501 |

    class Shots[source]

    Shots(*args, **kwargs) :: BaseModel

    502 |
    503 |

    A model for base (json) data from Understat

    504 | 505 |
    506 | 507 |
    508 | 509 |
    510 |
    511 | 512 |
    513 | 514 | 515 | 516 | 517 |
    518 | 519 |
    520 | 521 | 522 |
    523 | 524 | 525 | 526 | 527 |
    528 | 529 |
    530 | 531 |
    532 | 533 | 534 | 535 |
    536 |
    537 | 542 |
    543 |
    544 | 545 | 546 |
    547 | 548 |
    549 | 550 |
    551 | 552 |
    553 | 554 | 555 | 556 | 557 | -------------------------------------------------------------------------------- /docs/_site/feed.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | understatdb 5 | An extendable project for creating a database of soccer data 6 | http://0.0.0.0:4000/ 7 | 8 | Tue, 11 May 2021 20:09:42 +0000 9 | Tue, 11 May 2021 20:09:42 +0000 10 | Jekyll v3.9.0 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/_site/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Understat DB | understatdb 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 115 | 120 | 131 | 132 | 133 | 134 | 135 | 136 | 174 | 175 | 176 |
    177 |
    178 | 179 |
    180 | 181 | 182 | 183 |
    184 | 185 | 186 | 187 | 235 | 236 | 237 | 238 | 239 |
    240 | 241 | 242 | 243 | 244 |
    245 |
    246 | 247 |

    Understat DB

    248 | 249 | 250 |
    251 | 252 | 253 | 254 |
    255 | 256 | 257 |
    Create a database using data from Understat.
    258 | 259 | 260 | 261 | 262 | 263 | 280 | 281 |
    282 | 283 | 284 | 285 | 286 | 287 | 288 | 297 | 298 |
    299 | 300 | 301 | 302 |
    303 | 304 |
    305 | 306 | 307 |
    308 |
    309 |

    Understat DB is a project to scrape data from Understat and store it in a Postgres database. It aims to be a useful companion or starting point for projects using football data.

    310 | 311 |
    312 |
    313 |
    314 |
    315 |
    316 |

    Usage

    The simplest way to get started is to populate a local database with docker-compose.

    317 |

    First, clone the repository:

    318 |
    git clone https://github.com/Torvaney/understat-db.git
    319 | cd understat-db
    320 | 
    321 |

    Then, setup the local environment

    322 |
    make env                 # Creates a virtualenv and installs the project & dependencies
    323 | cp .env.sample .env      # Copy default environment vars to .env
    324 | 
    325 |

    Run the database

    326 |
    docker-compose up -d db  # Starts a postgres database within a docker container
    327 | understat-db migrate     # Creates base database tables
    328 | 
    329 |

    Finally, import the data you want

    330 |
    understat-db ingest --leagues EPL --seasons 2020
    331 | 
    332 |

    Contributing

    Pull requests are encouraged! For major changes, please open an issue first to discuss what you would like to change.

    333 |

    License

    MIT

    334 | 335 |
    336 |
    337 |
    338 |
    339 | 340 | 341 | 342 | 343 |
    344 | 345 |
    346 | 347 |
    348 | 349 | 350 | 351 |
    352 |
    353 | 358 |
    359 |
    360 | 361 | 362 |
    363 | 364 |
    365 | 366 |
    367 | 368 |
    369 | 370 | 371 | 372 | 373 | -------------------------------------------------------------------------------- /docs/_site/sidebar.json: -------------------------------------------------------------------------------- 1 | { 2 | "understatdb": { 3 | "Overview": "/", 4 | "Understat-DB CLI": "cli.html", 5 | "Database": "db.html", 6 | "Understat": "understat.html" 7 | } 8 | } -------------------------------------------------------------------------------- /docs/_site/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | http://0.0.0.0:4000/app.html 10 | 11 | 12 | 13 | 14 | 15 | http://0.0.0.0:4000/cli.html 16 | 17 | 18 | 19 | 20 | 21 | http://0.0.0.0:4000/db.html 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | http://0.0.0.0:4000/ 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | http://0.0.0.0:4000/understat.html 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /docs/app.html: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | title: Understat-DB CLI 4 | 5 | 6 | keywords: fastai 7 | sidebar: home_sidebar 8 | 9 | summary: "CLI for creating a soccer database with data from understat.com" 10 | description: "CLI for creating a soccer database with data from understat.com" 11 | nb_path: "nbs/app.ipynb" 12 | --- 13 | 22 | 23 |
    24 | 25 | {% raw %} 26 | 27 |
    28 | 29 |
    30 | {% endraw %} 31 | 32 |
    33 |
    34 |

    Initialise the app with Typer

    35 | 36 |
    37 |
    38 |
    39 | {% raw %} 40 | 41 |
    42 | 43 |
    44 | {% endraw %} 45 | 46 |
    47 |
    48 |

    Hello, world

    An example method

    49 | 50 |
    51 |
    52 |
    53 | {% raw %} 54 | 55 |
    56 | 57 |
    58 |
    59 | 60 |
    61 | 62 | 63 |
    64 |

    hello[source]

    hello(name:str)

    65 |
    66 | 67 |
    68 | 69 |
    70 | 71 |
    72 |
    73 | 74 |
    75 | {% endraw %} 76 | 77 | {% raw %} 78 | 79 |
    80 | 81 |
    82 | {% endraw %} 83 | 84 |
    85 |
    86 |

    Test the app using Typer's built-in CLI runner

    87 | 88 |
    89 |
    90 |
    91 | {% raw %} 92 | 93 |
    94 |
    95 | 96 |
    97 |
    98 |
    import typer.testing
     99 | 
    100 | runner = typer.testing.CliRunner()
    101 | result = runner.invoke(app, ['World'])
    102 | assert result.exit_code == 0
    103 | assert 'Hello, World!' in result.output
    104 | 
    105 | 106 |
    107 |
    108 |
    109 | 110 |
    111 | {% endraw %} 112 | 113 |
    114 |
    115 |

    Migrate

    Migrating the database

    116 | 117 |
    118 |
    119 |
    120 | {% raw %} 121 | 122 |
    123 | 124 |
    125 |
    126 | 127 |
    128 | 129 | 130 |
    131 |

    migrate[source]

    migrate(interactive:bool=True)

    132 |
    133 | 134 |
    135 | 136 |
    137 | 138 |
    139 |
    140 | 141 |
    142 | {% endraw %} 143 | 144 | {% raw %} 145 | 146 |
    147 | 148 |
    149 | {% endraw %} 150 | 151 | {% raw %} 152 | 153 |
    154 |
    155 | 156 |
    157 |
    158 |
    dotenv.load_dotenv()
    159 | 
    160 | 161 |
    162 |
    163 |
    164 | 165 |
    166 |
    167 | 168 |
    169 | 170 | 171 | 172 |
    173 |
    True
    174 |
    175 | 176 |
    177 | 178 |
    179 |
    180 | 181 |
    182 | {% endraw %} 183 | 184 |
    185 |
    186 |
    187 |

    Finally, make the app executable from the command line

    188 | 189 |
    190 |
    191 |
    192 | {% raw %} 193 | 194 |
    195 | 196 |
    197 | {% endraw %} 198 | 199 |
    200 | 201 | 202 | -------------------------------------------------------------------------------- /docs/cli.html: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | title: Understat-DB CLI 4 | 5 | 6 | keywords: fastai 7 | sidebar: home_sidebar 8 | 9 | summary: "CLI for creating a soccer database with data from understat.com" 10 | description: "CLI for creating a soccer database with data from understat.com" 11 | nb_path: "nbs/cli.ipynb" 12 | --- 13 | 22 | 23 |
    24 | 25 | {% raw %} 26 | 27 |
    28 | 29 |
    30 | {% endraw %} 31 | 32 |
    33 |
    34 |

    Initialise the app with Typer

    35 | 36 |
    37 |
    38 |
    39 | {% raw %} 40 | 41 |
    42 | 43 |
    44 |
    45 | 46 |
    47 | 48 | 49 |
    50 |

    class EnvTyper[source]

    EnvTyper(name:Optional[str]=<typer.models.DefaultPlaceholder object at 0x128ce52b0>, invoke_without_command:bool=<typer.models.DefaultPlaceholder object at 0x128cf6908>, no_args_is_help:Optional[bool]=<typer.models.DefaultPlaceholder object at 0x128cf6710>, subcommand_metavar:Optional[str]=<typer.models.DefaultPlaceholder object at 0x128cf6940>, chain:bool=<typer.models.DefaultPlaceholder object at 0x128cf6978>, result_callback:Optional[Callable[Ellipsis, Any]]=<typer.models.DefaultPlaceholder object at 0x128cf69b0>, context_settings:Optional[Dict[Any, Any]]=<typer.models.DefaultPlaceholder object at 0x128cf69e8>, callback:Optional[Callable[Ellipsis, Any]]=<typer.models.DefaultPlaceholder object at 0x128cf6a20>, help:Optional[str]=<typer.models.DefaultPlaceholder object at 0x128cf6a58>, epilog:Optional[str]=<typer.models.DefaultPlaceholder object at 0x128cf6a90>, short_help:Optional[str]=<typer.models.DefaultPlaceholder object at 0x128cf6ac8>, options_metavar:str=<typer.models.DefaultPlaceholder object at 0x128cf6b00>, add_help_option:bool=<typer.models.DefaultPlaceholder object at 0x128cf6b38>, hidden:bool=<typer.models.DefaultPlaceholder object at 0x128cf6b70>, deprecated:bool=<typer.models.DefaultPlaceholder object at 0x128cf6ba8>, add_completion:bool=True) :: Typer

    51 |
    52 |

    Just like typer.Typer, except it loads the environment with 53 | dotenv.load_dotenv before executing any command.

    54 | 55 |
    56 | 57 |
    58 | 59 |
    60 |
    61 | 62 |
    63 | {% endraw %} 64 | 65 | {% raw %} 66 | 67 |
    68 | 69 |
    70 | {% endraw %} 71 | 72 |
    73 |
    74 |

    Migrate

    Migrating the database

    75 | 76 |
    77 |
    78 |
    79 | {% raw %} 80 | 81 |
    82 | 83 |
    84 |
    85 | 86 |
    87 | 88 | 89 |
    90 |

    migrate[source]

    migrate(interactive:bool=True)

    91 |
    92 |

    Migrate database to the current schema (as defined in nbs/db.ipynb)

    93 | 94 |
    95 | 96 |
    97 | 98 |
    99 |
    100 | 101 |
    102 | {% endraw %} 103 | 104 | {% raw %} 105 | 106 |
    107 | 108 |
    109 | {% endraw %} 110 | 111 |
    112 |
    113 |

    dbt runner

    (Re-)build tables from base data using the (unsupported) Python API of dbt (data build tool)

    114 | 115 |
    116 |
    117 |
    118 | {% raw %} 119 | 120 |
    121 | 122 |
    123 |
    124 | 125 |
    126 | 127 | 128 |
    129 |

    build_tables[source]

    build_tables(args:List[str]=<typer.models.OptionInfo object at 0x128cda278>)

    130 |
    131 |

    Build tables from base data using dbt

    132 | 133 |
    134 | 135 |
    136 | 137 |
    138 |
    139 | 140 |
    141 | {% endraw %} 142 | 143 | {% raw %} 144 | 145 |
    146 | 147 |
    148 | {% endraw %} 149 | 150 |
    151 |
    152 |

    Ingest

    Ingest base understat data and build understat tables from base data

    153 | 154 |
    155 |
    156 |
    157 | {% raw %} 158 | 159 |
    160 | 161 |
    162 |
    163 | 164 |
    165 | 166 | 167 |
    168 |

    ingest[source]

    ingest(refresh:bool=False, leagues:List[str]=<typer.models.OptionInfo object at 0x128cda320>, seasons:List[int]=<typer.models.OptionInfo object at 0x128cda588>)

    169 |
    170 |

    Ingest match and shot data from Understat.com

    171 | 172 |
    173 | 174 |
    175 | 176 |
    177 |
    178 | 179 |
    180 | {% endraw %} 181 | 182 | {% raw %} 183 | 184 |
    185 | 186 |
    187 | {% endraw %} 188 | 189 |
    190 |
    191 |
    192 |

    Finally, make the app executable from the command line

    193 | 194 |
    195 |
    196 |
    197 | {% raw %} 198 | 199 |
    200 | 201 |
    202 | {% endraw %} 203 | 204 |
    205 | 206 | 207 | -------------------------------------------------------------------------------- /docs/db.html: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | title: Database 4 | 5 | 6 | keywords: fastai 7 | sidebar: home_sidebar 8 | 9 | summary: "Database schema and so on" 10 | description: "Database schema and so on" 11 | nb_path: "nbs/db.ipynb" 12 | --- 13 | 22 | 23 |
    24 | 25 | {% raw %} 26 | 27 |
    28 | 29 |
    30 | {% endraw %} 31 | 32 | {% raw %} 33 | 34 |
    35 | 36 |
    37 | {% endraw %} 38 | 39 | {% raw %} 40 | 41 |
    42 | 43 |
    44 |
    45 | 46 |
    47 | 48 | 49 |
    50 |

    evolve_ignore[source]

    evolve_ignore(registry=['base_base_model'])

    51 |
    52 | 53 |
    54 | 55 |
    56 | 57 |
    58 |
    59 | 60 |
    61 | {% endraw %} 62 | 63 | {% raw %} 64 | 65 |
    66 | 67 |
    68 |
    69 | 70 |
    71 | 72 | 73 |
    74 |

    prefixed_snake_case[source]

    prefixed_snake_case(prefix)

    75 |
    76 | 77 |
    78 | 79 |
    80 | 81 |
    82 |
    83 | 84 |
    85 | {% endraw %} 86 | 87 | {% raw %} 88 | 89 |
    90 | 91 |
    92 |
    93 | 94 |
    95 | 96 | 97 |
    98 |

    class BaseModel[source]

    BaseModel(*args, **kwargs) :: Model

    99 |
    100 |

    A model for base (json) data from Understat

    101 | 102 |
    103 | 104 |
    105 | 106 |
    107 |
    108 | 109 |
    110 | {% endraw %} 111 | 112 | {% raw %} 113 | 114 |
    115 | 116 |
    117 | {% endraw %} 118 | 119 | {% raw %} 120 | 121 |
    122 | 123 |
    124 |
    125 | 126 |
    127 | 128 | 129 |
    130 |

    class League[source]

    League(*args, **kwargs) :: BaseModel

    131 |
    132 |

    A model for base (json) data from Understat

    133 | 134 |
    135 | 136 |
    137 | 138 |
    139 |
    140 | 141 |
    142 | {% endraw %} 143 | 144 | {% raw %} 145 | 146 |
    147 | 148 |
    149 | {% endraw %} 150 | 151 | {% raw %} 152 | 153 |
    154 | 155 |
    156 |
    157 | 158 |
    159 | 160 | 161 |
    162 |

    class Season[source]

    Season(*args, **kwargs) :: BaseModel

    163 |
    164 |

    A model for base (json) data from Understat

    165 | 166 |
    167 | 168 |
    169 | 170 |
    171 |
    172 | 173 |
    174 | {% endraw %} 175 | 176 | {% raw %} 177 | 178 |
    179 | 180 |
    181 | {% endraw %} 182 | 183 | {% raw %} 184 | 185 |
    186 | 187 |
    188 |
    189 | 190 |
    191 | 192 | 193 |
    194 |

    class Matches[source]

    Matches(*args, **kwargs) :: BaseModel

    195 |
    196 |

    A model for base (json) data from Understat

    197 | 198 |
    199 | 200 |
    201 | 202 |
    203 |
    204 | 205 |
    206 | {% endraw %} 207 | 208 | {% raw %} 209 | 210 |
    211 | 212 |
    213 | {% endraw %} 214 | 215 | {% raw %} 216 | 217 |
    218 | 219 |
    220 |
    221 | 222 |
    223 | 224 | 225 |
    226 |

    class Shots[source]

    Shots(*args, **kwargs) :: BaseModel

    227 |
    228 |

    A model for base (json) data from Understat

    229 | 230 |
    231 | 232 |
    233 | 234 |
    235 |
    236 | 237 |
    238 | {% endraw %} 239 | 240 | {% raw %} 241 | 242 |
    243 | 244 |
    245 | {% endraw %} 246 | 247 |
    248 | 249 | 250 | -------------------------------------------------------------------------------- /docs/feed.xml: -------------------------------------------------------------------------------- 1 | --- 2 | search: exclude 3 | layout: none 4 | --- 5 | 6 | 7 | 8 | 9 | {{ site.title | xml_escape }} 10 | {{ site.description | xml_escape }} 11 | {{ site.url }}/ 12 | 13 | {{ site.time | date_to_rfc822 }} 14 | {{ site.time | date_to_rfc822 }} 15 | Jekyll v{{ jekyll.version }} 16 | {% for post in site.posts limit:10 %} 17 | 18 | {{ post.title | xml_escape }} 19 | {{ post.content | xml_escape }} 20 | {{ post.date | date_to_rfc822 }} 21 | {{ post.url | prepend: site.url }} 22 | {{ post.url | prepend: site.url }} 23 | {% for tag in post.tags %} 24 | {{ tag | xml_escape }} 25 | {% endfor %} 26 | {% for tag in page.tags %} 27 | {{ cat | xml_escape }} 28 | {% endfor %} 29 | 30 | {% endfor %} 31 | 32 | 33 | -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | title: Understat DB 4 | 5 | 6 | keywords: fastai 7 | sidebar: home_sidebar 8 | 9 | summary: "Create a database using data from Understat." 10 | description: "Create a database using data from Understat." 11 | nb_path: "nbs/index.ipynb" 12 | --- 13 | 22 | 23 |
    24 | 25 | {% raw %} 26 | 27 |
    28 | 29 |
    30 | {% endraw %} 31 | 32 |
    33 |
    34 |

    Understat DB is a project to scrape data from Understat and store it in a Postgres database. It aims to be a useful companion or starting point for projects using football data.

    35 | 36 |
    37 |
    38 |
    39 |
    40 |
    41 |

    Usage

    The simplest way to get started is to populate a local database with docker-compose.

    42 |

    First, clone the repository:

    43 |
    git clone https://github.com/Torvaney/understat-db.git
    44 | cd understat-db
    45 | 
    46 |

    Then, set up the local environment

    47 |
    make env                  # Create a virtualenv and install the project & dependencies
    48 | source venv/bin/activate  # Activate the virtualenv
    49 | cp .env.sample .env       # Copy default environment vars to .env
    50 | 
    51 |

    Run the database

    52 |
    docker-compose up -d db   # Start a postgres database within a docker container
    53 | understat-db migrate      # Create base database tables
    54 | 
    55 |

    Finally, import the data you want

    56 |
    understat-db ingest --leagues EPL --seasons 2020
    57 | 
    58 |

    Requirements

    To run this project you will need:

    59 |
      60 |
    • Python 3.6+
    • 61 |
    • Docker
    • 62 |
    63 |

    Contributing

    Pull requests are encouraged! For major changes, please open an issue first to discuss what you would like to change.

    64 |

    License

    MIT

    65 | 66 |
    67 |
    68 |
    69 |
    70 | 71 | 72 | -------------------------------------------------------------------------------- /docs/sidebar.json: -------------------------------------------------------------------------------- 1 | { 2 | "understatdb": { 3 | "Overview": "/", 4 | "Understat-DB CLI": "cli.html", 5 | "Database": "db.html", 6 | "Understat": "understat.html" 7 | } 8 | } -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | --- 2 | layout: none 3 | search: exclude 4 | --- 5 | 6 | 7 | 8 | {% for post in site.posts %} 9 | {% unless post.search == "exclude" %} 10 | 11 | {{site.url}}{{post.url}} 12 | 13 | {% endunless %} 14 | {% endfor %} 15 | 16 | 17 | {% for page in site.pages %} 18 | {% unless page.search == "exclude" %} 19 | 20 | {{site.url}}{{ page.url}} 21 | 22 | {% endunless %} 23 | {% endfor %} 24 | -------------------------------------------------------------------------------- /docs/understat.html: -------------------------------------------------------------------------------- 1 | --- 2 | 3 | title: Understat 4 | 5 | 6 | keywords: fastai 7 | sidebar: home_sidebar 8 | 9 | summary: "A module for fetching data from understat.com" 10 | description: "A module for fetching data from understat.com" 11 | nb_path: "nbs/understat.ipynb" 12 | --- 13 | 22 | 23 |
    24 | 25 | {% raw %} 26 | 27 |
    28 | 29 |
    30 | {% endraw %} 31 | 32 |
    33 |
    34 |

    Helper functions

    35 |
    36 |
    37 |
    38 | {% raw %} 39 | 40 |
    41 | 42 |
    43 |
    44 | 45 |
    46 | 47 | 48 |
    49 |

    fetch_html[source]

    fetch_html(url)

    50 |
    51 |

    Fetch HTML and decode into a bs4.BeautifulSoup object

    52 | 53 |
    54 | 55 |
    56 | 57 |
    58 |
    59 | 60 |
    61 | {% endraw %} 62 | 63 | {% raw %} 64 | 65 |
    66 | 67 |
    68 |
    69 | 70 |
    71 | 72 | 73 |
    74 |

    extract_json[source]

    extract_json(soup, json_var)

    75 |
    76 |

    Extract a JSON variable from understat HTML.

    77 | 78 |
    79 | 80 |
    81 | 82 |
    83 |
    84 | 85 |
    86 | {% endraw %} 87 | 88 | {% raw %} 89 | 90 |
    91 | 92 |
    93 | {% endraw %} 94 | 95 |
    96 |
    97 |

    Understat 'API'

    98 |
    99 |
    100 |
    101 | {% raw %} 102 | 103 |
    104 | 105 |
    106 |
    107 | 108 |
    109 | 110 | 111 |
    112 |

    League[source]

    Enum = [EPL, LA_LIGA, SERIE_A, BUNDESLIGA, LIGUE_1, RPL]

    113 |
    114 |

    Understat leagues

    115 | 116 |
    117 | 118 |
    119 | 120 |
    121 |
    122 | 123 |
    124 | {% endraw %} 125 | 126 | {% raw %} 127 | 128 |
    129 | 130 |
    131 | {% endraw %} 132 | 133 | {% raw %} 134 | 135 |
    136 | 137 |
    138 |
    139 | 140 |
    141 | 142 | 143 |
    144 |

    class Understat[source]

    Understat(base_url:str='https://understat.com')

    145 |
    146 |

    Fetches understat data webpages

    147 | 148 |
    149 | 150 |
    151 | 152 |
    153 |
    154 | 155 |
    156 | {% endraw %} 157 | 158 | {% raw %} 159 | 160 |
    161 | 162 |
    163 | {% endraw %} 164 | 165 |
    166 |
    167 |

    Fetch matches from Understat

    168 | 169 |
    170 |
    171 |
    172 | {% raw %} 173 | 174 |
    175 |
    176 | 177 |
    178 |
    179 |
    understat = Understat()
    180 | 
    181 | matches = understat.matches(League.EPL, 2019)[17]
    182 | matches
    183 | 
    184 | 185 |
    186 |
    187 |
    188 | 189 |
    190 |
    191 | 192 |
    193 | 194 | 195 | 196 |
    197 |
    {'id': '11660',
    198 |  'isResult': True,
    199 |  'h': {'id': '238', 'title': 'Sheffield United', 'short_title': 'SHE'},
    200 |  'a': {'id': '78', 'title': 'Crystal Palace', 'short_title': 'CRY'},
    201 |  'goals': {'h': '1', 'a': '0'},
    202 |  'xG': {'h': '1.84778', 'a': '0.241912'},
    203 |  'datetime': '2019-08-18 14:00:00',
    204 |  'forecast': {'w': '0.8326', 'd': '0.1408', 'l': '0.0266'}}
    205 |
    206 | 207 |
    208 | 209 |
    210 |
    211 | 212 |
    213 | {% endraw %} 214 | 215 |
    216 |
    217 |

    Fetch individual match shots

    218 | 219 |
    220 |
    221 |
    222 | {% raw %} 223 | 224 |
    225 |
    226 | 227 |
    228 |
    229 |
    shots = understat.shots(11660)
    230 | 
    231 | # Take the home team's 6th shot (index 5; lists are zero-indexed)
    232 | shots['h'][5]
    233 | 
    234 | 235 |
    236 |
    237 |
    238 | 239 |
    240 |
    241 | 242 |
    243 | 244 | 245 | 246 |
    247 |
    {'id': '311085',
    248 |  'minute': '25',
    249 |  'result': 'BlockedShot',
    250 |  'X': '0.899000015258789',
    251 |  'Y': '0.5609999847412109',
    252 |  'xG': '0.07507339864969254',
    253 |  'player': 'Jack O&#039;Connell',
    254 |  'h_a': 'h',
    255 |  'player_id': '7705',
    256 |  'situation': 'FromCorner',
    257 |  'season': '2019',
    258 |  'shotType': 'LeftFoot',
    259 |  'match_id': '11660',
    260 |  'h_team': 'Sheffield United',
    261 |  'a_team': 'Crystal Palace',
    262 |  'h_goals': '1',
    263 |  'a_goals': '0',
    264 |  'date': '2019-08-18 14:00:00',
    265 |  'player_assisted': 'Oliver Norwood',
    266 |  'lastAction': 'Pass'}
    267 |
    268 | 269 |
    270 | 271 |
    272 |
    273 | 274 |
    275 | {% endraw %} 276 | 277 |
    278 | 279 | 280 | -------------------------------------------------------------------------------- /nbs/.gitattributes: -------------------------------------------------------------------------------- 1 | **/*.ipynb filter=clean-nbs 2 | **/*.ipynb diff=ipynb 3 | *.ipynb linguist-language=Python 4 | 5 | -------------------------------------------------------------------------------- /nbs/cli.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "# default_exp cli" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "# Understat-DB CLI\n", 17 | "\n", 18 | "> CLI for creating a soccer database with data from understat.com" 19 | ] 20 | }, 21 | { 22 | "cell_type": "code", 23 | "execution_count": null, 24 | "metadata": {}, 25 | "outputs": [], 26 | "source": [ 27 | "#hide\n", 28 | "from nbdev.showdoc import *" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": {}, 34 | "source": [ 35 | "Initialise the app with Typer" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "metadata": {}, 42 | "outputs": [], 43 | "source": [ 44 | "#exporti\n", 45 | "import os\n", 46 | "import functools\n", 47 | "import itertools\n", 48 | "import pathlib\n", 49 | "import typing\n", 50 | "import time\n", 51 | "\n", 52 | "import dbt.main\n", 53 | "import dotenv\n", 54 | "import playhouse.postgres_ext\n", 55 | "import pyprojroot\n", 56 | "import typer\n", 57 | "\n", 58 | "import understatdb" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "#export\n", 68 | "\n", 69 | "\n", 70 | "class EnvTyper(typer.Typer):\n", 71 | " \"\"\"\n", 72 | " Just like typer.Typer, except it loads the environment with\n", 73 | " `dotenv.load_dotenv` before 
executing any command.\n", 74 | " \"\"\"\n", 75 | " def __call__(self, *args, **kwargs):\n", 76 | " dotenv.load_dotenv()\n", 77 | " return super().__call__(*args, **kwargs)\n", 78 | "\n", 79 | "\n", 80 | "\n", 81 | "app = EnvTyper()" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "#exporti\n", 91 | "\n", 92 | "\n", 93 | "def initialize_db():\n", 94 | " \"\"\" \n", 95 | " Load database config from environment and initialise\n", 96 | " `understatdb.db.DB` with a database connection. \n", 97 | " \"\"\"\n", 98 | " \n", 99 | " # Load database config from environment\n", 100 | " postgres_db = playhouse.postgres_ext.PostgresqlExtDatabase(\n", 101 | " host=os.environ['DB_HOST'],\n", 102 | " user=os.environ['DB_USER'],\n", 103 | " password=os.environ['DB_PASS'],\n", 104 | " database=os.environ['DB_NAME'],\n", 105 | " port=os.environ['DB_PORT'],\n", 106 | " )\n", 107 | "\n", 108 | " # Configure proxy database to use configured postgres\n", 109 | " typer.secho('Initialising database connection...', fg=typer.colors.BRIGHT_BLACK)\n", 110 | " understatdb.db.DB.initialize(postgres_db)" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "## Migrate\n", 118 | "\n", 119 | "Migrating the database" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "#export\n", 129 | "\n", 130 | "\n", 131 | "@app.command()\n", 132 | "def migrate(interactive: bool = True):\n", 133 | " \"\"\" Migrate database to the current schema (as defined in nbs/db.ipynb) \"\"\"\n", 134 | " \n", 135 | " initialize_db()\n", 136 | " \n", 137 | " # Get names of tables generated by dbt and exclude them from the migration\n", 138 | " dbt_models_path = pyprojroot.here()/'dbt'/'models'\n", 139 | " dbt_tables = [f.stem for f in dbt_models_path.glob('**/*.sql')]\n", 140 | " \n", 141 | 
" # Migrate database tables\n", 142 | " typer.secho('Migrating database tables...', fg=typer.colors.BRIGHT_BLACK)\n", 143 | " understatdb.db.DB.evolve(\n", 144 | " ignore_tables=understatdb.db.EVOLVE_IGNORE_TABLES + dbt_tables,\n", 145 | " interactive=interactive\n", 146 | " )\n", 147 | " typer.secho('Done!', fg=typer.colors.GREEN, bold=True)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "## dbt runner\n", 155 | "\n", 156 | "(Re-)build tables from base data using dbt (data-build-tool)'s (unsupported) python API" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "#export\n", 166 | "\n", 167 | "\n", 168 | "@app.command()\n", 169 | "def build_tables(args: typing.List[str] = typer.Option([], help='Additional arguments passed to `dbt run`')):\n", 170 | " \"\"\" Build tables from base data using dbt \"\"\"\n", 171 | " \n", 172 | " project_dir = pyprojroot.here()/'dbt'\n", 173 | " profiles_dir = pyprojroot.here()/'.dbt'\n", 174 | " \n", 175 | " base_args = [\n", 176 | " 'run', \n", 177 | " '--profiles-dir', \n", 178 | " str(profiles_dir), \n", 179 | " '--project-dir', \n", 180 | " str(project_dir)\n", 181 | " ]\n", 182 | " \n", 183 | " # NOTE: Python API is not officially supported, so\n", 184 | " # watch out if you change dbt versions...\n", 185 | " typer.secho('Building tables with dbt', fg=typer.colors.BLUE)\n", 186 | " _ = dbt.main.handle_and_check(base_args + list(args))" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "metadata": {}, 192 | "source": [ 193 | "## Ingest\n", 194 | "\n", 195 | "Ingest base understat data and build understat tables from base data" 196 | ] 197 | }, 198 | { 199 | "cell_type": "code", 200 | "execution_count": null, 201 | "metadata": {}, 202 | "outputs": [], 203 | "source": [ 204 | "#export\n", 205 | "\n", 206 | "\n", 207 | "# Use the list of league *values* (i.e. 
strings) so that the help text shows\n", 208 | "# all the possible inputs the user can use\n", 209 | "_DEFAULT_INGEST_LEAGUES = [l.value for l in understatdb.understat.League]\n", 210 | "_DEFAULT_INGEST_SEASONS = list(range(2014, 2021))\n", 211 | "\n", 212 | "\n", 213 | "@app.command()\n", 214 | "def ingest(\n", 215 | " refresh: bool = False,\n", 216 | " leagues: typing.List[str] = typer.Option(\n", 217 | " _DEFAULT_INGEST_LEAGUES, \n", 218 | " help='Leagues to import', \n", 219 | " callback=lambda xs: [understatdb.understat.League(x) for x in xs]\n", 220 | " ), \n", 221 | " seasons: typing.List[int] = typer.Option(\n", 222 | " _DEFAULT_INGEST_SEASONS, \n", 223 | " help='Seasons to import (by start year)'\n", 224 | " ),\n", 225 | "):\n", 226 | " \"\"\" Ingest match and shot data from Understat.com \"\"\"\n", 227 | " \n", 228 | " initialize_db()\n", 229 | " client = understatdb.understat.Understat()\n", 230 | " \n", 231 | " for league, season in itertools.product(\n", 232 | " [understatdb.understat.League(l) for l in leagues], \n", 233 | " seasons\n", 234 | " ):\n", 235 | " # Add league & season to DB\n", 236 | " with understatdb.db.DB.atomic():\n", 237 | " db_league, _ = understatdb.db.League.get_or_create(name=league.value)\n", 238 | " db_season, _ = understatdb.db.Season.get_or_create(name=season)\n", 239 | " \n", 240 | " # Check if a record for this league and season already exists. If so, skip it.\n", 241 | " existing_record = understatdb.db.Matches.get_or_none(\n", 242 | " league_id=db_league.id, \n", 243 | " season_id=db_season.id\n", 244 | " )\n", 245 | " if not refresh and existing_record:\n", 246 | " typer.secho(\n", 247 | " f'Data for {league.value}, {season} already exists. Skipping. 
'\n", 248 | " 'To update data for this league and season, use the `--refresh` flag', \n", 249 | " fg=typer.colors.BRIGHT_BLACK\n", 250 | " )\n", 251 | " continue\n", 252 | " \n", 253 | " # Add match and shot data to DB\n", 254 | " typer.secho(f'Ingesting data for {league.value}, {season}', fg=typer.colors.BLUE)\n", 255 | " with understatdb.db.DB.atomic():\n", 256 | " \n", 257 | " # Fetch match data from understat\n", 258 | " matches = client.matches(league, season)\n", 259 | " \n", 260 | " # Delete any old match data\n", 261 | " if refresh:\n", 262 | " understatdb.db.Matches.delete().where(\n", 263 | " (understatdb.db.Matches.league_id==db_league.id) &\n", 264 | " (understatdb.db.Matches.season_id==db_season.id)\n", 265 | " ).execute()\n", 266 | " \n", 267 | " db_matches = understatdb.db.Matches.create(\n", 268 | " league_id=db_league.id,\n", 269 | " season_id=db_season.id,\n", 270 | " json=matches,\n", 271 | " version=understatdb.__version__\n", 272 | " )\n", 273 | " \n", 274 | " with typer.progressbar(matches, label=\"Shots\") as progress:\n", 275 | " for match in progress:\n", 276 | " if not match['isResult']:\n", 277 | " continue\n", 278 | " \n", 279 | " # Add an artificial crawl delay to avoid bombarding \n", 280 | " # understat with requests\n", 281 | " # There's no robots.txt or ToS available on the site,\n", 282 | " # So we just use a relatively conservative delay of\n", 283 | " # 5 seconds per (shots) request\n", 284 | " time.sleep(5)\n", 285 | " \n", 286 | " match_id = int(match['id'])\n", 287 | " shots = client.shots(match_id)\n", 288 | " \n", 289 | " # Delete any old shots data\n", 290 | " if refresh:\n", 291 | " understatdb.db.Shots.delete().where(\n", 292 | " understatdb.db.Shots.match_id==match_id\n", 293 | " ).execute()\n", 294 | " \n", 295 | " db_shots = understatdb.db.Shots.create(\n", 296 | " match_id=match_id,\n", 297 | " json=shots,\n", 298 | " version=understatdb.__version__\n", 299 | " )\n", 300 | " \n", 301 | " # Rebuild tables in dbt\n", 
302 | " build_tables(args=[])" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "---\n", 310 | "\n", 311 | "Finally, make the app executable from the command line" 312 | ] 313 | }, 314 | { 315 | "cell_type": "code", 316 | "execution_count": null, 317 | "metadata": {}, 318 | "outputs": [], 319 | "source": [ 320 | "#export\n", 321 | "\n", 322 | "# Try/except block seems to be the 'canonical'\n", 323 | "# way to export __name__ == __main__ in nbdev.\n", 324 | "# By excepting an ImportError, we don't have to\n", 325 | "# include nbdev as a runtime dependency (only a\n", 326 | "# development dependency).\n", 327 | "# \n", 328 | "# See: \n", 329 | "# * https://pete88b.github.io/fastpages/nbdev/fastai/jupyter/2020/07/24/nbdev-deep-dive.html#Export-a-if-__name__-==-\n", 330 | "# * https://forums.fast.ai/t/nbdev-is-there-a-way-to-export-a-if-name-main-clause/73050/3\n", 331 | "try:\n", 332 | " from nbdev.imports import IN_NOTEBOOK\n", 333 | "except ImportError: \n", 334 | " IN_NOTEBOOK = False\n", 335 | "\n", 336 | "if __name__ == '__main__' and not IN_NOTEBOOK:\n", 337 | " app()" 338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": null, 343 | "metadata": {}, 344 | "outputs": [], 345 | "source": [] 346 | } 347 | ], 348 | "metadata": { 349 | "kernelspec": { 350 | "display_name": "Python 3", 351 | "language": "python", 352 | "name": "python3" 353 | } 354 | }, 355 | "nbformat": 4, 356 | "nbformat_minor": 2 357 | } 358 | -------------------------------------------------------------------------------- /nbs/db.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "aed8359a", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# default_exp db" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "c5a1fc64", 16 | "metadata": {}, 17 | "source": [ 18 | "# 
Database\n", 19 | "\n", 20 | "> Database schema and so on" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "id": "71781ec1", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "#hide\n", 31 | "from nbdev.showdoc import *" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "734138bc", 37 | "metadata": {}, 38 | "source": [] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "id": "11c00bde", 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "#export\n", 48 | "import functools\n", 49 | "\n", 50 | "import peewee\n", 51 | "import peeweedbevolve\n", 52 | "import playhouse.postgres_ext\n", 53 | "\n", 54 | "\n", 55 | "DB = peewee.DatabaseProxy()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "id": "91deeead", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "#export\n", 66 | "\n", 67 | "EVOLVE_IGNORE_TABLES = []\n", 68 | "\n", 69 | "\n", 70 | "def evolve_ignore(cls, registry=EVOLVE_IGNORE_TABLES):\n", 71 | " registry.append(cls._meta.table_name)\n", 72 | " return cls\n", 73 | "\n", 74 | "\n", 75 | "def prefixed_snake_case(prefix, cls):\n", 76 | " return prefix + peewee.make_snake_case(cls.__name__)\n", 77 | "\n", 78 | "\n", 79 | "@evolve_ignore\n", 80 | "class BaseModel(peewee.Model):\n", 81 | " \"\"\"\n", 82 | " A model for base (json) data from Understat\n", 83 | " \"\"\"\n", 84 | " class Meta:\n", 85 | " database = DB\n", 86 | " legacy_table_names = False\n", 87 | " table_function = functools.partial(prefixed_snake_case, 'base_')" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "dd83c059", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "#export\n", 98 | "\n", 99 | "\n", 100 | "class League(BaseModel):\n", 101 | " id = peewee.PrimaryKeyField()\n", 102 | " name = peewee.TextField()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | 
"id": "dc294dd3", 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "#export\n", 113 | "\n", 114 | "\n", 115 | "class Season(BaseModel):\n", 116 | " id = peewee.PrimaryKeyField()\n", 117 | " name = peewee.TextField()" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "id": "26e2dee4", 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "#export\n", 128 | "\n", 129 | "\n", 130 | "class Matches(BaseModel):\n", 131 | " id = peewee.PrimaryKeyField()\n", 132 | " \n", 133 | " # Since we're ingesting the data before reshaping\n", 134 | " # and testing with dbt (ELT not ETL), we might prefer \n", 135 | " # flexibility to the correctness that a FK provides.\n", 136 | " # But since understat don't actually provide any\n", 137 | " # IDs (afaik, it's all indexed by league *name*),\n", 138 | " # we're going to be making our own IDs at some point\n", 139 | " # anyway. So I prefer to do that with the guarantees\n", 140 | " # of a FK.\n", 141 | " league_id = peewee.ForeignKeyField(League)\n", 142 | " \n", 143 | " # Similar logic to above (`league_id`),\n", 144 | " # we're creating our own season IDs.\n", 145 | " # Understat indexes by season start year,\n", 146 | " # so we could conceivably just use an \n", 147 | " # integer field and refactor if that assumption\n", 148 | " # is ever violated.\n", 149 | " season_id = peewee.ForeignKeyField(Season)\n", 150 | " \n", 151 | " # Dump the scraped JSON as JSON\n", 152 | " # We'll clean it up in dbt\n", 153 | " json = playhouse.postgres_ext.JSONField()\n", 154 | " \n", 155 | " # Store the app version, so that we can parse\n", 156 | " # the JSON differently should the format change in\n", 157 | " # the future.\n", 158 | " version = peewee.TextField()\n", 159 | "\n", 160 | " class Meta:\n", 161 | " indexes = (\n", 162 | " # Force uniqueness on the combination of \n", 163 | " # league and season ID.\n", 164 | " # We should only ever have one row per\n", 165 | " # league, 
per season!\n", 166 | " (('league_id', 'season_id'), True),\n", 167 | " )" 168 | ] 169 | }, 170 | { 171 | "cell_type": "code", 172 | "execution_count": null, 173 | "id": "aaba8d21", 174 | "metadata": {}, 175 | "outputs": [], 176 | "source": [ 177 | "#export\n", 178 | "\n", 179 | "\n", 180 | "class Shots(BaseModel):\n", 181 | " id = peewee.PrimaryKeyField()\n", 182 | " \n", 183 | " # This time, we're using Understat's IDs,\n", 184 | " # so we aren't interested in using a FK for\n", 185 | " # match ID.\n", 186 | " match_id = peewee.IntegerField(unique=True)\n", 187 | " \n", 188 | " # Dump the scraped JSON as JSON again\n", 189 | " json = playhouse.postgres_ext.JSONField()\n", 190 | " \n", 191 | " version = peewee.TextField()" 192 | ] 193 | } 194 | ], 195 | "metadata": { 196 | "kernelspec": { 197 | "display_name": "Python 3", 198 | "language": "python", 199 | "name": "python3" 200 | } 201 | }, 202 | "nbformat": 4, 203 | "nbformat_minor": 5 204 | } 205 | -------------------------------------------------------------------------------- /nbs/index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Understat DB\n", 8 | "\n", 9 | "> Create a database using data from [Understat](understat.com)." 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "Understat DB is a project to scrape data from [Understat](understat.com) and store it in a Postgres database. It aims to be a useful companion or starting point for projects using football data." 
17 | ] 18 | }, 19 | { 20 | "cell_type": "markdown", 21 | "metadata": {}, 22 | "source": [ 23 | "## Usage\n", 24 | "\n", 25 | "The simplest way to get started is to populate a local database with `docker-compose`.\n", 26 | "\n", 27 | "First, clone the repository:\n", 28 | "\n", 29 | "```bash\n", 30 | "git clone https://github.com/Torvaney/understat-db.git\n", 31 | "cd understat-db\n", 32 | "```\n", 33 | "\n", 34 | "Then, setup the local environment\n", 35 | "\n", 36 | "```bash\n", 37 | "make env # Create a virtualenv and installs the project & dependencies\n", 38 | "source venv/bin/activate # Activate the virtualenv\n", 39 | "cp .env.sample .env # Copy default environment vars to .env\n", 40 | "```\n", 41 | "\n", 42 | "Run the database\n", 43 | "\n", 44 | "```bash\n", 45 | "docker-compose up -d db # Start a postgres database within a docker container\n", 46 | "understat-db migrate # Create base database tables\n", 47 | "```\n", 48 | "\n", 49 | "Finally, import the data you want\n", 50 | "\n", 51 | "```bash\n", 52 | "understat-db ingest --leagues EPL --seasons 2020\n", 53 | "```\n", 54 | "\n", 55 | "## Requirements\n", 56 | "\n", 57 | "To run this project you will need:\n", 58 | "\n", 59 | "* Python 3.6+\n", 60 | "* Docker\n", 61 | "\n", 62 | "\n", 63 | "## Contributing\n", 64 | "\n", 65 | "Pull requests are encouraged! 
For major changes, please open an issue first to discuss what you would like to change.\n", 66 | "\n", 67 | "## License\n", 68 | "\n", 69 | "[MIT](https://choosealicense.com/licenses/mit/)" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | } 84 | }, 85 | "nbformat": 4, 86 | "nbformat_minor": 2 87 | } 88 | -------------------------------------------------------------------------------- /nbs/understat.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "97dba7b0", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "# default_exp understat" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "55d3555a", 16 | "metadata": {}, 17 | "source": [ 18 | "# Understat\n", 19 | "\n", 20 | "> A module for fetching data from understat.com" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "id": "412c74ea", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "#hide\n", 31 | "from nbdev.showdoc import *" 32 | ] 33 | }, 34 | { 35 | "cell_type": "markdown", 36 | "id": "44ae6d75", 37 | "metadata": {}, 38 | "source": [ 39 | "## Helper functions" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "ea2022fc", 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "#export\n", 50 | "import enum\n", 51 | "import re\n", 52 | "import json\n", 53 | "\n", 54 | "import requests\n", 55 | "import bs4\n", 56 | "\n", 57 | "\n", 58 | "def fetch_html(url):\n", 59 | " \"\"\"\n", 60 | " Fetch HTML and decode into a `bs4.BeautifulSoup` object\n", 61 | " \"\"\"\n", 62 | " r = requests.get(url)\n", 63 | " r.raise_for_status()\n", 64 | " return bs4.BeautifulSoup(str(r.content, 
'unicode-escape'), features='html.parser')\n", 65 | " \n", 66 | " \n", 67 | "def extract_json(soup, json_var):\n", 68 | " \"\"\" Extract a JSON variable from understat HTML. \"\"\"\n", 69 | " node, *__ = [s for s in soup.select('script') if s.string and json_var in s.string]\n", 70 | " \n", 71 | " # Clean string by removing and newlines (\\n) and tabs (\\t)\n", 72 | " node_string = ' '.join(node.string.split())\n", 73 | " \n", 74 | " json_value = re.match(f\"var {json_var} = JSON\\.parse\\(\\'(?P.*?)\\'\\)\", node_string).group('json')\n", 75 | " return json.loads(json_value)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "markdown", 80 | "id": "7429fd04", 81 | "metadata": {}, 82 | "source": [ 83 | "## Understat 'API'" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "id": "7c0aa45f", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "#export\n", 94 | "\n", 95 | "\n", 96 | "# 'Competition' might be a better name, but let's stick with understat's terminology\n", 97 | "class League(enum.Enum): \n", 98 | " \"\"\"\n", 99 | " Understat leagues\n", 100 | " \"\"\"\n", 101 | " EPL = 'EPL'\n", 102 | " LA_LIGA = 'La_Liga'\n", 103 | " SERIE_A = 'Serie_A'\n", 104 | " BUNDESLIGA = 'Bundesliga'\n", 105 | " LIGUE_1 = 'Ligue_1'\n", 106 | " RPL = 'RPL'" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "id": "791b1e7f", 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [ 116 | "#export\n", 117 | "\n", 118 | "class Understat:\n", 119 | " \"\"\"\n", 120 | " Fetches understat data webpages\n", 121 | " \"\"\"\n", 122 | " \n", 123 | " def __init__(self, base_url: str='https://understat.com'):\n", 124 | " self.base_url = base_url\n", 125 | " \n", 126 | " def matches(self, league: League, season: int):\n", 127 | " \"\"\" Fetch match data for a given `league` and `season` (start year). 
\"\"\"\n", 128 | " league_url = f'{self.base_url}/league/{league.value}/{season}'\n", 129 | " soup = fetch_html(league_url)\n", 130 | " return extract_json(soup, 'datesData')\n", 131 | " \n", 132 | " def shots(self, match_id: int):\n", 133 | " match_url = f'{self.base_url}/match/{match_id}'\n", 134 | " soup = fetch_html(match_url)\n", 135 | " return extract_json(soup, 'shotsData')" 136 | ] 137 | }, 138 | { 139 | "cell_type": "markdown", 140 | "id": "25c6daa4", 141 | "metadata": {}, 142 | "source": [ 143 | "Fetch matches from Understat" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": null, 149 | "id": "fbb9f99e", 150 | "metadata": {}, 151 | "outputs": [ 152 | { 153 | "data": { 154 | "text/plain": [ 155 | "{'id': '11660',\n", 156 | " 'isResult': True,\n", 157 | " 'h': {'id': '238', 'title': 'Sheffield United', 'short_title': 'SHE'},\n", 158 | " 'a': {'id': '78', 'title': 'Crystal Palace', 'short_title': 'CRY'},\n", 159 | " 'goals': {'h': '1', 'a': '0'},\n", 160 | " 'xG': {'h': '1.84778', 'a': '0.241912'},\n", 161 | " 'datetime': '2019-08-18 14:00:00',\n", 162 | " 'forecast': {'w': '0.8326', 'd': '0.1408', 'l': '0.0266'}}" 163 | ] 164 | }, 165 | "execution_count": null, 166 | "metadata": {}, 167 | "output_type": "execute_result" 168 | } 169 | ], 170 | "source": [ 171 | "understat = Understat()\n", 172 | "\n", 173 | "matches = understat.matches(League.EPL, 2019)[17]\n", 174 | "matches" 175 | ] 176 | }, 177 | { 178 | "cell_type": "markdown", 179 | "id": "eb666678", 180 | "metadata": {}, 181 | "source": [ 182 | "Fetch individual match shots" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": null, 188 | "id": "50c01835", 189 | "metadata": {}, 190 | "outputs": [ 191 | { 192 | "data": { 193 | "text/plain": [ 194 | "{'id': '311085',\n", 195 | " 'minute': '25',\n", 196 | " 'result': 'BlockedShot',\n", 197 | " 'X': '0.899000015258789',\n", 198 | " 'Y': '0.5609999847412109',\n", 199 | " 'xG': '0.07507339864969254',\n", 200 
| " 'player': 'Jack O'Connell',\n", 201 | " 'h_a': 'h',\n", 202 | " 'player_id': '7705',\n", 203 | " 'situation': 'FromCorner',\n", 204 | " 'season': '2019',\n", 205 | " 'shotType': 'LeftFoot',\n", 206 | " 'match_id': '11660',\n", 207 | " 'h_team': 'Sheffield United',\n", 208 | " 'a_team': 'Crystal Palace',\n", 209 | " 'h_goals': '1',\n", 210 | " 'a_goals': '0',\n", 211 | " 'date': '2019-08-18 14:00:00',\n", 212 | " 'player_assisted': 'Oliver Norwood',\n", 213 | " 'lastAction': 'Pass'}" 214 | ] 215 | }, 216 | "execution_count": null, 217 | "metadata": {}, 218 | "output_type": "execute_result" 219 | } 220 | ], 221 | "source": [ 222 | "shots = understat.shots(11660)\n", 223 | "\n", 224 | "# Take the home team's 5th shot\n", 225 | "shots['h'][5]" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": null, 231 | "id": "5027c2cb", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [] 235 | } 236 | ], 237 | "metadata": { 238 | "kernelspec": { 239 | "display_name": "Python 3", 240 | "language": "python", 241 | "name": "python3" 242 | } 243 | }, 244 | "nbformat": 4, 245 | "nbformat_minor": 5 246 | } 247 | -------------------------------------------------------------------------------- /settings.ini: -------------------------------------------------------------------------------- 1 | [DEFAULT] 2 | # All sections below are required unless otherwise specified 3 | host = github 4 | lib_name = understatdb 5 | repo_name = understat-db 6 | 7 | user = torvaney 8 | description = An extendable project for creating a database of soccer data 9 | keywords = soccer-analytics football-analytics understat 10 | author = Ben Torvaney 11 | author_email = torvaney@protonmail.com 12 | branch = master 13 | version = 0.0.1 14 | min_python = 3.6 15 | audience = Developers 16 | language = English 17 | # Set to True if you want to create a more fancy sidebar.json than the default 18 | custom_sidebar = False 19 | # Add licenses and see current list in `setup.py` 20 | 
license = mit 21 | copyright = Ben Torvaney # Required arg :( 22 | # From 1-7: Planning Pre-Alpha Alpha Beta Production Mature Inactive 23 | status = 2 24 | 25 | # Optional. Same format as setuptools requirements 26 | requirements = 27 | typer 28 | requests 29 | bs4 30 | peewee 31 | peewee-db-evolve 32 | psycopg2-binary 33 | python-dotenv 34 | dbt-core==0.19.1 35 | dbt-postgres==0.19.1 36 | pyprojroot 37 | dev_requirements = 38 | jupyter 39 | nbdev 40 | 41 | console_scripts = 42 | understat-db=understatdb.cli:app 43 | # Optional. Same format as setuptools dependency-links 44 | # dep_links = 45 | 46 | ### 47 | # You probably won't need to change anything under here, 48 | # unless you have some special requirements 49 | ### 50 | 51 | # Change to, e.g. "nbs", to put your notebooks in nbs dir instead of repo root 52 | nbs_path = nbs 53 | doc_path = docs 54 | 55 | # Whether to look for library notebooks recursively in the `nbs_path` dir 56 | recursive = False 57 | 58 | # Anything shown as '%(...)s' is substituted with that setting automatically 59 | doc_host = https://%(user)s.github.io 60 | #For Enterprise Git pages use: 61 | #doc_host = https://pages.github.%(company_name)s.com. 62 | 63 | 64 | doc_baseurl = /%(lib_name)s/ 65 | # For Enterprise Github pages docs use: 66 | # doc_baseurl = /%(repo_name)s/%(lib_name)s/ 67 | 68 | git_url = https://github.com/%(user)s/%(lib_name)s/tree/%(branch)s/ 69 | # For Enterprise Github use: 70 | #git_url = https://github.%(company_name)s.com/%(repo_name)s/%(lib_name)s/tree/%(branch)s/ 71 | 72 | 73 | 74 | lib_path = %(lib_name)s 75 | title = %(lib_name)s 76 | 77 | #Optional advanced parameters 78 | #Monospace docstings: adds
     <pre> tags around the doc strings, preserving newlines/indentation.
    79 | #monospace_docstrings = False
    80 | #Test flags: introduce here the test flags you want to use separated by |
    81 | #tst_flags =
    82 | #Custom sidebar: customize sidebar.json yourself for advanced sidebars (False/True)
    83 | #custom_sidebar =
    84 | #Cell spacing: if you want cell blocks in code separated by more than one new line
    85 | #cell_spacing =
    86 | #Custom jekyll styles: if you want more jekyll styles than tip/important/warning, set them here
    87 | #jekyll_styles = note,warning,tip,important
    88 | 
    
    
    --------------------------------------------------------------------------------
    /setup.py:
    --------------------------------------------------------------------------------
     1 | from pkg_resources import parse_version
     2 | from configparser import ConfigParser
     3 | import setuptools,re,sys
     4 | assert parse_version(setuptools.__version__)>=parse_version('36.2')
     5 | 
     6 | # note: all settings are in settings.ini; edit there, not here
     7 | config = ConfigParser(delimiters=['='])
     8 | config.read('settings.ini')
     9 | cfg = config['DEFAULT']
    10 | 
    11 | cfg_keys = 'version description keywords author author_email'.split()
    12 | expected = cfg_keys + "lib_name user branch license status min_python audience language".split()
    13 | for o in expected: assert o in cfg, "missing expected setting: {}".format(o)
    14 | setup_cfg = {o:cfg[o] for o in cfg_keys}
    15 | 
    16 | if len(sys.argv)>1 and sys.argv[1]=='version':
    17 |     print(setup_cfg['version'])
    18 |     exit()
    19 | 
    20 | licenses = {
    21 |     'apache2': ('Apache Software License 2.0','OSI Approved :: Apache Software License'),
    22 |     'mit': ('MIT License', 'OSI Approved :: MIT License'),
    23 |     'gpl2': ('GNU General Public License v2', 'OSI Approved :: GNU General Public License v2 (GPLv2)'),
    24 |     'gpl3': ('GNU General Public License v3', 'OSI Approved :: GNU General Public License v3 (GPLv3)'),
    25 |     'bsd3': ('BSD License', 'OSI Approved :: BSD License'),
    26 | }
    27 | statuses = [ '1 - Planning', '2 - Pre-Alpha', '3 - Alpha',
    28 |     '4 - Beta', '5 - Production/Stable', '6 - Mature', '7 - Inactive' ]
    29 | py_versions = '2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8'.split()
    30 | 
    31 | lic = licenses.get(cfg['license'].lower(), (cfg['license'], None))
    32 | min_python = cfg['min_python']
    33 | 
    34 | requirements = ['pip', 'packaging']
    35 | if cfg.get('requirements'):
    36 |     requirements += cfg.get('requirements', '').split()
    37 | if cfg.get('pip_requirements'):
    38 |     requirements += cfg.get('pip_requirements', '').split()
    39 | dev_requirements = (cfg.get('dev_requirements') or '').split()
    40 | 
    41 | long_description = open('README.md').read()
    42 | # ![png](docs/images/output_13_0.png)
    43 | for ext in ['png', 'svg']:
    44 |     long_description = re.sub(r'!\['+ext+'\]\((.*)\)', '!['+ext+']('+'https://raw.githubusercontent.com/{}/{}'.format(cfg['user'],cfg['lib_name'])+'/'+cfg['branch']+'/\\1)', long_description)
    45 |     long_description = re.sub(r'src=\"(.*)\.'+ext+'\"', 'src=\"https://raw.githubusercontent.com/{}/{}'.format(cfg['user'],cfg['lib_name'])+'/'+cfg['branch']+'/\\1.'+ext+'\"', long_description)
    46 | 
    47 | setuptools.setup(
    48 |     name = cfg['lib_name'],
    49 |     license = lic[0],
    50 |     classifiers = [
    51 |         'Development Status :: ' + statuses[int(cfg['status'])],
    52 |         'Intended Audience :: ' + cfg['audience'].title(),
    53 |         'Natural Language :: ' + cfg['language'].title(),
    54 |     ] + ['Programming Language :: Python :: '+o for o in py_versions[py_versions.index(min_python):]] + (['License :: ' + lic[1] ] if lic[1] else []),
    55 |     url = cfg['git_url'],
    56 |     packages = setuptools.find_packages(),
    57 |     include_package_data = True,
    58 |     install_requires = requirements,
    59 |     extras_require={ 'dev': dev_requirements },
    60 |     python_requires  = '>=' + cfg['min_python'],
    61 |     long_description = long_description,
    62 |     long_description_content_type = 'text/markdown',
    63 |     zip_safe = False,
    64 |     entry_points = { 'console_scripts': cfg.get('console_scripts','').split() },
    65 |     **setup_cfg)
    66 | 
    
    
    --------------------------------------------------------------------------------
    /understatdb/__init__.py:
    --------------------------------------------------------------------------------
    1 | __version__ = "0.0.1"
    2 | 
    3 | from . import (
    4 |     db,
    5 |     understat
    6 | )
    7 | 
    
    
    --------------------------------------------------------------------------------
    /understatdb/_nbdev.py:
    --------------------------------------------------------------------------------
     1 | # AUTOGENERATED BY NBDEV! DO NOT EDIT!
     2 | 
     3 | __all__ = ["index", "modules", "custom_doc_links", "git_url"]
     4 | 
     5 | index = {"EnvTyper": "cli.ipynb",
     6 |          "app": "cli.ipynb",
     7 |          "initialize_db": "cli.ipynb",
     8 |          "migrate": "cli.ipynb",
     9 |          "build_tables": "cli.ipynb",
    10 |          "ingest": "cli.ipynb",
    11 |          "DB": "db.ipynb",
    12 |          "evolve_ignore": "db.ipynb",
    13 |          "prefixed_snake_case": "db.ipynb",
    14 |          "BaseModel": "db.ipynb",
    15 |          "EVOLVE_IGNORE_TABLES": "db.ipynb",
    16 |          "League": "understat.ipynb",
    17 |          "Season": "db.ipynb",
    18 |          "Matches": "db.ipynb",
    19 |          "Shots": "db.ipynb",
    20 |          "fetch_html": "understat.ipynb",
    21 |          "extract_json": "understat.ipynb",
    22 |          "Understat": "understat.ipynb"}
    23 | 
    24 | modules = ["cli.py",
    25 |            "db.py",
    26 |            "understat.py"]
    27 | 
    28 | doc_url = "https://torvaney.github.io/understatdb/"
    29 | 
    30 | git_url = "https://github.com/torvaney/understatdb/tree/master/"
    31 | 
    32 | def custom_doc_links(name): return None
    33 | 
    
    
    --------------------------------------------------------------------------------
    /understatdb/cli.py:
    --------------------------------------------------------------------------------
      1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/cli.ipynb (unless otherwise specified).
      2 | 
      3 | __all__ = ['EnvTyper', 'app', 'migrate', 'build_tables', 'ingest']
      4 | 
      5 | # Internal Cell
      6 | import os
      7 | import functools
      8 | import itertools
      9 | import pathlib
     10 | import typing
     11 | import time
     12 | 
     13 | import dbt.main
     14 | import dotenv
     15 | import playhouse.postgres_ext
     16 | import pyprojroot
     17 | import typer
     18 | 
     19 | import understatdb
     20 | 
     21 | # Cell
     22 | 
     23 | 
     24 | class EnvTyper(typer.Typer):
     25 |     """
     26 |     Just like typer.Typer, except it loads the environment with
     27 |     `dotenv.load_dotenv` before executing any command.
     28 |     """
     29 |     def __call__(self, *args, **kwargs):
     30 |         dotenv.load_dotenv()
     31 |         return super().__call__(*args, **kwargs)
     32 | 
     33 | 
     34 | 
     35 | app = EnvTyper()
     36 | 
     37 | # Internal Cell
     38 | 
     39 | 
     40 | def initialize_db():
     41 |     """
     42 |     Load database config from environment and initialise
     43 |     `understatdb.db.DB` with a database connection.
     44 |     """
     45 | 
     46 |     # Load database config from environment
     47 |     postgres_db = playhouse.postgres_ext.PostgresqlExtDatabase(
     48 |         host=os.environ['DB_HOST'],
     49 |         user=os.environ['DB_USER'],
     50 |         password=os.environ['DB_PASS'],
     51 |         database=os.environ['DB_NAME'],
     52 |         port=os.environ['DB_PORT'],
     53 |     )
     54 | 
     55 |     # Configure proxy database to use configured postgres
     56 |     typer.secho('Initialising database connection...', fg=typer.colors.BRIGHT_BLACK)
     57 |     understatdb.db.DB.initialize(postgres_db)
     58 | 
     59 | # Cell
     60 | 
     61 | 
     62 | @app.command()
     63 | def migrate(interactive: bool = True):
     64 |     """ Migrate database to the current schema (as defined in nbs/db.ipynb) """
     65 | 
     66 |     initialize_db()
     67 | 
     68 |     # Get names of tables generated by dbt and exclude them from the migration
     69 |     dbt_models_path = pyprojroot.here()/'dbt'/'models'
     70 |     dbt_tables = [f.stem for f in dbt_models_path.glob('**/*.sql')]
     71 | 
     72 |     # Migrate database tables
     73 |     typer.secho('Migrating database tables...', fg=typer.colors.BRIGHT_BLACK)
     74 |     understatdb.db.DB.evolve(
     75 |         ignore_tables=understatdb.db.EVOLVE_IGNORE_TABLES + dbt_tables,
     76 |         interactive=interactive
     77 |     )
     78 |     typer.secho('Done!', fg=typer.colors.GREEN, bold=True)
     79 | 
     80 | # Cell
     81 | 
     82 | 
     83 | @app.command()
     84 | def build_tables(args: typing.List[str] = typer.Option([], help='Additional arguments passed to `dbt run`')):
     85 |     """ Build tables from base data using dbt """
     86 | 
     87 |     project_dir = pyprojroot.here()/'dbt'
     88 |     profiles_dir = pyprojroot.here()/'.dbt'
     89 | 
     90 |     base_args = [
     91 |         'run',
     92 |         '--profiles-dir',
     93 |         str(profiles_dir),
     94 |         '--project-dir',
     95 |         str(project_dir)
     96 |     ]
     97 | 
     98 |     # NOTE: Python API is not officially supported, so
     99 |     # watch out if you change dbt versions...
    100 |     typer.secho('Building tables with dbt', fg=typer.colors.BLUE)
    101 |     _ = dbt.main.handle_and_check(base_args + list(args))
    102 | 
    103 | # Cell
    104 | 
    105 | 
    106 | # Use the list of league *values* (i.e. strings) so that the help text shows
    107 | # all the possible inputs the user can use
    108 | _DEFAULT_INGEST_LEAGUES = [l.value for l in understatdb.understat.League]
    109 | _DEFAULT_INGEST_SEASONS = list(range(2014, 2021))
    110 | 
    111 | 
    112 | @app.command()
    113 | def ingest(
    114 |     refresh: bool = False,
    115 |     leagues: typing.List[str] = typer.Option(
    116 |         _DEFAULT_INGEST_LEAGUES,
    117 |         help='Leagues to import',
    118 |         callback=lambda xs: [understatdb.understat.League(x) for x in xs]
    119 |     ),
    120 |     seasons: typing.List[int] = typer.Option(
    121 |         _DEFAULT_INGEST_SEASONS,
    122 |         help='Seasons to import (by start year)'
    123 |     ),
    124 | ):
    125 |     """ Ingest match and shot data from Understat.com """
    126 | 
    127 |     initialize_db()
    128 |     client = understatdb.understat.Understat()
    129 | 
    130 |     for league, season in itertools.product(
    131 |         [understatdb.understat.League(l) for l in leagues],
    132 |         seasons
    133 |     ):
    134 |         # Add league & season to DB
    135 |         with understatdb.db.DB.atomic():
    136 |             db_league, _ = understatdb.db.League.get_or_create(name=league.value)
    137 |             db_season, _ = understatdb.db.Season.get_or_create(name=season)
    138 | 
    139 |         # Check if a record for this league and season already exists. If so, skip it.
    140 |         existing_record = understatdb.db.Matches.get_or_none(
    141 |             league_id=db_league.id,
    142 |             season_id=db_season.id
    143 |         )
    144 |         if not refresh and existing_record:
    145 |             typer.secho(
    146 |                 f'Data for {league.value}, {season} already exists. Skipping. '
    147 |                 'To update data for this league and season, use the `--refresh` flag',
    148 |                 fg=typer.colors.BRIGHT_BLACK
    149 |             )
    150 |             continue
    151 | 
    152 |         # Add match and shot data to DB
    153 |         typer.secho(f'Ingesting data for {league.value}, {season}', fg=typer.colors.BLUE)
    154 |         with understatdb.db.DB.atomic():
    155 | 
    156 |             # Fetch match data from understat
    157 |             matches = client.matches(league, season)
    158 | 
    159 |             # Delete any old match data
    160 |             if refresh:
    161 |                 understatdb.db.Matches.delete().where(
    162 |                     (understatdb.db.Matches.league_id==db_league.id) &
    163 |                     (understatdb.db.Matches.season_id==db_season.id)
    164 |                 ).execute()
    165 | 
    166 |             db_matches = understatdb.db.Matches.create(
    167 |                 league_id=db_league.id,
    168 |                 season_id=db_season.id,
    169 |                 json=matches,
    170 |                 version=understatdb.__version__
    171 |             )
    172 | 
    173 |             with typer.progressbar(matches, label="Shots") as progress:
    174 |                 for match in progress:
    175 |                     if not match['isResult']:
    176 |                         continue
    177 | 
    178 |                     # Add an artificial crawl delay to avoid bombarding
    179 |                     # understat with requests
    180 |                     # There's no robots.txt or ToS available on the site,
    181 |                     # So we just use a relatively conservative delay of
    182 |                     # 5 seconds per (shots) request
    183 |                     time.sleep(5)
    184 | 
    185 |                     match_id = int(match['id'])
    186 |                     shots = client.shots(match_id)
    187 | 
    188 |                     # Delete any old shots data
    189 |                     if refresh:
    190 |                         understatdb.db.Shots.delete().where(
    191 |                             understatdb.db.Shots.match_id==match_id
    192 |                         ).execute()
    193 | 
    194 |                     db_shots = understatdb.db.Shots.create(
    195 |                         match_id=match_id,
    196 |                         json=shots,
    197 |                         version=understatdb.__version__
    198 |                     )
    199 | 
    200 |     # Rebuild tables in dbt
    201 |     build_tables(args=[])
    202 | 
    203 | # Cell
    204 | 
    205 | # Try/except block seems to be the 'canonical'
    206 | # way to export __name__ == __main__ in nbdev.
    207 | # By excepting an ImportError, we don't have to
    208 | # include nbdev as a runtime dependency (only a
    209 | # development dependency).
    210 | #
    211 | # See:
    212 | #  * https://pete88b.github.io/fastpages/nbdev/fastai/jupyter/2020/07/24/nbdev-deep-dive.html#Export-a-if-__name__-==-
    213 | #  * https://forums.fast.ai/t/nbdev-is-there-a-way-to-export-a-if-name-main-clause/73050/3
    214 | try:
    215 |     from nbdev.imports import IN_NOTEBOOK
    216 | except ImportError:
    217 |     IN_NOTEBOOK = False
    218 | 
    219 | if __name__ == '__main__' and not IN_NOTEBOOK:
    220 |     app()
    
    
    --------------------------------------------------------------------------------
    /understatdb/db.py:
    --------------------------------------------------------------------------------
      1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/db.ipynb (unless otherwise specified).
      2 | 
      3 | __all__ = ['DB', 'evolve_ignore', 'prefixed_snake_case', 'BaseModel', 'EVOLVE_IGNORE_TABLES', 'League', 'Season',
      4 |            'Matches', 'Shots']
      5 | 
      6 | # Cell
      7 | import functools
      8 | 
      9 | import peewee
     10 | import peeweedbevolve
     11 | import playhouse.postgres_ext
     12 | 
     13 | 
     14 | DB = peewee.DatabaseProxy()
     15 | 
     16 | # Cell
     17 | 
     18 | EVOLVE_IGNORE_TABLES = []
     19 | 
     20 | 
     21 | def evolve_ignore(cls, registry=EVOLVE_IGNORE_TABLES):
     22 |     registry.append(cls._meta.table_name)
     23 |     return cls
     24 | 
     25 | 
     26 | def prefixed_snake_case(prefix, cls):
     27 |     return prefix + peewee.make_snake_case(cls.__name__)
     28 | 
     29 | 
# The decorator adds this class's own table name to EVOLVE_IGNORE_TABLES
@evolve_ignore
class BaseModel(peewee.Model):
    """
    A model for base (json) data from Understat

    Subclasses share the `DB` proxy and get table names of the form
    `base_<snake_case class name>`.
    """
    class Meta:
        # Proxy database; bound to a real connection by `DB.initialize`
        database = DB
        legacy_table_names = False
        # Derive table names as 'base_' + snake_case(class name)
        table_function = functools.partial(prefixed_snake_case, 'base_')
     39 | 
     40 | # Cell
     41 | 
     42 | 
class League(BaseModel):
    """
    A league, stored by name.
    """
    id = peewee.PrimaryKeyField()
    # presumably the Understat league name (e.g. 'EPL') - verify against callers
    name = peewee.TextField()
     46 | 
     47 | # Cell
     48 | 
     49 | 
class Season(BaseModel):
    """
    A season, stored by name.
    """
    id = peewee.PrimaryKeyField()
    # presumably the season's start year, stored as text - verify against callers
    name = peewee.TextField()
     53 | 
     54 | # Cell
     55 | 
     56 | 
class Matches(BaseModel):
    """
    Scraped match JSON, with at most one row per league and season.
    """
    id = peewee.PrimaryKeyField()

    # Since we're ingesting the data before reshaping
    # and testing with dbt (ELT not ETL), we might prefer
    # flexibility to the correctness that a FK provides.
    # But since Understat doesn't actually provide any
    # IDs (afaik, it's all indexed by league *name*),
    # we're going to be making our own IDs at some point
    # anyway. So I prefer to do that with the guarantees
    # of a FK.
    league_id = peewee.ForeignKeyField(League)

    # Similar logic to above (`league_id`):
    # we're creating our own season IDs.
    # Understat indexes by season start year,
    # so we could conceivably just use an
    # integer field and refactor if that assumption
    # is ever violated.
    season_id = peewee.ForeignKeyField(Season)

    # Dump the scraped JSON as JSON
    # We'll clean it up in dbt
    json = playhouse.postgres_ext.JSONField()

    # Store the app version, so that we can parse
    # the JSON differently should the format change in
    # the future.
    version = peewee.TextField()

    class Meta:
        indexes = (
            # Force uniqueness on the combination of
            # league and season ID (the trailing `True`
            # marks the index as unique).
            # We should only ever have one row per
            # league, per season!
            (('league_id', 'season_id'), True),
        )
     95 | 
     96 | # Cell
     97 | 
     98 | 
class Shots(BaseModel):
    """
    Scraped shot JSON, with at most one row per match.
    """
    id = peewee.PrimaryKeyField()

    # This time, we're using Understat's IDs,
    # so we aren't interested in using a FK for
    # match ID. The unique constraint limits the
    # table to a single row per match.
    match_id = peewee.IntegerField(unique=True)

    # Dump the scraped JSON as JSON again
    json = playhouse.postgres_ext.JSONField()

    # App version that wrote the row (same purpose as `Matches.version`)
    version = peewee.TextField()
    
    
    --------------------------------------------------------------------------------
    /understatdb/understat.py:
    --------------------------------------------------------------------------------
     1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/understat.ipynb (unless otherwise specified).
     2 | 
     3 | __all__ = ['fetch_html', 'extract_json', 'League', 'Understat']
     4 | 
     5 | # Cell
     6 | import enum
     7 | import re
     8 | import json
     9 | 
    10 | import requests
    11 | import bs4
    12 | 
    13 | 
    14 | def fetch_html(url):
    15 |     """
    16 |     Fetch HTML and decode into a `bs4.BeautifulSoup` object
    17 |     """
    18 |     r = requests.get(url)
    19 |     r.raise_for_status()
    20 |     return bs4.BeautifulSoup(str(r.content, 'unicode-escape'), features='html.parser')
    21 | 
    22 | 
    23 | def extract_json(soup, json_var):
    24 |     """ Extract a JSON variable from understat HTML. """
    25 |     node, *__ = [s for s in soup.select('script') if s.string and json_var in s.string]
    26 | 
    27 |     # Clean string by removing and newlines (\n) and tabs (\t)
    28 |     node_string = ' '.join(node.string.split())
    29 | 
    30 |     json_value = re.match(f"var {json_var} = JSON\.parse\(\'(?P.*?)\'\)", node_string).group('json')
    31 |     return json.loads(json_value)
    32 | 
    33 | # Cell
    34 | 
    35 | 
    36 | # 'Competition' might be a better name, but let's stick with understat's terminology
     37 | class League(enum.Enum):
     38 |     """
     39 |     Understat leagues; each member's value is the league's name as it appears in understat URLs
     40 |     """
     41 |     EPL = 'EPL'
     42 |     LA_LIGA = 'La_Liga'
     43 |     SERIE_A = 'Serie_A'
     44 |     BUNDESLIGA = 'Bundesliga'
     45 |     LIGUE_1 = 'Ligue_1'
     46 |     RPL = 'RPL'
    47 | 
    48 | # Cell
    49 | 
    50 | class Understat:
    51 |     """
    52 |     Fetches understat data webpages
    53 |     """
    54 | 
    55 |     def __init__(self, base_url: str='https://understat.com'):
    56 |         self.base_url = base_url
    57 | 
    58 |     def matches(self, league: League, season: int):
    59 |         """ Fetch match data for a given `league` and `season` (start year). """
    60 |         league_url = f'{self.base_url}/league/{league.value}/{season}'
    61 |         soup = fetch_html(league_url)
    62 |         return extract_json(soup, 'datesData')
    63 | 
    64 |     def shots(self, match_id: int):
    65 |         match_url = f'{self.base_url}/match/{match_id}'
    66 |         soup = fetch_html(match_url)
    67 |         return extract_json(soup, 'shotsData')
    
    
    --------------------------------------------------------------------------------