├── docs
    ├── .gitignore
    ├── assets
    │   └── images
    │   │   └── favicon.ico
    ├── _data
    │   ├── topnav.yml
    │   └── sidebars
    │   │   └── home_sidebar.yml
    ├── sidebar.json
    ├── Gemfile
    ├── sitemap.xml
    ├── feed.xml
    ├── _config.yml
    ├── weights.html
    ├── blocks.html
    ├── models.html
    ├── parameters.html
    ├── Gemfile.lock
    ├── index.html
    └── adapters.html
├── nbs
    ├── .gitattributes
    ├── weights.ipynb
    ├── parameters.ipynb
    ├── blocks.ipynb
    ├── index.ipynb
    ├── models.ipynb
    ├── adapters.ipynb
    └── core.ipynb
├── MANIFEST.in
├── mezzala
    ├── __init__.py
    ├── parameters.py
    ├── weights.py
    ├── _nbdev.py
    ├── adapters.py
    ├── blocks.py
    └── models.py
├── .devcontainer.json
├── settings.ini
├── Makefile
├── docker-compose.yml
├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── CONTRIBUTING.md
├── setup.py
├── README.md
└── LICENSE


/docs/.gitignore:
--------------------------------------------------------------------------------
1 | _site/
2 | 


--------------------------------------------------------------------------------
/docs/assets/images/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Torvaney/mezzala/HEAD/docs/assets/images/favicon.ico


--------------------------------------------------------------------------------
/nbs/.gitattributes:
--------------------------------------------------------------------------------
1 | **/*.ipynb filter=clean-nbs
2 | **/*.ipynb diff=ipynb
3 | *.ipynb linguist-language=Python
4 | 
5 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include settings.ini
2 | include LICENSE
3 | include CONTRIBUTING.md
4 | include README.md
5 | recursive-exclude * __pycache__
6 | 


--------------------------------------------------------------------------------
/docs/_data/topnav.yml:
--------------------------------------------------------------------------------
 1 | topnav:
 2 | - title: Topnav
 3 |   items:
 4 |     - title: github
 5 |       external_url: https://github.com/Torvaney/mezzala/tree/master/
 6 | 
 7 | #Topnav dropdowns
 8 | topnav_dropdowns:
 9 | - title: Topnav dropdowns
10 |   folders:


--------------------------------------------------------------------------------
/docs/sidebar.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "mezzala": {
 3 |     "Overview": "/",
 4 |     "Data Adapters": "adapters.html",
 5 |     "Model blocks": "blocks.html",
 6 |     "Core": "core.html",
 7 |     "Models": "models.html",
 8 |     "Parameters": "parameters.html",
 9 |     "Weights": "weights.html"
10 |   }
11 | }


--------------------------------------------------------------------------------
/docs/Gemfile:
--------------------------------------------------------------------------------
 1 | source "https://rubygems.org"
 2 | 
 3 | gem 'github-pages', group: :jekyll_plugins
 4 | 
 5 | # Added at 2019-11-25 10:11:40 -0800 by jhoward:
 6 | gem "nokogiri", "< 1.11.1"
 7 | gem "jekyll", ">= 3.7"
 8 | gem "kramdown", ">= 2.3.1"
 9 | gem "jekyll-remote-theme"
10 | 
11 | gem "webrick", "~> 1.7"
12 | 


--------------------------------------------------------------------------------
/mezzala/__init__.py:
--------------------------------------------------------------------------------
 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/core.ipynb (unless otherwise specified).
 2 | 
 3 | __all__ = []
 4 | 
 5 | # Cell
 6 | 
 7 | # For now, just re-export everything
 8 | from .adapters import *
 9 | from .blocks import *
10 | from .models import *
11 | from .weights import *
12 | from .parameters import *


--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
 1 | ---
 2 | layout: none
 3 | search: exclude
 4 | ---
 5 | 
 6 | <?xml version="1.0" encoding="UTF-8"?>
 7 | <urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
 8 |   {% for post in site.posts %}
 9 |   {% unless post.search == "exclude" %}
10 |   <url>
11 |     <loc>{{site.url}}{{post.url}}</loc>
12 |   </url>
13 |   {% endunless %}
14 |   {% endfor %}
15 | 
16 | 
17 |   {% for page in site.pages %}
18 |   {% unless page.search == "exclude" %}
19 |   <url>
20 |     <loc>{{site.url}}{{ page.url}}</loc>
21 |   </url>
22 |   {% endunless %}
23 |   {% endfor %}
24 | </urlset>


--------------------------------------------------------------------------------
/.devcontainer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "nbdev_template-codespaces",
 3 |     "dockerComposeFile": "docker-compose.yml",
 4 |     "service": "watcher",
 5 |     "settings": {"terminal.integrated.shell.linux": "/bin/bash"},
 6 |     "mounts": [ "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ],
 7 |     "forwardPorts": [4000, 8080],
 8 |     "appPort": [4000, 8080],
 9 |     "extensions": ["ms-python.python",
10 |                    "ms-azuretools.vscode-docker"],
11 |     "runServices": ["notebook", "jekyll", "watcher"],
12 |     "postStartCommand": "pip install -e .[dev]"
13 | }
14 | 


--------------------------------------------------------------------------------
/settings.ini:
--------------------------------------------------------------------------------
 1 | [DEFAULT]
 2 | host = github
 3 | lib_name = mezzala
 4 | user = Torvaney
 5 | description = A package for soccer modelling
 6 | keywords = soccer-analytics football-analytics dixon-coles
 7 | author = Ben Torvaney
 8 | author_email = torvaney@protonmail.com
 9 | copyright = Ben Torvaney  # Required arg :(
10 | branch = master
11 | version = 0.0.7
12 | min_python = 3.7
13 | audience = Developers
14 | language = English
15 | custom_sidebar = False
16 | license = apache2
17 | status = 2
18 | requirements =
19 | 	numpy
20 | 	scipy
21 | dev_requirements =
22 | 	jupyter
23 | 	nbdev
24 | nbs_path = nbs
25 | doc_path = docs
26 | recursive = False
27 | doc_host = https://Torvaney.github.io
28 | doc_baseurl = /mezzala/
29 | git_url = https://github.com/Torvaney/mezzala/tree/master/
30 | lib_path = mezzala
31 | title = mezzala
32 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | .ONESHELL:
 2 | SHELL := /bin/bash
 3 | SRC = $(wildcard nbs/*.ipynb)
 4 | PYTHON_VENV ?= venv
 5 | 
 6 | # nbdev commands
 7 | 
 8 | all: package docs
 9 | 
10 | package: $(SRC)
11 | 	nbdev_build_lib
12 | 	touch mezzala
13 | 
14 | sync:
15 | 	nbdev_update_lib
16 | 
17 | docs_serve: docs
18 | 	cd docs && bundle exec jekyll serve
19 | 
20 | docs: $(SRC)
21 | 	nbdev_build_docs
22 | 	touch docs
23 | 
24 | test:
25 | 	nbdev_test_nbs
26 | 
27 | release: pypi conda_release
28 | 	nbdev_bump_version
29 | 
30 | conda_release:
31 | 	fastrelease_conda_package
32 | 
33 | pypi: dist
34 | 	twine upload --repository pypi dist/*
35 | 
36 | dist: clean
37 | 	python setup.py sdist bdist_wheel
38 | 
39 | clean:
40 | 	rm -rf dist
41 | 
42 | env:
43 | 	python -m venv $(PYTHON_VENV)
44 | 	$(PYTHON_VENV)/bin/pip install --upgrade pip
45 | 	$(PYTHON_VENV)/bin/pip install -e .[dev]
46 | 


--------------------------------------------------------------------------------
/docs/_data/sidebars/home_sidebar.yml:
--------------------------------------------------------------------------------
 1 | 
 2 | #################################################
 3 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 4 | #################################################
 5 | # Instead edit ../../sidebar.json
 6 | entries:
 7 | - folders:
 8 |   - folderitems:
 9 |     - output: web,pdf
10 |       title: Overview
11 |       url: /
12 |     - output: web,pdf
13 |       title: Data Adapters
14 |       url: adapters.html
15 |     - output: web,pdf
16 |       title: Model blocks
17 |       url: blocks.html
18 |     - output: web,pdf
19 |       title: Core
20 |       url: core.html
21 |     - output: web,pdf
22 |       title: Models
23 |       url: models.html
24 |     - output: web,pdf
25 |       title: Parameters
26 |       url: parameters.html
27 |     - output: web,pdf
28 |       title: Weights
29 |       url: weights.html
30 |     output: web
31 |     title: mezzala
32 |   output: web
33 |   title: Sidebar
34 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "3"
 2 | services:
 3 |   fastai: &fastai
 4 |     restart: unless-stopped
 5 |     working_dir: /data
 6 |     image: fastai/codespaces
 7 |     logging:
 8 |       driver: json-file
 9 |       options:
10 |         max-size: 50m
11 |     stdin_open: true
12 |     tty: true
13 |     volumes:
14 |       - .:/data/
15 | 
16 |   notebook:
17 |     <<: *fastai
18 |     command: bash -c "pip install -e . && jupyter notebook --allow-root --no-browser --ip=0.0.0.0 --port=8080 --NotebookApp.token='' --NotebookApp.password=''"
19 |     ports:
20 |       - "8080:8080"
21 | 
22 |   watcher:
23 |     <<: *fastai
24 |     command: watchmedo shell-command --command nbdev_build_docs --pattern *.ipynb --recursive --drop
25 |     network_mode: host # for GitHub Codespaces https://github.com/features/codespaces/
26 | 
27 |   jekyll:
28 |     <<: *fastai
29 |     ports:
30 |      - "4000:4000"
31 |     command: >
32 |      bash -c "pip install .
33 |      && nbdev_build_docs && cd docs
34 |      && bundle i
35 |      && chmod -R u+rwx . && bundle exec jekyll serve --host 0.0.0.0"
36 | 


--------------------------------------------------------------------------------
/mezzala/parameters.py:
--------------------------------------------------------------------------------
 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/parameters.ipynb (unless otherwise specified).
 2 | 
 3 | __all__ = ['ParameterKey', 'RHO_KEY', 'HFA_KEY', 'AVG_KEY', 'TeamParameterKey', 'OffenceParameterKey',
 4 |            'DefenceParameterKey']
 5 | 
 6 | # Cell
 7 | import dataclasses
 8 | import typing
 9 | 
10 | # Cell
11 | 
12 | 
13 | @dataclasses.dataclass(frozen=True)
14 | class ParameterKey:
15 |     label: typing.Hashable
16 | 
17 | 
18 | # Basic model constants
19 | RHO_KEY = ParameterKey('Rho')
20 | HFA_KEY = ParameterKey('Home-field advantage')
21 | AVG_KEY = ParameterKey('Average rate')
22 | 
23 | # Cell
24 | 
25 | 
26 | class TeamParameterKey(ParameterKey):
27 |     """
28 |     Parent class of `OffenceParameterKey` and `DefenceParameterKey`.
29 | 
30 |     Can be used to extract team offence/defence parameters with `isinstance`.
31 |     """
32 |     pass
33 | 
34 | 
35 | class OffenceParameterKey(TeamParameterKey):
36 |     """
37 |     Team offence parameter key
38 |     """
39 |     pass
40 | 
41 | 
42 | class DefenceParameterKey(TeamParameterKey):
43 |     """
44 |     Team Defence parameter key
45 |     """
46 |     pass


--------------------------------------------------------------------------------
/mezzala/weights.py:
--------------------------------------------------------------------------------
 1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/weights.ipynb (unless otherwise specified).
 2 | 
 3 | __all__ = ['UniformWeight', 'ExponentialWeight', 'KeyWeight']
 4 | 
 5 | # Cell
 6 | import numpy as np
 7 | 
 8 | # Cell
 9 | 
10 | 
11 | class UniformWeight:
12 |     """
13 |     Weight all observations equally
14 |     """
15 |     def __repr__(self):
16 |         return f'UniformWeight()'
17 | 
18 |     @staticmethod
19 |     def __call__(row):
20 |         return 1.0
21 | 
22 | # Cell
23 | 
24 | 
25 | class ExponentialWeight:
26 |     """
27 |     Weight observations with exponential decay
28 |     """
29 |     def __init__(self, epsilon, key):
30 |         self.epsilon = epsilon
31 |         self.key = key
32 | 
33 |     def __repr__(self):
34 |         return f'ExponentialWeight(epsilon={self.epsilon}, key={self.key})'
35 | 
36 |     def __call__(self, row):
37 |         return np.exp(self.epsilon*self.key(row))
38 | 
39 | # Cell
40 | 
41 | 
42 | class KeyWeight:
43 |     """
44 |     Weight observations with an arbitrary key function
45 |     """
46 |     def __init__(self, key):
47 |         self.key = key
48 | 
49 |     def __repr__(self):
50 |         return f'KeyWeight(key={self.key})'
51 | 
52 |     def __call__(self, row):
53 |         return self.key(row)


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: CI
 2 | on: [push, pull_request]
 3 | jobs:
 4 |   build:
 5 |     runs-on: ubuntu-latest
 6 |     steps:
 7 |     - uses: actions/checkout@v1
 8 |     - uses: actions/setup-python@v1
 9 |       with:
10 |         python-version: '3.7'
11 |         architecture: 'x64'
12 |     - name: Install the library
13 |       run: |
14 |         pip install nbdev jupyter
15 |         pip install -e .
16 |     - name: Read all notebooks
17 |       run: |
18 |         nbdev_read_nbs
19 |     - name: Check if all notebooks are cleaned
20 |       run: |
21 |         echo "Check we are starting with clean git checkout"
22 |         if [ -n "$(git status -uno -s)" ]; then echo "git status is not clean"; false; fi
23 |         echo "Trying to strip out notebooks"
24 |         nbdev_clean_nbs
25 |         echo "Check that strip out was unnecessary"
26 |         git status -s # display the status to see which nbs need cleaning up
27 |         if [ -n "$(git status -uno -s)" ]; then echo -e "!!! Detected unstripped out notebooks\n!!!Remember to run nbdev_install_git_hooks"; false; fi
28 |     - name: Check if there is no diff library/notebooks
29 |       run: |
30 |         if [ -n "$(nbdev_diff_nbs)" ]; then echo -e "!!! Detected difference between the notebooks and the library"; false; fi
31 |     - name: Run tests
32 |       run: |
33 |         nbdev_test_nbs
34 | 


--------------------------------------------------------------------------------
/docs/feed.xml:
--------------------------------------------------------------------------------
 1 | ---
 2 | search: exclude
 3 | layout: none
 4 | ---
 5 | 
 6 | <?xml version="1.0" encoding="UTF-8"?>
 7 | <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
 8 |     <channel>
 9 |         <title>{{ site.title | xml_escape }}</title>
10 |         <description>{{ site.description | xml_escape }}</description>
11 |         <link>{{ site.url }}/</link>
12 |         <atom:link href="{{ "/feed.xml" | prepend: site.url }}" rel="self" type="application/rss+xml"/>
13 |         <pubDate>{{ site.time | date_to_rfc822 }}</pubDate>
14 |         <lastBuildDate>{{ site.time | date_to_rfc822 }}</lastBuildDate>
15 |         <generator>Jekyll v{{ jekyll.version }}</generator>
16 |         {% for post in site.posts limit:10 %}
17 |         <item>
18 |             <title>{{ post.title | xml_escape }}</title>
19 |             <description>{{ post.content | xml_escape }}</description>
20 |             <pubDate>{{ post.date | date_to_rfc822 }}</pubDate>
21 |             <link>{{ post.url | prepend: site.url }}</link>
22 |             <guid isPermaLink="true">{{ post.url | prepend: site.url }}</guid>
23 |             {% for tag in post.tags %}
24 |             <category>{{ tag | xml_escape }}</category>
25 |             {% endfor %}
26 |             {% for tag in page.tags %}
27 |             <category>{{ cat | xml_escape }}</category>
28 |             {% endfor %}
29 |         </item>
30 |         {% endfor %}
31 |     </channel>
32 | </rss>
33 | 


--------------------------------------------------------------------------------
/docs/_config.yml:
--------------------------------------------------------------------------------
 1 | repository: Torvaney/mezzala
 2 | output: web
 3 | topnav_title: mezzala
 4 | site_title: mezzala
 5 | company_name: Ben Torvaney  # Required arg :(
 6 | description: A package for soccer modelling
 7 | # Set to false to disable KaTeX math
 8 | use_math: true
 9 | # Add Google analytics id if you have one and want to use it here
10 | google_analytics:
11 | # See http://nbdev.fast.ai/search for help with adding Search
12 | google_search:
13 | 
14 | host: 127.0.0.1
15 | # the preview server used. Leave as is.
16 | port: 4000
17 | # the port where the preview is rendered.
18 | 
19 | exclude:
20 |   - .idea/
21 |   - .gitignore
22 |   - vendor
23 |  
24 | exclude: [vendor]
25 | 
26 | highlighter: rouge
27 | markdown: kramdown
28 | kramdown:
29 |  input: GFM
30 |  auto_ids: true
31 |  hard_wrap: false
32 |  syntax_highlighter: rouge
33 | 
34 | collections:
35 |   tooltips:
36 |     output: false
37 | 
38 | defaults:
39 |   -
40 |     scope:
41 |       path: ""
42 |       type: "pages"
43 |     values:
44 |       layout: "page"
45 |       comments: true
46 |       search: true
47 |       sidebar: home_sidebar
48 |       topnav: topnav
49 |   -
50 |     scope:
51 |       path: ""
52 |       type: "tooltips"
53 |     values:
54 |       layout: "page"
55 |       comments: true
56 |       search: true
57 |       tooltip: true
58 | 
59 | sidebars:
60 | - home_sidebar
61 | 
62 | plugins:
63 |     - jekyll-remote-theme
64 |     
65 | remote_theme: fastai/nbdev-jekyll-theme
66 | baseurl: /mezzala/


--------------------------------------------------------------------------------
/mezzala/_nbdev.py:
--------------------------------------------------------------------------------
 1 | # AUTOGENERATED BY NBDEV! DO NOT EDIT!
 2 | 
 3 | __all__ = ["index", "modules", "custom_doc_links", "git_url"]
 4 | 
 5 | index = {"KeyAdapter": "adapters.ipynb",
 6 |          "AttributeAdapter": "adapters.ipynb",
 7 |          "LumpedAdapter": "adapters.ipynb",
 8 |          "ModelBlockABC": "blocks.ipynb",
 9 |          "BaseRate": "blocks.ipynb",
10 |          "HomeAdvantage": "blocks.ipynb",
11 |          "TeamStrength": "blocks.ipynb",
12 |          "KeyBlock": "blocks.ipynb",
13 |          "ConstantBlock": "blocks.ipynb",
14 |          "ScorelinePrediction": "models.ipynb",
15 |          "Outcomes": "models.ipynb",
16 |          "OutcomePrediction": "models.ipynb",
17 |          "scoreline_to_outcome": "models.ipynb",
18 |          "scorelines_to_outcomes": "models.ipynb",
19 |          "DixonColes": "models.ipynb",
20 |          "ParameterKey": "parameters.ipynb",
21 |          "RHO_KEY": "parameters.ipynb",
22 |          "HFA_KEY": "parameters.ipynb",
23 |          "AVG_KEY": "parameters.ipynb",
24 |          "TeamParameterKey": "parameters.ipynb",
25 |          "OffenceParameterKey": "parameters.ipynb",
26 |          "DefenceParameterKey": "parameters.ipynb",
27 |          "UniformWeight": "weights.ipynb",
28 |          "ExponentialWeight": "weights.ipynb",
29 |          "KeyWeight": "weights.ipynb"}
30 | 
31 | modules = ["adapters.py",
32 |            "blocks.py",
33 |            "__init__.py",
34 |            "models.py",
35 |            "parameters.py",
36 |            "weights.py"]
37 | 
38 | doc_url = "https://Torvaney.github.io/mezzala/"
39 | 
40 | git_url = "https://github.com/Torvaney/mezzala/tree/master/"
41 | 
42 | def custom_doc_links(name): return None
43 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | *.bak
  2 | .gitattributes
  3 | .last_checked
  4 | .gitconfig
  5 | *.bak
  6 | *.log
  7 | *~
  8 | ~*
  9 | _tmp*
 10 | tmp*
 11 | tags
 12 | 
 13 | # Byte-compiled / optimized / DLL files
 14 | __pycache__/
 15 | *.py[cod]
 16 | *$py.class
 17 | 
 18 | # C extensions
 19 | *.so
 20 | 
 21 | # Distribution / packaging
 22 | .Python
 23 | env/
 24 | build/
 25 | develop-eggs/
 26 | dist/
 27 | downloads/
 28 | eggs/
 29 | .eggs/
 30 | lib/
 31 | lib64/
 32 | parts/
 33 | sdist/
 34 | var/
 35 | wheels/
 36 | *.egg-info/
 37 | .installed.cfg
 38 | *.egg
 39 | 
 40 | # PyInstaller
 41 | #  Usually these files are written by a python script from a template
 42 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 43 | *.manifest
 44 | *.spec
 45 | 
 46 | # Installer logs
 47 | pip-log.txt
 48 | pip-delete-this-directory.txt
 49 | 
 50 | # Unit test / coverage reports
 51 | htmlcov/
 52 | .tox/
 53 | .coverage
 54 | .coverage.*
 55 | .cache
 56 | nosetests.xml
 57 | coverage.xml
 58 | *.cover
 59 | .hypothesis/
 60 | 
 61 | # Translations
 62 | *.mo
 63 | *.pot
 64 | 
 65 | # Django stuff:
 66 | *.log
 67 | local_settings.py
 68 | 
 69 | # Flask stuff:
 70 | instance/
 71 | .webassets-cache
 72 | 
 73 | # Scrapy stuff:
 74 | .scrapy
 75 | 
 76 | # Sphinx documentation
 77 | docs/_build/
 78 | 
 79 | # PyBuilder
 80 | target/
 81 | 
 82 | # Jupyter Notebook
 83 | .ipynb_checkpoints
 84 | 
 85 | # pyenv
 86 | .python-version
 87 | 
 88 | # celery beat schedule file
 89 | celerybeat-schedule
 90 | 
 91 | # SageMath parsed files
 92 | *.sage.py
 93 | 
 94 | # dotenv
 95 | .env
 96 | 
 97 | # virtualenv
 98 | .venv
 99 | venv/
100 | ENV/
101 | 
102 | # Spyder project settings
103 | .spyderproject
104 | .spyproject
105 | 
106 | # Rope project settings
107 | .ropeproject
108 | 
109 | # mkdocs documentation
110 | /site
111 | 
112 | # mypy
113 | .mypy_cache/
114 | 
115 | .vscode
116 | *.swp
117 | 
118 | # osx generated files
119 | .DS_Store
120 | .DS_Store?
121 | .Trashes
122 | ehthumbs.db
123 | Thumbs.db
124 | .idea
125 | 
126 | # pytest
127 | .pytest_cache
128 | 
129 | # tools/trust-doc-nbs
130 | docs_src/.last_checked
131 | 
132 | # symlinks to fastai
133 | docs_src/fastai
134 | tools/fastai
135 | 
136 | # link checker
137 | checklink/cookies.txt
138 | 
139 | # .gitconfig is now autogenerated
140 | .gitconfig
141 | 
142 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # How to contribute
 2 | 
 3 | ## How to get started
 4 | 
 5 | Before anything else, please install the git hooks that run automatic scripts during each commit and merge to strip the notebooks of superfluous metadata (and avoid merge conflicts). After cloning the repository, run the following command inside it:
 6 | ```
 7 | nbdev_install_git_hooks
 8 | ```
 9 | 
10 | ## Did you find a bug?
11 | 
12 | * Ensure the bug was not already reported by searching on GitHub under Issues.
13 | * If you're unable to find an open issue addressing the problem, open a new one. Be sure to include a title and clear description, as much relevant information as possible, and a code sample or an executable test case demonstrating the expected behavior that is not occurring.
14 | * Be sure to add the complete error messages.
15 | 
16 | #### Did you write a patch that fixes a bug?
17 | 
18 | * Open a new GitHub pull request with the patch.
19 | * Ensure that your PR includes a test that fails without your patch, and pass with it.
20 | * Ensure the PR description clearly describes the problem and solution. Include the relevant issue number if applicable.
21 | 
22 | ## PR submission guidelines
23 | 
24 | * Keep each PR focused. While it's more convenient, do not combine several unrelated fixes together. Create as many branches as needing to keep each PR focused.
25 | * Do not mix style changes/fixes with "functional" changes. It's very difficult to review such PRs and it most likely get rejected.
26 | * Do not add/remove vertical whitespace. Preserve the original style of the file you edit as much as you can.
27 | * Do not turn an already submitted PR into your development playground. If after you submitted PR, you discovered that more work is needed - close the PR, do the required work and then submit a new PR. Otherwise each of your commits requires attention from maintainers of the project.
28 | * If, however, you submitted a PR and received a request for changes, you should proceed with commits inside that PR, so that the maintainer can see the incremental fixes and won't need to review the whole PR again. In the exception case where you realize it'll take many many commits to complete the requests, then it's probably best to close the PR, do the work and then submit it again. Use common sense where you'd choose one way over another.
29 | 
30 | ## Do you want to contribute to the documentation?
31 | 
32 | * Docs are automatically created from the notebooks in the nbs folder.
33 | 
34 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from pkg_resources import parse_version
 2 | from configparser import ConfigParser
 3 | import setuptools
 4 | 
 5 | assert parse_version(setuptools.__version__) >= parse_version("36.2")
 6 | 
 7 | # note: all settings are in settings.ini; edit there, not here
 8 | config = ConfigParser(delimiters=["="])
 9 | config.read("settings.ini")
10 | cfg = config["DEFAULT"]
11 | 
12 | cfg_keys = "version description keywords author author_email".split()
13 | expected = (
14 |     cfg_keys
15 |     + "lib_name user branch license status min_python audience language".split()
16 | )
17 | for o in expected:
18 |     assert o in cfg, "missing expected setting: {}".format(o)
19 | setup_cfg = {o: cfg[o] for o in cfg_keys}
20 | 
21 | licenses = {
22 |     "apache2": (
23 |         "Apache Software License 2.0",
24 |         "OSI Approved :: Apache Software License",
25 |     ),
26 |     "mit": ("MIT License", "OSI Approved :: MIT License"),
27 |     "gpl2": (
28 |         "GNU General Public License v2",
29 |         "OSI Approved :: GNU General Public License v2 (GPLv2)",
30 |     ),
31 |     "gpl3": (
32 |         "GNU General Public License v3",
33 |         "OSI Approved :: GNU General Public License v3 (GPLv3)",
34 |     ),
35 |     "bsd3": ("BSD License", "OSI Approved :: BSD License"),
36 | }
37 | statuses = [
38 |     "1 - Planning",
39 |     "2 - Pre-Alpha",
40 |     "3 - Alpha",
41 |     "4 - Beta",
42 |     "5 - Production/Stable",
43 |     "6 - Mature",
44 |     "7 - Inactive",
45 | ]
46 | py_versions = (
47 |     "2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 3.0 3.1 3.2 3.3 3.4 3.5 3.6 3.7 3.8".split()
48 | )
49 | 
50 | requirements = cfg.get("requirements", "").split()
51 | dev_requirements = cfg.get("dev_requirements", "").split()
52 | min_python = cfg["min_python"]
53 | lic = licenses.get(cfg["license"].lower(), (cfg["license"], None))
54 | 
55 | setuptools.setup(
56 |     name=cfg["lib_name"],
57 |     license=lic[0],
58 |     classifiers=[
59 |         "Development Status :: " + statuses[int(cfg["status"])],
60 |         "Intended Audience :: " + cfg["audience"].title(),
61 |         "Natural Language :: " + cfg["language"].title(),
62 |     ]
63 |     + [
64 |         "Programming Language :: Python :: " + o
65 |         for o in py_versions[py_versions.index(min_python) :]
66 |     ]
67 |     + (["License :: " + lic[1]] if lic[1] else []),
68 |     url=cfg["git_url"],
69 |     packages=setuptools.find_packages(),
70 |     include_package_data=True,
71 |     install_requires=requirements,
72 |     extras_require={"dev": dev_requirements},
73 |     dependency_links=cfg.get("dep_links", "").split(),
74 |     python_requires=">=" + cfg["min_python"],
75 |     long_description=open("README.md").read(),
76 |     long_description_content_type="text/markdown",
77 |     zip_safe=False,
78 |     entry_points={"console_scripts": cfg.get("console_scripts", "").split()},
79 |     **setup_cfg
80 | )
81 | 


--------------------------------------------------------------------------------
/nbs/weights.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "7f647236",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# default_exp weights"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "id": "9d64a591",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "# Weights"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "id": "690e4a2b",
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "#hide\n",
 29 |     "from nbdev.showdoc import *"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "id": "786cccce",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "#export\n",
 40 |     "import numpy as np"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "id": "73429c3f",
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "#export\n",
 51 |     "\n",
 52 |     "\n",
 53 |     "class UniformWeight:\n",
 54 |     "    \"\"\"\n",
 55 |     "    Weight all observations equally\n",
 56 |     "    \"\"\"\n",
 57 |     "    def __repr__(self):\n",
 58 |     "        return f'UniformWeight()'\n",
 59 |     "    \n",
 60 |     "    @staticmethod\n",
 61 |     "    def __call__(row):\n",
 62 |     "        return 1.0"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "id": "9313bfbc",
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "#export\n",
 73 |     "\n",
 74 |     "\n",
 75 |     "class ExponentialWeight:\n",
 76 |     "    \"\"\"\n",
 77 |     "    Weight observations with exponential decay\n",
 78 |     "    \"\"\"\n",
 79 |     "    def __init__(self, epsilon, key):\n",
 80 |     "        self.epsilon = epsilon\n",
 81 |     "        self.key = key\n",
 82 |     "        \n",
 83 |     "    def __repr__(self):\n",
 84 |     "        return f'ExponentialWeight(epsilon={self.epsilon}, key={self.key})'\n",
 85 |     "\n",
 86 |     "    def __call__(self, row):\n",
 87 |     "        return np.exp(self.epsilon*self.key(row))"
 88 |    ]
 89 |   },
 90 |   {
 91 |    "cell_type": "code",
 92 |    "execution_count": null,
 93 |    "id": "21bb58e4",
 94 |    "metadata": {},
 95 |    "outputs": [],
 96 |    "source": [
 97 |     "#export\n",
 98 |     "\n",
 99 |     "\n",
100 |     "class KeyWeight:\n",
101 |     "    \"\"\"\n",
102 |     "    Weight observations with an arbitrary key function\n",
103 |     "    \"\"\"\n",
104 |     "    def __init__(self, key):\n",
105 |     "        self.key = key\n",
106 |     "        \n",
107 |     "    def __repr__(self):\n",
108 |     "        return f'KeyWeight(key={self.key})'\n",
109 |     "\n",
110 |     "    def __call__(self, row):\n",
111 |     "        return self.key(row)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "code",
116 |    "execution_count": null,
117 |    "id": "76cf28b0",
118 |    "metadata": {},
119 |    "outputs": [],
120 |    "source": []
121 |   }
122 |  ],
123 |  "metadata": {
124 |   "kernelspec": {
125 |    "display_name": "Python 3",
126 |    "language": "python",
127 |    "name": "python3"
128 |   }
129 |  },
130 |  "nbformat": 4,
131 |  "nbformat_minor": 5
132 | }
133 | 


--------------------------------------------------------------------------------
/docs/weights.html:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | title: Weights
  4 | 
  5 | 
  6 | keywords: fastai
  7 | sidebar: home_sidebar
  8 | 
  9 | 
 10 | 
 11 | nb_path: "nbs/weights.ipynb"
 12 | ---
 13 | <!--
 14 | 
 15 | #################################################
 16 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 17 | #################################################
 18 | # file to edit: nbs/weights.ipynb
 19 | # command to build the docs after a change: nbdev_build_docs
 20 | 
 21 | -->
 22 | 
 23 | <div class="container" id="notebook-container">
 24 |         
 25 |     {% raw %}
 26 |     
 27 | <div class="cell border-box-sizing code_cell rendered">
 28 | 
 29 | </div>
 30 |     {% endraw %}
 31 | 
 32 |     {% raw %}
 33 |     
 34 | <div class="cell border-box-sizing code_cell rendered">
 35 | 
 36 | </div>
 37 |     {% endraw %}
 38 | 
 39 |     {% raw %}
 40 |     
 41 | <div class="cell border-box-sizing code_cell rendered">
 42 | 
 43 | <div class="output_wrapper">
 44 | <div class="output">
 45 | 
 46 | <div class="output_area">
 47 | 
 48 | 
 49 | <div class="output_markdown rendered_html output_subarea ">
 50 | <h2 id="UniformWeight" class="doc_header"><code>class</code> <code>UniformWeight</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/weights.py#L11" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>UniformWeight</code>()</p>
 51 | </blockquote>
 52 | <p>Weight all observations equally</p>
 53 | 
 54 | </div>
 55 | 
 56 | </div>
 57 | 
 58 | </div>
 59 | </div>
 60 | 
 61 | </div>
 62 |     {% endraw %}
 63 | 
 64 |     {% raw %}
 65 |     
 66 | <div class="cell border-box-sizing code_cell rendered">
 67 | 
 68 | </div>
 69 |     {% endraw %}
 70 | 
 71 |     {% raw %}
 72 |     
 73 | <div class="cell border-box-sizing code_cell rendered">
 74 | 
 75 | <div class="output_wrapper">
 76 | <div class="output">
 77 | 
 78 | <div class="output_area">
 79 | 
 80 | 
 81 | <div class="output_markdown rendered_html output_subarea ">
 82 | <h2 id="ExponentialWeight" class="doc_header"><code>class</code> <code>ExponentialWeight</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/weights.py#L25" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>ExponentialWeight</code>(<strong><code>epsilon</code></strong>, <strong><code>key</code></strong>)</p>
 83 | </blockquote>
 84 | <p>Weight observations with exponential decay</p>
 85 | 
 86 | </div>
 87 | 
 88 | </div>
 89 | 
 90 | </div>
 91 | </div>
 92 | 
 93 | </div>
 94 |     {% endraw %}
 95 | 
 96 |     {% raw %}
 97 |     
 98 | <div class="cell border-box-sizing code_cell rendered">
 99 | 
100 | </div>
101 |     {% endraw %}
102 | 
103 |     {% raw %}
104 |     
105 | <div class="cell border-box-sizing code_cell rendered">
106 | 
107 | <div class="output_wrapper">
108 | <div class="output">
109 | 
110 | <div class="output_area">
111 | 
112 | 
113 | <div class="output_markdown rendered_html output_subarea ">
114 | <h2 id="KeyWeight" class="doc_header"><code>class</code> <code>KeyWeight</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/weights.py#L42" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>KeyWeight</code>(<strong><code>key</code></strong>)</p>
115 | </blockquote>
116 | <p>Weight observations with an arbitrary key function</p>
117 | 
118 | </div>
119 | 
120 | </div>
121 | 
122 | </div>
123 | </div>
124 | 
125 | </div>
126 |     {% endraw %}
127 | 
128 |     {% raw %}
129 |     
130 | <div class="cell border-box-sizing code_cell rendered">
131 | 
132 | </div>
133 |     {% endraw %}
134 | 
135 | </div>
136 |  
137 | 
138 | 


--------------------------------------------------------------------------------
/nbs/parameters.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "6f1b9bb4",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# default_exp parameters"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "id": "108b707b",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import pprint"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "markdown",
 25 |    "id": "764b9540",
 26 |    "metadata": {},
 27 |    "source": [
 28 |     "# Parameters"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "id": "ec3f14c3",
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "#export\n",
 39 |     "import dataclasses\n",
 40 |     "import typing"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "id": "4bfb6766",
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "#export\n",
 51 |     "\n",
 52 |     "\n",
 53 |     "@dataclasses.dataclass(frozen=True)\n",
 54 |     "class ParameterKey:\n",
 55 |     "    label: typing.Hashable\n",
 56 |     "\n",
 57 |     "\n",
 58 |     "# Basic model constants\n",
 59 |     "RHO_KEY = ParameterKey('Rho')\n",
 60 |     "HFA_KEY = ParameterKey('Home-field advantage')\n",
 61 |     "AVG_KEY = ParameterKey('Average rate')"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": null,
 67 |    "id": "480aa95b",
 68 |    "metadata": {},
 69 |    "outputs": [],
 70 |    "source": [
 71 |     "#export\n",
 72 |     "\n",
 73 |     "\n",
 74 |     "class TeamParameterKey(ParameterKey):\n",
 75 |     "    \"\"\"\n",
 76 |     "    Parent class of `OffenceParameterKey` and `DefenceParameterKey`.\n",
 77 |     "    \n",
 78 |     "    Can be used to extract team offence/defence parameters with `isinstance`.\n",
 79 |     "    \"\"\"\n",
 80 |     "    pass\n",
 81 |     "\n",
 82 |     "\n",
 83 |     "class OffenceParameterKey(TeamParameterKey):\n",
 84 |     "    \"\"\"\n",
 85 |     "    Team offence parameter key\n",
 86 |     "    \"\"\"\n",
 87 |     "    pass\n",
 88 |     "\n",
 89 |     "\n",
 90 |     "class DefenceParameterKey(TeamParameterKey):\n",
 91 |     "    \"\"\"\n",
 92 |     "    Team Defence parameter key\n",
 93 |     "    \"\"\"\n",
 94 |     "    pass"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": null,
100 |    "id": "bd2b13f8",
101 |    "metadata": {},
102 |    "outputs": [
103 |     {
104 |      "name": "stdout",
105 |      "output_type": "stream",
106 |      "text": [
107 |       "[OffenceParameterKey(label='Arsenal'),\n",
108 |       " OffenceParameterKey(label='Chelsea'),\n",
109 |       " DefenceParameterKey(label='Arsenal'),\n",
110 |       " DefenceParameterKey(label='Chelsea')]\n"
111 |      ]
112 |     }
113 |    ],
114 |    "source": [
115 |     "parameters = [\n",
116 |     "    RHO_KEY,\n",
117 |     "    HFA_KEY,\n",
118 |     "    AVG_KEY,\n",
119 |     "    OffenceParameterKey('Arsenal'),\n",
120 |     "    OffenceParameterKey('Chelsea'),\n",
121 |     "    DefenceParameterKey('Arsenal'),\n",
122 |     "    DefenceParameterKey('Chelsea'),\n",
123 |     "]\n",
124 |     "\n",
125 |     "pprint.pprint([p for p in parameters if isinstance(p, TeamParameterKey)])"
126 |    ]
127 |   }
128 |  ],
129 |  "metadata": {
130 |   "kernelspec": {
131 |    "display_name": "Python 3",
132 |    "language": "python",
133 |    "name": "python3"
134 |   }
135 |  },
136 |  "nbformat": 4,
137 |  "nbformat_minor": 5
138 | }
139 | 


--------------------------------------------------------------------------------
/mezzala/adapters.py:
--------------------------------------------------------------------------------
  1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/adapters.ipynb (unless otherwise specified).
  2 | 
  3 | __all__ = ['KeyAdapter', 'AttributeAdapter', 'LumpedAdapter']
  4 | 
  5 | # Cell
  6 | import collections
  7 | import functools
  8 | 
  9 | import mezzala.parameters
 10 | 
 11 | # Cell
 12 | 
 13 | 
 14 | class KeyAdapter:
 15 |     """
 16 |     Get data from subscriptable objects.
 17 |     """
 18 | 
 19 |     def __init__(self, home_goals, away_goals, **kwargs):
 20 |         self._lookup = {
 21 |             'home_goals': home_goals,
 22 |             'away_goals': away_goals,
 23 |             **kwargs
 24 |         }
 25 | 
 26 |     def __repr__(self):
 27 |         args_repr = ', '.join(f'{k}={repr(v)}' for k, v in self._lookup.items())
 28 |         return f'KeyAdapter({args_repr})'
 29 | 
 30 |     def _get_in(self, row, item):
 31 |         if isinstance(item, list):
 32 |             return functools.reduce(lambda d, i: d[i], item, row)
 33 |         return row[item]
 34 | 
 35 |     def __getattr__(self, key):
 36 |         def getter(row):
 37 |             return self._get_in(row, self._lookup[key])
 38 |         return getter
 39 | 
 40 | # Cell
 41 | 
 42 | 
 43 | class AttributeAdapter:
 44 |     """
 45 |     Get data from object attributes.
 46 |     """
 47 |     def __init__(self, home_goals, away_goals, **kwargs):
 48 |         self._lookup = {
 49 |             'home_goals': home_goals,
 50 |             'away_goals': away_goals,
 51 |             **kwargs
 52 |         }
 53 | 
 54 |     def __repr__(self):
 55 |         args_repr = ', '.join(f'{k}={repr(v)}' for k, v in self._lookup.items())
 56 |         return f'KeyAdapter({args_repr})'
 57 | 
 58 |     def _get_in(self, row, item):
 59 |         if isinstance(item, list):
 60 |             return functools.reduce(getattr, item, row)
 61 |         return getattr(row, item)
 62 | 
 63 |     def __getattr__(self, key):
 64 |         def getter(row):
 65 |             return self._get_in(row, self._lookup[key])
 66 |         return getter
 67 | 
 68 | # Cell
 69 | 
 70 | 
 71 | class LumpedAdapter:
 72 |     """
 73 |     Lump terms which have appeared below a minimum number of times in
 74 |     the training data into a placeholder term
 75 |     """
 76 | 
 77 |     def __init__(self, base_adapter, **kwargs):
 78 |         self.base_adapter = base_adapter
 79 | 
 80 |         # Match terms to placeholders
 81 |         # If multiple terms have the same placeholder (e.g. Home and Away
 82 |         # teams) they will share a counter
 83 |         self._term_lookup = kwargs
 84 | 
 85 |         self._counters = None
 86 | 
 87 |     def __repr__(self):
 88 |         args_repr = ', '.join(f'{k}={repr(v)}' for k, v in self._term_lookup.items())
 89 |         return f'LumpedAdapter(base_adapter={repr(self.base_adapter)}, {args_repr})'
 90 | 
 91 |     def fit(self, data):
 92 |         self._counters = {}
 93 |         for term, (placeholder, _) in self._term_lookup.items():
 94 |             # Initialise with an empty counter if it doesn't already exist
 95 |             # We need to do this so that multiple terms sharing the same counter
 96 |             # (home and away teams) are shared
 97 |             init_counter = self._counters.get(placeholder, collections.Counter())
 98 | 
 99 |             counter = collections.Counter(getattr(self.base_adapter, term)(row) for row in data)
100 | 
101 |             self._counters[placeholder] = init_counter + counter
102 |         return self
103 | 
104 |     def __getattr__(self, key):
105 |         if not self._counters:
106 |             raise ValueError(
107 |                 'No counts found! You need to call `LumpedAdapter.fit` '
108 |                 'on the training data before you can use it!'
109 |             )
110 | 
111 |         def getter(row):
112 |             value = getattr(self.base_adapter, key)(row)
113 |             placeholder, min_obs = self._term_lookup.get(key, (None, None))
114 |             if placeholder and self._counters[placeholder][value] < min_obs:
115 |                 return placeholder
116 |             return value
117 |         return getter


--------------------------------------------------------------------------------
/mezzala/blocks.py:
--------------------------------------------------------------------------------
  1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/blocks.ipynb (unless otherwise specified).
  2 | 
  3 | __all__ = ['BaseRate', 'HomeAdvantage', 'TeamStrength', 'KeyBlock', 'ConstantBlock']
  4 | 
  5 | # Cell
  6 | import abc
  7 | 
  8 | import numpy as np
  9 | 
 10 | import mezzala.parameters
 11 | 
 12 | # Internal Cell
 13 | 
 14 | 
 15 | class ModelBlockABC(abc.ABC):
 16 |     """
 17 |     Base class for model blocks
 18 |     """
 19 |     PRIORITY = 0
 20 | 
 21 |     def param_keys(self, adapter, data):
 22 |         return []
 23 | 
 24 |     def constraints(self, adapter, data):
 25 |         return []
 26 | 
 27 |     def home_terms(self, adapter, data):
 28 |         return []
 29 | 
 30 |     def away_terms(self, adapter, data):
 31 |         return []
 32 | 
 33 | # Cell
 34 | 
 35 | 
 36 | class BaseRate(ModelBlockABC):
 37 |     """
 38 |     Estimate average goalscoring rate as a separate parameter.
 39 | 
 40 |     This can be useful, since it results in both team offence and
 41 |     team defence parameters being centered around 1.0
 42 |     """
 43 | 
 44 |     def __init__(self):
 45 |         pass
 46 | 
 47 |     def __repr__(self):
 48 |         return 'BaseRate()'
 49 | 
 50 |     def param_keys(self, adapter, data):
 51 |         return [mezzala.parameters.AVG_KEY]
 52 | 
 53 |     def home_terms(self, adapter, row):
 54 |         return [
 55 |             (mezzala.parameters.AVG_KEY, 1.0)
 56 |         ]
 57 | 
 58 |     def away_terms(self, adapter, row):
 59 |         return [
 60 |             (mezzala.parameters.AVG_KEY, 1.0)
 61 |         ]
 62 | 
 63 | # Cell
 64 | 
 65 | 
 66 | class HomeAdvantage(ModelBlockABC):
 67 |     """
 68 |     Estimate home advantage.
 69 | 
 70 |     Assumes constant home advantage is present in every match in the
 71 |     dataset
 72 |     """
 73 | 
 74 |     def __init__(self):
 75 |         # TODO: allow HFA on/off depending on the data?
 76 |         pass
 77 | 
 78 |     def __repr__(self):
 79 |         return 'HomeAdvantage()'
 80 | 
 81 |     def param_keys(self, adapter, data):
 82 |         return [mezzala.parameters.HFA_KEY]
 83 | 
 84 |     def home_terms(self, adapter, row):
 85 |         return [
 86 |             (mezzala.parameters.HFA_KEY, 1.0)
 87 |         ]
 88 | 
 89 | # Cell
 90 | 
 91 | 
 92 | class TeamStrength(ModelBlockABC):
 93 |     """
 94 |     Estimate team offence and team defence parameters.
 95 |     """
 96 | 
 97 |     # This is a gross hack so that we know that the
 98 |     # team strength parameters come first, and thus can
 99 |     # do the constraints (which are positionally indexed)
100 |     PRIORITY = 1
101 | 
102 |     def __init__(self):
103 |         pass
104 | 
105 |     def __repr__(self):
106 |         return 'TeamStrength()'
107 | 
108 |     def _teams(self, adapter, data):
109 |         return set(adapter.home_team(r) for r in data) | set(adapter.away_team(r) for r in data)
110 | 
111 |     def offence_key(self, label):
112 |         return mezzala.parameters.OffenceParameterKey(label)
113 | 
114 |     def defence_key(self, label):
115 |         return mezzala.parameters.DefenceParameterKey(label)
116 | 
117 |     def param_keys(self, adapter, data):
118 |         teams = self._teams(adapter, data)
119 | 
120 |         offence = [self.offence_key(t) for t in teams]
121 |         defence = [self.defence_key(t) for t in teams]
122 | 
123 |         return offence + defence
124 | 
125 |     def constraints(self, adapter, data):
126 |         n_teams = len(self._teams(adapter, data))
127 |         return [
128 |             # Force team offence parameters to average to 1
129 |             {'fun': lambda x: 1 - np.mean(np.exp(x[0:n_teams])),
130 |              'type': 'eq'},
131 |         ]
132 | 
133 |     def home_terms(self, adapter, row):
134 |         return [
135 |             (self.offence_key(adapter.home_team(row)), 1.0),
136 |             (self.defence_key(adapter.away_team(row)), 1.0),
137 |         ]
138 | 
139 |     def away_terms(self, adapter, row):
140 |         return [
141 |             (self.offence_key(adapter.away_team(row)), 1.0),
142 |             (self.defence_key(adapter.home_team(row)), 1.0),
143 |         ]
144 | 
145 | # Cell
146 | 
147 | 
148 | class KeyBlock(ModelBlockABC):
149 |     """
150 |     Generic model block for adding arbitrary model terms from the data
151 |     to both home and away team
152 |     """
153 |     def __init__(self, key):
154 |         self.key = key
155 | 
156 |     def __repr__(self):
157 |         return 'KeyBlock()'
158 | 
159 |     def param_keys(self, adapter, data):
160 |         return list(set(self.key(r) for r in data))
161 | 
162 |     def home_terms(self, adapter, row):
163 |         return [self.key(row)]
164 | 
165 |     def away_terms(self, adapter, row):
166 |         return [self.key(row)]
167 | 
168 | # Cell
169 | 
170 | 
171 | class ConstantBlock(ModelBlockABC):
172 |     """
173 |     A model block for adding specific model terms to the parameter keys.
174 | 
175 |     Can be useful in conjunction with `LumpedAdapter` to ensure that certain parameters
176 |     are in the model (even if they aren't estimated)
177 |     """
178 |     def __init__(self, *args):
179 |         self.terms = args
180 | 
181 |     def __repr__(self):
182 |         return 'ConstantBlock()'
183 | 
184 |     def param_keys(self, adapter, data):
185 |         return list(self.terms)


--------------------------------------------------------------------------------
/docs/blocks.html:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | title: Model blocks
  4 | 
  5 | 
  6 | keywords: fastai
  7 | sidebar: home_sidebar
  8 | 
  9 | 
 10 | 
 11 | nb_path: "nbs/blocks.ipynb"
 12 | ---
 13 | <!--
 14 | 
 15 | #################################################
 16 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 17 | #################################################
 18 | # file to edit: nbs/blocks.ipynb
 19 | # command to build the docs after a change: nbdev_build_docs
 20 | 
 21 | -->
 22 | 
 23 | <div class="container" id="notebook-container">
 24 |         
 25 |     {% raw %}
 26 |     
 27 | <div class="cell border-box-sizing code_cell rendered">
 28 | 
 29 | </div>
 30 |     {% endraw %}
 31 | 
 32 |     {% raw %}
 33 |     
 34 | <div class="cell border-box-sizing code_cell rendered">
 35 | 
 36 | </div>
 37 |     {% endraw %}
 38 | 
 39 |     {% raw %}
 40 |     
 41 | <div class="cell border-box-sizing code_cell rendered">
 42 | 
 43 | <div class="output_wrapper">
 44 | <div class="output">
 45 | 
 46 | <div class="output_area">
 47 | 
 48 | 
 49 | <div class="output_markdown rendered_html output_subarea ">
 50 | <h2 id="BaseRate" class="doc_header"><code>class</code> <code>BaseRate</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/blocks.py#L36" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>BaseRate</code>() :: <a href="/mezzala/blocks.html#ModelBlockABC"><code>ModelBlockABC</code></a></p>
 51 | </blockquote>
 52 | <p>Estimate average goalscoring rate as a separate parameter.</p>
 53 | <p>This can be useful, since it results in both team offence and
 54 | team defence parameters being centered around 1.0</p>
 55 | 
 56 | </div>
 57 | 
 58 | </div>
 59 | 
 60 | </div>
 61 | </div>
 62 | 
 63 | </div>
 64 |     {% endraw %}
 65 | 
 66 |     {% raw %}
 67 |     
 68 | <div class="cell border-box-sizing code_cell rendered">
 69 | 
 70 | </div>
 71 |     {% endraw %}
 72 | 
 73 |     {% raw %}
 74 |     
 75 | <div class="cell border-box-sizing code_cell rendered">
 76 | 
 77 | <div class="output_wrapper">
 78 | <div class="output">
 79 | 
 80 | <div class="output_area">
 81 | 
 82 | 
 83 | <div class="output_markdown rendered_html output_subarea ">
 84 | <h2 id="HomeAdvantage" class="doc_header"><code>class</code> <code>HomeAdvantage</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/blocks.py#L66" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>HomeAdvantage</code>() :: <a href="/mezzala/blocks.html#ModelBlockABC"><code>ModelBlockABC</code></a></p>
 85 | </blockquote>
 86 | <p>Estimate home advantage.</p>
 87 | <p>Assumes constant home advantage is present in every match in the
 88 | dataset</p>
 89 | 
 90 | </div>
 91 | 
 92 | </div>
 93 | 
 94 | </div>
 95 | </div>
 96 | 
 97 | </div>
 98 |     {% endraw %}
 99 | 
100 |     {% raw %}
101 |     
102 | <div class="cell border-box-sizing code_cell rendered">
103 | 
104 | </div>
105 |     {% endraw %}
106 | 
107 |     {% raw %}
108 |     
109 | <div class="cell border-box-sizing code_cell rendered">
110 | 
111 | <div class="output_wrapper">
112 | <div class="output">
113 | 
114 | <div class="output_area">
115 | 
116 | 
117 | <div class="output_markdown rendered_html output_subarea ">
118 | <h2 id="TeamStrength" class="doc_header"><code>class</code> <code>TeamStrength</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/blocks.py#L92" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>TeamStrength</code>() :: <a href="/mezzala/blocks.html#ModelBlockABC"><code>ModelBlockABC</code></a></p>
119 | </blockquote>
120 | <p>Estimate team offence and team defence parameters.</p>
121 | 
122 | </div>
123 | 
124 | </div>
125 | 
126 | </div>
127 | </div>
128 | 
129 | </div>
130 |     {% endraw %}
131 | 
132 |     {% raw %}
133 |     
134 | <div class="cell border-box-sizing code_cell rendered">
135 | 
136 | </div>
137 |     {% endraw %}
138 | 
139 |     {% raw %}
140 |     
141 | <div class="cell border-box-sizing code_cell rendered">
142 | 
143 | <div class="output_wrapper">
144 | <div class="output">
145 | 
146 | <div class="output_area">
147 | 
148 | 
149 | <div class="output_markdown rendered_html output_subarea ">
150 | <h2 id="KeyBlock" class="doc_header"><code>class</code> <code>KeyBlock</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/blocks.py#L148" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>KeyBlock</code>(<strong><code>key</code></strong>) :: <a href="/mezzala/blocks.html#ModelBlockABC"><code>ModelBlockABC</code></a></p>
151 | </blockquote>
152 | <p>Generic model block for adding arbitrary model terms from the data
153 | to both home and away team</p>
154 | 
155 | </div>
156 | 
157 | </div>
158 | 
159 | </div>
160 | </div>
161 | 
162 | </div>
163 |     {% endraw %}
164 | 
165 |     {% raw %}
166 |     
167 | <div class="cell border-box-sizing code_cell rendered">
168 | 
169 | </div>
170 |     {% endraw %}
171 | 
172 |     {% raw %}
173 |     
174 | <div class="cell border-box-sizing code_cell rendered">
175 | 
176 | <div class="output_wrapper">
177 | <div class="output">
178 | 
179 | <div class="output_area">
180 | 
181 | 
182 | <div class="output_markdown rendered_html output_subarea ">
183 | <h2 id="ConstantBlock" class="doc_header"><code>class</code> <code>ConstantBlock</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/blocks.py#L171" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>ConstantBlock</code>(<strong>*<code>args</code></strong>) :: <a href="/mezzala/blocks.html#ModelBlockABC"><code>ModelBlockABC</code></a></p>
184 | </blockquote>
185 | <p>A model block for adding specific model terms to the parameter keys.</p>
186 | <p>Can be useful in conjunction with <a href="/mezzala/adapters.html#LumpedAdapter"><code>LumpedAdapter</code></a> to ensure that certain parameters
187 | are in the model (even if they aren't estimated)</p>
188 | 
189 | </div>
190 | 
191 | </div>
192 | 
193 | </div>
194 | </div>
195 | 
196 | </div>
197 |     {% endraw %}
198 | 
199 |     {% raw %}
200 |     
201 | <div class="cell border-box-sizing code_cell rendered">
202 | 
203 | </div>
204 |     {% endraw %}
205 | 
206 | </div>
207 |  
208 | 
209 | 


--------------------------------------------------------------------------------
/docs/models.html:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | title: Models
  4 | 
  5 | 
  6 | keywords: fastai
  7 | sidebar: home_sidebar
  8 | 
  9 | 
 10 | 
 11 | nb_path: "nbs/models.ipynb"
 12 | ---
 13 | <!--
 14 | 
 15 | #################################################
 16 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 17 | #################################################
 18 | # file to edit: nbs/models.ipynb
 19 | # command to build the docs after a change: nbdev_build_docs
 20 | 
 21 | -->
 22 | 
 23 | <div class="container" id="notebook-container">
 24 |         
 25 |     {% raw %}
 26 |     
 27 | <div class="cell border-box-sizing code_cell rendered">
 28 | 
 29 | </div>
 30 |     {% endraw %}
 31 | 
 32 |     {% raw %}
 33 |     
 34 | <div class="cell border-box-sizing code_cell rendered">
 35 | 
 36 | </div>
 37 |     {% endraw %}
 38 | 
 39 |     {% raw %}
 40 |     
 41 | <div class="cell border-box-sizing code_cell rendered">
 42 | 
 43 | <div class="output_wrapper">
 44 | <div class="output">
 45 | 
 46 | <div class="output_area">
 47 | 
 48 | 
 49 | <div class="output_markdown rendered_html output_subarea ">
 50 | <h2 id="ScorelinePrediction" class="doc_header"><code>class</code> <code>ScorelinePrediction</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/models.py#L27" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>ScorelinePrediction</code>(<strong><code>home_goals</code></strong>:<code>int</code>, <strong><code>away_goals</code></strong>:<code>int</code>, <strong><code>probability</code></strong>:<code>float</code>)</p>
 51 | </blockquote>
 52 | <p>ScorelinePrediction(home_goals: int, away_goals: int, probability: float)</p>
 53 | 
 54 | </div>
 55 | 
 56 | </div>
 57 | 
 58 | </div>
 59 | </div>
 60 | 
 61 | </div>
 62 |     {% endraw %}
 63 | 
 64 |     {% raw %}
 65 |     
 66 | <div class="cell border-box-sizing code_cell rendered">
 67 | 
 68 | </div>
 69 |     {% endraw %}
 70 | 
 71 |     {% raw %}
 72 |     
 73 | <div class="cell border-box-sizing code_cell rendered">
 74 | 
 75 | <div class="output_wrapper">
 76 | <div class="output">
 77 | 
 78 | <div class="output_area">
 79 | 
 80 | 
 81 | <div class="output_markdown rendered_html output_subarea ">
 82 | <h2 id="Outcomes" class="doc_header"><code>Outcomes</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/models.py#L36" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>Enum</code> = [HOME_WIN, DRAW, AWAY_WIN]</p>
 83 | </blockquote>
 84 | <p>An enumeration.</p>
 85 | 
 86 | </div>
 87 | 
 88 | </div>
 89 | 
 90 | </div>
 91 | </div>
 92 | 
 93 | </div>
 94 |     {% endraw %}
 95 | 
 96 |     {% raw %}
 97 |     
 98 | <div class="cell border-box-sizing code_cell rendered">
 99 | 
100 | <div class="output_wrapper">
101 | <div class="output">
102 | 
103 | <div class="output_area">
104 | 
105 | 
106 | <div class="output_markdown rendered_html output_subarea ">
107 | <h2 id="OutcomePrediction" class="doc_header"><code>class</code> <code>OutcomePrediction</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/models.py#L46" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>OutcomePrediction</code>(<strong><code>outcome</code></strong>:<a href="/mezzala/models.html#Outcomes"><code>Outcomes</code></a>, <strong><code>probability</code></strong>:<code>float</code>)</p>
108 | </blockquote>
109 | <p>OutcomePrediction(outcome: mezzala.models.Outcomes, probability: float)</p>
110 | 
111 | </div>
112 | 
113 | </div>
114 | 
115 | </div>
116 | </div>
117 | 
118 | </div>
119 |     {% endraw %}
120 | 
121 |     {% raw %}
122 |     
123 | <div class="cell border-box-sizing code_cell rendered">
124 | 
125 | </div>
126 |     {% endraw %}
127 | 
128 |     {% raw %}
129 |     
130 | <div class="cell border-box-sizing code_cell rendered">
131 | 
132 | <div class="output_wrapper">
133 | <div class="output">
134 | 
135 | <div class="output_area">
136 | 
137 | 
138 | <div class="output_markdown rendered_html output_subarea ">
139 | <h4 id="scoreline_to_outcome" class="doc_header"><code>scoreline_to_outcome</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/models.py#L53" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>scoreline_to_outcome</code>(<strong><code>home_goals</code></strong>, <strong><code>away_goals</code></strong>)</p>
140 | </blockquote>
141 | 
142 | </div>
143 | 
144 | </div>
145 | 
146 | </div>
147 | </div>
148 | 
149 | </div>
150 |     {% endraw %}
151 | 
152 |     {% raw %}
153 |     
154 | <div class="cell border-box-sizing code_cell rendered">
155 | 
156 | <div class="output_wrapper">
157 | <div class="output">
158 | 
159 | <div class="output_area">
160 | 
161 | 
162 | <div class="output_markdown rendered_html output_subarea ">
163 | <h4 id="scorelines_to_outcomes" class="doc_header"><code>scorelines_to_outcomes</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/models.py#L62" class="source_link" style="float:right">[source]</a></h4><blockquote><p><code>scorelines_to_outcomes</code>(<strong><code>scorelines</code></strong>)</p>
164 | </blockquote>
165 | 
166 | </div>
167 | 
168 | </div>
169 | 
170 | </div>
171 | </div>
172 | 
173 | </div>
174 |     {% endraw %}
175 | 
176 |     {% raw %}
177 |     
178 | <div class="cell border-box-sizing code_cell rendered">
179 | 
180 | </div>
181 |     {% endraw %}
182 | 
183 |     {% raw %}
184 |     
185 | <div class="cell border-box-sizing code_cell rendered">
186 | 
187 | <div class="output_wrapper">
188 | <div class="output">
189 | 
190 | <div class="output_area">
191 | 
192 | 
193 | <div class="output_markdown rendered_html output_subarea ">
194 | <h2 id="DixonColes" class="doc_header"><code>class</code> <code>DixonColes</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/models.py#L80" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>DixonColes</code>(<strong><code>adapter</code></strong>, <strong><code>blocks</code></strong>=<em><code>[BaseRate(), HomeAdvantage(), TeamStrength()]</code></em>, <strong><code>weight</code></strong>=<em><code>UniformWeight()</code></em>, <strong><code>params</code></strong>=<em><code>None</code></em>)</p>
195 | </blockquote>
196 | <p>Dixon-Coles models in Python</p>
197 | 
198 | </div>
199 | 
200 | </div>
201 | 
202 | </div>
203 | </div>
204 | 
205 | </div>
206 |     {% endraw %}
207 | 
208 |     {% raw %}
209 |     
210 | <div class="cell border-box-sizing code_cell rendered">
211 | 
212 | </div>
213 |     {% endraw %}
214 | 
215 | </div>
216 |  
217 | 
218 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Mezzala
  2 | > Models for estimating football (soccer) team-strength
  3 | 
  4 | 
  5 | ## Install
  6 | 
  7 | `pip install mezzala`
  8 | 
  9 | ## How to use
 10 | 
 11 | ```python
 12 | import mezzala
 13 | ```
 14 | 
 15 | Fitting a Dixon-Coles team strength model:
 16 | 
 17 | First, we need to get some data
 18 | 
 19 | ```python
 20 | import itertools
 21 | import json
 22 | import urllib.request
 23 | 
 24 | 
 25 | # Use 2016/17 Premier League data from the openfootball repo
 26 | url = 'https://raw.githubusercontent.com/openfootball/football.json/master/2016-17/en.1.json'
 27 | 
 28 | 
 29 | response = urllib.request.urlopen(url)
 30 | data_raw = json.loads(response.read())
 31 | 
 32 | # Reshape the data to just get the matches
 33 | data = list(itertools.chain(*[d['matches'] for d in data_raw['rounds']]))
 34 | 
 35 | data[0:3]
 36 | ```
 37 | 
 38 | 
 39 | 
 40 | 
 41 |     [{'date': '2016-08-13',
 42 |       'team1': 'Hull City AFC',
 43 |       'team2': 'Leicester City FC',
 44 |       'score': {'ft': [2, 1]}},
 45 |      {'date': '2016-08-13',
 46 |       'team1': 'Everton FC',
 47 |       'team2': 'Tottenham Hotspur FC',
 48 |       'score': {'ft': [1, 1]}},
 49 |      {'date': '2016-08-13',
 50 |       'team1': 'Crystal Palace FC',
 51 |       'team2': 'West Bromwich Albion FC',
 52 |       'score': {'ft': [0, 1]}}]
 53 | 
 54 | 
 55 | 
 56 | ### Fitting a model
 57 | 
 58 | To fit a model with mezzala, you need to create an "adapter". Adapters are used to connect a model to a data source.
 59 | 
 60 | Because our data is a list of dicts, we are going to use a `KeyAdapter`.
 61 | 
 62 | ```python
 63 | adapter = mezzala.KeyAdapter(       # `KeyAdapter` = datum['...']
 64 |     home_team='team1',
 65 |     away_team='team2',
 66 |     home_goals=['score', 'ft', 0],  # Get nested fields with lists of fields
 67 |     away_goals=['score', 'ft', 1],  # i.e. datum['score']['ft'][1]
 68 | )
 69 | 
 70 | # You'll never need to call the methods on an 
 71 | # adapter directly, but just to show that it 
 72 | # works as expected:
 73 | adapter.home_team(data[0])
 74 | ```
 75 | 
 76 | 
 77 | 
 78 | 
 79 |     'Hull City AFC'
 80 | 
 81 | 
 82 | 
 83 | Once we have an adapter for our specific data source, we can fit the model:
 84 | 
 85 | ```python
 86 | model = mezzala.DixonColes(adapter=adapter)
 87 | model.fit(data)
 88 | ```
 89 | 
 90 | 
 91 | 
 92 | 
 93 |     DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=UniformWeight()
 94 | 
 95 | 
 96 | 
 97 | ### Making predictions
 98 | 
 99 | By default, you only need to supply the home and away team to get predictions. This should be supplied in the same format as the training data.
100 | 
101 | `DixonColes` has two methods for making predictions:
102 | 
103 | * `predict_one` - for predicting a single match
104 | * `predict` - for predicting multiple matches
105 | 
106 | ```python
107 | match_to_predict = {
108 |     'team1': 'Manchester City FC',
109 |     'team2': 'Swansea City FC',
110 | }
111 | 
112 | scorelines = model.predict_one(match_to_predict)
113 | 
114 | scorelines[0:5]
115 | ```
116 | 
117 | 
118 | 
119 | 
120 |     [ScorelinePrediction(home_goals=0, away_goals=0, probability=0.023625049697587167),
121 |      ScorelinePrediction(home_goals=0, away_goals=1, probability=0.012682094432376022),
122 |      ScorelinePrediction(home_goals=0, away_goals=2, probability=0.00623268833779594),
123 |      ScorelinePrediction(home_goals=0, away_goals=3, probability=0.0016251514235046444),
124 |      ScorelinePrediction(home_goals=0, away_goals=4, probability=0.00031781436109636405)]
125 | 
126 | 
127 | 
128 | Each of these methods return predictions in the form of `ScorelinePredictions`. 
129 | 
130 | * `predict_one` returns a list of `ScorelinePredictions`
131 | * `predict` returns a list of `ScorelinePredictions` for each predicted match (i.e. a list of lists)
132 | 
133 | However, it can sometimes be more useful to have predictions in the form of match _outcomes_. Mezzala exposes the `scorelines_to_outcomes` function for this purpose:
134 | 
135 | ```python
136 | mezzala.scorelines_to_outcomes(scorelines)
137 | ```
138 | 
139 | 
140 | 
141 | 
142 |     {Outcomes('Home win'): OutcomePrediction(outcome=Outcomes('Home win'), probability=0.8255103334702835),
143 |      Outcomes('Draw'): OutcomePrediction(outcome=Outcomes('Draw'), probability=0.11615659853961693),
144 |      Outcomes('Away win'): OutcomePrediction(outcome=Outcomes('Away win'), probability=0.058333067990098304)}
145 | 
146 | 
147 | 
148 | ### Extending the model
149 | 
150 | It's possible to fit more sophisticated models with mezzala, using **weights** and **model blocks**
151 | 
152 | #### Weights
153 | 
154 | You can weight individual data points by supplying a function (or callable) to the `weight` argument to `DixonColes`:
155 | 
156 | ```python
157 | mezzala.DixonColes(
158 |     adapter=adapter,
159 |     # By default, all data points are weighted equally,
160 |     # which is equivalent to:
161 |     weight=lambda x: 1
162 | )
163 | ```
164 | 
165 | 
166 | 
167 | 
168 |     DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=<function <lambda> at 0x123067488>
169 | 
170 | 
171 | 
172 | Mezzala also provides an `ExponentialWeight` for the purpose of time-discounting:
173 | 
174 | ```python
175 | mezzala.DixonColes(
176 |     adapter=adapter,
177 |     weight=mezzala.ExponentialWeight(
178 |         epsilon=-0.0065,               # Decay rate
179 |         key=lambda x: x['days_ago']
180 |     )
181 | )
182 | ```
183 | 
184 | 
185 | 
186 | 
187 |     DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=ExponentialWeight(epsilon=-0.0065, key=<function <lambda> at 0x122f938c8>)
188 | 
189 | 
190 | 
191 | #### Model blocks
192 | 
193 | Model "blocks" define the calculation and estimation of home and away goalscoring rates.
194 | 
195 | ```python
196 | mezzala.DixonColes(
197 |     adapter=adapter,
198 |     # By default, only team strength and home advantage,
199 |     # is estimated:
200 |     blocks=[
201 |         mezzala.blocks.HomeAdvantage(),
202 |         mezzala.blocks.TeamStrength(),
203 |         mezzala.blocks.BaseRate(),      # Adds "average goalscoring rate" as a distinct parameter
204 |     ]
205 | )
206 | ```
207 | 
208 | 
209 | 
210 | 
211 |     DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), HomeAdvantage(), BaseRate()]), weight=UniformWeight()
212 | 
213 | 
214 | 
215 | To add custom parameters (e.g. per-league home advantage), you need to add additional model blocks.
216 | 


--------------------------------------------------------------------------------
/docs/parameters.html:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | title: Parameters
  4 | 
  5 | 
  6 | keywords: fastai
  7 | sidebar: home_sidebar
  8 | 
  9 | 
 10 | 
 11 | nb_path: "nbs/parameters.ipynb"
 12 | ---
 13 | <!--
 14 | 
 15 | #################################################
 16 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 17 | #################################################
 18 | # file to edit: nbs/parameters.ipynb
 19 | # command to build the docs after a change: nbdev_build_docs
 20 | 
 21 | -->
 22 | 
 23 | <div class="container" id="notebook-container">
 24 |         
 25 |     {% raw %}
 26 |     
 27 | <div class="cell border-box-sizing code_cell rendered">
 28 | 
 29 | </div>
 30 |     {% endraw %}
 31 | 
 32 |     {% raw %}
 33 |     
 34 | <div class="cell border-box-sizing code_cell rendered">
 35 | <div class="input">
 36 | 
 37 | <div class="inner_cell">
 38 |     <div class="input_area">
 39 | <div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">pprint</span>
 40 | </pre></div>
 41 | 
 42 |     </div>
 43 | </div>
 44 | </div>
 45 | 
 46 | </div>
 47 |     {% endraw %}
 48 | 
 49 |     {% raw %}
 50 |     
 51 | <div class="cell border-box-sizing code_cell rendered">
 52 | 
 53 | </div>
 54 |     {% endraw %}
 55 | 
 56 |     {% raw %}
 57 |     
 58 | <div class="cell border-box-sizing code_cell rendered">
 59 | 
 60 | <div class="output_wrapper">
 61 | <div class="output">
 62 | 
 63 | <div class="output_area">
 64 | 
 65 | 
 66 | <div class="output_markdown rendered_html output_subarea ">
 67 | <h2 id="ParameterKey" class="doc_header"><code>class</code> <code>ParameterKey</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/parameters.py#L14" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>ParameterKey</code>(<strong><code>label</code></strong>:<code>Hashable</code>)</p>
 68 | </blockquote>
 69 | <p>ParameterKey(label: Hashable)</p>
 70 | 
 71 | </div>
 72 | 
 73 | </div>
 74 | 
 75 | </div>
 76 | </div>
 77 | 
 78 | </div>
 79 |     {% endraw %}
 80 | 
 81 |     {% raw %}
 82 |     
 83 | <div class="cell border-box-sizing code_cell rendered">
 84 | 
 85 | </div>
 86 |     {% endraw %}
 87 | 
 88 |     {% raw %}
 89 |     
 90 | <div class="cell border-box-sizing code_cell rendered">
 91 | 
 92 | <div class="output_wrapper">
 93 | <div class="output">
 94 | 
 95 | <div class="output_area">
 96 | 
 97 | 
 98 | <div class="output_markdown rendered_html output_subarea ">
 99 | <h2 id="TeamParameterKey" class="doc_header"><code>class</code> <code>TeamParameterKey</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/parameters.py#L26" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>TeamParameterKey</code>(<strong><code>label</code></strong>:<code>Hashable</code>) :: <a href="/mezzala/parameters.html#ParameterKey"><code>ParameterKey</code></a></p>
100 | </blockquote>
101 | <p>Parent class of <a href="/mezzala/parameters.html#OffenceParameterKey"><code>OffenceParameterKey</code></a> and <a href="/mezzala/parameters.html#DefenceParameterKey"><code>DefenceParameterKey</code></a>.</p>
102 | <p>Can be used to extract team offence/defence parameters with <code>isinstance</code>.</p>
103 | 
104 | </div>
105 | 
106 | </div>
107 | 
108 | </div>
109 | </div>
110 | 
111 | </div>
112 |     {% endraw %}
113 | 
114 |     {% raw %}
115 |     
116 | <div class="cell border-box-sizing code_cell rendered">
117 | 
118 | <div class="output_wrapper">
119 | <div class="output">
120 | 
121 | <div class="output_area">
122 | 
123 | 
124 | <div class="output_markdown rendered_html output_subarea ">
125 | <h2 id="OffenceParameterKey" class="doc_header"><code>class</code> <code>OffenceParameterKey</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/parameters.py#L35" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>OffenceParameterKey</code>(<strong><code>label</code></strong>:<code>Hashable</code>) :: <a href="/mezzala/parameters.html#TeamParameterKey"><code>TeamParameterKey</code></a></p>
126 | </blockquote>
127 | <p>Team offence parameter key</p>
128 | 
129 | </div>
130 | 
131 | </div>
132 | 
133 | </div>
134 | </div>
135 | 
136 | </div>
137 |     {% endraw %}
138 | 
139 |     {% raw %}
140 |     
141 | <div class="cell border-box-sizing code_cell rendered">
142 | 
143 | <div class="output_wrapper">
144 | <div class="output">
145 | 
146 | <div class="output_area">
147 | 
148 | 
149 | <div class="output_markdown rendered_html output_subarea ">
150 | <h2 id="DefenceParameterKey" class="doc_header"><code>class</code> <code>DefenceParameterKey</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/parameters.py#L42" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>DefenceParameterKey</code>(<strong><code>label</code></strong>:<code>Hashable</code>) :: <a href="/mezzala/parameters.html#TeamParameterKey"><code>TeamParameterKey</code></a></p>
151 | </blockquote>
152 | <p>Team Defence parameter key</p>
153 | 
154 | </div>
155 | 
156 | </div>
157 | 
158 | </div>
159 | </div>
160 | 
161 | </div>
162 |     {% endraw %}
163 | 
164 |     {% raw %}
165 |     
166 | <div class="cell border-box-sizing code_cell rendered">
167 | 
168 | </div>
169 |     {% endraw %}
170 | 
171 |     {% raw %}
172 |     
173 | <div class="cell border-box-sizing code_cell rendered">
174 | <div class="input">
175 | 
176 | <div class="inner_cell">
177 |     <div class="input_area">
178 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">parameters</span> <span class="o">=</span> <span class="p">[</span>
179 |     <span class="n">RHO_KEY</span><span class="p">,</span>
180 |     <span class="n">HFA_KEY</span><span class="p">,</span>
181 |     <span class="n">AVG_KEY</span><span class="p">,</span>
182 |     <span class="n">OffenceParameterKey</span><span class="p">(</span><span class="s1">&#39;Arsenal&#39;</span><span class="p">),</span>
183 |     <span class="n">OffenceParameterKey</span><span class="p">(</span><span class="s1">&#39;Chelsea&#39;</span><span class="p">),</span>
184 |     <span class="n">DefenceParameterKey</span><span class="p">(</span><span class="s1">&#39;Arsenal&#39;</span><span class="p">),</span>
185 |     <span class="n">DefenceParameterKey</span><span class="p">(</span><span class="s1">&#39;Chelsea&#39;</span><span class="p">),</span>
186 | <span class="p">]</span>
187 | 
188 | <span class="n">pprint</span><span class="o">.</span><span class="n">pprint</span><span class="p">([</span><span class="n">p</span> <span class="k">for</span> <span class="n">p</span> <span class="ow">in</span> <span class="n">parameters</span> <span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">p</span><span class="p">,</span> <span class="n">TeamParameterKey</span><span class="p">)])</span>
189 | </pre></div>
190 | 
191 |     </div>
192 | </div>
193 | </div>
194 | 
195 | <div class="output_wrapper">
196 | <div class="output">
197 | 
198 | <div class="output_area">
199 | 
200 | <div class="output_subarea output_stream output_stdout output_text">
201 | <pre>[OffenceParameterKey(label=&#39;Arsenal&#39;),
202 |  OffenceParameterKey(label=&#39;Chelsea&#39;),
203 |  DefenceParameterKey(label=&#39;Arsenal&#39;),
204 |  DefenceParameterKey(label=&#39;Chelsea&#39;)]
205 | </pre>
206 | </div>
207 | </div>
208 | 
209 | </div>
210 | </div>
211 | 
212 | </div>
213 |     {% endraw %}
214 | 
215 | </div>
216 |  
217 | 
218 | 


--------------------------------------------------------------------------------
/nbs/blocks.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "87d13599",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# default_exp blocks"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": null,
 16 |    "id": "ec3f14c3",
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "#export\n",
 21 |     "import abc\n",
 22 |     "\n",
 23 |     "import numpy as np\n",
 24 |     "\n",
 25 |     "import mezzala.parameters"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "markdown",
 30 |    "id": "8240d697",
 31 |    "metadata": {},
 32 |    "source": [
 33 |     "# Model blocks"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "id": "0ffc35b3",
 40 |    "metadata": {},
 41 |    "outputs": [],
 42 |    "source": [
 43 |     "#exporti\n",
 44 |     "\n",
 45 |     "\n",
 46 |     "class ModelBlockABC(abc.ABC):\n",
 47 |     "    \"\"\"\n",
 48 |     "    Base class for model blocks\n",
 49 |     "    \"\"\"\n",
 50 |     "    PRIORITY = 0\n",
 51 |     "    \n",
 52 |     "    def param_keys(self, adapter, data):\n",
 53 |     "        return []\n",
 54 |     "\n",
 55 |     "    def constraints(self, adapter, data):\n",
 56 |     "        return []\n",
 57 |     "    \n",
 58 |     "    def home_terms(self, adapter, data):\n",
 59 |     "        return []\n",
 60 |     "    \n",
 61 |     "    def away_terms(self, adapter, data):\n",
 62 |     "        return []"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "id": "dd79c4c0",
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "#export\n",
 73 |     "\n",
 74 |     "\n",
 75 |     "class BaseRate(ModelBlockABC):\n",
 76 |     "    \"\"\"\n",
 77 |     "    Estimate average goalscoring rate as a separate parameter.\n",
 78 |     "    \n",
 79 |     "    This can be useful, since it results in both team offence and\n",
 80 |     "    team defence parameters being centered around 1.0\n",
 81 |     "    \"\"\"\n",
 82 |     "    \n",
 83 |     "    def __init__(self):\n",
 84 |     "        pass\n",
 85 |     "    \n",
 86 |     "    def __repr__(self):\n",
 87 |     "        return 'BaseRate()'\n",
 88 |     "    \n",
 89 |     "    def param_keys(self, adapter, data):\n",
 90 |     "        return [mezzala.parameters.AVG_KEY]\n",
 91 |     "    \n",
 92 |     "    def home_terms(self, adapter, row):\n",
 93 |     "        return [\n",
 94 |     "            (mezzala.parameters.AVG_KEY, 1.0)\n",
 95 |     "        ]\n",
 96 |     "    \n",
 97 |     "    def away_terms(self, adapter, row):\n",
 98 |     "        return [\n",
 99 |     "            (mezzala.parameters.AVG_KEY, 1.0)\n",
100 |     "        ]"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "code",
105 |    "execution_count": null,
106 |    "id": "2b40048d",
107 |    "metadata": {},
108 |    "outputs": [],
109 |    "source": [
110 |     "#export\n",
111 |     "\n",
112 |     "\n",
113 |     "class HomeAdvantage(ModelBlockABC):\n",
114 |     "    \"\"\"\n",
115 |     "    Estimate home advantage.\n",
116 |     "    \n",
117 |     "    Assumes constant home advantage is present in every match in the\n",
118 |     "    dataset\n",
119 |     "    \"\"\"\n",
120 |     "    \n",
121 |     "    def __init__(self):\n",
122 |     "        # TODO: allow HFA on/off depending on the data?\n",
123 |     "        pass\n",
124 |     "    \n",
125 |     "    def __repr__(self):\n",
126 |     "        return 'HomeAdvantage()'\n",
127 |     "    \n",
128 |     "    def param_keys(self, adapter, data):\n",
129 |     "        return [mezzala.parameters.HFA_KEY]\n",
130 |     "    \n",
131 |     "    def home_terms(self, adapter, row):\n",
132 |     "        return [\n",
133 |     "            (mezzala.parameters.HFA_KEY, 1.0)\n",
134 |     "        ]"
135 |    ]
136 |   },
137 |   {
138 |    "cell_type": "code",
139 |    "execution_count": null,
140 |    "id": "3b4c3987",
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "#export\n",
145 |     "\n",
146 |     "\n",
147 |     "class TeamStrength(ModelBlockABC):\n",
148 |     "    \"\"\"\n",
149 |     "    Estimate team offence and team defence parameters.\n",
150 |     "    \"\"\"\n",
151 |     "    \n",
152 |     "    # This is a gross hack so that we know that the \n",
153 |     "    # team strength parameters come first, and thus can\n",
154 |     "    # do the constraints (which are positionally indexed)\n",
155 |     "    PRIORITY = 1\n",
156 |     "    \n",
157 |     "    def __init__(self):\n",
158 |     "        pass\n",
159 |     "    \n",
160 |     "    def __repr__(self):\n",
161 |     "        return 'TeamStrength()'\n",
162 |     "    \n",
163 |     "    def _teams(self, adapter, data):\n",
164 |     "        return set(adapter.home_team(r) for r in data) | set(adapter.away_team(r) for r in data)\n",
165 |     "    \n",
166 |     "    def offence_key(self, label):\n",
167 |     "        return mezzala.parameters.OffenceParameterKey(label)\n",
168 |     "    \n",
169 |     "    def defence_key(self, label):\n",
170 |     "        return mezzala.parameters.DefenceParameterKey(label)\n",
171 |     "    \n",
172 |     "    def param_keys(self, adapter, data):\n",
173 |     "        teams = self._teams(adapter, data)\n",
174 |     "\n",
175 |     "        offence = [self.offence_key(t) for t in teams]\n",
176 |     "        defence = [self.defence_key(t) for t in teams]\n",
177 |     "\n",
178 |     "        return offence + defence\n",
179 |     "    \n",
180 |     "    def constraints(self, adapter, data):\n",
181 |     "        n_teams = len(self._teams(adapter, data))\n",
182 |     "        return [\n",
183 |     "            # Force team offence parameters to average to 1\n",
184 |     "            {'fun': lambda x: 1 - np.mean(np.exp(x[0:n_teams])),\n",
185 |     "             'type': 'eq'},\n",
186 |     "        ]\n",
187 |     "    \n",
188 |     "    def home_terms(self, adapter, row):\n",
189 |     "        return [\n",
190 |     "            (self.offence_key(adapter.home_team(row)), 1.0),\n",
191 |     "            (self.defence_key(adapter.away_team(row)), 1.0),\n",
192 |     "        ]\n",
193 |     "    \n",
194 |     "    def away_terms(self, adapter, row):\n",
195 |     "        return [\n",
196 |     "            (self.offence_key(adapter.away_team(row)), 1.0),\n",
197 |     "            (self.defence_key(adapter.home_team(row)), 1.0),\n",
198 |     "        ]"
199 |    ]
200 |   },
201 |   {
202 |    "cell_type": "code",
203 |    "execution_count": null,
204 |    "id": "bcc5939f",
205 |    "metadata": {},
206 |    "outputs": [],
207 |    "source": [
208 |     "#export\n",
209 |     "\n",
210 |     "\n",
211 |     "class KeyBlock(ModelBlockABC):\n",
212 |     "    \"\"\"\n",
213 |     "    Generic model block for adding arbitrary model terms from the data\n",
214 |     "    to both home and away team\n",
215 |     "    \"\"\"\n",
216 |     "    def __init__(self, key):\n",
217 |     "        self.key = key\n",
218 |     "    \n",
219 |     "    def __repr__(self):\n",
220 |     "        return 'KeyBlock()'\n",
221 |     "    \n",
222 |     "    def param_keys(self, adapter, data):\n",
223 |     "        return list(set(self.key(r) for r in data))\n",
224 |     "    \n",
225 |     "    def home_terms(self, adapter, row):\n",
226 |     "        return [self.key(row)]\n",
227 |     "    \n",
228 |     "    def away_terms(self, adapter, row):\n",
229 |     "        return [self.key(row)]"
230 |    ]
231 |   },
232 |   {
233 |    "cell_type": "code",
234 |    "execution_count": null,
235 |    "id": "57c18565",
236 |    "metadata": {},
237 |    "outputs": [],
238 |    "source": [
239 |     "#export\n",
240 |     "\n",
241 |     "\n",
242 |     "class ConstantBlock(ModelBlockABC):\n",
243 |     "    \"\"\"\n",
244 |     "    A model block for adding specific model terms to the parameter keys.\n",
245 |     "    \n",
246 |     "    Can be useful in conjunction with `LumpedAdapter` to ensure that certain parameters\n",
247 |     "    are in the model (even if they aren't estimated)\n",
248 |     "    \"\"\"\n",
249 |     "    def __init__(self, *args):\n",
250 |     "        self.terms = args\n",
251 |     "    \n",
252 |     "    def __repr__(self):\n",
253 |     "        return 'ConstantBlock()'\n",
254 |     "    \n",
255 |     "    def param_keys(self, adapter, data):\n",
256 |     "        return list(self.terms)"
257 |    ]
258 |   }
259 |  ],
260 |  "metadata": {
261 |   "kernelspec": {
262 |    "display_name": "Python 3",
263 |    "language": "python",
264 |    "name": "python3"
265 |   }
266 |  },
267 |  "nbformat": 4,
268 |  "nbformat_minor": 5
269 | }
270 | 


--------------------------------------------------------------------------------
/docs/Gemfile.lock:
--------------------------------------------------------------------------------
  1 | GEM
  2 |   remote: https://rubygems.org/
  3 |   specs:
  4 |     activesupport (6.0.3.7)
  5 |       concurrent-ruby (~> 1.0, >= 1.0.2)
  6 |       i18n (>= 0.7, < 2)
  7 |       minitest (~> 5.1)
  8 |       tzinfo (~> 1.1)
  9 |       zeitwerk (~> 2.2, >= 2.2.2)
 10 |     addressable (2.7.0)
 11 |       public_suffix (>= 2.0.2, < 5.0)
 12 |     coffee-script (2.4.1)
 13 |       coffee-script-source
 14 |       execjs
 15 |     coffee-script-source (1.11.1)
 16 |     colorator (1.1.0)
 17 |     commonmarker (0.17.13)
 18 |       ruby-enum (~> 0.5)
 19 |     concurrent-ruby (1.1.8)
 20 |     dnsruby (1.61.5)
 21 |       simpleidn (~> 0.1)
 22 |     em-websocket (0.5.2)
 23 |       eventmachine (>= 0.12.9)
 24 |       http_parser.rb (~> 0.6.0)
 25 |     ethon (0.14.0)
 26 |       ffi (>= 1.15.0)
 27 |     eventmachine (1.2.7)
 28 |     execjs (2.8.1)
 29 |     faraday (1.4.1)
 30 |       faraday-excon (~> 1.1)
 31 |       faraday-net_http (~> 1.0)
 32 |       faraday-net_http_persistent (~> 1.1)
 33 |       multipart-post (>= 1.2, < 3)
 34 |       ruby2_keywords (>= 0.0.4)
 35 |     faraday-excon (1.1.0)
 36 |     faraday-net_http (1.0.1)
 37 |     faraday-net_http_persistent (1.1.0)
 38 |     ffi (1.15.0)
 39 |     forwardable-extended (2.6.0)
 40 |     gemoji (3.0.1)
 41 |     github-pages (214)
 42 |       github-pages-health-check (= 1.17.0)
 43 |       jekyll (= 3.9.0)
 44 |       jekyll-avatar (= 0.7.0)
 45 |       jekyll-coffeescript (= 1.1.1)
 46 |       jekyll-commonmark-ghpages (= 0.1.6)
 47 |       jekyll-default-layout (= 0.1.4)
 48 |       jekyll-feed (= 0.15.1)
 49 |       jekyll-gist (= 1.5.0)
 50 |       jekyll-github-metadata (= 2.13.0)
 51 |       jekyll-mentions (= 1.6.0)
 52 |       jekyll-optional-front-matter (= 0.3.2)
 53 |       jekyll-paginate (= 1.1.0)
 54 |       jekyll-readme-index (= 0.3.0)
 55 |       jekyll-redirect-from (= 0.16.0)
 56 |       jekyll-relative-links (= 0.6.1)
 57 |       jekyll-remote-theme (= 0.4.3)
 58 |       jekyll-sass-converter (= 1.5.2)
 59 |       jekyll-seo-tag (= 2.7.1)
 60 |       jekyll-sitemap (= 1.4.0)
 61 |       jekyll-swiss (= 1.0.0)
 62 |       jekyll-theme-architect (= 0.1.1)
 63 |       jekyll-theme-cayman (= 0.1.1)
 64 |       jekyll-theme-dinky (= 0.1.1)
 65 |       jekyll-theme-hacker (= 0.1.2)
 66 |       jekyll-theme-leap-day (= 0.1.1)
 67 |       jekyll-theme-merlot (= 0.1.1)
 68 |       jekyll-theme-midnight (= 0.1.1)
 69 |       jekyll-theme-minimal (= 0.1.1)
 70 |       jekyll-theme-modernist (= 0.1.1)
 71 |       jekyll-theme-primer (= 0.5.4)
 72 |       jekyll-theme-slate (= 0.1.1)
 73 |       jekyll-theme-tactile (= 0.1.1)
 74 |       jekyll-theme-time-machine (= 0.1.1)
 75 |       jekyll-titles-from-headings (= 0.5.3)
 76 |       jemoji (= 0.12.0)
 77 |       kramdown (= 2.3.1)
 78 |       kramdown-parser-gfm (= 1.1.0)
 79 |       liquid (= 4.0.3)
 80 |       mercenary (~> 0.3)
 81 |       minima (= 2.5.1)
 82 |       nokogiri (>= 1.10.4, < 2.0)
 83 |       rouge (= 3.26.0)
 84 |       terminal-table (~> 1.4)
 85 |     github-pages-health-check (1.17.0)
 86 |       addressable (~> 2.3)
 87 |       dnsruby (~> 1.60)
 88 |       octokit (~> 4.0)
 89 |       public_suffix (>= 2.0.2, < 5.0)
 90 |       typhoeus (~> 1.3)
 91 |     html-pipeline (2.14.0)
 92 |       activesupport (>= 2)
 93 |       nokogiri (>= 1.4)
 94 |     http_parser.rb (0.6.0)
 95 |     i18n (0.9.5)
 96 |       concurrent-ruby (~> 1.0)
 97 |     jekyll (3.9.0)
 98 |       addressable (~> 2.4)
 99 |       colorator (~> 1.0)
100 |       em-websocket (~> 0.5)
101 |       i18n (~> 0.7)
102 |       jekyll-sass-converter (~> 1.0)
103 |       jekyll-watch (~> 2.0)
104 |       kramdown (>= 1.17, < 3)
105 |       liquid (~> 4.0)
106 |       mercenary (~> 0.3.3)
107 |       pathutil (~> 0.9)
108 |       rouge (>= 1.7, < 4)
109 |       safe_yaml (~> 1.0)
110 |     jekyll-avatar (0.7.0)
111 |       jekyll (>= 3.0, < 5.0)
112 |     jekyll-coffeescript (1.1.1)
113 |       coffee-script (~> 2.2)
114 |       coffee-script-source (~> 1.11.1)
115 |     jekyll-commonmark (1.3.1)
116 |       commonmarker (~> 0.14)
117 |       jekyll (>= 3.7, < 5.0)
118 |     jekyll-commonmark-ghpages (0.1.6)
119 |       commonmarker (~> 0.17.6)
120 |       jekyll-commonmark (~> 1.2)
121 |       rouge (>= 2.0, < 4.0)
122 |     jekyll-default-layout (0.1.4)
123 |       jekyll (~> 3.0)
124 |     jekyll-feed (0.15.1)
125 |       jekyll (>= 3.7, < 5.0)
126 |     jekyll-gist (1.5.0)
127 |       octokit (~> 4.2)
128 |     jekyll-github-metadata (2.13.0)
129 |       jekyll (>= 3.4, < 5.0)
130 |       octokit (~> 4.0, != 4.4.0)
131 |     jekyll-mentions (1.6.0)
132 |       html-pipeline (~> 2.3)
133 |       jekyll (>= 3.7, < 5.0)
134 |     jekyll-optional-front-matter (0.3.2)
135 |       jekyll (>= 3.0, < 5.0)
136 |     jekyll-paginate (1.1.0)
137 |     jekyll-readme-index (0.3.0)
138 |       jekyll (>= 3.0, < 5.0)
139 |     jekyll-redirect-from (0.16.0)
140 |       jekyll (>= 3.3, < 5.0)
141 |     jekyll-relative-links (0.6.1)
142 |       jekyll (>= 3.3, < 5.0)
143 |     jekyll-remote-theme (0.4.3)
144 |       addressable (~> 2.0)
145 |       jekyll (>= 3.5, < 5.0)
146 |       jekyll-sass-converter (>= 1.0, <= 3.0.0, != 2.0.0)
147 |       rubyzip (>= 1.3.0, < 3.0)
148 |     jekyll-sass-converter (1.5.2)
149 |       sass (~> 3.4)
150 |     jekyll-seo-tag (2.7.1)
151 |       jekyll (>= 3.8, < 5.0)
152 |     jekyll-sitemap (1.4.0)
153 |       jekyll (>= 3.7, < 5.0)
154 |     jekyll-swiss (1.0.0)
155 |     jekyll-theme-architect (0.1.1)
156 |       jekyll (~> 3.5)
157 |       jekyll-seo-tag (~> 2.0)
158 |     jekyll-theme-cayman (0.1.1)
159 |       jekyll (~> 3.5)
160 |       jekyll-seo-tag (~> 2.0)
161 |     jekyll-theme-dinky (0.1.1)
162 |       jekyll (~> 3.5)
163 |       jekyll-seo-tag (~> 2.0)
164 |     jekyll-theme-hacker (0.1.2)
165 |       jekyll (> 3.5, < 5.0)
166 |       jekyll-seo-tag (~> 2.0)
167 |     jekyll-theme-leap-day (0.1.1)
168 |       jekyll (~> 3.5)
169 |       jekyll-seo-tag (~> 2.0)
170 |     jekyll-theme-merlot (0.1.1)
171 |       jekyll (~> 3.5)
172 |       jekyll-seo-tag (~> 2.0)
173 |     jekyll-theme-midnight (0.1.1)
174 |       jekyll (~> 3.5)
175 |       jekyll-seo-tag (~> 2.0)
176 |     jekyll-theme-minimal (0.1.1)
177 |       jekyll (~> 3.5)
178 |       jekyll-seo-tag (~> 2.0)
179 |     jekyll-theme-modernist (0.1.1)
180 |       jekyll (~> 3.5)
181 |       jekyll-seo-tag (~> 2.0)
182 |     jekyll-theme-primer (0.5.4)
183 |       jekyll (> 3.5, < 5.0)
184 |       jekyll-github-metadata (~> 2.9)
185 |       jekyll-seo-tag (~> 2.0)
186 |     jekyll-theme-slate (0.1.1)
187 |       jekyll (~> 3.5)
188 |       jekyll-seo-tag (~> 2.0)
189 |     jekyll-theme-tactile (0.1.1)
190 |       jekyll (~> 3.5)
191 |       jekyll-seo-tag (~> 2.0)
192 |     jekyll-theme-time-machine (0.1.1)
193 |       jekyll (~> 3.5)
194 |       jekyll-seo-tag (~> 2.0)
195 |     jekyll-titles-from-headings (0.5.3)
196 |       jekyll (>= 3.3, < 5.0)
197 |     jekyll-watch (2.2.1)
198 |       listen (~> 3.0)
199 |     jemoji (0.12.0)
200 |       gemoji (~> 3.0)
201 |       html-pipeline (~> 2.2)
202 |       jekyll (>= 3.0, < 5.0)
203 |     kramdown (2.3.1)
204 |       rexml
205 |     kramdown-parser-gfm (1.1.0)
206 |       kramdown (~> 2.0)
207 |     liquid (4.0.3)
208 |     listen (3.5.1)
209 |       rb-fsevent (~> 0.10, >= 0.10.3)
210 |       rb-inotify (~> 0.9, >= 0.9.10)
211 |     mercenary (0.3.6)
212 |     mini_portile2 (2.5.1)
213 |     minima (2.5.1)
214 |       jekyll (>= 3.5, < 5.0)
215 |       jekyll-feed (~> 0.9)
216 |       jekyll-seo-tag (~> 2.1)
217 |     minitest (5.14.4)
218 |     multipart-post (2.1.1)
219 |     nokogiri (1.11.0)
220 |       mini_portile2 (~> 2.5.0)
221 |       racc (~> 1.4)
222 |     octokit (4.21.0)
223 |       faraday (>= 0.9)
224 |       sawyer (~> 0.8.0, >= 0.5.3)
225 |     pathutil (0.16.2)
226 |       forwardable-extended (~> 2.6)
227 |     public_suffix (4.0.6)
228 |     racc (1.5.2)
229 |     rb-fsevent (0.11.0)
230 |     rb-inotify (0.10.1)
231 |       ffi (~> 1.0)
232 |     rexml (3.2.5)
233 |     rouge (3.26.0)
234 |     ruby-enum (0.9.0)
235 |       i18n
236 |     ruby2_keywords (0.0.4)
237 |     rubyzip (2.3.0)
238 |     safe_yaml (1.0.5)
239 |     sass (3.7.4)
240 |       sass-listen (~> 4.0.0)
241 |     sass-listen (4.0.0)
242 |       rb-fsevent (~> 0.9, >= 0.9.4)
243 |       rb-inotify (~> 0.9, >= 0.9.7)
244 |     sawyer (0.8.2)
245 |       addressable (>= 2.3.5)
246 |       faraday (> 0.8, < 2.0)
247 |     simpleidn (0.2.1)
248 |       unf (~> 0.1.4)
249 |     terminal-table (1.8.0)
250 |       unicode-display_width (~> 1.1, >= 1.1.1)
251 |     thread_safe (0.3.6)
252 |     typhoeus (1.4.0)
253 |       ethon (>= 0.9.0)
254 |     tzinfo (1.2.9)
255 |       thread_safe (~> 0.1)
256 |     unf (0.1.4)
257 |       unf_ext
258 |     unf_ext (0.0.7.7)
259 |     unicode-display_width (1.7.0)
260 |     webrick (1.7.0)
261 |     zeitwerk (2.4.2)
262 | 
263 | PLATFORMS
264 |   ruby
265 | 
266 | DEPENDENCIES
267 |   github-pages
268 |   jekyll (>= 3.7)
269 |   jekyll-remote-theme
270 |   kramdown (>= 2.3.1)
271 |   nokogiri (< 1.11.1)
272 |   webrick (~> 1.7)
273 | 
274 | BUNDLED WITH
275 |    2.2.17
276 | 


--------------------------------------------------------------------------------
/mezzala/models.py:
--------------------------------------------------------------------------------
  1 | # AUTOGENERATED! DO NOT EDIT! File to edit: nbs/models.ipynb (unless otherwise specified).
  2 | 
  3 | __all__ = ['ScorelinePrediction', 'Outcomes', 'OutcomePrediction', 'scoreline_to_outcome', 'scorelines_to_outcomes',
  4 |            'DixonColes']
  5 | 
  6 | # Cell
  7 | import collections
  8 | import dataclasses
  9 | import enum
 10 | import functools
 11 | import itertools
 12 | import typing
 13 | import warnings
 14 | 
 15 | import numpy as np
 16 | import scipy.stats
 17 | import scipy.optimize
 18 | 
 19 | import mezzala.blocks
 20 | import mezzala.weights
 21 | import mezzala.parameters
 22 | 
 23 | # Cell
 24 | 
 25 | 
 26 | @dataclasses.dataclass(frozen=True)
 27 | class ScorelinePrediction:
 28 |     home_goals: int
 29 |     away_goals: int
 30 |     probability: float
 31 | 
 32 | 
 33 | # Cell
 34 | 
 35 | 
 36 | class Outcomes(enum.Enum):
 37 |     HOME_WIN = 'Home win'
 38 |     DRAW = 'Draw'
 39 |     AWAY_WIN = 'Away win'
 40 | 
 41 |     def __repr__(self):
 42 |         return f"Outcomes('{self.value}')"
 43 | 
 44 | 
 45 | @dataclasses.dataclass(frozen=True)
 46 | class OutcomePrediction:
 47 |     outcome: Outcomes
 48 |     probability: float
 49 | 
 50 | # Cell
 51 | 
 52 | 
 53 | def scoreline_to_outcome(home_goals, away_goals):
 54 |     if home_goals > away_goals:
 55 |         return Outcomes.HOME_WIN
 56 |     if home_goals == away_goals:
 57 |         return Outcomes.DRAW
 58 |     if home_goals < away_goals:
 59 |         return Outcomes.AWAY_WIN
 60 | 
 61 | 
 62 | def scorelines_to_outcomes(scorelines):
 63 |     return {
 64 |         outcome: OutcomePrediction(
 65 |             outcome,
 66 |             sum(s.probability for s in scorelines if scoreline_to_outcome(s.home_goals, s.away_goals) == outcome)
 67 |         )
 68 |         for outcome in Outcomes
 69 |     }
 70 | 
 71 | # Cell
 72 | 
 73 | _DEFAULT_BLOCKS = [
 74 |     mezzala.blocks.BaseRate(),
 75 |     mezzala.blocks.HomeAdvantage(),
 76 |     mezzala.blocks.TeamStrength(),
 77 | ]
 78 | 
 79 | 
 80 | class DixonColes:
 81 |     """
 82 |     Dixon-Coles models in Python
 83 |     """
 84 | 
 85 |     def __init__(self, adapter, blocks=_DEFAULT_BLOCKS, weight=mezzala.weights.UniformWeight(), params=None):
 86 |         # NOTE: Should params be stored internally as separate lists of keys and values?
 87 |         # Then `params` (the dict) can be a property?
 88 |         self.params = params
 89 |         self.adapter = adapter
 90 |         self.weight = weight
 91 |         self._blocks = blocks
 92 | 
 93 |     def __repr__(self):
 94 |         return f'DixonColes(adapter={repr(self.adapter)}, blocks={repr([b for b in self.blocks])}), weight={repr(self.weight)}'
 95 | 
 96 |     @property
 97 |     def blocks(self):
 98 |         # Make sure blocks are always in the correct order
 99 |         return sorted(self._blocks, key=lambda x: -x.PRIORITY)
100 | 
101 |     def home_goals(self, row):
102 |         """ Returns home goals scored """
103 |         return self.adapter.home_goals(row)
104 | 
105 |     def away_goals(self, row):
106 |         """ Returns away goals scored """
107 |         return self.adapter.away_goals(row)
108 | 
109 |     def parse_params(self, data):
110 |         """ Returns a tuple of (parameter_names, [constraints]) """
111 |         base_params = [mezzala.parameters.RHO_KEY]
112 |         block_params = list(itertools.chain(*[b.param_keys(self.adapter, data) for b in self.blocks]))
113 |         return (
114 |             block_params + base_params,
115 |             list(itertools.chain(*[b.constraints(self.adapter, data) for b in self.blocks]))
116 |         )
117 | 
118 |     def _home_terms(self, row):
119 |         return dict(itertools.chain(*[b.home_terms(self.adapter, row) for b in self.blocks]))
120 | 
121 |     def _away_terms(self, row):
122 |         return dict(itertools.chain(*[b.away_terms(self.adapter, row) for b in self.blocks]))
123 | 
124 |     # Core methods
125 | 
126 |     @staticmethod
127 |     def _assign_params(param_keys, param_values):
128 |         return dict(zip(param_keys, param_values))
129 | 
130 |     def _create_feature_matrices(self, param_keys, data):
131 |         """ Create X (feature) matrices for home and away poisson rates """
132 |         home_X = np.empty([len(data), len(param_keys)])
133 |         away_X = np.empty([len(data), len(param_keys)])
134 |         for row_i, row in enumerate(data):
135 |             home_rate_terms = self._home_terms(row)
136 |             away_rate_terms = self._away_terms(row)
137 |             for param_i, param_key in enumerate(param_keys):
138 |                 home_X[row_i, param_i] = home_rate_terms.get(param_key, 0)
139 |                 away_X[row_i, param_i] = away_rate_terms.get(param_key, 0)
140 |         return home_X, away_X
141 | 
142 |     @staticmethod
143 |     def _tau(home_goals, away_goals, home_rate, away_rate, rho):
144 | 
145 |         tau = np.ones(len(home_goals))
146 |         tau = np.where((home_goals == 0) & (away_goals == 0), 1 - home_rate*away_rate*rho, tau)
147 |         tau = np.where((home_goals == 0) & (away_goals == 1), 1 + home_rate*rho, tau)
148 |         tau = np.where((home_goals == 1) & (away_goals == 0), 1 + away_rate*rho, tau)
149 |         tau = np.where((home_goals == 1) & (away_goals == 1), 1 - rho, tau)
150 | 
151 |         return tau
152 | 
153 |     def _log_like(self, home_goals, away_goals, home_rate, away_rate, rho):
154 |         return (
155 |             scipy.stats.poisson.logpmf(home_goals, home_rate) +
156 |             scipy.stats.poisson.logpmf(away_goals, away_rate) +
157 |             np.log(self._tau(home_goals, away_goals, home_rate, away_rate, rho))
158 |         )
159 | 
160 |     def objective_fn(self, data, home_goals, away_goals, weights, home_X, away_X, rho_ix, xs):
161 |         rho = xs[rho_ix]
162 | 
163 |         # Parameters are estimated in log-space, but `scipy.stats.poisson`
164 |         # expects real number inputs, so we have to use `np.exp`
165 |         home_rate = np.exp(np.dot(home_X, xs))
166 |         away_rate = np.exp(np.dot(away_X, xs))
167 | 
168 |         log_like = self._log_like(home_goals, away_goals, home_rate, away_rate, rho)
169 |         pseudo_log_like = log_like * weights
170 |         return -np.sum(pseudo_log_like)
171 | 
172 |     def fit(self, data, **kwargs):
173 |         param_keys, constraints = self.parse_params(data)
174 | 
175 |         init_params = (
176 |             # Attempt to initialise parameters from any already-existing parameters
177 |             # This substantially speeds up fitting during (e.g.) backtesting
178 |             np.asarray([self.params.get(p, 0) for p in param_keys])
179 |             # If the model has no parameters, just initialise with 0s
180 |             if self.params
181 |             else np.zeros(len(param_keys))
182 |         )
183 | 
184 |         # Precalculate the things we can (for speed)
185 | 
186 |         # Create X (feature) matrices for home and away poisson rates
187 |         home_X, away_X = self._create_feature_matrices(param_keys, data)
188 | 
189 |         # Get home goals, away goals, and weights from the data
190 |         home_goals, away_goals = np.empty(len(data)), np.empty(len(data))
191 |         weights = np.empty(len(data))
192 |         for i, row in enumerate(data):
193 |             home_goals[i] = self.home_goals(row)
194 |             away_goals[i] = self.away_goals(row)
195 |             weights[i] = self.weight(row)
196 | 
197 |         # Get the index of the Rho correlation parameter
198 |         rho_ix = param_keys.index(mezzala.parameters.RHO_KEY)
199 | 
200 |         # Optimise!
201 |         with warnings.catch_warnings():
202 |             # This is a hack
203 |             # Because we haven't properly constrained `rho`, it's possible for 0 or even negative
204 |             # values of `tau` (and therefore invalid probabilities)
205 |             # Ignoring the warnings has little practical impact, since the model
206 |             # will still find the objective function's minimum point regardless
207 |             warnings.simplefilter('ignore')
208 | 
209 |             estimate = scipy.optimize.minimize(
210 |                 lambda xs: self.objective_fn(data, home_goals, away_goals, weights, home_X, away_X, rho_ix, xs),
211 |                 x0=init_params,
212 |                 constraints=constraints,
213 |                 **kwargs
214 |             )
215 | 
216 |         # Parse the estimates into parameter map
217 |         self.params = self._assign_params(param_keys, estimate.x)
218 | 
219 |         return self
220 | 
221 |     def predict_one(self, row, up_to=26):
222 |         scorelines = list(itertools.product(range(up_to), repeat=2))
223 | 
224 |         home_goals = np.asarray([h for h, a in scorelines])
225 |         away_goals = np.asarray([a for h, a in scorelines])
226 | 
227 |         param_keys = self.params.keys()
228 |         param_values = np.asarray([v for v in self.params.values()])
229 | 
230 |         home_X, away_X = self._create_feature_matrices(param_keys, [row])
231 | 
232 |         home_rate = np.exp(np.dot(home_X, param_values))
233 |         away_rate = np.exp(np.dot(away_X, param_values))
234 |         rho = self.params[mezzala.parameters.RHO_KEY]
235 | 
236 |         probs = np.exp(self._log_like(home_goals, away_goals, home_rate, away_rate, rho))
237 | 
238 |         return [ScorelinePrediction(*vals) for vals in zip(home_goals.tolist(), away_goals.tolist(), probs)]
239 | 
240 |     def predict(self, data, up_to=26):
241 |         scorelines = [self.predict_one(row, up_to=up_to) for row in data]
242 |         return scorelines


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/nbs/index.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Mezzala\n",
  8 |     "\n",
  9 |     "> Models for estimating football (soccer) team-strength"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "## Install"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "markdown",
 21 |    "metadata": {},
 22 |    "source": [
 23 |     "`pip install mezzala`"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "markdown",
 28 |    "metadata": {},
 29 |    "source": [
 30 |     "## How to use"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": null,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "import mezzala"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "markdown",
 44 |    "metadata": {},
 45 |    "source": [
 46 |     "Fitting a Dixon-Coles team strength model:"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "markdown",
 51 |    "metadata": {},
 52 |    "source": [
 53 |     "First, we need to get some data"
 54 |    ]
 55 |   },
 56 |   {
 57 |    "cell_type": "code",
 58 |    "execution_count": null,
 59 |    "metadata": {},
 60 |    "outputs": [
 61 |     {
 62 |      "data": {
 63 |       "text/plain": [
 64 |        "[{'date': '2016-08-13',\n",
 65 |        "  'team1': 'Hull City AFC',\n",
 66 |        "  'team2': 'Leicester City FC',\n",
 67 |        "  'score': {'ft': [2, 1]}},\n",
 68 |        " {'date': '2016-08-13',\n",
 69 |        "  'team1': 'Everton FC',\n",
 70 |        "  'team2': 'Tottenham Hotspur FC',\n",
 71 |        "  'score': {'ft': [1, 1]}},\n",
 72 |        " {'date': '2016-08-13',\n",
 73 |        "  'team1': 'Crystal Palace FC',\n",
 74 |        "  'team2': 'West Bromwich Albion FC',\n",
 75 |        "  'score': {'ft': [0, 1]}}]"
 76 |       ]
 77 |      },
 78 |      "execution_count": null,
 79 |      "metadata": {},
 80 |      "output_type": "execute_result"
 81 |     }
 82 |    ],
 83 |    "source": [
 84 |     "import itertools\n",
 85 |     "import json\n",
 86 |     "import urllib.request\n",
 87 |     "\n",
 88 |     "\n",
 89 |     "# Use 2016/17 Premier League data from the openfootball repo\n",
 90 |     "url = 'https://raw.githubusercontent.com/openfootball/football.json/master/2016-17/en.1.json'\n",
 91 |     "\n",
 92 |     "\n",
 93 |     "response = urllib.request.urlopen(url)\n",
 94 |     "data_raw = json.loads(response.read())\n",
 95 |     "\n",
 96 |     "# Reshape the data to just get the matches\n",
 97 |     "data = list(itertools.chain(*[d['matches'] for d in data_raw['rounds']]))\n",
 98 |     "\n",
 99 |     "data[0:3]"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "metadata": {},
105 |    "source": [
106 |     "### Fitting a model"
107 |    ]
108 |   },
109 |   {
110 |    "cell_type": "markdown",
111 |    "metadata": {},
112 |    "source": [
113 |     "To fit a model with mezzala, you need to create an \"adapter\". Adapters are used to connect a model to a data source.\n",
114 |     "\n",
115 |     "Because our data is a list of dicts, we are going to use a `KeyAdapter`."
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": null,
121 |    "metadata": {},
122 |    "outputs": [
123 |     {
124 |      "data": {
125 |       "text/plain": [
126 |        "'Hull City AFC'"
127 |       ]
128 |      },
129 |      "execution_count": null,
130 |      "metadata": {},
131 |      "output_type": "execute_result"
132 |     }
133 |    ],
134 |    "source": [
135 |     "adapter = mezzala.KeyAdapter(       # `KeyAdapter` = datum['...']\n",
136 |     "    home_team='team1',\n",
137 |     "    away_team='team2',\n",
138 |     "    home_goals=['score', 'ft', 0],  # Get nested fields with lists of fields\n",
139 |     "    away_goals=['score', 'ft', 1],  # i.e. datum['score']['ft'][1]\n",
140 |     ")\n",
141 |     "\n",
142 |     "# You'll never need to call the methods on an \n",
143 |     "# adapter directly, but just to show that it \n",
144 |     "# works as expected:\n",
145 |     "adapter.home_team(data[0])"
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "markdown",
150 |    "metadata": {},
151 |    "source": [
152 |     "Once we have an adapter for our specific data source, we can fit the model:"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": null,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "data": {
162 |       "text/plain": [
163 |        "DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=UniformWeight()"
164 |       ]
165 |      },
166 |      "execution_count": null,
167 |      "metadata": {},
168 |      "output_type": "execute_result"
169 |     }
170 |    ],
171 |    "source": [
172 |     "model = mezzala.DixonColes(adapter=adapter)\n",
173 |     "model.fit(data)"
174 |    ]
175 |   },
176 |   {
177 |    "cell_type": "markdown",
178 |    "metadata": {},
179 |    "source": [
180 |     "### Making predictions"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "metadata": {},
186 |    "source": [
187 |     "By default, you only need to supply the home and away team to get predictions. This should be supplied in the same format as the training data.\n",
188 |     "\n",
189 |     "`DixonColes` has two methods for making predictions:\n",
190 |     "\n",
191 |     "* `predict_one` - for predicting a single match\n",
192 |     "* `predict` - for predicting multiple matches"
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "metadata": {},
199 |    "outputs": [
200 |     {
201 |      "data": {
202 |       "text/plain": [
203 |        "[ScorelinePrediction(home_goals=0, away_goals=0, probability=0.023625049697587167),\n",
204 |        " ScorelinePrediction(home_goals=0, away_goals=1, probability=0.012682094432376022),\n",
205 |        " ScorelinePrediction(home_goals=0, away_goals=2, probability=0.00623268833779594),\n",
206 |        " ScorelinePrediction(home_goals=0, away_goals=3, probability=0.0016251514235046444),\n",
207 |        " ScorelinePrediction(home_goals=0, away_goals=4, probability=0.00031781436109636405)]"
208 |       ]
209 |      },
210 |      "execution_count": null,
211 |      "metadata": {},
212 |      "output_type": "execute_result"
213 |     }
214 |    ],
215 |    "source": [
216 |     "match_to_predict = {\n",
217 |     "    'team1': 'Manchester City FC',\n",
218 |     "    'team2': 'Swansea City FC',\n",
219 |     "}\n",
220 |     "\n",
221 |     "scorelines = model.predict_one(match_to_predict)\n",
222 |     "\n",
223 |     "scorelines[0:5]"
224 |    ]
225 |   },
226 |   {
227 |    "cell_type": "markdown",
228 |    "metadata": {},
229 |    "source": [
230 |     "Each of these methods return predictions in the form of `ScorelinePredictions`. \n",
231 |     "\n",
232 |     "* `predict_one` returns a list of `ScorelinePredictions`\n",
233 |     "* `predict` returns a list of `ScorelinePredictions` for each predicted match (i.e. a list of lists)\n",
234 |     "\n",
235 |     "However, it can sometimes be more useful to have predictions in the form of match _outcomes_. Mezzala exposes the `scorelines_to_outcomes` function for this purpose:"
236 |    ]
237 |   },
238 |   {
239 |    "cell_type": "code",
240 |    "execution_count": null,
241 |    "metadata": {},
242 |    "outputs": [
243 |     {
244 |      "data": {
245 |       "text/plain": [
246 |        "{Outcomes('Home win'): OutcomePrediction(outcome=Outcomes('Home win'), probability=0.8255103334702835),\n",
247 |        " Outcomes('Draw'): OutcomePrediction(outcome=Outcomes('Draw'), probability=0.11615659853961693),\n",
248 |        " Outcomes('Away win'): OutcomePrediction(outcome=Outcomes('Away win'), probability=0.058333067990098304)}"
249 |       ]
250 |      },
251 |      "execution_count": null,
252 |      "metadata": {},
253 |      "output_type": "execute_result"
254 |     }
255 |    ],
256 |    "source": [
257 |     "mezzala.scorelines_to_outcomes(scorelines)"
258 |    ]
259 |   },
260 |   {
261 |    "cell_type": "markdown",
262 |    "metadata": {},
263 |    "source": [
264 |     "### Extending the model\n",
265 |     "\n",
266 |     "It's possible to fit more sophisticated models with mezzala, using **weights** and **model blocks**\n",
267 |     "\n",
268 |     "#### Weights\n",
269 |     "\n",
270 |     "You can weight individual data points by supplying a function (or callable) to the `weight` argument to `DixonColes`:"
271 |    ]
272 |   },
273 |   {
274 |    "cell_type": "code",
275 |    "execution_count": null,
276 |    "metadata": {},
277 |    "outputs": [
278 |     {
279 |      "data": {
280 |       "text/plain": [
281 |        "DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=<function <lambda> at 0x123067488>"
282 |       ]
283 |      },
284 |      "execution_count": null,
285 |      "metadata": {},
286 |      "output_type": "execute_result"
287 |     }
288 |    ],
289 |    "source": [
290 |     "mezzala.DixonColes(\n",
291 |     "    adapter=adapter,\n",
292 |     "    # By default, all data points are weighted equally,\n",
293 |     "    # which is equivalent to:\n",
294 |     "    weight=lambda x: 1\n",
295 |     ")"
296 |    ]
297 |   },
298 |   {
299 |    "cell_type": "markdown",
300 |    "metadata": {},
301 |    "source": [
302 |     "Mezzala also provides an `ExponentialWeight` for the purpose of time-discounting:"
303 |    ]
304 |   },
305 |   {
306 |    "cell_type": "code",
307 |    "execution_count": null,
308 |    "metadata": {},
309 |    "outputs": [
310 |     {
311 |      "data": {
312 |       "text/plain": [
313 |        "DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=ExponentialWeight(epsilon=-0.0065, key=<function <lambda> at 0x122f938c8>)"
314 |       ]
315 |      },
316 |      "execution_count": null,
317 |      "metadata": {},
318 |      "output_type": "execute_result"
319 |     }
320 |    ],
321 |    "source": [
322 |     "mezzala.DixonColes(\n",
323 |     "    adapter=adapter,\n",
324 |     "    weight=mezzala.ExponentialWeight(\n",
325 |     "        epsilon=-0.0065,               # Decay rate\n",
326 |     "        key=lambda x: x['days_ago']\n",
327 |     "    )\n",
328 |     ")"
329 |    ]
330 |   },
331 |   {
332 |    "cell_type": "markdown",
333 |    "metadata": {},
334 |    "source": [
335 |     "#### Model blocks\n",
336 |     "\n",
337 |     "Model \"blocks\" define the calculation and estimation of home and away goalscoring rates."
338 |    ]
339 |   },
340 |   {
341 |    "cell_type": "code",
342 |    "execution_count": null,
343 |    "metadata": {},
344 |    "outputs": [
345 |     {
346 |      "data": {
347 |       "text/plain": [
348 |        "DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), HomeAdvantage(), BaseRate()]), weight=UniformWeight()"
349 |       ]
350 |      },
351 |      "execution_count": null,
352 |      "metadata": {},
353 |      "output_type": "execute_result"
354 |     }
355 |    ],
356 |    "source": [
357 |     "mezzala.DixonColes(\n",
358 |     "    adapter=adapter,\n",
359 |     "    # By default, only team strength and home advantage,\n",
360 |     "    # is estimated:\n",
361 |     "    blocks=[\n",
362 |     "        mezzala.blocks.HomeAdvantage(),\n",
363 |     "        mezzala.blocks.TeamStrength(),\n",
364 |     "        mezzala.blocks.BaseRate(),      # Adds \"average goalscoring rate\" as a distinct parameter\n",
365 |     "    ]\n",
366 |     ")"
367 |    ]
368 |   },
369 |   {
370 |    "cell_type": "markdown",
371 |    "metadata": {},
372 |    "source": [
373 |     "To add custom parameters (e.g. per-league home advantage), you need to add additional model blocks."
374 |    ]
375 |   }
376 |  ],
377 |  "metadata": {
378 |   "kernelspec": {
379 |    "display_name": "Python 3",
380 |    "language": "python",
381 |    "name": "python3"
382 |   }
383 |  },
384 |  "nbformat": 4,
385 |  "nbformat_minor": 2
386 | }
387 | 


--------------------------------------------------------------------------------
/nbs/models.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "edbf830b",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# default_exp models"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "id": "62b5819d",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "# Models"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "id": "a2ed6dc9",
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "#hide\n",
 29 |     "from nbdev.showdoc import *"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "id": "1c6d1b2e",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "#export\n",
 40 |     "import collections\n",
 41 |     "import dataclasses\n",
 42 |     "import enum\n",
 43 |     "import functools\n",
 44 |     "import itertools\n",
 45 |     "import typing\n",
 46 |     "import warnings\n",
 47 |     "\n",
 48 |     "import numpy as np\n",
 49 |     "import scipy.stats\n",
 50 |     "import scipy.optimize\n",
 51 |     "\n",
 52 |     "import mezzala.blocks\n",
 53 |     "import mezzala.weights\n",
 54 |     "import mezzala.parameters"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "code",
 59 |    "execution_count": null,
 60 |    "id": "476722af",
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "#export\n",
 65 |     "\n",
 66 |     "\n",
 67 |     "@dataclasses.dataclass(frozen=True)\n",
 68 |     "class ScorelinePrediction:\n",
 69 |     "    home_goals: int\n",
 70 |     "    away_goals: int\n",
 71 |     "    probability: float\n",
 72 |     "        "
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": null,
 78 |    "id": "ca2fbcf3",
 79 |    "metadata": {},
 80 |    "outputs": [],
 81 |    "source": [
 82 |     "#export\n",
 83 |     "\n",
 84 |     "\n",
 85 |     "class Outcomes(enum.Enum):\n",
 86 |     "    HOME_WIN = 'Home win'\n",
 87 |     "    DRAW = 'Draw'\n",
 88 |     "    AWAY_WIN = 'Away win'\n",
 89 |     "    \n",
 90 |     "    def __repr__(self):\n",
 91 |     "        return f\"Outcomes('{self.value}')\"\n",
 92 |     "\n",
 93 |     "\n",
 94 |     "@dataclasses.dataclass(frozen=True)\n",
 95 |     "class OutcomePrediction:\n",
 96 |     "    outcome: Outcomes\n",
 97 |     "    probability: float"
 98 |    ]
 99 |   },
100 |   {
101 |    "cell_type": "code",
102 |    "execution_count": null,
103 |    "id": "1a44f039",
104 |    "metadata": {},
105 |    "outputs": [],
106 |    "source": [
107 |     "#export\n",
108 |     "\n",
109 |     "\n",
110 |     "def scoreline_to_outcome(home_goals, away_goals):\n",
111 |     "    if home_goals > away_goals:\n",
112 |     "        return Outcomes.HOME_WIN\n",
113 |     "    if home_goals == away_goals:\n",
114 |     "        return Outcomes.DRAW\n",
115 |     "    if home_goals < away_goals:\n",
116 |     "        return Outcomes.AWAY_WIN\n",
117 |     "    \n",
118 |     "    \n",
119 |     "def scorelines_to_outcomes(scorelines):\n",
120 |     "    return {\n",
121 |     "        outcome: OutcomePrediction(\n",
122 |     "            outcome, \n",
123 |     "            sum(s.probability for s in scorelines if scoreline_to_outcome(s.home_goals, s.away_goals) == outcome)\n",
124 |     "        )\n",
125 |     "        for outcome in Outcomes\n",
126 |     "    }"
127 |    ]
128 |   },
129 |   {
130 |    "cell_type": "code",
131 |    "execution_count": null,
132 |    "id": "8bff2f9f",
133 |    "metadata": {},
134 |    "outputs": [],
135 |    "source": [
136 |     "#export\n",
137 |     "\n",
138 |     "_DEFAULT_BLOCKS = [\n",
139 |     "    mezzala.blocks.BaseRate(),\n",
140 |     "    mezzala.blocks.HomeAdvantage(),\n",
141 |     "    mezzala.blocks.TeamStrength(),\n",
142 |     "]\n",
143 |     "\n",
144 |     "\n",
145 |     "class DixonColes:\n",
146 |     "    \"\"\"\n",
147 |     "    Dixon-Coles models in Python\n",
148 |     "    \"\"\"\n",
149 |     "    \n",
150 |     "    def __init__(self, adapter, blocks=_DEFAULT_BLOCKS, weight=mezzala.weights.UniformWeight(), params=None):\n",
151 |     "        # NOTE: Should params be stored internally as separate lists of keys and values? \n",
152 |     "        # Then `params` (the dict) can be a property?\n",
153 |     "        self.params = params\n",
154 |     "        self.adapter = adapter\n",
155 |     "        self.weight = weight\n",
156 |     "        self._blocks = blocks\n",
157 |     "        \n",
158 |     "    def __repr__(self):\n",
159 |     "        return f'DixonColes(adapter={repr(self.adapter)}, blocks={repr([b for b in self.blocks])}), weight={repr(self.weight)}'\n",
160 |     "        \n",
161 |     "    @property\n",
162 |     "    def blocks(self):\n",
163 |     "        # Make sure blocks are always in the correct order\n",
164 |     "        return sorted(self._blocks, key=lambda x: -x.PRIORITY)\n",
165 |     "\n",
166 |     "    def home_goals(self, row):\n",
167 |     "        \"\"\" Returns home goals scored \"\"\"\n",
168 |     "        return self.adapter.home_goals(row)\n",
169 |     "\n",
170 |     "    def away_goals(self, row):\n",
171 |     "        \"\"\" Returns away goals scored \"\"\"\n",
172 |     "        return self.adapter.away_goals(row)\n",
173 |     "\n",
174 |     "    def parse_params(self, data):\n",
175 |     "        \"\"\" Returns a tuple of (parameter_names, [constraints]) \"\"\"\n",
176 |     "        base_params = [mezzala.parameters.RHO_KEY]\n",
177 |     "        block_params = list(itertools.chain(*[b.param_keys(self.adapter, data) for b in self.blocks]))\n",
178 |     "        return (\n",
179 |     "            block_params + base_params,\n",
180 |     "            list(itertools.chain(*[b.constraints(self.adapter, data) for b in self.blocks]))\n",
181 |     "        )\n",
182 |     "    \n",
183 |     "    def _home_terms(self, row):\n",
184 |     "        return dict(itertools.chain(*[b.home_terms(self.adapter, row) for b in self.blocks]))\n",
185 |     "    \n",
186 |     "    def _away_terms(self, row):\n",
187 |     "        return dict(itertools.chain(*[b.away_terms(self.adapter, row) for b in self.blocks]))\n",
188 |     "    \n",
189 |     "    # Core methods\n",
190 |     "\n",
191 |     "    @staticmethod\n",
192 |     "    def _assign_params(param_keys, param_values):\n",
193 |     "        return dict(zip(param_keys, param_values))\n",
194 |     "    \n",
195 |     "    def _create_feature_matrices(self, param_keys, data):\n",
196 |     "        \"\"\" Create X (feature) matrices for home and away poisson rates \"\"\"\n",
197 |     "        home_X = np.empty([len(data), len(param_keys)])\n",
198 |     "        away_X = np.empty([len(data), len(param_keys)])\n",
199 |     "        for row_i, row in enumerate(data):\n",
200 |     "            home_rate_terms = self._home_terms(row)\n",
201 |     "            away_rate_terms = self._away_terms(row)\n",
202 |     "            for param_i, param_key in enumerate(param_keys):\n",
203 |     "                home_X[row_i, param_i] = home_rate_terms.get(param_key, 0)\n",
204 |     "                away_X[row_i, param_i] = away_rate_terms.get(param_key, 0)\n",
205 |     "        return home_X, away_X\n",
206 |     "\n",
207 |     "    @staticmethod\n",
208 |     "    def _tau(home_goals, away_goals, home_rate, away_rate, rho):\n",
209 |     "        \n",
210 |     "        tau = np.ones(len(home_goals))\n",
211 |     "        tau = np.where((home_goals == 0) & (away_goals == 0), 1 - home_rate*away_rate*rho, tau)\n",
212 |     "        tau = np.where((home_goals == 0) & (away_goals == 1), 1 + home_rate*rho, tau)\n",
213 |     "        tau = np.where((home_goals == 1) & (away_goals == 0), 1 + away_rate*rho, tau)\n",
214 |     "        tau = np.where((home_goals == 1) & (away_goals == 1), 1 - rho, tau)        \n",
215 |     "        \n",
216 |     "        return tau\n",
217 |     "\n",
218 |     "    def _log_like(self, home_goals, away_goals, home_rate, away_rate, rho):\n",
219 |     "        return (\n",
220 |     "            scipy.stats.poisson.logpmf(home_goals, home_rate) +\n",
221 |     "            scipy.stats.poisson.logpmf(away_goals, away_rate) +\n",
222 |     "            np.log(self._tau(home_goals, away_goals, home_rate, away_rate, rho))\n",
223 |     "        )\n",
224 |     "\n",
225 |     "    def objective_fn(self, data, home_goals, away_goals, weights, home_X, away_X, rho_ix, xs):\n",
226 |     "        rho = xs[rho_ix]\n",
227 |     "\n",
228 |     "        # Parameters are estimated in log-space, but `scipy.stats.poisson`\n",
229 |     "        # expects real number inputs, so we have to use `np.exp`\n",
230 |     "        home_rate = np.exp(np.dot(home_X, xs))\n",
231 |     "        away_rate = np.exp(np.dot(away_X, xs))\n",
232 |     "\n",
233 |     "        log_like = self._log_like(home_goals, away_goals, home_rate, away_rate, rho)\n",
234 |     "        pseudo_log_like = log_like * weights\n",
235 |     "        return -np.sum(pseudo_log_like)\n",
236 |     "\n",
237 |     "    def fit(self, data, **kwargs):\n",
238 |     "        param_keys, constraints = self.parse_params(data)\n",
239 |     "\n",
240 |     "        init_params = (\n",
241 |     "            # Attempt to initialise parameters from any already-existing parameters\n",
242 |     "            # This substantially speeds up fitting during (e.g.) backtesting\n",
243 |     "            np.asarray([self.params.get(p, 0) for p in param_keys])\n",
244 |     "            # If the model has no parameters, just initialise with 0s\n",
245 |     "            if self.params\n",
246 |     "            else np.zeros(len(param_keys))\n",
247 |     "        )\n",
248 |     "\n",
249 |     "        # Precalculate the things we can (for speed)\n",
250 |     "        \n",
251 |     "        # Create X (feature) matrices for home and away poisson rates\n",
252 |     "        home_X, away_X = self._create_feature_matrices(param_keys, data)\n",
253 |     "        \n",
254 |     "        # Get home goals, away goals, and weights from the data\n",
255 |     "        home_goals, away_goals = np.empty(len(data)), np.empty(len(data))\n",
256 |     "        weights = np.empty(len(data))\n",
257 |     "        for i, row in enumerate(data):\n",
258 |     "            home_goals[i] = self.home_goals(row)\n",
259 |     "            away_goals[i] = self.away_goals(row)\n",
260 |     "            weights[i] = self.weight(row)\n",
261 |     "            \n",
262 |     "        # Get the index of the Rho correlation parameter\n",
263 |     "        rho_ix = param_keys.index(mezzala.parameters.RHO_KEY)\n",
264 |     "\n",
265 |     "        # Optimise!\n",
266 |     "        with warnings.catch_warnings():\n",
267 |     "            # This is a hack\n",
268 |     "            # Because we haven't properly constrained `rho`, it's possible for 0 or even negative\n",
269 |     "            # values of `tau` (and therefore invalid probabilities)\n",
270 |     "            # Ignoring the warnings has little practical impact, since the model\n",
271 |     "            # will still find the objective function's minimum point regardless\n",
272 |     "            warnings.simplefilter('ignore')\n",
273 |     "        \n",
274 |     "            estimate = scipy.optimize.minimize(\n",
275 |     "                lambda xs: self.objective_fn(data, home_goals, away_goals, weights, home_X, away_X, rho_ix, xs),\n",
276 |     "                x0=init_params,\n",
277 |     "                constraints=constraints,\n",
278 |     "                **kwargs\n",
279 |     "            )\n",
280 |     "\n",
281 |     "        # Parse the estimates into parameter map\n",
282 |     "        self.params = self._assign_params(param_keys, estimate.x)\n",
283 |     "\n",
284 |     "        return self\n",
285 |     "\n",
286 |     "    def predict_one(self, row, up_to=26):\n",
287 |     "        scorelines = list(itertools.product(range(up_to), repeat=2))\n",
288 |     "\n",
289 |     "        home_goals = np.asarray([h for h, a in scorelines])\n",
290 |     "        away_goals = np.asarray([a for h, a in scorelines])\n",
291 |     "        \n",
292 |     "        param_keys = self.params.keys()\n",
293 |     "        param_values = np.asarray([v for v in self.params.values()])\n",
294 |     "    \n",
295 |     "        home_X, away_X = self._create_feature_matrices(param_keys, [row])\n",
296 |     "        \n",
297 |     "        home_rate = np.exp(np.dot(home_X, param_values))\n",
298 |     "        away_rate = np.exp(np.dot(away_X, param_values))\n",
299 |     "        rho = self.params[mezzala.parameters.RHO_KEY]\n",
300 |     "        \n",
301 |     "        probs = np.exp(self._log_like(home_goals, away_goals, home_rate, away_rate, rho))\n",
302 |     "        \n",
303 |     "        return [ScorelinePrediction(*vals) for vals in zip(home_goals.tolist(), away_goals.tolist(), probs)]\n",
304 |     "\n",
305 |     "    def predict(self, data, up_to=26):\n",
306 |     "        scorelines = [self.predict_one(row, up_to=up_to) for row in data]\n",
307 |     "        return scorelines"
308 |    ]
309 |   }
310 |  ],
311 |  "metadata": {
312 |   "kernelspec": {
313 |    "display_name": "Python 3",
314 |    "language": "python",
315 |    "name": "python3"
316 |   }
317 |  },
318 |  "nbformat": 4,
319 |  "nbformat_minor": 5
320 | }
321 | 


--------------------------------------------------------------------------------
/nbs/adapters.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "id": "220e6de0",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "# default_exp adapters"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "markdown",
 15 |    "id": "6c394436",
 16 |    "metadata": {},
 17 |    "source": [
 18 |     "# Data Adapters"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "id": "a81115fc",
 25 |    "metadata": {},
 26 |    "outputs": [],
 27 |    "source": [
 28 |     "#hide\n",
 29 |     "from nbdev.showdoc import *"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "id": "d8ad4fbc",
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "import dataclasses\n",
 40 |     "import typing"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "id": "9b18f71f",
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "#export\n",
 51 |     "import collections\n",
 52 |     "import functools\n",
 53 |     "\n",
 54 |     "import mezzala.parameters"
 55 |    ]
 56 |   },
 57 |   {
 58 |    "cell_type": "markdown",
 59 |    "id": "86cfc9f2",
 60 |    "metadata": {},
 61 |    "source": [
 62 |     "## Basic adapters"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": null,
 68 |    "id": "211bdaa4",
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "#export\n",
 73 |     "\n",
 74 |     "\n",
 75 |     "class KeyAdapter:\n",
 76 |     "    \"\"\"\n",
 77 |     "    Get data from subscriptable objects.\n",
 78 |     "    \"\"\"\n",
 79 |     "    \n",
 80 |     "    def __init__(self, home_goals, away_goals, **kwargs):\n",
 81 |     "        self._lookup = {\n",
 82 |     "            'home_goals': home_goals,\n",
 83 |     "            'away_goals': away_goals,\n",
 84 |     "            **kwargs\n",
 85 |     "        }\n",
 86 |     "        \n",
 87 |     "    def __repr__(self):\n",
 88 |     "        args_repr = ', '.join(f'{k}={repr(v)}' for k, v in self._lookup.items())\n",
 89 |     "        return f'KeyAdapter({args_repr})'\n",
 90 |     "\n",
 91 |     "    def _get_in(self, row, item):\n",
 92 |     "        if isinstance(item, list):\n",
 93 |     "            return functools.reduce(lambda d, i: d[i], item, row)\n",
 94 |     "        return row[item]\n",
 95 |     "    \n",
 96 |     "    def __getattr__(self, key): \n",
 97 |     "        def getter(row):\n",
 98 |     "            return self._get_in(row, self._lookup[key])\n",
 99 |     "        return getter"
100 |    ]
101 |   },
102 |   {
103 |    "cell_type": "markdown",
104 |    "id": "e7427e3d",
105 |    "metadata": {},
106 |    "source": [
107 |     "Anything subscriptable can be with this type of adapter. For example,\n",
108 |     "you might have input data as a list of tuples (e.g. using Python's\n",
109 |     "in-built `csv` library)"
110 |    ]
111 |   },
112 |   {
113 |    "cell_type": "code",
114 |    "execution_count": null,
115 |    "id": "37639fc2",
116 |    "metadata": {},
117 |    "outputs": [],
118 |    "source": [
119 |     "index_adapter = KeyAdapter(0, 1)\n",
120 |     "\n",
121 |     "assert index_adapter.home_goals([1, 2]) == 1\n",
122 |     "assert index_adapter.away_goals([1, 2]) == 2"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "markdown",
127 |    "id": "dcee4f73",
128 |    "metadata": {},
129 |    "source": [
130 |     "Or, you might be using a list of dicts."
131 |    ]
132 |   },
133 |   {
134 |    "cell_type": "code",
135 |    "execution_count": null,
136 |    "id": "eb152699",
137 |    "metadata": {},
138 |    "outputs": [],
139 |    "source": [
140 |     "dict_adapter = KeyAdapter('hg', 'ag', home_team='home', away_team='away')\n",
141 |     "\n",
142 |     "example_dict = {\n",
143 |     "    'home': 'Team 1',\n",
144 |     "    'away': 'Team 2',\n",
145 |     "    'hg': 4,\n",
146 |     "    'ag': 3,\n",
147 |     "}\n",
148 |     "\n",
149 |     "assert dict_adapter.home_goals(example_dict) == 4\n",
150 |     "assert dict_adapter.away_goals(example_dict) == 3\n",
151 |     "assert dict_adapter.home_team(example_dict) == 'Team 1'\n",
152 |     "assert dict_adapter.away_team(example_dict) == 'Team 2'"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "markdown",
157 |    "id": "c2c6b4dc",
158 |    "metadata": {},
159 |    "source": [
160 |     "Nested data can be supplied using a list"
161 |    ]
162 |   },
163 |   {
164 |    "cell_type": "code",
165 |    "execution_count": null,
166 |    "id": "fd4dd144",
167 |    "metadata": {},
168 |    "outputs": [],
169 |    "source": [
170 |     "nested_dict_adapter = KeyAdapter(\n",
171 |     "    home_goals=['scoreline', 0], \n",
172 |     "    away_goals=['scoreline', 1]\n",
173 |     ")\n",
174 |     "\n",
175 |     "example_nested_dict = {\n",
176 |     "    'scoreline': [1, 1]\n",
177 |     "}\n",
178 |     "\n",
179 |     "assert nested_dict_adapter.home_goals(example_nested_dict) == 1\n",
180 |     "assert nested_dict_adapter.away_goals(example_nested_dict) == 1"
181 |    ]
182 |   },
183 |   {
184 |    "cell_type": "markdown",
185 |    "id": "5b8acbec",
186 |    "metadata": {},
187 |    "source": [
188 |     "`KeyAdapter` could be used alongside `pd.DataFrame.iterrows` as well; however, it is much faster when using `pd.DataFrame.itertuples`.\n",
189 |     "\n",
190 |     "Likewise, you can't use a `KeyAdapter` with custom objects (e.g. dataclasses).\n",
191 |     "\n",
192 |     "In this case, you need an `AttributeAdapter`."
193 |    ]
194 |   },
195 |   {
196 |    "cell_type": "code",
197 |    "execution_count": null,
198 |    "id": "4d42dc0a",
199 |    "metadata": {},
200 |    "outputs": [],
201 |    "source": [
202 |     "#export\n",
203 |     "\n",
204 |     "\n",
205 |     "class AttributeAdapter:\n",
206 |     "    \"\"\"\n",
207 |     "    Get data from object attributes.\n",
208 |     "    \"\"\"\n",
209 |     "    def __init__(self, home_goals, away_goals, **kwargs):\n",
210 |     "        self._lookup = {\n",
211 |     "            'home_goals': home_goals,\n",
212 |     "            'away_goals': away_goals,\n",
213 |     "            **kwargs\n",
214 |     "        }\n",
215 |     "        \n",
216 |     "    def __repr__(self):\n",
217 |     "        args_repr = ', '.join(f'{k}={repr(v)}' for k, v in self._lookup.items())\n",
218 |     "        return f'KeyAdapter({args_repr})'\n",
219 |     "        \n",
220 |     "    def _get_in(self, row, item):\n",
221 |     "        if isinstance(item, list):\n",
222 |     "            return functools.reduce(getattr, item, row)\n",
223 |     "        return getattr(row, item)\n",
224 |     "    \n",
225 |     "    def __getattr__(self, key): \n",
226 |     "        def getter(row):\n",
227 |     "            return self._get_in(row, self._lookup[key])\n",
228 |     "        return getter"
229 |    ]
230 |   },
231 |   {
232 |    "cell_type": "code",
233 |    "execution_count": null,
234 |    "id": "1d3fb1dc",
235 |    "metadata": {},
236 |    "outputs": [],
237 |    "source": [
238 |     "@dataclasses.dataclass()\n",
239 |     "class ExampleData:\n",
240 |     "    hg: int\n",
241 |     "    ag: int\n",
242 |     "    home: str\n",
243 |     "    away: str\n",
244 |     "\n",
245 |     "\n",
246 |     "attr_adapter = AttributeAdapter('hg', 'ag', home_team='home', away_team='away')\n",
247 |     "\n",
248 |     "\n",
249 |     "example_attr = ExampleData(\n",
250 |     "    home='Another home team',\n",
251 |     "    away='Another away team',\n",
252 |     "    hg=5,\n",
253 |     "    ag=1,\n",
254 |     ")\n",
255 |     "\n",
256 |     "assert attr_adapter.home_goals(example_attr) == 5\n",
257 |     "assert attr_adapter.away_goals(example_attr) == 1\n",
258 |     "assert attr_adapter.home_team(example_attr) == 'Another home team'\n",
259 |     "assert attr_adapter.away_team(example_attr) == 'Another away team'"
260 |    ]
261 |   },
262 |   {
263 |    "cell_type": "markdown",
264 |    "id": "0de5484e",
265 |    "metadata": {},
266 |    "source": [
267 |     "As with `KeyAdapter`, nested attributes can also be fetched using lists"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "code",
272 |    "execution_count": null,
273 |    "id": "ae96b0e2",
274 |    "metadata": {},
275 |    "outputs": [],
276 |    "source": [
277 |     "@dataclasses.dataclass()\n",
278 |     "class Scoreline:\n",
279 |     "    home: int\n",
280 |     "    away: int\n",
281 |     "\n",
282 |     "\n",
283 |     "@dataclasses.dataclass()\n",
284 |     "class ExampleNestedData:\n",
285 |     "    scoreline: Scoreline\n",
286 |     "    home: str\n",
287 |     "    away: str\n",
288 |     "\n",
289 |     "\n",
290 |     "nested_attr_adapter = AttributeAdapter(\n",
291 |     "    home_team='home',\n",
292 |     "    home_goals=['scoreline', 'home'], \n",
293 |     "    away_team='away',\n",
294 |     "    away_goals=['scoreline', 'away'],\n",
295 |     ")\n",
296 |     "\n",
297 |     "example_nested_attr = ExampleNestedData(\n",
298 |     "    home='Another home team',\n",
299 |     "    away='Another away team',\n",
300 |     "    scoreline=Scoreline(2, 5),\n",
301 |     ")\n",
302 |     "\n",
303 |     "assert nested_attr_adapter.home_goals(example_nested_attr) == 2\n",
304 |     "assert nested_attr_adapter.away_goals(example_nested_attr) == 5"
305 |    ]
306 |   },
307 |   {
308 |    "cell_type": "markdown",
309 |    "id": "456f7c47",
310 |    "metadata": {},
311 |    "source": [
312 |     "## Composite adapters"
313 |    ]
314 |   },
315 |   {
316 |    "cell_type": "code",
317 |    "execution_count": null,
318 |    "id": "a01bc537",
319 |    "metadata": {},
320 |    "outputs": [],
321 |    "source": [
322 |     "#export\n",
323 |     "\n",
324 |     "\n",
325 |     "class LumpedAdapter:\n",
326 |     "    \"\"\" \n",
327 |     "    Lump terms which have appeared below a minimum number of times in\n",
328 |     "    the training data into a placeholder term\n",
329 |     "    \"\"\"\n",
330 |     "\n",
331 |     "    def __init__(self, base_adapter, **kwargs):\n",
332 |     "        self.base_adapter = base_adapter\n",
333 |     "        \n",
334 |     "        # Match terms to placeholders\n",
335 |     "        # If multiple terms have the same placeholder (e.g. Home and Away\n",
336 |     "        # teams) they will share a counter\n",
337 |     "        self._term_lookup = kwargs\n",
338 |     "        \n",
339 |     "        self._counters = None\n",
340 |     "        \n",
341 |     "    def __repr__(self):\n",
342 |     "        args_repr = ', '.join(f'{k}={repr(v)}' for k, v in self._term_lookup.items())\n",
343 |     "        return f'LumpedAdapter(base_adapter={repr(self.base_adapter)}, {args_repr})'\n",
344 |     "        \n",
345 |     "    def fit(self, data):\n",
346 |     "        self._counters = {}\n",
347 |     "        for term, (placeholder, _) in self._term_lookup.items():\n",
348 |     "            # Initialise with an empty counter if it doesn't already exist\n",
349 |     "            # We need to do this so that multiple terms sharing the same counter\n",
350 |     "            # (home and away teams) are shared\n",
351 |     "            init_counter = self._counters.get(placeholder, collections.Counter())\n",
352 |     "            \n",
353 |     "            counter = collections.Counter(getattr(self.base_adapter, term)(row) for row in data)\n",
354 |     "            \n",
355 |     "            self._counters[placeholder] = init_counter + counter\n",
356 |     "        return self\n",
357 |     "    \n",
358 |     "    def __getattr__(self, key):\n",
359 |     "        if not self._counters:\n",
360 |     "            raise ValueError(\n",
361 |     "                'No counts found! You need to call `LumpedAdapter.fit` '\n",
362 |     "                'on the training data before you can use it!'\n",
363 |     "            )\n",
364 |     "        \n",
365 |     "        def getter(row):\n",
366 |     "            value = getattr(self.base_adapter, key)(row)\n",
367 |     "            placeholder, min_obs = self._term_lookup.get(key, (None, None))\n",
368 |     "            if placeholder and self._counters[placeholder][value] < min_obs:\n",
369 |     "                return placeholder\n",
370 |     "            return value\n",
371 |     "        return getter"
372 |    ]
373 |   },
374 |   {
375 |    "cell_type": "code",
376 |    "execution_count": null,
377 |    "id": "8255aa1a",
378 |    "metadata": {},
379 |    "outputs": [
380 |     {
381 |      "data": {
382 |       "text/plain": [
383 |        "LumpedAdapter(base_adapter=KeyAdapter(home_goals='hg', away_goals='ag', home_team='home', away_team='away'), home_team=('Other team', 5), away_team=('Other team', 5))"
384 |       ]
385 |      },
386 |      "execution_count": null,
387 |      "metadata": {},
388 |      "output_type": "execute_result"
389 |     }
390 |    ],
391 |    "source": [
392 |     "example_lumped_data = [\n",
393 |     "    *([example_dict]*4),  # i.e., 'Team 1' and 'Team 2' appear in the data 4 times\n",
394 |     "    {'away': 'Team 1',    # 'Team 1' now appears an additional time, (5 total)\n",
395 |     "                          # Although this time appears as an *away* team\n",
396 |     "     'home': 'Team 3',    # While 'Team 3' appears once\n",
397 |     "     'hg': 4, \n",
398 |     "     'ag': 3},\n",
399 |     "]\n",
400 |     "\n",
401 |     "\n",
402 |     "lumped_dict_adapter = LumpedAdapter(\n",
403 |     "    base_adapter=dict_adapter,\n",
404 |     "    home_team=('Other team', 5),      # Because `home_team` and `away_team` share the same\n",
405 |     "                                      # placeholder value ('Other team'), they are counted\n",
406 |     "                                      # together. I.e. a team has to appear at least 5 times\n",
407 |     "                                      # as _either_ the home team, or the away team\n",
408 |     "    away_team=('Other team', 5)\n",
409 |     ")\n",
410 |     "lumped_dict_adapter.fit(example_lumped_data)\n",
411 |     "\n",
412 |     "lumped_dict_adapter"
413 |    ]
414 |   },
415 |   {
416 |    "cell_type": "code",
417 |    "execution_count": null,
418 |    "id": "a16e9648",
419 |    "metadata": {},
420 |    "outputs": [],
421 |    "source": [
422 |     "example_lumped_1 = {\n",
423 |     "    'home': 'Team 1',\n",
424 |     "    'away': 'Team 3',\n",
425 |     "    'hg': 1, \n",
426 |     "    'ag': 2\n",
427 |     "}\n",
428 |     "\n",
429 |     "# A team with more than the minimum number of observations appears as before\n",
430 |     "assert lumped_dict_adapter.home_team(example_lumped_1) == 'Team 1'\n",
431 |     "\n",
432 |     "# But a team with fewer observations appears as the placeholder\n",
433 |     "assert lumped_dict_adapter.away_team(example_lumped_1) == 'Other team'\n",
434 |     "\n",
435 |     "# Meanwhile, values without a placeholder in the LumpedAdapter\n",
436 |     "# also appear as before\n",
437 |     "assert lumped_dict_adapter.home_goals(example_lumped_1) == 1\n",
438 |     "assert lumped_dict_adapter.away_goals(example_lumped_1) == 2"
439 |    ]
440 |   },
441 |   {
442 |    "cell_type": "markdown",
443 |    "id": "b8440a77",
444 |    "metadata": {},
445 |    "source": [
446 |     "Using a lumped adapter can also allow you to handle items which didn't appear in the training set at all:"
447 |    ]
448 |   },
449 |   {
450 |    "cell_type": "code",
451 |    "execution_count": null,
452 |    "id": "5829767f",
453 |    "metadata": {},
454 |    "outputs": [],
455 |    "source": [
456 |     "example_lumped_2 = {\n",
457 |     "    'home': 'Team 2',  # Only appeared 4 times, below threshold of 5\n",
458 |     "    'away': 'Team 4',  # Appeared 0 times in the data\n",
459 |     "    'hg': 1, \n",
460 |     "    'ag': 2\n",
461 |     "}\n",
462 |     "\n",
463 |     "assert lumped_dict_adapter.home_team(example_lumped_2) == 'Other team'\n",
464 |     "assert lumped_dict_adapter.away_team(example_lumped_2) == 'Other team'"
465 |    ]
466 |   }
467 |  ],
468 |  "metadata": {
469 |   "kernelspec": {
470 |    "display_name": "Python 3",
471 |    "language": "python",
472 |    "name": "python3"
473 |   }
474 |  },
475 |  "nbformat": 4,
476 |  "nbformat_minor": 5
477 | }
478 | 


--------------------------------------------------------------------------------
/nbs/core.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": null,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# default_exp __init__"
 10 |    ]
 11 |   },
 12 |   {
 13 |    "cell_type": "markdown",
 14 |    "metadata": {},
 15 |    "source": [
 16 |     "# Core\n",
 17 |     "\n",
 18 |     "> Team-strength models in Python"
 19 |    ]
 20 |   },
 21 |   {
 22 |    "cell_type": "code",
 23 |    "execution_count": null,
 24 |    "metadata": {},
 25 |    "outputs": [],
 26 |    "source": [
 27 |     "#hide\n",
 28 |     "from nbdev.showdoc import *"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": null,
 34 |    "metadata": {},
 35 |    "outputs": [],
 36 |    "source": [
 37 |     "import json\n",
 38 |     "import datetime as dt\n",
 39 |     "import numpy as np\n",
 40 |     "import pprint"
 41 |    ]
 42 |   },
 43 |   {
 44 |    "cell_type": "code",
 45 |    "execution_count": null,
 46 |    "metadata": {},
 47 |    "outputs": [],
 48 |    "source": [
 49 |     "#export\n",
 50 |     "\n",
 51 |     "# For now, just re-export everything\n",
 52 |     "from mezzala.adapters import *\n",
 53 |     "from mezzala.blocks import *\n",
 54 |     "from mezzala.models import *\n",
 55 |     "from mezzala.weights import *\n",
 56 |     "from mezzala.parameters import *"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "Let's demo"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [
 71 |     {
 72 |      "data": {
 73 |       "text/plain": [
 74 |        "[{'date': datetime.datetime(2015, 8, 8, 0, 0),\n",
 75 |        "  'team1': 'Manchester United FC',\n",
 76 |        "  'team2': 'Tottenham Hotspur FC',\n",
 77 |        "  'score': {'ft': [1, 0]}},\n",
 78 |        " {'date': datetime.datetime(2015, 8, 8, 0, 0),\n",
 79 |        "  'team1': 'AFC Bournemouth',\n",
 80 |        "  'team2': 'Aston Villa FC',\n",
 81 |        "  'score': {'ft': [0, 1]}},\n",
 82 |        " {'date': datetime.datetime(2015, 8, 8, 0, 0),\n",
 83 |        "  'team1': 'Leicester City FC',\n",
 84 |        "  'team2': 'Sunderland AFC',\n",
 85 |        "  'score': {'ft': [4, 2]}}]"
 86 |       ]
 87 |      },
 88 |      "execution_count": null,
 89 |      "metadata": {},
 90 |      "output_type": "execute_result"
 91 |     }
 92 |    ],
 93 |    "source": [
 94 |     "with open('../data/premier-league-1516.json', 'r') as f:\n",
 95 |     "    pl_1516 = json.load(f)\n",
 96 |     "\n",
 97 |     "# Let's parse the dates, too\n",
 98 |     "for match in pl_1516:\n",
 99 |     "    match['date'] = dt.datetime.strptime(match['date'], '%Y-%m-%d')\n",
100 |     "    \n",
101 |     "pl_1516[0:3]"
102 |    ]
103 |   },
104 |   {
105 |    "cell_type": "markdown",
106 |    "metadata": {},
107 |    "source": [
108 |     "A model in `mezzala` is composed of 2 parts:\n",
109 |     "\n",
110 |     "* Model blocks (see `mezzala.blocks`)\n",
111 |     "* An adapter (see `mezzala.adapters`)"
112 |    ]
113 |   },
114 |   {
115 |    "cell_type": "markdown",
116 |    "metadata": {},
117 |    "source": [
118 |     "The model blocks determine which terms your model estimates. In general, you will want\n",
119 |     "to estimate offensive and defensive strength for each team (`TeamStrength`) and \n",
120 |     "as well as home advantage `HomeAdvantage`.\n",
121 |     "\n",
122 |     "The selected model blocks can be supplied to the model as a list:"
123 |    ]
124 |   },
125 |   {
126 |    "cell_type": "code",
127 |    "execution_count": null,
128 |    "metadata": {},
129 |    "outputs": [],
130 |    "source": [
131 |     "blocks = [TeamStrength(), HomeAdvantage()]"
132 |    ]
133 |   },
134 |   {
135 |    "cell_type": "markdown",
136 |    "metadata": {},
137 |    "source": [
138 |     "An adapter connects your model to the data source. In other words, it tells the model how find the information needed to fit.\n",
139 |     "\n",
140 |     "The information needed is determined by which model blocks are used. In our case,\n",
141 |     "\n",
142 |     "* All models require `home_goals` and `away_goals`\n",
143 |     "* `TeamStrength` - requires `home_team` and `away_team`\n",
144 |     "\n",
145 |     "`HomeAdvantage` doesn't require any information, since it assumes all matches have equal home-field advantage by default."
146 |    ]
147 |   },
148 |   {
149 |    "cell_type": "code",
150 |    "execution_count": null,
151 |    "metadata": {},
152 |    "outputs": [],
153 |    "source": [
154 |     "adapter = KeyAdapter(               # `KeyAdapter` = data['...']\n",
155 |     "    home_team='team1',\n",
156 |     "    away_team='team2',\n",
157 |     "    home_goals=['score', 'ft', 0],  # Get nested fields with lists of fields\n",
158 |     "    away_goals=['score', 'ft', 1],  # i.e. data['score']['ft'][1]\n",
159 |     ")"
160 |    ]
161 |   },
162 |   {
163 |    "cell_type": "markdown",
164 |    "metadata": {},
165 |    "source": [
166 |     "Pulling this together, we can construct a model from an adapter and blocks"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": null,
172 |    "metadata": {},
173 |    "outputs": [],
174 |    "source": [
175 |     "model = DixonColes(adapter=adapter, blocks=blocks)\n",
176 |     "model.fit(pl_1516)\n",
177 |     "\n",
178 |     "# All estimates should be valid numbers\n",
179 |     "assert all(not np.isnan(x) for x in model.params.values())\n",
180 |     "\n",
181 |     "# Home advantage should be positive\n",
182 |     "assert 1.0 < np.exp(model.params[HFA_KEY]) < 2.0"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "markdown",
187 |    "metadata": {},
188 |    "source": [
189 |     "Let's inspect the parameters a bit. First, let's look at the boring (non-team) ones:"
190 |    ]
191 |   },
192 |   {
193 |    "cell_type": "code",
194 |    "execution_count": null,
195 |    "metadata": {},
196 |    "outputs": [
197 |     {
198 |      "name": "stdout",
199 |      "output_type": "stream",
200 |      "text": [
201 |       "ParameterKey(label='Home-field advantage')           : 1.23\n",
202 |       "ParameterKey(label='Rho')                            : 0.94\n"
203 |      ]
204 |     }
205 |    ],
206 |    "source": [
207 |     "param_keys = model.params.keys()\n",
208 |     "param_key_len = max(len(str(k)) for k in param_keys)\n",
209 |     "\n",
210 |     "for k in param_keys:\n",
211 |     "    if not isinstance(k, TeamParameterKey):\n",
212 |     "        key_str = str(k).ljust(param_key_len + 1)\n",
213 |     "        print(f'{key_str}: {np.exp(model.params[k]):0.2f}')"
214 |    ]
215 |   },
216 |   {
217 |    "cell_type": "markdown",
218 |    "metadata": {},
219 |    "source": [
220 |     "And the team ones. Let's look at each team's attacking quality:"
221 |    ]
222 |   },
223 |   {
224 |    "cell_type": "code",
225 |    "execution_count": null,
226 |    "metadata": {},
227 |    "outputs": [
228 |     {
229 |      "name": "stdout",
230 |      "output_type": "stream",
231 |      "text": [
232 |       "Manchester City FC: 1.38\n",
233 |       "Tottenham Hotspur FC: 1.33\n",
234 |       "Leicester City FC: 1.31\n",
235 |       "West Ham United FC: 1.27\n",
236 |       "Arsenal FC: 1.25\n",
237 |       "Liverpool FC: 1.23\n",
238 |       "Everton FC: 1.16\n",
239 |       "Chelsea FC: 1.15\n",
240 |       "Southampton FC: 1.14\n",
241 |       "Manchester United FC: 0.94\n",
242 |       "Sunderland AFC: 0.94\n",
243 |       "AFC Bournemouth: 0.89\n",
244 |       "Newcastle United FC: 0.87\n",
245 |       "Swansea City FC: 0.82\n",
246 |       "Stoke City FC: 0.81\n",
247 |       "Watford FC: 0.78\n",
248 |       "Norwich City FC: 0.77\n",
249 |       "Crystal Palace FC: 0.76\n",
250 |       "West Bromwich Albion FC: 0.66\n",
251 |       "Aston Villa FC: 0.54\n"
252 |      ]
253 |     }
254 |    ],
255 |    "source": [
256 |     "teams = {k.label for k in param_keys if isinstance(k, TeamParameterKey)}\n",
257 |     "\n",
258 |     "team_offence = [(t, np.exp(model.params[OffenceParameterKey(t)])) for t in teams]\n",
259 |     "for team, estimate in sorted(team_offence, key=lambda x: -x[1]):\n",
260 |     "    print(f'{team}: {estimate:0.2f}')"
261 |    ]
262 |   },
263 |   {
264 |    "cell_type": "code",
265 |    "execution_count": null,
266 |    "metadata": {},
267 |    "outputs": [
268 |     {
269 |      "name": "stdout",
270 |      "output_type": "stream",
271 |      "text": [
272 |       "Manchester United FC: 0.82\n",
273 |       "Tottenham Hotspur FC: 0.84\n",
274 |       "Leicester City FC: 0.86\n",
275 |       "Arsenal FC: 0.86\n",
276 |       "Southampton FC: 0.97\n",
277 |       "Manchester City FC: 0.99\n",
278 |       "West Bromwich Albion FC: 1.10\n",
279 |       "Watford FC: 1.17\n",
280 |       "Liverpool FC: 1.19\n",
281 |       "Crystal Palace FC: 1.19\n",
282 |       "Swansea City FC: 1.21\n",
283 |       "West Ham United FC: 1.22\n",
284 |       "Chelsea FC: 1.26\n",
285 |       "Stoke City FC: 1.28\n",
286 |       "Everton FC: 1.32\n",
287 |       "Sunderland AFC: 1.46\n",
288 |       "Newcastle United FC: 1.52\n",
289 |       "Norwich City FC: 1.55\n",
290 |       "AFC Bournemouth: 1.57\n",
291 |       "Aston Villa FC: 1.75\n"
292 |      ]
293 |     }
294 |    ],
295 |    "source": [
296 |     "team_defence = [(t, np.exp(model.params[DefenceParameterKey(t)])) for t in teams]\n",
297 |     "for team, estimate in sorted(team_defence, key=lambda x: x[1]):\n",
298 |     "    print(f'{team}: {estimate:0.2f}')"
299 |    ]
300 |   },
301 |   {
302 |    "cell_type": "markdown",
303 |    "metadata": {},
304 |    "source": [
305 |     "Making predictions for a single match"
306 |    ]
307 |   },
308 |   {
309 |    "cell_type": "code",
310 |    "execution_count": null,
311 |    "metadata": {},
312 |    "outputs": [
313 |     {
314 |      "data": {
315 |       "text/plain": [
316 |        "[ScorelinePrediction(home_goals=0, away_goals=0, probability=0.0619999820129133),\n",
317 |        " ScorelinePrediction(home_goals=0, away_goals=1, probability=0.03970300056443736),\n",
318 |        " ScorelinePrediction(home_goals=0, away_goals=2, probability=0.018568356365315872),\n",
319 |        " ScorelinePrediction(home_goals=0, away_goals=3, probability=0.005037154039480389),\n",
320 |        " ScorelinePrediction(home_goals=0, away_goals=4, probability=0.0010248451849317163)]"
321 |       ]
322 |      },
323 |      "execution_count": null,
324 |      "metadata": {},
325 |      "output_type": "execute_result"
326 |     }
327 |    ],
328 |    "source": [
329 |     "scorelines = model.predict_one({\n",
330 |     "    'team1': 'Manchester City FC',\n",
331 |     "    'team2': 'Swansea City FC',\n",
332 |     "})\n",
333 |     "\n",
334 |     "# Probabilities should sum to 1\n",
335 |     "assert np.isclose(\n",
336 |     "    sum(p.probability for p in scorelines),\n",
337 |     "    1.0\n",
338 |     ")\n",
339 |     "\n",
340 |     "scorelines[0:5]"
341 |    ]
342 |   },
343 |   {
344 |    "cell_type": "code",
345 |    "execution_count": null,
346 |    "metadata": {},
347 |    "outputs": [
348 |     {
349 |      "data": {
350 |       "text/plain": [
351 |        "[OutcomePrediction(outcome=Outcomes('Home win'), probability=0.658650484098139),\n",
352 |        " OutcomePrediction(outcome=Outcomes('Draw'), probability=0.21019557218753862),\n",
353 |        " OutcomePrediction(outcome=Outcomes('Away win'), probability=0.13115394371432296)]"
354 |       ]
355 |      },
356 |      "execution_count": null,
357 |      "metadata": {},
358 |      "output_type": "execute_result"
359 |     }
360 |    ],
361 |    "source": [
362 |     "outcomes = scorelines_to_outcomes(scorelines)\n",
363 |     "\n",
364 |     "# MCFC should have a better chance of beating Swansea\n",
365 |     "# at home than Swansea do of winning away\n",
366 |     "assert outcomes[Outcomes('Home win')].probability > outcomes[Outcomes('Away win')].probability\n",
367 |     "\n",
368 |     "list(outcomes.values())"
369 |    ]
370 |   },
371 |   {
372 |    "cell_type": "markdown",
373 |    "metadata": {},
374 |    "source": [
375 |     "Or for multiple matches"
376 |    ]
377 |   },
378 |   {
379 |    "cell_type": "code",
380 |    "execution_count": null,
381 |    "metadata": {},
382 |    "outputs": [],
383 |    "source": [
384 |     "many_scorelines = model.predict([\n",
385 |     "    {'team1': 'Manchester City FC',\n",
386 |     "     'team2': 'Swansea City FC'},\n",
387 |     "    {'team1': 'Manchester City FC',\n",
388 |     "     'team2': 'West Ham United FC'}\n",
389 |     "])"
390 |    ]
391 |   },
392 |   {
393 |    "cell_type": "markdown",
394 |    "metadata": {},
395 |    "source": [
396 |     "What about a model with a different weighting method?\n",
397 |     "\n",
398 |     "By default, the `DixonColes` model weights all matches equally. However, it's more realistic to give matches\n",
399 |     "closer to the current date a bigger weight than those a long time ago.\n",
400 |     "\n",
401 |     "The original Dixon-Coles paper suggests using an exponential weight, and we can use the same:"
402 |    ]
403 |   },
404 |   {
405 |    "cell_type": "code",
406 |    "execution_count": null,
407 |    "metadata": {},
408 |    "outputs": [],
409 |    "source": [
410 |     "season_end_date = max(match['date'] for match in pl_1516)\n",
411 |     "\n",
412 |     "weight = ExponentialWeight(\n",
413 |     "    # Value of `epsilon` is taken from the original paper\n",
414 |     "    epsilon=-0.0065,  \n",
415 |     "    key=lambda x: (season_end_date - x['date']).days\n",
416 |     ")"
417 |    ]
418 |   },
419 |   {
420 |    "cell_type": "code",
421 |    "execution_count": null,
422 |    "metadata": {},
423 |    "outputs": [
424 |     {
425 |      "data": {
426 |       "text/plain": [
427 |        "DixonColes(adapter=KeyAdapter(home_goals=['score', 'ft', 0], away_goals=['score', 'ft', 1], home_team='team1', away_team='team2'), blocks=[TeamStrength(), HomeAdvantage()]), weight=ExponentialWeight(epsilon=-0.0065, key=<function <lambda> at 0x11eecd158>)"
428 |       ]
429 |      },
430 |      "execution_count": null,
431 |      "metadata": {},
432 |      "output_type": "execute_result"
433 |     }
434 |    ],
435 |    "source": [
436 |     "model_exp = DixonColes(\n",
437 |     "    adapter=adapter,\n",
438 |     "    blocks=blocks,\n",
439 |     "    weight=weight\n",
440 |     ")\n",
441 |     "model_exp.fit(pl_1516)"
442 |    ]
443 |   },
444 |   {
445 |    "cell_type": "markdown",
446 |    "metadata": {},
447 |    "source": [
448 |     "How much does that change the ratings at season-end?"
449 |    ]
450 |   },
451 |   {
452 |    "cell_type": "code",
453 |    "execution_count": null,
454 |    "metadata": {},
455 |    "outputs": [
456 |     {
457 |      "name": "stdout",
458 |      "output_type": "stream",
459 |      "text": [
460 |       "OffenceParameterKey(label='AFC Bournemouth')         : 0.89 -> 0.88 (0.99)\n",
461 |       "DefenceParameterKey(label='AFC Bournemouth')         : 1.57 -> 1.61 (1.02)\n",
462 |       "OffenceParameterKey(label='Arsenal FC')              : 1.25 -> 1.25 (1.00)\n",
463 |       "DefenceParameterKey(label='Arsenal FC')              : 0.86 -> 0.85 (0.98)\n",
464 |       "OffenceParameterKey(label='Aston Villa FC')          : 0.54 -> 0.49 (0.91)\n",
465 |       "DefenceParameterKey(label='Aston Villa FC')          : 1.75 -> 1.83 (1.04)\n",
466 |       "OffenceParameterKey(label='Chelsea FC')              : 1.15 -> 1.20 (1.04)\n",
467 |       "DefenceParameterKey(label='Chelsea FC')              : 1.26 -> 1.16 (0.92)\n",
468 |       "OffenceParameterKey(label='Crystal Palace FC')       : 0.76 -> 0.70 (0.92)\n",
469 |       "DefenceParameterKey(label='Crystal Palace FC')       : 1.19 -> 1.25 (1.05)\n",
470 |       "OffenceParameterKey(label='Everton FC')              : 1.16 -> 1.02 (0.88)\n",
471 |       "DefenceParameterKey(label='Everton FC')              : 1.32 -> 1.33 (1.01)\n",
472 |       "ParameterKey(label='Home-field advantage')           : 1.23 -> 1.30 (1.05)\n",
473 |       "OffenceParameterKey(label='Leicester City FC')       : 1.31 -> 1.25 (0.95)\n",
474 |       "DefenceParameterKey(label='Leicester City FC')       : 0.86 -> 0.68 (0.79)\n",
475 |       "OffenceParameterKey(label='Liverpool FC')            : 1.23 -> 1.33 (1.08)\n",
476 |       "DefenceParameterKey(label='Liverpool FC')            : 1.19 -> 1.18 (1.00)\n",
477 |       "OffenceParameterKey(label='Manchester City FC')      : 1.38 -> 1.36 (0.98)\n",
478 |       "DefenceParameterKey(label='Manchester City FC')      : 0.99 -> 1.00 (1.01)\n",
479 |       "OffenceParameterKey(label='Manchester United FC')    : 0.94 -> 0.92 (0.98)\n",
480 |       "DefenceParameterKey(label='Manchester United FC')    : 0.82 -> 0.83 (1.01)\n",
481 |       "OffenceParameterKey(label='Newcastle United FC')     : 0.87 -> 0.93 (1.08)\n",
482 |       "DefenceParameterKey(label='Newcastle United FC')     : 1.52 -> 1.37 (0.90)\n",
483 |       "OffenceParameterKey(label='Norwich City FC')         : 0.77 -> 0.69 (0.90)\n",
484 |       "DefenceParameterKey(label='Norwich City FC')         : 1.55 -> 1.51 (0.97)\n",
485 |       "ParameterKey(label='Rho')                            : 0.94 -> 0.91 (0.97)\n",
486 |       "OffenceParameterKey(label='Southampton FC')          : 1.14 -> 1.26 (1.11)\n",
487 |       "DefenceParameterKey(label='Southampton FC')          : 0.97 -> 0.95 (0.98)\n",
488 |       "OffenceParameterKey(label='Stoke City FC')           : 0.81 -> 0.82 (1.01)\n",
489 |       "DefenceParameterKey(label='Stoke City FC')           : 1.28 -> 1.42 (1.11)\n",
490 |       "OffenceParameterKey(label='Sunderland AFC')          : 0.94 -> 0.99 (1.05)\n",
491 |       "DefenceParameterKey(label='Sunderland AFC')          : 1.46 -> 1.22 (0.84)\n",
492 |       "OffenceParameterKey(label='Swansea City FC')         : 0.82 -> 0.88 (1.08)\n",
493 |       "DefenceParameterKey(label='Swansea City FC')         : 1.21 -> 1.18 (0.97)\n",
494 |       "OffenceParameterKey(label='Tottenham Hotspur FC')    : 1.33 -> 1.34 (1.01)\n",
495 |       "DefenceParameterKey(label='Tottenham Hotspur FC')    : 0.84 -> 0.95 (1.12)\n",
496 |       "OffenceParameterKey(label='Watford FC')              : 0.78 -> 0.77 (0.99)\n",
497 |       "DefenceParameterKey(label='Watford FC')              : 1.17 -> 1.33 (1.14)\n",
498 |       "OffenceParameterKey(label='West Bromwich Albion FC') : 0.66 -> 0.60 (0.91)\n",
499 |       "DefenceParameterKey(label='West Bromwich Albion FC') : 1.10 -> 1.04 (0.94)\n",
500 |       "OffenceParameterKey(label='West Ham United FC')      : 1.27 -> 1.33 (1.04)\n",
501 |       "DefenceParameterKey(label='West Ham United FC')      : 1.22 -> 1.33 (1.09)\n"
502 |      ]
503 |     }
504 |    ],
505 |    "source": [
506 |     "for k in sorted(param_keys, key=lambda x: x.label):\n",
507 |     "    key_str = str(k).ljust(param_key_len + 1)\n",
508 |     "    model_param = np.exp(model.params[k])\n",
509 |     "    model_exp_param = np.exp(model_exp.params[k])\n",
510 |     "    print(f'{key_str}: {model_param:0.2f} -> {model_exp_param:0.2f} ({model_exp_param/model_param:0.2f})')"
511 |    ]
512 |   },
513 |   {
514 |    "cell_type": "code",
515 |    "execution_count": null,
516 |    "metadata": {},
517 |    "outputs": [],
518 |    "source": []
519 |   }
520 |  ],
521 |  "metadata": {
522 |   "kernelspec": {
523 |    "display_name": "Python 3",
524 |    "language": "python",
525 |    "name": "python3"
526 |   }
527 |  },
528 |  "nbformat": 4,
529 |  "nbformat_minor": 2
530 | }
531 | 


--------------------------------------------------------------------------------
/docs/index.html:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | title: Mezzala
  4 | 
  5 | 
  6 | keywords: fastai
  7 | sidebar: home_sidebar
  8 | 
  9 | summary: "Models for estimating football (soccer) team-strength"
 10 | description: "Models for estimating football (soccer) team-strength"
 11 | nb_path: "nbs/index.ipynb"
 12 | ---
 13 | <!--
 14 | 
 15 | #################################################
 16 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 17 | #################################################
 18 | # file to edit: nbs/index.ipynb
 19 | # command to build the docs after a change: nbdev_build_docs
 20 | 
 21 | -->
 22 | 
 23 | <div class="container" id="notebook-container">
 24 |         
 25 |     {% raw %}
 26 |     
 27 | <div class="cell border-box-sizing code_cell rendered">
 28 | 
 29 | </div>
 30 |     {% endraw %}
 31 | 
 32 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 33 | <div class="text_cell_render border-box-sizing rendered_html">
 34 | <h2 id="Install">Install<a class="anchor-link" href="#Install"> </a></h2>
 35 | </div>
 36 | </div>
 37 | </div>
 38 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 39 | <div class="text_cell_render border-box-sizing rendered_html">
 40 | <p><code>pip install mezzala</code></p>
 41 | 
 42 | </div>
 43 | </div>
 44 | </div>
 45 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 46 | <div class="text_cell_render border-box-sizing rendered_html">
 47 | <h2 id="How-to-use">How to use<a class="anchor-link" href="#How-to-use"> </a></h2>
 48 | </div>
 49 | </div>
 50 | </div>
 51 |     {% raw %}
 52 |     
 53 | <div class="cell border-box-sizing code_cell rendered">
 54 | <div class="input">
 55 | 
 56 | <div class="inner_cell">
 57 |     <div class="input_area">
 58 | <div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">mezzala</span>
 59 | </pre></div>
 60 | 
 61 |     </div>
 62 | </div>
 63 | </div>
 64 | 
 65 | </div>
 66 |     {% endraw %}
 67 | 
 68 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 69 | <div class="text_cell_render border-box-sizing rendered_html">
 70 | <p>Fitting a Dixon-Coles team strength model:</p>
 71 | 
 72 | </div>
 73 | </div>
 74 | </div>
 75 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 76 | <div class="text_cell_render border-box-sizing rendered_html">
 77 | <p>First, we need to get some data</p>
 78 | 
 79 | </div>
 80 | </div>
 81 | </div>
 82 |     {% raw %}
 83 |     
 84 | <div class="cell border-box-sizing code_cell rendered">
 85 | <div class="input">
 86 | 
 87 | <div class="inner_cell">
 88 |     <div class="input_area">
 89 | <div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">itertools</span>
 90 | <span class="kn">import</span> <span class="nn">json</span>
 91 | <span class="kn">import</span> <span class="nn">urllib.request</span>
 92 | 
 93 | 
 94 | <span class="c1"># Use 2016/17 Premier League data from the openfootball repo</span>
 95 | <span class="n">url</span> <span class="o">=</span> <span class="s1">&#39;https://raw.githubusercontent.com/openfootball/football.json/master/2016-17/en.1.json&#39;</span>
 96 | 
 97 | 
 98 | <span class="n">response</span> <span class="o">=</span> <span class="n">urllib</span><span class="o">.</span><span class="n">request</span><span class="o">.</span><span class="n">urlopen</span><span class="p">(</span><span class="n">url</span><span class="p">)</span>
 99 | <span class="n">data_raw</span> <span class="o">=</span> <span class="n">json</span><span class="o">.</span><span class="n">loads</span><span class="p">(</span><span class="n">response</span><span class="o">.</span><span class="n">read</span><span class="p">())</span>
100 | 
101 | <span class="c1"># Reshape the data to just get the matches</span>
102 | <span class="n">data</span> <span class="o">=</span> <span class="nb">list</span><span class="p">(</span><span class="n">itertools</span><span class="o">.</span><span class="n">chain</span><span class="p">(</span><span class="o">*</span><span class="p">[</span><span class="n">d</span><span class="p">[</span><span class="s1">&#39;matches&#39;</span><span class="p">]</span> <span class="k">for</span> <span class="n">d</span> <span class="ow">in</span> <span class="n">data_raw</span><span class="p">[</span><span class="s1">&#39;rounds&#39;</span><span class="p">]]))</span>
103 | 
104 | <span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">3</span><span class="p">]</span>
105 | </pre></div>
106 | 
107 |     </div>
108 | </div>
109 | </div>
110 | 
111 | <div class="output_wrapper">
112 | <div class="output">
113 | 
114 | <div class="output_area">
115 | 
116 | 
117 | 
118 | <div class="output_text output_subarea output_execute_result">
119 | <pre>[{&#39;date&#39;: &#39;2016-08-13&#39;,
120 |   &#39;team1&#39;: &#39;Hull City AFC&#39;,
121 |   &#39;team2&#39;: &#39;Leicester City FC&#39;,
122 |   &#39;score&#39;: {&#39;ft&#39;: [2, 1]}},
123 |  {&#39;date&#39;: &#39;2016-08-13&#39;,
124 |   &#39;team1&#39;: &#39;Everton FC&#39;,
125 |   &#39;team2&#39;: &#39;Tottenham Hotspur FC&#39;,
126 |   &#39;score&#39;: {&#39;ft&#39;: [1, 1]}},
127 |  {&#39;date&#39;: &#39;2016-08-13&#39;,
128 |   &#39;team1&#39;: &#39;Crystal Palace FC&#39;,
129 |   &#39;team2&#39;: &#39;West Bromwich Albion FC&#39;,
130 |   &#39;score&#39;: {&#39;ft&#39;: [0, 1]}}]</pre>
131 | </div>
132 | 
133 | </div>
134 | 
135 | </div>
136 | </div>
137 | 
138 | </div>
139 |     {% endraw %}
140 | 
141 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
142 | <div class="text_cell_render border-box-sizing rendered_html">
143 | <h3 id="Fitting-a-model">Fitting a model<a class="anchor-link" href="#Fitting-a-model"> </a></h3>
144 | </div>
145 | </div>
146 | </div>
147 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
148 | <div class="text_cell_render border-box-sizing rendered_html">
149 | <p>To fit a model with mezzala, you need to create an "adapter". Adapters are used to connect a model to a data source.</p>
150 | <p>Because our data is a list of dicts, we are going to use a <a href="/mezzala/adapters.html#KeyAdapter"><code>KeyAdapter</code></a>.</p>
151 | 
152 | </div>
153 | </div>
154 | </div>
155 |     {% raw %}
156 |     
157 | <div class="cell border-box-sizing code_cell rendered">
158 | <div class="input">
159 | 
160 | <div class="inner_cell">
161 |     <div class="input_area">
162 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">adapter</span> <span class="o">=</span> <span class="n">mezzala</span><span class="o">.</span><span class="n">KeyAdapter</span><span class="p">(</span>       <span class="c1"># `KeyAdapter` = datum[&#39;...&#39;]</span>
163 |     <span class="n">home_team</span><span class="o">=</span><span class="s1">&#39;team1&#39;</span><span class="p">,</span>
164 |     <span class="n">away_team</span><span class="o">=</span><span class="s1">&#39;team2&#39;</span><span class="p">,</span>
165 |     <span class="n">home_goals</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="s1">&#39;ft&#39;</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span>  <span class="c1"># Get nested fields with lists of fields</span>
166 |     <span class="n">away_goals</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;score&#39;</span><span class="p">,</span> <span class="s1">&#39;ft&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">],</span>  <span class="c1"># i.e. datum[&#39;score&#39;][&#39;ft&#39;][1]</span>
167 | <span class="p">)</span>
168 | 
169 | <span class="c1"># You&#39;ll never need to call the methods on an </span>
170 | <span class="c1"># adapter directly, but just to show that it </span>
171 | <span class="c1"># works as expected:</span>
172 | <span class="n">adapter</span><span class="o">.</span><span class="n">home_team</span><span class="p">(</span><span class="n">data</span><span class="p">[</span><span class="mi">0</span><span class="p">])</span>
173 | </pre></div>
174 | 
175 |     </div>
176 | </div>
177 | </div>
178 | 
179 | <div class="output_wrapper">
180 | <div class="output">
181 | 
182 | <div class="output_area">
183 | 
184 | 
185 | 
186 | <div class="output_text output_subarea output_execute_result">
187 | <pre>&#39;Hull City AFC&#39;</pre>
188 | </div>
189 | 
190 | </div>
191 | 
192 | </div>
193 | </div>
194 | 
195 | </div>
196 |     {% endraw %}
197 | 
198 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
199 | <div class="text_cell_render border-box-sizing rendered_html">
200 | <p>Once we have an adapter for our specific data source, we can fit the model:</p>
201 | 
202 | </div>
203 | </div>
204 | </div>
205 |     {% raw %}
206 |     
207 | <div class="cell border-box-sizing code_cell rendered">
208 | <div class="input">
209 | 
210 | <div class="inner_cell">
211 |     <div class="input_area">
212 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">model</span> <span class="o">=</span> <span class="n">mezzala</span><span class="o">.</span><span class="n">DixonColes</span><span class="p">(</span><span class="n">adapter</span><span class="o">=</span><span class="n">adapter</span><span class="p">)</span>
213 | <span class="n">model</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">data</span><span class="p">)</span>
214 | </pre></div>
215 | 
216 |     </div>
217 | </div>
218 | </div>
219 | 
220 | <div class="output_wrapper">
221 | <div class="output">
222 | 
223 | <div class="output_area">
224 | 
225 | 
226 | 
227 | <div class="output_text output_subarea output_execute_result">
228 | <pre>DixonColes(adapter=KeyAdapter(home_goals=[&#39;score&#39;, &#39;ft&#39;, 0], away_goals=[&#39;score&#39;, &#39;ft&#39;, 1], home_team=&#39;team1&#39;, away_team=&#39;team2&#39;), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=UniformWeight()</pre>
229 | </div>
230 | 
231 | </div>
232 | 
233 | </div>
234 | </div>
235 | 
236 | </div>
237 |     {% endraw %}
238 | 
239 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
240 | <div class="text_cell_render border-box-sizing rendered_html">
241 | <h3 id="Making-predictions">Making predictions<a class="anchor-link" href="#Making-predictions"> </a></h3>
242 | </div>
243 | </div>
244 | </div>
245 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
246 | <div class="text_cell_render border-box-sizing rendered_html">
247 | <p>By default, you only need to supply the home and away team to get predictions. This should be supplied in the same format as the training data.</p>
248 | <p><a href="/mezzala/models.html#DixonColes"><code>DixonColes</code></a> has two methods for making predictions:</p>
249 | <ul>
250 | <li><code>predict_one</code> - for predicting a single match</li>
251 | <li><code>predict</code> - for predicting multiple matches</li>
252 | </ul>
253 | 
254 | </div>
255 | </div>
256 | </div>
257 |     {% raw %}
258 |     
259 | <div class="cell border-box-sizing code_cell rendered">
260 | <div class="input">
261 | 
262 | <div class="inner_cell">
263 |     <div class="input_area">
264 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">match_to_predict</span> <span class="o">=</span> <span class="p">{</span>
265 |     <span class="s1">&#39;team1&#39;</span><span class="p">:</span> <span class="s1">&#39;Manchester City FC&#39;</span><span class="p">,</span>
266 |     <span class="s1">&#39;team2&#39;</span><span class="p">:</span> <span class="s1">&#39;Swansea City FC&#39;</span><span class="p">,</span>
267 | <span class="p">}</span>
268 | 
269 | <span class="n">scorelines</span> <span class="o">=</span> <span class="n">model</span><span class="o">.</span><span class="n">predict_one</span><span class="p">(</span><span class="n">match_to_predict</span><span class="p">)</span>
270 | 
271 | <span class="n">scorelines</span><span class="p">[</span><span class="mi">0</span><span class="p">:</span><span class="mi">5</span><span class="p">]</span>
272 | </pre></div>
273 | 
274 |     </div>
275 | </div>
276 | </div>
277 | 
278 | <div class="output_wrapper">
279 | <div class="output">
280 | 
281 | <div class="output_area">
282 | 
283 | 
284 | 
285 | <div class="output_text output_subarea output_execute_result">
286 | <pre>[ScorelinePrediction(home_goals=0, away_goals=0, probability=0.023625049697587167),
287 |  ScorelinePrediction(home_goals=0, away_goals=1, probability=0.012682094432376022),
288 |  ScorelinePrediction(home_goals=0, away_goals=2, probability=0.00623268833779594),
289 |  ScorelinePrediction(home_goals=0, away_goals=3, probability=0.0016251514235046444),
290 |  ScorelinePrediction(home_goals=0, away_goals=4, probability=0.00031781436109636405)]</pre>
291 | </div>
292 | 
293 | </div>
294 | 
295 | </div>
296 | </div>
297 | 
298 | </div>
299 |     {% endraw %}
300 | 
301 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
302 | <div class="text_cell_render border-box-sizing rendered_html">
303 | <p>Each of these methods return predictions in the form of <code>ScorelinePredictions</code>.</p>
304 | <ul>
305 | <li><code>predict_one</code> returns a list of <code>ScorelinePredictions</code></li>
306 | <li><code>predict</code> returns a list of <code>ScorelinePredictions</code> for each predicted match (i.e. a list of lists)</li>
307 | </ul>
308 | <p>However, it can sometimes be more useful to have predictions in the form of match <em>outcomes</em>. Mezzala exposes the <a href="/mezzala/models.html#scorelines_to_outcomes"><code>scorelines_to_outcomes</code></a> function for this purpose:</p>
309 | 
310 | </div>
311 | </div>
312 | </div>
313 |     {% raw %}
314 |     
315 | <div class="cell border-box-sizing code_cell rendered">
316 | <div class="input">
317 | 
318 | <div class="inner_cell">
319 |     <div class="input_area">
320 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">mezzala</span><span class="o">.</span><span class="n">scorelines_to_outcomes</span><span class="p">(</span><span class="n">scorelines</span><span class="p">)</span>
321 | </pre></div>
322 | 
323 |     </div>
324 | </div>
325 | </div>
326 | 
327 | <div class="output_wrapper">
328 | <div class="output">
329 | 
330 | <div class="output_area">
331 | 
332 | 
333 | 
334 | <div class="output_text output_subarea output_execute_result">
335 | <pre>{Outcomes(&#39;Home win&#39;): OutcomePrediction(outcome=Outcomes(&#39;Home win&#39;), probability=0.8255103334702835),
336 |  Outcomes(&#39;Draw&#39;): OutcomePrediction(outcome=Outcomes(&#39;Draw&#39;), probability=0.11615659853961693),
337 |  Outcomes(&#39;Away win&#39;): OutcomePrediction(outcome=Outcomes(&#39;Away win&#39;), probability=0.058333067990098304)}</pre>
338 | </div>
339 | 
340 | </div>
341 | 
342 | </div>
343 | </div>
344 | 
345 | </div>
346 |     {% endraw %}
347 | 
348 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
349 | <div class="text_cell_render border-box-sizing rendered_html">
350 | <h3 id="Extending-the-model">Extending the model<a class="anchor-link" href="#Extending-the-model"> </a></h3><p>It's possible to fit more sophisticated models with mezzala, using <strong>weights</strong> and <strong>model blocks</strong></p>
351 | <h4 id="Weights">Weights<a class="anchor-link" href="#Weights"> </a></h4><p>You can weight individual data points by supplying a function (or callable) to the <code>weight</code> argument to <a href="/mezzala/models.html#DixonColes"><code>DixonColes</code></a>:</p>
352 | 
353 | </div>
354 | </div>
355 | </div>
356 |     {% raw %}
357 |     
358 | <div class="cell border-box-sizing code_cell rendered">
359 | <div class="input">
360 | 
361 | <div class="inner_cell">
362 |     <div class="input_area">
363 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">mezzala</span><span class="o">.</span><span class="n">DixonColes</span><span class="p">(</span>
364 |     <span class="n">adapter</span><span class="o">=</span><span class="n">adapter</span><span class="p">,</span>
365 |     <span class="c1"># By default, all data points are weighted equally,</span>
366 |     <span class="c1"># which is equivalent to:</span>
367 |     <span class="n">weight</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="mi">1</span>
368 | <span class="p">)</span>
369 | </pre></div>
370 | 
371 |     </div>
372 | </div>
373 | </div>
374 | 
375 | <div class="output_wrapper">
376 | <div class="output">
377 | 
378 | <div class="output_area">
379 | 
380 | 
381 | 
382 | <div class="output_text output_subarea output_execute_result">
383 | <pre>DixonColes(adapter=KeyAdapter(home_goals=[&#39;score&#39;, &#39;ft&#39;, 0], away_goals=[&#39;score&#39;, &#39;ft&#39;, 1], home_team=&#39;team1&#39;, away_team=&#39;team2&#39;), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=&lt;function &lt;lambda&gt; at 0x123067488&gt;</pre>
384 | </div>
385 | 
386 | </div>
387 | 
388 | </div>
389 | </div>
390 | 
391 | </div>
392 |     {% endraw %}
393 | 
394 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
395 | <div class="text_cell_render border-box-sizing rendered_html">
396 | <p>Mezzala also provides an <a href="/mezzala/weights.html#ExponentialWeight"><code>ExponentialWeight</code></a> for the purpose of time-discounting:</p>
397 | 
398 | </div>
399 | </div>
400 | </div>
401 |     {% raw %}
402 |     
403 | <div class="cell border-box-sizing code_cell rendered">
404 | <div class="input">
405 | 
406 | <div class="inner_cell">
407 |     <div class="input_area">
408 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">mezzala</span><span class="o">.</span><span class="n">DixonColes</span><span class="p">(</span>
409 |     <span class="n">adapter</span><span class="o">=</span><span class="n">adapter</span><span class="p">,</span>
410 |     <span class="n">weight</span><span class="o">=</span><span class="n">mezzala</span><span class="o">.</span><span class="n">ExponentialWeight</span><span class="p">(</span>
411 |         <span class="n">epsilon</span><span class="o">=-</span><span class="mf">0.0065</span><span class="p">,</span>               <span class="c1"># Decay rate</span>
412 |         <span class="n">key</span><span class="o">=</span><span class="k">lambda</span> <span class="n">x</span><span class="p">:</span> <span class="n">x</span><span class="p">[</span><span class="s1">&#39;days_ago&#39;</span><span class="p">]</span>
413 |     <span class="p">)</span>
414 | <span class="p">)</span>
415 | </pre></div>
416 | 
417 |     </div>
418 | </div>
419 | </div>
420 | 
421 | <div class="output_wrapper">
422 | <div class="output">
423 | 
424 | <div class="output_area">
425 | 
426 | 
427 | 
428 | <div class="output_text output_subarea output_execute_result">
429 | <pre>DixonColes(adapter=KeyAdapter(home_goals=[&#39;score&#39;, &#39;ft&#39;, 0], away_goals=[&#39;score&#39;, &#39;ft&#39;, 1], home_team=&#39;team1&#39;, away_team=&#39;team2&#39;), blocks=[TeamStrength(), BaseRate(), HomeAdvantage()]), weight=ExponentialWeight(epsilon=-0.0065, key=&lt;function &lt;lambda&gt; at 0x122f938c8&gt;)</pre>
430 | </div>
431 | 
432 | </div>
433 | 
434 | </div>
435 | </div>
436 | 
437 | </div>
438 |     {% endraw %}
439 | 
440 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
441 | <div class="text_cell_render border-box-sizing rendered_html">
442 | <h4 id="Model-blocks">Model blocks<a class="anchor-link" href="#Model-blocks"> </a></h4><p>Model "blocks" define the calculation and estimation of home and away goalscoring rates.</p>
443 | 
444 | </div>
445 | </div>
446 | </div>
447 |     {% raw %}
448 |     
449 | <div class="cell border-box-sizing code_cell rendered">
450 | <div class="input">
451 | 
452 | <div class="inner_cell">
453 |     <div class="input_area">
454 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">mezzala</span><span class="o">.</span><span class="n">DixonColes</span><span class="p">(</span>
455 |     <span class="n">adapter</span><span class="o">=</span><span class="n">adapter</span><span class="p">,</span>
456 |     <span class="c1"># By default, only team strength and home advantage,</span>
457 |     <span class="c1"># is estimated:</span>
458 |     <span class="n">blocks</span><span class="o">=</span><span class="p">[</span>
459 |         <span class="n">mezzala</span><span class="o">.</span><span class="n">blocks</span><span class="o">.</span><span class="n">HomeAdvantage</span><span class="p">(),</span>
460 |         <span class="n">mezzala</span><span class="o">.</span><span class="n">blocks</span><span class="o">.</span><span class="n">TeamStrength</span><span class="p">(),</span>
461 |         <span class="n">mezzala</span><span class="o">.</span><span class="n">blocks</span><span class="o">.</span><span class="n">BaseRate</span><span class="p">(),</span>      <span class="c1"># Adds &quot;average goalscoring rate&quot; as a distinct parameter</span>
462 |     <span class="p">]</span>
463 | <span class="p">)</span>
464 | </pre></div>
465 | 
466 |     </div>
467 | </div>
468 | </div>
469 | 
470 | <div class="output_wrapper">
471 | <div class="output">
472 | 
473 | <div class="output_area">
474 | 
475 | 
476 | 
477 | <div class="output_text output_subarea output_execute_result">
478 | <pre>DixonColes(adapter=KeyAdapter(home_goals=[&#39;score&#39;, &#39;ft&#39;, 0], away_goals=[&#39;score&#39;, &#39;ft&#39;, 1], home_team=&#39;team1&#39;, away_team=&#39;team2&#39;), blocks=[TeamStrength(), HomeAdvantage(), BaseRate()]), weight=UniformWeight()</pre>
479 | </div>
480 | 
481 | </div>
482 | 
483 | </div>
484 | </div>
485 | 
486 | </div>
487 |     {% endraw %}
488 | 
489 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
490 | <div class="text_cell_render border-box-sizing rendered_html">
491 | <p>To add custom parameters (e.g. per-league home advantage), you need to add additional model blocks.</p>
492 | 
493 | </div>
494 | </div>
495 | </div>
496 | </div>
497 |  
498 | 
499 | 


--------------------------------------------------------------------------------
/docs/adapters.html:
--------------------------------------------------------------------------------
  1 | ---
  2 | 
  3 | title: Data Adapters
  4 | 
  5 | 
  6 | keywords: fastai
  7 | sidebar: home_sidebar
  8 | 
  9 | 
 10 | 
 11 | nb_path: "nbs/adapters.ipynb"
 12 | ---
 13 | <!--
 14 | 
 15 | #################################################
 16 | ### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
 17 | #################################################
 18 | # file to edit: nbs/adapters.ipynb
 19 | # command to build the docs after a change: nbdev_build_docs
 20 | 
 21 | -->
 22 | 
 23 | <div class="container" id="notebook-container">
 24 |         
 25 |     {% raw %}
 26 |     
 27 | <div class="cell border-box-sizing code_cell rendered">
 28 | 
 29 | </div>
 30 |     {% endraw %}
 31 | 
 32 |     {% raw %}
 33 |     
 34 | <div class="cell border-box-sizing code_cell rendered">
 35 | <div class="input">
 36 | 
 37 | <div class="inner_cell">
 38 |     <div class="input_area">
 39 | <div class=" highlight hl-ipython3"><pre><span></span><span class="kn">import</span> <span class="nn">dataclasses</span>
 40 | <span class="kn">import</span> <span class="nn">typing</span>
 41 | </pre></div>
 42 | 
 43 |     </div>
 44 | </div>
 45 | </div>
 46 | 
 47 | </div>
 48 |     {% endraw %}
 49 | 
 50 |     {% raw %}
 51 |     
 52 | <div class="cell border-box-sizing code_cell rendered">
 53 | 
 54 | </div>
 55 |     {% endraw %}
 56 | 
 57 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 58 | <div class="text_cell_render border-box-sizing rendered_html">
 59 | <h2 id="Basic-adapters">Basic adapters<a class="anchor-link" href="#Basic-adapters"> </a></h2>
 60 | </div>
 61 | </div>
 62 | </div>
 63 |     {% raw %}
 64 |     
 65 | <div class="cell border-box-sizing code_cell rendered">
 66 | 
 67 | <div class="output_wrapper">
 68 | <div class="output">
 69 | 
 70 | <div class="output_area">
 71 | 
 72 | 
 73 | <div class="output_markdown rendered_html output_subarea ">
 74 | <h2 id="KeyAdapter" class="doc_header"><code>class</code> <code>KeyAdapter</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/adapters.py#L14" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>KeyAdapter</code>(<strong><code>home_goals</code></strong>, <strong><code>away_goals</code></strong>, <strong>**<code>kwargs</code></strong>)</p>
 75 | </blockquote>
 76 | <p>Get data from subscriptable objects.</p>
 77 | 
 78 | </div>
 79 | 
 80 | </div>
 81 | 
 82 | </div>
 83 | </div>
 84 | 
 85 | </div>
 86 |     {% endraw %}
 87 | 
 88 |     {% raw %}
 89 |     
 90 | <div class="cell border-box-sizing code_cell rendered">
 91 | 
 92 | </div>
 93 |     {% endraw %}
 94 | 
 95 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
 96 | <div class="text_cell_render border-box-sizing rendered_html">
 97 | <p>Anything subscriptable can be with this type of adapter. For example,
 98 | you might have input data as a list of tuples (e.g. using Python's
 99 | in-built <code>csv</code> library)</p>
100 | 
101 | </div>
102 | </div>
103 | </div>
104 |     {% raw %}
105 |     
106 | <div class="cell border-box-sizing code_cell rendered">
107 | <div class="input">
108 | 
109 | <div class="inner_cell">
110 |     <div class="input_area">
111 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">index_adapter</span> <span class="o">=</span> <span class="n">KeyAdapter</span><span class="p">(</span><span class="mi">0</span><span class="p">,</span> <span class="mi">1</span><span class="p">)</span>
112 | 
113 | <span class="k">assert</span> <span class="n">index_adapter</span><span class="o">.</span><span class="n">home_goals</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span> <span class="o">==</span> <span class="mi">1</span>
114 | <span class="k">assert</span> <span class="n">index_adapter</span><span class="o">.</span><span class="n">away_goals</span><span class="p">([</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">])</span> <span class="o">==</span> <span class="mi">2</span>
115 | </pre></div>
116 | 
117 |     </div>
118 | </div>
119 | </div>
120 | 
121 | </div>
122 |     {% endraw %}
123 | 
124 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
125 | <div class="text_cell_render border-box-sizing rendered_html">
126 | <p>Or, you might be using a list of dicts.</p>
127 | 
128 | </div>
129 | </div>
130 | </div>
131 |     {% raw %}
132 |     
133 | <div class="cell border-box-sizing code_cell rendered">
134 | <div class="input">
135 | 
136 | <div class="inner_cell">
137 |     <div class="input_area">
138 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">dict_adapter</span> <span class="o">=</span> <span class="n">KeyAdapter</span><span class="p">(</span><span class="s1">&#39;hg&#39;</span><span class="p">,</span> <span class="s1">&#39;ag&#39;</span><span class="p">,</span> <span class="n">home_team</span><span class="o">=</span><span class="s1">&#39;home&#39;</span><span class="p">,</span> <span class="n">away_team</span><span class="o">=</span><span class="s1">&#39;away&#39;</span><span class="p">)</span>
139 | 
140 | <span class="n">example_dict</span> <span class="o">=</span> <span class="p">{</span>
141 |     <span class="s1">&#39;home&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 1&#39;</span><span class="p">,</span>
142 |     <span class="s1">&#39;away&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 2&#39;</span><span class="p">,</span>
143 |     <span class="s1">&#39;hg&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span>
144 |     <span class="s1">&#39;ag&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span>
145 | <span class="p">}</span>
146 | 
147 | <span class="k">assert</span> <span class="n">dict_adapter</span><span class="o">.</span><span class="n">home_goals</span><span class="p">(</span><span class="n">example_dict</span><span class="p">)</span> <span class="o">==</span> <span class="mi">4</span>
148 | <span class="k">assert</span> <span class="n">dict_adapter</span><span class="o">.</span><span class="n">away_goals</span><span class="p">(</span><span class="n">example_dict</span><span class="p">)</span> <span class="o">==</span> <span class="mi">3</span>
149 | <span class="k">assert</span> <span class="n">dict_adapter</span><span class="o">.</span><span class="n">home_team</span><span class="p">(</span><span class="n">example_dict</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Team 1&#39;</span>
150 | <span class="k">assert</span> <span class="n">dict_adapter</span><span class="o">.</span><span class="n">away_team</span><span class="p">(</span><span class="n">example_dict</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Team 2&#39;</span>
151 | </pre></div>
152 | 
153 |     </div>
154 | </div>
155 | </div>
156 | 
157 | </div>
158 |     {% endraw %}
159 | 
160 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
161 | <div class="text_cell_render border-box-sizing rendered_html">
162 | <p>Nested data can be supplied using a list</p>
163 | 
164 | </div>
165 | </div>
166 | </div>
167 |     {% raw %}
168 |     
169 | <div class="cell border-box-sizing code_cell rendered">
170 | <div class="input">
171 | 
172 | <div class="inner_cell">
173 |     <div class="input_area">
174 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">nested_dict_adapter</span> <span class="o">=</span> <span class="n">KeyAdapter</span><span class="p">(</span>
175 |     <span class="n">home_goals</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;scoreline&#39;</span><span class="p">,</span> <span class="mi">0</span><span class="p">],</span> 
176 |     <span class="n">away_goals</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;scoreline&#39;</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
177 | <span class="p">)</span>
178 | 
179 | <span class="n">example_nested_dict</span> <span class="o">=</span> <span class="p">{</span>
180 |     <span class="s1">&#39;scoreline&#39;</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span>
181 | <span class="p">}</span>
182 | 
183 | <span class="k">assert</span> <span class="n">nested_dict_adapter</span><span class="o">.</span><span class="n">home_goals</span><span class="p">(</span><span class="n">example_nested_dict</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span>
184 | <span class="k">assert</span> <span class="n">nested_dict_adapter</span><span class="o">.</span><span class="n">away_goals</span><span class="p">(</span><span class="n">example_nested_dict</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span>
185 | </pre></div>
186 | 
187 |     </div>
188 | </div>
189 | </div>
190 | 
191 | </div>
192 |     {% endraw %}
193 | 
194 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
195 | <div class="text_cell_render border-box-sizing rendered_html">
196 | <p><a href="/mezzala/adapters.html#KeyAdapter"><code>KeyAdapter</code></a> could be used alongside <code>pd.DataFrame.iterrows</code> as well; however, it is much faster when using <code>pd.DataFrame.itertuples</code>.</p>
197 | <p>Likewise, you can't use a <a href="/mezzala/adapters.html#KeyAdapter"><code>KeyAdapter</code></a> with custom objects (e.g. dataclasses).</p>
198 | <p>In this case, you need an <a href="/mezzala/adapters.html#AttributeAdapter"><code>AttributeAdapter</code></a>.</p>
199 | 
200 | </div>
201 | </div>
202 | </div>
203 |     {% raw %}
204 |     
205 | <div class="cell border-box-sizing code_cell rendered">
206 | 
207 | <div class="output_wrapper">
208 | <div class="output">
209 | 
210 | <div class="output_area">
211 | 
212 | 
213 | <div class="output_markdown rendered_html output_subarea ">
214 | <h2 id="AttributeAdapter" class="doc_header"><code>class</code> <code>AttributeAdapter</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/adapters.py#L43" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>AttributeAdapter</code>(<strong><code>home_goals</code></strong>, <strong><code>away_goals</code></strong>, <strong>**<code>kwargs</code></strong>)</p>
215 | </blockquote>
216 | <p>Get data from object attributes.</p>
217 | 
218 | </div>
219 | 
220 | </div>
221 | 
222 | </div>
223 | </div>
224 | 
225 | </div>
226 |     {% endraw %}
227 | 
228 |     {% raw %}
229 |     
230 | <div class="cell border-box-sizing code_cell rendered">
231 | 
232 | </div>
233 |     {% endraw %}
234 | 
235 |     {% raw %}
236 |     
237 | <div class="cell border-box-sizing code_cell rendered">
238 | <div class="input">
239 | 
240 | <div class="inner_cell">
241 |     <div class="input_area">
242 | <div class=" highlight hl-ipython3"><pre><span></span><span class="nd">@dataclasses</span><span class="o">.</span><span class="n">dataclass</span><span class="p">()</span>
243 | <span class="k">class</span> <span class="nc">ExampleData</span><span class="p">:</span>
244 |     <span class="n">hg</span><span class="p">:</span> <span class="nb">int</span>
245 |     <span class="n">ag</span><span class="p">:</span> <span class="nb">int</span>
246 |     <span class="n">home</span><span class="p">:</span> <span class="nb">str</span>
247 |     <span class="n">away</span><span class="p">:</span> <span class="nb">str</span>
248 | 
249 | 
250 | <span class="n">attr_adapter</span> <span class="o">=</span> <span class="n">AttributeAdapter</span><span class="p">(</span><span class="s1">&#39;hg&#39;</span><span class="p">,</span> <span class="s1">&#39;ag&#39;</span><span class="p">,</span> <span class="n">home_team</span><span class="o">=</span><span class="s1">&#39;home&#39;</span><span class="p">,</span> <span class="n">away_team</span><span class="o">=</span><span class="s1">&#39;away&#39;</span><span class="p">)</span>
251 | 
252 | 
253 | <span class="n">example_attr</span> <span class="o">=</span> <span class="n">ExampleData</span><span class="p">(</span>
254 |     <span class="n">home</span><span class="o">=</span><span class="s1">&#39;Another home team&#39;</span><span class="p">,</span>
255 |     <span class="n">away</span><span class="o">=</span><span class="s1">&#39;Another away team&#39;</span><span class="p">,</span>
256 |     <span class="n">hg</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span>
257 |     <span class="n">ag</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span>
258 | <span class="p">)</span>
259 | 
260 | <span class="k">assert</span> <span class="n">attr_adapter</span><span class="o">.</span><span class="n">home_goals</span><span class="p">(</span><span class="n">example_attr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">5</span>
261 | <span class="k">assert</span> <span class="n">attr_adapter</span><span class="o">.</span><span class="n">away_goals</span><span class="p">(</span><span class="n">example_attr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span>
262 | <span class="k">assert</span> <span class="n">attr_adapter</span><span class="o">.</span><span class="n">home_team</span><span class="p">(</span><span class="n">example_attr</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Another home team&#39;</span>
263 | <span class="k">assert</span> <span class="n">attr_adapter</span><span class="o">.</span><span class="n">away_team</span><span class="p">(</span><span class="n">example_attr</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Another away team&#39;</span>
264 | </pre></div>
265 | 
266 |     </div>
267 | </div>
268 | </div>
269 | 
270 | </div>
271 |     {% endraw %}
272 | 
273 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
274 | <div class="text_cell_render border-box-sizing rendered_html">
275 | <p>As with <a href="/mezzala/adapters.html#KeyAdapter"><code>KeyAdapter</code></a>, nested attributes can also be fetched using lists</p>
276 | 
277 | </div>
278 | </div>
279 | </div>
280 |     {% raw %}
281 |     
282 | <div class="cell border-box-sizing code_cell rendered">
283 | <div class="input">
284 | 
285 | <div class="inner_cell">
286 |     <div class="input_area">
287 | <div class=" highlight hl-ipython3"><pre><span></span><span class="nd">@dataclasses</span><span class="o">.</span><span class="n">dataclass</span><span class="p">()</span>
288 | <span class="k">class</span> <span class="nc">Scoreline</span><span class="p">:</span>
289 |     <span class="n">home</span><span class="p">:</span> <span class="nb">int</span>
290 |     <span class="n">away</span><span class="p">:</span> <span class="nb">int</span>
291 | 
292 | 
293 | <span class="nd">@dataclasses</span><span class="o">.</span><span class="n">dataclass</span><span class="p">()</span>
294 | <span class="k">class</span> <span class="nc">ExampleNestedData</span><span class="p">:</span>
295 |     <span class="n">scoreline</span><span class="p">:</span> <span class="n">Scoreline</span>
296 |     <span class="n">home</span><span class="p">:</span> <span class="nb">str</span>
297 |     <span class="n">away</span><span class="p">:</span> <span class="nb">str</span>
298 | 
299 | 
300 | <span class="n">nested_attr_adapter</span> <span class="o">=</span> <span class="n">AttributeAdapter</span><span class="p">(</span>
301 |     <span class="n">home_team</span><span class="o">=</span><span class="s1">&#39;home&#39;</span><span class="p">,</span>
302 |     <span class="n">home_goals</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;scoreline&#39;</span><span class="p">,</span> <span class="s1">&#39;home&#39;</span><span class="p">],</span> 
303 |     <span class="n">away_team</span><span class="o">=</span><span class="s1">&#39;away&#39;</span><span class="p">,</span>
304 |     <span class="n">away_goals</span><span class="o">=</span><span class="p">[</span><span class="s1">&#39;scoreline&#39;</span><span class="p">,</span> <span class="s1">&#39;away&#39;</span><span class="p">],</span>
305 | <span class="p">)</span>
306 | 
307 | <span class="n">example_nested_attr</span> <span class="o">=</span> <span class="n">ExampleNestedData</span><span class="p">(</span>
308 |     <span class="n">home</span><span class="o">=</span><span class="s1">&#39;Another home team&#39;</span><span class="p">,</span>
309 |     <span class="n">away</span><span class="o">=</span><span class="s1">&#39;Another away team&#39;</span><span class="p">,</span>
310 |     <span class="n">scoreline</span><span class="o">=</span><span class="n">Scoreline</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span>
311 | <span class="p">)</span>
312 | 
313 | <span class="k">assert</span> <span class="n">nested_attr_adapter</span><span class="o">.</span><span class="n">home_goals</span><span class="p">(</span><span class="n">example_nested_attr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span>
314 | <span class="k">assert</span> <span class="n">nested_attr_adapter</span><span class="o">.</span><span class="n">away_goals</span><span class="p">(</span><span class="n">example_nested_attr</span><span class="p">)</span> <span class="o">==</span> <span class="mi">5</span>
315 | </pre></div>
316 | 
317 |     </div>
318 | </div>
319 | </div>
320 | 
321 | </div>
322 |     {% endraw %}
323 | 
324 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
325 | <div class="text_cell_render border-box-sizing rendered_html">
326 | <h2 id="Composite-adapters">Composite adapters<a class="anchor-link" href="#Composite-adapters"> </a></h2>
327 | </div>
328 | </div>
329 | </div>
330 |     {% raw %}
331 |     
332 | <div class="cell border-box-sizing code_cell rendered">
333 | 
334 | <div class="output_wrapper">
335 | <div class="output">
336 | 
337 | <div class="output_area">
338 | 
339 | 
340 | <div class="output_markdown rendered_html output_subarea ">
341 | <h2 id="LumpedAdapter" class="doc_header"><code>class</code> <code>LumpedAdapter</code><a href="https://github.com/Torvaney/mezzala/tree/master/mezzala/adapters.py#L71" class="source_link" style="float:right">[source]</a></h2><blockquote><p><code>LumpedAdapter</code>(<strong><code>base_adapter</code></strong>, <strong>**<code>kwargs</code></strong>)</p>
342 | </blockquote>
343 | <p>Lump terms which have appeared below a minimum number of times in
344 | the training data into a placeholder term</p>
345 | 
346 | </div>
347 | 
348 | </div>
349 | 
350 | </div>
351 | </div>
352 | 
353 | </div>
354 |     {% endraw %}
355 | 
356 |     {% raw %}
357 |     
358 | <div class="cell border-box-sizing code_cell rendered">
359 | 
360 | </div>
361 |     {% endraw %}
362 | 
363 |     {% raw %}
364 |     
365 | <div class="cell border-box-sizing code_cell rendered">
366 | <div class="input">
367 | 
368 | <div class="inner_cell">
369 |     <div class="input_area">
370 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">example_lumped_data</span> <span class="o">=</span> <span class="p">[</span>
371 |     <span class="o">*</span><span class="p">([</span><span class="n">example_dict</span><span class="p">]</span><span class="o">*</span><span class="mi">4</span><span class="p">),</span>  <span class="c1"># i.e., &#39;Team 1&#39; and &#39;Team 2&#39; appear in the data 4 times</span>
372 |     <span class="p">{</span><span class="s1">&#39;away&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 1&#39;</span><span class="p">,</span>    <span class="c1"># &#39;Team 1&#39; now appears an additional time, (5 total)</span>
373 |                           <span class="c1"># Although this time appears as an *away* team</span>
374 |      <span class="s1">&#39;home&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 3&#39;</span><span class="p">,</span>    <span class="c1"># While &#39;Team 3&#39; appears once</span>
375 |      <span class="s1">&#39;hg&#39;</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> 
376 |      <span class="s1">&#39;ag&#39;</span><span class="p">:</span> <span class="mi">3</span><span class="p">},</span>
377 | <span class="p">]</span>
378 | 
379 | 
380 | <span class="n">lumped_dict_adapter</span> <span class="o">=</span> <span class="n">LumpedAdapter</span><span class="p">(</span>
381 |     <span class="n">base_adapter</span><span class="o">=</span><span class="n">dict_adapter</span><span class="p">,</span>
382 |     <span class="n">home_team</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;Other team&#39;</span><span class="p">,</span> <span class="mi">5</span><span class="p">),</span>      <span class="c1"># Because `home_team` and `away_team` share the same</span>
383 |                                       <span class="c1"># placeholder value (&#39;Other team&#39;), they are counted</span>
384 |                                       <span class="c1"># together. I.e. a team has to appear at least 5 times</span>
385 |                                       <span class="c1"># as _either_ the home team, or the away team</span>
386 |     <span class="n">away_team</span><span class="o">=</span><span class="p">(</span><span class="s1">&#39;Other team&#39;</span><span class="p">,</span> <span class="mi">5</span><span class="p">)</span>
387 | <span class="p">)</span>
388 | <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">fit</span><span class="p">(</span><span class="n">example_lumped_data</span><span class="p">)</span>
389 | 
390 | <span class="n">lumped_dict_adapter</span>
391 | </pre></div>
392 | 
393 |     </div>
394 | </div>
395 | </div>
396 | 
397 | <div class="output_wrapper">
398 | <div class="output">
399 | 
400 | <div class="output_area">
401 | 
402 | 
403 | 
404 | <div class="output_text output_subarea output_execute_result">
405 | <pre>LumpedAdapter(base_adapter=KeyAdapter(home_goals=&#39;hg&#39;, away_goals=&#39;ag&#39;, home_team=&#39;home&#39;, away_team=&#39;away&#39;), home_team=(&#39;Other team&#39;, 5), away_team=(&#39;Other team&#39;, 5))</pre>
406 | </div>
407 | 
408 | </div>
409 | 
410 | </div>
411 | </div>
412 | 
413 | </div>
414 |     {% endraw %}
415 | 
416 |     {% raw %}
417 |     
418 | <div class="cell border-box-sizing code_cell rendered">
419 | <div class="input">
420 | 
421 | <div class="inner_cell">
422 |     <div class="input_area">
423 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">example_lumped_1</span> <span class="o">=</span> <span class="p">{</span>
424 |     <span class="s1">&#39;home&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 1&#39;</span><span class="p">,</span>
425 |     <span class="s1">&#39;away&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 3&#39;</span><span class="p">,</span>
426 |     <span class="s1">&#39;hg&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> 
427 |     <span class="s1">&#39;ag&#39;</span><span class="p">:</span> <span class="mi">2</span>
428 | <span class="p">}</span>
429 | 
430 | <span class="c1"># A team with more than the minimum number of observations appears as before</span>
431 | <span class="k">assert</span> <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">home_team</span><span class="p">(</span><span class="n">example_lumped_1</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Team 1&#39;</span>
432 | 
433 | <span class="c1"># But a team with fewer observations appears as the placeholder</span>
434 | <span class="k">assert</span> <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">away_team</span><span class="p">(</span><span class="n">example_lumped_1</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Other team&#39;</span>
435 | 
436 | <span class="c1"># Meanwhile, values without a placeholder in the LumpedAdapter</span>
437 | <span class="c1"># also appear as before</span>
438 | <span class="k">assert</span> <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">home_goals</span><span class="p">(</span><span class="n">example_lumped_1</span><span class="p">)</span> <span class="o">==</span> <span class="mi">1</span>
439 | <span class="k">assert</span> <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">away_goals</span><span class="p">(</span><span class="n">example_lumped_1</span><span class="p">)</span> <span class="o">==</span> <span class="mi">2</span>
440 | </pre></div>
441 | 
442 |     </div>
443 | </div>
444 | </div>
445 | 
446 | </div>
447 |     {% endraw %}
448 | 
449 | <div class="cell border-box-sizing text_cell rendered"><div class="inner_cell">
450 | <div class="text_cell_render border-box-sizing rendered_html">
451 | <p>Using a lumped adapter can also allow you to handle items which didn't appear in the training set at all:</p>
452 | 
453 | </div>
454 | </div>
455 | </div>
456 |     {% raw %}
457 |     
458 | <div class="cell border-box-sizing code_cell rendered">
459 | <div class="input">
460 | 
461 | <div class="inner_cell">
462 |     <div class="input_area">
463 | <div class=" highlight hl-ipython3"><pre><span></span><span class="n">example_lumped_2</span> <span class="o">=</span> <span class="p">{</span>
464 |     <span class="s1">&#39;home&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 2&#39;</span><span class="p">,</span>  <span class="c1"># Only appeared 4 times, below threshold of 5</span>
465 |     <span class="s1">&#39;away&#39;</span><span class="p">:</span> <span class="s1">&#39;Team 4&#39;</span><span class="p">,</span>  <span class="c1"># Appeared 0 times in the data</span>
466 |     <span class="s1">&#39;hg&#39;</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> 
467 |     <span class="s1">&#39;ag&#39;</span><span class="p">:</span> <span class="mi">2</span>
468 | <span class="p">}</span>
469 | 
470 | <span class="k">assert</span> <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">home_team</span><span class="p">(</span><span class="n">example_lumped_2</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Other team&#39;</span>
471 | <span class="k">assert</span> <span class="n">lumped_dict_adapter</span><span class="o">.</span><span class="n">away_team</span><span class="p">(</span><span class="n">example_lumped_2</span><span class="p">)</span> <span class="o">==</span> <span class="s1">&#39;Other team&#39;</span>
472 | </pre></div>
473 | 
474 |     </div>
475 | </div>
476 | </div>
477 | 
478 | </div>
479 |     {% endraw %}
480 | 
481 | </div>
482 |  
483 | 
484 | 


--------------------------------------------------------------------------------