├── .github └── workflows │ ├── ci.yml │ └── documentation.yml ├── .gitignore ├── CITATION.cff ├── LICENSE.txt ├── Makefile ├── README.md ├── docs ├── Makefile ├── make.bat └── source │ ├── .nojekyll │ ├── api │ ├── metrics.rst │ ├── models.rst │ ├── stats.rst │ └── visualisations.rst │ ├── conf.py │ ├── index.rst │ └── usage │ ├── installation.rst │ └── quickstart.rst ├── examples ├── README.txt ├── xmpl_quickstart.py ├── xmpl_reliability_diagram.py ├── xmpl_ternary_contours.py └── xmpl_ternary_samples.py ├── pycalib ├── __init__.py ├── metrics.py ├── models │ ├── __init__.py │ ├── calibrators.py │ └── multiclass.py ├── stats.py ├── tests │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ └── test_init.py │ └── test_metrics.py ├── utils.py └── visualisations │ ├── __init__.py │ ├── barycentric.py │ ├── plot.py │ ├── ternary.py │ └── tests │ ├── __init__.py │ └── test_init.py ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg └── setup.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Install Python 3 15 | uses: actions/setup-python@v1 16 | with: 17 | python-version: 3.8 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install pytest 22 | pip install pytest-cov 23 | pip install -r requirements.txt 24 | - name: Run tests with pytest 25 | run: pytest --cov-report=xml --cov=pycalib pycalib 26 | - name: Upload coverage to Codecov 27 | uses: codecov/codecov-action@v1 28 | with: 29 | token: ${{ secrets.CODECOV_TOKEN }} 30 | files: ./coverage.xml 31 | directory: ./coverage/reports/ 32 | flags: unittests 33 | env_vars: OS,PYTHON 34 | name: codecov-umbrella 35 | fail_ci_if_error: true 36 | path_to_write_report: ./coverage/codecov_report.txt 37 | verbose: true 38 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | build-n-deploy: 8 | name: Build and deploy 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - name: Checkout code 🛎️ 12 | uses: actions/checkout@v2.3.1 13 | with: 14 | persist-credentials: false 15 | - name: Set up Python 3.8 🐍 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: 3.8 19 | - name: Install dependencies 💾 20 | run: | 21 | pip install -r requirements.txt 22 | pip install -r requirements-dev.txt 23 | - name: Build the documentation 🔧📖 24 | run: | 25 | cd docs 26 | make html 27 | - name: Deploy 🚀 28 | if: github.ref == 'refs/heads/master' 29 | uses: JamesIves/github-pages-deploy-action@3.7.1 30 | with: 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | BRANCH: gh-pages 33 | FOLDER: docs/build/html 34 | CLEAN: true 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | MANIFEST 3 | dist/ 4 | pycalib.egg-info 5 | pycalib/__pycache__/ 6 | pycalib/tests/__pycache__/ 7 | venv 8 | .coverage 9 | coverage.xml 10 | *.swp 11 | *.pdf 12 | *.svg 13 | *.png 14 | .ipynb_checkpoints/ 15 | /docs/ 16 | -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Perello-Nieto" 5 | given-names: "Miquel" 6 | orcid: "https://orcid.org/0000-0001-8925-424X" 7 | - family-names: "Song" 8 | given-names: "Hao" 9 | - family-names: "Silva-Filho" 10 | given-names: "Telmo" 11 | - family-names: "Kängsepp" 12 | given-names: "Markus" 13 | title: "PyCalib a library for classifier calibration" 14 | version: 0.1.0.dev0 15 | doi: 10.5281/zenodo.5518877 16 | date-released: 2021-08-20 17 | url: "https://github.com/perellonieto/PyCalib" 18 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018-2021 Miquel Perello-Nieto 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: venv 2 | 3 | pip: 4 | pip install --upgrade pip 5 | 6 | venv: 7 | python3.8 -m venv venv 8 | 9 | requirements: pip 10 | pip install -r requirements.txt 11 | 12 | requirements-dev: requirements pip 13 | pip install -r requirements-dev.txt 14 | 15 | build: requirements-dev 16 | python3.8 setup.py sdist 17 | 18 | pypi: build check-readme 19 | twine upload dist/* 20 | 21 | doc: requirements-dev 22 | cd docs; make clean; make html 23 | 24 | # From Scikit-learn 25 | code-analysis: 26 | flake8 pycalib | grep -v external 27 | pylint -E pycalib/ -d E1103,E0611,E1101 --generated-members=Blues --ignored-modules=scipy.special 28 | 29 | clean: 30 | rm -rf ./dist 31 | 32 | # All the following assume the requirmenets-dev are installed, but to make the 33 | # output clean the dependency has been removed 34 | test: 35 | pytest --doctest-modules --cov-report=term-missing --cov=pycalib pycalib 36 | 37 | check-readme: 38 | twine check dist/* 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CI][ci:b]][ci] 2 | [![Documentation][documentation:b]][documentation] 3 | [![License BSD3][license:b]][license] 4 | ![Python3.8][python:b] 5 | [![pypi][pypi:b]][pypi] 6 | [![codecov][codecov:b]][codecov] 7 | [![DOI](https://zenodo.org/badge/280465805.svg)](https://zenodo.org/badge/latestdoi/280465805) 8 | 9 | 10 | [ci]: https://github.com/classifier-calibration/PyCalib/actions/workflows/ci.yml 11 | [ci:b]: https://github.com/classifier-calibration/pycalib/workflows/CI/badge.svg 12 | [documentation]: https://github.com/classifier-calibration/PyCalib/actions/workflows/documentation.yml 13 | [documentation:b]: https://github.com/classifier-calibration/pycalib/workflows/Documentation/badge.svg 14 | [license]: https://github.com/classifier-calibration/PyCalib/blob/master/LICENSE.txt 15 | [license:b]: https://img.shields.io/github/license/classifier-calibration/pycalib.svg 16 | [python:b]: https://img.shields.io/badge/python-3.8-blue 17 | [pypi]: https://badge.fury.io/py/pycalib 18 | [pypi:b]: https://badge.fury.io/py/pycalib.svg 19 | [codecov]: https://codecov.io/gh/classifier-calibration/PyCalib 20 | [codecov:b]: https://codecov.io/gh/classifier-calibration/PyCalib/branch/master/graph/badge.svg?token=AYMZPLELT3 21 | 22 | 23 | 24 | PyCalib 25 | ======= 26 | Python library for classifier calibration 27 | 28 | User installation 29 | ----------------- 30 | 31 | The PyCalib package can be installed from Pypi with the command 32 | 33 | ``` 34 | pip install pycalib 35 | ``` 36 | 37 | Documentation 38 | ------------- 39 | 40 | The documentation can be found at https://classifier-calibration.github.io/PyCalib/ 41 | 42 | Development 43 | =========== 44 | 45 | There is a make file to automate some of the common tasks during development. 46 | After downloading the repository create the virtual environment with the 47 | command 48 | 49 | ``` 50 | make venv 51 | ``` 52 | 53 | This will create a `venv` folder in your current folder. 
The environment needs
54 | to be activated outside of the Makefile with
55 | 
56 | ```
57 | source venv/bin/activate
58 | ```
59 | 
60 | After the environment is loaded, all dependencies can be installed with
61 | 
62 | ```
63 | make requirements-dev
64 | ```
65 | 
66 | Unittest
67 | --------
68 | 
69 | Unittests are specified as doctest examples in simple functions (see for example
70 | the docstrings in `pycalib/metrics.py`), and more complex tests in their own
71 | Python files starting with `test_` (see for example `pycalib/tests/test_metrics.py`).
72 | 
73 | Run the unittests with the command
74 | 
75 | ```
76 | make test
77 | ```
78 | 
79 | The test run will show the unittest results, including the code coverage.
80 | Ideally we want to increase the coverage to cover most of the library.
81 | 
82 | Continuous Integration
83 | ----------------------
84 | 
85 | Every time a commit is pushed to the master branch the unittests are run following
86 | the workflow [.github/workflows/ci.yml](.github/workflows/ci.yml). The CI badge
87 | in the README file will show if the test has passed or not.
88 | 
89 | Analyse code
90 | ------------
91 | 
92 | We are trying to follow the same code standards as in [Numpy][numpy:c] and
93 | [Scikit-learn][sklearn:c]. It is possible to check for PEP 8 and other code
94 | conventions with
95 | 
96 | [numpy:c]: https://numpy.org/devdocs/dev/index.html
97 | [sklearn:c]: https://scikit-learn.org/stable/developers/index.html
98 | 
99 | ```
100 | make code-analysis
101 | ```
102 | 
103 | Documentation
104 | -------------
105 | 
106 | The documentation can be found at
107 | [https://www.classifier-calibration.com/PyCalib/](https://www.classifier-calibration.com/PyCalib/),
108 | and it is automatically updated after every push to the master branch.
109 | 
110 | All documentation is done using the [Sphinx documentation
111 | generator][sphinx:l]. The documentation is written in
112 | [reStructuredText][rst:l] (\*.rst) files in the `docs/source` folder. We try to
113 | follow the conventions from [Numpy][numpy:d] and [Scikit-learn][sklearn:d].
114 | 
115 | [numpy:d]: https://numpydoc.readthedocs.io/en/latest/format.html
116 | [sklearn:d]: https://scikit-learn.org/stable/developers/contributing.html#documentation
117 | 
118 | The examples with images in the folder `docs/source/examples` are generated
119 | automatically with [Sphinx-gallery][sphinx:g] from the Python code in the folder
120 | [examples/](examples/) starting with `xmpl_{example_name}.py`.
121 | 
122 | [rst:l]: https://docutils.sourceforge.io/rst.html
123 | [sphinx:l]: https://www.sphinx-doc.org/en/master/
124 | [sphinx:g]: https://sphinx-gallery.github.io/stable/index.html
125 | 
126 | The documentation can be built with the command
127 | 
128 | ```
129 | make doc
130 | ```
131 | 
132 | (Keep in mind that the documentation has its own Makefile inside the folder [docs](docs).)
133 | 
134 | After building the documentation, a new folder should appear in `docs/build/`
135 | with an `index.html` that can be opened locally for further exploration.
136 | 
137 | The documentation is always built and deployed every time a new commit is
138 | pushed to the master branch with the workflow
139 | [.github/workflows/documentation.yml](.github/workflows/documentation.yml).
140 | 
141 | After building, the `docs/build/html` folder is pushed to the branch
142 | [gh-pages][gh:l].
143 | 144 | [gh:l]: https://github.com/perellonieto/PyCalib/tree/gh-pages 145 | 146 | Check Readme 147 | ------------ 148 | 149 | It is possible to check that the README file passes some tests for Pypi by 150 | running 151 | 152 | ``` 153 | make check-readme 154 | ``` 155 | 156 | Upload to PyPi 157 | -------------- 158 | 159 | After testing that the code passes all unittests and upgrading the version in 160 | the file `pycalib/__init__.py` the code can be published in Pypi with the 161 | following command: 162 | 163 | ``` 164 | make pypi 165 | ``` 166 | 167 | It may require user and password if these are not set in your home directory a 168 | file __.pypirc__ 169 | 170 | ``` 171 | [pypi] 172 | username = __token__ 173 | password = pypi-yourtoken 174 | ``` 175 | 176 | Contributors 177 | ------------ 178 | 179 | This code has been adapted by Miquel from several previous codes. The following 180 | is a list of people that has been involved in some parts of the code. 181 | 182 | - Miquel Perello Nieto 183 | - Hao Song 184 | - Telmo Silva Filho 185 | - Markus Kängsepp 186 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | clean: 23 | rm -rf $(BUILDDIR)/* 24 | rm -rf source/examples 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/source/api/metrics.rst: -------------------------------------------------------------------------------- 1 | .. 
title:: Metrics 2 | 3 | .. _metrics: 4 | 5 | Metrics 6 | ======= 7 | 8 | .. automodule:: pycalib.metrics 9 | :members: 10 | 11 | -------------------------------------------------------------------------------- /docs/source/api/models.rst: -------------------------------------------------------------------------------- 1 | .. title:: Calibration methods 2 | 3 | .. _calibration: 4 | 5 | Calibration methods 6 | =================== 7 | 8 | 9 | .. automodule:: pycalib.models 10 | :members: 11 | 12 | .. automodule:: pycalib.models.calibrators 13 | :members: 14 | 15 | -------------------------------------------------------------------------------- /docs/source/api/stats.rst: -------------------------------------------------------------------------------- 1 | .. title:: Statistics tools 2 | 3 | .. _statistics: 4 | 5 | Statistics tools 6 | =============== 7 | 8 | .. automodule:: pycalib.stats 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/source/api/visualisations.rst: -------------------------------------------------------------------------------- 1 | .. title:: Visualisations 2 | 3 | .. _visualisations: 4 | 5 | Visualisations 6 | ============== 7 | 8 | Visualisations file describes all visualisations tools in PyCalib 9 | 10 | .. autofunction:: pycalib.visualisations.plot_reliability_diagram 11 | 12 | .. automodule:: pycalib.visualisations 13 | :members: 14 | 15 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | import sphinx_rtd_theme 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../../')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'PyCalib' 23 | copyright = '2021, Miquel Perello-Nieto' 24 | author = 'Miquel Perello-Nieto' 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = '0.0.4.dev0' 28 | 29 | github_org = 'perellonieto' 30 | github_repo = 'pycalib' 31 | github_docs_repo = 'pycalib' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 39 | extensions = [ 40 | "sphinx_rtd_theme", 41 | "sphinx.ext.autodoc", 42 | "sphinx_gallery.gen_gallery", 43 | "numpydoc" 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 
52 | exclude_patterns = [] 53 | html_extra_path = ['.nojekyll'] 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | #html_theme = 'alabaster' 61 | html_theme = 'sphinx_rtd_theme' 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | html_static_path = ['_static'] 67 | 68 | # Generate examples with figures 69 | sphinx_gallery_conf = { 70 | 'filename_pattern': '/xmpl_', 71 | 'examples_dirs': os.path.join('..', '..', 'examples'), 72 | 'gallery_dirs': 'examples', 73 | 'backreferences_dir': 'generated', # `doc_module` 74 | 'doc_module': 'pycalib', # Generate mini galleries for the API documentation. 75 | 'reference_url': {'pycalib': None}, # Put links to docs in the examples code. 76 | 'binder': { 77 | 'org': github_org, 78 | 'repo': github_docs_repo, 79 | 'branch': 'gh-pages', 80 | 'binderhub_url': 'https://mybinder.org', 81 | 'dependencies': [os.path.join('..', '..', 'requirements.txt'), 82 | os.path.join('..', '..', 'requirements-dev.txt')] 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. PyCalib documentation master file, created by 2 | sphinx-quickstart on Tue Feb 16 20:18:43 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to PyCalib's documentation! 7 | =================================== 8 | 9 | PyCalib is a library that offers multiple tools to assess probabilistic 10 | classifiers in terms of calibration, and provides helpful functions to 11 | calibrate muticlass probabilistic models that follow the Scikit-learn 12 | `BaseEstimator`_ standard. 13 | 14 | .. _BaseEstimator: https://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :caption: Contents: 19 | 20 | usage/installation 21 | usage/quickstart 22 | api/models 23 | api/metrics 24 | api/visualisations 25 | api/stats 26 | examples/index 27 | 28 | 29 | Indices and tables 30 | ================== 31 | 32 | * :ref:`genindex` 33 | * :ref:`modindex` 34 | -------------------------------------------------------------------------------- /docs/source/usage/installation.rst: -------------------------------------------------------------------------------- 1 | .. title:: Installation 2 | 3 | .. _installation: 4 | 5 | Installation 6 | ============ 7 | 8 | There are multiple ways to install PyCalib. The simplest way is to use the 9 | packaged version available in the Python Package Index PyPI. Further details of 10 | the packaged version can be found at https://pypi.org/project/pycalib/ 11 | 12 | In order to install PyCalib from PyPI just run the following command 13 | 14 | .. code-block:: bash 15 | 16 | pip install pycalib 17 | 18 | This will install the library into your current Python environment. You can 19 | check that the library is available in your environment by running 20 | 21 | .. code-block:: bash 22 | 23 | python -c "import pycalib; print(pycalib.__version__)" 24 | 25 | which if successful should print the installed version in the standard output. 
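Alternatively, the latest development version can be installed directly from a
clone of the source repository. The following is a minimal sketch, assuming the
``setup.py``-based layout of this project and using the repository URL listed
in the project's CITATION file:

.. code-block:: bash

   git clone https://github.com/perellonieto/PyCalib.git
   cd PyCalib
   pip install .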
26 | -------------------------------------------------------------------------------- /docs/source/usage/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. title:: Quickstart 2 | 3 | .. _quickstart: 4 | 5 | Quickstart 6 | ========== 7 | 8 | This is a simple description of how to calibrate a classifier using this 9 | library. For an extended example check the Section Examples 10 | :ref:`sphx_glr_examples_xmpl_quickstart.py`. 11 | 12 | The simplest way to calibrate an existing probabilistic classifier is the 13 | following: 14 | 15 | First choose the calibration method you want to use 16 | 17 | .. code-block:: python 18 | 19 | from pycalib.models import IsotonicCalibration 20 | cal = IsotonicCalibration() 21 | 22 | Now we can put together a probabilistic classifier with the chosen calibration 23 | method 24 | 25 | .. code-block:: python 26 | 27 | from pycalib.models import CalibratedModel 28 | 29 | cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal) 30 | 31 | Now you can train both classifier and calibrator all together. 32 | 33 | .. code-block:: python 34 | 35 | from sklearn.datasets import load_iris 36 | 37 | dataset = load_iris() 38 | cal_clf.fit(dataset.data, dataset.target) 39 | 40 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | Examples 4 | ======== 5 | 6 | Here you can find various examples of using PyCalib. 7 | -------------------------------------------------------------------------------- /examples/xmpl_quickstart.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Quickstart 4 | ============================= 5 | 6 | This example shows a simple comparison of the expected calibration error of a 7 | non-calibrated method against a calibrated method. 
8 | """
9 | # Author: Miquel Perello Nieto
10 | # License: new BSD
11 | 
12 | print(__doc__)
13 | 
14 | ##############################################################################
15 | # First choose a classifier
16 | 
17 | from sklearn.naive_bayes import GaussianNB
18 | 
19 | clf = GaussianNB()
20 | 
21 | ##############################################################################
22 | # And a dataset
23 | 
24 | from sklearn.datasets import make_classification
25 | from sklearn.model_selection import train_test_split
26 | 
27 | X, y = make_classification(
28 |     n_samples=100000, n_features=20, n_informative=4, n_redundant=4,
29 |     random_state=42
30 | )
31 | 
32 | 
33 | 
34 | X_train, X_test, Y_train, Y_test = train_test_split(X, y)
35 | 
36 | ##############################################################################
37 | # We can see how calibrated it is after training
38 | 
39 | clf.fit(X_train, Y_train)
40 | 
41 | n_correct = sum(clf.predict(X_test) == Y_test)
42 | n_test = Y_test.shape[0]
43 | 
44 | print(f"The classifier gets {n_correct} correct "
45 |       f"predictions out of {n_test}")
46 | 
47 | ##############################################################################
48 | # We can assess the confidence expected calibration error
49 | 
50 | from pycalib.metrics import conf_ECE
51 | 
52 | scores = clf.predict_proba(X_test)
53 | cece = conf_ECE(Y_test, scores, bins=15)
54 | 
55 | print(f"The classifier gets a confidence expected "
56 |       f"calibration error of {cece:0.2f}")
57 | 
58 | ##############################################################################
59 | # Let's look at its reliability diagram
60 | 
61 | from pycalib.visualisations import plot_reliability_diagram
62 | 
63 | plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True,
64 |                          show_bars=True, show_gaps=True)
65 | 
66 | ##############################################################################
67 | # We can see how calibration can improve the conf-ECE
68 | 
69 | from pycalib.models import IsotonicCalibration
70 | cal = IsotonicCalibration()
71 | 
72 | ##############################################################################
73 | # Now we can put together a probabilistic classifier with the chosen calibration
74 | # method
75 | 
76 | from pycalib.models import CalibratedModel
77 | 
78 | cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
79 |                           fit_estimator=False)
80 | 
81 | ##############################################################################
82 | # Now you can train both classifier and calibrator all together.
83 | 
84 | cal_clf.fit(X_train, Y_train)
85 | n_correct = sum(cal_clf.predict(X_test) == Y_test)
86 | 
87 | print(f"The calibrated classifier gets {n_correct} "
88 |       f"correct predictions out of {n_test}")
89 | 
90 | scores_cal = cal_clf.predict_proba(X_test)
91 | cece = conf_ECE(Y_test, scores_cal, bins=15)
92 | 
93 | print(f"The calibrated classifier gets a confidence "
94 |       f"expected calibration error of {cece:0.2f}")
95 | 
96 | ##############################################################################
97 | # Finally, let's look at the reliability diagram of the calibrated classifier.
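# If the calibration helped, the bars below should lie closer to the diagonal
# (and the gaps should be smaller) than in the first reliability diagram above.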
98 | 99 | from pycalib.visualisations import plot_reliability_diagram 100 | 101 | plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True, 102 | show_bars=True, show_gaps=True) 103 | -------------------------------------------------------------------------------- /examples/xmpl_reliability_diagram.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Plotting reliability diagrams 4 | ============================= 5 | 6 | This example illustrates how to visualise the reliability diagram for a binary 7 | probabilistic classifier. 8 | """ 9 | # Author: Miquel Perello Nieto 10 | # License: new BSD 11 | 12 | print(__doc__) 13 | 14 | ############################################################################## 15 | # This example shows different ways to visualise the reliability diagram for a 16 | # binary classification problem. 17 | # 18 | # First we will generate two synthetic models and some synthetic scores and 19 | # labels. 20 | 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | np.random.seed(42) 24 | 25 | n_c1 = n_c2 = 500 26 | p = np.concatenate((np.random.beta(2, 5, n_c1), 27 | np.random.beta(4, 3, n_c2) 28 | )) 29 | 30 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 31 | 32 | s1 = 1/(1 + np.exp(-8*(p - 0.5))) 33 | s2 = 1/(1 + np.exp(-3*(p - 0.5))) 34 | 35 | plt.scatter(s1, p, label='Model 1') 36 | plt.scatter(s2, p, label='Model 2') 37 | plt.scatter(p, y) 38 | plt.plot([0, 1], [0, 1], 'r--') 39 | plt.xlabel('Model scores') 40 | plt.ylabel('Sample true probability') 41 | plt.grid() 42 | plt.legend() 43 | 44 | p = np.vstack((1 - p, p)).T 45 | s1 = np.vstack((1 - s1, s1)).T 46 | s2 = np.vstack((1 - s2, s2)).T 47 | 48 | ############################################################################## 49 | # A perfect calibration should be as follows, compared with the generated 50 | # scores 51 | 52 | import scipy.stats as stats 53 | 54 | p_g_p = stats.beta.pdf(x=p[:, 1], a=3, b=2) 55 | p_g_n = stats.beta.pdf(x=p[:, 1], a=2, b=7) 56 | 57 | p_hat = p_g_p/(p_g_n+p_g_p) 58 | p_hat = np.vstack((1 - p_hat, p_hat)).T 59 | 60 | plt.scatter(p[:, 1], s1[:, 1], label='Model 1') 61 | plt.scatter(p[:, 1], s2[:, 1], label='Model 2') 62 | plt.scatter(p[:, 1], p_hat[:, 1], color='red', label='Bayes optimal correction') 63 | plt.xlabel('Sample true probability') 64 | plt.ylabel('Model scores') 65 | plt.grid() 66 | plt.legend() 67 | 68 | ############################################################################## 69 | # There are at least 2 very common ways to show a reliability diagram for a 70 | # probabilistic binary classifier. Drawing a line between all the binned mean 71 | # predictions and the true proportion of positives. 
72 | 
73 | from pycalib.visualisations import plot_reliability_diagram
74 | 
75 | fig = plot_reliability_diagram(labels=y, scores=s1, show_histogram=False)
76 | 
77 | ##############################################################################
78 | # And showing bars instead of a lineplot, usually with errorbars showing the
79 | # discrepancy with respect to a perfectly calibrated model (the diagonal).
80 | 
81 | fig = plot_reliability_diagram(labels=y, scores=s1,
82 |                                class_names=['Negative', 'Positive'],
83 |                                show_gaps=True, show_bars=True,
84 |                                show_histogram=True)
85 | 
86 | 
87 | ##############################################################################
88 | # However, both previous illustrations do not include the number of samples
89 | # that fall into each bin. By default the parameter show_bars is set to True as
90 | # this information is crucial to understand how reliable each estimation is,
91 | # and how this affects some of the calibration metrics.
92 | # We also specify the bin boundaries and change the color of the gaps.
93 | 
94 | fig = plot_reliability_diagram(labels=y, scores=s1,
95 |                                class_names=['Negative', 'Positive'],
96 |                                show_gaps=True, color_gaps='firebrick',
97 |                                bins=[0, .3, .4, .45, .5, .55, .6, .7, 1])
98 | 
99 | ##############################################################################
100 | # It is also common to plot only the confidence (considering only the winning
101 | # class as the positive class for each prediction). Notice that the class name
102 | # is automatically set to the *winning* class.
103 | 
104 | fig = plot_reliability_diagram(labels=y, scores=s1,
105 |                                show_gaps=True,
106 |                                confidence=True,
107 |                                show_bars=True)
108 | 
109 | ##############################################################################
110 | # We can enable some parameters to show several aspects of the reliability
111 | # diagram. For example, we can add a histogram indicating the number of samples
112 | # in each bin (or show the count in each marker); the correction that should be
113 | # applied to the average scores in order to calibrate the model can also be
114 | # shown as red arrows pointing towards the diagonal (a perfectly
115 | # calibrated model); and even the true class of each sample can be drawn at the
116 | # y coordinates 0 and 1 for each scored instance (50% of the data in
117 | # this example, but the default is 100%).
118 | 
119 | fig = plot_reliability_diagram(labels=y, scores=s1,
120 |                                legend=['Model 1'],
121 |                                show_histogram=True,
122 |                                bins=9, class_names=['Negative', 'Positive'],
123 |                                show_counts=True,
124 |                                show_correction=True,
125 |                                sample_proportion=0.5,
126 |                                hist_per_class=True)
127 | ##############################################################################
128 | # It can also be useful to have 95% confidence intervals for each bin, obtained
129 | # from a binomial proportion confidence interval computed with one of several
130 | # statistical methods. This function uses https://www.statsmodels.org/stable/generated/statsmodels.stats.proportion.proportion_confint.html
131 | # and thus accepts the different methods available in the statsmodels library. In the
132 | # following example we use the Clopper-Pearson interval, which is based on the
133 | # Beta distribution, with a confidence level of 95%.
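# As a reference, the Clopper-Pearson bounds for k positives out of n samples
# in a bin, at confidence 1 - alpha, are the Beta quantiles
# lower = Beta.ppf(alpha/2, k, n - k + 1) and upper = Beta.ppf(1 - alpha/2, k + 1, n - k).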
134 | 135 | fig = plot_reliability_diagram(labels=y, scores=s2, 136 | legend=['Model 2'], 137 | show_histogram=True, 138 | show_counts=True, 139 | bins=13, class_names=['Negative', 'Positive'], 140 | sample_proportion=1.0, 141 | errorbar_interval=0.95, 142 | interval_method='beta', 143 | color_list=['orange']) 144 | ############################################################################## 145 | # The function also allows the visualisation of multiple models for comparison. 146 | 147 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 148 | legend=['Model 1', 'Model 2'], 149 | show_histogram=True, 150 | bins=10, class_names=['Negative', 'Positive'], 151 | errorbar_interval=0.95, 152 | interval_method='beta') 153 | 154 | 155 | ############################################################################## 156 | # It is possible to draw reliability diagram for multiple classes as well. We 157 | # will simulate 3 classes by changing some original labels to a 3rd class, and 158 | # modifying the scores of Model 1 and 2 to create new models 3 and 4. 159 | 160 | class_2_idx = range(int(len(y)/3), int(2*len(y)/3)) 161 | y[class_2_idx] = 2 162 | s1 = np.hstack((s1, s1[:, 1].reshape(-1, 1))) 163 | s1[class_2_idx,2] *= 3 164 | s1 /= s1.sum(axis=1)[:, None] 165 | s2 = np.hstack((s2, s2[:, 1].reshape(-1, 1))) 166 | s2[class_2_idx,2] *= 2 167 | s2 /= s2.sum(axis=1)[:, None] 168 | 169 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 170 | legend=['Model 3', 'Model 4'], 171 | show_histogram=True, 172 | color_list=['darkgreen', 'chocolate']) 173 | 174 | ############################################################################## 175 | # If we are only interested in the confidence, the 3 classes can be visualised 176 | # in a single reliability diagram 177 | 178 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 179 | legend=['Model 3', 'Model 4'], 180 | show_histogram=True, 181 | color_list=['darkgreen', 'chocolate'], 182 | confidence=True) 183 | 184 | ############################################################################## 185 | # The same can be done with the bars. 186 | 187 | fig = plot_reliability_diagram(labels=y, scores=s1, 188 | legend=['Model 3'], 189 | show_histogram=True, 190 | show_bars=True, 191 | show_gaps=True) 192 | 193 | 194 | ############################################################################## 195 | # And change the colors. 
196 | 
197 | fig = plot_reliability_diagram(labels=y, scores=s1,
198 |                                legend=['Model 3'],
199 |                                show_histogram=True,
200 |                                color_list=['darkgreen'],
201 |                                show_bars=True,
202 |                                show_gaps=True,
203 |                                color_gaps='orange')
204 | 
205 | 
206 | ##############################################################################
207 | # If we have precomputed the average proportion of true positives and the
208 | # average predictions, or we have access to the ground truth, it is possible to
209 | # plot the same reliability diagram using the following function.
210 | from pycalib.visualisations import plot_reliability_diagram_precomputed
211 | 
212 | avg_true = [np.array([.1, .3, .6, .8, .9, 1]).reshape(-1, 1),
213 |             np.array([.2, .4, .5, .7, .8, .9]).reshape(-1, 1)]
214 | avg_pred = [np.array([.01, .25, .4, .6, .7, .8]).reshape(-1, 1),
215 |             np.array([.15, .39, .7, .75, .8, .9]).reshape(-1, 1)]
216 | 
217 | fig = plot_reliability_diagram_precomputed(avg_true, avg_pred)
218 | 
219 | ##############################################################################
220 | # Similarly, for a multiclass problem we can provide full matrices of size
221 | # (n_bins, n_classes) instead. Notice that the predicted scores do not need to
222 | # be sorted, and the probabilities do not need to sum to one across classes,
223 | # as each column may have been computed from a different set of
224 | # instances.
225 | 
226 | avg_true = [np.array([[.1, .3, .6, .8, .9, 1.],
227 |                       [.0, .2, .4, .7, .8, .9],
228 |                       [.1, .2, .3, .5, .6, .8]]).T,
229 |             np.array([[.1, .4, .7, .8, .9, 1.],
230 |                       [.9, .3, .8, .2, .7, .1],
231 |                       [.2, .3, .5, .4, .7, .1]]).T]
232 | avg_pred = [np.array([[.0, .3, .6, .7, .8, .9],
233 |                       [.1, .2, .3, .5, .8, .7],
234 |                       [.3, .5, .4, .7, .8, .9]]).T,
235 |             np.array([[.0, .3, .6, .8, .9, 1.],
236 |                       [.8, .1, .6, .2, .9, 0.],
237 |                       [.1, .4, .6, .3, .5, 0.]]).T]
238 | 
239 | fig = plot_reliability_diagram_precomputed(avg_true, avg_pred)
--------------------------------------------------------------------------------
/examples/xmpl_ternary_contours.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================================
3 | Draw contour function of ternary simplex space
4 | ==============================================
5 | 
6 | This example illustrates how to draw contour plots for functions with 3
7 | probability inputs and multiple outputs.
8 | """
9 | # Author: Miquel Perello Nieto
10 | # License: new BSD
11 | 
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | np.random.seed(42)
15 | 
16 | print(__doc__)
17 | 
18 | ##############################################################################
19 | # We first show how to draw a heatmap on a ternary probability simplex; in this
20 | # case we define a Dirichlet density function and pass it with default parameters.
21 | 
22 | from scipy.stats import dirichlet
23 | 
24 | from pycalib.visualisations.ternary import draw_func_contours
25 | 
26 | function = lambda x: dirichlet.pdf(x, alpha=[5, 3, 2])
27 | fig = draw_func_contours(function)
28 | 
29 | ##############################################################################
30 | # Next we show how to use a ternary calibration model that has 3 probability
31 | # inputs and 3 outputs. We will simulate a calibrator by means of 3
32 | # Dirichlet distributions and applying Bayes' rule with equal priors.
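# With equal priors, Bayes' rule reduces to normalising the class-conditional
# densities: p(c_k | x) = p(x | c_k) / sum_j p(x | c_j), which is exactly what
# the simulated calibrator below computes before returning its predictions.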
33 | 34 | class calibrator(): 35 | def predict_proba(self, x): 36 | pred1 = dirichlet.pdf(x, alpha=[3, 1, 1]) 37 | pred2 = dirichlet.pdf(x, alpha=[6, 7, 5]) 38 | pred3 = dirichlet.pdf(x, alpha=[3, 4, 5]) 39 | pred = np.vstack([pred1, pred2, pred3]).T 40 | pred = pred / pred.sum(axis=1)[:, None] 41 | return pred 42 | 43 | cal = calibrator() 44 | 45 | ############################################################################## 46 | # Then we will first draw a contourmap only for the first class. We do that by 47 | # creating a lambda function and selecting the first column. 48 | # We also select a colormap for the first class. 49 | 50 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][0] 51 | fig = draw_func_contours(function, cmap='Reds') 52 | 53 | 54 | ############################################################################## 55 | # We can look at the second class by creating a new lambda function and 56 | # selecting the second column. We will also modify how many times to subdivide 57 | # the simplex (subdiv=3). And the number of contour values (nlevels=10). 58 | 59 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][1] 60 | fig = draw_func_contours(function, nlevels=10, subdiv=3, cmap='Oranges') 61 | 62 | ############################################################################## 63 | # Finally we show the 3rd class with other sets of parameters and specifying 64 | # the names of each class. 65 | 66 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][2] 67 | fig = draw_func_contours(function, nlevels=10, subdiv=5, cmap='Blues', 68 | labels=['strawberry', 'orange', 'smurf']) 69 | 70 | 71 | ############################################################################## 72 | # In order to plot the contours of all classes in the same figure it is 73 | # necessary to loop over all subplots. We show an example that uses the 74 | # previous functions. 75 | 76 | labels=['strawberry', 'orange', 'smurf'] 77 | cmap_list = ['Reds', 'Oranges', 'Blues'] 78 | fig = plt.figure(figsize=(10, 5)) 79 | for c in [0, 1, 2]: 80 | ax = fig.add_subplot(1, 3, c+1) 81 | ax.set_title('{}\n$(C_{})$'.format(labels[c], c+1), loc='left') 82 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][c] 83 | fig = draw_func_contours(function, nlevels=30, subdiv=5, cmap=cmap_list[c], 84 | ax=ax, fig=fig) 85 | -------------------------------------------------------------------------------- /examples/xmpl_ternary_samples.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================================== 3 | Scatter plot of ternary problem in the simplex space 4 | ==================================================== 5 | 6 | This example illustrates how to draw samples from the scores of a model and 7 | their true label. 8 | """ 9 | # Author: Miquel Perello Nieto 10 | # License: new BSD 11 | 12 | print(__doc__) 13 | 14 | ############################################################################## 15 | # We generate 3 scores as comming from 3 Dirichlet distributions simulating the 16 | # output scores of a classifier. Then we show how to draw the samples in the 17 | # simplex space. 
18 | 19 | import matplotlib.pyplot as plt 20 | import numpy as np 21 | np.random.seed(42) 22 | 23 | n_c1 = n_c2 = n_c3 = 300 24 | p = np.concatenate((np.random.dirichlet([6, 2, 3], n_c1), 25 | np.random.dirichlet([5, 12, 5], n_c2), 26 | np.random.dirichlet([2, 3, 5], n_c3) 27 | )) 28 | 29 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2), np.ones(n_c3)*2)) 30 | 31 | from pycalib.visualisations.ternary import draw_tri_samples 32 | 33 | fig, ax = draw_tri_samples(p, classes=y, alpha=0.6) 34 | 35 | 36 | ############################################################################## 37 | # Here we specify the names of each class and change their colors. 38 | 39 | fig, ax = draw_tri_samples(p, classes=y, alpha=0.6, 40 | labels=['dogs', 'cats', 'fox'], 41 | color_list=['saddlebrown', 'black', 'darkorange']) 42 | -------------------------------------------------------------------------------- /pycalib/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0.dev6' 2 | -------------------------------------------------------------------------------- /pycalib/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import mean_squared_error 3 | from sklearn.metrics import log_loss 4 | from sklearn.preprocessing import label_binarize 5 | from scipy.stats import percentileofscore 6 | import multiprocessing 7 | 8 | 9 | def accuracy(y_true, y_pred): 10 | """Classification accuracy score 11 | 12 | Accuracy for binary and multiclass classification problems. Consists on the 13 | proportion of correct estimations assuming the maximum class probability of 14 | each score as the estimated class. 15 | 16 | Parameters 17 | ---------- 18 | y_true : label indicator matrix (n_samples, n_classes) 19 | True labels. 20 | # TODO Add option to pass array with shape (n_samples, ) 21 | 22 | y_pred : matrix (n_samples, n_classes) 23 | Predicted scores. 24 | 25 | Returns 26 | ------- 27 | score : float 28 | Proportion of correct predictions as a value between 0 and 1. 29 | 30 | Examples 31 | -------- 32 | >>> from pycalib.metrics import accuracy 33 | >>> Y = np.array([[0, 1], [0, 1]]) 34 | >>> S = np.array([[0.1, 0.9], [0.6, 0.4]]) 35 | >>> accuracy(Y, S) 36 | 0.5 37 | >>> Y = np.array([[0, 1], [0, 1]]) 38 | >>> S = np.array([[0.1, 0.9], [0, 1]]) 39 | >>> accuracy(Y, S) 40 | 1.0 41 | """ 42 | predictions = np.argmax(y_pred, axis=1) 43 | y = np.argmax(y_true, axis=1) 44 | return np.mean(predictions == y) 45 | 46 | 47 | def cross_entropy(y_true, y_pred): 48 | """Cross-entropy score 49 | 50 | Computes the cross-entropy (a.k.a. log-loss) for binary and 51 | multiclass classification scores. 52 | 53 | Parameters 54 | ---------- 55 | y_true : label indicator matrix (n_samples, n_classes) 56 | True labels. 57 | # TODO Add option to pass array with shape (n_samples, ) 58 | 59 | y_pred : matrix (n_samples, n_classes) 60 | Predicted scores. 61 | 62 | Returns 63 | ------- 64 | score : float 65 | 66 | Examples 67 | -------- 68 | >>> from pycalib.metrics import cross_entropy 69 | >>> Y = np.array([[0, 1], [0, 1]]) 70 | >>> S = np.array([[0.1, 0.9], [0.6, 0.4]]) 71 | >>> cross_entropy(Y, S) 72 | 0.5108256237659906 73 | """ 74 | return log_loss(y_true, y_pred) 75 | 76 | 77 | def brier_score(y_true, y_pred): 78 | """Brier score 79 | 80 | Computes the Brier score between the true labels and the estimated 81 | probabilities. 
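In this implementation the squared differences are averaged over both samples
    and classes, i.e. BS = 1/(N*K) * sum_{i,j} (p_ij - y_ij)^2 for N samples and
    K classes (this matches the example below, which evaluates to 0.185).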
This corresponds to the Mean Squared Error between the 82 | estimations and the true labels. 83 | 84 | Parameters 85 | ---------- 86 | y_true : label indicator matrix (n_samples, n_classes) 87 | True labels. 88 | # TODO Add option to pass array with shape (n_samples, ) 89 | 90 | y_pred : matrix (n_samples, n_classes) 91 | Predicted scores. 92 | 93 | Returns 94 | ------- 95 | score : float 96 | Positive value between 0 and 1. 97 | 98 | Examples 99 | -------- 100 | >>> from pycalib.metrics import cross_entropy 101 | >>> Y = np.array([[0, 1], [0, 1]]) 102 | >>> S = np.array([[0.1, 0.9], [0.6, 0.4]]) 103 | >>> brier_score(Y, S) 104 | 0.185 105 | """ 106 | # TODO Consider using the following code instead 107 | # np.mean(np.abs(S - Y)**2) 108 | return mean_squared_error(y_true, y_pred) 109 | 110 | 111 | def conf_ECE(y_true, probs, bins=15): 112 | r"""Confidence Expected Calibration Error 113 | 114 | Calculate ECE score based on model maximum output probabilities and true 115 | labels 116 | 117 | .. math:: 118 | 119 | \text{confidence-ECE} = \sum_{i=1}^M \frac{|B_{i}|}{N} | 120 | \text{accuracy}(B_{i}) - \bar{p}(B_{i})| 121 | 122 | In which $p$ are the maximum predicted probabilities. 123 | 124 | 125 | Parameters 126 | ---------- 127 | y_true: 128 | - a list containing the actual class labels 129 | - ndarray shape (n_samples) with a list containing actual class 130 | labels 131 | - ndarray shape (n_samples, n_classes) with largest value in 132 | each row for the correct column class. 133 | probs: 134 | a list containing probabilities for all the classes with a shape of 135 | (samples, classes) 136 | bins: (int) 137 | - into how many bins are probabilities divided (default = 15) 138 | 139 | Returns 140 | ------- 141 | ece : float 142 | expected calibration error 143 | 144 | Examples 145 | -------- 146 | >>> from pycalib.metrics import conf_ECE 147 | >>> Y = np.array([[1, 0], [0, 1]]).T 148 | >>> P = np.array([[0.9, 0.1], [0.1, 0.9]]).T 149 | >>> print(round(conf_ECE(Y, P, bins=2), 8)) 150 | 0.1 151 | >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T 152 | >>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]]).T 153 | >>> print(round(conf_ECE(Y, P, bins=2), 8)) 154 | 0.2 155 | """ 156 | return ECE(y_true, probs, normalize=False, bins=bins, ece_full=False) 157 | 158 | 159 | def ECE(y_true, probs, normalize=False, bins=15, ece_full=True): 160 | """ 161 | Calculate ECE score based on model output probabilities and true labels 162 | 163 | Parameters 164 | ========== 165 | y_true : list 166 | a list containing the actual class labels 167 | ndarray shape (n_samples) with a list containing actual class 168 | labels 169 | ndarray shape (n_samples, n_classes) with largest value in 170 | each row for the correct column class. 171 | probs : list 172 | a list containing probabilities for all the classes with a shape of 173 | (samples, classes) 174 | normalize: (bool) 175 | in case of 1-vs-K calibration, the probabilities need to be 176 | normalized. (default = False) 177 | bins: (int) 178 | into how many bins are probabilities divided (default = 15) 179 | ece_full: (bool) 180 | whether to use ECE-full or ECE-max. 
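If True, the calibration error is computed over the full probability
        vectors (one entry per class for every sample); if False, only the
        maximum probability of each sample (its confidence) is used, as in
        conf_ECE.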
181 | 182 | Returns 183 | ======= 184 | ece : float 185 | expected calibration error 186 | """ 187 | 188 | probs = np.array(probs) 189 | y_true = np.array(y_true) 190 | if len(y_true.shape) == 2 and y_true.shape[1] > 1: 191 | y_true = y_true.argmax(axis=1).reshape(-1, 1) 192 | 193 | # Prepare predictions, confidences and true labels for ECE calculation 194 | if ece_full: 195 | preds, confs, y_true = _get_preds_all(y_true, probs, 196 | normalize=normalize, 197 | flatten=True) 198 | 199 | else: 200 | preds = np.argmax(probs, axis=1) # Maximum confidence as prediction 201 | 202 | if normalize: 203 | confs = np.max(probs, axis=1)/np.sum(probs, axis=1) 204 | # Check if everything below or equal to 1? 205 | else: 206 | confs = np.max(probs, axis=1) # Take only maximum confidence 207 | 208 | # Calculate ECE and ECE2 209 | ece = _ECE_helper(confs, preds, y_true, bin_size=1/bins, ece_full=ece_full) 210 | 211 | return ece 212 | 213 | 214 | def _get_preds_all(y_true, y_probs, axis=1, normalize=False, flatten=True): 215 | """ 216 | Method to get predictions in right format for ECE-full. 217 | 218 | Parameters 219 | ========== 220 | y_true: list 221 | containing the actual class labels 222 | y_probs: list (samples, classes) 223 | containing probabilities for all the classes 224 | axis: (int) 225 | dimension of set to calculate probabilities on 226 | normalize: (bool) 227 | in case of 1-vs-K calibration, the probabilities need to be 228 | normalized. (default = False) 229 | flatten: (bool) 230 | flatten all the arrays 231 | 232 | Returns 233 | ======= 234 | (y_preds, y_probs, y_true) 235 | predictions, probabilities and true labels 236 | """ 237 | if len(y_true.shape) == 1: 238 | y_true = y_true.reshape(-1, 1) 239 | elif len(y_true.shape) == 2 and y_true.shape[1] > 1: 240 | y_true = y_true.argmax(axis=1).reshape(-1, 1) 241 | 242 | y_preds = np.argmax(y_probs, axis=axis) # Maximum confidence as prediction 243 | y_preds = y_preds.reshape(-1, 1) 244 | 245 | if normalize: 246 | y_probs /= np.sum(y_probs, axis=axis) 247 | 248 | n_classes = y_probs.shape[1] 249 | y_preds = label_binarize(y_preds, classes=range(n_classes)) 250 | y_true = label_binarize(y_true, classes=range(n_classes)) 251 | 252 | if flatten: 253 | y_preds = y_preds.flatten() 254 | y_true = y_true.flatten() 255 | y_probs = y_probs.flatten() 256 | 257 | return y_preds, y_probs, y_true 258 | 259 | 260 | def _ECE_helper(conf, pred, true, bin_size=0.1, ece_full=False): 261 | 262 | """ 263 | Expected Calibration Error 264 | 265 | Parameters 266 | ========== 267 | conf (numpy.ndarray): 268 | list of confidences 269 | pred (numpy.ndarray): 270 | list of predictions 271 | true (numpy.ndarray): 272 | list of true labels 273 | bin_size: (float): 274 | size of one bin (0,1) # TODO should convert to number of bins? 275 | 276 | Returns 277 | ======= 278 | ece: expected calibration error 279 | """ 280 | 281 | upper_bounds = np.arange(bin_size, 1+bin_size, bin_size) # Bounds of bins 282 | 283 | n = len(conf) 284 | ece = 0 # Starting error 285 | 286 | for conf_thresh in upper_bounds: # Find accur. 
and confidences per bin 287 | acc, avg_conf, len_bin = _compute_acc_bin(conf_thresh-bin_size, 288 | conf_thresh, conf, pred, 289 | true, ece_full) 290 | ece += np.abs(acc-avg_conf)*len_bin/n # Add weigthed difference to ECE 291 | 292 | return ece 293 | 294 | 295 | def _compute_acc_bin(conf_thresh_lower, conf_thresh_upper, conf, pred, true, 296 | ece_full=True): 297 | """ 298 | # Computes accuracy and average confidence for bin 299 | 300 | Parameters 301 | ========== 302 | conf_thresh_lower (float): 303 | Lower Threshold of confidence interval 304 | conf_thresh_upper (float): 305 | Upper Threshold of confidence interval 306 | conf (numpy.ndarray): 307 | list of confidences 308 | pred (numpy.ndarray): 309 | list of predictions 310 | true (numpy.ndarray): 311 | list of true labels 312 | pred_thresh (float) : 313 | float in range (0,1), indicating the prediction threshold 314 | 315 | Returns 316 | ======= 317 | (accuracy, avg_conf, len_bin) : 318 | accuracy of bin, confidence of bin and number of elements in bin. 319 | """ 320 | filtered_tuples = [x for x in zip(pred, true, conf) 321 | if (x[2] > conf_thresh_lower or conf_thresh_lower == 0) 322 | and (x[2] <= conf_thresh_upper)] 323 | 324 | if len(filtered_tuples) < 1: 325 | return 0.0, 0.0, 0 326 | else: 327 | if ece_full: 328 | # How many elements falls into given bin 329 | len_bin = len(filtered_tuples) 330 | # Avg confidence of BIN 331 | avg_conf = sum([x[2] for x in filtered_tuples])/len_bin 332 | # Mean difference from actual class 333 | accuracy = np.mean([x[1] for x in filtered_tuples]) 334 | else: 335 | # How many correct labels 336 | correct = len([x for x in filtered_tuples if x[0] == x[1]]) 337 | # How many elements falls into given bin 338 | len_bin = len(filtered_tuples) 339 | # Avg confidence of BIN 340 | avg_conf = sum([x[2] for x in filtered_tuples]) / len_bin 341 | # accuracy of BIN 342 | accuracy = float(correct)/len_bin 343 | 344 | return accuracy, avg_conf, len_bin 345 | 346 | 347 | def _MCE_helper(conf, pred, true, bin_size=0.1, mce_full=True): 348 | 349 | """ 350 | Maximal Calibration Error 351 | 352 | Parameters 353 | ========== 354 | conf (numpy.ndarray): list of confidences 355 | pred (numpy.ndarray): list of predictions 356 | true (numpy.ndarray): list of true labels 357 | bin_size: (float): 358 | size of one bin (0,1) # TODO should convert to number of bins? 359 | mce_full: (bool) 360 | whether to use ECE-full or ECE-max for bin calculation 361 | 362 | Returns 363 | ======= 364 | mce: maximum calibration error 365 | """ 366 | 367 | upper_bounds = np.arange(bin_size, 1+bin_size, bin_size) 368 | 369 | cal_errors = [] 370 | 371 | for conf_thresh in upper_bounds: 372 | acc, avg_conf, count = _compute_acc_bin(conf_thresh-bin_size, 373 | conf_thresh, conf, pred, true, 374 | mce_full) 375 | cal_errors.append(np.abs(acc-avg_conf)) 376 | 377 | return np.max(np.asarray(cal_errors)) 378 | 379 | 380 | def MCE(y_true, probs, normalize=False, bins=15, mce_full=False): 381 | 382 | """ 383 | Calculate MCE score based on model output probabilities and true labels 384 | 385 | Parameters 386 | ========== 387 | y_true : list 388 | containing the actual class labels 389 | probs : list 390 | containing probabilities for all the classes with a shape of (samples, 391 | classes) 392 | normalize : bool 393 | in case of 1-vs-K calibration, the probabilities need to be normalized. 
394 | (default = False) 395 | bins : int 396 | into how many bins are probabilities divided (default = 15) 397 | mce_full : boolean 398 | whether to use ECE-full or ECE-max for calculation MCE. 399 | 400 | Returns 401 | ======= 402 | mce : float 403 | maximum calibration error 404 | """ 405 | 406 | probs = np.array(probs) 407 | y_true = np.array(y_true) 408 | if len(probs.shape) == len(y_true.shape): 409 | y_true = np.argmax(y_true, axis=1) 410 | 411 | # Prepare predictions, confidences and true labels for MCE calculation 412 | if mce_full: 413 | preds, confs, y_true = _get_preds_all(y_true, probs, 414 | normalize=normalize, 415 | flatten=True) 416 | 417 | else: 418 | preds = np.argmax(probs, axis=1) # Maximum confidence as prediction 419 | 420 | if normalize: 421 | confs = np.max(probs, axis=1)/np.sum(probs, axis=1) 422 | # Check if everything below or equal to 1? 423 | else: 424 | confs = np.max(probs, axis=1) # Take only maximum confidence 425 | 426 | # Calculate MCE 427 | mce = _MCE_helper(confs, preds, y_true, bin_size=1/bins, mce_full=mce_full) 428 | 429 | return mce 430 | 431 | 432 | def conf_MCE(y_true, probs, bins=15): 433 | """ 434 | Calculate ECE score based on model output probabilities and true labels 435 | 436 | Parameters 437 | ========== 438 | y_true: 439 | - a list containing the actual class labels 440 | - ndarray shape (n_samples) with a list containing actual class 441 | labels 442 | - ndarray shape (n_samples, n_classes) with largest value in 443 | each row for the correct column class. 444 | probs: 445 | a list containing probabilities for all the classes with a shape of 446 | (samples, classes) 447 | bins: (int) 448 | - into how many bins are probabilities divided (default = 15) 449 | 450 | Returns 451 | ======= 452 | mce : float 453 | maximum calibration error 454 | """ 455 | return MCE(y_true, probs, normalize=False, bins=bins, mce_full=False) 456 | 457 | 458 | def binary_MCE(y_true, probs, power=1, bins=15): 459 | r"""Binary Maximum Calibration Error 460 | 461 | .. math:: 462 | 463 | \text{binary-MCE} = \max_{i \in \{1, ..., M\}} |\bar{y}(B_{i}) 464 | - \bar{p}(B_{i})| 465 | 466 | Parameters 467 | ---------- 468 | y_true : indicator vector (n_samples, ) 469 | True labels. 470 | 471 | probs : matrix (n_samples, ) 472 | Predicted probabilities for positive class. 
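power : int
        Exponent applied to the absolute deviation of each bin before taking
        the maximum (default = 1).

    bins : int
        Number of equal-width bins into which the probabilities are divided
        (default = 15).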
473 | 474 | Returns 475 | ------- 476 | score : float 477 | 478 | Examples 479 | -------- 480 | >>> from pycalib.metrics import binary_MCE 481 | >>> Y = np.array([0, 1]) 482 | >>> P = np.array([0.1, 0.6]) 483 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 484 | 0.4 485 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 486 | >>> P = np.array([.1, .2, .3, .6, .7, .8]) 487 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 488 | 0.3 489 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 490 | >>> P = np.array([.1, .2, .3, .3, .2, .1]) 491 | >>> print(round(binary_MCE(Y, P, bins=1), 8)) 492 | 0.3 493 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 494 | >>> P = np.array([.1, .2, .3, .9, .9, .9]) 495 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 496 | 0.2 497 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 498 | >>> P = np.array([.1, .1, .1, .6, .6, .6]) 499 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 500 | 0.4 501 | """ 502 | idx = np.digitize(probs, np.linspace(0, 1 + 1e-8, bins + 1)) - 1 503 | 504 | def bin_func(y, p, idx): 505 | return (np.abs(np.mean(p[idx]) - np.mean(y[idx])) ** power) 506 | 507 | mce = [] 508 | for i in np.unique(idx): 509 | # print('Mean scores', np.mean(probs[idx == i])) 510 | # print('True proportion', np.mean(y_true[idx == i])) 511 | # print('Difference ', np.abs(np.mean(probs[idx == i]) 512 | # - np.mean(y_true[idx == i]))) 513 | mce.append(bin_func(y_true, probs, idx == i)) 514 | return max(mce) 515 | 516 | 517 | def binary_ECE(y_true, probs, power=1, bins=15): 518 | r"""Binary Expected Calibration Error 519 | 520 | .. math:: 521 | 522 | \text{binary-ECE} = \sum_{i=1}^M \frac{|B_{i}|}{N} | 523 | \bar{y}(B_{i}) - \bar{p}(B_{i})| 524 | 525 | Parameters 526 | ---------- 527 | y_true : indicator vector (n_samples, ) 528 | True labels. 529 | 530 | probs : matrix (n_samples, ) 531 | Predicted probabilities for positive class. 532 | 533 | Returns 534 | ------- 535 | score : float 536 | 537 | Examples 538 | -------- 539 | >>> from pycalib.metrics import binary_ECE 540 | >>> Y = np.array([0, 1]) 541 | >>> P = np.array([0.1, 0.9]) 542 | >>> print(round(binary_ECE(Y, P, bins=2), 8)) 543 | 0.1 544 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 545 | >>> P = np.array([.1, .2, .3, .7, .8, .9]) 546 | >>> print(round(binary_ECE(Y, P, bins=2), 8)) 547 | 0.2 548 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 549 | >>> P = np.array([.4, .4, .4, .6, .6, .6]) 550 | >>> print(round(binary_ECE(Y, P, bins=2), 8)) 551 | 0.4 552 | """ 553 | idx = np.digitize(probs, np.linspace(0, 1 + 1e-8, bins + 1)) - 1 554 | 555 | def bin_func(y, p, idx): 556 | return ((np.abs(np.mean(p[idx]) - np.mean(y[idx])) ** power) 557 | * np.sum(idx) / len(p)) 558 | 559 | ece = 0 560 | for i in np.unique(idx): 561 | # print('Mean scores', np.mean(probs[idx == i])) 562 | # print('True proportion', np.mean(y_true[idx == i])) 563 | # print('Difference ', np.abs(np.mean(probs[idx == i]) 564 | # - np.mean(y_true[idx == i]))) 565 | ece += bin_func(y_true, probs, idx == i) 566 | return ece 567 | 568 | 569 | def classwise_ECE(y_true, probs, power=1, bins=15): 570 | r"""Classwise Expected Calibration Error 571 | 572 | .. math:: 573 | 574 | \text{class-$j$-ECE} = \sum_{i=1}^M \frac{|B_{i,j}|}{N} 575 | |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|, 576 | 577 | \text{classwise-ECE} = \frac{1}{K}\sum_{j=1}^K \text{class-$j$-ECE} 578 | 579 | Parameters 580 | ---------- 581 | y_true : label indicator matrix (n_samples, n_classes) 582 | True labels. 
583 | # TODO Add option to pass array with shape (n_samples, ) 584 | 585 | probs : matrix (n_samples, n_classes) 586 | Predicted probabilities. 587 | 588 | Returns 589 | ------- 590 | score : float 591 | 592 | Examples 593 | -------- 594 | >>> from pycalib.metrics import classwise_ECE 595 | >>> Y = np.array([[1, 0], [0, 1]]).T 596 | >>> P = np.array([[0.9, 0.1], [0.1, 0.9]]).T 597 | >>> print(round(classwise_ECE(Y, P, bins=2), 8)) 598 | 0.1 599 | >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T 600 | >>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]]).T 601 | >>> print(round(classwise_ECE(Y, P, bins=2), 8)) 602 | 0.2 603 | """ 604 | probs = np.array(probs) 605 | if not np.array_equal(probs.shape, y_true.shape): 606 | y_true = label_binarize(np.array(y_true), 607 | classes=range(probs.shape[1])) 608 | 609 | n_classes = probs.shape[1] 610 | 611 | return np.mean( 612 | [ 613 | binary_ECE( 614 | y_true[:, c].astype(float), probs[:, c], power=power, bins=bins 615 | ) for c in range(n_classes) 616 | ] 617 | ) 618 | 619 | 620 | def classwise_MCE(y_true, probs, bins=15): 621 | r"""Classwise Maximum Calibration Error 622 | 623 | .. math:: 624 | 625 | \text{class-$j$-MCE} = \max_{i \in {1, ..., M}} 626 | |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|, 627 | 628 | \text{classwise-MCE} = \max_{j \in {1, ..., K}} \text{class-$j$-MCE} 629 | 630 | Parameters 631 | ---------- 632 | y_true : label indicator matrix (n_samples, n_classes) 633 | True labels. 634 | # TODO Add option to pass array with shape (n_samples, ) 635 | 636 | probs : matrix (n_samples, n_classes) 637 | Predicted probabilities. 638 | 639 | Returns 640 | ------- 641 | score : float 642 | 643 | Examples 644 | -------- 645 | >>> from pycalib.metrics import classwise_MCE 646 | >>> Y = np.array([[1, 0], [0, 1]]).T 647 | >>> P = np.array([[0.8, 0.1], [0.2, 0.9]]).T 648 | >>> print(round(classwise_MCE(Y, P, bins=2), 8)) 649 | 0.2 650 | >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T 651 | >>> P = np.array([[.8, .7, .6, .1, .1, .1], [.2, .3, .4, .9, .9, .9]]).T 652 | >>> print(round(classwise_MCE(Y, P, bins=2), 8)) 653 | 0.3 654 | """ 655 | probs = np.array(probs) 656 | if not np.array_equal(probs.shape, y_true.shape): 657 | y_true = label_binarize(np.array(y_true), 658 | classes=range(probs.shape[1])) 659 | 660 | n_classes = probs.shape[1] 661 | 662 | return np.max( 663 | [ 664 | binary_MCE( 665 | y_true[:, c].astype(float), probs[:, c], bins=bins 666 | ) for c in range(n_classes) 667 | ] 668 | ) 669 | 670 | 671 | def simplex_binning(y_true, probs, bins=15): 672 | 673 | probs = np.array(probs) 674 | if not np.array_equal(probs.shape, y_true.shape): 675 | y_true = label_binarize(np.array(y_true), 676 | classes=range(probs.shape[1])) 677 | 678 | idx = np.digitize(probs, np.linspace(0, 1, bins + 1)) - 1 679 | 680 | prob_bins = {} 681 | label_bins = {} 682 | 683 | for i, row in enumerate(idx): 684 | try: 685 | prob_bins[','.join([str(r) for r in row])].append(probs[i]) 686 | label_bins[','.join([str(r) for r in row])].append(y_true[i]) 687 | except KeyError: 688 | prob_bins[','.join([str(r) for r in row])] = [probs[i]] 689 | label_bins[','.join([str(r) for r in row])] = [y_true[i]] 690 | 691 | bins = [] 692 | for key in prob_bins: 693 | bins.append( 694 | [ 695 | len(prob_bins[key]), 696 | np.mean(np.array(prob_bins[key]), axis=0), 697 | np.mean(np.array(label_bins[key]), axis=0) 698 | ] 699 | ) 700 | 701 | return bins 702 | 703 | 704 | def full_ECE(y_true, probs, bins=15, power=1): 705 | n = len(probs) 706 | 
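# Overview of the computation below: every probability vector is binned
# jointly (np.digitize is applied to each class coordinate), so each occupied
# bin is a small cell of the probability simplex. For every cell, the
# absolute gap between the mean predicted vector and the mean label vector is
# summed over classes and weighted by the fraction of samples in the cell.
# Illustrative call on toy values (for demonstration only):
# >>> Y = label_binarize([0, 1, 2, 1], classes=range(3))
# >>> P = np.array([[.8, .1, .1], [.2, .7, .1], [.1, .2, .7], [.3, .5, .2]])
# >>> full_ECE(Y, P, bins=5)  # 0 would mean every occupied cell is calibrated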
707 | probs = np.array(probs) 708 | if not np.array_equal(probs.shape, y_true.shape): 709 | y_true = label_binarize(np.array(y_true), 710 | classes=range(probs.shape[1])) 711 | 712 | idx = np.digitize(probs, np.linspace(0, 1, bins + 1)) - 1 713 | 714 | filled_bins = np.unique(idx, axis=0) 715 | 716 | s = 0 717 | for bin in filled_bins: 718 | i = np.where((idx == bin).all(axis=1))[0] 719 | s += (len(i)/n) * ( 720 | np.abs(np.mean(probs[i], axis=0) - np.mean(y_true[i], 721 | axis=0))**power 722 | ).sum() 723 | 724 | return s 725 | 726 | 727 | # TODO: Speed up computation. 728 | def _label_resampling(probs): 729 | c = probs.cumsum(axis=1) 730 | u = np.random.rand(len(c), 1) 731 | choices = (u < c).argmax(axis=1) 732 | y = np.zeros_like(probs) 733 | y[range(len(probs)), choices] = 1 734 | return y 735 | 736 | 737 | # Speed up of the previous label_resampling function 738 | def get_one_hot(targets, nb_classes): 739 | res = np.eye(nb_classes)[np.array(targets).reshape(-1)] 740 | return res.reshape(list(targets.shape)+[nb_classes]) 741 | 742 | 743 | def _label_resampling_v2(probs): 744 | c = probs.cumsum(axis=1) 745 | u = np.random.rand(len(c), 1) 746 | choices = (u < c).argmax(axis=1) 747 | y = get_one_hot(choices, probs.shape[1]) 748 | return y 749 | 750 | 751 | # TODO: Speed up computation. 752 | def _score_sampling(probs, samples=10000, ece_function=None): 753 | 754 | probs = np.array(probs) 755 | 756 | return np.array( 757 | [ 758 | ece_function(_label_resampling_v2(probs), probs) for sample in 759 | range(samples) 760 | ] 761 | ) 762 | 763 | 764 | # This uses all available CPUS reducing the time by this factor 765 | def _score_sampling_v2(probs, samples=10000, ece_function=None, 766 | processes=None): 767 | 768 | probs = np.array(probs) 769 | 770 | pool = multiprocessing.Pool(processes=processes) 771 | 772 | probs_list = [probs for i in range(samples)] 773 | labels_sampled = pool.map(_label_resampling_v2, probs_list) 774 | 775 | return np.array(pool.starmap(ece_function, zip(labels_sampled, 776 | probs_list))) 777 | 778 | 779 | def pECE(y_true, probs, samples=10000, ece_function=full_ECE, processes=None): 780 | 781 | probs = np.array(probs) 782 | if not np.array_equal(probs.shape, y_true.shape): 783 | y_true = label_binarize(np.array(y_true), 784 | classes=range(probs.shape[1])) 785 | 786 | return 1 - ( 787 | percentileofscore( 788 | _score_sampling_v2( 789 | probs, 790 | samples=samples, 791 | ece_function=ece_function, 792 | processes=processes), 793 | ece_function(y_true, probs) 794 | ) / 100.0 795 | ) 796 | -------------------------------------------------------------------------------- /pycalib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .calibrators import (BetaCalibration, 2 | IsotonicCalibration, 3 | LogisticCalibration, 4 | SigmoidCalibration, 5 | BinningCalibration, 6 | CalibratedModel) 7 | -------------------------------------------------------------------------------- /pycalib/models/calibrators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from copy import deepcopy 4 | 5 | from scipy.special import expit 6 | 7 | from sklearn.base import clone 8 | 9 | from sklearn.utils import check_X_y, indexable 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.calibration import _SigmoidCalibration 12 | from sklearn.metrics import log_loss 13 | 14 | import warnings 15 | 16 | from sklearn.base import BaseEstimator, ClassifierMixin, 
RegressorMixin 17 | from sklearn.preprocessing import LabelBinarizer 18 | from sklearn.utils import check_array 19 | from sklearn.utils.validation import check_is_fitted 20 | from inspect import signature 21 | from sklearn.isotonic import IsotonicRegression 22 | from sklearn.svm import LinearSVC 23 | from sklearn.model_selection import check_cv 24 | 25 | from betacal import BetaCalibration 26 | 27 | from typing import Literal 28 | 29 | 30 | class _DummyCalibration(BaseEstimator, RegressorMixin): 31 | """Dummy Calibration model. The purpose of this class is to give 32 | the CalibratedClassifierCV class the option to just return the 33 | probabilities of the base classifier. 34 | """ 35 | def fit(self, *args, **kwargs): 36 | """Does nothing""" 37 | return self 38 | 39 | def predict_proba(self, T): 40 | """Return the probabilities of the base classifier""" 41 | return T 42 | 43 | def predict(self, scores, *args, **kwargs): 44 | proba = self.predict_proba(scores, *args, **kwargs) 45 | return proba.argmax(axis=1) 46 | 47 | 48 | class IsotonicCalibration(IsotonicRegression): 49 | def __init__(self): 50 | super(IsotonicCalibration, self).__init__(y_min=0.0, y_max=1.0, 51 | increasing=True, 52 | out_of_bounds='clip') 53 | 54 | def fit(self, scores, y, *args, **kwargs): 55 | ''' 56 | Score=0 corresponds to y=0, and score=1 to y=1 57 | Parameters 58 | ---------- 59 | scores : array-like, shape = [n_samples,] 60 | Data. 61 | y : array-like, shape = [n_samples, ] 62 | Labels. 63 | Returns 64 | ------- 65 | self 66 | ''' 67 | if len(scores.shape) > 1: 68 | scores = scores[:, 1] 69 | return super(IsotonicCalibration, self).fit(scores, y, *args, **kwargs) 70 | 71 | def predict_proba(self, scores, *args, **kwargs): 72 | if len(scores.shape) > 1: 73 | scores = scores[:, 1] 74 | transformed = self.transform(scores, *args, **kwargs) 75 | if len(transformed.shape) == 1: 76 | transformed = np.vstack((1 - transformed, transformed)).T 77 | return transformed 78 | 79 | def predict(self, scores, *args, **kwargs): 80 | proba = self.predict_proba(scores, *args, **kwargs) 81 | return proba.argmax(axis=1) 82 | 83 | 84 | def logit(x): 85 | eps = np.finfo(x.dtype).eps 86 | x = np.clip(x, eps, 1-eps) 87 | return np.log(x/(1 - x)) 88 | 89 | 90 | def log_encode(x): 91 | eps = np.finfo(x.dtype).eps 92 | x = np.clip(x, eps, 1) 93 | return np.log(x) 94 | 95 | 96 | class LogisticCalibration(LogisticRegression): 97 | """Probability calibration with Logistic Regression aka Platt's scaling 98 | 99 | Parameters 100 | ---------- 101 | C: integer 102 | solver: str 'lbfgs' 103 | multi_class: str 'multinomial' 104 | log_transform: boolean True 105 | 106 | Attributes 107 | ---------- 108 | classes_ : array, shape (n_classes) 109 | The class labels. 110 | 111 | calibrated_classifiers_: list (len() equal to cv or 1 if cv == "prefit") 112 | The list of calibrated classifiers, one for each cross-validation fold, 113 | which has been fitted on all but the validation fold and calibrated 114 | on the validation fold. 115 | 116 | References 117 | ---------- 118 | .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to 119 | Regularized Likelihood Methods, J. 
Platt, (1999) 120 | """ 121 | def __init__(self, C=1.0, solver='lbfgs', multi_class='multinomial', 122 | log_transform=True): 123 | self.C_grid = C 124 | self.C = C if isinstance(C, float) else C[0] 125 | self.solver = solver 126 | self.log_transform = log_transform 127 | self.encode = log_encode if log_transform else logit 128 | self.multiclass = multi_class 129 | super(LogisticCalibration, self).__init__(C=C, solver=solver, 130 | multi_class=multi_class) 131 | 132 | def fit(self, scores, y, X_val=None, y_val=None, *args, **kwargs): 133 | if isinstance(self.C_grid, list): 134 | calibrators = [] 135 | losses = np.zeros(len(self.C_grid)) 136 | for i, C in enumerate(self.C_grid): 137 | cal = LogisticCalibration(C=C, solver=self.solver, 138 | multi_class=self.multi_class, 139 | log_transform=self.log_transform) 140 | cal.fit(scores, y) 141 | losses[i] = log_loss(y_val, cal.predict_proba(X_val)) 142 | calibrators.append(cal) 143 | best_idx = int(losses.argmin()) 144 | self.C = calibrators[best_idx].C 145 | return super(LogisticCalibration, self).fit(self.encode(scores), y, 146 | *args, **kwargs) 147 | 148 | def predict_proba(self, scores, *args, **kwargs): 149 | return super(LogisticCalibration, 150 | self).predict_proba(self.encode(scores), *args, **kwargs) 151 | 152 | def predict(self, scores, *args, **kwargs): 153 | return super(LogisticCalibration, self).predict(self.encode(scores), 154 | *args, **kwargs) 155 | 156 | 157 | class SigmoidCalibration(_SigmoidCalibration): 158 | def fit(self, scores, y, *args, **kwargs): 159 | if len(scores.shape) > 1: 160 | scores = scores[:, 1] 161 | return super(SigmoidCalibration, self).fit(scores, y, *args, **kwargs) 162 | 163 | def predict_proba(self, scores, *args, **kwargs): 164 | if len(scores.shape) > 1: 165 | scores = scores[:, 1] 166 | transformed = super(SigmoidCalibration, self).predict(scores, *args, 167 | **kwargs) 168 | return np.vstack((1 - transformed, transformed)).T 169 | 170 | def predict(self, *args, **kwargs): 171 | proba = self.predict_proba(*args, **kwargs) 172 | return proba.argmax(axis=1) 173 | 174 | 175 | class BinningCalibration(BaseEstimator, RegressorMixin): 176 | """Probability calibration with Binning calibration. 177 | 178 | Parameters 179 | ---------- 180 | n_bins: integer or list of integers 181 | If integer, the number of bins to create in the score space in order to compute the 182 | true fraction of positives during the training. 183 | If a list of integers, a BinningCalibration method will be fitted for 184 | each number of bins, and the best calibrator evaluated with the 185 | validation set will be selected as final calibrator. 186 | 187 | 188 | strategy: str {'uniform', 'quantile', 'kmeans'} 189 | If uniform: for equal width bins 190 | If quantile: for equal frequency bins 191 | If kmeans: for each bin with same nearest center to a 1D k-means 192 | 193 | alpha: float 194 | Laplace smoothing (x + a)/(N + 2a) 195 | 196 | Attributes 197 | ---------- 198 | classes_ : array, shape (n_classes) 199 | The class labels. 200 | 201 | calibrated_classifiers_: list (len() equal to cv or 1 if cv == "prefit") 202 | The list of calibrated classifiers, one for each cross-validation fold, 203 | which has been fitted on all but the validation fold and calibrated 204 | on the validation fold. 205 | 206 | References 207 | ---------- 208 | .. [1] Obtaining calibrated probability estimates from decision trees 209 | and naive Bayesian classifiers, B. Zadrozny & C. 
Elkan, ICML 2001 210 | """ 211 | _STRATEGIES = Literal["uniform", "quantile", "kmeans"] 212 | 213 | def __init__(self, n_bins=10, strategy: _STRATEGIES = 'uniform', alpha=1.0): 214 | self.strategy = strategy 215 | self.n_bins = n_bins 216 | self.n_bins_grid = n_bins 217 | self.alpha = alpha 218 | 219 | def fit(self, scores, y, X_val=None, y_val=None, *args, **kwargs): 220 | ''' 221 | Score=0 corresponds to y=0, and score=1 to y=1 222 | Parameters 223 | ---------- 224 | scores : array-like, shape = [n_samples,] 225 | Data. 226 | y : array-like, shape = [n_samples, ] 227 | Labels. 228 | Returns 229 | ------- 230 | self 231 | ''' 232 | if isinstance(self.n_bins, list): 233 | if X_val is None or y_val is None: 234 | raise ValueError(('If n_bins is a list, scores_val and y_val' 235 | 'are required during fit')) 236 | calibrators = [] 237 | losses = [] 238 | for n_bins in self.n_bins: 239 | cal = BinningCalibration(n_bins=n_bins, strategy=self.strategy, 240 | alpha=self.alpha) 241 | cal.fit(scores, y) 242 | predict = cal.predict_proba(X_val) 243 | losses.append(log_loss(y_val, predict)) 244 | calibrators.append(cal) 245 | best_idx = np.argmin(losses) 246 | self.n_bins = calibrators[best_idx].n_bins 247 | self.bins = calibrators[best_idx].bins 248 | self.predictions = calibrators[best_idx].predictions 249 | return self 250 | 251 | if len(np.shape(scores)) > 1: 252 | scores = scores[:, 1] 253 | # TODO check that this code is correct: 254 | if self.strategy == 'quantile': 255 | self.bins = np.sort(scores)[::int(np.ceil(len(scores) 256 | / self.n_bins))] 257 | self.bins = np.hstack([self.bins, scores[-1]]) 258 | elif self.strategy == 'uniform': 259 | self.bins = np.linspace(scores.min(), scores.max(), self.n_bins+1) 260 | else: 261 | raise ValueError('Strategy {} not implemented'.format( 262 | self.strategy)) 263 | self.bins[0] = - np.inf 264 | self.bins[-1] = np.inf 265 | s_binned = np.digitize(scores, self.bins) - 1 266 | self.predictions = np.zeros(self.n_bins) 267 | for i in range(self.n_bins): 268 | self.predictions[i] = ((np.sum(y[s_binned == i]) + self.alpha) 269 | / (np.sum(s_binned == i) + self.alpha*2)) 270 | 271 | return self 272 | 273 | def predict_proba(self, scores, *args, **kwargs): 274 | if len(np.shape(scores)) > 1: 275 | scores = scores[:, 1] 276 | s_binned = np.digitize(scores, self.bins) - 1 277 | transformed = self.predictions[s_binned] 278 | return np.vstack((1 - transformed, transformed)).T 279 | 280 | def predict(self, scores, *args, **kwargs): 281 | proba = self.predict_proba(scores, *args, **kwargs) 282 | return proba.argmax(axis=1) 283 | 284 | 285 | class CalibratedModel(BaseEstimator, ClassifierMixin): 286 | ''' Initialize a Calibrated model (classifier + calibrator) 287 | 288 | Parameters 289 | ---------- 290 | base_estimator : instance BaseEstimator 291 | The classifier whose output decision function needs to be calibrated 292 | to offer more accurate predict_proba outputs. If cv=prefit, the 293 | classifier must have been fit already on data. 294 | 295 | calibrator : instance BaseEstimator 296 | The calibrator to use. 
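fit_estimator : bool (default=True)
If True, `base_estimator` is fitted on the training data inside `fit` before its scores are passed to the calibrator; if False it is assumed to be fitted already.

Examples
--------
Minimal usage sketch on toy data (illustrative only; output not shown):

>>> from sklearn.datasets import make_classification
>>> from sklearn.naive_bayes import GaussianNB
>>> from pycalib.models import CalibratedModel, IsotonicCalibration
>>> X, y = make_classification(random_state=42)
>>> cal = CalibratedModel(GaussianNB(), IsotonicCalibration()).fit(X, y)
>>> proba = cal.predict_proba(X)  # calibrated probabilities, shape (n, 2)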
297 | ''' 298 | def __init__(self, base_estimator=None, calibrator=None, 299 | fit_estimator=True): 300 | self.calibrator = clone(calibrator) 301 | self.base_estimator = deepcopy(base_estimator) 302 | self.fit_estimator = fit_estimator 303 | self.binary = False 304 | 305 | def fit(self, X, y, X_val=None, y_val=None, *args, **kwargs): 306 | """Fit the calibrated model 307 | 308 | Parameters 309 | ---------- 310 | X : array-like, shape (n_samples, n_features) 311 | Training data. 312 | 313 | y : array-like, shape (n_samples, n_classes) 314 | Target values. 315 | 316 | Returns 317 | ------- 318 | self : object 319 | Returns an instance of self. 320 | """ 321 | 322 | X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'], 323 | multi_output=True) 324 | X, y = indexable(X, y) 325 | 326 | if self.fit_estimator: 327 | self.base_estimator.fit(X, y) 328 | 329 | scores = self.base_estimator.predict_proba(X) 330 | 331 | if scores.shape[1] == 2: 332 | self.binary = True 333 | 334 | if self.binary: 335 | try: 336 | self.calibrator.fit(scores, y, *args, **kwargs) 337 | except ValueError: 338 | self.calibrator.fit(scores[:, 1], y, *args, **kwargs) 339 | else: 340 | self.calibrator.fit(scores, y, *args, **kwargs) 341 | 342 | return self 343 | 344 | def predict_proba(self, X): 345 | """Posterior probabilities of classification 346 | 347 | This function returns posterior probabilities of classification 348 | according to each class on an array of test vectors X. 349 | 350 | Parameters 351 | ---------- 352 | X : array-like, shape (n_samples, n_features) 353 | The samples. 354 | 355 | Returns 356 | ------- 357 | C : array, shape (n_samples, n_classes) 358 | The predicted probas. Can be exact zeros. 359 | """ 360 | 361 | scores = self.base_estimator.predict_proba(X) 362 | 363 | if self.binary: 364 | try: 365 | predictions = self.calibrator.predict_proba(scores) 366 | except ValueError: 367 | predictions = self.calibrator.predict_proba(scores[:, 1]) 368 | 369 | if (len(predictions.shape) == 1) or (predictions.shape[1] == 1): 370 | predictions = np.vstack((1 - predictions, predictions)).T 371 | else: 372 | predictions = self.calibrator.predict_proba(scores) 373 | 374 | return predictions 375 | 376 | def predict(self, X): 377 | """Predict the target of new samples. Can be different from the 378 | prediction of the uncalibrated classifier. 379 | 380 | Parameters 381 | ---------- 382 | X : array-like, shape (n_samples, n_features) 383 | The samples. 384 | 385 | Returns 386 | ------- 387 | C : array, shape (n_samples,) 388 | The predicted class. 389 | """ 390 | check_is_fitted(self, ["calibrator"]) 391 | 392 | return np.argmax(self.predict_proba(X), axis=1) 393 | -------------------------------------------------------------------------------- /pycalib/models/multiclass.py: -------------------------------------------------------------------------------- 1 | # All this code has been adapted from scikit-learn.sklearn.multiclass 2 | # The following is the COPYING clause from Scikit-learn 3 | # 4 | # BSD 3-Clause License 5 | # 6 | # Copyright (c) 2007-2020 The scikit-learn developers. 7 | # All rights reserved. 8 | # 9 | # Redistribution and use in source and binary forms, with or without 10 | # modification, are permitted provided that the following conditions are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright notice, this 13 | # list of conditions and the following disclaimer. 
14 | # 15 | # * Redistributions in binary form must reproduce the above copyright notice, 16 | # this list of conditions and the following disclaimer in the documentation 17 | # and/or other materials provided with the distribution. 18 | # 19 | # * Neither the name of the copyright holder nor the names of its 20 | # contributors may be used to endorse or promote products derived from 21 | # this software without specific prior written permission. 22 | # 23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 | # POSSIBILITY OF SUCH DAMAGE. 34 | import inspect 35 | import numpy as np 36 | 37 | from sklearn.base import BaseEstimator, ClassifierMixin, clone 38 | from sklearn.preprocessing import LabelBinarizer 39 | 40 | from joblib import Parallel 41 | from joblib import delayed 42 | 43 | from sklearn.multiclass import _ConstantPredictor 44 | 45 | from sklearn.utils.metaestimators import if_delegate_has_method 46 | from sklearn.utils.validation import check_is_fitted 47 | 48 | import warnings 49 | 50 | 51 | def _fit_binary(estimator, X, y, X_val=None, y_val=None, classes=None): 52 | """Fit a single binary estimator.""" 53 | unique_y = np.unique(y) 54 | if len(unique_y) == 1: 55 | if classes is not None: 56 | if y[0] == -1: 57 | c = 0 58 | else: 59 | c = y[0] 60 | warnings.warn("Label %s is present in all training examples." % 61 | str(classes[c])) 62 | estimator = _ConstantPredictor().fit(X, unique_y) 63 | else: 64 | estimator = clone(estimator) 65 | if X_val is not None and y_val is not None: 66 | estimator.fit(X, y, X_val=X_val, y_val=y_val) 67 | else: 68 | estimator.fit(X, y) 69 | return estimator 70 | 71 | 72 | class OneVsRestCalibrator(BaseEstimator, ClassifierMixin): 73 | """One-vs-the-rest (OvR) multiclass/multilabel strategy 74 | 75 | Also known as one-vs-all, this strategy consists in fitting one calibrator 76 | per class. For each classifier, the class is fitted against all the other 77 | classes. In addition to its computational efficiency (only `n_classes` 78 | classifiers are needed), one advantage of this approach is its 79 | interpretability. Since each class is represented by one and one classifier 80 | only, it is possible to gain knowledge about the class by inspecting its 81 | corresponding classifier. This is the most commonly used strategy for 82 | multiclass classification and is a fair default choice. 83 | This strategy can also be used for multilabel learning, where a classifier 84 | is used to predict multiple labels for instance, by fitting on a 2-d matrix 85 | in which cell [i, j] is 1 if sample i has label j and 0 otherwise. 86 | In the multilabel learning literature, OvR is also known as the binary 87 | relevance method. 88 | Read more in the :ref:`User Guide `. 
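A binary calibrator (for example SigmoidCalibration or BinningCalibration) can be wrapped with this class to obtain a multiclass calibrator: one calibrator is fitted per class against the rest and, for single-label problems, the per-class outputs are renormalized in `predict_proba` so that each row sums to one.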
89 | Parameters 90 | ---------- 91 | estimator : estimator object 92 | An estimator object implementing `fit` and one of `decision_function` 93 | or `predict_proba`. 94 | n_jobs : int, optional, default: 1 95 | The number of jobs to use for the computation. If -1 all CPUs are used. 96 | If 1 is given, no parallel computing code is used at all, which is 97 | useful for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are 98 | used. Thus for n_jobs = -2, all CPUs but one are used. 99 | Attributes 100 | ---------- 101 | estimators_ : list of `n_classes` estimators 102 | Estimators used for predictions. 103 | classes_ : array, shape = [`n_classes`] 104 | Class labels. 105 | label_binarizer_ : LabelBinarizer object 106 | Object used to transform multiclass labels to binary labels and 107 | vice-versa. 108 | multilabel_ : boolean 109 | Whether a OneVsRestClassifier is a multilabel classifier. 110 | """ 111 | def __init__(self, estimator, n_jobs=1, normalize=True): 112 | self.estimator = estimator 113 | self.n_jobs = n_jobs 114 | self.normalize = normalize 115 | 116 | def fit(self, X, y, X_val=None, y_val=None, **kwargs): 117 | """Fit underlying estimators. 118 | 119 | If the number of classes = 2, only one model is trained to predict the 120 | class 1 (second column) 121 | Parameters 122 | ---------- 123 | X : (sparse) array-like, shape = [n_samples, n_classes] 124 | Data. 125 | y : (sparse) array-like, shape = [n_samples, ], [n_samples, n_classes] 126 | Multi-class targets. An indicator matrix turns on multilabel 127 | classification. 128 | Returns 129 | ------- 130 | self 131 | """ 132 | # A sparse LabelBinarizer, with sparse_output=True, has been shown to 133 | # outpreform or match a dense label binarizer in all cases and has also 134 | # resulted in less or equal memory consumption in the fit_ovr function 135 | # overall. 136 | if X.shape[1] == 2: 137 | x_columns = (X[:, 1].ravel().T, ) 138 | else: 139 | x_columns = (col.ravel() for col in X.T) 140 | 141 | self.label_binarizer_ = LabelBinarizer(sparse_output=True) 142 | Y = self.label_binarizer_.fit_transform(y) 143 | Y = Y.tocsc() 144 | self.classes_ = self.label_binarizer_.classes_ 145 | y_columns = (col.toarray().ravel() for col in Y.T) 146 | 147 | if 'X_val' in inspect.getargspec(self.estimator.fit).args \ 148 | and X_val is not None: 149 | if X_val.shape[1] == 2: 150 | x_val_columns = (X_val[:, 1].ravel().T, ) 151 | else: 152 | x_val_columns = (col.ravel() for col in X_val.T) 153 | 154 | Y_val = self.label_binarizer_.transform(y_val) 155 | Y_val = Y_val.tocsc() 156 | y_val_columns = (col.toarray().ravel() for col in Y_val.T) 157 | else: 158 | x_val_columns = [None]*np.shape(Y)[0] 159 | y_val_columns = [None]*np.shape(Y)[0] 160 | 161 | # In cases where individual estimators are very fast to train setting 162 | # n_jobs > 1 in can results in slower performance due to the overhead 163 | # of spawning threads. See joblib issue #112. 164 | self.estimators_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_binary)( 165 | self.estimator, x_col, y_col, x_val_col, y_val_col, 166 | classes=["not %s" % self.label_binarizer_.classes_[i], 167 | self.label_binarizer_.classes_[i]]) 168 | for i, (x_col, y_col, x_val_col, y_val_col) in enumerate( 169 | zip(x_columns, y_columns, x_val_columns, y_val_columns))) 170 | 171 | return self 172 | 173 | @if_delegate_has_method(['_first_estimator', 'estimator']) 174 | def predict_proba(self, X): 175 | """Probability estimates. 176 | The returned estimates for all classes are ordered by label of classes. 
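Each per-class calibrator is queried with its own scores; when only one estimator was fitted (the binary case) the complementary probability 1 - p is prepended so that two columns are always returned.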
177 | Note that in the multilabel case, each sample can have any number of 178 | labels. This returns the marginal probability that the given sample has 179 | the label in question. For example, it is entirely consistent that two 180 | labels both have a 90% probability of applying to a given sample. 181 | In the single label multiclass case, the rows of the returned matrix 182 | sum to 1. 183 | Parameters 184 | ---------- 185 | X : array-like, shape = [n_samples, n_features] 186 | Returns 187 | ------- 188 | T : (sparse) array-like, shape = [n_samples, n_classes] 189 | Returns the probability of the sample for each class in the model, 190 | where classes are ordered as they are in `self.classes_`. 191 | """ 192 | check_is_fitted(self, 'estimators_') 193 | # Y[i, j] gives the probability that sample i has the label j. 194 | # In the multi-label case, these are not disjoint. 195 | if X.shape[1] == 2: 196 | x_columns = (X[:, 1].ravel().T, ) 197 | else: 198 | x_columns = (col.ravel() for col in X.T) 199 | 200 | # Removed indexing as follows: e.predict_proba(x_column)[:, 1] 201 | Y = np.array([e.predict_proba(x_column) 202 | for (e, x_column) in zip(self.estimators_, x_columns)]).T 203 | 204 | if len(self.estimators_) == 1: 205 | # Only one estimator, but we still want to return probabilities 206 | # for two classes. 207 | Y = np.concatenate(((1 - Y), Y), axis=1) 208 | 209 | if not self.multilabel_: 210 | # Then, probabilities should be normalized to 1. 211 | Y /= np.sum(Y, axis=1)[:, np.newaxis] 212 | # Change all columns to zero for a uniform prediction 213 | Y[np.isnan(Y)] = 1/Y.shape[1] 214 | 215 | return Y 216 | 217 | @property 218 | def multilabel_(self): 219 | """Whether this is a multilabel classifier""" 220 | return self.label_binarizer_.y_type_.startswith('multilabel') 221 | -------------------------------------------------------------------------------- /pycalib/stats.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from functools import partial 6 | from scipy.stats import ranksums 7 | from scipy.stats import mannwhitneyu 8 | from scipy.stats import friedmanchisquare 9 | 10 | 11 | TestResult = namedtuple("TestResult", ["statistic", "p_value"]) 12 | 13 | 14 | def compute_friedmanchisquare(table: pd.DataFrame) -> TestResult: 15 | """ Compute Friedman test for repeated samples 16 | 17 | Example: 18 | - n wine judges each rate k different wines. Are any of the k wines 19 | ranked consistently higher or lower than the others? 20 | 21 | Our Calibration case: 22 | - n datasets each rate k different calibration methods. Are any of the 23 | k calibration methods ranked consistently higher or lower than the 24 | others? 
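Example of the expected layout (toy numbers, for illustration only): one row per dataset, one column per calibration method, and each cell holding that method's mean score on the dataset:

>>> import pandas as pd
>>> table = pd.DataFrame({'isotonic': [.10, .20, .15],
...                       'platt': [.12, .22, .16],
...                       'binning': [.20, .30, .25]},
...                      index=['ds1', 'ds2', 'ds3'])
>>> result = compute_friedmanchisquare(table)  # TestResult(statistic, p_value)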
25 | 26 | This will output a statistic and a p-value 27 | SciPy does the following: 28 | - k: is the number of parameters passed to the function 29 | - n: is the length of each array passed to the function 30 | The two options for the given table are: 31 | - k is the datasets: table['mean'].values.tolist() 32 | - k is the calibration methods: table['mean'].T.values.tolist() 33 | """ 34 | if table.shape[1] < 3: 35 | print('Friedman test not appropriate for less than 3 methods') 36 | return TestResult(np.nan, np.nan) 37 | 38 | statistic, p = friedmanchisquare(*table.T.values) 39 | return TestResult(statistic, p) 40 | 41 | 42 | def paired_test(table, stats_func=ranksums): 43 | measure = table.columns.levels[0].values[0] 44 | pvalues = np.zeros((table.columns.shape[0], table.columns.shape[0])) 45 | statistics = np.zeros_like(pvalues) 46 | for i, method_i in enumerate(table.columns.levels[1]): 47 | for j, method_j in enumerate(table.columns.levels[1]): 48 | sample_i = table[measure, method_i] 49 | sample_j = table[measure, method_j] 50 | statistic, pvalue = stats_func(sample_i, sample_j) 51 | pvalues[i, j] = pvalue 52 | statistics[i, j] = statistic 53 | index = pd.MultiIndex.from_product([table.columns.levels[1], 54 | ['statistic']]) 55 | df_statistics = pd.DataFrame(statistics, 56 | index=table.columns.levels[1], 57 | columns=index) 58 | index = pd.MultiIndex.from_product([table.columns.levels[1], 59 | ['pvalue']]) 60 | df_pvalues = pd.DataFrame(pvalues, 61 | index=table.columns.levels[1], 62 | columns=index) 63 | return df_statistics.join(df_pvalues) 64 | 65 | 66 | def compute_ranksums(table): 67 | return paired_test(table, stats_func=ranksums) 68 | 69 | 70 | def compute_mannwhitneyu(table): 71 | return paired_test(table, stats_func=partial(mannwhitneyu, 72 | alternative='less')) 73 | -------------------------------------------------------------------------------- /pycalib/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/classifier-calibration/PyCalib/8208ab907d5b9c5149b2d45b1c8b6e4b2d763317/pycalib/tests/__init__.py -------------------------------------------------------------------------------- /pycalib/tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/classifier-calibration/PyCalib/8208ab907d5b9c5149b2d45b1c8b6e4b2d763317/pycalib/tests/models/__init__.py -------------------------------------------------------------------------------- /pycalib/tests/models/test_init.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.datasets import make_blobs 5 | from pycalib.models import (IsotonicCalibration, LogisticCalibration, 6 | BinningCalibration, SigmoidCalibration, 7 | CalibratedModel) 8 | from numpy.testing import assert_array_equal 9 | 10 | 11 | class TestIsotonicCalibration(unittest.TestCase): 12 | def test_fit_predict(self): 13 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 14 | Y = np.array([1, 0]) 15 | cal = IsotonicCalibration() 16 | cal.fit(S, Y) 17 | pred = cal.predict(S) 18 | assert_array_equal(Y, pred) 19 | 20 | 21 | class TestLogisticCalibration(unittest.TestCase): 22 | def test_fit_predict(self): 23 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 24 | Y = np.array([1, 0]) 25 | cal = LogisticCalibration() 26 | cal.fit(S, Y) 27 | pred = cal.predict(S) 28 | assert_array_equal(Y, 
pred) 29 | 30 | 31 | class TestBinningCalibration(unittest.TestCase): 32 | def test_fit_predict(self): 33 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 34 | Y = np.array([1, 0]) 35 | cal = BinningCalibration() 36 | cal.fit(S, Y) 37 | pred = cal.predict(S) 38 | assert_array_equal(Y, pred) 39 | 40 | 41 | class TestSigmoidCalibration(unittest.TestCase): 42 | def test_fit_predict(self): 43 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 44 | Y = np.array([1, 0]) 45 | cal = SigmoidCalibration() 46 | cal.fit(S, Y) 47 | pred = cal.predict(S) 48 | assert_array_equal(Y, pred) 49 | 50 | 51 | class TestCalibratedModel(unittest.TestCase): 52 | def test_fit_predict(self): 53 | X, Y = make_blobs(n_samples=10000, centers=5, n_features=2, 54 | random_state=42) 55 | Y = (Y > 2).astype(int) 56 | cal = CalibratedModel(LogisticRegression(), IsotonicCalibration()) 57 | cal.fit(X, Y) 58 | 59 | pred = cal.predict(X) 60 | self.assertGreater(np.mean(Y == pred), 0.7) 61 | 62 | 63 | def main(): 64 | unittest.main() 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /pycalib/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from functools import partial 4 | from pycalib.metrics import (accuracy, cross_entropy, brier_score, 5 | binary_ECE, conf_ECE, classwise_ECE, full_ECE, 6 | MCE, pECE) 7 | 8 | from sklearn.preprocessing import label_binarize 9 | 10 | 11 | # TODO add more test cases 12 | class TestFunctions(unittest.TestCase): 13 | def test_accuracy(self): 14 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 15 | Y = np.array([[0, 1], [0, 1]]) 16 | acc = accuracy(Y, S) 17 | self.assertAlmostEqual(acc, 0.5) 18 | 19 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 20 | Y = np.array([[1, 0], [0, 1]]) 21 | acc = accuracy(Y, S) 22 | self.assertAlmostEqual(acc, 0.0) 23 | 24 | def test_cross_entropy(self): 25 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 26 | Y = np.array([[0, 1], [0, 1]]) 27 | ce = cross_entropy(Y, S) 28 | expected = - (np.log(0.9) + np.log(0.4))/2 29 | self.assertAlmostEqual(ce, expected) 30 | 31 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 32 | Y = np.array([[1, 0], [0, 1]]) 33 | ce = cross_entropy(Y, S) 34 | expected = - (np.log(0.1) + np.log(0.4))/2 35 | self.assertAlmostEqual(ce, expected) 36 | 37 | def test_brier_score(self): 38 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 39 | Y = np.array([[0, 1], [0, 1]]) 40 | bs = brier_score(Y, S) 41 | expected = np.mean(np.abs(S - Y)**2) 42 | self.assertAlmostEqual(bs, expected) 43 | 44 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 45 | Y = np.array([[1, 0], [0, 1]]) 46 | bs = brier_score(Y, S) 47 | expected = np.mean(np.abs(S - Y)**2) 48 | self.assertAlmostEqual(bs, expected) 49 | 50 | def test_binary_ece(self): 51 | S = np.array([.6, .6, .6, .6, .6, .6, .6, .6, .6, .6]) 52 | y = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0]) 53 | ece = binary_ECE(y, S) 54 | self.assertAlmostEqual(ece, 0) 55 | 56 | def test_conf_ece(self): 57 | S = np.array([[0.6, 0.2, 0.2]]*10) 58 | y = [0, 0, 0, 0, 0, 0, 1, 1, 2, 2] 59 | Y = label_binarize(y, classes=range(3)) 60 | cece = conf_ECE(Y, S) 61 | self.assertAlmostEqual(cece, 0) 62 | # TODO Add more tests 63 | 64 | def test_classwise_ece(self): 65 | S = np.array([[0.6, 0.2, 0.2]]*10) 66 | Y = label_binarize([0, 0, 0, 0, 0, 0, 1, 1, 2, 2], classes=range(3)) 67 | ece = classwise_ECE(Y, S) 68 | self.assertAlmostEqual(ece, 0) 69 | # TODO Add more tests 70 | 71 | def test_full_ece(self): 72 | S = 
np.array([[0.6, 0.2, 0.2]]*10) 73 | Y = label_binarize([0, 0, 0, 0, 0, 0, 1, 1, 2, 2], classes=range(3)) 74 | ece = full_ECE(Y, S) 75 | self.assertAlmostEqual(ece, 0) 76 | # TODO Add more tests 77 | 78 | def test_conf_mce(self): 79 | S = np.ones((2, 3))/3.0 80 | y = np.array([0, 0]) 81 | mce = MCE(y, S) 82 | self.assertAlmostEqual(mce, 2.0/3) 83 | 84 | y = np.array([0, 1, 2]) 85 | S = np.array([[1/3, 0.3, 0.3], 86 | [1/3, 0.3, 0.3], 87 | [1/3, 0.3, 0.3]]) 88 | mce = MCE(y, S) 89 | self.assertAlmostEqual(mce, 0.0) 90 | 91 | y = np.array([0, 1, 2]) 92 | S = np.array([[0.3, 1/3, 0.3], 93 | [0.3, 1/3, 0.3], 94 | [0.3, 1/3, 0.3]]) 95 | mce = MCE(y, S) 96 | self.assertAlmostEqual(mce, 0.0) 97 | 98 | y = np.array([0, 1, 2]) 99 | S = np.array([[0.3, 0.3, 1/3], 100 | [0.3, 0.3, 1/3], 101 | [0.3, 0.3, 1/3]]) 102 | mce = MCE(y, S) 103 | self.assertAlmostEqual(mce, 0.0) 104 | 105 | Y = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 106 | S = np.array([[0.3, 0.3, 1/3], 107 | [0.3, 0.3, 1/3], 108 | [0.3, 0.3, 1/3]]) 109 | mce = MCE(Y, S) 110 | self.assertAlmostEqual(mce, 0.0) 111 | 112 | Y = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], 113 | [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0]]) 114 | S = np.array([[0.4, 0.3, 0.3], # correct 115 | [0.3, 0.4, 0.3], # incorrect 116 | [0.3, 0.3, 0.4], # incorrect 117 | [0.3, 0.3, 0.4], # incorrect 118 | 119 | [0.1, 0.7, 0.2], # incorrect mean conf 0.75 120 | [0.2, 0.1, 0.7], # incorrect 121 | [0.2, 0.8, 0.2], # incorrect 122 | [0.8, 0.1, 0.1] # incorrect 123 | ]) 124 | mce = MCE(Y, S, bins=2) 125 | self.assertEqual(mce, 0.75) 126 | 127 | Y = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], 128 | [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0]]) 129 | S = np.array([[0.4, 0.3, 0.3], # correct # conf 0.4 130 | [0.3, 0.4, 0.3], # incorrect 131 | [0.3, 0.3, 0.4], # incorrect 132 | [0.3, 0.3, 0.4], # incorrect 133 | 134 | [0.1, 0.7, 0.2], # incorrect 135 | [0.7, 0.1, 0.2], # correct 136 | [0.8, 0.0, 0.2], # correct 137 | [0.1, 0.8, 0.1] # correct 138 | ]) 139 | mce = MCE(Y, S, bins=2) 140 | self.assertAlmostEqual(mce, 0.4 - 1/4) 141 | 142 | def test_calibrated_p_ece(self): 143 | p = np.random.rand(5000, 3) 144 | p /= p.sum(axis=1)[:, None] 145 | multinomial = partial(np.random.multinomial, 1) 146 | y = np.apply_along_axis(multinomial, 1, p) 147 | calibrated_pECE = pECE(y, p, samples=2000, ece_function=classwise_ECE) 148 | # FIXME Reduce computation and increase threshold to 0.04 149 | self.assertGreater(calibrated_pECE, 0.02) 150 | calibrated_pECE = pECE(y, p, samples=2000, ece_function=conf_ECE) 151 | # FIXME Reduce computation and increase threshold to 0.04 152 | self.assertGreater(calibrated_pECE, 0.02) 153 | 154 | def test_uncalibrated_p_ece(self): 155 | p = np.random.rand(1000, 3) 156 | p /= p.sum(axis=1)[:, None] 157 | y = np.eye(3)[np.random.choice([0, 1, 2], size=p.shape[0])] 158 | uncalibrated_pECE = pECE(y, p, samples=1000, 159 | ece_function=classwise_ECE) 160 | self.assertLess(uncalibrated_pECE, 0.04) 161 | uncalibrated_pECE = pECE(y, p, samples=1000, ece_function=conf_ECE) 162 | self.assertLess(uncalibrated_pECE, 0.04) 163 | 164 | 165 | def main(): 166 | unittest.main() 167 | 168 | 169 | if __name__ == '__main__': 170 | main() 171 | -------------------------------------------------------------------------------- /pycalib/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def multiindex_to_strings(index): 6 | if isinstance(index, pd.MultiIndex): 7 | return 
[' '.join(col).strip() for col in index.values] 8 | return [''.join(col).strip() for col in index.values] 9 | 10 | 11 | def df_normalise(df, columns=True): 12 | ''' 13 | rows: bool 14 | Normalize each column to sum to one, or each row to sum to one 15 | ''' 16 | if columns: 17 | return df/df.sum(axis=0) 18 | return (df.T/df.sum(axis=1)).T 19 | 20 | 21 | def get_binned_scores(labels, scores, bins=10): 22 | ''' 23 | Parameters 24 | ========== 25 | labels : array (n_samples, ) 26 | Labels indicating the true class. 27 | scores : matrix (n_samples, ) 28 | Output probability scores for one or several methods. 29 | bins : int or list of floats 30 | Number of bins to create in the scores' space, or list of bin 31 | boundaries. 32 | ''' 33 | if isinstance(bins, int): 34 | n_bins = bins 35 | bins = np.linspace(0, 1 + 1e-8, n_bins + 1) 36 | elif isinstance(bins, list) or isinstance(bins, np.ndarray): 37 | n_bins = len(bins) - 1 38 | bins = np.array(bins) 39 | if bins[0] == 0.0: 40 | bins[0] = 0 - 1e-8 41 | if bins[-1] == 1.0: 42 | bins[-1] = 1 + 1e-8 43 | 44 | scores = np.clip(scores, a_min=0, a_max=1) 45 | 46 | bin_idx = np.digitize(scores, bins) - 1 47 | 48 | bin_true = np.bincount(bin_idx, weights=labels, 49 | minlength=n_bins) 50 | bin_pred = np.bincount(bin_idx, weights=scores, 51 | minlength=n_bins) 52 | bin_total = np.bincount(bin_idx, minlength=n_bins) 53 | 54 | zero_idx = bin_total == 0 55 | avg_true = np.empty(bin_total.shape[0]) 56 | avg_true.fill(np.nan) 57 | avg_true[~zero_idx] = np.divide(bin_true[~zero_idx], 58 | bin_total[~zero_idx]) 59 | avg_pred = np.empty(bin_total.shape[0]) 60 | avg_pred.fill(np.nan) 61 | avg_pred[~zero_idx] = np.divide(bin_pred[~zero_idx], 62 | bin_total[~zero_idx]) 63 | return avg_true, avg_pred, bin_true, bin_total 64 | -------------------------------------------------------------------------------- /pycalib/visualisations/__init__.py: -------------------------------------------------------------------------------- 1 | from .plot import (plot_reliability_diagram_precomputed, 2 | plot_reliability_diagram, 3 | plot_binary_reliability_diagram_gaps, 4 | plot_multiclass_reliability_diagram_gaps, 5 | plot_confusion_matrix, 6 | plot_individual_pdfs, 7 | plot_critical_difference, 8 | plot_df_to_heatmap, 9 | plot_calibration_map) 10 | -------------------------------------------------------------------------------- /pycalib/visualisations/barycentric.py: -------------------------------------------------------------------------------- 1 | # Code is an adaptation from 2 | # http://blog.bogatron.net/blog/2014/02/02/visualizing-dirichlet-distributions/ 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | import matplotlib.tri as tri 8 | from matplotlib import ticker 9 | 10 | 11 | def xy2bc(xy, tol=1.e-32): 12 | '''Converts 2D Cartesian coordinates to barycentric.''' 13 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 14 | # Mid-points of triangle sides opposite of each corner 15 | midpoints = [(corners[(i + 1) % 3] + corners[(i + 2) % 3]) / 2.0 16 | for i in range(3)] 17 | 18 | s = [(corners[i] - midpoints[i]).dot(xy - midpoints[i]) / 0.75 19 | for i in range(3)] 20 | return np.clip(s, tol, 1.0 - tol) 21 | 22 | 23 | def bc2xy(pvalues, corners): 24 | return np.dot(pvalues, corners) 25 | 26 | 27 | def draw_tri_samples(pvals, classes, labels=None, fig=None, ax=None, 28 | handles=None, grid=True, **kwargs): 29 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 30 | pvals = pvals[:, :3].copy() 31 | 32 | if fig is None: 33 | fig = plt.figure() 34 | 
if ax is None: 35 | ax = fig.add_subplot(111) 36 | 37 | if labels is None: 38 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 39 | center = corners.mean(axis=0) 40 | for i, corner in enumerate(corners): 41 | text_x, text_y = corner - (center - corner)*0.1 42 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 43 | horizontalalignment='center') 44 | 45 | xy = bc2xy(pvals, corners) 46 | ax.scatter(xy[:, 0], xy[:, 1], c=classes, **kwargs) 47 | 48 | if handles is not None: 49 | ax.legend(handles=handles) 50 | 51 | ax.axis('equal') 52 | ax.set_xlim(0, 1) 53 | ax.set_ylim(0, 0.75**0.5) 54 | ax.set_xbound(lower=-0.01, upper=1.01) 55 | ax.set_ybound(lower=-0.01, upper=(0.75**0.5)+0.01) 56 | ax.axis('off') 57 | 58 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 59 | 60 | if grid: 61 | refiner = tri.UniformTriRefiner(triangle) 62 | trimesh = refiner.refine_triangulation(subdiv=4) 63 | ax.triplot(trimesh, c='gray', lw=0.2) 64 | 65 | ax.triplot(triangle, c='k', lw=0.5) 66 | 67 | 68 | def get_func_mesh_values(func, subdiv=8): 69 | ''' 70 | Gets the values returned by the function func in a triangular mesh grid 71 | ''' 72 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 73 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 74 | 75 | refiner = tri.UniformTriRefiner(triangle) 76 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 77 | vals = np.array([func(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]) 78 | return vals 79 | 80 | 81 | def get_mesh_xy(subdiv=8): 82 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 83 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 84 | 85 | refiner = tri.UniformTriRefiner(triangle) 86 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 87 | return zip(trimesh.x, trimesh.y) 88 | 89 | 90 | def get_mesh_bc(**kwargs): 91 | mesh_xy = get_mesh_xy(**kwargs) 92 | mesh_bc = np.array([xy2bc(xy) for xy in mesh_xy]) 93 | return mesh_bc 94 | 95 | 96 | def draw_pdf_contours(dist, **kwargs): 97 | draw_func_contours(dist.pdf, **kwargs) 98 | 99 | 100 | # TODO Speed up function. 101 | def draw_func_contours(func, labels=None, nlevels=200, subdiv=8, fig=None, 102 | ax=None, grid=True, **kwargs): 103 | ''' 104 | Parameters: 105 | ----------- 106 | labels: None, string or list of strings 107 | If labels == 'auto' it shows the class number on each corner 108 | If labels is a list of strings it shows each string in the 109 | corresponding corner 110 | If None does not show any label 111 | ''' 112 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 113 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 114 | 115 | refiner = tri.UniformTriRefiner(triangle) 116 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 117 | 118 | z = np.array([func(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]) 119 | 120 | if fig is None: 121 | fig = plt.figure() 122 | if ax is None: 123 | ax = fig.add_subplot(111) 124 | 125 | # FIXME I would like the following line to work, but the max value is not 126 | # shown. I had to do create manually the levels and increase the max value 127 | # by an epsilon. 
This could be a major problem if the epsilon is not small 128 | # for the original range of values 129 | # contour = ax.tricontourf(trimesh, z, nlevels, **kwargs) 130 | # contour = ax.tricontourf(trimesh, z, nlevels, extend='both') 131 | is_nan = ~np.isfinite(z) 132 | # z[is_nan] = 0 133 | nan_id = np.where(is_nan)[0] 134 | triangles_mask = np.zeros(trimesh.triangles.shape[0]) 135 | for ni in nan_id: 136 | for i in range(trimesh.triangles.shape[0]): 137 | if ni in trimesh.triangles[i]: 138 | triangles_mask[i] = 1 139 | trimesh.set_mask(triangles_mask) 140 | if not np.all(triangles_mask): 141 | contour = ax.tricontourf(trimesh, z, 142 | levels=np.linspace(z[~is_nan].min(), 143 | z[~is_nan].max()+1e-9, 144 | nlevels), 145 | **kwargs) 146 | 147 | # Colorbar 148 | cb = fig.colorbar(contour, ax=ax, fraction=0.1, 149 | orientation='horizontal') 150 | tick_locator = ticker.MaxNLocator(nbins=5) 151 | cb.locator = tick_locator 152 | # cb.ax.xaxis.set_major_locator(ticker.AutoLocator()) 153 | cb.update_ticks() 154 | 155 | if labels is not None: 156 | if labels == 'auto': 157 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 158 | center = corners.mean(axis=0) 159 | for i, corner in enumerate(corners): 160 | text_x, text_y = corner - (center - corner)*0.1 161 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 162 | horizontalalignment='center') 163 | 164 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 165 | 166 | if grid: 167 | refiner = tri.UniformTriRefiner(triangle) 168 | trimesh = refiner.refine_triangulation(subdiv=4) 169 | ax.triplot(trimesh, c='gray', lw=0.2) 170 | 171 | ax.triplot(triangle, c='k', lw=0.8) 172 | 173 | # Axes options 174 | ax.set_xlim(xmin=0, xmax=1) 175 | ax.set_ylim(ymin=0, ymax=0.75**0.5) 176 | ax.set_xbound(lower=0, upper=1) 177 | ax.set_ybound(lower=0, upper=0.75**0.5) 178 | ax.axis('equal') 179 | ax.axis('off') 180 | plt.gca().set_adjustable("box") 181 | 182 | 183 | def plot_individual_pdfs(class_dist, *args, **kwargs): 184 | fig = plt.figure(figsize=(16, 5)) 185 | for i, (p, d) in enumerate(zip(class_dist.priors, 186 | class_dist.distributions)): 187 | ax = fig.add_subplot(1, len(class_dist.distributions), i+1) 188 | ax.set_title('$P(Y={})={}$\n$\\mathcal{{D}}_{}(\\alpha={})$'.format( 189 | i+1, p, i+1, str(d)), loc='left') 190 | draw_pdf_contours(d, labels='auto', fig=fig, ax=ax, *args, **kwargs) 191 | return fig 192 | 193 | 194 | # FIXME remove pandas dependency from this function 195 | # def plot_marginal(func, mesh, c, ax1, ax2): 196 | # values = np.array([func(bc) for bc in mesh]).reshape(-1, 1) 197 | # df = pd.DataFrame(np.concatenate((mesh, values), axis=1), 198 | # df.plot(kind='scatter', x=c, y='P', alpha=0.1, ax=ax1) 199 | # ax2.set_title('Class {} marginal'.format(c)) 200 | # table = df.pivot_table(index=c, values='P') 201 | # table.reset_index(inplace=True) 202 | # table.columns = [c, 'P'] 203 | # table.plot(kind='scatter', x=c, y='P', alpha=0.2, ax=ax2) 204 | 205 | 206 | def plot_converging_lines_pvalues(func, lines, i, ax): 207 | ''' 208 | Plots the probability values of the given function for each given line. 
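Each element of `lines` is expected to be an array of barycentric points with at least three columns; `func` is evaluated at every point and the resulting values are plotted against the point's coordinate for class `i`.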
209 | The i indicates the class index from 0 to 2 210 | ''' 211 | # This orders the classes in the following manner: 212 | # C1, C2, C3 213 | # C2, C3, C1 214 | # C3, C1, C2 215 | classes = np.roll(np.array([0, 1, 2]), -i) 216 | 217 | for j, line in enumerate(lines): 218 | pvalues = np.array([func(p) for p in line]).flatten() 219 | if len(lines) == 1: 220 | label = r'$C_{} = 1/2, C_{} = 1/2$'.format( 221 | classes[1]+1, classes[2]+1) 222 | else: 223 | label = r'$C_{} = {}/{}, C_{} = {}/{}$'.format( 224 | classes[1]+1, j, len(lines)-1, 225 | classes[2]+1, len(lines)-j-1, len(lines)-1) 226 | ax.plot(line[:, i], pvalues, label=label) 227 | ax.legend() 228 | 229 | 230 | def draw_calibration_map(original_p, calibrated_p, labels=None, fig=None, 231 | ax=None, handles=None, subdiv=5, color=None, 232 | **kwargs): 233 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 234 | original_p = original_p[:, :3].copy() 235 | calibrated_p = calibrated_p[:, :3].copy() 236 | 237 | if fig is None: 238 | fig = plt.figure() 239 | if ax is None: 240 | ax = fig.add_subplot(111) 241 | 242 | if labels is None: 243 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 244 | center = corners.mean(axis=0) 245 | for i, corner in enumerate(corners): 246 | text_x, text_y = corner - (center - corner)*0.1 247 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 248 | horizontalalignment='center') 249 | 250 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 251 | ax.triplot(triangle, c='k', lw=0.8, zorder=2) 252 | 253 | refiner = tri.UniformTriRefiner(triangle) 254 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 255 | ax.triplot(trimesh, c='gray', lw=0.2, zorder=1) 256 | 257 | o_xy = bc2xy(original_p, corners) 258 | c_xy = bc2xy(calibrated_p, corners) - o_xy 259 | # ax.scatter(xy[:, 0], xy[:, 1], **kwargs) 260 | ax.quiver(o_xy[:, 0], o_xy[:, 1], c_xy[:, 0], c_xy[:, 1], scale=1, 261 | color=color, angles='xy', zorder=3, **kwargs) 262 | 263 | if handles is not None: 264 | ax.legend(handles=handles) 265 | 266 | ax.axis('equal') 267 | ax.set_xlim(0, 1) 268 | ax.set_ylim(0, 0.75**0.5) 269 | ax.set_xbound(lower=-0.01, upper=1.01) 270 | ax.set_ybound(lower=-0.01, upper=(0.75**0.5)+0.01) 271 | ax.axis('off') 272 | 273 | return fig 274 | -------------------------------------------------------------------------------- /pycalib/visualisations/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import itertools 3 | 4 | import matplotlib.pyplot as plt 5 | import matplotlib.ticker as mticker 6 | from matplotlib.ticker import MaxNLocator 7 | from mpl_toolkits.axes_grid1 import make_axes_locatable 8 | 9 | from sklearn.preprocessing import OneHotEncoder 10 | from sklearn.preprocessing import label_binarize 11 | 12 | from statsmodels.stats.proportion import proportion_confint 13 | 14 | from matplotlib import gridspec 15 | 16 | from pycalib.utils import (df_normalise, multiindex_to_strings, 17 | get_binned_scores) 18 | 19 | 20 | def plot_reliability_diagram_precomputed(avg_true, avg_pred, 21 | legend=None, 22 | class_names=None, 23 | fig=None, 24 | fmt='s-', 25 | show_correction=False, 26 | show_gaps=False, 27 | color_list=None, 28 | color_gaps='lightcoral'): 29 | """ Plots the reliability diagram for precomputed averaged scores and labels 30 | 31 | NOTE: This function is currently a copy from plot_reliability_diagram and 32 | modified to accept average scores and true proportions. 
In the future both 33 | functions may be merged or share common private functions. 34 | Parameters 35 | ========== 36 | avg_true : matrix (n_bins, n_classes) or list of matrices 37 | True proportions per class. 38 | avg_pred : matrix (n_bins, n_classes) or list of matrices 39 | Output probability scores for one or several methods. 40 | legend : list of strings or None 41 | Text to use for the legend. 45 | class_names : list of strings or None 46 | Name of each class, if None it will assign integer numbers starting 47 | with 1. 48 | fig : matplotlib.pyplot.Figure or None 49 | Figure to use for the plots, if None a new figure is created. 56 | fmt : string (default: 's-') 57 | Format of the lines following the matplotlib.pyplot.plot standard. 58 | show_correction : boolean 59 | If True shows an arrow for each bin indicating the necessary correction 60 | to the average scores in order to be perfectly calibrated. 61 | show_gaps : boolean 62 | If True shows the gap between the average predictions and the true 63 | proportion of positive samples. 67 | color_list : list of strings or None 68 | List of string colors indicating the color of each method. 69 | color_gaps : string 70 | Color of the gaps (if shown).
71 | 72 | Returns 73 | ======= 74 | fig : matplotlib.pyplot.figure 75 | Figure with the reliability diagram 76 | """ 77 | if isinstance(avg_true, list): 78 | avg_true_list = avg_true 79 | else: 80 | avg_true_list = [avg_true, ] 81 | if isinstance(avg_pred, list): 82 | avg_pred_list = avg_pred 83 | else: 84 | avg_pred_list = [avg_pred, ] 85 | 86 | n_classes = avg_true_list[0].shape[1] 87 | n_scores = len(avg_true_list) 88 | 89 | if color_list is None: 90 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color'] 91 | 92 | if class_names is None: 93 | class_names = [str(i+1) for i in range(n_classes)] 94 | 95 | if n_classes == 2: 96 | avg_pred_list = [pred[:, 1].reshape(-1, 1) for pred in avg_pred_list] 97 | class_names = [class_names[1], ] 98 | 99 | n_columns = n_classes if n_classes != 2 else 1 100 | 101 | if fig is None: 102 | fig = plt.figure(figsize=(n_columns*4, 4)) 103 | 104 | spec = gridspec.GridSpec(ncols=n_columns, nrows=1, wspace=0.02, 105 | hspace=0.04, left=0.15) 106 | 107 | for i in range(n_columns): 108 | ax1 = fig.add_subplot(spec[i]) 109 | # Perfect calibration 110 | ax1.plot([0, 1], [0, 1], "--", color='lightgrey', 111 | zorder=10) 112 | 113 | for j in range(n_scores): 114 | # bin_total = bin_total_list[j][:, i] 115 | pred_sort_idx = np.argsort(avg_pred_list[j][:, i]) 116 | avg_true = avg_true_list[j][pred_sort_idx, i] 117 | avg_pred = avg_pred_list[j][pred_sort_idx, i] 118 | 119 | name = legend[j] if legend else None 120 | ax1.plot(avg_pred, avg_true, fmt, label=name, color=color_list[j]) 121 | 122 | if show_correction: 123 | for ap, at in zip(avg_pred, avg_true): 124 | ax1.arrow(ap, at, at - ap, 0, color=color_gaps, 125 | head_width=0.02, length_includes_head=True, 126 | width=0.01) 127 | 128 | if show_gaps: 129 | for ap, at in zip(avg_pred, avg_true): 130 | error = avg_pred - avg_true 131 | negative_values = error < 0 132 | ygaps = np.zeros(shape=(2, avg_true.shape[0])) 133 | ygaps[0, negative_values] = - error[negative_values] 134 | ygaps[1, ~negative_values] = error[~negative_values] 135 | ax1.errorbar(avg_pred, avg_true, yerr=ygaps, fmt=" ", 136 | color=color_gaps, lw=4, capsize=5, capthick=1, 137 | zorder=10) 138 | 139 | ax1.set_xlim([0, 1]) 140 | ax1.set_ylim([0, 1]) 141 | ax1.set_xlabel('Average score (Class {})'.format(class_names[i])) 142 | if i == 0: 143 | ax1.set_ylabel('Fraction of positives') 144 | else: 145 | ax1.set_yticklabels([]) 146 | nbins = len(ax1.get_xticklabels()) 147 | ax1.xaxis.set_major_locator(MaxNLocator(nbins=nbins, 148 | prune='lower')) 149 | ax1.grid(True) 150 | ax1.set_axisbelow(True) 151 | 152 | if legend is not None: 153 | lines, labels = fig.axes[0].get_legend_handles_labels() 154 | fig.legend(lines, labels, loc='upper center', 155 | bbox_to_anchor=(0, 0, 1, 1), 156 | bbox_transform=fig.transFigure, ncol=6) 157 | 158 | fig.align_labels() 159 | return fig 160 | 161 | 162 | def plot_reliability_diagram(labels, scores, legend=None, 163 | show_histogram=True, 164 | bins=10, class_names=None, fig=None, 165 | show_counts=False, errorbar_interval=None, 166 | interval_method='beta', fmt='s-', 167 | show_correction=False, 168 | show_gaps=False, 169 | sample_proportion=0, 170 | hist_per_class=False, 171 | color_list=None, 172 | show_bars=False, 173 | invert_histogram=False, 174 | color_gaps='lightcoral', 175 | confidence=False, 176 | ax=None): 177 | """ Plots the reliability diagram of the given scores and true labels 178 | 179 | Parameters 180 | ========== 181 | labels : array (n_samples, ) 182 | Labels indicating the true class.
183 | scores : matrix (n_samples, n_classes) or list of matrices 184 | Output probability scores for one or several methods. 185 | legend : list of strings or None 186 | Text to use for the legend. 187 | show_histogram : boolean 188 | If True, it generates an additional figure showing the number of 189 | samples in each bin. 190 | bins : int or list of floats 191 | Number of bins to create in the scores' space, or list of bin 192 | boundaries. 193 | class_names : list of strings or None 194 | Name of each class, if None it will assign integer numbers starting 195 | with 1. 196 | fig : matplotlib.pyplot.Figure or None 197 | Figure to use for the plots, if None a new figure is created. 198 | show_counts : boolean 199 | If True shows the number of samples of each bin in its corresponding 200 | line marker. 201 | errorbar_interval : float or None 202 | If a float between 0 and 1 is passed, it shows an errorbar 203 | corresponding to a confidence interval containing the specified 204 | percentile of the data. 205 | interval_method : string (default: 'beta') 206 | Method to estimate the confidence interval which uses the function 207 | proportion_confint from statsmodels.stats.proportion 208 | fmt : string (default: 's-') 209 | Format of the lines following the matplotlib.pyplot.plot standard. 210 | show_correction : boolean 211 | If True shows an arrow for each bin indicating the necessary correction 212 | to the average scores in order to be perfectly calibrated. 213 | show_gaps : boolean 214 | If True shows the gap between the average predictions and the true 215 | proportion of positive samples. 216 | sample_proportion : float in the interval [0, 1] (default 0) 217 | If bigger than 0, it shows the labels of the specified proportion of 218 | samples. 219 | hist_per_class : boolean 220 | If True shows one histogram of the bins per class. 221 | color_list : list of strings or None 222 | List of string colors indicating the color of each method. 223 | show_bars : boolean 224 | If True shows bars instead of lines. 225 | invert_histogram : boolean 226 | If True shows the histogram with the zero on top and highest number of 227 | bin samples at the bottom. 228 | color_gaps : string 229 | Color of the gaps (if shown). 230 | confidence : boolean 231 | If True shows only the confidence reliability diagram. 
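ax : matplotlib.pyplot.Axis or None
    If given, the reliability diagram is drawn on this axis instead of
    creating new subplots in the figure.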
232 | 233 | Returns 234 | ======= 235 | fig : matplotlib.pyplot.figure 236 | Figure with the reliability diagram 237 | """ 238 | if isinstance(scores, list): 239 | scores_list = scores 240 | else: 241 | scores_list = [scores, ] 242 | n_scores = len(scores_list) 243 | if color_list is None: 244 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color'] 245 | 246 | classes = np.arange(scores_list[0].shape[1]) 247 | n_classes = len(classes) 248 | labels = label_binarize(labels, classes=classes) 249 | 250 | labels_list = [] 251 | if confidence: 252 | labels_idx = np.argmax(labels, axis=1) 253 | new_scores_list = [] 254 | for score in scores_list: 255 | # TODO: randomize selection when there are several winning classes 256 | conf_idx = np.argmax(score, axis=1) 257 | winning_score = np.max(score, axis=1) 258 | new_scores_list.append(np.vstack([1 - winning_score, 259 | winning_score]).T) 260 | labels_list.append((conf_idx.flatten() 261 | == labels_idx.flatten()).astype(int)) 262 | labels_list[-1] = label_binarize(labels_list[-1], classes=[0, 1]) 263 | scores_list = new_scores_list 264 | n_classes = 2 265 | class_names = ['Non winning', 'winning'] 266 | n_columns = 1 267 | else: 268 | n_columns = n_classes 269 | 270 | if class_names is None: 271 | class_names = [str(i+1) for i in range(n_classes)] 272 | 273 | if n_classes == 2: 274 | scores_list = [score[:, 1].reshape(-1, 1) for score in scores_list] 275 | class_names = [class_names[1], ] 276 | n_columns = 1 277 | 278 | if fig is None: 279 | fig = plt.figure(figsize=(n_columns*4, 4)) 280 | 281 | if show_histogram: 282 | spec = gridspec.GridSpec(ncols=n_columns, nrows=2, 283 | height_ratios=[5, 1], 284 | wspace=0.02, 285 | hspace=0.04, 286 | left=0.15) 287 | else: 288 | spec = gridspec.GridSpec(ncols=n_columns, nrows=1, 289 | hspace=0.04, left=0.15) 290 | 291 | if isinstance(bins, int): 292 | n_bins = bins 293 | bins = np.linspace(0, 1 + 1e-8, n_bins + 1) 294 | elif isinstance(bins, list) or isinstance(bins, np.ndarray): 295 | n_bins = len(bins) - 1 296 | bins = np.array(bins) 297 | if bins[0] == 0.0: 298 | bins[0] = 0 - 1e-8 299 | if bins[-1] == 1.0: 300 | bins[-1] = 1 + 1e-8 301 | 302 | for i in range(n_columns): 303 | if ax is not None: 304 | ax1 = ax 305 | else: 306 | ax1 = fig.add_subplot(spec[i]) 307 | # Perfect calibration 308 | ax1.plot([0, 1], [0, 1], "--", color='lightgrey', 309 | zorder=10) 310 | for j, score in enumerate(scores_list): 311 | if labels_list: 312 | labels = labels_list[j] 313 | 314 | avg_true, avg_pred, bin_true, bin_total = get_binned_scores( 315 | labels[:, i], score[:, i], bins=bins) 316 | zero_idx = bin_total == 0 317 | 318 | name = legend[j] if legend else None 319 | if show_bars: 320 | ax1.bar(x=bins[:-1][~zero_idx], height=avg_true[~zero_idx], 321 | align='edge', width=(bins[1:] - bins[:-1])[~zero_idx], 322 | edgecolor='black', color=color_list[j]) 323 | else: 324 | if errorbar_interval is None: 325 | ax1.plot(avg_pred, avg_true, fmt, label=name, 326 | color=color_list[j]) 327 | else: 328 | nozero_intervals = proportion_confint( 329 | count=bin_true[~zero_idx], nobs=bin_total[~zero_idx], 330 | alpha=1-errorbar_interval, 331 | method=interval_method) 332 | nozero_intervals = np.array(nozero_intervals) 333 | 334 | intervals = np.empty((2, bin_total.shape[0])) 335 | intervals.fill(np.nan) 336 | intervals[:, ~zero_idx] = nozero_intervals 337 | 338 | yerr = np.abs(intervals - avg_true) 339 | ax1.errorbar(avg_pred, avg_true, yerr=yerr, label=name, 340 | fmt=fmt, color=color_list[j]) # markersize=5) 341 | 342 | if 
show_counts: 343 | for ap, at, count in zip(avg_pred, avg_true, bin_total): 344 | if np.isfinite(ap) and np.isfinite(at): 345 | ax1.text(ap, at, str(count), fontsize=6, 346 | ha='center', va='center', zorder=11, 347 | bbox=dict(boxstyle='square,pad=0.3', 348 | fc='white', ec=color_list[j])) 349 | 350 | if show_correction: 351 | for ap, at in zip(avg_pred, avg_true): 352 | ax1.arrow(ap, at, at - ap, 0, color=color_gaps, 353 | head_width=0.02, length_includes_head=True, 354 | width=0.01) 355 | 356 | if show_gaps: 357 | for ap, at in zip(avg_pred, avg_true): 358 | error = avg_pred - avg_true 359 | negative_values = error < 0 360 | ygaps = np.zeros(shape=(2, avg_true.shape[0])) 361 | ygaps[0, negative_values] = - error[negative_values] 362 | ygaps[1, ~negative_values] = error[~negative_values] 363 | ax1.errorbar(avg_pred, avg_true, yerr=ygaps, fmt=" ", 364 | color=color_gaps, lw=4, capsize=5, capthick=1, 365 | zorder=10) 366 | 367 | if sample_proportion > 0: 368 | idx = np.random.choice(labels.shape[0], 369 | int(sample_proportion*labels.shape[0])) 370 | ax1.scatter(score[idx, i], labels[idx, i], marker='|', s=100, 371 | alpha=0.2, color=color_list[j]) 372 | 373 | ax1.set_xlim([0, 1]) 374 | ax1.set_ylim([0, 1]) 375 | # ax1.set_title('Class {}'.format(class_names[i])) 376 | if not show_histogram: 377 | ax1.set_xlabel('Average score (Class {})'.format( 378 | class_names[i])) 379 | if i == 0: 380 | ax1.set_ylabel('Fraction of positives') 381 | else: 382 | ax1.set_yticklabels([]) 383 | ax1.grid(True) 384 | ax1.set_axisbelow(True) 385 | 386 | if show_histogram: 387 | divider = make_axes_locatable(ax1) 388 | ax2 = divider.append_axes("bottom", size="20%", pad=0.1, 389 | sharex=ax1) 390 | 391 | # ax2 = fig.add_subplot(spec[n_columns + i], 392 | # label='{}'.format(i)) 393 | for j, score in enumerate(scores_list): 394 | ax1.set_xticklabels([]) 395 | # lines = ax1.get_lines() 396 | # ax2.set_xticklabels([]) 397 | 398 | name = legend[j] if legend else None 399 | if hist_per_class: 400 | for c in [0, 1]: 401 | linestyle = ('dotted', 'dashed')[c] 402 | ax2.hist(score[labels[:, i] == c, i], range=(0, 1), 403 | bins=bins, label=name, 404 | histtype="step", 405 | lw=1, linestyle=linestyle, 406 | color=color_list[j], 407 | edgecolor='black') 408 | else: 409 | if n_scores > 1: 410 | kwargs = {'histtype': 'step', 411 | 'edgecolor': color_list[j]} 412 | else: 413 | kwargs = {'histtype': 'bar', 414 | 'edgecolor': 'black', 415 | 'color': color_list[j]} 416 | ax2.hist(score[:, i], range=(0, 1), bins=bins, label=name, 417 | lw=1, **kwargs) 418 | ax2.set_xlim([0, 1]) 419 | ax2.set_xlabel('Average score (Class {})'.format( 420 | class_names[i])) 421 | ax2.yaxis.set_major_locator(MaxNLocator(integer=True, 422 | prune='upper', 423 | nbins=3)) 424 | if i == 0: 425 | ax2.set_ylabel('Count') 426 | ytickloc = ax2.get_yticks() 427 | ax2.yaxis.set_major_locator(mticker.FixedLocator(ytickloc)) 428 | yticklabels = ['{:0.0f}'.format(value) for value in 429 | ytickloc] 430 | ax2.set_yticklabels(labels=yticklabels, 431 | fontdict=dict(verticalalignment='top')) 432 | else: 433 | ax2.set_yticklabels([]) 434 | nbins = len(ax2.get_xticklabels()) 435 | ax2.xaxis.set_major_locator(MaxNLocator(nbins=nbins, 436 | prune='lower')) 437 | ax2.grid(True, which='both') 438 | ax2.set_axisbelow(True) 439 | if invert_histogram: 440 | ylim = ax2.get_ylim() 441 | ax2.set_ylim(reversed(ylim)) 442 | 443 | if legend is not None: 444 | lines, labels = fig.axes[0].get_legend_handles_labels() 445 | fig.legend(lines, labels, loc='upper center', 446 | 
bbox_to_anchor=(0, 0, 1, 1), 447 | bbox_transform=fig.transFigure, ncol=6) 448 | 449 | fig.align_labels() 450 | return fig 451 | 452 | 453 | def plot_binary_reliability_diagram_gaps(y_true, p_pred, n_bins=15, title=None, 454 | fig=None, ax=None, legend=False, 455 | color_gaps="lightcoral", 456 | show_histogram=False, 457 | color="cornflowerblue"): 458 | """Plot binary reliability diagram gaps 459 | 460 | Parameters 461 | ========== 462 | y_true : np.array shape (n_samples, 2) or (n_samples, ) 463 | Labels corresponding to the scores as a binary indicator matrix or as a 464 | vector of integers indicating the class. 465 | p_pred : binary matrix shape (n_samples, 2) or (n_samples, ) 466 | Output probability scores for each class as a matrix, or for the 467 | positive class. 468 | n_bins : integer 469 | Number of bins to divide the scores 470 | title : string 471 | Title for the plot 472 | fig : matplotlib.pyplot.figure 473 | Figure in which the axis will be drawn 474 | ax : matplotlib.pyplot.Axis 475 | Axis where to draw the plot 476 | legend : boolean 477 | If True the function will draw a legend 478 | 479 | Returns 480 | ======= 481 | fig : matplotlib.pyplot.figure 482 | Figure with the reliability diagram 483 | """ 484 | if fig is None and ax is None: 485 | fig = plt.figure() 486 | if ax is None: 487 | ax = fig.add_subplot() 488 | 489 | if title is not None: 490 | ax.set_title(title) 491 | 492 | if (len(y_true.shape) == 2) and (y_true.shape[1] == 2): 493 | y_true = y_true[:, 1] 494 | if (len(y_true.shape) == 2) and (y_true.shape[1] > 2): 495 | raise ValueError('y_true wrong dimensions {}'.format(y_true.shape)) 496 | 497 | if (len(p_pred.shape) == 2) and (p_pred.shape[1] == 2): 498 | p_pred = p_pred[:, 1] 499 | if (len(p_pred.shape) == 2) and (p_pred.shape[1] > 2): 500 | raise ValueError('p_pred wrong dimensions {}'.format(p_pred.shape)) 501 | 502 | bin_size = 1.0/n_bins 503 | centers = np.linspace(bin_size/2.0, 1.0 - bin_size/2.0, n_bins) 504 | true_proportion = np.zeros(n_bins) 505 | pred_mean = np.zeros(n_bins) 506 | for i, center in enumerate(centers): 507 | if i == 0: 508 | # First bin includes lower bound 509 | bin_indices = np.where(np.logical_and( 510 | p_pred >= center - bin_size/2, 511 | p_pred <= center + bin_size/2)) 512 | else: 513 | bin_indices = np.where(np.logical_and(p_pred > center - bin_size/2, 514 | p_pred <= center + 515 | bin_size/2)) 516 | if len(bin_indices[0]) == 0: 517 | true_proportion[i] = np.nan 518 | pred_mean[i] = np.nan 519 | else: 520 | true_proportion[i] = np.mean(y_true[bin_indices]) 521 | pred_mean[i] = np.nanmean(p_pred[bin_indices]) 522 | 523 | not_nan = np.isfinite(true_proportion - centers) 524 | ax.bar(centers, true_proportion, width=bin_size, edgecolor="black", 525 | # color="blue", label='True class prop.') 526 | color=color, label='True class prop.') 527 | ax.bar(pred_mean[not_nan], (true_proportion - pred_mean)[not_nan], 528 | bottom=pred_mean[not_nan], width=0.01, 529 | edgecolor=color_gaps, 530 | color=color_gaps, 531 | label='Gap pred.
mean', align='center') 532 | 533 | if legend: 534 | ax.legend() 535 | 536 | ax.plot([0, 1], [0, 1], linestyle="--", color='grey', zorder=10) 537 | ax.set_xlim([0, 1]) 538 | ax.set_ylim([0, 1]) 539 | 540 | ax.set_ylabel('Fraction of positives') 541 | ax.grid(True) 542 | ax.set_axisbelow(True) 543 | 544 | if show_histogram: 545 | ax.set_xticklabels([]) 546 | 547 | divider = make_axes_locatable(ax) 548 | ax2 = divider.append_axes("bottom", size="20%", pad=0.1, sharex=ax) 549 | 550 | ax2.hist(p_pred, range=(0, 1), 551 | bins=n_bins, 552 | histtype="bar", 553 | lw=1, 554 | color=color, 555 | edgecolor='black') 556 | 557 | ax2.set_ylabel('Count') 558 | ax2.grid(True, which='both') 559 | ax2.set_axisbelow(True) 560 | ax2.set_xlabel('Predicted probability') 561 | else: 562 | ax.set_xlabel('Predicted probability') 563 | 564 | return fig, ax 565 | 566 | 567 | def plot_multiclass_reliability_diagram_gaps(y_true, p_pred, fig=None, ax=None, 568 | per_class=True, legend=False, 569 | **kwargs): 570 | 571 | if len(y_true.shape) < 2 or y_true.shape[1] == 1: 572 | ohe = OneHotEncoder(categories='auto') 573 | ohe.fit(y_true.reshape(-1, 1)) 574 | y_true = ohe.transform(y_true.reshape(-1, 1)) 575 | 576 | if per_class: 577 | n_classes = y_true.shape[1] 578 | if fig is None and ax is None: 579 | fig = plt.figure(figsize=((n_classes-1)*4, 4)) 580 | if ax is None: 581 | ax = [fig.add_subplot(1, n_classes, i+1) for i in range(n_classes)] 582 | for i in range(n_classes): 583 | if i == 0 and legend: 584 | sub_legend = True 585 | else: 586 | sub_legend = False 587 | plot_binary_reliability_diagram_gaps(y_true[:, i], p_pred[:, i], 588 | title='$C_{}$'.format(i+1), 589 | fig=fig, ax=ax[i], 590 | legend=sub_legend, 591 | **kwargs) 592 | if i > 0: 593 | ax[i].set_ylabel('') 594 | ax[i].set_xlabel('Predicted probability') 595 | else: 596 | if fig is None and ax is None: 597 | fig = plt.figure() 598 | mask = p_pred.argmax(axis=1) 599 | indices = np.arange(p_pred.shape[0]) 600 | y_true = y_true[indices, mask].T 601 | p_pred = p_pred[indices, mask].T 602 | ax = fig.add_subplot(1, 1, 1) 603 | plot_binary_reliability_diagram_gaps(y_true, p_pred, 604 | title=r'$C_1$', 605 | fig=fig, ax=ax, **kwargs) 606 | ax.set_title('') 607 | 608 | return fig 609 | 610 | 611 | def plot_confusion_matrix(cm, classes, normalize=False, 612 | title='Confusion matrix', cmap=plt.cm.Blues, 613 | fig=None, ax=None): 614 | """ 615 | This function prints and plots the confusion matrix. 616 | Normalization can be applied by setting `normalize=True`. 617 | """ 618 | if fig is None: 619 | fig = plt.figure() 620 | 621 | if ax is None: 622 | ax = fig.add_subplot(111) 623 | 624 | if title is not None: 625 | ax.set_title(title) 626 | 627 | if normalize: 628 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 629 | 630 | im = ax.imshow(cm, interpolation='nearest', cmap=cmap) 631 | 632 | # create an axes on the right side of ax. The width of cax will be 5% 633 | # of ax and the padding between cax and ax will be fixed at 0.05 inch. 634 | divider = make_axes_locatable(ax) 635 | cax = divider.append_axes("right", size="5%", pad=0.05) 636 | 637 | fig.colorbar(im, cax=cax) 638 | 639 | tick_marks = np.arange(len(classes)) 640 | ax.set_xticks(tick_marks) 641 | ax.set_xticklabels(classes, rotation=45) 642 | ax.set_yticks(tick_marks) 643 | ax.set_yticklabels(classes) 644 | 645 | fmt = '.2f' if normalize else 'd' 646 | thresh = cm.max() / 2. 
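# Annotate every cell with its value, using white text on cells above the mid-range threshold and black text otherwise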
647 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 648 | ax.text(j, i, format(cm[i, j], fmt), 649 | horizontalalignment="center", 650 | color="white" if cm[i, j] > thresh else "black") 651 | 652 | ax.set_ylabel('True label') 653 | ax.set_xlabel('Predicted label') 654 | return fig 655 | 656 | 657 | def plot_individual_pdfs(class_dist, x_grid=None, y_grid=None, 658 | grid_levels=200, fig=None, title=None, 659 | cmaps=None, grid=True): 660 | if fig is None: 661 | fig = plt.figure() 662 | 663 | if x_grid is None: 664 | x_grid = np.linspace(-8, 8, grid_levels) 665 | else: 666 | grid_levels = len(x_grid) 667 | 668 | if y_grid is None: 669 | y_grid = np.linspace(-8, 8, grid_levels) 670 | 671 | xx, yy = np.meshgrid(x_grid, y_grid) 672 | 673 | if cmaps is None: 674 | cmaps = [None]*len(class_dist.priors) 675 | 676 | for i, (p, d) in enumerate(zip(class_dist.priors, 677 | class_dist.distributions)): 678 | z = d.pdf(np.vstack([xx.flatten(), yy.flatten()]).T) 679 | 680 | ax = fig.add_subplot(1, len(class_dist.distributions), i+1) 681 | if title is None: 682 | ax.set_title('$P(Y={})={:.2f}$\n{}'.format(i+1, p, str(d)), 683 | loc='left') 684 | else: 685 | ax.set_title(title[i]) 686 | contour = ax.contourf(xx, yy, z.reshape(grid_levels, grid_levels), 687 | cmap=cmaps[i]) 688 | if grid: 689 | ax.grid() 690 | fig.colorbar(contour) 691 | 692 | return fig 693 | 694 | 695 | def plot_critical_difference(avranks, num_datasets, names, title=None, 696 | test='bonferroni-dunn'): 697 | """ 698 | test: string in ['nemenyi', 'bonferroni-dunn'] 699 | - nemenyi two-tailed test (up to 20 methods) 700 | - bonferroni-dunn one-tailed test (only up to 10 methods) 701 | 702 | """ 703 | # Critical difference plot 704 | import Orange 705 | 706 | if len(avranks) > 10: 707 | print('Forcing Nemenyi Critical difference') 708 | test = 'nemenyi' 709 | cd = Orange.evaluation.compute_CD(avranks, num_datasets, alpha='0.05', 710 | test=test) 711 | Orange.evaluation.graph_ranks(avranks, names, cd=cd, width=6, 712 | textspace=1.5) 713 | fig = plt.gcf() 714 | fig.suptitle(title, horizontalalignment='left') 715 | return fig 716 | 717 | 718 | def plot_df_to_heatmap(df, title=None, figsize=None, annotate=True, 719 | normalise_columns=False, normalise_rows=False, 720 | cmap=None): 721 | """ Exports a heatmap of the given pandas DataFrame 722 | 723 | Parameters 724 | ---------- 725 | df: pandas.DataFrame 726 | It should be a matrix, it can have multiple index and these will be 727 | flattened. 728 | 729 | title: string 730 | Title of the figure 731 | 732 | figsize: tuple of ints (x, y) 733 | Figure size in inches 734 | 735 | annotate: bool 736 | If true, adds numbers inside each box 737 | """ 738 | if normalise_columns: 739 | df = df_normalise(df, columns=True) 740 | if normalise_rows: 741 | df = df_normalise(df, columns=False) 742 | 743 | yticklabels = multiindex_to_strings(df.index) 744 | xticklabels = multiindex_to_strings(df.columns) 745 | if figsize is not None: 746 | fig = plt.figure(figsize=figsize) 747 | else: 748 | point_inch_ratio = 72. 
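# No figsize given: estimate the figure width and height in inches from the tick-label lengths, the number of rows and columns, and the current font size (72 points per inch)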
749 | n_rows = df.shape[0] 750 | font_size_pt = plt.rcParams['font.size'] 751 | xlabel_space_pt = max([len(xlabel) for xlabel in xticklabels]) 752 | fig_height_in = (((xlabel_space_pt + n_rows) * (font_size_pt + 3)) 753 | / point_inch_ratio) 754 | 755 | n_cols = df.shape[1] 756 | fig_width_in = df.shape[1]+4 757 | ylabel_space_pt = max([len(ylabel) for ylabel in yticklabels]) 758 | fig_width_in = ((ylabel_space_pt + (n_cols * 3) + 5) 759 | * (font_size_pt + 3)) / point_inch_ratio 760 | fig = plt.figure(figsize=(fig_width_in, fig_height_in)) 761 | 762 | ax = fig.add_subplot(111) 763 | if title is not None: 764 | ax.set_title(title) 765 | cax = ax.pcolor(df, cmap=cmap) 766 | fig.colorbar(cax) 767 | ax.set_yticks(np.arange(0.5, len(df.index), 1)) 768 | ax.set_yticklabels(yticklabels) 769 | ax.set_xticks(np.arange(0.5, len(df.columns), 1)) 770 | ax.set_xticklabels(xticklabels, rotation=45, ha="right") 771 | 772 | middle_value = (df.max().max() + df.min().min())/2.0 773 | if annotate: 774 | for y in range(df.shape[0]): 775 | for x in range(df.shape[1]): 776 | color = 'white' if middle_value > df.values[y, x] else 'black' 777 | plt.text(x + 0.5, y + 0.5, '%.2f' % df.values[y, x], 778 | horizontalalignment='center', 779 | verticalalignment='center', 780 | color=color 781 | ) 782 | return fig 783 | 784 | 785 | def plot_calibration_map(scores_set, prob, legend_set, original_first=False, 786 | alpha=1, **kwargs): 787 | fig_calibration_map = plt.figure('calibration_map') 788 | fig_calibration_map.clf() 789 | ax_calibration_map = plt.subplot(111) 790 | ax = ax_calibration_map 791 | # ax.set_title('calibration map') 792 | ax.set_ylim([0, 1]) 793 | ax.set_xlim([0, 1]) 794 | n_lines = len(legend_set) 795 | if original_first: 796 | bins = np.linspace(0, 1, 11) 797 | hist_tot = np.histogram(prob[0], bins=bins) 798 | hist_pos = np.histogram(prob[0][prob[1] == 1], bins=bins) 799 | edges = np.insert(bins, np.arange(len(bins)), bins) 800 | empirical_p = np.true_divide(hist_pos[0]+alpha, hist_tot[0]+2*alpha) 801 | empirical_p = np.insert(empirical_p, np.arange(len(empirical_p)), 802 | empirical_p) 803 | ax.plot(edges[1:-1], empirical_p, label='empirical') 804 | 805 | skip = original_first 806 | for (scores, legend) in zip(scores_set, legend_set): 807 | if skip and original_first: 808 | skip = False 809 | else: 810 | if legend == 'uncalib': 811 | ax.plot([np.nan], [np.nan], '-', linewidth=n_lines, 812 | **kwargs) 813 | else: 814 | ax.plot(prob[2], scores, '-', label=legend, linewidth=n_lines, 815 | **kwargs) 816 | n_lines -= 1 817 | if original_first: 818 | ax.plot(prob[0], prob[1], 'kx', 819 | label=legend_set[0], markersize=9, markeredgewidth=1) 820 | ax.legend(loc='upper left') 821 | ax.grid(True) 822 | return fig_calibration_map 823 | -------------------------------------------------------------------------------- /pycalib/visualisations/ternary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib.tri as tri 4 | from matplotlib import ticker 5 | from .barycentric import bc2xy, xy2bc 6 | 7 | 8 | def draw_tri_samples(pvals, classes, labels=None, fig=None, ax=None, 9 | legend=True, color_list=[None]*3, 10 | **kwargs): 11 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 12 | 13 | if fig is None: 14 | fig = plt.figure() 15 | if ax is None: 16 | ax = fig.add_subplot(111) 17 | 18 | if labels is None: 19 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 20 | center = 
corners.mean(axis=0) 21 | for i, corner in enumerate(corners): 22 | text_x, text_y = corner - (center - corner)*0.1 23 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 24 | horizontalalignment='center') 25 | 26 | xy = bc2xy(pvals, corners) 27 | 28 | # TODO Find option to call scatter only once as now the latter classes are 29 | # on top of the previous ones 30 | for c in [0, 1, 2]: 31 | c_idx = classes == c 32 | ax.scatter(xy[c_idx, 0], xy[c_idx, 1], 33 | label=labels[c], color=color_list[c], 34 | **kwargs) 35 | if legend: 36 | leg = ax.legend() 37 | for lh in leg.legendHandles: 38 | lh.set_alpha(1) 39 | 40 | ax.axis('equal') 41 | ax.set_xlim(0, 1) 42 | ax.set_ylim(0, 0.75**0.5) 43 | ax.set_xbound(lower=-0.01, upper=1.01) 44 | ax.set_ybound(lower=-0.01, upper=(0.75**0.5)+0.01) 45 | ax.axis('off') 46 | 47 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 48 | ax.triplot(triangle, c='k', lw=0.5) 49 | 50 | return fig, ax 51 | 52 | 53 | def draw_func_contours(func, labels=None, nlevels=200, subdiv=5, fig=None, 54 | ax=None, draw_lines=None, class_index=0, **kwargs): 55 | """ 56 | Parameters: 57 | ----------- 58 | labels: None, string or list of strings 59 | If labels == 'auto' it shows the class number on each corner 60 | If labels is a list of strings it shows each string in the 61 | corresponding corner 62 | If None does not show any label 63 | """ 64 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 65 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 66 | 67 | refiner = tri.UniformTriRefiner(triangle) 68 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 69 | 70 | pvals = np.array([func(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]) 71 | 72 | if fig is None: 73 | fig = plt.figure() 74 | if ax is None: 75 | ax = fig.add_subplot(111) 76 | 77 | # FIXME I would like the following line to work, but the max value is 78 | # not shown. I had to create the levels manually and increase the 79 | # max value by an epsilon.
This could be a major problem if the epsilon 80 | # is not small for the original range of values 81 | # contour = ax.tricontourf(trimesh, pvals, nlevels, **kwargs) 82 | # contour = ax.tricontourf(trimesh, pvals, nlevels, extend='both') 83 | contour = ax.tricontourf(trimesh, pvals, 84 | levels=np.linspace(pvals.min(), pvals.max()+1e-9, 85 | nlevels), 86 | **kwargs) 87 | 88 | # Colorbar 89 | # TODO See if the following way to define the size of the bar can be used 90 | # from mpl_toolkits.axes_grid1 import make_axes_locatable 91 | # divider = make_axes_locatable(ax) 92 | # cax = divider.append_axes("bottom", size="5%", pad=0.1) 93 | # cb = fig.colorbar(contour, ax=cax, orientation='horizontal') 94 | cb = fig.colorbar(contour, ax=ax, orientation='horizontal', 95 | fraction=0.05, pad=0.06) 96 | tick_locator = ticker.MaxNLocator(nbins=5) 97 | cb.locator = tick_locator 98 | # cb.ax.xaxis.set_major_locator(ticker.AutoLocator()) 99 | cb.update_ticks() 100 | 101 | if labels is None: 102 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 103 | 104 | center = corners.mean(axis=0) 105 | for i, corner in enumerate(corners): 106 | text_x, text_y = corner - (center - corner)*0.1 107 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 108 | horizontalalignment='center') 109 | 110 | if draw_lines is not None: 111 | lines = get_converging_lines(num_lines=draw_lines, mesh_precision=2, 112 | class_index=class_index) 113 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 114 | for line in lines: 115 | line = bc2xy(line, corners).T 116 | ax.plot(line[0], line[1]) 117 | # l = mlines.Line2D() 118 | # ax.add_line(l) 119 | 120 | # Axes options 121 | ax.set_xlim(xmin=0, xmax=1) 122 | ax.set_ylim(ymin=0, ymax=0.75**0.5) 123 | ax.set_xbound(lower=0, upper=1) 124 | ax.set_ybound(lower=0, upper=0.75**0.5) 125 | ax.axis('equal') 126 | ax.axis('off') 127 | 128 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 129 | ax.triplot(triangle, c='k', lw=0.5) 130 | 131 | plt.gca().set_adjustable("box") 132 | return fig 133 | 134 | 135 | def plot_converging_lines_pvalues(func, lines, i, ax): 136 | """ 137 | Plots the probability values of the given function for each given line. 138 | The i indicates the class index from 0 to 2 139 | """ 140 | # This orders the classes in the following manner: 141 | # C1, C2, C3 142 | # C2, C3, C1 143 | # C3, C1, C2 144 | classes = np.roll(np.array([0, 1, 2]), -i) 145 | 146 | for j, line in enumerate(lines): 147 | pvalues = np.array([func(p) for p in line]).flatten() 148 | ax.plot(line[:, i], pvalues, 149 | label=r'$C_{}/C_{} = {}/{}$'.format( 150 | classes[1]+1, classes[2]+1, j, len(lines)-j-1)) 151 | ax.legend() 152 | 153 | 154 | def get_converging_lines(num_lines, mesh_precision=10, class_index=0, 155 | tol=1e-6): 156 | """ 157 | If class_index = 0 158 | Create isometric lines from the opposite side of the C1 simplex to the C1 corner 159 | First line has C2 fixed to 0 160 | Last line has C3 fixed to 0 161 | Class 3 line 1 start 162 | /\\ 163 | / \\ 164 | / \\ line 2 start 165 | / - \\ 166 | / -/ \\ 167 | / -/ \\ 168 | / -/ ---\\ line 3 start 169 | /-/ -----/ \\ 170 | //---/ \\ 171 | -------------------- line 4 start 172 | Class 1(lines end) Class 2 173 | 174 | Else if class_index is in [1, 2] 175 | Then the previously described lines are rotated towards the indicated class. 176 | The lines always follow a clockwise order.
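Examples
--------
A minimal sketch of the expected output (the shapes follow from the construction in the body below; tol is the clipping tolerance argument):
>>> lines = get_converging_lines(num_lines=3, mesh_precision=3)
>>> lines.shape
(3, 3, 3)
Here the middle line runs from [tol, 0.5, 0.5] (the midpoint of the side opposite C1) to [1 - tol, tol, tol] (the C1 corner), in barycentric coordinates (C1, C2, C3).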
177 | """ 178 | p = np.linspace(0, 1, mesh_precision).reshape(-1, 1) 179 | if num_lines == 1: 180 | q = [0.5] 181 | else: 182 | q = np.linspace(0, 1, num_lines).reshape(-1, 1) 183 | lines = [np.hstack((p, (1-p)*q[i], (1-p)*(1-q[i]))) for i in range(len(q))] 184 | if class_index > 0: 185 | indices = np.array([0, 1, 2]) 186 | lines = [line[:, np.roll(indices, class_index)] for i, line in 187 | enumerate(lines)] 188 | return np.clip(lines, tol, 1.0 - tol) 189 | -------------------------------------------------------------------------------- /pycalib/visualisations/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/classifier-calibration/PyCalib/8208ab907d5b9c5149b2d45b1c8b6e4b2d763317/pycalib/visualisations/tests/__init__.py -------------------------------------------------------------------------------- /pycalib/visualisations/tests/test_init.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from pycalib.visualisations import plot_reliability_diagram 6 | 7 | 8 | class TestVisualisations(unittest.TestCase): 9 | def test_plot_reliability_diagram(self): 10 | n_c1 = n_c2 = 500 11 | p = np.concatenate((np.random.beta(2, 5, n_c1), 12 | np.random.beta(4, 3, n_c2))) 13 | 14 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 15 | 16 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 17 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 18 | 19 | p = np.vstack((1 - p, p)).T 20 | s1 = np.vstack((1 - s1, s1)).T 21 | s2 = np.vstack((1 - s2, s2)).T 22 | 23 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2]) 24 | self.assertIsInstance(fig, plt.Figure) 25 | 26 | def test_plot_reliability_diagram_confidence(self): 27 | n_c1 = n_c2 = 500 28 | p = np.concatenate((np.random.beta(2, 5, n_c1), 29 | np.random.beta(4, 3, n_c2))) 30 | 31 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 32 | 33 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 34 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 35 | 36 | p = np.vstack((1 - p, p)).T 37 | s1 = np.vstack((1 - s1, s1)).T 38 | s2 = np.vstack((1 - s2, s2)).T 39 | 40 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 41 | confidence=True) 42 | self.assertIsInstance(fig, plt.Figure) 43 | 44 | def test_plot_reliability_diagram_simple(self): 45 | n_c1 = n_c2 = 500 46 | p = np.concatenate((np.random.beta(2, 5, n_c1), 47 | np.random.beta(4, 3, n_c2))) 48 | 49 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 50 | 51 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 52 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 53 | 54 | p = np.vstack((1 - p, p)).T 55 | s1 = np.vstack((1 - s1, s1)).T 56 | s2 = np.vstack((1 - s2, s2)).T 57 | 58 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 59 | show_histogram=False) 60 | self.assertIsInstance(fig, plt.Figure) 61 | 62 | fig = plot_reliability_diagram(labels=y, scores=s2, 63 | show_histogram=True) 64 | self.assertIsInstance(fig, plt.Figure) 65 | 66 | def test_plot_reliability_diagram_full(self): 67 | n_c1 = n_c2 = 500 68 | p = np.concatenate((np.random.beta(2, 5, n_c1), 69 | np.random.beta(4, 3, n_c2) 70 | )) 71 | 72 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 73 | 74 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 75 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 76 | s1 = np.vstack((1 - s1, s1)).T 77 | s2 = np.vstack((1 - s2, s2)).T 78 | 79 | fig = plot_reliability_diagram(labels=y, scores=s1, 80 | legend=['Model 1'], 81 | show_histogram=True, bins=9, 82 | class_names=['Negative', 'Positive'], 
83 | show_counts=True, show_correction=True, 84 | show_gaps=True, sample_proportion=0.5, 85 | errorbar_interval=0.95, 86 | hist_per_class=True) 87 | self.assertIsInstance(fig, plt.Figure) 88 | 89 | class_2_idx = range(int(len(y)/3), int(2*len(y)/3)) 90 | y[class_2_idx] = 2 91 | s1 = np.hstack((s1, s1[:, 1].reshape(-1, 1))) 92 | s1[class_2_idx, 2] *= 3 93 | s1 /= s1.sum(axis=1)[:, None] 94 | s2 = np.hstack((s2, s2[:, 1].reshape(-1, 1))) 95 | s2[class_2_idx, 2] *= 2 96 | s2 /= s2.sum(axis=1)[:, None] 97 | 98 | bins = [0, .3, .5, .8, 1] 99 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 100 | legend=['Model 3', 'Model 4'], 101 | show_histogram=True, 102 | show_correction=True, 103 | show_counts=True, 104 | show_bars=True, 105 | sample_proportion=0.3, 106 | bins=bins, 107 | color_list=['darkgreen', 'chocolate'], 108 | invert_histogram=True) 109 | self.assertIsInstance(fig, plt.Figure) 110 | 111 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 112 | legend=['Model 3', 'Model 4'], 113 | show_histogram=True, 114 | show_correction=True, 115 | show_counts=True, 116 | sample_proportion=0.3, 117 | bins=bins, 118 | color_list=['darkgreen', 'chocolate'], 119 | invert_histogram=True, 120 | confidence=True) 121 | self.assertIsInstance(fig, plt.Figure) 122 | 123 | 124 | def main(): 125 | unittest.main() 126 | 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | codecov 2 | flake8 3 | mypy 4 | nbval 5 | numpydoc 6 | pylint 7 | pytest 8 | pytest-cov 9 | sphinx 10 | sphinx-gallery 11 | twine 12 | yapf 13 | restview 14 | sphinx-rtd-theme 15 | readme-renderer 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.22 2 | scipy>=1.6 3 | scikit-learn>=0.24 4 | matplotlib>=3.3 5 | statsmodels>=0.12 6 | orange3>=3.28 7 | betacal>=1.1.0 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.util import convert_path 2 | from setuptools import setup, find_packages 3 | from os import path 4 | 5 | this_directory = path.abspath(path.dirname(__file__)) 6 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | main_ns = {} 10 | ver_path = convert_path('pycalib/__init__.py') 11 | with open(ver_path) as ver_file: 12 | exec(ver_file.read(), main_ns) 13 | 14 | setup( 15 | name = 'pycalib', 16 | packages = find_packages(exclude=['tests.*', 'tests', 'docs.*', 'docs']), 17 | install_requires=[ 18 | 'numpy>=1.22', 19 | 'scipy>=1.6', 20 | 'scikit-learn>=0.24', 21 | 'matplotlib>=3.3', 22 | 'statsmodels>=0.12' 23 | ], 24 | version=main_ns['__version__'], 25 | description = 'Python library with tools for classifier calibration.', 26 | author = 'Miquel Perello Nieto, Hao Song, Telmo de Menezes e Silva Filho', 27 | author_email = 'perello.nieto@gmail.com', 28 | url = 'https://classifier-calibration.github.io/PyCalib/', 29 | download_url = 
'https://github.com/classifier-calibration/archive/{}.tar.gz'.format(main_ns['__version__']), 30 | keywords = ['classifier calibration', 'calibration', 'classification'], 31 | classifiers = [], 32 | long_description=long_description, 33 | long_description_content_type='text/markdown' 34 | ) 35 | --------------------------------------------------------------------------------