├── .github └── workflows │ ├── ci.yml │ └── documentation.yml ├── .gitignore ├── CITATION.cff ├── LICENSE.txt ├── Makefile ├── README.md ├── docs ├── Makefile ├── make.bat └── source │ ├── .nojekyll │ ├── api │ ├── metrics.rst │ ├── models.rst │ ├── stats.rst │ └── visualisations.rst │ ├── conf.py │ ├── index.rst │ └── usage │ ├── installation.rst │ └── quickstart.rst ├── examples ├── README.txt ├── xmpl_quickstart.py ├── xmpl_reliability_diagram.py ├── xmpl_ternary_contours.py └── xmpl_ternary_samples.py ├── pycalib ├── __init__.py ├── metrics.py ├── models │ ├── __init__.py │ ├── calibrators.py │ └── multiclass.py ├── stats.py ├── tests │ ├── __init__.py │ ├── models │ │ ├── __init__.py │ │ └── test_init.py │ └── test_metrics.py ├── utils.py └── visualisations │ ├── __init__.py │ ├── barycentric.py │ ├── plot.py │ ├── ternary.py │ └── tests │ ├── __init__.py │ └── test_init.py ├── requirements-dev.txt ├── requirements.txt ├── setup.cfg └── setup.py /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | jobs: 10 | build: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v2 14 | - name: Install Python 3 15 | uses: actions/setup-python@v1 16 | with: 17 | python-version: 3.8 18 | - name: Install dependencies 19 | run: | 20 | python -m pip install --upgrade pip 21 | pip install pytest 22 | pip install pytest-cov 23 | pip install -r requirements.txt 24 | - name: Run tests with pytest 25 | run: pytest --cov-report=xml --cov=pycalib pycalib 26 | - name: Upload coverage to Codecov 27 | uses: codecov/codecov-action@v1 28 | with: 29 | token: ${{ secrets.CODECOV_TOKEN }} 30 | files: ./coverage.xml 31 | directory: ./coverage/reports/ 32 | flags: unittests 33 | env_vars: OS,PYTHON 34 | name: codecov-umbrella 35 | fail_ci_if_error: true 36 | path_to_write_report: ./coverage/codecov_report.txt 37 | verbose: true 38 | -------------------------------------------------------------------------------- /.github/workflows/documentation.yml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: 3 | push: 4 | branches: 5 | - master 6 | jobs: 7 | build-n-deploy: 8 | name: Build and deploy 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - name: Checkout code 🛎️ 12 | uses: actions/checkout@v2.3.1 13 | with: 14 | persist-credentials: false 15 | - name: Set up Python 3.8 🐍 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: 3.8 19 | - name: Install dependencies 💾 20 | run: | 21 | pip install -r requirements.txt 22 | pip install -r requirements-dev.txt 23 | - name: Build the documentation 🔧📖 24 | run: | 25 | cd docs 26 | make html 27 | - name: Deploy 🚀 28 | if: github.ref == 'refs/heads/master' 29 | uses: JamesIves/github-pages-deploy-action@3.7.1 30 | with: 31 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 32 | BRANCH: gh-pages 33 | FOLDER: docs/build/html 34 | CLEAN: true 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | MANIFEST 3 | dist/ 4 | pycalib.egg-info 5 | pycalib/__pycache__/ 6 | pycalib/tests/__pycache__/ 7 | venv 8 | .coverage 9 | coverage.xml 10 | *.swp 11 | *.pdf 12 | *.svg 13 | *.png 14 | .ipynb_checkpoints/ 15 | /docs/ 16 | -------------------------------------------------------------------------------- /CITATION.cff: 
-------------------------------------------------------------------------------- 1 | cff-version: 1.2.0 2 | message: "If you use this software, please cite it as below." 3 | authors: 4 | - family-names: "Perello-Nieto" 5 | given-names: "Miquel" 6 | orcid: "https://orcid.org/0000-0001-8925-424X" 7 | - family-names: "Song" 8 | given-names: "Hao" 9 | - family-names: "Silva-Filho" 10 | given-names: "Telmo" 11 | - family-names: "Kängsepp" 12 | given-names: "Markus" 13 | title: "PyCalib a library for classifier calibration" 14 | version: 0.1.0.dev0 15 | doi: 10.5281/zenodo.5518877 16 | date-released: 2021-08-20 17 | url: "https://github.com/perellonieto/PyCalib" 18 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2018-2021 Miquel Perello-Nieto 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: venv 2 | 3 | pip: 4 | pip install --upgrade pip 5 | 6 | venv: 7 | python3.8 -m venv venv 8 | 9 | requirements: pip 10 | pip install -r requirements.txt 11 | 12 | requirements-dev: requirements pip 13 | pip install -r requirements-dev.txt 14 | 15 | build: requirements-dev 16 | python3.8 setup.py sdist 17 | 18 | pypi: build check-readme 19 | twine upload dist/* 20 | 21 | doc: requirements-dev 22 | cd docs; make clean; make html 23 | 24 | # From Scikit-learn 25 | code-analysis: 26 | flake8 pycalib | grep -v external 27 | pylint -E pycalib/ -d E1103,E0611,E1101 --generated-members=Blues --ignored-modules=scipy.special 28 | 29 | clean: 30 | rm -rf ./dist 31 | 32 | # All the following assume the requirmenets-dev are installed, but to make the 33 | # output clean the dependency has been removed 34 | test: 35 | pytest --doctest-modules --cov-report=term-missing --cov=pycalib pycalib 36 | 37 | check-readme: 38 | twine check dist/* 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![CI][ci:b]][ci] 2 | [![Documentation][documentation:b]][documentation] 3 | [![License BSD3][license:b]][license] 4 | ![Python3.8][python:b] 5 | [![pypi][pypi:b]][pypi] 6 | [![codecov][codecov:b]][codecov] 7 | [![DOI](https://zenodo.org/badge/280465805.svg)](https://zenodo.org/badge/latestdoi/280465805) 8 | 9 | 10 | [ci]: https://github.com/classifier-calibration/PyCalib/actions/workflows/ci.yml 11 | [ci:b]: https://github.com/classifier-calibration/pycalib/workflows/CI/badge.svg 12 | [documentation]: https://github.com/classifier-calibration/PyCalib/actions/workflows/documentation.yml 13 | [documentation:b]: https://github.com/classifier-calibration/pycalib/workflows/Documentation/badge.svg 14 | [license]: https://github.com/classifier-calibration/PyCalib/blob/master/LICENSE.txt 15 | [license:b]: https://img.shields.io/github/license/classifier-calibration/pycalib.svg 16 | [python:b]: https://img.shields.io/badge/python-3.8-blue 17 | [pypi]: https://badge.fury.io/py/pycalib 18 | [pypi:b]: https://badge.fury.io/py/pycalib.svg 19 | [codecov]: https://codecov.io/gh/classifier-calibration/PyCalib 20 | [codecov:b]: https://codecov.io/gh/classifier-calibration/PyCalib/branch/master/graph/badge.svg?token=AYMZPLELT3 21 | 22 | 23 | 24 | PyCalib 25 | ======= 26 | Python library for classifier calibration 27 | 28 | User installation 29 | ----------------- 30 | 31 | The PyCalib package can be installed from Pypi with the command 32 | 33 | ``` 34 | pip install pycalib 35 | ``` 36 | 37 | Documentation 38 | ------------- 39 | 40 | The documentation can be found at https://classifier-calibration.github.io/PyCalib/ 41 | 42 | Development 43 | =========== 44 | 45 | There is a make file to automate some of the common tasks during development. 46 | After downloading the repository create the virtual environment with the 47 | command 48 | 49 | ``` 50 | make venv 51 | ``` 52 | 53 | This will create a `venv` folder in your current folder. 
The environment needs
54 | to be activated outside of the Makefile with
55 | 
56 | ```
57 | source venv/bin/activate
58 | ```
59 | 
60 | After the environment is loaded, all dependencies can be installed with
61 | 
62 | ```
63 | make requirements-dev
64 | ```
65 | 
66 | Unittest
67 | --------
68 | 
69 | Unittests are specified as doctest examples in simple functions (see for example
70 | the docstrings in `pycalib/metrics.py`), and more complex tests in their own
71 | Python files starting with `test_` (see for example `pycalib/tests/test_metrics.py`).
72 | 
73 | Run the unittests with the command
74 | 
75 | ```
76 | make test
77 | ```
78 | 
79 | The test run will show the unittest results, including the code coverage.
80 | Ideally we want to increase the coverage to cover most of the library.
81 | 
82 | Continuous Integration
83 | ----------------------
84 | 
85 | Every time a commit is pushed to the master branch the unittests are run following
86 | the workflow [.github/workflows/ci.yml](.github/workflows/ci.yml). The CI badge
87 | in the README file will show if the test has passed or not.
88 | 
89 | Analyse code
90 | ------------
91 | 
92 | We are trying to follow the same code standards as in [Numpy][numpy:c] and
93 | [Scikit-learn][sklearn:c]. It is possible to check for PEP 8 and other code
94 | conventions with
95 | 
96 | [numpy:c]: https://numpy.org/devdocs/dev/index.html
97 | [sklearn:c]: https://scikit-learn.org/stable/developers/index.html
98 | 
99 | ```
100 | make code-analysis
101 | ```
102 | 
103 | Documentation
104 | -------------
105 | 
106 | The documentation can be found at
107 | [https://www.classifier-calibration.com/PyCalib/](https://www.classifier-calibration.com/PyCalib/),
108 | and it is automatically updated after every push to the master branch.
109 | 
110 | All documentation is done using the [Sphinx documentation
111 | generator][sphinx:l]. The documentation is written in
112 | [reStructuredText][rst:l] (\*.rst) files in the `docs/source` folder. We try to
113 | follow the conventions from [Numpy][numpy:d] and [Scikit-learn][sklearn:d].
114 | 
115 | [numpy:d]: https://numpydoc.readthedocs.io/en/latest/format.html
116 | [sklearn:d]: https://scikit-learn.org/stable/developers/contributing.html#documentation
117 | 
118 | The examples with images in the folder `docs/source/examples` are generated
119 | automatically with [Sphinx-gallery][sphinx:g] from the Python code in the folder
120 | [examples/](examples/) starting with `xmpl_{example_name}.py`.
121 | 
122 | [rst:l]: https://docutils.sourceforge.io/rst.html
123 | [sphinx:l]: https://www.sphinx-doc.org/en/master/
124 | [sphinx:g]: https://sphinx-gallery.github.io/stable/index.html
125 | 
126 | The documentation can be built with the command
127 | 
128 | ```
129 | make doc
130 | ```
131 | 
132 | (Keep in mind that the documentation has its own Makefile inside the folder [docs](docs).)
133 | 
134 | After building the documentation, a new folder should appear in `docs/build/`
135 | with an `index.html` that can be opened locally for further exploration.
136 | 
137 | The documentation is always built and deployed every time a new commit is
138 | pushed to the master branch with the workflow
139 | [.github/workflows/documentation.yml](.github/workflows/documentation.yml).
140 | 
141 | After building, the `docs/build/html` folder is pushed to the branch
142 | [gh-pages][gh:l].
143 | 144 | [gh:l]: https://github.com/perellonieto/PyCalib/tree/gh-pages 145 | 146 | Check Readme 147 | ------------ 148 | 149 | It is possible to check that the README file passes some tests for Pypi by 150 | running 151 | 152 | ``` 153 | make check-readme 154 | ``` 155 | 156 | Upload to PyPi 157 | -------------- 158 | 159 | After testing that the code passes all unittests and upgrading the version in 160 | the file `pycalib/__init__.py` the code can be published in Pypi with the 161 | following command: 162 | 163 | ``` 164 | make pypi 165 | ``` 166 | 167 | It may require user and password if these are not set in your home directory a 168 | file __.pypirc__ 169 | 170 | ``` 171 | [pypi] 172 | username = __token__ 173 | password = pypi-yourtoken 174 | ``` 175 | 176 | Contributors 177 | ------------ 178 | 179 | This code has been adapted by Miquel from several previous codes. The following 180 | is a list of people that has been involved in some parts of the code. 181 | 182 | - Miquel Perello Nieto 183 | - Hao Song 184 | - Telmo Silva Filho 185 | - Markus Kängsepp 186 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | 22 | clean: 23 | rm -rf $(BUILDDIR)/* 24 | rm -rf source/examples 25 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/source/.nojekyll: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/source/api/metrics.rst: -------------------------------------------------------------------------------- 1 | .. 
title:: Metrics 2 | 3 | .. _metrics: 4 | 5 | Metrics 6 | ======= 7 | 8 | .. automodule:: pycalib.metrics 9 | :members: 10 | 11 | -------------------------------------------------------------------------------- /docs/source/api/models.rst: -------------------------------------------------------------------------------- 1 | .. title:: Calibration methods 2 | 3 | .. _calibration: 4 | 5 | Calibration methods 6 | =================== 7 | 8 | 9 | .. automodule:: pycalib.models 10 | :members: 11 | 12 | .. automodule:: pycalib.models.calibrators 13 | :members: 14 | 15 | -------------------------------------------------------------------------------- /docs/source/api/stats.rst: -------------------------------------------------------------------------------- 1 | .. title:: Statistics tools 2 | 3 | .. _statistics: 4 | 5 | Statistics tools 6 | =============== 7 | 8 | .. automodule:: pycalib.stats 9 | :members: 10 | -------------------------------------------------------------------------------- /docs/source/api/visualisations.rst: -------------------------------------------------------------------------------- 1 | .. title:: Visualisations 2 | 3 | .. _visualisations: 4 | 5 | Visualisations 6 | ============== 7 | 8 | Visualisations file describes all visualisations tools in PyCalib 9 | 10 | .. autofunction:: pycalib.visualisations.plot_reliability_diagram 11 | 12 | .. automodule:: pycalib.visualisations 13 | :members: 14 | 15 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | import sphinx_rtd_theme 8 | 9 | # -- Path setup -------------------------------------------------------------- 10 | 11 | # If extensions (or modules to document with autodoc) are in another directory, 12 | # add these directories to sys.path here. If the directory is relative to the 13 | # documentation root, use os.path.abspath to make it absolute, like shown here. 14 | # 15 | import os 16 | import sys 17 | sys.path.insert(0, os.path.abspath('../../')) 18 | 19 | 20 | # -- Project information ----------------------------------------------------- 21 | 22 | project = 'PyCalib' 23 | copyright = '2021, Miquel Perello-Nieto' 24 | author = 'Miquel Perello-Nieto' 25 | 26 | # The full version, including alpha/beta/rc tags 27 | release = '0.0.4.dev0' 28 | 29 | github_org = 'perellonieto' 30 | github_repo = 'pycalib' 31 | github_docs_repo = 'pycalib' 32 | 33 | 34 | # -- General configuration --------------------------------------------------- 35 | 36 | # Add any Sphinx extension module names here, as strings. They can be 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 38 | # ones. 39 | extensions = [ 40 | "sphinx_rtd_theme", 41 | "sphinx.ext.autodoc", 42 | "sphinx_gallery.gen_gallery", 43 | "numpydoc" 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ['_templates'] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 
52 | exclude_patterns = [] 53 | html_extra_path = ['.nojekyll'] 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | #html_theme = 'alabaster' 61 | html_theme = 'sphinx_rtd_theme' 62 | 63 | # Add any paths that contain custom static files (such as style sheets) here, 64 | # relative to this directory. They are copied after the builtin static files, 65 | # so a file named "default.css" will overwrite the builtin "default.css". 66 | html_static_path = ['_static'] 67 | 68 | # Generate examples with figures 69 | sphinx_gallery_conf = { 70 | 'filename_pattern': '/xmpl_', 71 | 'examples_dirs': os.path.join('..', '..', 'examples'), 72 | 'gallery_dirs': 'examples', 73 | 'backreferences_dir': 'generated', # `doc_module` 74 | 'doc_module': 'pycalib', # Generate mini galleries for the API documentation. 75 | 'reference_url': {'pycalib': None}, # Put links to docs in the examples code. 76 | 'binder': { 77 | 'org': github_org, 78 | 'repo': github_docs_repo, 79 | 'branch': 'gh-pages', 80 | 'binderhub_url': 'https://mybinder.org', 81 | 'dependencies': [os.path.join('..', '..', 'requirements.txt'), 82 | os.path.join('..', '..', 'requirements-dev.txt')] 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. PyCalib documentation master file, created by 2 | sphinx-quickstart on Tue Feb 16 20:18:43 2021. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to PyCalib's documentation! 7 | =================================== 8 | 9 | PyCalib is a library that offers multiple tools to assess probabilistic 10 | classifiers in terms of calibration, and provides helpful functions to 11 | calibrate muticlass probabilistic models that follow the Scikit-learn 12 | `BaseEstimator`_ standard. 13 | 14 | .. _BaseEstimator: https://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html 15 | 16 | .. toctree:: 17 | :maxdepth: 2 18 | :caption: Contents: 19 | 20 | usage/installation 21 | usage/quickstart 22 | api/models 23 | api/metrics 24 | api/visualisations 25 | api/stats 26 | examples/index 27 | 28 | 29 | Indices and tables 30 | ================== 31 | 32 | * :ref:`genindex` 33 | * :ref:`modindex` 34 | -------------------------------------------------------------------------------- /docs/source/usage/installation.rst: -------------------------------------------------------------------------------- 1 | .. title:: Installation 2 | 3 | .. _installation: 4 | 5 | Installation 6 | ============ 7 | 8 | There are multiple ways to install PyCalib. The simplest way is to use the 9 | packaged version available in the Python Package Index PyPI. Further details of 10 | the packaged version can be found at https://pypi.org/project/pycalib/ 11 | 12 | In order to install PyCalib from PyPI just run the following command 13 | 14 | .. code-block:: bash 15 | 16 | pip install pycalib 17 | 18 | This will install the library into your current Python environment. You can 19 | check that the library is available in your environment by running 20 | 21 | .. code-block:: bash 22 | 23 | python -c "import pycalib; print(pycalib.__version__)" 24 | 25 | which if successful should print the installed version in the standard output. 
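Alternatively, the latest development version can be installed directly from a
clone of the source repository. The following is a minimal sketch, assuming the
``setup.py``-based layout of this project and using the repository URL listed
in the project's CITATION file:

.. code-block:: bash

   git clone https://github.com/perellonieto/PyCalib.git
   cd PyCalib
   pip install .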
26 | -------------------------------------------------------------------------------- /docs/source/usage/quickstart.rst: -------------------------------------------------------------------------------- 1 | .. title:: Quickstart 2 | 3 | .. _quickstart: 4 | 5 | Quickstart 6 | ========== 7 | 8 | This is a simple description of how to calibrate a classifier using this 9 | library. For an extended example check the Section Examples 10 | :ref:`sphx_glr_examples_xmpl_quickstart.py`. 11 | 12 | The simplest way to calibrate an existing probabilistic classifier is the 13 | following: 14 | 15 | First choose the calibration method you want to use 16 | 17 | .. code-block:: python 18 | 19 | from pycalib.models import IsotonicCalibration 20 | cal = IsotonicCalibration() 21 | 22 | Now we can put together a probabilistic classifier with the chosen calibration 23 | method 24 | 25 | .. code-block:: python 26 | 27 | from pycalib.models import CalibratedModel 28 | 29 | cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal) 30 | 31 | Now you can train both classifier and calibrator all together. 32 | 33 | .. code-block:: python 34 | 35 | from sklearn.datasets import load_iris 36 | 37 | dataset = load_iris() 38 | cal_clf.fit(dataset.data, dataset.target) 39 | 40 | -------------------------------------------------------------------------------- /examples/README.txt: -------------------------------------------------------------------------------- 1 | .. _examples: 2 | 3 | Examples 4 | ======== 5 | 6 | Here you can find various examples of using PyCalib. 7 | -------------------------------------------------------------------------------- /examples/xmpl_quickstart.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Quickstart 4 | ============================= 5 | 6 | This example shows a simple comparison of the expected calibration error of a 7 | non-calibrated method against a calibrated method. 
8 | """
9 | # Author: Miquel Perello Nieto
10 | # License: new BSD
11 | 
12 | print(__doc__)
13 | 
14 | ##############################################################################
15 | # First choose a classifier
16 | 
17 | from sklearn.naive_bayes import GaussianNB
18 | 
19 | clf = GaussianNB()
20 | 
21 | ##############################################################################
22 | # And a dataset
23 | 
24 | from sklearn.datasets import make_classification
25 | from sklearn.model_selection import train_test_split
26 | 
27 | X, y = make_classification(
28 |     n_samples=100000, n_features=20, n_informative=4, n_redundant=4,
29 |     random_state=42
30 | )
31 | 
32 | 
33 | 
34 | X_train, X_test, Y_train, Y_test = train_test_split(X, y)
35 | 
36 | ##############################################################################
37 | # We can see how calibrated it is after training
38 | 
39 | clf.fit(X_train, Y_train)
40 | 
41 | n_correct = sum(clf.predict(X_test) == Y_test)
42 | n_test = Y_test.shape[0]
43 | 
44 | print(f"The classifier gets {n_correct} correct "
45 |       f"predictions out of {n_test}")
46 | 
47 | ##############################################################################
48 | # We can assess the confidence expected calibration error
49 | 
50 | from pycalib.metrics import conf_ECE
51 | 
52 | scores = clf.predict_proba(X_test)
53 | cece = conf_ECE(Y_test, scores, bins=15)
54 | 
55 | print(f"The classifier gets a confidence expected "
56 |       f"calibration error of {cece:0.2f}")
57 | 
58 | ##############################################################################
59 | # Let's look at its reliability diagram
60 | 
61 | from pycalib.visualisations import plot_reliability_diagram
62 | 
63 | plot_reliability_diagram(labels=Y_test, scores=scores, show_histogram=True,
64 |                          show_bars=True, show_gaps=True)
65 | 
66 | ##############################################################################
67 | # We can see how calibration can improve the conf-ECE
68 | 
69 | from pycalib.models import IsotonicCalibration
70 | cal = IsotonicCalibration()
71 | 
72 | ##############################################################################
73 | # Now we can put together a probabilistic classifier with the chosen calibration
74 | # method
75 | 
76 | from pycalib.models import CalibratedModel
77 | 
78 | cal_clf = CalibratedModel(base_estimator=clf, calibrator=cal,
79 |                           fit_estimator=False)
80 | 
81 | ##############################################################################
82 | # Now you can train both classifier and calibrator all together.
83 | 
84 | cal_clf.fit(X_train, Y_train)
85 | n_correct = sum(cal_clf.predict(X_test) == Y_test)
86 | 
87 | print(f"The calibrated classifier gets {n_correct} "
88 |       f"correct predictions out of {n_test}")
89 | 
90 | scores_cal = cal_clf.predict_proba(X_test)
91 | cece = conf_ECE(Y_test, scores_cal, bins=15)
92 | 
93 | print(f"The calibrated classifier gets a confidence "
94 |       f"expected calibration error of {cece:0.2f}")
95 | 
96 | ##############################################################################
97 | # Finally, let's look at the reliability diagram of the calibrated classifier.
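# If the calibration helped, the bars below should lie closer to the diagonal
# (and the gaps should be smaller) than in the first reliability diagram above.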
98 | 99 | from pycalib.visualisations import plot_reliability_diagram 100 | 101 | plot_reliability_diagram(labels=Y_test, scores=scores_cal, show_histogram=True, 102 | show_bars=True, show_gaps=True) 103 | -------------------------------------------------------------------------------- /examples/xmpl_reliability_diagram.py: -------------------------------------------------------------------------------- 1 | """ 2 | ============================= 3 | Plotting reliability diagrams 4 | ============================= 5 | 6 | This example illustrates how to visualise the reliability diagram for a binary 7 | probabilistic classifier. 8 | """ 9 | # Author: Miquel Perello Nieto 10 | # License: new BSD 11 | 12 | print(__doc__) 13 | 14 | ############################################################################## 15 | # This example shows different ways to visualise the reliability diagram for a 16 | # binary classification problem. 17 | # 18 | # First we will generate two synthetic models and some synthetic scores and 19 | # labels. 20 | 21 | import matplotlib.pyplot as plt 22 | import numpy as np 23 | np.random.seed(42) 24 | 25 | n_c1 = n_c2 = 500 26 | p = np.concatenate((np.random.beta(2, 5, n_c1), 27 | np.random.beta(4, 3, n_c2) 28 | )) 29 | 30 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 31 | 32 | s1 = 1/(1 + np.exp(-8*(p - 0.5))) 33 | s2 = 1/(1 + np.exp(-3*(p - 0.5))) 34 | 35 | plt.scatter(s1, p, label='Model 1') 36 | plt.scatter(s2, p, label='Model 2') 37 | plt.scatter(p, y) 38 | plt.plot([0, 1], [0, 1], 'r--') 39 | plt.xlabel('Model scores') 40 | plt.ylabel('Sample true probability') 41 | plt.grid() 42 | plt.legend() 43 | 44 | p = np.vstack((1 - p, p)).T 45 | s1 = np.vstack((1 - s1, s1)).T 46 | s2 = np.vstack((1 - s2, s2)).T 47 | 48 | ############################################################################## 49 | # A perfect calibration should be as follows, compared with the generated 50 | # scores 51 | 52 | import scipy.stats as stats 53 | 54 | p_g_p = stats.beta.pdf(x=p[:, 1], a=3, b=2) 55 | p_g_n = stats.beta.pdf(x=p[:, 1], a=2, b=7) 56 | 57 | p_hat = p_g_p/(p_g_n+p_g_p) 58 | p_hat = np.vstack((1 - p_hat, p_hat)).T 59 | 60 | plt.scatter(p[:, 1], s1[:, 1], label='Model 1') 61 | plt.scatter(p[:, 1], s2[:, 1], label='Model 2') 62 | plt.scatter(p[:, 1], p_hat[:, 1], color='red', label='Bayes optimal correction') 63 | plt.xlabel('Sample true probability') 64 | plt.ylabel('Model scores') 65 | plt.grid() 66 | plt.legend() 67 | 68 | ############################################################################## 69 | # There are at least 2 very common ways to show a reliability diagram for a 70 | # probabilistic binary classifier. Drawing a line between all the binned mean 71 | # predictions and the true proportion of positives. 
72 | 
73 | from pycalib.visualisations import plot_reliability_diagram
74 | 
75 | fig = plot_reliability_diagram(labels=y, scores=s1, show_histogram=False)
76 | 
77 | ##############################################################################
78 | # And showing bars instead of a lineplot, usually with errorbars showing the
79 | # discrepancy with respect to a perfectly calibrated model (the diagonal).
80 | 
81 | fig = plot_reliability_diagram(labels=y, scores=s1,
82 |                                class_names=['Negative', 'Positive'],
83 |                                show_gaps=True, show_bars=True,
84 |                                show_histogram=True)
85 | 
86 | 
87 | ##############################################################################
88 | # However, both previous illustrations do not include the number of samples
89 | # that fall into each bin. By default the parameter show_bars is set to True as
90 | # this information is crucial to understand how reliable each estimation is,
91 | # and how this affects some of the calibration metrics.
92 | # We also specify the bin boundaries and change the color of the gaps.
93 | 
94 | fig = plot_reliability_diagram(labels=y, scores=s1,
95 |                                class_names=['Negative', 'Positive'],
96 |                                show_gaps=True, color_gaps='firebrick',
97 |                                bins=[0, .3, .4, .45, .5, .55, .6, .7, 1])
98 | 
99 | ##############################################################################
100 | # It is also common to plot only the confidence (considering only the winning
101 | # class as the positive class for each prediction). Notice that the class name
102 | # is automatically set to the *winning* class.
103 | 
104 | fig = plot_reliability_diagram(labels=y, scores=s1,
105 |                                show_gaps=True,
106 |                                confidence=True,
107 |                                show_bars=True)
108 | 
109 | ##############################################################################
110 | # We can enable some parameters to show several aspects of the reliability
111 | # diagram. For example, we can add a histogram indicating the number of samples
112 | # in each bin (or show the count in each marker); the correction that should be
113 | # applied to the average scores in order to calibrate the model can also be
114 | # shown as red arrows pointing towards the diagonal (a perfectly
115 | # calibrated model); and even the true class of each sample can be drawn at the
116 | # y coordinates 0 and 1 for each scored instance (50% of the data in
117 | # this example, but the default is 100%).
118 | 
119 | fig = plot_reliability_diagram(labels=y, scores=s1,
120 |                                legend=['Model 1'],
121 |                                show_histogram=True,
122 |                                bins=9, class_names=['Negative', 'Positive'],
123 |                                show_counts=True,
124 |                                show_correction=True,
125 |                                sample_proportion=0.5,
126 |                                hist_per_class=True)
127 | ##############################################################################
128 | # It can also be useful to have 95% confidence intervals for each bin, obtained
129 | # from a binomial proportion confidence interval computed with one of several
130 | # statistical methods. This function uses https://www.statsmodels.org/stable/generated/statsmodels.stats.proportion.proportion_confint.html
131 | # and thus accepts the different methods available in the statsmodels library. In the
132 | # following example we use the Clopper-Pearson interval, which is based on the
133 | # Beta distribution, with a confidence level of 95%.
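# As a reference, the Clopper-Pearson bounds for k positives out of n samples
# in a bin, at confidence 1 - alpha, are the Beta quantiles
# lower = Beta.ppf(alpha/2, k, n - k + 1) and upper = Beta.ppf(1 - alpha/2, k + 1, n - k).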
134 | 135 | fig = plot_reliability_diagram(labels=y, scores=s2, 136 | legend=['Model 2'], 137 | show_histogram=True, 138 | show_counts=True, 139 | bins=13, class_names=['Negative', 'Positive'], 140 | sample_proportion=1.0, 141 | errorbar_interval=0.95, 142 | interval_method='beta', 143 | color_list=['orange']) 144 | ############################################################################## 145 | # The function also allows the visualisation of multiple models for comparison. 146 | 147 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 148 | legend=['Model 1', 'Model 2'], 149 | show_histogram=True, 150 | bins=10, class_names=['Negative', 'Positive'], 151 | errorbar_interval=0.95, 152 | interval_method='beta') 153 | 154 | 155 | ############################################################################## 156 | # It is possible to draw reliability diagram for multiple classes as well. We 157 | # will simulate 3 classes by changing some original labels to a 3rd class, and 158 | # modifying the scores of Model 1 and 2 to create new models 3 and 4. 159 | 160 | class_2_idx = range(int(len(y)/3), int(2*len(y)/3)) 161 | y[class_2_idx] = 2 162 | s1 = np.hstack((s1, s1[:, 1].reshape(-1, 1))) 163 | s1[class_2_idx,2] *= 3 164 | s1 /= s1.sum(axis=1)[:, None] 165 | s2 = np.hstack((s2, s2[:, 1].reshape(-1, 1))) 166 | s2[class_2_idx,2] *= 2 167 | s2 /= s2.sum(axis=1)[:, None] 168 | 169 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 170 | legend=['Model 3', 'Model 4'], 171 | show_histogram=True, 172 | color_list=['darkgreen', 'chocolate']) 173 | 174 | ############################################################################## 175 | # If we are only interested in the confidence, the 3 classes can be visualised 176 | # in a single reliability diagram 177 | 178 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 179 | legend=['Model 3', 'Model 4'], 180 | show_histogram=True, 181 | color_list=['darkgreen', 'chocolate'], 182 | confidence=True) 183 | 184 | ############################################################################## 185 | # The same can be done with the bars. 186 | 187 | fig = plot_reliability_diagram(labels=y, scores=s1, 188 | legend=['Model 3'], 189 | show_histogram=True, 190 | show_bars=True, 191 | show_gaps=True) 192 | 193 | 194 | ############################################################################## 195 | # And change the colors. 
196 | 
197 | fig = plot_reliability_diagram(labels=y, scores=s1,
198 |                                legend=['Model 3'],
199 |                                show_histogram=True,
200 |                                color_list=['darkgreen'],
201 |                                show_bars=True,
202 |                                show_gaps=True,
203 |                                color_gaps='orange')
204 | 
205 | 
206 | ##############################################################################
207 | # If we have precomputed the average proportion of true positives and the
208 | # average predictions, or we have access to the ground truth, it is possible to
209 | # plot the same reliability diagram using the following function.
210 | from pycalib.visualisations import plot_reliability_diagram_precomputed
211 | 
212 | avg_true = [np.array([.1, .3, .6, .8, .9, 1]).reshape(-1, 1),
213 |             np.array([.2, .4, .5, .7, .8, .9]).reshape(-1, 1)]
214 | avg_pred = [np.array([.01, .25, .4, .6, .7, .8]).reshape(-1, 1),
215 |             np.array([.15, .39, .7, .75, .8, .9]).reshape(-1, 1)]
216 | 
217 | fig = plot_reliability_diagram_precomputed(avg_true, avg_pred)
218 | 
219 | ##############################################################################
220 | # Similarly, for a multiclass problem we can provide full matrices of size
221 | # (n_bins, n_classes) instead. Notice that the predicted scores do not need to
222 | # be sorted, and the probabilities do not need to sum to one across classes,
223 | # as each column may have been computed from a different set of
224 | # instances.
225 | 
226 | avg_true = [np.array([[.1, .3, .6, .8, .9, 1.],
227 |                       [.0, .2, .4, .7, .8, .9],
228 |                       [.1, .2, .3, .5, .6, .8]]).T,
229 |             np.array([[.1, .4, .7, .8, .9, 1.],
230 |                       [.9, .3, .8, .2, .7, .1],
231 |                       [.2, .3, .5, .4, .7, .1]]).T]
232 | avg_pred = [np.array([[.0, .3, .6, .7, .8, .9],
233 |                       [.1, .2, .3, .5, .8, .7],
234 |                       [.3, .5, .4, .7, .8, .9]]).T,
235 |             np.array([[.0, .3, .6, .8, .9, 1.],
236 |                       [.8, .1, .6, .2, .9, 0.],
237 |                       [.1, .4, .6, .3, .5, 0.]]).T]
238 | 
239 | fig = plot_reliability_diagram_precomputed(avg_true, avg_pred)
--------------------------------------------------------------------------------
/examples/xmpl_ternary_contours.py:
--------------------------------------------------------------------------------
1 | """
2 | ==============================================
3 | Draw contour function of ternary simplex space
4 | ==============================================
5 | 
6 | This example illustrates how to draw contour plots for functions with 3
7 | probability inputs and multiple outputs.
8 | """
9 | # Author: Miquel Perello Nieto
10 | # License: new BSD
11 | 
12 | import matplotlib.pyplot as plt
13 | import numpy as np
14 | np.random.seed(42)
15 | 
16 | print(__doc__)
17 | 
18 | ##############################################################################
19 | # We first show how to draw a heatmap on a ternary probability simplex; in this
20 | # case we define a Dirichlet density function and pass it with default parameters.
21 | 
22 | from scipy.stats import dirichlet
23 | 
24 | from pycalib.visualisations.ternary import draw_func_contours
25 | 
26 | function = lambda x: dirichlet.pdf(x, alpha=[5, 3, 2])
27 | fig = draw_func_contours(function)
28 | 
29 | ##############################################################################
30 | # Next we show how to use a ternary calibration model that has 3 probability
31 | # inputs and 3 outputs. We will simulate a calibrator by means of 3
32 | # Dirichlet distributions and applying Bayes' rule with equal priors.
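# With equal priors, Bayes' rule reduces to normalising the class-conditional
# densities: p(c_k | x) = p(x | c_k) / sum_j p(x | c_j), which is exactly what
# the simulated calibrator below computes before returning its predictions.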
33 | 34 | class calibrator(): 35 | def predict_proba(self, x): 36 | pred1 = dirichlet.pdf(x, alpha=[3, 1, 1]) 37 | pred2 = dirichlet.pdf(x, alpha=[6, 7, 5]) 38 | pred3 = dirichlet.pdf(x, alpha=[3, 4, 5]) 39 | pred = np.vstack([pred1, pred2, pred3]).T 40 | pred = pred / pred.sum(axis=1)[:, None] 41 | return pred 42 | 43 | cal = calibrator() 44 | 45 | ############################################################################## 46 | # Then we will first draw a contourmap only for the first class. We do that by 47 | # creating a lambda function and selecting the first column. 48 | # We also select a colormap for the first class. 49 | 50 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][0] 51 | fig = draw_func_contours(function, cmap='Reds') 52 | 53 | 54 | ############################################################################## 55 | # We can look at the second class by creating a new lambda function and 56 | # selecting the second column. We will also modify how many times to subdivide 57 | # the simplex (subdiv=3). And the number of contour values (nlevels=10). 58 | 59 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][1] 60 | fig = draw_func_contours(function, nlevels=10, subdiv=3, cmap='Oranges') 61 | 62 | ############################################################################## 63 | # Finally we show the 3rd class with other sets of parameters and specifying 64 | # the names of each class. 65 | 66 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][2] 67 | fig = draw_func_contours(function, nlevels=10, subdiv=5, cmap='Blues', 68 | labels=['strawberry', 'orange', 'smurf']) 69 | 70 | 71 | ############################################################################## 72 | # In order to plot the contours of all classes in the same figure it is 73 | # necessary to loop over all subplots. We show an example that uses the 74 | # previous functions. 75 | 76 | labels=['strawberry', 'orange', 'smurf'] 77 | cmap_list = ['Reds', 'Oranges', 'Blues'] 78 | fig = plt.figure(figsize=(10, 5)) 79 | for c in [0, 1, 2]: 80 | ax = fig.add_subplot(1, 3, c+1) 81 | ax.set_title('{}\n$(C_{})$'.format(labels[c], c+1), loc='left') 82 | function = lambda x: cal.predict_proba(x.reshape(-1, 1))[0][c] 83 | fig = draw_func_contours(function, nlevels=30, subdiv=5, cmap=cmap_list[c], 84 | ax=ax, fig=fig) 85 | -------------------------------------------------------------------------------- /examples/xmpl_ternary_samples.py: -------------------------------------------------------------------------------- 1 | """ 2 | ==================================================== 3 | Scatter plot of ternary problem in the simplex space 4 | ==================================================== 5 | 6 | This example illustrates how to draw samples from the scores of a model and 7 | their true label. 8 | """ 9 | # Author: Miquel Perello Nieto 10 | # License: new BSD 11 | 12 | print(__doc__) 13 | 14 | ############################################################################## 15 | # We generate 3 scores as comming from 3 Dirichlet distributions simulating the 16 | # output scores of a classifier. Then we show how to draw the samples in the 17 | # simplex space. 
18 | 19 | import matplotlib.pyplot as plt 20 | import numpy as np 21 | np.random.seed(42) 22 | 23 | n_c1 = n_c2 = n_c3 = 300 24 | p = np.concatenate((np.random.dirichlet([6, 2, 3], n_c1), 25 | np.random.dirichlet([5, 12, 5], n_c2), 26 | np.random.dirichlet([2, 3, 5], n_c3) 27 | )) 28 | 29 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2), np.ones(n_c3)*2)) 30 | 31 | from pycalib.visualisations.ternary import draw_tri_samples 32 | 33 | fig, ax = draw_tri_samples(p, classes=y, alpha=0.6) 34 | 35 | 36 | ############################################################################## 37 | # Here we specify the names of each class and change their colors. 38 | 39 | fig, ax = draw_tri_samples(p, classes=y, alpha=0.6, 40 | labels=['dogs', 'cats', 'fox'], 41 | color_list=['saddlebrown', 'black', 'darkorange']) 42 | -------------------------------------------------------------------------------- /pycalib/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '0.1.0.dev6' 2 | -------------------------------------------------------------------------------- /pycalib/metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import mean_squared_error 3 | from sklearn.metrics import log_loss 4 | from sklearn.preprocessing import label_binarize 5 | from scipy.stats import percentileofscore 6 | import multiprocessing 7 | 8 | 9 | def accuracy(y_true, y_pred): 10 | """Classification accuracy score 11 | 12 | Accuracy for binary and multiclass classification problems. Consists on the 13 | proportion of correct estimations assuming the maximum class probability of 14 | each score as the estimated class. 15 | 16 | Parameters 17 | ---------- 18 | y_true : label indicator matrix (n_samples, n_classes) 19 | True labels. 20 | # TODO Add option to pass array with shape (n_samples, ) 21 | 22 | y_pred : matrix (n_samples, n_classes) 23 | Predicted scores. 24 | 25 | Returns 26 | ------- 27 | score : float 28 | Proportion of correct predictions as a value between 0 and 1. 29 | 30 | Examples 31 | -------- 32 | >>> from pycalib.metrics import accuracy 33 | >>> Y = np.array([[0, 1], [0, 1]]) 34 | >>> S = np.array([[0.1, 0.9], [0.6, 0.4]]) 35 | >>> accuracy(Y, S) 36 | 0.5 37 | >>> Y = np.array([[0, 1], [0, 1]]) 38 | >>> S = np.array([[0.1, 0.9], [0, 1]]) 39 | >>> accuracy(Y, S) 40 | 1.0 41 | """ 42 | predictions = np.argmax(y_pred, axis=1) 43 | y = np.argmax(y_true, axis=1) 44 | return np.mean(predictions == y) 45 | 46 | 47 | def cross_entropy(y_true, y_pred): 48 | """Cross-entropy score 49 | 50 | Computes the cross-entropy (a.k.a. log-loss) for binary and 51 | multiclass classification scores. 52 | 53 | Parameters 54 | ---------- 55 | y_true : label indicator matrix (n_samples, n_classes) 56 | True labels. 57 | # TODO Add option to pass array with shape (n_samples, ) 58 | 59 | y_pred : matrix (n_samples, n_classes) 60 | Predicted scores. 61 | 62 | Returns 63 | ------- 64 | score : float 65 | 66 | Examples 67 | -------- 68 | >>> from pycalib.metrics import cross_entropy 69 | >>> Y = np.array([[0, 1], [0, 1]]) 70 | >>> S = np.array([[0.1, 0.9], [0.6, 0.4]]) 71 | >>> cross_entropy(Y, S) 72 | 0.5108256237659906 73 | """ 74 | return log_loss(y_true, y_pred) 75 | 76 | 77 | def brier_score(y_true, y_pred): 78 | """Brier score 79 | 80 | Computes the Brier score between the true labels and the estimated 81 | probabilities. 
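In this implementation the squared differences are averaged over both samples
    and classes, i.e. BS = 1/(N*K) * sum_{i,j} (p_ij - y_ij)^2 for N samples and
    K classes (this matches the example below, which evaluates to 0.185).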
This corresponds to the Mean Squared Error between the 82 | estimations and the true labels. 83 | 84 | Parameters 85 | ---------- 86 | y_true : label indicator matrix (n_samples, n_classes) 87 | True labels. 88 | # TODO Add option to pass array with shape (n_samples, ) 89 | 90 | y_pred : matrix (n_samples, n_classes) 91 | Predicted scores. 92 | 93 | Returns 94 | ------- 95 | score : float 96 | Positive value between 0 and 1. 97 | 98 | Examples 99 | -------- 100 | >>> from pycalib.metrics import cross_entropy 101 | >>> Y = np.array([[0, 1], [0, 1]]) 102 | >>> S = np.array([[0.1, 0.9], [0.6, 0.4]]) 103 | >>> brier_score(Y, S) 104 | 0.185 105 | """ 106 | # TODO Consider using the following code instead 107 | # np.mean(np.abs(S - Y)**2) 108 | return mean_squared_error(y_true, y_pred) 109 | 110 | 111 | def conf_ECE(y_true, probs, bins=15): 112 | r"""Confidence Expected Calibration Error 113 | 114 | Calculate ECE score based on model maximum output probabilities and true 115 | labels 116 | 117 | .. math:: 118 | 119 | \text{confidence-ECE} = \sum_{i=1}^M \frac{|B_{i}|}{N} | 120 | \text{accuracy}(B_{i}) - \bar{p}(B_{i})| 121 | 122 | In which $p$ are the maximum predicted probabilities. 123 | 124 | 125 | Parameters 126 | ---------- 127 | y_true: 128 | - a list containing the actual class labels 129 | - ndarray shape (n_samples) with a list containing actual class 130 | labels 131 | - ndarray shape (n_samples, n_classes) with largest value in 132 | each row for the correct column class. 133 | probs: 134 | a list containing probabilities for all the classes with a shape of 135 | (samples, classes) 136 | bins: (int) 137 | - into how many bins are probabilities divided (default = 15) 138 | 139 | Returns 140 | ------- 141 | ece : float 142 | expected calibration error 143 | 144 | Examples 145 | -------- 146 | >>> from pycalib.metrics import conf_ECE 147 | >>> Y = np.array([[1, 0], [0, 1]]).T 148 | >>> P = np.array([[0.9, 0.1], [0.1, 0.9]]).T 149 | >>> print(round(conf_ECE(Y, P, bins=2), 8)) 150 | 0.1 151 | >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T 152 | >>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]]).T 153 | >>> print(round(conf_ECE(Y, P, bins=2), 8)) 154 | 0.2 155 | """ 156 | return ECE(y_true, probs, normalize=False, bins=bins, ece_full=False) 157 | 158 | 159 | def ECE(y_true, probs, normalize=False, bins=15, ece_full=True): 160 | """ 161 | Calculate ECE score based on model output probabilities and true labels 162 | 163 | Parameters 164 | ========== 165 | y_true : list 166 | a list containing the actual class labels 167 | ndarray shape (n_samples) with a list containing actual class 168 | labels 169 | ndarray shape (n_samples, n_classes) with largest value in 170 | each row for the correct column class. 171 | probs : list 172 | a list containing probabilities for all the classes with a shape of 173 | (samples, classes) 174 | normalize: (bool) 175 | in case of 1-vs-K calibration, the probabilities need to be 176 | normalized. (default = False) 177 | bins: (int) 178 | into how many bins are probabilities divided (default = 15) 179 | ece_full: (bool) 180 | whether to use ECE-full or ECE-max. 
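If True, the calibration error is computed over the full probability
        vectors (one entry per class for every sample); if False, only the
        maximum probability of each sample (its confidence) is used, as in
        conf_ECE.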
181 | 182 | Returns 183 | ======= 184 | ece : float 185 | expected calibration error 186 | """ 187 | 188 | probs = np.array(probs) 189 | y_true = np.array(y_true) 190 | if len(y_true.shape) == 2 and y_true.shape[1] > 1: 191 | y_true = y_true.argmax(axis=1).reshape(-1, 1) 192 | 193 | # Prepare predictions, confidences and true labels for ECE calculation 194 | if ece_full: 195 | preds, confs, y_true = _get_preds_all(y_true, probs, 196 | normalize=normalize, 197 | flatten=True) 198 | 199 | else: 200 | preds = np.argmax(probs, axis=1) # Maximum confidence as prediction 201 | 202 | if normalize: 203 | confs = np.max(probs, axis=1)/np.sum(probs, axis=1) 204 | # Check if everything below or equal to 1? 205 | else: 206 | confs = np.max(probs, axis=1) # Take only maximum confidence 207 | 208 | # Calculate ECE and ECE2 209 | ece = _ECE_helper(confs, preds, y_true, bin_size=1/bins, ece_full=ece_full) 210 | 211 | return ece 212 | 213 | 214 | def _get_preds_all(y_true, y_probs, axis=1, normalize=False, flatten=True): 215 | """ 216 | Method to get predictions in right format for ECE-full. 217 | 218 | Parameters 219 | ========== 220 | y_true: list 221 | containing the actual class labels 222 | y_probs: list (samples, classes) 223 | containing probabilities for all the classes 224 | axis: (int) 225 | dimension of set to calculate probabilities on 226 | normalize: (bool) 227 | in case of 1-vs-K calibration, the probabilities need to be 228 | normalized. (default = False) 229 | flatten: (bool) 230 | flatten all the arrays 231 | 232 | Returns 233 | ======= 234 | (y_preds, y_probs, y_true) 235 | predictions, probabilities and true labels 236 | """ 237 | if len(y_true.shape) == 1: 238 | y_true = y_true.reshape(-1, 1) 239 | elif len(y_true.shape) == 2 and y_true.shape[1] > 1: 240 | y_true = y_true.argmax(axis=1).reshape(-1, 1) 241 | 242 | y_preds = np.argmax(y_probs, axis=axis) # Maximum confidence as prediction 243 | y_preds = y_preds.reshape(-1, 1) 244 | 245 | if normalize: 246 | y_probs /= np.sum(y_probs, axis=axis) 247 | 248 | n_classes = y_probs.shape[1] 249 | y_preds = label_binarize(y_preds, classes=range(n_classes)) 250 | y_true = label_binarize(y_true, classes=range(n_classes)) 251 | 252 | if flatten: 253 | y_preds = y_preds.flatten() 254 | y_true = y_true.flatten() 255 | y_probs = y_probs.flatten() 256 | 257 | return y_preds, y_probs, y_true 258 | 259 | 260 | def _ECE_helper(conf, pred, true, bin_size=0.1, ece_full=False): 261 | 262 | """ 263 | Expected Calibration Error 264 | 265 | Parameters 266 | ========== 267 | conf (numpy.ndarray): 268 | list of confidences 269 | pred (numpy.ndarray): 270 | list of predictions 271 | true (numpy.ndarray): 272 | list of true labels 273 | bin_size: (float): 274 | size of one bin (0,1) # TODO should convert to number of bins? 275 | 276 | Returns 277 | ======= 278 | ece: expected calibration error 279 | """ 280 | 281 | upper_bounds = np.arange(bin_size, 1+bin_size, bin_size) # Bounds of bins 282 | 283 | n = len(conf) 284 | ece = 0 # Starting error 285 | 286 | for conf_thresh in upper_bounds: # Find accur. 
and confidences per bin 287 | acc, avg_conf, len_bin = _compute_acc_bin(conf_thresh-bin_size, 288 | conf_thresh, conf, pred, 289 | true, ece_full) 290 | ece += np.abs(acc-avg_conf)*len_bin/n # Add weigthed difference to ECE 291 | 292 | return ece 293 | 294 | 295 | def _compute_acc_bin(conf_thresh_lower, conf_thresh_upper, conf, pred, true, 296 | ece_full=True): 297 | """ 298 | # Computes accuracy and average confidence for bin 299 | 300 | Parameters 301 | ========== 302 | conf_thresh_lower (float): 303 | Lower Threshold of confidence interval 304 | conf_thresh_upper (float): 305 | Upper Threshold of confidence interval 306 | conf (numpy.ndarray): 307 | list of confidences 308 | pred (numpy.ndarray): 309 | list of predictions 310 | true (numpy.ndarray): 311 | list of true labels 312 | pred_thresh (float) : 313 | float in range (0,1), indicating the prediction threshold 314 | 315 | Returns 316 | ======= 317 | (accuracy, avg_conf, len_bin) : 318 | accuracy of bin, confidence of bin and number of elements in bin. 319 | """ 320 | filtered_tuples = [x for x in zip(pred, true, conf) 321 | if (x[2] > conf_thresh_lower or conf_thresh_lower == 0) 322 | and (x[2] <= conf_thresh_upper)] 323 | 324 | if len(filtered_tuples) < 1: 325 | return 0.0, 0.0, 0 326 | else: 327 | if ece_full: 328 | # How many elements falls into given bin 329 | len_bin = len(filtered_tuples) 330 | # Avg confidence of BIN 331 | avg_conf = sum([x[2] for x in filtered_tuples])/len_bin 332 | # Mean difference from actual class 333 | accuracy = np.mean([x[1] for x in filtered_tuples]) 334 | else: 335 | # How many correct labels 336 | correct = len([x for x in filtered_tuples if x[0] == x[1]]) 337 | # How many elements falls into given bin 338 | len_bin = len(filtered_tuples) 339 | # Avg confidence of BIN 340 | avg_conf = sum([x[2] for x in filtered_tuples]) / len_bin 341 | # accuracy of BIN 342 | accuracy = float(correct)/len_bin 343 | 344 | return accuracy, avg_conf, len_bin 345 | 346 | 347 | def _MCE_helper(conf, pred, true, bin_size=0.1, mce_full=True): 348 | 349 | """ 350 | Maximal Calibration Error 351 | 352 | Parameters 353 | ========== 354 | conf (numpy.ndarray): list of confidences 355 | pred (numpy.ndarray): list of predictions 356 | true (numpy.ndarray): list of true labels 357 | bin_size: (float): 358 | size of one bin (0,1) # TODO should convert to number of bins? 359 | mce_full: (bool) 360 | whether to use ECE-full or ECE-max for bin calculation 361 | 362 | Returns 363 | ======= 364 | mce: maximum calibration error 365 | """ 366 | 367 | upper_bounds = np.arange(bin_size, 1+bin_size, bin_size) 368 | 369 | cal_errors = [] 370 | 371 | for conf_thresh in upper_bounds: 372 | acc, avg_conf, count = _compute_acc_bin(conf_thresh-bin_size, 373 | conf_thresh, conf, pred, true, 374 | mce_full) 375 | cal_errors.append(np.abs(acc-avg_conf)) 376 | 377 | return np.max(np.asarray(cal_errors)) 378 | 379 | 380 | def MCE(y_true, probs, normalize=False, bins=15, mce_full=False): 381 | 382 | """ 383 | Calculate MCE score based on model output probabilities and true labels 384 | 385 | Parameters 386 | ========== 387 | y_true : list 388 | containing the actual class labels 389 | probs : list 390 | containing probabilities for all the classes with a shape of (samples, 391 | classes) 392 | normalize : bool 393 | in case of 1-vs-K calibration, the probabilities need to be normalized. 
394 | (default = False) 395 | bins : int 396 | into how many bins are probabilities divided (default = 15) 397 | mce_full : boolean 398 | whether to use ECE-full or ECE-max for calculation MCE. 399 | 400 | Returns 401 | ======= 402 | mce : float 403 | maximum calibration error 404 | """ 405 | 406 | probs = np.array(probs) 407 | y_true = np.array(y_true) 408 | if len(probs.shape) == len(y_true.shape): 409 | y_true = np.argmax(y_true, axis=1) 410 | 411 | # Prepare predictions, confidences and true labels for MCE calculation 412 | if mce_full: 413 | preds, confs, y_true = _get_preds_all(y_true, probs, 414 | normalize=normalize, 415 | flatten=True) 416 | 417 | else: 418 | preds = np.argmax(probs, axis=1) # Maximum confidence as prediction 419 | 420 | if normalize: 421 | confs = np.max(probs, axis=1)/np.sum(probs, axis=1) 422 | # Check if everything below or equal to 1? 423 | else: 424 | confs = np.max(probs, axis=1) # Take only maximum confidence 425 | 426 | # Calculate MCE 427 | mce = _MCE_helper(confs, preds, y_true, bin_size=1/bins, mce_full=mce_full) 428 | 429 | return mce 430 | 431 | 432 | def conf_MCE(y_true, probs, bins=15): 433 | """ 434 | Calculate ECE score based on model output probabilities and true labels 435 | 436 | Parameters 437 | ========== 438 | y_true: 439 | - a list containing the actual class labels 440 | - ndarray shape (n_samples) with a list containing actual class 441 | labels 442 | - ndarray shape (n_samples, n_classes) with largest value in 443 | each row for the correct column class. 444 | probs: 445 | a list containing probabilities for all the classes with a shape of 446 | (samples, classes) 447 | bins: (int) 448 | - into how many bins are probabilities divided (default = 15) 449 | 450 | Returns 451 | ======= 452 | mce : float 453 | maximum calibration error 454 | """ 455 | return MCE(y_true, probs, normalize=False, bins=bins, mce_full=False) 456 | 457 | 458 | def binary_MCE(y_true, probs, power=1, bins=15): 459 | r"""Binary Maximum Calibration Error 460 | 461 | .. math:: 462 | 463 | \text{binary-MCE} = \max_{i \in \{1, ..., M\}} |\bar{y}(B_{i}) 464 | - \bar{p}(B_{i})| 465 | 466 | Parameters 467 | ---------- 468 | y_true : indicator vector (n_samples, ) 469 | True labels. 470 | 471 | probs : matrix (n_samples, ) 472 | Predicted probabilities for positive class. 
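power : int
        Exponent applied to the absolute deviation of each bin before taking
        the maximum (default = 1).

    bins : int
        Number of equal-width bins into which the probabilities are divided
        (default = 15).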
473 | 474 | Returns 475 | ------- 476 | score : float 477 | 478 | Examples 479 | -------- 480 | >>> from pycalib.metrics import binary_MCE 481 | >>> Y = np.array([0, 1]) 482 | >>> P = np.array([0.1, 0.6]) 483 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 484 | 0.4 485 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 486 | >>> P = np.array([.1, .2, .3, .6, .7, .8]) 487 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 488 | 0.3 489 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 490 | >>> P = np.array([.1, .2, .3, .3, .2, .1]) 491 | >>> print(round(binary_MCE(Y, P, bins=1), 8)) 492 | 0.3 493 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 494 | >>> P = np.array([.1, .2, .3, .9, .9, .9]) 495 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 496 | 0.2 497 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 498 | >>> P = np.array([.1, .1, .1, .6, .6, .6]) 499 | >>> print(round(binary_MCE(Y, P, bins=2), 8)) 500 | 0.4 501 | """ 502 | idx = np.digitize(probs, np.linspace(0, 1 + 1e-8, bins + 1)) - 1 503 | 504 | def bin_func(y, p, idx): 505 | return (np.abs(np.mean(p[idx]) - np.mean(y[idx])) ** power) 506 | 507 | mce = [] 508 | for i in np.unique(idx): 509 | # print('Mean scores', np.mean(probs[idx == i])) 510 | # print('True proportion', np.mean(y_true[idx == i])) 511 | # print('Difference ', np.abs(np.mean(probs[idx == i]) 512 | # - np.mean(y_true[idx == i]))) 513 | mce.append(bin_func(y_true, probs, idx == i)) 514 | return max(mce) 515 | 516 | 517 | def binary_ECE(y_true, probs, power=1, bins=15): 518 | r"""Binary Expected Calibration Error 519 | 520 | .. math:: 521 | 522 | \text{binary-ECE} = \sum_{i=1}^M \frac{|B_{i}|}{N} | 523 | \bar{y}(B_{i}) - \bar{p}(B_{i})| 524 | 525 | Parameters 526 | ---------- 527 | y_true : indicator vector (n_samples, ) 528 | True labels. 529 | 530 | probs : matrix (n_samples, ) 531 | Predicted probabilities for positive class. 532 | 533 | Returns 534 | ------- 535 | score : float 536 | 537 | Examples 538 | -------- 539 | >>> from pycalib.metrics import binary_ECE 540 | >>> Y = np.array([0, 1]) 541 | >>> P = np.array([0.1, 0.9]) 542 | >>> print(round(binary_ECE(Y, P, bins=2), 8)) 543 | 0.1 544 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 545 | >>> P = np.array([.1, .2, .3, .7, .8, .9]) 546 | >>> print(round(binary_ECE(Y, P, bins=2), 8)) 547 | 0.2 548 | >>> Y = np.array([0, 0, 0, 1, 1, 1]) 549 | >>> P = np.array([.4, .4, .4, .6, .6, .6]) 550 | >>> print(round(binary_ECE(Y, P, bins=2), 8)) 551 | 0.4 552 | """ 553 | idx = np.digitize(probs, np.linspace(0, 1 + 1e-8, bins + 1)) - 1 554 | 555 | def bin_func(y, p, idx): 556 | return ((np.abs(np.mean(p[idx]) - np.mean(y[idx])) ** power) 557 | * np.sum(idx) / len(p)) 558 | 559 | ece = 0 560 | for i in np.unique(idx): 561 | # print('Mean scores', np.mean(probs[idx == i])) 562 | # print('True proportion', np.mean(y_true[idx == i])) 563 | # print('Difference ', np.abs(np.mean(probs[idx == i]) 564 | # - np.mean(y_true[idx == i]))) 565 | ece += bin_func(y_true, probs, idx == i) 566 | return ece 567 | 568 | 569 | def classwise_ECE(y_true, probs, power=1, bins=15): 570 | r"""Classwise Expected Calibration Error 571 | 572 | .. math:: 573 | 574 | \text{class-$j$-ECE} = \sum_{i=1}^M \frac{|B_{i,j}|}{N} 575 | |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|, 576 | 577 | \text{classwise-ECE} = \frac{1}{K}\sum_{j=1}^K \text{class-$j$-ECE} 578 | 579 | Parameters 580 | ---------- 581 | y_true : label indicator matrix (n_samples, n_classes) 582 | True labels. 
583 | # TODO Add option to pass array with shape (n_samples, ) 584 | 585 | probs : matrix (n_samples, n_classes) 586 | Predicted probabilities. 587 | 588 | Returns 589 | ------- 590 | score : float 591 | 592 | Examples 593 | -------- 594 | >>> from pycalib.metrics import classwise_ECE 595 | >>> Y = np.array([[1, 0], [0, 1]]).T 596 | >>> P = np.array([[0.9, 0.1], [0.1, 0.9]]).T 597 | >>> print(round(classwise_ECE(Y, P, bins=2), 8)) 598 | 0.1 599 | >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T 600 | >>> P = np.array([[.9, .8, .7, .3, .2, .1], [.1, .2, .3, .7, .8, .9]]).T 601 | >>> print(round(classwise_ECE(Y, P, bins=2), 8)) 602 | 0.2 603 | """ 604 | probs = np.array(probs) 605 | if not np.array_equal(probs.shape, y_true.shape): 606 | y_true = label_binarize(np.array(y_true), 607 | classes=range(probs.shape[1])) 608 | 609 | n_classes = probs.shape[1] 610 | 611 | return np.mean( 612 | [ 613 | binary_ECE( 614 | y_true[:, c].astype(float), probs[:, c], power=power, bins=bins 615 | ) for c in range(n_classes) 616 | ] 617 | ) 618 | 619 | 620 | def classwise_MCE(y_true, probs, bins=15): 621 | r"""Classwise Maximum Calibration Error 622 | 623 | .. math:: 624 | 625 | \text{class-$j$-MCE} = \max_{i \in {1, ..., M}} 626 | |\bar{y}_j(B_{i,j}) - \bar{p}_j(B_{i,j})|, 627 | 628 | \text{classwise-MCE} = \max_{j \in {1, ..., K}} \text{class-$j$-MCE} 629 | 630 | Parameters 631 | ---------- 632 | y_true : label indicator matrix (n_samples, n_classes) 633 | True labels. 634 | # TODO Add option to pass array with shape (n_samples, ) 635 | 636 | probs : matrix (n_samples, n_classes) 637 | Predicted probabilities. 638 | 639 | Returns 640 | ------- 641 | score : float 642 | 643 | Examples 644 | -------- 645 | >>> from pycalib.metrics import classwise_MCE 646 | >>> Y = np.array([[1, 0], [0, 1]]).T 647 | >>> P = np.array([[0.8, 0.1], [0.2, 0.9]]).T 648 | >>> print(round(classwise_MCE(Y, P, bins=2), 8)) 649 | 0.2 650 | >>> Y = np.array([[1, 1, 1, 0, 0, 0], [0, 0, 0, 1, 1, 1]]).T 651 | >>> P = np.array([[.8, .7, .6, .1, .1, .1], [.2, .3, .4, .9, .9, .9]]).T 652 | >>> print(round(classwise_MCE(Y, P, bins=2), 8)) 653 | 0.3 654 | """ 655 | probs = np.array(probs) 656 | if not np.array_equal(probs.shape, y_true.shape): 657 | y_true = label_binarize(np.array(y_true), 658 | classes=range(probs.shape[1])) 659 | 660 | n_classes = probs.shape[1] 661 | 662 | return np.max( 663 | [ 664 | binary_MCE( 665 | y_true[:, c].astype(float), probs[:, c], bins=bins 666 | ) for c in range(n_classes) 667 | ] 668 | ) 669 | 670 | 671 | def simplex_binning(y_true, probs, bins=15): 672 | 673 | probs = np.array(probs) 674 | if not np.array_equal(probs.shape, y_true.shape): 675 | y_true = label_binarize(np.array(y_true), 676 | classes=range(probs.shape[1])) 677 | 678 | idx = np.digitize(probs, np.linspace(0, 1, bins + 1)) - 1 679 | 680 | prob_bins = {} 681 | label_bins = {} 682 | 683 | for i, row in enumerate(idx): 684 | try: 685 | prob_bins[','.join([str(r) for r in row])].append(probs[i]) 686 | label_bins[','.join([str(r) for r in row])].append(y_true[i]) 687 | except KeyError: 688 | prob_bins[','.join([str(r) for r in row])] = [probs[i]] 689 | label_bins[','.join([str(r) for r in row])] = [y_true[i]] 690 | 691 | bins = [] 692 | for key in prob_bins: 693 | bins.append( 694 | [ 695 | len(prob_bins[key]), 696 | np.mean(np.array(prob_bins[key]), axis=0), 697 | np.mean(np.array(label_bins[key]), axis=0) 698 | ] 699 | ) 700 | 701 | return bins 702 | 703 | 704 | def full_ECE(y_true, probs, bins=15, power=1): 705 | n = len(probs) 706 | 
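# Overview of the computation below: every probability vector is binned
# jointly (np.digitize is applied to each class coordinate), so each occupied
# bin is a small cell of the probability simplex. For every cell, the
# absolute gap between the mean predicted vector and the mean label vector is
# summed over classes and weighted by the fraction of samples in the cell.
# Illustrative call on toy values (for demonstration only):
# >>> Y = label_binarize([0, 1, 2, 1], classes=range(3))
# >>> P = np.array([[.8, .1, .1], [.2, .7, .1], [.1, .2, .7], [.3, .5, .2]])
# >>> full_ECE(Y, P, bins=5)  # 0 would mean every occupied cell is calibrated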
707 | probs = np.array(probs) 708 | if not np.array_equal(probs.shape, y_true.shape): 709 | y_true = label_binarize(np.array(y_true), 710 | classes=range(probs.shape[1])) 711 | 712 | idx = np.digitize(probs, np.linspace(0, 1, bins + 1)) - 1 713 | 714 | filled_bins = np.unique(idx, axis=0) 715 | 716 | s = 0 717 | for bin in filled_bins: 718 | i = np.where((idx == bin).all(axis=1))[0] 719 | s += (len(i)/n) * ( 720 | np.abs(np.mean(probs[i], axis=0) - np.mean(y_true[i], 721 | axis=0))**power 722 | ).sum() 723 | 724 | return s 725 | 726 | 727 | # TODO: Speed up computation. 728 | def _label_resampling(probs): 729 | c = probs.cumsum(axis=1) 730 | u = np.random.rand(len(c), 1) 731 | choices = (u < c).argmax(axis=1) 732 | y = np.zeros_like(probs) 733 | y[range(len(probs)), choices] = 1 734 | return y 735 | 736 | 737 | # Speed up of the previous label_resampling function 738 | def get_one_hot(targets, nb_classes): 739 | res = np.eye(nb_classes)[np.array(targets).reshape(-1)] 740 | return res.reshape(list(targets.shape)+[nb_classes]) 741 | 742 | 743 | def _label_resampling_v2(probs): 744 | c = probs.cumsum(axis=1) 745 | u = np.random.rand(len(c), 1) 746 | choices = (u < c).argmax(axis=1) 747 | y = get_one_hot(choices, probs.shape[1]) 748 | return y 749 | 750 | 751 | # TODO: Speed up computation. 752 | def _score_sampling(probs, samples=10000, ece_function=None): 753 | 754 | probs = np.array(probs) 755 | 756 | return np.array( 757 | [ 758 | ece_function(_label_resampling_v2(probs), probs) for sample in 759 | range(samples) 760 | ] 761 | ) 762 | 763 | 764 | # This uses all available CPUS reducing the time by this factor 765 | def _score_sampling_v2(probs, samples=10000, ece_function=None, 766 | processes=None): 767 | 768 | probs = np.array(probs) 769 | 770 | pool = multiprocessing.Pool(processes=processes) 771 | 772 | probs_list = [probs for i in range(samples)] 773 | labels_sampled = pool.map(_label_resampling_v2, probs_list) 774 | 775 | return np.array(pool.starmap(ece_function, zip(labels_sampled, 776 | probs_list))) 777 | 778 | 779 | def pECE(y_true, probs, samples=10000, ece_function=full_ECE, processes=None): 780 | 781 | probs = np.array(probs) 782 | if not np.array_equal(probs.shape, y_true.shape): 783 | y_true = label_binarize(np.array(y_true), 784 | classes=range(probs.shape[1])) 785 | 786 | return 1 - ( 787 | percentileofscore( 788 | _score_sampling_v2( 789 | probs, 790 | samples=samples, 791 | ece_function=ece_function, 792 | processes=processes), 793 | ece_function(y_true, probs) 794 | ) / 100.0 795 | ) 796 | -------------------------------------------------------------------------------- /pycalib/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .calibrators import (BetaCalibration, 2 | IsotonicCalibration, 3 | LogisticCalibration, 4 | SigmoidCalibration, 5 | BinningCalibration, 6 | CalibratedModel) 7 | -------------------------------------------------------------------------------- /pycalib/models/calibrators.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from copy import deepcopy 4 | 5 | from scipy.special import expit 6 | 7 | from sklearn.base import clone 8 | 9 | from sklearn.utils import check_X_y, indexable 10 | from sklearn.linear_model import LogisticRegression 11 | from sklearn.calibration import _SigmoidCalibration 12 | from sklearn.metrics import log_loss 13 | 14 | import warnings 15 | 16 | from sklearn.base import BaseEstimator, ClassifierMixin, 
RegressorMixin 17 | from sklearn.preprocessing import LabelBinarizer 18 | from sklearn.utils import check_array 19 | from sklearn.utils.validation import check_is_fitted 20 | from inspect import signature 21 | from sklearn.isotonic import IsotonicRegression 22 | from sklearn.svm import LinearSVC 23 | from sklearn.model_selection import check_cv 24 | 25 | from betacal import BetaCalibration 26 | 27 | from typing import Literal 28 | 29 | 30 | class _DummyCalibration(BaseEstimator, RegressorMixin): 31 | """Dummy Calibration model. The purpose of this class is to give 32 | the CalibratedClassifierCV class the option to just return the 33 | probabilities of the base classifier. 34 | """ 35 | def fit(self, *args, **kwargs): 36 | """Does nothing""" 37 | return self 38 | 39 | def predict_proba(self, T): 40 | """Return the probabilities of the base classifier""" 41 | return T 42 | 43 | def predict(self, scores, *args, **kwargs): 44 | proba = self.predict_proba(scores, *args, **kwargs) 45 | return proba.argmax(axis=1) 46 | 47 | 48 | class IsotonicCalibration(IsotonicRegression): 49 | def __init__(self): 50 | super(IsotonicCalibration, self).__init__(y_min=0.0, y_max=1.0, 51 | increasing=True, 52 | out_of_bounds='clip') 53 | 54 | def fit(self, scores, y, *args, **kwargs): 55 | ''' 56 | Score=0 corresponds to y=0, and score=1 to y=1 57 | Parameters 58 | ---------- 59 | scores : array-like, shape = [n_samples,] 60 | Data. 61 | y : array-like, shape = [n_samples, ] 62 | Labels. 63 | Returns 64 | ------- 65 | self 66 | ''' 67 | if len(scores.shape) > 1: 68 | scores = scores[:, 1] 69 | return super(IsotonicCalibration, self).fit(scores, y, *args, **kwargs) 70 | 71 | def predict_proba(self, scores, *args, **kwargs): 72 | if len(scores.shape) > 1: 73 | scores = scores[:, 1] 74 | transformed = self.transform(scores, *args, **kwargs) 75 | if len(transformed.shape) == 1: 76 | transformed = np.vstack((1 - transformed, transformed)).T 77 | return transformed 78 | 79 | def predict(self, scores, *args, **kwargs): 80 | proba = self.predict_proba(scores, *args, **kwargs) 81 | return proba.argmax(axis=1) 82 | 83 | 84 | def logit(x): 85 | eps = np.finfo(x.dtype).eps 86 | x = np.clip(x, eps, 1-eps) 87 | return np.log(x/(1 - x)) 88 | 89 | 90 | def log_encode(x): 91 | eps = np.finfo(x.dtype).eps 92 | x = np.clip(x, eps, 1) 93 | return np.log(x) 94 | 95 | 96 | class LogisticCalibration(LogisticRegression): 97 | """Probability calibration with Logistic Regression aka Platt's scaling 98 | 99 | Parameters 100 | ---------- 101 | C: integer 102 | solver: str 'lbfgs' 103 | multi_class: str 'multinomial' 104 | log_transform: boolean True 105 | 106 | Attributes 107 | ---------- 108 | classes_ : array, shape (n_classes) 109 | The class labels. 110 | 111 | calibrated_classifiers_: list (len() equal to cv or 1 if cv == "prefit") 112 | The list of calibrated classifiers, one for each cross-validation fold, 113 | which has been fitted on all but the validation fold and calibrated 114 | on the validation fold. 115 | 116 | References 117 | ---------- 118 | .. [3] Probabilistic Outputs for Support Vector Machines and Comparisons to 119 | Regularized Likelihood Methods, J. 
Platt, (1999) 120 | """ 121 | def __init__(self, C=1.0, solver='lbfgs', multi_class='multinomial', 122 | log_transform=True): 123 | self.C_grid = C 124 | self.C = C if isinstance(C, float) else C[0] 125 | self.solver = solver 126 | self.log_transform = log_transform 127 | self.encode = log_encode if log_transform else logit 128 | self.multiclass = multi_class 129 | super(LogisticCalibration, self).__init__(C=C, solver=solver, 130 | multi_class=multi_class) 131 | 132 | def fit(self, scores, y, X_val=None, y_val=None, *args, **kwargs): 133 | if isinstance(self.C_grid, list): 134 | calibrators = [] 135 | losses = np.zeros(len(self.C_grid)) 136 | for i, C in enumerate(self.C_grid): 137 | cal = LogisticCalibration(C=C, solver=self.solver, 138 | multi_class=self.multi_class, 139 | log_transform=self.log_transform) 140 | cal.fit(scores, y) 141 | losses[i] = log_loss(y_val, cal.predict_proba(X_val)) 142 | calibrators.append(cal) 143 | best_idx = int(losses.argmin()) 144 | self.C = calibrators[best_idx].C 145 | return super(LogisticCalibration, self).fit(self.encode(scores), y, 146 | *args, **kwargs) 147 | 148 | def predict_proba(self, scores, *args, **kwargs): 149 | return super(LogisticCalibration, 150 | self).predict_proba(self.encode(scores), *args, **kwargs) 151 | 152 | def predict(self, scores, *args, **kwargs): 153 | return super(LogisticCalibration, self).predict(self.encode(scores), 154 | *args, **kwargs) 155 | 156 | 157 | class SigmoidCalibration(_SigmoidCalibration): 158 | def fit(self, scores, y, *args, **kwargs): 159 | if len(scores.shape) > 1: 160 | scores = scores[:, 1] 161 | return super(SigmoidCalibration, self).fit(scores, y, *args, **kwargs) 162 | 163 | def predict_proba(self, scores, *args, **kwargs): 164 | if len(scores.shape) > 1: 165 | scores = scores[:, 1] 166 | transformed = super(SigmoidCalibration, self).predict(scores, *args, 167 | **kwargs) 168 | return np.vstack((1 - transformed, transformed)).T 169 | 170 | def predict(self, *args, **kwargs): 171 | proba = self.predict_proba(*args, **kwargs) 172 | return proba.argmax(axis=1) 173 | 174 | 175 | class BinningCalibration(BaseEstimator, RegressorMixin): 176 | """Probability calibration with Binning calibration. 177 | 178 | Parameters 179 | ---------- 180 | n_bins: integer or list of integers 181 | If integer, the number of bins to create in the score space in order to compute the 182 | true fraction of positives during the training. 183 | If a list of integers, a BinningCalibration method will be fitted for 184 | each number of bins, and the best calibrator evaluated with the 185 | validation set will be selected as final calibrator. 186 | 187 | 188 | strategy: str {'uniform', 'quantile', 'kmeans'} 189 | If uniform: for equal width bins 190 | If quantile: for equal frequency bins 191 | If kmeans: for each bin with same nearest center to a 1D k-means 192 | 193 | alpha: float 194 | Laplace smoothing (x + a)/(N + 2a) 195 | 196 | Attributes 197 | ---------- 198 | classes_ : array, shape (n_classes) 199 | The class labels. 200 | 201 | calibrated_classifiers_: list (len() equal to cv or 1 if cv == "prefit") 202 | The list of calibrated classifiers, one for each cross-validation fold, 203 | which has been fitted on all but the validation fold and calibrated 204 | on the validation fold. 205 | 206 | References 207 | ---------- 208 | .. [1] Obtaining calibrated probability estimates from decision trees 209 | and naive Bayesian classifiers, B. Zadrozny & C. 
Elkan, ICML 2001 210 | """ 211 | _STRATEGIES = Literal["uniform", "quantile", "kmeans"] 212 | 213 | def __init__(self, n_bins=10, strategy: _STRATEGIES = 'uniform', alpha=1.0): 214 | self.strategy = strategy 215 | self.n_bins = n_bins 216 | self.n_bins_grid = n_bins 217 | self.alpha = alpha 218 | 219 | def fit(self, scores, y, X_val=None, y_val=None, *args, **kwargs): 220 | ''' 221 | Score=0 corresponds to y=0, and score=1 to y=1 222 | Parameters 223 | ---------- 224 | scores : array-like, shape = [n_samples,] 225 | Data. 226 | y : array-like, shape = [n_samples, ] 227 | Labels. 228 | Returns 229 | ------- 230 | self 231 | ''' 232 | if isinstance(self.n_bins, list): 233 | if X_val is None or y_val is None: 234 | raise ValueError(('If n_bins is a list, scores_val and y_val' 235 | 'are required during fit')) 236 | calibrators = [] 237 | losses = [] 238 | for n_bins in self.n_bins: 239 | cal = BinningCalibration(n_bins=n_bins, strategy=self.strategy, 240 | alpha=self.alpha) 241 | cal.fit(scores, y) 242 | predict = cal.predict_proba(X_val) 243 | losses.append(log_loss(y_val, predict)) 244 | calibrators.append(cal) 245 | best_idx = np.argmin(losses) 246 | self.n_bins = calibrators[best_idx].n_bins 247 | self.bins = calibrators[best_idx].bins 248 | self.predictions = calibrators[best_idx].predictions 249 | return self 250 | 251 | if len(np.shape(scores)) > 1: 252 | scores = scores[:, 1] 253 | # TODO check that this code is correct: 254 | if self.strategy == 'quantile': 255 | self.bins = np.sort(scores)[::int(np.ceil(len(scores) 256 | / self.n_bins))] 257 | self.bins = np.hstack([self.bins, scores[-1]]) 258 | elif self.strategy == 'uniform': 259 | self.bins = np.linspace(scores.min(), scores.max(), self.n_bins+1) 260 | else: 261 | raise ValueError('Strategy {} not implemented'.format( 262 | self.strategy)) 263 | self.bins[0] = - np.inf 264 | self.bins[-1] = np.inf 265 | s_binned = np.digitize(scores, self.bins) - 1 266 | self.predictions = np.zeros(self.n_bins) 267 | for i in range(self.n_bins): 268 | self.predictions[i] = ((np.sum(y[s_binned == i]) + self.alpha) 269 | / (np.sum(s_binned == i) + self.alpha*2)) 270 | 271 | return self 272 | 273 | def predict_proba(self, scores, *args, **kwargs): 274 | if len(np.shape(scores)) > 1: 275 | scores = scores[:, 1] 276 | s_binned = np.digitize(scores, self.bins) - 1 277 | transformed = self.predictions[s_binned] 278 | return np.vstack((1 - transformed, transformed)).T 279 | 280 | def predict(self, scores, *args, **kwargs): 281 | proba = self.predict_proba(scores, *args, **kwargs) 282 | return proba.argmax(axis=1) 283 | 284 | 285 | class CalibratedModel(BaseEstimator, ClassifierMixin): 286 | ''' Initialize a Calibrated model (classifier + calibrator) 287 | 288 | Parameters 289 | ---------- 290 | base_estimator : instance BaseEstimator 291 | The classifier whose output decision function needs to be calibrated 292 | to offer more accurate predict_proba outputs. If cv=prefit, the 293 | classifier must have been fit already on data. 294 | 295 | calibrator : instance BaseEstimator 296 | The calibrator to use. 
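fit_estimator : bool (default=True)
If True, `base_estimator` is fitted on the training data inside `fit` before its scores are passed to the calibrator; if False it is assumed to be fitted already.

Examples
--------
Minimal usage sketch on toy data (illustrative only; output not shown):

>>> from sklearn.datasets import make_classification
>>> from sklearn.naive_bayes import GaussianNB
>>> from pycalib.models import CalibratedModel, IsotonicCalibration
>>> X, y = make_classification(random_state=42)
>>> cal = CalibratedModel(GaussianNB(), IsotonicCalibration()).fit(X, y)
>>> proba = cal.predict_proba(X)  # calibrated probabilities, shape (n, 2)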
297 | ''' 298 | def __init__(self, base_estimator=None, calibrator=None, 299 | fit_estimator=True): 300 | self.calibrator = clone(calibrator) 301 | self.base_estimator = deepcopy(base_estimator) 302 | self.fit_estimator = fit_estimator 303 | self.binary = False 304 | 305 | def fit(self, X, y, X_val=None, y_val=None, *args, **kwargs): 306 | """Fit the calibrated model 307 | 308 | Parameters 309 | ---------- 310 | X : array-like, shape (n_samples, n_features) 311 | Training data. 312 | 313 | y : array-like, shape (n_samples, n_classes) 314 | Target values. 315 | 316 | Returns 317 | ------- 318 | self : object 319 | Returns an instance of self. 320 | """ 321 | 322 | X, y = check_X_y(X, y, accept_sparse=['csc', 'csr', 'coo'], 323 | multi_output=True) 324 | X, y = indexable(X, y) 325 | 326 | if self.fit_estimator: 327 | self.base_estimator.fit(X, y) 328 | 329 | scores = self.base_estimator.predict_proba(X) 330 | 331 | if scores.shape[1] == 2: 332 | self.binary = True 333 | 334 | if self.binary: 335 | try: 336 | self.calibrator.fit(scores, y, *args, **kwargs) 337 | except ValueError: 338 | self.calibrator.fit(scores[:, 1], y, *args, **kwargs) 339 | else: 340 | self.calibrator.fit(scores, y, *args, **kwargs) 341 | 342 | return self 343 | 344 | def predict_proba(self, X): 345 | """Posterior probabilities of classification 346 | 347 | This function returns posterior probabilities of classification 348 | according to each class on an array of test vectors X. 349 | 350 | Parameters 351 | ---------- 352 | X : array-like, shape (n_samples, n_features) 353 | The samples. 354 | 355 | Returns 356 | ------- 357 | C : array, shape (n_samples, n_classes) 358 | The predicted probas. Can be exact zeros. 359 | """ 360 | 361 | scores = self.base_estimator.predict_proba(X) 362 | 363 | if self.binary: 364 | try: 365 | predictions = self.calibrator.predict_proba(scores) 366 | except ValueError: 367 | predictions = self.calibrator.predict_proba(scores[:, 1]) 368 | 369 | if (len(predictions.shape) == 1) or (predictions.shape[1] == 1): 370 | predictions = np.vstack((1 - predictions, predictions)).T 371 | else: 372 | predictions = self.calibrator.predict_proba(scores) 373 | 374 | return predictions 375 | 376 | def predict(self, X): 377 | """Predict the target of new samples. Can be different from the 378 | prediction of the uncalibrated classifier. 379 | 380 | Parameters 381 | ---------- 382 | X : array-like, shape (n_samples, n_features) 383 | The samples. 384 | 385 | Returns 386 | ------- 387 | C : array, shape (n_samples,) 388 | The predicted class. 389 | """ 390 | check_is_fitted(self, ["calibrator"]) 391 | 392 | return np.argmax(self.predict_proba(X), axis=1) 393 | -------------------------------------------------------------------------------- /pycalib/models/multiclass.py: -------------------------------------------------------------------------------- 1 | # All this code has been adapted from scikit-learn.sklearn.multiclass 2 | # The following is the COPYING clause from Scikit-learn 3 | # 4 | # BSD 3-Clause License 5 | # 6 | # Copyright (c) 2007-2020 The scikit-learn developers. 7 | # All rights reserved. 8 | # 9 | # Redistribution and use in source and binary forms, with or without 10 | # modification, are permitted provided that the following conditions are met: 11 | # 12 | # * Redistributions of source code must retain the above copyright notice, this 13 | # list of conditions and the following disclaimer. 
14 | # 15 | # * Redistributions in binary form must reproduce the above copyright notice, 16 | # this list of conditions and the following disclaimer in the documentation 17 | # and/or other materials provided with the distribution. 18 | # 19 | # * Neither the name of the copyright holder nor the names of its 20 | # contributors may be used to endorse or promote products derived from 21 | # this software without specific prior written permission. 22 | # 23 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 24 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 25 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 26 | # ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 27 | # LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 28 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 29 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 30 | # INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 31 | # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 32 | # ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 33 | # POSSIBILITY OF SUCH DAMAGE. 34 | import inspect 35 | import numpy as np 36 | 37 | from sklearn.base import BaseEstimator, ClassifierMixin, clone 38 | from sklearn.preprocessing import LabelBinarizer 39 | 40 | from joblib import Parallel 41 | from joblib import delayed 42 | 43 | from sklearn.multiclass import _ConstantPredictor 44 | 45 | from sklearn.utils.metaestimators import if_delegate_has_method 46 | from sklearn.utils.validation import check_is_fitted 47 | 48 | import warnings 49 | 50 | 51 | def _fit_binary(estimator, X, y, X_val=None, y_val=None, classes=None): 52 | """Fit a single binary estimator.""" 53 | unique_y = np.unique(y) 54 | if len(unique_y) == 1: 55 | if classes is not None: 56 | if y[0] == -1: 57 | c = 0 58 | else: 59 | c = y[0] 60 | warnings.warn("Label %s is present in all training examples." % 61 | str(classes[c])) 62 | estimator = _ConstantPredictor().fit(X, unique_y) 63 | else: 64 | estimator = clone(estimator) 65 | if X_val is not None and y_val is not None: 66 | estimator.fit(X, y, X_val=X_val, y_val=y_val) 67 | else: 68 | estimator.fit(X, y) 69 | return estimator 70 | 71 | 72 | class OneVsRestCalibrator(BaseEstimator, ClassifierMixin): 73 | """One-vs-the-rest (OvR) multiclass/multilabel strategy 74 | 75 | Also known as one-vs-all, this strategy consists in fitting one calibrator 76 | per class. For each classifier, the class is fitted against all the other 77 | classes. In addition to its computational efficiency (only `n_classes` 78 | classifiers are needed), one advantage of this approach is its 79 | interpretability. Since each class is represented by one and one classifier 80 | only, it is possible to gain knowledge about the class by inspecting its 81 | corresponding classifier. This is the most commonly used strategy for 82 | multiclass classification and is a fair default choice. 83 | This strategy can also be used for multilabel learning, where a classifier 84 | is used to predict multiple labels for instance, by fitting on a 2-d matrix 85 | in which cell [i, j] is 1 if sample i has label j and 0 otherwise. 86 | In the multilabel learning literature, OvR is also known as the binary 87 | relevance method. 88 | Read more in the :ref:`User Guide `. 
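A binary calibrator (for example SigmoidCalibration or BinningCalibration) can be wrapped with this class to obtain a multiclass calibrator: one calibrator is fitted per class against the rest and, for single-label problems, the per-class outputs are renormalized in `predict_proba` so that each row sums to one.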
89 | Parameters 90 | ---------- 91 | estimator : estimator object 92 | An estimator object implementing `fit` and one of `decision_function` 93 | or `predict_proba`. 94 | n_jobs : int, optional, default: 1 95 | The number of jobs to use for the computation. If -1 all CPUs are used. 96 | If 1 is given, no parallel computing code is used at all, which is 97 | useful for debugging. For n_jobs below -1, (n_cpus + 1 + n_jobs) are 98 | used. Thus for n_jobs = -2, all CPUs but one are used. 99 | Attributes 100 | ---------- 101 | estimators_ : list of `n_classes` estimators 102 | Estimators used for predictions. 103 | classes_ : array, shape = [`n_classes`] 104 | Class labels. 105 | label_binarizer_ : LabelBinarizer object 106 | Object used to transform multiclass labels to binary labels and 107 | vice-versa. 108 | multilabel_ : boolean 109 | Whether a OneVsRestClassifier is a multilabel classifier. 110 | """ 111 | def __init__(self, estimator, n_jobs=1, normalize=True): 112 | self.estimator = estimator 113 | self.n_jobs = n_jobs 114 | self.normalize = normalize 115 | 116 | def fit(self, X, y, X_val=None, y_val=None, **kwargs): 117 | """Fit underlying estimators. 118 | 119 | If the number of classes = 2, only one model is trained to predict the 120 | class 1 (second column) 121 | Parameters 122 | ---------- 123 | X : (sparse) array-like, shape = [n_samples, n_classes] 124 | Data. 125 | y : (sparse) array-like, shape = [n_samples, ], [n_samples, n_classes] 126 | Multi-class targets. An indicator matrix turns on multilabel 127 | classification. 128 | Returns 129 | ------- 130 | self 131 | """ 132 | # A sparse LabelBinarizer, with sparse_output=True, has been shown to 133 | # outpreform or match a dense label binarizer in all cases and has also 134 | # resulted in less or equal memory consumption in the fit_ovr function 135 | # overall. 136 | if X.shape[1] == 2: 137 | x_columns = (X[:, 1].ravel().T, ) 138 | else: 139 | x_columns = (col.ravel() for col in X.T) 140 | 141 | self.label_binarizer_ = LabelBinarizer(sparse_output=True) 142 | Y = self.label_binarizer_.fit_transform(y) 143 | Y = Y.tocsc() 144 | self.classes_ = self.label_binarizer_.classes_ 145 | y_columns = (col.toarray().ravel() for col in Y.T) 146 | 147 | if 'X_val' in inspect.getargspec(self.estimator.fit).args \ 148 | and X_val is not None: 149 | if X_val.shape[1] == 2: 150 | x_val_columns = (X_val[:, 1].ravel().T, ) 151 | else: 152 | x_val_columns = (col.ravel() for col in X_val.T) 153 | 154 | Y_val = self.label_binarizer_.transform(y_val) 155 | Y_val = Y_val.tocsc() 156 | y_val_columns = (col.toarray().ravel() for col in Y_val.T) 157 | else: 158 | x_val_columns = [None]*np.shape(Y)[0] 159 | y_val_columns = [None]*np.shape(Y)[0] 160 | 161 | # In cases where individual estimators are very fast to train setting 162 | # n_jobs > 1 in can results in slower performance due to the overhead 163 | # of spawning threads. See joblib issue #112. 164 | self.estimators_ = Parallel(n_jobs=self.n_jobs)(delayed(_fit_binary)( 165 | self.estimator, x_col, y_col, x_val_col, y_val_col, 166 | classes=["not %s" % self.label_binarizer_.classes_[i], 167 | self.label_binarizer_.classes_[i]]) 168 | for i, (x_col, y_col, x_val_col, y_val_col) in enumerate( 169 | zip(x_columns, y_columns, x_val_columns, y_val_columns))) 170 | 171 | return self 172 | 173 | @if_delegate_has_method(['_first_estimator', 'estimator']) 174 | def predict_proba(self, X): 175 | """Probability estimates. 176 | The returned estimates for all classes are ordered by label of classes. 
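Each per-class calibrator is queried with its own scores; when only one estimator was fitted (the binary case) the complementary probability 1 - p is prepended so that two columns are always returned.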
177 | Note that in the multilabel case, each sample can have any number of 178 | labels. This returns the marginal probability that the given sample has 179 | the label in question. For example, it is entirely consistent that two 180 | labels both have a 90% probability of applying to a given sample. 181 | In the single label multiclass case, the rows of the returned matrix 182 | sum to 1. 183 | Parameters 184 | ---------- 185 | X : array-like, shape = [n_samples, n_features] 186 | Returns 187 | ------- 188 | T : (sparse) array-like, shape = [n_samples, n_classes] 189 | Returns the probability of the sample for each class in the model, 190 | where classes are ordered as they are in `self.classes_`. 191 | """ 192 | check_is_fitted(self, 'estimators_') 193 | # Y[i, j] gives the probability that sample i has the label j. 194 | # In the multi-label case, these are not disjoint. 195 | if X.shape[1] == 2: 196 | x_columns = (X[:, 1].ravel().T, ) 197 | else: 198 | x_columns = (col.ravel() for col in X.T) 199 | 200 | # Removed indexing as follows: e.predict_proba(x_column)[:, 1] 201 | Y = np.array([e.predict_proba(x_column) 202 | for (e, x_column) in zip(self.estimators_, x_columns)]).T 203 | 204 | if len(self.estimators_) == 1: 205 | # Only one estimator, but we still want to return probabilities 206 | # for two classes. 207 | Y = np.concatenate(((1 - Y), Y), axis=1) 208 | 209 | if not self.multilabel_: 210 | # Then, probabilities should be normalized to 1. 211 | Y /= np.sum(Y, axis=1)[:, np.newaxis] 212 | # Change all columns to zero for a uniform prediction 213 | Y[np.isnan(Y)] = 1/Y.shape[1] 214 | 215 | return Y 216 | 217 | @property 218 | def multilabel_(self): 219 | """Whether this is a multilabel classifier""" 220 | return self.label_binarizer_.y_type_.startswith('multilabel') 221 | -------------------------------------------------------------------------------- /pycalib/stats.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import numpy as np 3 | import pandas as pd 4 | 5 | from functools import partial 6 | from scipy.stats import ranksums 7 | from scipy.stats import mannwhitneyu 8 | from scipy.stats import friedmanchisquare 9 | 10 | 11 | TestResult = namedtuple("TestResult", ["statistic", "p_value"]) 12 | 13 | 14 | def compute_friedmanchisquare(table: pd.DataFrame) -> TestResult: 15 | """ Compute Friedman test for repeated samples 16 | 17 | Example: 18 | - n wine judges each rate k different wines. Are any of the k wines 19 | ranked consistently higher or lower than the others? 20 | 21 | Our Calibration case: 22 | - n datasets each rate k different calibration methods. Are any of the 23 | k calibration methods ranked consistently higher or lower than the 24 | others? 
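Example of the expected layout (toy numbers, for illustration only): one row per dataset, one column per calibration method, and each cell holding that method's mean score on the dataset:

>>> import pandas as pd
>>> table = pd.DataFrame({'isotonic': [.10, .20, .15],
...                       'platt': [.12, .22, .16],
...                       'binning': [.20, .30, .25]},
...                      index=['ds1', 'ds2', 'ds3'])
>>> result = compute_friedmanchisquare(table)  # TestResult(statistic, p_value)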
25 | 26 | This will output a statistic and a p-value 27 | SciPy does the following: 28 | - k: is the number of parameters passed to the function 29 | - n: is the length of each array passed to the function 30 | The two options for the given table are: 31 | - k is the datasets: table['mean'].values.tolist() 32 | - k is the calibration methods: table['mean'].T.values.tolist() 33 | """ 34 | if table.shape[1] < 3: 35 | print('Friedman test not appropriate for less than 3 methods') 36 | return TestResult(np.nan, np.nan) 37 | 38 | statistic, p = friedmanchisquare(*table.T.values) 39 | return TestResult(statistic, p) 40 | 41 | 42 | def paired_test(table, stats_func=ranksums): 43 | measure = table.columns.levels[0].values[0] 44 | pvalues = np.zeros((table.columns.shape[0], table.columns.shape[0])) 45 | statistics = np.zeros_like(pvalues) 46 | for i, method_i in enumerate(table.columns.levels[1]): 47 | for j, method_j in enumerate(table.columns.levels[1]): 48 | sample_i = table[measure, method_i] 49 | sample_j = table[measure, method_j] 50 | statistic, pvalue = stats_func(sample_i, sample_j) 51 | pvalues[i, j] = pvalue 52 | statistics[i, j] = statistic 53 | index = pd.MultiIndex.from_product([table.columns.levels[1], 54 | ['statistic']]) 55 | df_statistics = pd.DataFrame(statistics, 56 | index=table.columns.levels[1], 57 | columns=index) 58 | index = pd.MultiIndex.from_product([table.columns.levels[1], 59 | ['pvalue']]) 60 | df_pvalues = pd.DataFrame(pvalues, 61 | index=table.columns.levels[1], 62 | columns=index) 63 | return df_statistics.join(df_pvalues) 64 | 65 | 66 | def compute_ranksums(table): 67 | return paired_test(table, stats_func=ranksums) 68 | 69 | 70 | def compute_mannwhitneyu(table): 71 | return paired_test(table, stats_func=partial(mannwhitneyu, 72 | alternative='less')) 73 | -------------------------------------------------------------------------------- /pycalib/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/classifier-calibration/PyCalib/8208ab907d5b9c5149b2d45b1c8b6e4b2d763317/pycalib/tests/__init__.py -------------------------------------------------------------------------------- /pycalib/tests/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/classifier-calibration/PyCalib/8208ab907d5b9c5149b2d45b1c8b6e4b2d763317/pycalib/tests/models/__init__.py -------------------------------------------------------------------------------- /pycalib/tests/models/test_init.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.datasets import make_blobs 5 | from pycalib.models import (IsotonicCalibration, LogisticCalibration, 6 | BinningCalibration, SigmoidCalibration, 7 | CalibratedModel) 8 | from numpy.testing import assert_array_equal 9 | 10 | 11 | class TestIsotonicCalibration(unittest.TestCase): 12 | def test_fit_predict(self): 13 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 14 | Y = np.array([1, 0]) 15 | cal = IsotonicCalibration() 16 | cal.fit(S, Y) 17 | pred = cal.predict(S) 18 | assert_array_equal(Y, pred) 19 | 20 | 21 | class TestLogisticCalibration(unittest.TestCase): 22 | def test_fit_predict(self): 23 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 24 | Y = np.array([1, 0]) 25 | cal = LogisticCalibration() 26 | cal.fit(S, Y) 27 | pred = cal.predict(S) 28 | assert_array_equal(Y, 
pred) 29 | 30 | 31 | class TestBinningCalibration(unittest.TestCase): 32 | def test_fit_predict(self): 33 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 34 | Y = np.array([1, 0]) 35 | cal = BinningCalibration() 36 | cal.fit(S, Y) 37 | pred = cal.predict(S) 38 | assert_array_equal(Y, pred) 39 | 40 | 41 | class TestSigmoidCalibration(unittest.TestCase): 42 | def test_fit_predict(self): 43 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 44 | Y = np.array([1, 0]) 45 | cal = SigmoidCalibration() 46 | cal.fit(S, Y) 47 | pred = cal.predict(S) 48 | assert_array_equal(Y, pred) 49 | 50 | 51 | class TestCalibratedModel(unittest.TestCase): 52 | def test_fit_predict(self): 53 | X, Y = make_blobs(n_samples=10000, centers=5, n_features=2, 54 | random_state=42) 55 | Y = (Y > 2).astype(int) 56 | cal = CalibratedModel(LogisticRegression(), IsotonicCalibration()) 57 | cal.fit(X, Y) 58 | 59 | pred = cal.predict(X) 60 | self.assertGreater(np.mean(Y == pred), 0.7) 61 | 62 | 63 | def main(): 64 | unittest.main() 65 | 66 | 67 | if __name__ == '__main__': 68 | main() 69 | -------------------------------------------------------------------------------- /pycalib/tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import numpy as np 3 | from functools import partial 4 | from pycalib.metrics import (accuracy, cross_entropy, brier_score, 5 | binary_ECE, conf_ECE, classwise_ECE, full_ECE, 6 | MCE, pECE) 7 | 8 | from sklearn.preprocessing import label_binarize 9 | 10 | 11 | # TODO add more test cases 12 | class TestFunctions(unittest.TestCase): 13 | def test_accuracy(self): 14 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 15 | Y = np.array([[0, 1], [0, 1]]) 16 | acc = accuracy(Y, S) 17 | self.assertAlmostEqual(acc, 0.5) 18 | 19 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 20 | Y = np.array([[1, 0], [0, 1]]) 21 | acc = accuracy(Y, S) 22 | self.assertAlmostEqual(acc, 0.0) 23 | 24 | def test_cross_entropy(self): 25 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 26 | Y = np.array([[0, 1], [0, 1]]) 27 | ce = cross_entropy(Y, S) 28 | expected = - (np.log(0.9) + np.log(0.4))/2 29 | self.assertAlmostEqual(ce, expected) 30 | 31 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 32 | Y = np.array([[1, 0], [0, 1]]) 33 | ce = cross_entropy(Y, S) 34 | expected = - (np.log(0.1) + np.log(0.4))/2 35 | self.assertAlmostEqual(ce, expected) 36 | 37 | def test_brier_score(self): 38 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 39 | Y = np.array([[0, 1], [0, 1]]) 40 | bs = brier_score(Y, S) 41 | expected = np.mean(np.abs(S - Y)**2) 42 | self.assertAlmostEqual(bs, expected) 43 | 44 | S = np.array([[0.1, 0.9], [0.6, 0.4]]) 45 | Y = np.array([[1, 0], [0, 1]]) 46 | bs = brier_score(Y, S) 47 | expected = np.mean(np.abs(S - Y)**2) 48 | self.assertAlmostEqual(bs, expected) 49 | 50 | def test_binary_ece(self): 51 | S = np.array([.6, .6, .6, .6, .6, .6, .6, .6, .6, .6]) 52 | y = np.array([1, 1, 1, 1, 1, 1, 0, 0, 0, 0]) 53 | ece = binary_ECE(y, S) 54 | self.assertAlmostEqual(ece, 0) 55 | 56 | def test_conf_ece(self): 57 | S = np.array([[0.6, 0.2, 0.2]]*10) 58 | y = [0, 0, 0, 0, 0, 0, 1, 1, 2, 2] 59 | Y = label_binarize(y, classes=range(3)) 60 | cece = conf_ECE(Y, S) 61 | self.assertAlmostEqual(cece, 0) 62 | # TODO Add more tests 63 | 64 | def test_classwise_ece(self): 65 | S = np.array([[0.6, 0.2, 0.2]]*10) 66 | Y = label_binarize([0, 0, 0, 0, 0, 0, 1, 1, 2, 2], classes=range(3)) 67 | ece = classwise_ECE(Y, S) 68 | self.assertAlmostEqual(ece, 0) 69 | # TODO Add more tests 70 | 71 | def test_full_ece(self): 72 | S = 
np.array([[0.6, 0.2, 0.2]]*10) 73 | Y = label_binarize([0, 0, 0, 0, 0, 0, 1, 1, 2, 2], classes=range(3)) 74 | ece = full_ECE(Y, S) 75 | self.assertAlmostEqual(ece, 0) 76 | # TODO Add more tests 77 | 78 | def test_conf_mce(self): 79 | S = np.ones((2, 3))/3.0 80 | y = np.array([0, 0]) 81 | mce = MCE(y, S) 82 | self.assertAlmostEqual(mce, 2.0/3) 83 | 84 | y = np.array([0, 1, 2]) 85 | S = np.array([[1/3, 0.3, 0.3], 86 | [1/3, 0.3, 0.3], 87 | [1/3, 0.3, 0.3]]) 88 | mce = MCE(y, S) 89 | self.assertAlmostEqual(mce, 0.0) 90 | 91 | y = np.array([0, 1, 2]) 92 | S = np.array([[0.3, 1/3, 0.3], 93 | [0.3, 1/3, 0.3], 94 | [0.3, 1/3, 0.3]]) 95 | mce = MCE(y, S) 96 | self.assertAlmostEqual(mce, 0.0) 97 | 98 | y = np.array([0, 1, 2]) 99 | S = np.array([[0.3, 0.3, 1/3], 100 | [0.3, 0.3, 1/3], 101 | [0.3, 0.3, 1/3]]) 102 | mce = MCE(y, S) 103 | self.assertAlmostEqual(mce, 0.0) 104 | 105 | Y = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) 106 | S = np.array([[0.3, 0.3, 1/3], 107 | [0.3, 0.3, 1/3], 108 | [0.3, 0.3, 1/3]]) 109 | mce = MCE(Y, S) 110 | self.assertAlmostEqual(mce, 0.0) 111 | 112 | Y = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], 113 | [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0]]) 114 | S = np.array([[0.4, 0.3, 0.3], # correct 115 | [0.3, 0.4, 0.3], # incorrect 116 | [0.3, 0.3, 0.4], # incorrect 117 | [0.3, 0.3, 0.4], # incorrect 118 | 119 | [0.1, 0.7, 0.2], # incorrect mean conf 0.75 120 | [0.2, 0.1, 0.7], # incorrect 121 | [0.2, 0.8, 0.2], # incorrect 122 | [0.8, 0.1, 0.1] # incorrect 123 | ]) 124 | mce = MCE(Y, S, bins=2) 125 | self.assertEqual(mce, 0.75) 126 | 127 | Y = np.array([[1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0], 128 | [1, 0, 0], [1, 0, 0], [1, 0, 0], [0, 1, 0]]) 129 | S = np.array([[0.4, 0.3, 0.3], # correct # conf 0.4 130 | [0.3, 0.4, 0.3], # incorrect 131 | [0.3, 0.3, 0.4], # incorrect 132 | [0.3, 0.3, 0.4], # incorrect 133 | 134 | [0.1, 0.7, 0.2], # incorrect 135 | [0.7, 0.1, 0.2], # correct 136 | [0.8, 0.0, 0.2], # correct 137 | [0.1, 0.8, 0.1] # correct 138 | ]) 139 | mce = MCE(Y, S, bins=2) 140 | self.assertAlmostEqual(mce, 0.4 - 1/4) 141 | 142 | def test_calibrated_p_ece(self): 143 | p = np.random.rand(5000, 3) 144 | p /= p.sum(axis=1)[:, None] 145 | multinomial = partial(np.random.multinomial, 1) 146 | y = np.apply_along_axis(multinomial, 1, p) 147 | calibrated_pECE = pECE(y, p, samples=2000, ece_function=classwise_ECE) 148 | # FIXME Reduce computation and increase threshold to 0.04 149 | self.assertGreater(calibrated_pECE, 0.02) 150 | calibrated_pECE = pECE(y, p, samples=2000, ece_function=conf_ECE) 151 | # FIXME Reduce computation and increase threshold to 0.04 152 | self.assertGreater(calibrated_pECE, 0.02) 153 | 154 | def test_uncalibrated_p_ece(self): 155 | p = np.random.rand(1000, 3) 156 | p /= p.sum(axis=1)[:, None] 157 | y = np.eye(3)[np.random.choice([0, 1, 2], size=p.shape[0])] 158 | uncalibrated_pECE = pECE(y, p, samples=1000, 159 | ece_function=classwise_ECE) 160 | self.assertLess(uncalibrated_pECE, 0.04) 161 | uncalibrated_pECE = pECE(y, p, samples=1000, ece_function=conf_ECE) 162 | self.assertLess(uncalibrated_pECE, 0.04) 163 | 164 | 165 | def main(): 166 | unittest.main() 167 | 168 | 169 | if __name__ == '__main__': 170 | main() 171 | -------------------------------------------------------------------------------- /pycalib/utils.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | 5 | def multiindex_to_strings(index): 6 | if isinstance(index, pd.MultiIndex): 7 | return 
[' '.join(col).strip() for col in index.values] 8 | return [''.join(col).strip() for col in index.values] 9 | 10 | 11 | def df_normalise(df, columns=True): 12 | ''' 13 | rows: bool 14 | Normalize each column to sum to one, or each row to sum to one 15 | ''' 16 | if columns: 17 | return df/df.sum(axis=0) 18 | return (df.T/df.sum(axis=1)).T 19 | 20 | 21 | def get_binned_scores(labels, scores, bins=10): 22 | ''' 23 | Parameters 24 | ========== 25 | labels : array (n_samples, ) 26 | Labels indicating the true class. 27 | scores : matrix (n_samples, ) 28 | Output probability scores for one or several methods. 29 | bins : int or list of floats 30 | Number of bins to create in the scores' space, or list of bin 31 | boundaries. 32 | ''' 33 | if isinstance(bins, int): 34 | n_bins = bins 35 | bins = np.linspace(0, 1 + 1e-8, n_bins + 1) 36 | elif isinstance(bins, list) or isinstance(bins, np.ndarray): 37 | n_bins = len(bins) - 1 38 | bins = np.array(bins) 39 | if bins[0] == 0.0: 40 | bins[0] = 0 - 1e-8 41 | if bins[-1] == 1.0: 42 | bins[-1] = 1 + 1e-8 43 | 44 | scores = np.clip(scores, a_min=0, a_max=1) 45 | 46 | bin_idx = np.digitize(scores, bins) - 1 47 | 48 | bin_true = np.bincount(bin_idx, weights=labels, 49 | minlength=n_bins) 50 | bin_pred = np.bincount(bin_idx, weights=scores, 51 | minlength=n_bins) 52 | bin_total = np.bincount(bin_idx, minlength=n_bins) 53 | 54 | zero_idx = bin_total == 0 55 | avg_true = np.empty(bin_total.shape[0]) 56 | avg_true.fill(np.nan) 57 | avg_true[~zero_idx] = np.divide(bin_true[~zero_idx], 58 | bin_total[~zero_idx]) 59 | avg_pred = np.empty(bin_total.shape[0]) 60 | avg_pred.fill(np.nan) 61 | avg_pred[~zero_idx] = np.divide(bin_pred[~zero_idx], 62 | bin_total[~zero_idx]) 63 | return avg_true, avg_pred, bin_true, bin_total 64 | -------------------------------------------------------------------------------- /pycalib/visualisations/__init__.py: -------------------------------------------------------------------------------- 1 | from .plot import (plot_reliability_diagram_precomputed, 2 | plot_reliability_diagram, 3 | plot_binary_reliability_diagram_gaps, 4 | plot_multiclass_reliability_diagram_gaps, 5 | plot_confusion_matrix, 6 | plot_individual_pdfs, 7 | plot_critical_difference, 8 | plot_df_to_heatmap, 9 | plot_calibration_map) 10 | -------------------------------------------------------------------------------- /pycalib/visualisations/barycentric.py: -------------------------------------------------------------------------------- 1 | # Code is an adaptation from 2 | # http://blog.bogatron.net/blog/2014/02/02/visualizing-dirichlet-distributions/ 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | 7 | import matplotlib.tri as tri 8 | from matplotlib import ticker 9 | 10 | 11 | def xy2bc(xy, tol=1.e-32): 12 | '''Converts 2D Cartesian coordinates to barycentric.''' 13 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 14 | # Mid-points of triangle sides opposite of each corner 15 | midpoints = [(corners[(i + 1) % 3] + corners[(i + 2) % 3]) / 2.0 16 | for i in range(3)] 17 | 18 | s = [(corners[i] - midpoints[i]).dot(xy - midpoints[i]) / 0.75 19 | for i in range(3)] 20 | return np.clip(s, tol, 1.0 - tol) 21 | 22 | 23 | def bc2xy(pvalues, corners): 24 | return np.dot(pvalues, corners) 25 | 26 | 27 | def draw_tri_samples(pvals, classes, labels=None, fig=None, ax=None, 28 | handles=None, grid=True, **kwargs): 29 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 30 | pvals = pvals[:, :3].copy() 31 | 32 | if fig is None: 33 | fig = plt.figure() 34 | 
if ax is None: 35 | ax = fig.add_subplot(111) 36 | 37 | if labels is None: 38 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 39 | center = corners.mean(axis=0) 40 | for i, corner in enumerate(corners): 41 | text_x, text_y = corner - (center - corner)*0.1 42 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 43 | horizontalalignment='center') 44 | 45 | xy = bc2xy(pvals, corners) 46 | ax.scatter(xy[:, 0], xy[:, 1], c=classes, **kwargs) 47 | 48 | if handles is not None: 49 | ax.legend(handles=handles) 50 | 51 | ax.axis('equal') 52 | ax.set_xlim(0, 1) 53 | ax.set_ylim(0, 0.75**0.5) 54 | ax.set_xbound(lower=-0.01, upper=1.01) 55 | ax.set_ybound(lower=-0.01, upper=(0.75**0.5)+0.01) 56 | ax.axis('off') 57 | 58 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 59 | 60 | if grid: 61 | refiner = tri.UniformTriRefiner(triangle) 62 | trimesh = refiner.refine_triangulation(subdiv=4) 63 | ax.triplot(trimesh, c='gray', lw=0.2) 64 | 65 | ax.triplot(triangle, c='k', lw=0.5) 66 | 67 | 68 | def get_func_mesh_values(func, subdiv=8): 69 | ''' 70 | Gets the values returned by the function func in a triangular mesh grid 71 | ''' 72 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 73 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 74 | 75 | refiner = tri.UniformTriRefiner(triangle) 76 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 77 | vals = np.array([func(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]) 78 | return vals 79 | 80 | 81 | def get_mesh_xy(subdiv=8): 82 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 83 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 84 | 85 | refiner = tri.UniformTriRefiner(triangle) 86 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 87 | return zip(trimesh.x, trimesh.y) 88 | 89 | 90 | def get_mesh_bc(**kwargs): 91 | mesh_xy = get_mesh_xy(**kwargs) 92 | mesh_bc = np.array([xy2bc(xy) for xy in mesh_xy]) 93 | return mesh_bc 94 | 95 | 96 | def draw_pdf_contours(dist, **kwargs): 97 | draw_func_contours(dist.pdf, **kwargs) 98 | 99 | 100 | # TODO Speed up function. 101 | def draw_func_contours(func, labels=None, nlevels=200, subdiv=8, fig=None, 102 | ax=None, grid=True, **kwargs): 103 | ''' 104 | Parameters: 105 | ----------- 106 | labels: None, string or list of strings 107 | If labels == 'auto' it shows the class number on each corner 108 | If labels is a list of strings it shows each string in the 109 | corresponding corner 110 | If None does not show any label 111 | ''' 112 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 113 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 114 | 115 | refiner = tri.UniformTriRefiner(triangle) 116 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 117 | 118 | z = np.array([func(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]) 119 | 120 | if fig is None: 121 | fig = plt.figure() 122 | if ax is None: 123 | ax = fig.add_subplot(111) 124 | 125 | # FIXME I would like the following line to work, but the max value is not 126 | # shown. I had to do create manually the levels and increase the max value 127 | # by an epsilon. 
This could be a major problem if the epsilon is not small 128 | # for the original range of values 129 | # contour = ax.tricontourf(trimesh, z, nlevels, **kwargs) 130 | # contour = ax.tricontourf(trimesh, z, nlevels, extend='both') 131 | is_nan = ~np.isfinite(z) 132 | # z[is_nan] = 0 133 | nan_id = np.where(is_nan)[0] 134 | triangles_mask = np.zeros(trimesh.triangles.shape[0]) 135 | for ni in nan_id: 136 | for i in range(trimesh.triangles.shape[0]): 137 | if ni in trimesh.triangles[i]: 138 | triangles_mask[i] = 1 139 | trimesh.set_mask(triangles_mask) 140 | if not np.all(triangles_mask): 141 | contour = ax.tricontourf(trimesh, z, 142 | levels=np.linspace(z[~is_nan].min(), 143 | z[~is_nan].max()+1e-9, 144 | nlevels), 145 | **kwargs) 146 | 147 | # Colorbar 148 | cb = fig.colorbar(contour, ax=ax, fraction=0.1, 149 | orientation='horizontal') 150 | tick_locator = ticker.MaxNLocator(nbins=5) 151 | cb.locator = tick_locator 152 | # cb.ax.xaxis.set_major_locator(ticker.AutoLocator()) 153 | cb.update_ticks() 154 | 155 | if labels is not None: 156 | if labels == 'auto': 157 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 158 | center = corners.mean(axis=0) 159 | for i, corner in enumerate(corners): 160 | text_x, text_y = corner - (center - corner)*0.1 161 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 162 | horizontalalignment='center') 163 | 164 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 165 | 166 | if grid: 167 | refiner = tri.UniformTriRefiner(triangle) 168 | trimesh = refiner.refine_triangulation(subdiv=4) 169 | ax.triplot(trimesh, c='gray', lw=0.2) 170 | 171 | ax.triplot(triangle, c='k', lw=0.8) 172 | 173 | # Axes options 174 | ax.set_xlim(xmin=0, xmax=1) 175 | ax.set_ylim(ymin=0, ymax=0.75**0.5) 176 | ax.set_xbound(lower=0, upper=1) 177 | ax.set_ybound(lower=0, upper=0.75**0.5) 178 | ax.axis('equal') 179 | ax.axis('off') 180 | plt.gca().set_adjustable("box") 181 | 182 | 183 | def plot_individual_pdfs(class_dist, *args, **kwargs): 184 | fig = plt.figure(figsize=(16, 5)) 185 | for i, (p, d) in enumerate(zip(class_dist.priors, 186 | class_dist.distributions)): 187 | ax = fig.add_subplot(1, len(class_dist.distributions), i+1) 188 | ax.set_title('$P(Y={})={}$\n$\\mathcal{{D}}_{}(\\alpha={})$'.format( 189 | i+1, p, i+1, str(d)), loc='left') 190 | draw_pdf_contours(d, labels='auto', fig=fig, ax=ax, *args, **kwargs) 191 | return fig 192 | 193 | 194 | # FIXME remove pandas dependency from this function 195 | # def plot_marginal(func, mesh, c, ax1, ax2): 196 | # values = np.array([func(bc) for bc in mesh]).reshape(-1, 1) 197 | # df = pd.DataFrame(np.concatenate((mesh, values), axis=1), 198 | # df.plot(kind='scatter', x=c, y='P', alpha=0.1, ax=ax1) 199 | # ax2.set_title('Class {} marginal'.format(c)) 200 | # table = df.pivot_table(index=c, values='P') 201 | # table.reset_index(inplace=True) 202 | # table.columns = [c, 'P'] 203 | # table.plot(kind='scatter', x=c, y='P', alpha=0.2, ax=ax2) 204 | 205 | 206 | def plot_converging_lines_pvalues(func, lines, i, ax): 207 | ''' 208 | Plots the probability values of the given function for each given line. 
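Each element of `lines` is expected to be an array of barycentric points with at least three columns; `func` is evaluated at every point and the resulting values are plotted against the point's coordinate for class `i`.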
209 | The i indicates the class index from 0 to 2 210 | ''' 211 | # This orders the classes in the following manner: 212 | # C1, C2, C3 213 | # C2, C3, C1 214 | # C3, C1, C2 215 | classes = np.roll(np.array([0, 1, 2]), -i) 216 | 217 | for j, line in enumerate(lines): 218 | pvalues = np.array([func(p) for p in line]).flatten() 219 | if len(lines) == 1: 220 | label = r'$C_{} = 1/2, C_{} = 1/2$'.format( 221 | classes[1]+1, classes[2]+1) 222 | else: 223 | label = r'$C_{} = {}/{}, C_{} = {}/{}$'.format( 224 | classes[1]+1, j, len(lines)-1, 225 | classes[2]+1, len(lines)-j-1, len(lines)-1) 226 | ax.plot(line[:, i], pvalues, label=label) 227 | ax.legend() 228 | 229 | 230 | def draw_calibration_map(original_p, calibrated_p, labels=None, fig=None, 231 | ax=None, handles=None, subdiv=5, color=None, 232 | **kwargs): 233 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 234 | original_p = original_p[:, :3].copy() 235 | calibrated_p = calibrated_p[:, :3].copy() 236 | 237 | if fig is None: 238 | fig = plt.figure() 239 | if ax is None: 240 | ax = fig.add_subplot(111) 241 | 242 | if labels is None: 243 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 244 | center = corners.mean(axis=0) 245 | for i, corner in enumerate(corners): 246 | text_x, text_y = corner - (center - corner)*0.1 247 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 248 | horizontalalignment='center') 249 | 250 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 251 | ax.triplot(triangle, c='k', lw=0.8, zorder=2) 252 | 253 | refiner = tri.UniformTriRefiner(triangle) 254 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 255 | ax.triplot(trimesh, c='gray', lw=0.2, zorder=1) 256 | 257 | o_xy = bc2xy(original_p, corners) 258 | c_xy = bc2xy(calibrated_p, corners) - o_xy 259 | # ax.scatter(xy[:, 0], xy[:, 1], **kwargs) 260 | ax.quiver(o_xy[:, 0], o_xy[:, 1], c_xy[:, 0], c_xy[:, 1], scale=1, 261 | color=color, angles='xy', zorder=3, **kwargs) 262 | 263 | if handles is not None: 264 | ax.legend(handles=handles) 265 | 266 | ax.axis('equal') 267 | ax.set_xlim(0, 1) 268 | ax.set_ylim(0, 0.75**0.5) 269 | ax.set_xbound(lower=-0.01, upper=1.01) 270 | ax.set_ybound(lower=-0.01, upper=(0.75**0.5)+0.01) 271 | ax.axis('off') 272 | 273 | return fig 274 | -------------------------------------------------------------------------------- /pycalib/visualisations/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import itertools 3 | 4 | import matplotlib.pyplot as plt 5 | import matplotlib.ticker as mticker 6 | from matplotlib.ticker import MaxNLocator 7 | from mpl_toolkits.axes_grid1 import make_axes_locatable 8 | 9 | from sklearn.preprocessing import OneHotEncoder 10 | from sklearn.preprocessing import label_binarize 11 | 12 | from statsmodels.stats.proportion import proportion_confint 13 | 14 | from matplotlib import gridspec 15 | 16 | from pycalib.utils import (df_normalise, multiindex_to_strings, 17 | get_binned_scores) 18 | 19 | 20 | def plot_reliability_diagram_precomputed(avg_true, avg_pred, 21 | legend=None, 22 | class_names=None, 23 | fig=None, 24 | fmt='s-', 25 | show_correction=False, 26 | show_gaps=False, 27 | color_list=None, 28 | color_gaps='lightcoral'): 29 | """ Plots the reliability diagram for precomputed averaged scores and labels 30 | 31 | NOTE: This function is currently a copy from plot_reliability_diagram and 32 | modified to accept average scores and true proportions. 
In the future both 33 | functions may be merged or share common private functions. 34 | Parameters 35 | ========== 36 | avg_true : matrix (n_bins, n_classes) or list of matrices 37 | True proportions per class. 38 | avg_pred : matrix (n_bins, n_classes) or list of matrices 39 | Output probability scores for one or several methods. 40 | legend : list of strings or None 41 | Text to use for the legend. 45 | class_names : list of strings or None 46 | Name of each class, if None it will assign integer numbers starting 47 | with 1. 48 | fig : matplotlib.pyplot.Figure or None 49 | Figure to use for the plots, if None a new figure is created. 56 | fmt : string (default: 's-') 57 | Format of the lines following the matplotlib.pyplot.plot standard. 58 | show_correction : boolean 59 | If True shows an arrow for each bin indicating the necessary correction 60 | to the average scores in order to be perfectly calibrated. 61 | show_gaps : boolean 62 | If True shows the gap between the average predictions and the true 63 | proportion of positive samples. 67 | color_list : list of strings or None 68 | List of string colors indicating the color of each method. 69 | color_gaps : string 70 | Color of the gaps (if shown).
71 | 72 | Returns 73 | ======= 74 | fig : matplotlib.pyplot.figure 75 | Figure with the reliability diagram 76 | """ 77 | if isinstance(avg_true, list): 78 | avg_true_list = avg_true 79 | else: 80 | avg_true_list = [avg_true, ] 81 | if isinstance(avg_pred, list): 82 | avg_pred_list = avg_pred 83 | else: 84 | avg_pred_list = [avg_pred, ] 85 | 86 | n_classes = avg_true_list[0].shape[1] 87 | n_scores = len(avg_true_list) 88 | 89 | if color_list is None: 90 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color'] 91 | 92 | if class_names is None: 93 | class_names = [str(i+1) for i in range(n_classes)] 94 | 95 | if n_classes == 2: 96 | avg_pred_list = [pred[:, 1].reshape(-1, 1) for pred in avg_pred_list] 97 | class_names = [class_names[1], ] 98 | 99 | n_columns = n_classes if n_classes != 2 else 1 100 | 101 | if fig is None: 102 | fig = plt.figure(figsize=(n_columns*4, 4)) 103 | 104 | spec = gridspec.GridSpec(ncols=n_columns, nrows=1, wspace=0.02, 105 | hspace=0.04, left=0.15) 106 | 107 | for i in range(n_columns): 108 | ax1 = fig.add_subplot(spec[i]) 109 | # Perfect calibration 110 | ax1.plot([0, 1], [0, 1], "--", color='lightgrey', 111 | zorder=10) 112 | 113 | for j in range(n_scores): 114 | # bin_total = bin_total_list[j][:, i] 115 | pred_sort_idx = np.argsort(avg_pred_list[j][:, i]) 116 | avg_true = avg_true_list[j][pred_sort_idx, i] 117 | avg_pred = avg_pred_list[j][pred_sort_idx, i] 118 | 119 | name = legend[j] if legend else None 120 | ax1.plot(avg_pred, avg_true, fmt, label=name, color=color_list[j]) 121 | 122 | if show_correction: 123 | for ap, at in zip(avg_pred, avg_true): 124 | ax1.arrow(ap, at, at - ap, 0, color=color_gaps, 125 | head_width=0.02, length_includes_head=True, 126 | width=0.01) 127 | 128 | if show_gaps: 129 | for ap, at in zip(avg_pred, avg_true): 130 | error = avg_pred - avg_true 131 | negative_values = error < 0 132 | ygaps = np.zeros(shape=(2, avg_true.shape[0])) 133 | ygaps[0, negative_values] = - error[negative_values] 134 | ygaps[1, ~negative_values] = error[~negative_values] 135 | ax1.errorbar(avg_pred, avg_true, yerr=ygaps, fmt=" ", 136 | color=color_gaps, lw=4, capsize=5, capthick=1, 137 | zorder=10) 138 | 139 | ax1.set_xlim([0, 1]) 140 | ax1.set_ylim([0, 1]) 141 | ax1.set_xlabel('Average score (Class {})'.format(class_names[i])) 142 | if i == 0: 143 | ax1.set_ylabel('Fraction of positives') 144 | else: 145 | ax1.set_yticklabels([]) 146 | nbins = len(ax1.get_xticklabels()) 147 | ax1.xaxis.set_major_locator(MaxNLocator(nbins=nbins, 148 | prune='lower')) 149 | ax1.grid(True) 150 | ax1.set_axisbelow(True) 151 | 152 | if legend is not None: 153 | lines, labels = fig.axes[0].get_legend_handles_labels() 154 | fig.legend(lines, labels, loc='upper center', 155 | bbox_to_anchor=(0, 0, 1, 1), 156 | bbox_transform=fig.transFigure, ncol=6) 157 | 158 | fig.align_labels() 159 | return fig 160 | 161 | 162 | def plot_reliability_diagram(labels, scores, legend=None, 163 | show_histogram=True, 164 | bins=10, class_names=None, fig=None, 165 | show_counts=False, errorbar_interval=None, 166 | interval_method='beta', fmt='s-', 167 | show_correction=False, 168 | show_gaps=False, 169 | sample_proportion=0, 170 | hist_per_class=False, 171 | color_list=None, 172 | show_bars=False, 173 | invert_histogram=False, 174 | color_gaps='lightcoral', 175 | confidence=False, 176 | ax=None): 177 | """ Plots the reliability diagram of the given scores and true labels 178 | 179 | Parameters 180 | ========== 181 | labels : array (n_samples, ) 182 | Labels indicating the true class.
183 | scores : matrix (n_samples, n_classes) or list of matrices 184 | Output probability scores for one or several methods. 185 | legend : list of strings or None 186 | Text to use for the legend. 187 | show_histogram : boolean 188 | If True, it generates an additional figure showing the number of 189 | samples in each bin. 190 | bins : int or list of floats 191 | Number of bins to create in the scores' space, or list of bin 192 | boundaries. 193 | class_names : list of strings or None 194 | Name of each class, if None it will assign integer numbers starting 195 | with 1. 196 | fig : matplotlib.pyplot.Figure or None 197 | Figure to use for the plots, if None a new figure is created. 198 | show_counts : boolean 199 | If True shows the number of samples of each bin in its corresponding 200 | line marker. 201 | errorbar_interval : float or None 202 | If a float between 0 and 1 is passed, it shows an errorbar 203 | corresponding to a confidence interval containing the specified 204 | percentile of the data. 205 | interval_method : string (default: 'beta') 206 | Method to estimate the confidence interval which uses the function 207 | proportion_confint from statsmodels.stats.proportion 208 | fmt : string (default: 's-') 209 | Format of the lines following the matplotlib.pyplot.plot standard. 210 | show_correction : boolean 211 | If True shows an arrow for each bin indicating the necessary correction 212 | to the average scores in order to be perfectly calibrated. 213 | show_gaps : boolean 214 | If True shows the gap between the average predictions and the true 215 | proportion of positive samples. 216 | sample_proportion : float in the interval [0, 1] (default 0) 217 | If bigger than 0, it shows the labels of the specified proportion of 218 | samples. 219 | hist_per_class : boolean 220 | If True shows one histogram of the bins per class. 221 | color_list : list of strings or None 222 | List of string colors indicating the color of each method. 223 | show_bars : boolean 224 | If True shows bars instead of lines. 225 | invert_histogram : boolean 226 | If True shows the histogram with the zero on top and highest number of 227 | bin samples at the bottom. 228 | color_gaps : string 229 | Color of the gaps (if shown). 230 | confidence : boolean 231 | If True shows only the confidence reliability diagram. 
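ax : matplotlib.pyplot.Axis or None
    If given, the reliability diagram is drawn on this axis instead of
    creating new subplots in the figure.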
232 | 233 | Returns 234 | ======= 235 | fig : matplotlib.pyplot.figure 236 | Figure with the reliability diagram 237 | """ 238 | if isinstance(scores, list): 239 | scores_list = scores 240 | else: 241 | scores_list = [scores, ] 242 | n_scores = len(scores_list) 243 | if color_list is None: 244 | color_list = plt.rcParams['axes.prop_cycle'].by_key()['color'] 245 | 246 | classes = np.arange(scores_list[0].shape[1]) 247 | n_classes = len(classes) 248 | labels = label_binarize(labels, classes=classes) 249 | 250 | labels_list = [] 251 | if confidence: 252 | labels_idx = np.argmax(labels, axis=1) 253 | new_scores_list = [] 254 | for score in scores_list: 255 | # TODO: randomize selection when there are several winning classes 256 | conf_idx = np.argmax(score, axis=1) 257 | winning_score = np.max(score, axis=1) 258 | new_scores_list.append(np.vstack([1 - winning_score, 259 | winning_score]).T) 260 | labels_list.append((conf_idx.flatten() 261 | == labels_idx.flatten()).astype(int)) 262 | labels_list[-1] = label_binarize(labels_list[-1], classes=[0, 1]) 263 | scores_list = new_scores_list 264 | n_classes = 2 265 | class_names = ['Non winning', 'winning'] 266 | n_columns = 1 267 | else: 268 | n_columns = n_classes 269 | 270 | if class_names is None: 271 | class_names = [str(i+1) for i in range(n_classes)] 272 | 273 | if n_classes == 2: 274 | scores_list = [score[:, 1].reshape(-1, 1) for score in scores_list] 275 | class_names = [class_names[1], ] 276 | n_columns = 1 277 | 278 | if fig is None: 279 | fig = plt.figure(figsize=(n_columns*4, 4)) 280 | 281 | if show_histogram: 282 | spec = gridspec.GridSpec(ncols=n_columns, nrows=2, 283 | height_ratios=[5, 1], 284 | wspace=0.02, 285 | hspace=0.04, 286 | left=0.15) 287 | else: 288 | spec = gridspec.GridSpec(ncols=n_columns, nrows=1, 289 | hspace=0.04, left=0.15) 290 | 291 | if isinstance(bins, int): 292 | n_bins = bins 293 | bins = np.linspace(0, 1 + 1e-8, n_bins + 1) 294 | elif isinstance(bins, list) or isinstance(bins, np.ndarray): 295 | n_bins = len(bins) - 1 296 | bins = np.array(bins) 297 | if bins[0] == 0.0: 298 | bins[0] = 0 - 1e-8 299 | if bins[-1] == 1.0: 300 | bins[-1] = 1 + 1e-8 301 | 302 | for i in range(n_columns): 303 | if ax is not None: 304 | ax1 = ax 305 | else: 306 | ax1 = fig.add_subplot(spec[i]) 307 | # Perfect calibration 308 | ax1.plot([0, 1], [0, 1], "--", color='lightgrey', 309 | zorder=10) 310 | for j, score in enumerate(scores_list): 311 | if labels_list: 312 | labels = labels_list[j] 313 | 314 | avg_true, avg_pred, bin_true, bin_total = get_binned_scores( 315 | labels[:, i], score[:, i], bins=bins) 316 | zero_idx = bin_total == 0 317 | 318 | name = legend[j] if legend else None 319 | if show_bars: 320 | ax1.bar(x=bins[:-1][~zero_idx], height=avg_true[~zero_idx], 321 | align='edge', width=(bins[1:] - bins[:-1])[~zero_idx], 322 | edgecolor='black', color=color_list[j]) 323 | else: 324 | if errorbar_interval is None: 325 | ax1.plot(avg_pred, avg_true, fmt, label=name, 326 | color=color_list[j]) 327 | else: 328 | nozero_intervals = proportion_confint( 329 | count=bin_true[~zero_idx], nobs=bin_total[~zero_idx], 330 | alpha=1-errorbar_interval, 331 | method=interval_method) 332 | nozero_intervals = np.array(nozero_intervals) 333 | 334 | intervals = np.empty((2, bin_total.shape[0])) 335 | intervals.fill(np.nan) 336 | intervals[:, ~zero_idx] = nozero_intervals 337 | 338 | yerr = np.abs(intervals - avg_true) 339 | ax1.errorbar(avg_pred, avg_true, yerr=yerr, label=name, 340 | fmt=fmt, color=color_list[j]) # markersize=5) 341 | 342 | if 
show_counts: 343 | for ap, at, count in zip(avg_pred, avg_true, bin_total): 344 | if np.isfinite(ap) and np.isfinite(at): 345 | ax1.text(ap, at, str(count), fontsize=6, 346 | ha='center', va='center', zorder=11, 347 | bbox=dict(boxstyle='square,pad=0.3', 348 | fc='white', ec=color_list[j])) 349 | 350 | if show_correction: 351 | for ap, at in zip(avg_pred, avg_true): 352 | ax1.arrow(ap, at, at - ap, 0, color=color_gaps, 353 | head_width=0.02, length_includes_head=True, 354 | width=0.01) 355 | 356 | if show_gaps: 357 | for ap, at in zip(avg_pred, avg_true): 358 | error = avg_pred - avg_true 359 | negative_values = error < 0 360 | ygaps = np.zeros(shape=(2, avg_true.shape[0])) 361 | ygaps[0, negative_values] = - error[negative_values] 362 | ygaps[1, ~negative_values] = error[~negative_values] 363 | ax1.errorbar(avg_pred, avg_true, yerr=ygaps, fmt=" ", 364 | color=color_gaps, lw=4, capsize=5, capthick=1, 365 | zorder=10) 366 | 367 | if sample_proportion > 0: 368 | idx = np.random.choice(labels.shape[0], 369 | int(sample_proportion*labels.shape[0])) 370 | ax1.scatter(score[idx, i], labels[idx, i], marker='|', s=100, 371 | alpha=0.2, color=color_list[j]) 372 | 373 | ax1.set_xlim([0, 1]) 374 | ax1.set_ylim([0, 1]) 375 | # ax1.set_title('Class {}'.format(class_names[i])) 376 | if not show_histogram: 377 | ax1.set_xlabel('Average score (Class {})'.format( 378 | class_names[i])) 379 | if i == 0: 380 | ax1.set_ylabel('Fraction of positives') 381 | else: 382 | ax1.set_yticklabels([]) 383 | ax1.grid(True) 384 | ax1.set_axisbelow(True) 385 | 386 | if show_histogram: 387 | divider = make_axes_locatable(ax1) 388 | ax2 = divider.append_axes("bottom", size="20%", pad=0.1, 389 | sharex=ax1) 390 | 391 | # ax2 = fig.add_subplot(spec[n_columns + i], 392 | # label='{}'.format(i)) 393 | for j, score in enumerate(scores_list): 394 | ax1.set_xticklabels([]) 395 | # lines = ax1.get_lines() 396 | # ax2.set_xticklabels([]) 397 | 398 | name = legend[j] if legend else None 399 | if hist_per_class: 400 | for c in [0, 1]: 401 | linestyle = ('dotted', 'dashed')[c] 402 | ax2.hist(score[labels[:, i] == c, i], range=(0, 1), 403 | bins=bins, label=name, 404 | histtype="step", 405 | lw=1, linestyle=linestyle, 406 | color=color_list[j], 407 | edgecolor='black') 408 | else: 409 | if n_scores > 1: 410 | kwargs = {'histtype': 'step', 411 | 'edgecolor': color_list[j]} 412 | else: 413 | kwargs = {'histtype': 'bar', 414 | 'edgecolor': 'black', 415 | 'color': color_list[j]} 416 | ax2.hist(score[:, i], range=(0, 1), bins=bins, label=name, 417 | lw=1, **kwargs) 418 | ax2.set_xlim([0, 1]) 419 | ax2.set_xlabel('Average score (Class {})'.format( 420 | class_names[i])) 421 | ax2.yaxis.set_major_locator(MaxNLocator(integer=True, 422 | prune='upper', 423 | nbins=3)) 424 | if i == 0: 425 | ax2.set_ylabel('Count') 426 | ytickloc = ax2.get_yticks() 427 | ax2.yaxis.set_major_locator(mticker.FixedLocator(ytickloc)) 428 | yticklabels = ['{:0.0f}'.format(value) for value in 429 | ytickloc] 430 | ax2.set_yticklabels(labels=yticklabels, 431 | fontdict=dict(verticalalignment='top')) 432 | else: 433 | ax2.set_yticklabels([]) 434 | nbins = len(ax2.get_xticklabels()) 435 | ax2.xaxis.set_major_locator(MaxNLocator(nbins=nbins, 436 | prune='lower')) 437 | ax2.grid(True, which='both') 438 | ax2.set_axisbelow(True) 439 | if invert_histogram: 440 | ylim = ax2.get_ylim() 441 | ax2.set_ylim(reversed(ylim)) 442 | 443 | if legend is not None: 444 | lines, labels = fig.axes[0].get_legend_handles_labels() 445 | fig.legend(lines, labels, loc='upper center', 446 | 
bbox_to_anchor=(0, 0, 1, 1), 447 | bbox_transform=fig.transFigure, ncol=6) 448 | 449 | fig.align_labels() 450 | return fig 451 | 452 | 453 | def plot_binary_reliability_diagram_gaps(y_true, p_pred, n_bins=15, title=None, 454 | fig=None, ax=None, legend=False, 455 | color_gaps="lightcoral", 456 | show_histogram=False, 457 | color="cornflowerblue"): 458 | """Plot binary reliability diagram gaps 459 | 460 | Parameters 461 | ========== 462 | y_true : np.array shape (n_samples, 2) or (n_samples, ) 463 | Labels corresponding to the scores as a binary indicator matrix or as a 464 | vector of integers indicating the class. 465 | p_pred : binary matrix shape (n_samples, 2) or (n_samples, ) 466 | Output probability scores for each class as a matrix, or for the 467 | positive class. 468 | n_bins : integer 469 | Number of bins to divide the scores 470 | title : string 471 | Title for the plot 472 | fig : matplotlib.pyplot.figure 473 | Figure in which the axis will be drawn 474 | ax : matplotlib.pyplot.Axis 475 | Axis where to draw the plot 476 | legend : boolean 477 | If True the function will draw a legend 478 | 479 | Returns 480 | ======= 481 | fig : matplotlib.pyplot.figure 482 | Figure with the reliability diagram 483 | """ 484 | if fig is None and ax is None: 485 | fig = plt.figure() 486 | if ax is None: 487 | ax = fig.add_subplot() 488 | 489 | if title is not None: 490 | ax.set_title(title) 491 | 492 | if (len(y_true.shape) == 2) and (y_true.shape[1] == 2): 493 | y_true = y_true[:, 1] 494 | if (len(y_true.shape) == 2) and (y_true.shape[1] > 2): 495 | raise ValueError('y_true wrong dimensions {}'.format(y_true.shape)) 496 | 497 | if (len(p_pred.shape) == 2) and (p_pred.shape[1] == 2): 498 | p_pred = p_pred[:, 1] 499 | if (len(p_pred.shape) == 2) and (p_pred.shape[1] > 2): 500 | raise ValueError('p_pred wrong dimensions {}'.format(p_pred.shape)) 501 | 502 | bin_size = 1.0/n_bins 503 | centers = np.linspace(bin_size/2.0, 1.0 - bin_size/2.0, n_bins) 504 | true_proportion = np.zeros(n_bins) 505 | pred_mean = np.zeros(n_bins) 506 | for i, center in enumerate(centers): 507 | if i == 0: 508 | # First bin includes lower bound 509 | bin_indices = np.where(np.logical_and( 510 | p_pred >= center - bin_size/2, 511 | p_pred <= center + bin_size/2)) 512 | else: 513 | bin_indices = np.where(np.logical_and(p_pred > center - bin_size/2, 514 | p_pred <= center + 515 | bin_size/2)) 516 | if len(bin_indices[0]) == 0: 517 | true_proportion[i] = np.nan 518 | pred_mean[i] = np.nan 519 | else: 520 | true_proportion[i] = np.mean(y_true[bin_indices]) 521 | pred_mean[i] = np.nanmean(p_pred[bin_indices]) 522 | 523 | not_nan = np.isfinite(true_proportion - centers) 524 | ax.bar(centers, true_proportion, width=bin_size, edgecolor="black", 525 | # color="blue", label='True class prop.') 526 | color=color, label='True class prop.') 527 | ax.bar(pred_mean[not_nan], (true_proportion - pred_mean)[not_nan], 528 | bottom=pred_mean[not_nan], width=0.01, 529 | edgecolor=color_gaps, 530 | color=color_gaps, 531 | label='Gap pred.
mean', align='center') 532 | 533 | if legend: 534 | ax.legend() 535 | 536 | ax.plot([0, 1], [0, 1], linestyle="--", color='grey', zorder=10) 537 | ax.set_xlim([0, 1]) 538 | ax.set_ylim([0, 1]) 539 | 540 | ax.set_ylabel('Fraction of positives') 541 | ax.grid(True) 542 | ax.set_axisbelow(True) 543 | 544 | if show_histogram: 545 | ax.set_xticklabels([]) 546 | 547 | divider = make_axes_locatable(ax) 548 | ax2 = divider.append_axes("bottom", size="20%", pad=0.1, sharex=ax) 549 | 550 | ax2.hist(p_pred, range=(0, 1), 551 | bins=n_bins, 552 | histtype="bar", 553 | lw=1, 554 | color=color, 555 | edgecolor='black') 556 | 557 | ax2.set_ylabel('Count') 558 | ax2.grid(True, which='both') 559 | ax2.set_axisbelow(True) 560 | ax2.set_xlabel('Predicted probability') 561 | else: 562 | ax.set_xlabel('Predicted probability') 563 | 564 | return fig, ax 565 | 566 | 567 | def plot_multiclass_reliability_diagram_gaps(y_true, p_pred, fig=None, ax=None, 568 | per_class=True, legend=False, 569 | **kwargs): 570 | 571 | if len(y_true.shape) < 2 or y_true.shape[1] == 1: 572 | ohe = OneHotEncoder(categories='auto') 573 | ohe.fit(y_true.reshape(-1, 1)) 574 | y_true = ohe.transform(y_true.reshape(-1, 1)) 575 | 576 | if per_class: 577 | n_classes = y_true.shape[1] 578 | if fig is None and ax is None: 579 | fig = plt.figure(figsize=((n_classes-1)*4, 4)) 580 | if ax is None: 581 | ax = [fig.add_subplot(1, n_classes, i+1) for i in range(n_classes)] 582 | for i in range(n_classes): 583 | if i == 0 and legend: 584 | sub_legend = True 585 | else: 586 | sub_legend = False 587 | plot_binary_reliability_diagram_gaps(y_true[:, i], p_pred[:, i], 588 | title='$C_{}$'.format(i+1), 589 | fig=fig, ax=ax[i], 590 | legend=sub_legend, 591 | **kwargs) 592 | if i > 0: 593 | ax[i].set_ylabel('') 594 | ax[i].set_xlabel('Predicted probability') 595 | else: 596 | if fig is None and ax is None: 597 | fig = plt.figure() 598 | mask = p_pred.argmax(axis=1) 599 | indices = np.arange(p_pred.shape[0]) 600 | y_true = y_true[indices, mask].T 601 | p_pred = p_pred[indices, mask].T 602 | ax = fig.add_subplot(1, 1, 1) 603 | plot_binary_reliability_diagram_gaps(y_true, p_pred, 604 | title=r'$C_1$', 605 | fig=fig, ax=ax, **kwargs) 606 | ax.set_title('') 607 | 608 | return fig 609 | 610 | 611 | def plot_confusion_matrix(cm, classes, normalize=False, 612 | title='Confusion matrix', cmap=plt.cm.Blues, 613 | fig=None, ax=None): 614 | """ 615 | This function prints and plots the confusion matrix. 616 | Normalization can be applied by setting `normalize=True`. 617 | """ 618 | if fig is None: 619 | fig = plt.figure() 620 | 621 | if ax is None: 622 | ax = fig.add_subplot(111) 623 | 624 | if title is not None: 625 | ax.set_title(title) 626 | 627 | if normalize: 628 | cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] 629 | 630 | im = ax.imshow(cm, interpolation='nearest', cmap=cmap) 631 | 632 | # create an axes on the right side of ax. The width of cax will be 5% 633 | # of ax and the padding between cax and ax will be fixed at 0.05 inch. 634 | divider = make_axes_locatable(ax) 635 | cax = divider.append_axes("right", size="5%", pad=0.05) 636 | 637 | fig.colorbar(im, cax=cax) 638 | 639 | tick_marks = np.arange(len(classes)) 640 | ax.set_xticks(tick_marks) 641 | ax.set_xticklabels(classes, rotation=45) 642 | ax.set_yticks(tick_marks) 643 | ax.set_yticklabels(classes) 644 | 645 | fmt = '.2f' if normalize else 'd' 646 | thresh = cm.max() / 2. 
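# Annotate every cell with its value, using white text on cells above the mid-range threshold and black text otherwise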
647 | for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])): 648 | ax.text(j, i, format(cm[i, j], fmt), 649 | horizontalalignment="center", 650 | color="white" if cm[i, j] > thresh else "black") 651 | 652 | ax.set_ylabel('True label') 653 | ax.set_xlabel('Predicted label') 654 | return fig 655 | 656 | 657 | def plot_individual_pdfs(class_dist, x_grid=None, y_grid=None, 658 | grid_levels=200, fig=None, title=None, 659 | cmaps=None, grid=True): 660 | if fig is None: 661 | fig = plt.figure() 662 | 663 | if x_grid is None: 664 | x_grid = np.linspace(-8, 8, grid_levels) 665 | else: 666 | grid_levels = len(x_grid) 667 | 668 | if y_grid is None: 669 | y_grid = np.linspace(-8, 8, grid_levels) 670 | 671 | xx, yy = np.meshgrid(x_grid, y_grid) 672 | 673 | if cmaps is None: 674 | cmaps = [None]*len(class_dist.priors) 675 | 676 | for i, (p, d) in enumerate(zip(class_dist.priors, 677 | class_dist.distributions)): 678 | z = d.pdf(np.vstack([xx.flatten(), yy.flatten()]).T) 679 | 680 | ax = fig.add_subplot(1, len(class_dist.distributions), i+1) 681 | if title is None: 682 | ax.set_title('$P(Y={})={:.2f}$\n{}'.format(i+1, p, str(d)), 683 | loc='left') 684 | else: 685 | ax.set_title(title[i]) 686 | contour = ax.contourf(xx, yy, z.reshape(grid_levels, grid_levels), 687 | cmap=cmaps[i]) 688 | if grid: 689 | ax.grid() 690 | fig.colorbar(contour) 691 | 692 | return fig 693 | 694 | 695 | def plot_critical_difference(avranks, num_datasets, names, title=None, 696 | test='bonferroni-dunn'): 697 | """ 698 | test: string in ['nemenyi', 'bonferroni-dunn'] 699 | - nemenyi two-tailed test (up to 20 methods) 700 | - bonferroni-dunn one-tailed test (only up to 10 methods) 701 | 702 | """ 703 | # Critical difference plot 704 | import Orange 705 | 706 | if len(avranks) > 10: 707 | print('Forcing Nemenyi Critical difference') 708 | test = 'nemenyi' 709 | cd = Orange.evaluation.compute_CD(avranks, num_datasets, alpha='0.05', 710 | test=test) 711 | Orange.evaluation.graph_ranks(avranks, names, cd=cd, width=6, 712 | textspace=1.5) 713 | fig = plt.gcf() 714 | fig.suptitle(title, horizontalalignment='left') 715 | return fig 716 | 717 | 718 | def plot_df_to_heatmap(df, title=None, figsize=None, annotate=True, 719 | normalise_columns=False, normalise_rows=False, 720 | cmap=None): 721 | """ Exports a heatmap of the given pandas DataFrame 722 | 723 | Parameters 724 | ---------- 725 | df: pandas.DataFrame 726 | It should be a matrix, it can have multiple index and these will be 727 | flattened. 728 | 729 | title: string 730 | Title of the figure 731 | 732 | figsize: tuple of ints (x, y) 733 | Figure size in inches 734 | 735 | annotate: bool 736 | If true, adds numbers inside each box 737 | """ 738 | if normalise_columns: 739 | df = df_normalise(df, columns=True) 740 | if normalise_rows: 741 | df = df_normalise(df, columns=False) 742 | 743 | yticklabels = multiindex_to_strings(df.index) 744 | xticklabels = multiindex_to_strings(df.columns) 745 | if figsize is not None: 746 | fig = plt.figure(figsize=figsize) 747 | else: 748 | point_inch_ratio = 72. 
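# No figsize given: estimate the figure width and height in inches from the tick-label lengths, the number of rows and columns, and the current font size (72 points per inch)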
749 | n_rows = df.shape[0] 750 | font_size_pt = plt.rcParams['font.size'] 751 | xlabel_space_pt = max([len(xlabel) for xlabel in xticklabels]) 752 | fig_height_in = (((xlabel_space_pt + n_rows) * (font_size_pt + 3)) 753 | / point_inch_ratio) 754 | 755 | n_cols = df.shape[1] 756 | fig_width_in = df.shape[1]+4 757 | ylabel_space_pt = max([len(ylabel) for ylabel in yticklabels]) 758 | fig_width_in = ((ylabel_space_pt + (n_cols * 3) + 5) 759 | * (font_size_pt + 3)) / point_inch_ratio 760 | fig = plt.figure(figsize=(fig_width_in, fig_height_in)) 761 | 762 | ax = fig.add_subplot(111) 763 | if title is not None: 764 | ax.set_title(title) 765 | cax = ax.pcolor(df, cmap=cmap) 766 | fig.colorbar(cax) 767 | ax.set_yticks(np.arange(0.5, len(df.index), 1)) 768 | ax.set_yticklabels(yticklabels) 769 | ax.set_xticks(np.arange(0.5, len(df.columns), 1)) 770 | ax.set_xticklabels(xticklabels, rotation=45, ha="right") 771 | 772 | middle_value = (df.max().max() + df.min().min())/2.0 773 | if annotate: 774 | for y in range(df.shape[0]): 775 | for x in range(df.shape[1]): 776 | color = 'white' if middle_value > df.values[y, x] else 'black' 777 | plt.text(x + 0.5, y + 0.5, '%.2f' % df.values[y, x], 778 | horizontalalignment='center', 779 | verticalalignment='center', 780 | color=color 781 | ) 782 | return fig 783 | 784 | 785 | def plot_calibration_map(scores_set, prob, legend_set, original_first=False, 786 | alpha=1, **kwargs): 787 | fig_calibration_map = plt.figure('calibration_map') 788 | fig_calibration_map.clf() 789 | ax_calibration_map = plt.subplot(111) 790 | ax = ax_calibration_map 791 | # ax.set_title('calibration map') 792 | ax.set_ylim([0, 1]) 793 | ax.set_xlim([0, 1]) 794 | n_lines = len(legend_set) 795 | if original_first: 796 | bins = np.linspace(0, 1, 11) 797 | hist_tot = np.histogram(prob[0], bins=bins) 798 | hist_pos = np.histogram(prob[0][prob[1] == 1], bins=bins) 799 | edges = np.insert(bins, np.arange(len(bins)), bins) 800 | empirical_p = np.true_divide(hist_pos[0]+alpha, hist_tot[0]+2*alpha) 801 | empirical_p = np.insert(empirical_p, np.arange(len(empirical_p)), 802 | empirical_p) 803 | ax.plot(edges[1:-1], empirical_p, label='empirical') 804 | 805 | skip = original_first 806 | for (scores, legend) in zip(scores_set, legend_set): 807 | if skip and original_first: 808 | skip = False 809 | else: 810 | if legend == 'uncalib': 811 | ax.plot([np.nan], [np.nan], '-', linewidth=n_lines, 812 | **kwargs) 813 | else: 814 | ax.plot(prob[2], scores, '-', label=legend, linewidth=n_lines, 815 | **kwargs) 816 | n_lines -= 1 817 | if original_first: 818 | ax.plot(prob[0], prob[1], 'kx', 819 | label=legend_set[0], markersize=9, markeredgewidth=1) 820 | ax.legend(loc='upper left') 821 | ax.grid(True) 822 | return fig_calibration_map 823 | -------------------------------------------------------------------------------- /pycalib/visualisations/ternary.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | import matplotlib.tri as tri 4 | from matplotlib import ticker 5 | from .barycentric import bc2xy, xy2bc 6 | 7 | 8 | def draw_tri_samples(pvals, classes, labels=None, fig=None, ax=None, 9 | legend=True, color_list=[None]*3, 10 | **kwargs): 11 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 12 | 13 | if fig is None: 14 | fig = plt.figure() 15 | if ax is None: 16 | ax = fig.add_subplot(111) 17 | 18 | if labels is None: 19 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 20 | center = 
corners.mean(axis=0) 21 | for i, corner in enumerate(corners): 22 | text_x, text_y = corner - (center - corner)*0.1 23 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 24 | horizontalalignment='center') 25 | 26 | xy = bc2xy(pvals, corners) 27 | 28 | # TODO Find option to call scatter only once as now the latter classes are 29 | # on top of the previous ones 30 | for c in [0, 1, 2]: 31 | c_idx = classes == c 32 | ax.scatter(xy[c_idx, 0], xy[c_idx, 1], 33 | label=labels[c], color=color_list[c], 34 | **kwargs) 35 | if legend: 36 | leg = ax.legend() 37 | for lh in leg.legendHandles: 38 | lh.set_alpha(1) 39 | 40 | ax.axis('equal') 41 | ax.set_xlim(0, 1) 42 | ax.set_ylim(0, 0.75**0.5) 43 | ax.set_xbound(lower=-0.01, upper=1.01) 44 | ax.set_ybound(lower=-0.01, upper=(0.75**0.5)+0.01) 45 | ax.axis('off') 46 | 47 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 48 | ax.triplot(triangle, c='k', lw=0.5) 49 | 50 | return fig, ax 51 | 52 | 53 | def draw_func_contours(func, labels=None, nlevels=200, subdiv=5, fig=None, 54 | ax=None, draw_lines=None, class_index=0, **kwargs): 55 | """ 56 | Parameters: 57 | ----------- 58 | labels: None, string or list of strings 59 | If labels == 'auto' it shows the class number on each corner 60 | If labels is a list of strings it shows each string in the 61 | corresponding corner 62 | If None does not show any label 63 | """ 64 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 65 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 66 | 67 | refiner = tri.UniformTriRefiner(triangle) 68 | trimesh = refiner.refine_triangulation(subdiv=subdiv) 69 | 70 | pvals = np.array([func(xy2bc(xy)) for xy in zip(trimesh.x, trimesh.y)]) 71 | 72 | if fig is None: 73 | fig = plt.figure() 74 | if ax is None: 75 | ax = fig.add_subplot(111) 76 | 77 | # FIXME I would like the following line to work, but the max value is 78 | # not shown. I had to create the levels manually and increase the 79 | # max value by an epsilon.
This could be a major problem if the epsilon 80 | # is not small for the original range of values 81 | # contour = ax.tricontourf(trimesh, pvals, nlevels, **kwargs) 82 | # contour = ax.tricontourf(trimesh, pvals, nlevels, extend='both') 83 | contour = ax.tricontourf(trimesh, pvals, 84 | levels=np.linspace(pvals.min(), pvals.max()+1e-9, 85 | nlevels), 86 | **kwargs) 87 | 88 | # Colorbar 89 | # TODO See if the following way to define the size of the bar can be used 90 | # from mpl_toolkits.axes_grid1 import make_axes_locatable 91 | # divider = make_axes_locatable(ax) 92 | # cax = divider.append_axes("bottom", size="5%", pad=0.1) 93 | # cb = fig.colorbar(contour, ax=cax, orientation='horizontal') 94 | cb = fig.colorbar(contour, ax=ax, orientation='horizontal', 95 | fraction=0.05, pad=0.06) 96 | tick_locator = ticker.MaxNLocator(nbins=5) 97 | cb.locator = tick_locator 98 | # cb.ax.xaxis.set_major_locator(ticker.AutoLocator()) 99 | cb.update_ticks() 100 | 101 | if labels is None: 102 | labels = [r'$C_{}$'.format(i+1) for i in range(len(corners))] 103 | 104 | center = corners.mean(axis=0) 105 | for i, corner in enumerate(corners): 106 | text_x, text_y = corner - (center - corner)*0.1 107 | ax.text(text_x, text_y, labels[i], verticalalignment='center', 108 | horizontalalignment='center') 109 | 110 | if draw_lines is not None: 111 | lines = get_converging_lines(num_lines=draw_lines, mesh_precision=2, 112 | class_index=class_index) 113 | corners = np.array([[0, 0], [1, 0], [0.5, 0.75**0.5]]) 114 | for line in lines: 115 | line = bc2xy(line, corners).T 116 | ax.plot(line[0], line[1]) 117 | # l = mlines.Line2D() 118 | # ax.add_line(l) 119 | 120 | # Axes options 121 | ax.set_xlim(xmin=0, xmax=1) 122 | ax.set_ylim(ymin=0, ymax=0.75**0.5) 123 | ax.set_xbound(lower=0, upper=1) 124 | ax.set_ybound(lower=0, upper=0.75**0.5) 125 | ax.axis('equal') 126 | ax.axis('off') 127 | 128 | triangle = tri.Triangulation(corners[:, 0], corners[:, 1]) 129 | ax.triplot(triangle, c='k', lw=0.5) 130 | 131 | plt.gca().set_adjustable("box") 132 | return fig 133 | 134 | 135 | def plot_converging_lines_pvalues(func, lines, i, ax): 136 | """ 137 | Plots the probability values of the given function for each given line. 138 | The i indicates the class index from 0 to 2 139 | """ 140 | # This orders the classes in the following manner: 141 | # C1, C2, C3 142 | # C2, C3, C1 143 | # C3, C1, C2 144 | classes = np.roll(np.array([0, 1, 2]), -i) 145 | 146 | for j, line in enumerate(lines): 147 | pvalues = np.array([func(p) for p in line]).flatten() 148 | ax.plot(line[:, i], pvalues, 149 | label=r'$C_{}/C_{} = {}/{}$'.format( 150 | classes[1]+1, classes[2]+1, j, len(lines)-j-1)) 151 | ax.legend() 152 | 153 | 154 | def get_converging_lines(num_lines, mesh_precision=10, class_index=0, 155 | tol=1e-6): 156 | """ 157 | If class_index = 0 158 | Create isometric lines from the opposite side of the C1 simplex to the C1 corner 159 | First line has C2 fixed to 0 160 | Last line has C3 fixed to 0 161 | Class 3 line 1 start 162 | /\\ 163 | / \\ 164 | / \\ line 2 start 165 | / - \\ 166 | / -/ \\ 167 | / -/ \\ 168 | / -/ ---\\ line 3 start 169 | /-/ -----/ \\ 170 | //---/ \\ 171 | -------------------- line 4 start 172 | Class 1(lines end) Class 2 173 | 174 | Else if class_index is in [1, 2] 175 | Then the previously described lines are rotated towards the indicated class. 176 | The lines always follow a clockwise order.
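Examples
--------
A minimal sketch of the expected output (the shapes follow from the construction in the body below; tol is the clipping tolerance argument):
>>> lines = get_converging_lines(num_lines=3, mesh_precision=3)
>>> lines.shape
(3, 3, 3)
Here the middle line runs from [tol, 0.5, 0.5] (the midpoint of the side opposite C1) to [1 - tol, tol, tol] (the C1 corner), in barycentric coordinates (C1, C2, C3).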
177 | """ 178 | p = np.linspace(0, 1, mesh_precision).reshape(-1, 1) 179 | if num_lines == 1: 180 | q = [0.5] 181 | else: 182 | q = np.linspace(0, 1, num_lines).reshape(-1, 1) 183 | lines = [np.hstack((p, (1-p)*q[i], (1-p)*(1-q[i]))) for i in range(len(q))] 184 | if class_index > 0: 185 | indices = np.array([0, 1, 2]) 186 | lines = [line[:, np.roll(indices, class_index)] for i, line in 187 | enumerate(lines)] 188 | return np.clip(lines, tol, 1.0 - tol) 189 | -------------------------------------------------------------------------------- /pycalib/visualisations/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/classifier-calibration/PyCalib/8208ab907d5b9c5149b2d45b1c8b6e4b2d763317/pycalib/visualisations/tests/__init__.py -------------------------------------------------------------------------------- /pycalib/visualisations/tests/test_init.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import matplotlib.pyplot as plt 3 | import numpy as np 4 | 5 | from pycalib.visualisations import plot_reliability_diagram 6 | 7 | 8 | class TestVisualisations(unittest.TestCase): 9 | def test_plot_reliability_diagram(self): 10 | n_c1 = n_c2 = 500 11 | p = np.concatenate((np.random.beta(2, 5, n_c1), 12 | np.random.beta(4, 3, n_c2))) 13 | 14 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 15 | 16 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 17 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 18 | 19 | p = np.vstack((1 - p, p)).T 20 | s1 = np.vstack((1 - s1, s1)).T 21 | s2 = np.vstack((1 - s2, s2)).T 22 | 23 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2]) 24 | self.assertIsInstance(fig, plt.Figure) 25 | 26 | def test_plot_reliability_diagram_confidence(self): 27 | n_c1 = n_c2 = 500 28 | p = np.concatenate((np.random.beta(2, 5, n_c1), 29 | np.random.beta(4, 3, n_c2))) 30 | 31 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 32 | 33 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 34 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 35 | 36 | p = np.vstack((1 - p, p)).T 37 | s1 = np.vstack((1 - s1, s1)).T 38 | s2 = np.vstack((1 - s2, s2)).T 39 | 40 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 41 | confidence=True) 42 | self.assertIsInstance(fig, plt.Figure) 43 | 44 | def test_plot_reliability_diagram_simple(self): 45 | n_c1 = n_c2 = 500 46 | p = np.concatenate((np.random.beta(2, 5, n_c1), 47 | np.random.beta(4, 3, n_c2))) 48 | 49 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 50 | 51 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 52 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 53 | 54 | p = np.vstack((1 - p, p)).T 55 | s1 = np.vstack((1 - s1, s1)).T 56 | s2 = np.vstack((1 - s2, s2)).T 57 | 58 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 59 | show_histogram=False) 60 | self.assertIsInstance(fig, plt.Figure) 61 | 62 | fig = plot_reliability_diagram(labels=y, scores=s2, 63 | show_histogram=True) 64 | self.assertIsInstance(fig, plt.Figure) 65 | 66 | def test_plot_reliability_diagram_full(self): 67 | n_c1 = n_c2 = 500 68 | p = np.concatenate((np.random.beta(2, 5, n_c1), 69 | np.random.beta(4, 3, n_c2) 70 | )) 71 | 72 | y = np.concatenate((np.zeros(n_c1), np.ones(n_c2))) 73 | 74 | s1 = 1/(1 + np.exp(-3*(p - 0.5))) 75 | s2 = 1/(1 + np.exp(-8*(p - 0.5))) 76 | s1 = np.vstack((1 - s1, s1)).T 77 | s2 = np.vstack((1 - s2, s2)).T 78 | 79 | fig = plot_reliability_diagram(labels=y, scores=s1, 80 | legend=['Model 1'], 81 | show_histogram=True, bins=9, 82 | class_names=['Negative', 'Positive'], 
83 | show_counts=True, show_correction=True, 84 | show_gaps=True, sample_proportion=0.5, 85 | errorbar_interval=0.95, 86 | hist_per_class=True) 87 | self.assertIsInstance(fig, plt.Figure) 88 | 89 | class_2_idx = range(int(len(y)/3), int(2*len(y)/3)) 90 | y[class_2_idx] = 2 91 | s1 = np.hstack((s1, s1[:, 1].reshape(-1, 1))) 92 | s1[class_2_idx, 2] *= 3 93 | s1 /= s1.sum(axis=1)[:, None] 94 | s2 = np.hstack((s2, s2[:, 1].reshape(-1, 1))) 95 | s2[class_2_idx, 2] *= 2 96 | s2 /= s2.sum(axis=1)[:, None] 97 | 98 | bins = [0, .3, .5, .8, 1] 99 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 100 | legend=['Model 3', 'Model 4'], 101 | show_histogram=True, 102 | show_correction=True, 103 | show_counts=True, 104 | show_bars=True, 105 | sample_proportion=0.3, 106 | bins=bins, 107 | color_list=['darkgreen', 'chocolate'], 108 | invert_histogram=True) 109 | self.assertIsInstance(fig, plt.Figure) 110 | 111 | fig = plot_reliability_diagram(labels=y, scores=[s1, s2], 112 | legend=['Model 3', 'Model 4'], 113 | show_histogram=True, 114 | show_correction=True, 115 | show_counts=True, 116 | sample_proportion=0.3, 117 | bins=bins, 118 | color_list=['darkgreen', 'chocolate'], 119 | invert_histogram=True, 120 | confidence=True) 121 | self.assertIsInstance(fig, plt.Figure) 122 | 123 | 124 | def main(): 125 | unittest.main() 126 | 127 | 128 | if __name__ == '__main__': 129 | main() 130 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | codecov 2 | flake8 3 | mypy 4 | nbval 5 | numpydoc 6 | pylint 7 | pytest 8 | pytest-cov 9 | sphinx 10 | sphinx-gallery 11 | twine 12 | yapf 13 | restview 14 | sphinx-rtd-theme 15 | readme-renderer 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy>=1.22 2 | scipy>=1.6 3 | scikit-learn>=0.24 4 | matplotlib>=3.3 5 | statsmodels>=0.12 6 | orange3>=3.28 7 | betacal>=1.1.0 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.util import convert_path 2 | from setuptools import setup, find_packages 3 | from os import path 4 | 5 | this_directory = path.abspath(path.dirname(__file__)) 6 | with open(path.join(this_directory, 'README.md'), encoding='utf-8') as f: 7 | long_description = f.read() 8 | 9 | main_ns = {} 10 | ver_path = convert_path('pycalib/__init__.py') 11 | with open(ver_path) as ver_file: 12 | exec(ver_file.read(), main_ns) 13 | 14 | setup( 15 | name = 'pycalib', 16 | packages = find_packages(exclude=['tests.*', 'tests', 'docs.*', 'docs']), 17 | install_requires=[ 18 | 'numpy>=1.22', 19 | 'scipy>=1.6', 20 | 'scikit-learn>=0.24', 21 | 'matplotlib>=3.3', 22 | 'statsmodels>=0.12' 23 | ], 24 | version=main_ns['__version__'], 25 | description = 'Python library with tools for classifier calibration.', 26 | author = 'Miquel Perello Nieto, Hao Song, Telmo de Menezes e Silva Filho', 27 | author_email = 'perello.nieto@gmail.com', 28 | url = 'https://classifier-calibration.github.io/PyCalib/', 29 | download_url = 
'https://github.com/classifier-calibration/archive/{}.tar.gz'.format(main_ns['__version__']), 30 | keywords = ['classifier calibration', 'calibration', 'classification'], 31 | classifiers = [], 32 | long_description=long_description, 33 | long_description_content_type='text/markdown' 34 | ) 35 | --------------------------------------------------------------------------------