├── mvtk
    ├── version.py
    ├── bias_variance
    │   ├── __init__.py
    │   ├── estimators
    │   │   ├── __init__.py
    │   │   ├── estimator_wrapper.py
    │   │   ├── sklearn_estimator_wrapper.py
    │   │   ├── tensorflow_estimator_wrapper.py
    │   │   └── pytorch_estimator_wrapper.py
    │   ├── bias_variance_parallel.py
    │   └── bias_variance.py
    ├── supervisor
    │   ├── divergence
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── nn.py
    │   │   └── generators.py
    │   ├── __init__.py
    │   ├── processing.py
    │   └── utils.py
    ├── __init__.py
    ├── sobol.py
    ├── credibility.py
    ├── metrics.py
    └── thresholding.py
├── tests
    ├── package.py
    ├── test_sobol.py
    ├── credibility
    │   └── test_credibility.py
    ├── supervisor
    │   ├── test_divergence_utils.py
    │   ├── test_processing.py
    │   └── test_divergence.py
    ├── test_metrics.py
    └── bias_variance
    │   ├── estimators
    │   │   ├── test_sklearn_estimator_wrapper.py
    │   │   ├── test_tensorflow_estimator_wrapper.py
    │   │   └── test_pytorch_estimator_wrapper.py
    │   ├── test_bias_variance_parallel.py
    │   └── test_bias_variance.py
├── docs
    ├── images
    │   ├── interprenet.png
    │   ├── pdf_total_variation.png
    │   ├── low_bias_low_variance.png
    │   ├── high_bias_high_variance.png
    │   ├── high_bias_low_variance.png
    │   ├── low_bias_high_variance.png
    │   ├── thresholding_expected_utility.png
    │   ├── thresholding_negative_scores.png
    │   ├── thresholding_positive_scores.png
    │   ├── bias_variance_label_distribution.png
    │   ├── thresholding_exploration_proportion.png
    │   └── logo.svg
    ├── notebooks
    │   ├── thresholding
    │   │   └── threshold_distribution_evolution.gif
    │   ├── interprenet
    │   │   └── .ipynb_checkpoints
    │   │       └── Periodic-checkpoint.ipynb
    │   └── divergence
    │       └── CategoricalColumns.ipynb
    ├── sobol.rst
    ├── metrics.rst
    ├── credibility.rst
    ├── interprenet.rst
    ├── supervisor.rst
    ├── supervisor.utils.rst
    ├── thresholding.rst
    ├── bias_variance.bias_variance.rst
    ├── bias_variance.estimators.rst
    ├── supervisor.processing.rst
    ├── bias_variance.rst
    ├── supervisor.divergence.rst
    ├── bias_variance.bias_variance_parallel.rst
    ├── _templates
    │   └── layout.html
    ├── Makefile
    ├── authors.rst
    ├── make.bat
    ├── css
    │   └── custom.css
    ├── about.rst
    ├── index.rst
    ├── sobol_user_guide.rst
    ├── conf.py
    ├── quickstart.rst
    ├── interprenet_user_guide.rst
    ├── contributing.md
    ├── refs.bib
    ├── credibility_user_guide.rst
    ├── thresholding_user_guide.rst
    └── bias_variance_user_guide.rst
├── tox.ini
├── .pre-commit-config.yaml
├── DCO
├── setup.py
├── README.md
├── .circleci
    └── config.yml
└── LICENSE
/mvtk/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | -------------------------------------------------------------------------------- /mvtk/bias_variance/__init__.py: -------------------------------------------------------------------------------- 1 | from .bias_variance import * 2 | from .bias_variance_parallel import * 3 | -------------------------------------------------------------------------------- /tests/package.py: -------------------------------------------------------------------------------- 1 | import mvtk 2 | 3 | 4 | def test_version(): 5 | assert isinstance(mvtk.__version__, str) 6 | -------------------------------------------------------------------------------- /docs/images/interprenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/interprenet.png -------------------------------------------------------------------------------- /docs/images/pdf_total_variation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/pdf_total_variation.png -------------------------------------------------------------------------------- /docs/images/low_bias_low_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/low_bias_low_variance.png -------------------------------------------------------------------------------- /mvtk/supervisor/divergence/__init__.py: -------------------------------------------------------------------------------- 1 | from .generators import * 2 | from .nn import * 3 | from .utils import * 4 | from .metrics import * 5 | -------------------------------------------------------------------------------- /docs/images/high_bias_high_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/high_bias_high_variance.png -------------------------------------------------------------------------------- /docs/images/high_bias_low_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/high_bias_low_variance.png -------------------------------------------------------------------------------- /docs/images/low_bias_high_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/low_bias_high_variance.png -------------------------------------------------------------------------------- /docs/images/thresholding_expected_utility.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_expected_utility.png -------------------------------------------------------------------------------- /docs/images/thresholding_negative_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_negative_scores.png -------------------------------------------------------------------------------- /docs/images/thresholding_positive_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_positive_scores.png -------------------------------------------------------------------------------- /docs/images/bias_variance_label_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/bias_variance_label_distribution.png -------------------------------------------------------------------------------- /docs/images/thresholding_exploration_proportion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_exploration_proportion.png -------------------------------------------------------------------------------- /mvtk/__init__.py: 
-------------------------------------------------------------------------------- 1 | from . import metrics 2 | from . import supervisor 3 | from . import credibility 4 | from . import interprenet 5 | from mvtk.version import __version__ as __version__ 6 | -------------------------------------------------------------------------------- /docs/notebooks/thresholding/threshold_distribution_evolution.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/notebooks/thresholding/threshold_distribution_evolution.gif -------------------------------------------------------------------------------- /docs/sobol.rst: -------------------------------------------------------------------------------- 1 | sobol 2 | ================== 3 | 4 | .. automodule:: mvtk.sobol 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/metrics.rst: -------------------------------------------------------------------------------- 1 | metrics 2 | ================== 3 | 4 | .. automodule:: mvtk.metrics 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/credibility.rst: -------------------------------------------------------------------------------- 1 | credibility 2 | ================== 3 | 4 | .. automodule:: mvtk.credibility 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/interprenet.rst: -------------------------------------------------------------------------------- 1 | interprenet 2 | ================== 3 | 4 | .. automodule:: mvtk.interprenet 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/supervisor.rst: -------------------------------------------------------------------------------- 1 | supervisor 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | supervisor.divergence 10 | supervisor.processing 11 | supervisor.utils 12 | -------------------------------------------------------------------------------- /docs/supervisor.utils.rst: -------------------------------------------------------------------------------- 1 | utils 2 | ================================== 3 | 4 | .. automodule:: mvtk.supervisor.utils 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/thresholding.rst: -------------------------------------------------------------------------------- 1 | thresholding 2 | ================== 3 | 4 | .. automodule:: mvtk.thresholding 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/bias_variance.bias_variance.rst: -------------------------------------------------------------------------------- 1 | bias_variance 2 | ============= 3 | 4 | .. 
automodule:: mvtk.bias_variance.bias_variance 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/bias_variance.estimators.rst: -------------------------------------------------------------------------------- 1 | estimators 2 | ========== 3 | 4 | .. automodule:: mvtk.bias_variance.estimators 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/supervisor.processing.rst: -------------------------------------------------------------------------------- 1 | processing 2 | ================================== 3 | 4 | .. automodule:: mvtk.supervisor.processing 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/bias_variance.rst: -------------------------------------------------------------------------------- 1 | bias_variance 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | bias_variance.estimators 10 | bias_variance.bias_variance 11 | bias_variance.bias_variance_parallel 12 | -------------------------------------------------------------------------------- /docs/supervisor.divergence.rst: -------------------------------------------------------------------------------- 1 | divergence 2 | ================================== 3 | 4 | .. automodule:: mvtk.supervisor.divergence 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /mvtk/supervisor/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import util 2 | 3 | if util.find_spec("pyspark") is not None: 4 | del util 5 | from .processing import * 6 | else: 7 | del util 8 | from .utils import * 9 | from .divergence import * 10 | -------------------------------------------------------------------------------- /docs/bias_variance.bias_variance_parallel.rst: -------------------------------------------------------------------------------- 1 | bias_variance_parallel 2 | ====================== 3 | 4 | .. 
automodule:: mvtk.bias_variance.bias_variance_parallel 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator_wrapper import EstimatorWrapper 2 | from .pytorch_estimator_wrapper import PyTorchEstimatorWrapper 3 | from .sklearn_estimator_wrapper import SciKitLearnEstimatorWrapper 4 | from .tensorflow_estimator_wrapper import TensorFlowEstimatorWrapper 5 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block extrahead %} 4 | {{ super() }} 5 | 6 | 13 | {% endblock %} 14 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # content of: tox.ini , put in same dir as setup.py 2 | [tox] 3 | envlist = py38 4 | 5 | [testenv] 6 | # install pytest in the virtualenv where commands will be executed 7 | recreate = true 8 | deps = 9 | pytest 10 | pre-commit 11 | extras = 12 | doc 13 | commands = 14 | pre-commit run --all 15 | pytest tests 16 | sphinx-build -b linkcheck docs docs/linkcheck 17 | sphinx-build -b html docs docs/html 18 | -------------------------------------------------------------------------------- /tests/test_sobol.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from mvtk import sobol 4 | 5 | 6 | def test_sobol(): 7 | nprng = numpy.random.RandomState(0) 8 | data = nprng.uniform(size=(1000000, 4)) 9 | coefficients = numpy.arange(1, 5) 10 | 11 | def model(x): 12 | return x.dot(coefficients) 13 | 14 | first_order, total = sobol.sobol(model, data) 15 | variance = model(data).std() ** 2 16 | V = coefficients**2 / 12 17 | assert numpy.allclose(first_order * variance, V, rtol=0.01) 18 | assert numpy.allclose(total.sum(), 1, rtol=0.01) 19 | assert numpy.allclose(total, first_order, rtol=0.01) 20 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | class EstimatorWrapper: 2 | r"""This is a wrapper class that can be inherited to conform any estimator 3 | to the fit/predict interface""" 4 | 5 | def fit(self, X, y, **kwargs): 6 | r"""Train the estimator 7 | 8 | Args: 9 | X: features 10 | y: ground truth labels 11 | kwargs (optional): kwargs for use in training 12 | """ 13 | pass 14 | 15 | def predict(self, X, **kwargs): 16 | r"""Get predictions from the estimator 17 | 18 | Args: 19 | X: features 20 | kwargs (optional): kwargs for use in predicting 21 | """ 22 | pass 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Install the pre-commit hooks below with 2 | # 'pre-commit install' 3 | 4 | # Auto-update the version of the hooks with 5 | # 'pre-commit autoupdate' 6 | 7 | # Run the hooks on all files with 8 | # 'pre-commit run --all' 9 | 10 | repos: 11 | 12 | - repo: https://github.com/psf/black 13 | rev: 23.11.0 14 | hooks: 15 | - id: black 16 | language_version: python3.8 17 | args: [--line-length=88, tests, mvtk] 18 | 19 | - repo: https://github.com/pycqa/flake8 20 | rev: 6.1.0 21 | hooks: 22 | - id: flake8 23 | args: [--max-line-length=88, '--per-file-ignores=__init__.py:F401,F403', tests, mvtk] 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: v1.7.1 26 | hooks: 27 | - id: mypy 28 | files: mvtk/ 29 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | 4 | 5 | 6 | 7 |
Team
Alex Eftimiades - Lead
Website    Linkedin
Matthew Gillett - Developer
Website    Linkedin
8 |
9 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/css/custom.css: -------------------------------------------------------------------------------- 1 | /* unvisited link */ 2 | .wy-side-nav-search a:link { 3 | color: #000000; 4 | } 5 | 6 | /* unvisited link */ 7 | .wy-nav-content a:link, .section a:link { 8 | color: #0070B7; 9 | } 10 | 11 | .highlight .c1 { 12 | color: #097B79; 13 | } 14 | 15 | .highlight .si { 16 | color: #D90E39; 17 | } 18 | 19 | .section .nbinput.docutils.container .prompt.highlight-none.notranslate pre { 20 | color: #0070B7; 21 | } 22 | 23 | .section .sig.sig-object.py { 24 | color: #000000; 25 | background-color: rgb(255, 255, 255); 26 | } 27 | 28 | .section .sig.sig-object.py .sig-paren { 29 | color: #0070B7; 30 | } 31 | 32 | .section .nboutput.docutils.container .prompt.highlight-none.notranslate pre { 33 | color: #D90E39; 34 | } 35 | 36 | .section .pre { 37 | color: #D90E39; 38 | } 39 | 40 | .section .admonition .admonition-title { 41 | background-color: #0070B7; 42 | } 43 | 44 | .section .brackets, .section .fn-backref { 45 | color: #0070B7; 46 | } 47 | 48 | /* text */ 49 | footer { 50 | color: #6B6B6B; 51 | } -------------------------------------------------------------------------------- /DCO: -------------------------------------------------------------------------------- 1 | Developer's Certificate of Origin (adapted from the linux kernel) 2 | 3 | By making a contribution to this project, I certify that: 4 | 5 | The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or 6 | The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or 7 | The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 
8 | I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. 9 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/sklearn_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | from . import EstimatorWrapper 2 | 3 | 4 | class SciKitLearnEstimatorWrapper(EstimatorWrapper): 5 | def __init__(self, estimator): 6 | r"""Create a wrapper for a Scikit-Learn estimator 7 | 8 | Args: 9 | estimator: Scikit-Learn estimator instance 10 | 11 | Returns: 12 | self 13 | """ 14 | self.estimator = estimator 15 | 16 | def fit(self, X, y, **kwargs): 17 | r"""Train the estimator 18 | 19 | Args: 20 | X: features 21 | y: ground truth labels 22 | kwargs (optional): kwargs for use in training 23 | 24 | Returns: 25 | self 26 | """ 27 | self.estimator.fit(X, y, **kwargs) 28 | return self 29 | 30 | def predict(self, X, **kwargs): 31 | r"""Get predictions from the estimator 32 | 33 | Args: 34 | X: features 35 | kwargs (optional): kwargs for use in predicting 36 | 37 | Returns: 38 | predictions 39 | """ 40 | return self.estimator.predict(X, **kwargs) 41 | -------------------------------------------------------------------------------- /mvtk/supervisor/processing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import public 4 | 5 | 6 | @public.add 7 | def replace_nulls(df, replace, column_names): 8 | return df.fillna({k: replace for k in column_names}) 9 | 10 | 11 | @public.add 12 | # Normalize timestamp column values. 13 | def normalize_ts_columns(df, column_names): 14 | for column_name in column_names: 15 | normalize_ts_column(df, column_name) 16 | return df 17 | 18 | 19 | # convert timestamp in HH:mm:ss to seconds - 20 | # pandas timedelta takes the time format and converts it to seconds. 21 | # divide the result by the total number of seconds in a day. 22 | # this normalizes the timestamp to a number between 0 and 1. 23 | # round off the value to 5 decimal places. 24 | @public.add 25 | def normalize_ts_column(df, column_name): 26 | df[column_name] = pd.to_timedelta( 27 | df[column_name].dt.strftime("%H:%M:%S") 28 | ).dt.total_seconds() 29 | df[column_name] = df[column_name].replace(np.nan, -1) 30 | df[column_name] = df[column_name].apply( 31 | lambda x: round(x / 86400, 5) if x >= 0 else x 32 | ) 33 | 34 | return df 35 | -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | .. _about: 2 | 3 | About 4 | ======== 5 | 6 | History 7 | ------- 8 | 9 | This project was started by Alex Eftimiades in 2019 as part of an 10 | internal R&D effort focused on model monitoring and sensitivity 11 | analysis. With early usage, testing, and utility contributions from 12 | Dwight Gunning, Matthew Gillett, and Mona Annaparthi, this led to the ``supervisor`` 13 | submodule and many of the initial ideas that became the 14 | ``thresholding``, ``sobol``, and ``credibility`` modules. Subsequent 15 | work on explainability led to ``interprenet`` and the normalized 16 | mutual information score within ``metrics``.
17 | 18 | Authors 19 | ------- 20 | 21 | The following people are currently core contributors to Model Validation 22 | Toolkit's development and maintenance: 23 | 24 | .. include:: authors.rst 25 | 26 | Please see :doc:`contributing <contributing>` to join us! 27 | 28 | Acknowledgements 29 | ---------------- 30 | 31 | We thank David Devakumar, Mohamad Ibrahim, Jonathan Bryant, and Ahmed Ibrahim 32 | for their support, feedback, and help allocating resources to work on this 33 | project. We thank Nil Weerasinghe for his help organizing R&D efforts. 34 | -------------------------------------------------------------------------------- /tests/credibility/test_credibility.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import itertools 3 | import pandas 4 | 5 | from mvtk import credibility 6 | 7 | 8 | def test_value_error(): 9 | try: 10 | credibility.credible_interval(0, 0, prior=(0, 0)) 11 | except ValueError: 12 | return 13 | raise Exception("Expected ValueError") 14 | 15 | 16 | def test_equivalence(): 17 | assert credibility.credible_interval(0, 1) == credibility.credible_interval( 18 | 1, 2, prior=(0, 0) 19 | ) 20 | 21 | 22 | def test_prob_greater_cmp(): 23 | nprng = numpy.random.RandomState(0) 24 | prior_sample_size = 10**6 25 | for N in range(2, 8): 26 | for prior1, prior2 in itertools.product( 27 | itertools.product(range(1, 3), repeat=2), repeat=2 28 | ): 29 | df = pandas.DataFrame() 30 | p1 = nprng.beta(*prior1, size=prior_sample_size) 31 | df["positives1"] = nprng.binomial(N, p1) 32 | p2 = nprng.beta(*prior2, size=prior_sample_size) 33 | df["positives2"] = nprng.binomial(N, p2) 34 | df["target"] = p1 > p2 35 | for (p1, p2), subset in df.groupby(["positives1", "positives2"]): 36 | p = subset["target"].mean() 37 | q = credibility.prob_greater_cmp( 38 | p1, N - p1, p2, N - p2, prior1=prior1, prior2=prior2, err=10**-5 39 | ) 40 | assert abs(q - p) < 0.05 41 | -------------------------------------------------------------------------------- /tests/supervisor/test_divergence_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scipy 4 | 5 | from mvtk.supervisor.divergence.utils import arrayify 6 | 7 | 8 | def test_arrayify_dataframes(): 9 | df_a = pd.DataFrame({"a": list(range(4))}) 10 | assert ( 11 | df_a.shape == arrayify(df_a)[0].shape 12 | ), "Dataframe shape is same after arrayify" 13 | assert ( 14 | df_a.shape == arrayify([df_a])[0].shape 15 | ), "Dataframe shape is same after arrayify" 16 | assert isinstance(arrayify([df_a])[0], np.ndarray) 17 | assert isinstance(arrayify(df_a)[0], np.ndarray) 18 | 19 | 20 | def test_arrayify_numpy(): 21 | ones = np.ones((2, 4)) 22 | ones_lst = arrayify(ones) 23 | assert ( 24 | ones.shape == ones_lst[0].shape 25 | ), "Shape should be same after arrayify_as_array" 26 | ones_lst2 = arrayify([ones]) 27 | assert ( 28 | ones_lst[0].shape == ones_lst2[0].shape 29 | ), "Shape should be same after arrayify_as_array" 30 | ones_lst3 = arrayify([ones, ones]) 31 | assert ( 32 | ones_lst[0].shape == ones_lst3[0].shape 33 | ), "Shape should be same after arrayify_as_array" 34 | 35 | 36 | def test_arrayify_csr(): 37 | ones = scipy.sparse.csr_matrix(np.ones((2, 4))) 38 | ones_lst = arrayify(ones) 39 | assert ( 40 | ones.shape == ones_lst[0].shape 41 | ), "Shape should be same after arrayify_as_array" 42 | ones_lst2 = arrayify([ones]) 43 | assert ( 44 | ones_lst[0].shape == ones_lst2[0].shape 45 | ), "Shape should be same after
arrayify_as_array" 46 | ones_lst3 = arrayify([ones, ones]) 47 | assert ( 48 | ones_lst[0].shape == ones_lst3[0].shape 49 | ), "Shape should be same after arrayify_as_array" 50 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | _dct = {} 4 | with open("mvtk/version.py") as f: 5 | exec(f.read(), _dct) 6 | __version__ = _dct["__version__"] 7 | 8 | extras_require = { 9 | "doc": [ 10 | "nbsphinx", 11 | "sphinx", 12 | "sphinx-rtd-theme", 13 | "sphinxcontrib-bibtex", 14 | "imageio", 15 | "myst-parser", 16 | "ipykernel", 17 | "torch", 18 | "tensorflow", 19 | ], 20 | "pytorch": ["torch"], 21 | "tensorflow": ["tensorflow"], 22 | } 23 | with open("README.md", "r", encoding="utf-8") as fh: 24 | long_description = fh.read() 25 | 26 | setup( 27 | name="mvtk", 28 | version=__version__, 29 | license="Apache-2.0", 30 | author="Alex Eftimiades", 31 | author_email="alexeftimiades@gmail.com", 32 | description="Model validation toolkit", 33 | long_description=long_description, 34 | long_description_content_type="text/markdown", 35 | packages=find_packages(), 36 | classifiers=[ 37 | "Programming Language :: Python :: 3", 38 | "License :: OSI Approved :: Apache Software License", 39 | "Operating System :: MacOS", 40 | "Operating System :: POSIX :: Linux", 41 | ], 42 | install_requires=[ 43 | "jax>=0.2.8,<=0.4.16", 44 | "public>=2020.12.3", 45 | "fastcore>=1.3.25", 46 | "jaxlib>=0.1.23,<=0.4.16", 47 | "scikit-learn", 48 | "numpy", 49 | "matplotlib", 50 | "scipy", 51 | "seaborn", 52 | "pandas>=0.23.4", 53 | "tqdm", 54 | "ray", 55 | ], 56 | extras_require=extras_require, 57 | url="https://finraos.github.io/model-validation-toolkit/", 58 | project_urls={ 59 | "Bug Tracker": "https://github.com/FINRAOS/model-validation-toolkit/issues", 60 | }, 61 | ) 62 | -------------------------------------------------------------------------------- /mvtk/sobol.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import public 3 | 4 | 5 | def choose(x, N, nprng=None): 6 | if nprng is None: 7 | nprng = numpy.random.RandomState(0) 8 | return x[nprng.choice(numpy.arange(len(x), dtype="int"), N)] 9 | 10 | 11 | @public.add 12 | def sobol(model, data, N=None, nprng=None): 13 | """Total and first order Sobol sensitivity indices. 14 | https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis. 15 | 16 | Args: 17 | model (function): Maps data to scores 18 | data (ndarray): Data matrix. Each row is a sample vector. 19 | N (int): sample size for monte carlo estimate of sobol 20 | indices. Should be less than or equal to the number of rows 21 | of data. If None, entire dataset is used. 22 | nprng (RandomState): Optional numpy RandomState. 23 | returns: 24 | Total and first order Sobol sensitivity indices. Each index 25 | is expressed as an array of length equal to the number of 26 | features in the supplied data matrix. 
27 | """ 28 | if nprng is None: 29 | nprng = numpy.random.RandomState(0) 30 | if N is None: 31 | A = data.copy() 32 | B = data.copy() 33 | nprng.shuffle(A) 34 | nprng.shuffle(B) 35 | N = len(data) 36 | elif N > len(data): 37 | raise ValueError("Sample size must be less than or equal to size of dataset") 38 | else: 39 | A, B = (choose(data, N, nprng) for _ in range(2)) 40 | d = data.shape[1] 41 | total = [] 42 | first_order = [] 43 | for i in range(d): 44 | C = A[:, i].copy() 45 | A[:, i] = B[:, i] 46 | diff = model(A) 47 | A[:, i] = C 48 | diff -= model(A) 49 | first_order.append(model(B).dot(diff) / N) 50 | total.append(diff.dot(diff) / (2 * N)) 51 | variance_y = model(numpy.vstack((A, B))).std() ** 2 52 | total = numpy.asarray(total) / variance_y 53 | first_order = numpy.asarray(first_order) / variance_y 54 | return total, first_order 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | drawing 2 | 3 | [![CircleCI](https://circleci.com/gh/FINRAOS/model-validation-toolkit/tree/main.svg?style=svg)](https://circleci.com/gh/FINRAOS/model-validation-toolkit/tree/main)[![Join the chat at https://gitter.im/FINRAOS/model-validation-toolkit](https://badges.gitter.im/FINRAOS/model-validation-toolkit.svg)](https://gitter.im/FINRAOS/model-validation-toolkit?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)[![PyPI version](https://img.shields.io/pypi/v/mvtk)](https://pypi.org/project/mvtk/) 4 | 5 | # Model Validation Tookit 6 | 7 | ## Installation 8 | 9 | Run `pip install mvtk`. 10 | 11 | **Windows users**: Until [Jaxlib is supported on windows 12 | natively](https://github.com/google/jax/issues/438) you will need to either use 13 | this library from a Linux subsystem or within a Docker container. 14 | Alternatively, you can [build jaxlib from 15 | source](https://jax.readthedocs.io/en/latest/developer.html#additional-notes-for-building-jaxlib-from-source-on-windows). 16 | 17 | ## Developers 18 | 19 | Check out this repository and `cd` into the directory. 20 | 21 | Run `pip install -e ".[doc]"`. 22 | 23 | The `[doc]` is used to install dependencies for building documentation. You 24 | will need [pandoc](https://pandoc.org/) installed. 25 | 26 | # Submodules 27 | You can import: 28 | 29 | - `mvtk.credibility` for assessing credibility from sample size. 30 | - `mvtk.interprenet` for building interpretable neural nets. 31 | - `mvtk.thresholding` for adaptive thresholding. 32 | - `mvtk.sobol` for Sobol sensitivity analysis 33 | - `mvtk.supervisor` for divergence analysis 34 | - `mvtk.metrics` for specialised metrics 35 | - `mvtk.bias_variance` for bias-variance decomposition 36 | 37 | # Documentation 38 | You can run `make -C docs html` on a Mac or `make.bat -C docs html` on a PC to just rebuild the docs. In this case, point your browser to ```docs/_build/html/index.html``` to view the homepage. If your browser was already pointing to documentation that you changed, you can refresh the page to see the changes. 39 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Model Validation Toolkit 2 | =================================== 3 | 4 | The Model Validation Toolkit is a library for model validation, metaanalysis, and monitoring. 5 | 6 | .. toctree:: 7 | :glob: 8 | :maxdepth: 1 9 | :caption: Notes 10 | 11 | .. 
toctree:: 12 | :maxdepth: 1 13 | :caption: Overview 14 | 15 | quickstart 16 | contributing 17 | about 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | :caption: User Guides 22 | 23 | supervisor_user_guide 24 | credibility_user_guide 25 | thresholding_user_guide 26 | interprenet_user_guide 27 | sobol_user_guide 28 | bias_variance_user_guide 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | :caption: Divergence Tutorials 33 | 34 | notebooks/divergence/Airlines 35 | notebooks/divergence/DivergenceFunctions 36 | notebooks/divergence/CategoricalColumns 37 | notebooks/divergence/BugDetection 38 | notebooks/divergence/TrainingDatasetDrift 39 | 40 | .. toctree:: 41 | :maxdepth: 1 42 | :caption: Credibility Tutorials 43 | 44 | notebooks/credibility/Credibility 45 | 46 | .. toctree:: 47 | :maxdepth: 1 48 | :caption: Thresholding Tutorials 49 | 50 | notebooks/thresholding/Thresholding 51 | 52 | .. toctree:: 53 | :maxdepth: 1 54 | :caption: Interprenet Tutorials 55 | 56 | notebooks/interprenet/Interprenet 57 | 58 | .. toctree:: 59 | :maxdepth: 1 60 | :caption: Bias and Metrics Tutorials 61 | 62 | notebooks/metrics/CounteringSampleBias 63 | 64 | .. toctree:: 65 | :maxdepth: 1 66 | :caption: Bias-Variance Decomposition Tutorials 67 | 68 | notebooks/bias_variance/BiasVarianceClassification 69 | notebooks/bias_variance/BiasVarianceRegression 70 | notebooks/bias_variance/BiasVarianceVisualization 71 | 72 | .. toctree:: 73 | :maxdepth: 1 74 | :caption: Python API 75 | 76 | supervisor 77 | credibility 78 | thresholding 79 | interprenet 80 | sobol 81 | metrics 82 | bias_variance 83 | 84 | Indices and tables 85 | ================== 86 | 87 | * :ref:`genindex` 88 | * :ref:`modindex` 89 | -------------------------------------------------------------------------------- /mvtk/supervisor/divergence/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy 3 | import scipy 4 | import public 5 | 6 | from mvtk.supervisor.utils import parallel 7 | 8 | 9 | @public.add 10 | def get_drift_series(metric, baseline, test): 11 | return numpy.asarray(parallel(lambda x: metric(x, baseline), test)) 12 | 13 | 14 | @public.add 15 | def get_distance_matrix(metric, sample_distributions, show_progress=False): 16 | distance_matrix = numpy.zeros((len(sample_distributions),) * 2) 17 | for index, d in parallel( 18 | lambda x: (x[0], metric(x[1][0], x[1][1])), 19 | [ 20 | list(zip(*x)) 21 | for x in itertools.combinations(enumerate(sample_distributions), 2) 22 | ], 23 | show_progress=show_progress, 24 | ): 25 | distance_matrix[index] = d 26 | distance_matrix += distance_matrix.T 27 | return distance_matrix 28 | 29 | 30 | @public.add 31 | def sparse_wrapper(v): 32 | class _SparseWrapper(type(v)): 33 | def __getitem__(self, i): 34 | ret = super().__getitem__(i) 35 | if isinstance(i, int): 36 | return ret.toarray()[0] 37 | return ret 38 | 39 | def __len__(self): 40 | return self.shape[0] 41 | 42 | return _SparseWrapper(v) 43 | 44 | 45 | def to_array_like(v): 46 | if hasattr(v, "values"): 47 | return v.values 48 | if isinstance(v, scipy.sparse.spmatrix): 49 | return sparse_wrapper(v) 50 | return v 51 | 52 | 53 | @public.add 54 | def arrayify(item): 55 | """Convert the value to at least dim 3. If it is a dataframe, it converts it to a 56 | list of values.
57 | 58 | :param item: ndarray or a list of ndarray, or a dataframe, a series or a 59 | list of dataframes or series 60 | :return: a list of dataframes/series or array of dim 3 61 | """ 62 | if hasattr(item, "shape"): 63 | ret = to_array_like(item) 64 | if len(ret.shape) == 2: 65 | return [ret] 66 | if len(ret.shape) == 1: 67 | return numpy.atleast_3d(ret) 68 | return list(map(to_array_like, item)) 69 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/tensorflow_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | from . import EstimatorWrapper 2 | 3 | 4 | class TensorFlowEstimatorWrapper(EstimatorWrapper): 5 | def __init__(self, estimator): 6 | r"""Create a wrapper for a TensorFlow estimator 7 | 8 | Args: 9 | estimator: TensorFlow estimator instance 10 | 11 | Returns: 12 | self 13 | """ 14 | self.estimator = estimator 15 | 16 | def fit(self, X, y, **kwargs): 17 | r"""Train the estimator 18 | 19 | Args: 20 | X: features 21 | y: ground truth labels 22 | kwargs (optional): kwargs for use in training 23 | 24 | Returns: 25 | self 26 | """ 27 | self._reset_weights() 28 | self.estimator.fit(X, y, **kwargs) 29 | return self 30 | 31 | def predict(self, X, **kwargs): 32 | r"""Get predictions from the estimator 33 | 34 | Args: 35 | X: features 36 | kwargs (optional): kwargs for use in predicting 37 | 38 | Returns: 39 | predictions 40 | """ 41 | predictions = self.estimator.predict(X, **kwargs) 42 | prediction_list = [] 43 | for prediction in predictions: 44 | if len(prediction) > 1: 45 | prediction_list.append(prediction.argmax().item()) 46 | else: 47 | prediction_list.append(prediction.item()) 48 | return prediction_list 49 | 50 | def _reset_weights(self): 51 | r"""Reset weights of the estimator""" 52 | import tensorflow as tf 53 | 54 | for layer in self.estimator.layers: 55 | if hasattr(layer, "kernel_initializer") and hasattr(layer, "kernel"): 56 | layer.kernel.assign(layer.kernel_initializer(tf.shape(layer.kernel))) 57 | if hasattr(layer, "bias_initializer") and hasattr(layer, "bias"): 58 | layer.bias.assign(layer.bias_initializer(tf.shape(layer.bias))) 59 | if hasattr(layer, "recurrent_initializer") and hasattr( 60 | layer, "recurrent_kernel" 61 | ): 62 | layer.recurrent_kernel.assign( 63 | layer.recurrent_initializer(tf.shape(layer.recurrent_kernel)) 64 | ) 65 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy 3 | 4 | from mvtk import metrics 5 | 6 | 7 | def test_rank_auc(): 8 | nprng = numpy.random.RandomState(0) 9 | S = 32 10 | y_true, y_pred = nprng.randint(0, 5, S), nprng.uniform(size=S).round(1) 11 | N = 0 12 | auc = 0 13 | for (true1, pred1), (true2, pred2) in itertools.product( 14 | zip(y_true, y_pred), repeat=2 15 | ): 16 | if true1 > true2: 17 | if pred1 == pred2: 18 | auc += 0.5 19 | else: 20 | auc += pred1 > pred2 21 | N += 1 22 | auc /= N 23 | assert metrics.rank_auc(y_true, y_pred) == auc 24 | 25 | 26 | def test_monotonicity(): 27 | nprng = numpy.random.RandomState(0) 28 | S = 32 29 | y_true, y_pred = nprng.randint(0, 5, S), nprng.uniform(size=S).round(1) 30 | N = 0 31 | auc = 0 32 | for (true1, pred1), (true2, pred2) in itertools.product( 33 | zip(y_true, y_pred), repeat=2 34 | ): 35 | if true1 - true2 == 1: 36 | if pred1 == pred2: 37 | auc += 0.5 38 | else: 39 | auc += pred1 > pred2 40 | N += 1 41 |
auc /= N 42 | assert metrics.monotonicity(y_true, y_pred) == auc 43 | 44 | 45 | def weighted_roc_auc(y_test, y_pred, weights): 46 | def process(stuff): 47 | (pos, w_p), (neg, w_n) = stuff 48 | p = w_p * w_n 49 | return p * (0.5 if pos == neg else pos > neg), p 50 | 51 | mask = y_test == 1 52 | positives, w_pos = y_pred[mask], weights[mask] 53 | negatives, w_neg = y_pred[~mask], weights[~mask] 54 | numerator, denominator = map( 55 | sum, 56 | zip( 57 | *map( 58 | process, itertools.product(zip(positives, w_pos), zip(negatives, w_neg)) 59 | ) 60 | ), 61 | ) 62 | 63 | return numerator / denominator 64 | 65 | 66 | def test_weighted_roc_auc(): 67 | nprng = numpy.random.RandomState(0) 68 | S = 32 69 | y_true, y_pred, weights = ( 70 | nprng.randint(0, 2, S), 71 | nprng.uniform(size=S).round(1), 72 | nprng.uniform(size=S), 73 | ) 74 | assert ( 75 | abs( 76 | weighted_roc_auc(y_true, y_pred, weights) 77 | - metrics.rank_auc(y_true, y_pred, weights) 78 | ) 79 | < 2**-32 80 | ) 81 | -------------------------------------------------------------------------------- /tests/bias_variance/estimators/test_sklearn_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LinearRegression 3 | from sklearn.tree import DecisionTreeClassifier 4 | 5 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper 6 | 7 | 8 | def create_data(): 9 | X_train = np.arange(12).reshape(6, 2) 10 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 11 | X_test = np.arange(6).reshape(3, 2) 12 | y_test = np.array([0, 1, 1]) 13 | 14 | return X_train, y_train, X_test, y_test 15 | 16 | 17 | def test_sklearn_estimator_wrapper(): 18 | X_train, y_train, X_test, y_test = create_data() 19 | 20 | model = LinearRegression() 21 | 22 | model.fit(X_train, y_train) 23 | pred = model.predict(X_test) 24 | 25 | model_test = LinearRegression() 26 | model_wrapped = SciKitLearnEstimatorWrapper(model_test) 27 | 28 | model_wrapped.fit(X_train, y_train) 29 | pred_wrapped = model_wrapped.predict(X_test) 30 | 31 | assert np.array_equal(pred, pred_wrapped) 32 | 33 | 34 | def test_sklearn_estimator_wrapper_kwargs_fit(): 35 | X_train, y_train, X_test, y_test = create_data() 36 | 37 | model = DecisionTreeClassifier(random_state=123) 38 | 39 | model.fit(X_train, y_train, sample_weight=[0, 0, 1, 0, 1, 0]) 40 | pred = model.predict(X_test) 41 | 42 | model_test = DecisionTreeClassifier(random_state=123) 43 | model_wrapped = SciKitLearnEstimatorWrapper(model_test) 44 | 45 | model_wrapped.fit(X_train, y_train, sample_weight=[0, 0, 1, 0, 1, 0]) 46 | pred_wrapped = model_wrapped.predict(X_test) 47 | 48 | assert np.array_equal(pred, pred_wrapped) 49 | 50 | 51 | def test_sklearn_estimator_wrapper_kwargs_predict(): 52 | X_train, y_train, X_test, y_test = create_data() 53 | 54 | model = DecisionTreeClassifier(random_state=123) 55 | 56 | model.fit(X_train, y_train) 57 | try: 58 | model.predict(X_test, check_input=False) 59 | except ValueError as e: 60 | assert e.args[0] == "X.dtype should be np.float32, got int64" 61 | return 62 | 63 | model_test = DecisionTreeClassifier(random_state=123) 64 | model_wrapped = SciKitLearnEstimatorWrapper(model_test) 65 | 66 | model_wrapped.fit(X_train, y_train) 67 | try: 68 | model_wrapped.predict(X_test, check_input=False) 69 | except ValueError as e: 70 | assert e.args[0] == "X.dtype should be np.float32, got int64" 71 | return 72 | 73 | assert False 74 | 
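The wrapper tests above all exercise the same adapter pattern: ``EstimatorWrapper`` (defined earlier in this dump) standardizes any model behind a ``fit``/``predict`` interface so the ``bias_variance`` routines can retrain and score it uniformly. A minimal sketch of a custom wrapper follows; the ``MedianBaseline`` class and its ``train``/``score_rows`` method names are hypothetical stand-ins for whatever non-standard interface a third-party estimator might expose, not part of mvtk:

import numpy

from mvtk.bias_variance.estimators import EstimatorWrapper


class MedianBaseline:
    # Hypothetical third-party model with a non-standard interface.
    def train(self, features, labels):
        self.value = float(numpy.median(labels))

    def score_rows(self, features):
        return numpy.full(len(features), self.value)


class MedianBaselineWrapper(EstimatorWrapper):
    # Adapter: translate fit/predict into the wrapped object's own methods.
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y, **kwargs):
        self.estimator.train(X, y)
        return self

    def predict(self, X, **kwargs):
        return self.estimator.score_rows(X)


wrapped = MedianBaselineWrapper(MedianBaseline())
predictions = wrapped.fit(
    numpy.arange(8).reshape(4, 2), numpy.arange(4)
).predict(numpy.arange(6).reshape(3, 2))

Returning ``self`` from ``fit`` mirrors the bundled wrappers and keeps calls chainable.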
-------------------------------------------------------------------------------- /docs/sobol_user_guide.rst: -------------------------------------------------------------------------------- 1 | ################ 2 | Sobol User Guide 3 | ################ 4 | 5 | ********** 6 | Motivation 7 | ********** 8 | 9 | `Sensitivity analysis <https://en.wikipedia.org/wiki/Sensitivity_analysis>`_ is 10 | concerned with the degree to which uncertainty in the output of a model can be 11 | attributed to uncertainty in its inputs :cite:`saltelli2008global`. Variance 12 | based sensitivity analysis, commonly known as `Sobol sensitivity analysis 13 | <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_, seeks to 14 | answer this question by attributing the variance of the output to variances in 15 | one or more inputs. This breakdown is known as Sobol indices, which are typically measured 16 | in one of two ways: *first-order* indices and *total-effect* indices 17 | :cite:`sobol2001global`. 18 | 19 | The first-order Sobol index with respect to some feature is given by averaging 20 | the output of the model over all values of all other features and 21 | computing the variance of the result while varying the feature in question. 22 | This is normalized by dividing by the total variance of the output measured by 23 | varying all feature values :cite:`im1993sensitivity`. Their sum is between 0 and 1. The total-effect index is computed by first computing the variance of the 24 | model output with respect to the feature in question, and then computing the 25 | expectation of the result over values of all other 26 | features. This is again normalized by the variance 27 | of the output of the model across all features. 28 | These will sum to a number greater than 29 | or equal to 1. Both are discussed in more detail 30 | here 31 | `https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis 32 | <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_. 33 | 34 | .. currentmodule:: sobol 35 | 36 | :meth:`sobol` takes a model and dataset, and runs a 37 | Monte Carlo simulation as described in the above 38 | link to compute the first order and total order Sobol 39 | indices. Each index is expressed as a one 40 | dimensional array of length equal to the number of 41 | features in the supplied data matrix. The model is 42 | assumed to be a function that outputs one scalar 43 | for each row of the data matrix. 44 | 45 | .. code-block:: python 46 | 47 | import numpy 48 | from mvtk import sobol 49 | 50 | nprng = numpy.random.RandomState(0) 51 | 52 | data = nprng.normal(size=(1000, 4)) # 4 features 53 | model = lambda x: (x ** 2).dot([1, 2, 3, 4]) 54 | total, first_order = sobol.sobol(model, data, N=500) 55 | 56 | .. bibliography:: refs.bib 57 | :cited: 58 | -------------------------------------------------------------------------------- /docs/images/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here.
If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("../")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = "Model Validation Toolkit" 22 | copyright = "2021, Model Validation Toolkit Team" 23 | author = "Model Validation Toolkit Team" 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = "0.2.0" 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.autosummary", 37 | "sphinxcontrib.bibtex", 38 | "sphinx.ext.intersphinx", 39 | "sphinx.ext.mathjax", 40 | "sphinx.ext.napoleon", 41 | "sphinx.ext.viewcode", 42 | "nbsphinx", 43 | "myst_parser", 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ["_templates"] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 52 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = "sphinx_rtd_theme" 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 
65 | html_static_path = ["css", "images"] 66 | html_css_files = ["custom.css"] 67 | html_logo = "images/logo.svg" 68 | html_theme_options = { 69 | "display_version": False, 70 | } 71 | html_favicon = html_logo 72 | 73 | # A fix for Sphinx error contents.rst not found 74 | master_doc = "index" 75 | 76 | # increase the timeout for long-running notebooks 77 | nbsphinx_timeout = 900 78 | 79 | # Don't show full paths 80 | add_module_names = False 81 | 82 | # bibtex 83 | bibtex_bibfiles = ["refs.bib"] 84 | 85 | source_suffix = { 86 | ".rst": "restructuredtext", 87 | ".txt": "markdown", 88 | ".md": "markdown", 89 | } 90 | 91 | user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0" 92 | -------------------------------------------------------------------------------- /tests/bias_variance/estimators/test_tensorflow_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from mvtk.bias_variance.estimators import TensorFlowEstimatorWrapper 5 | 6 | 7 | def create_data(): 8 | X_train = np.arange(12).reshape(6, 2) 9 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 10 | X_test = np.arange(6).reshape(3, 2) 11 | y_test = np.array([0, 1, 1]) 12 | 13 | return X_train, y_train, X_test, y_test 14 | 15 | 16 | def create_model(): 17 | model = tf.keras.Sequential( 18 | [ 19 | tf.keras.layers.Dense(64, activation="relu"), 20 | tf.keras.layers.Dense(64, activation="relu"), 21 | tf.keras.layers.Dense(1), 22 | ] 23 | ) 24 | 25 | model.compile( 26 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 27 | loss="mean_absolute_error", 28 | metrics=["mean_squared_error"], 29 | ) 30 | 31 | return model 32 | 33 | 34 | def predict(estimator, X, **kwargs): 35 | predictions = estimator.predict(X, **kwargs) 36 | prediction_list = [] 37 | for prediction in predictions: 38 | if len(prediction) > 1: 39 | prediction_list.append(prediction.argmax().item()) 40 | else: 41 | prediction_list.append(prediction.item()) 42 | return prediction_list 43 | 44 | 45 | def test_tensorflow_estimator_wrapper(): 46 | X_train, y_train, X_test, y_test = create_data() 47 | 48 | tf.keras.utils.set_random_seed(123) 49 | model = create_model() 50 | 51 | model.fit(X_train, y_train) 52 | pred = predict(model, X_test) 53 | 54 | tf.keras.utils.set_random_seed(123) 55 | model_test = create_model() 56 | model_wrapped = TensorFlowEstimatorWrapper(model_test) 57 | 58 | model_wrapped.fit(X_train, y_train) 59 | pred_wrapped = model_wrapped.predict(X_test) 60 | 61 | assert np.array_equal(pred, pred_wrapped) 62 | 63 | 64 | def test_tensorflow_estimator_wrapper_kwargs_fit(): 65 | X_train, y_train, X_test, y_test = create_data() 66 | 67 | tf.keras.utils.set_random_seed(123) 68 | model = create_model() 69 | 70 | model.fit(X_train, y_train, epochs=10) 71 | pred = predict(model, X_test) 72 | 73 | tf.keras.utils.set_random_seed(123) 74 | model_test = create_model() 75 | model_wrapped = TensorFlowEstimatorWrapper(model_test) 76 | 77 | model_wrapped.fit(X_train, y_train, epochs=10) 78 | pred_wrapped = model_wrapped.predict(X_test) 79 | 80 | assert np.array_equal(pred, pred_wrapped) 81 | 82 | 83 | def test_tensorflow_estimator_wrapper_kwargs_predict(): 84 | X_train, y_train, X_test, y_test = create_data() 85 | 86 | tf.keras.utils.set_random_seed(123) 87 | model = create_model() 88 | 89 | model.fit(X_train, y_train) 90 | pred = predict(model, X_test, steps=10) 91 | 92 | tf.keras.utils.set_random_seed(123) 93 | model_test 
= create_model() 94 | model_wrapped = TensorFlowEstimatorWrapper(model_test) 95 | 96 | model_wrapped.fit(X_train, y_train) 97 | pred_wrapped = model_wrapped.predict(X_test, steps=10) 98 | 99 | assert np.array_equal(pred, pred_wrapped) 100 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | test: 5 | docker: 6 | - image: cimg/python:3.8 7 | steps: 8 | - checkout 9 | - run: sudo apt-get update 10 | - run: sudo apt-get install pandoc 11 | - run: python -m pip install tox 12 | - run: python -m tox 13 | - run: ls -la docs 14 | - persist_to_workspace: 15 | root: docs 16 | paths: html 17 | docs-deploy: 18 | docker: 19 | - image: cimg/python:3.8 20 | steps: 21 | - run: 22 | name: add known_hosts 23 | command: | 24 | mkdir ~/.ssh 25 | printf "%s" 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ== 26 | github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg= 27 | github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl 28 | bitbucket.org ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAubiN81eDcafrgMeLzaFPsw2kNvEcqTKl/VqLat/MaB33pZy0y3rJZtnqwR2qOOvbwKZYKiEO1O6VqNEBxKvJJelCq0dTXWT5pbO2gDXC6h6QDXCaHo6pOHGPUy+YBaGQRGuSusMEASYiWunYN0vCAI8QaXnWMXNMdFP3jHAJH0eDsoiGnLPBlBp4TNm6rYI74nMzgz3B9IikW4WVK+dc8KZJZWYjAuORU3jc1c/NPskD2ASinf8v3xnfXeukU0sJ5N6m5E8VLjObPEO+mN2t/FZTMZLiFqPWc/ALSqnMnnhwrNi2rbfg/rd/IpL8Le3pSBne8+seeFVBoGqzHM9yXw== 29 | gitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY= 30 | ' > ~/.ssh/known_hosts 31 | chmod 0600 ~/.ssh/known_hosts 32 | - run: 33 | name: Checkout main 34 | command: | 35 | git clone $CIRCLE_REPOSITORY_URL --depth 1 -b main main 36 | cd main 37 | git config --global user.email "$(git log --format=%ae -n 1)" 38 | git config --global user.name "$(git log --format=%an -n 1)" 39 | echo "export msg="\"$(git log --format=%B -n 1)\" >> $BASH_ENV 40 | - run: 41 | name: Checkout website 42 | command: | 43 | git clone $CIRCLE_REPOSITORY_URL --depth 1 -b website website 44 | rm -rf website/docs/html 45 | - attach_workspace: 46 | at: website/docs 47 | - run: 48 | name: Copy CircleCI config 49 | command: | 50 | mkdir -p website/.circleci 51 | cp main/.circleci/config.yml website/.circleci/config.yml 52 | - add_ssh_keys: 53 | fingerprints: 54 | - "dd:11:5d:b8:a7:d2:be:16:47:4e:a0:66:00:96:b4:f7" 55 | - run: 56 | name: Deploy docs to website branch 57 | command: | 58 | cd website 59 | git add .circleci/config.yml 60 | git add -A -- docs/html 61 | git commit -am "$msg" 62 | git push origin website 63 | workflows: 64 | version: 2 65 | build: 66 | jobs: 67 | - test: 68 | filters: 69 | branches: 70 | ignore: website 71 | - docs-deploy: 72 | requires: 73 | - test 74 | filters: 75 | branches: 76 | only: main 77 | -------------------------------------------------------------------------------- /tests/supervisor/test_processing.py: 
-------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import pandas as pd 4 | import pandas.testing 5 | 6 | from mvtk.supervisor.processing import ( 7 | replace_nulls, 8 | normalize_ts_columns, 9 | ) 10 | 11 | 12 | def test_replace_nulls(): 13 | for col_list in [["col1"], ["col2"], ["col1", "col2"]]: 14 | init_rows = [ 15 | {"col1": "test1_1", "col2": "test1_2"}, 16 | {"col1": None, "col2": "test2_2"}, 17 | {"col1": "test3_1", "col2": None}, 18 | {"col1": None, "col2": None}, 19 | ] 20 | 21 | expect_rows = copy.deepcopy(init_rows) 22 | 23 | for i in range(0, len(expect_rows)): 24 | for col in col_list: 25 | if expect_rows[i][col] is None: 26 | expect_rows[i][col] = "1" 27 | 28 | init_df = pd.DataFrame(init_rows) 29 | expect_df = pd.DataFrame(expect_rows) 30 | 31 | actual = replace_nulls(init_df, "1", col_list) 32 | expect = expect_df 33 | 34 | pandas.testing.assert_frame_equal(actual, expect) 35 | 36 | 37 | def time_to_seconds(time): 38 | return int(time[:2]) * 3600 + int(time[2:4]) * 60 + int(time[4:6]) 39 | 40 | 41 | def test_process_ts_columns(): 42 | format_map = {"col2": "%H:%M:%S.%f", "col3": "%H%M%S.%f", "col4": "%H%M%S"} 43 | 44 | for col_list in [ 45 | ["col2"], 46 | ["col3"], 47 | ["col4"], 48 | ["col2", "col3"], 49 | ["col2", "col4"], 50 | ["col3", "col4"], 51 | ["col2", "col3", "col4"], 52 | ]: 53 | init_rows = [ 54 | { 55 | "col1": "test1", 56 | "col2": "10:11:12.123456", 57 | "col3": "101112.123456", 58 | "col4": "101112", 59 | }, 60 | { 61 | "col1": "test2", 62 | "col2": None, 63 | "col3": "202123.123456", 64 | "col4": "202124", 65 | }, 66 | { 67 | "col1": "test3", 68 | "col2": "10:31:32.123456", 69 | "col3": None, 70 | "col4": "103134", 71 | }, 72 | { 73 | "col1": "test4", 74 | "col2": "20:41:42.123456", 75 | "col3": "204143.123456", 76 | "col4": None, 77 | }, 78 | ] 79 | 80 | expect_rows = copy.deepcopy(init_rows) 81 | 82 | for i in range(0, len(expect_rows)): 83 | for col in col_list: 84 | if expect_rows[i][col] is None: 85 | expect_rows[i][col] = -1 86 | else: 87 | expect_rows[i][col] = str( 88 | round( 89 | time_to_seconds(expect_rows[i][col].replace(":", "")) 90 | / 86400, 91 | 5, 92 | ) 93 | ) 94 | 95 | init_df = pd.DataFrame(init_rows) 96 | expect = pd.DataFrame(expect_rows) 97 | 98 | for col in ["col2", "col3", "col4"]: 99 | init_df[col] = pd.to_datetime(init_df[col], format=format_map[col]) 100 | if col not in col_list: 101 | expect[col] = pd.to_datetime(expect[col], format=format_map[col]) 102 | else: 103 | expect[col] = expect[col].astype(float) 104 | 105 | actual = normalize_ts_columns(init_df, col_list) 106 | 107 | pandas.testing.assert_frame_equal(actual, expect) 108 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/pytorch_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | from . 
import EstimatorWrapper 2 | 3 | 4 | class PyTorchEstimatorWrapper(EstimatorWrapper): 5 | def __init__( 6 | self, estimator, optimizer_generator, loss_fn, fit_fn=None, predict_fn=None 7 | ): 8 | r"""Create a wrapper for a PyTorch estimator 9 | 10 | Args: 11 | estimator: PyTorch estimator instance 12 | optimizer_generator: generator function for the optimizer 13 | loss_fn: loss function 14 | fit_fn (optional): custom fit function to be called instead of the default one 15 | predict_fn (optional): custom predict function to be called instead 16 | of the default one 17 | 18 | Returns: 19 | self 20 | """ 21 | self.estimator = estimator 22 | self.optimizer_generator = optimizer_generator 23 | self.optimizer = optimizer_generator(estimator) 24 | self.loss_fn = loss_fn 25 | self.fit_fn = fit_fn 26 | self.predict_fn = predict_fn 27 | 28 | def fit(self, X, y, **kwargs): 29 | r"""Train the estimator 30 | 31 | Args: 32 | X: features 33 | y: ground truth labels 34 | kwargs (optional): kwargs for use in training 35 | 36 | Returns: 37 | self 38 | """ 39 | self.estimator.apply(PyTorchEstimatorWrapper._reset_parameters) 40 | 41 | if self.fit_fn is not None: 42 | self.fit_fn(self, X, y, **kwargs) 43 | return self 44 | 45 | if kwargs.get("epochs") is None: 46 | epochs = 100 47 | else: 48 | epochs = kwargs.get("epochs") 49 | 50 | for i in range(epochs): 51 | loss = 0 52 | if kwargs.get("batch_size") is None: 53 | batch_size = len(y) 54 | else: 55 | batch_size = kwargs.get("batch_size") 56 | for j in range(0, len(y), batch_size): 57 | batch_start = j 58 | batch_end = j + batch_size 59 | X_batch = X[batch_start:batch_end] 60 | y_batch = y[batch_start:batch_end] 61 | prediction = self.estimator(X_batch) 62 | loss = self.loss_fn(prediction, y_batch) 63 | 64 | self.optimizer.zero_grad() 65 | loss.backward() 66 | self.optimizer.step() 67 | if kwargs.get("verbose"): 68 | print(f"epoch: {i:2} training loss: {loss.item():10.8f}") 69 | 70 | return self 71 | 72 | def predict(self, X, **kwargs): 73 | r"""Get predictions from the estimator 74 | 75 | Args: 76 | X: features 77 | kwargs (optional): kwargs for use in predicting 78 | 79 | Returns: 80 | list of predictions 81 | """ 82 | if self.predict_fn is not None: 83 | return self.predict_fn(self, X, **kwargs) 84 | 85 | import torch 86 | 87 | prediction_list = [] 88 | with torch.no_grad(): 89 | for value in X: 90 | prediction = self.estimator(value) 91 | if len(prediction) > 1: 92 | prediction_list.append(prediction.argmax().item()) 93 | else: 94 | prediction_list.append(prediction.item()) 95 | return prediction_list 96 | 97 | def _reset_parameters(self): 98 | r"""Reset parameters of a module; applied to each submodule via ``torch.nn.Module.apply``""" 99 | if hasattr(self, "reset_parameters"): 100 | self.reset_parameters() 101 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | Model Validation Toolkit is an open source library that provides various 5 | tools for model validation, data quality checks, analysis of thresholding, 6 | sensitivity analysis, and interpretable model development. The purpose of this 7 | guide is to illustrate some of the main features that Model Validation Toolkit 8 | provides. Please refer to the README for installation instructions. 9 | 10 | Divergences 11 | ---------------------------------------- 12 | 13 | Model Validation Toolkit provides a fast and accurate means of assessing 14 | large scale statistical differences between datasets. Rather than checking
Rather than checking 15 | whether two samples are identical, this check asserts that they are similar in 16 | a statistical sense and can be used for data quality checks and concept drift 17 | detection. 18 | 19 | .. code-block:: python 20 | 21 | import numpy 22 | from mvtk.supervisor.divergence import calc_tv 23 | 24 | nprng = numpy.random.RandomState(0) 25 | 26 | train = nprng.uniform(size=(1000, 4)) # 4 features 27 | val = nprng.uniform(size=(1000, 4)) # 4 features 28 | 29 | # Close to 0 is similar; close to 1 is different 30 | print(calc_tv(train, val)) 31 | 32 | See the :doc:`user guide ` for more information. 33 | 34 | Credibility 35 | ---------------------------------------- 36 | 37 | .. currentmodule:: mvtk.credibility 38 | 39 | Model Validation Toolkit provides a lightweight suite to assess credibility 40 | of model performance given a finite sample. Whether your validation set has 41 | several dozen or million records, you can quantify your confidence in 42 | performance using this module. For example, if a model correctly identifies 8 43 | of 10 images, its empirical accuracy is 80%. However, that does not mean we 44 | should be confident the accuracy could turn out to be lower if we had more 45 | data. We would obviously be more confident in this assessment if it identified 46 | 800 of 1000 images, but how much more so? With a few assumptions and 47 | :meth:`prob_below`, we can estimate the probability that the true accuracy 48 | would be less than 70% if we had more data. 49 | 50 | .. code-block:: python 51 | 52 | from mvtk.credibility import prob_below 53 | print(prob_below(8, 2, 0.7)) 54 | 55 | See the :doc:`user guide ` for more information. 56 | 57 | Thresholding 58 | ---------------------------------------- 59 | 60 | Model Validation Toolkit provides a module for determining and 61 | dynamically seta nd sample thresholds for binary classifiers that maximize a 62 | utility function. The general idea is to intelligently reassess false and true 63 | negative rates in a production system. See the :doc:`user guide 64 | ` for more information. 65 | 66 | Sobol 67 | ---------------------------------------- 68 | 69 | .. currentmodule:: sobol 70 | 71 | Model Validation Toolkit provides a lightweight module for `sobol 72 | sensitivity analysis 73 | `_. This can 74 | be used to assess and quantify uncertainty of model outputs with respect to 75 | model inputs. The module currently supports first order and total sobol 76 | indexes--both which are computed and reported using :meth:`sobol`. 77 | 78 | .. code-block:: python 79 | 80 | import numpy 81 | from mvtk import sobol 82 | 83 | nprng = numpy.random.RandomState(0) 84 | 85 | data = nprng.normal(size=(1000, 4)) # 4 features 86 | model = lambda x: (x ** 2).dot([1, 2, 3, 4]) 87 | total, first_order = sobol.sobol(model, data, N=500) 88 | 89 | See the :doc:`user guide 90 | ` for more information. 
91 | -------------------------------------------------------------------------------- /tests/bias_variance/test_bias_variance_parallel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.tree import DecisionTreeClassifier 4 | from sklearn.linear_model import Ridge 5 | 6 | from mvtk.bias_variance import ( 7 | bias_variance_compute_parallel, 8 | bias_variance_mse, 9 | bias_variance_0_1_loss, 10 | ) 11 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper 12 | 13 | 14 | def create_data(): 15 | X_train = np.arange(12).reshape(6, 2) 16 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 17 | X_test = np.arange(6).reshape(3, 2) 18 | y_test = np.array([0, 1, 1]) 19 | 20 | return X_train, y_train, X_test, y_test 21 | 22 | 23 | def test_bias_variance_compute_parallel_mse(): 24 | X_train, y_train, X_test, y_test = create_data() 25 | 26 | model = Ridge(random_state=123) 27 | model_wrapped = SciKitLearnEstimatorWrapper(model) 28 | 29 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 30 | model_wrapped, 31 | X_train, 32 | y_train, 33 | X_test, 34 | y_test, 35 | random_state=123, 36 | decomp_fn=bias_variance_mse, 37 | ) 38 | 39 | assert np.round(avg_loss, decimals=12) == np.round( 40 | np.float64(0.3967829075484304), decimals=12 41 | ) 42 | assert np.round(avg_bias, decimals=12) == np.round( 43 | np.float64(0.13298143583764407), decimals=12 44 | ) 45 | assert np.round(avg_var, decimals=12) == np.round( 46 | np.float64(0.26380147171078644), decimals=12 47 | ) 48 | assert np.round(net_var, decimals=12) == np.round( 49 | np.float64(0.26380147171078644), decimals=12 50 | ) 51 | 52 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 53 | assert avg_var == net_var 54 | 55 | 56 | def test_bias_variance_calc_parallel_0_1(): 57 | X_train, y_train, X_test, y_test = create_data() 58 | 59 | model = DecisionTreeClassifier(random_state=123) 60 | model_wrapped = SciKitLearnEstimatorWrapper(model) 61 | 62 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 63 | model_wrapped, 64 | X_train, 65 | y_train, 66 | X_test, 67 | y_test, 68 | random_state=123, 69 | decomp_fn=bias_variance_0_1_loss, 70 | ) 71 | 72 | assert avg_loss == np.float64(0.4566666666666666) 73 | assert avg_bias == np.float64(0.3333333333333333) 74 | assert avg_var == np.float64(0.33499999999999996) 75 | assert net_var == np.float64(0.12333333333333332) 76 | 77 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 78 | 79 | 80 | def test_bias_variance_calc_parallel_mse_no_random_state(): 81 | X_train, y_train, X_test, y_test = create_data() 82 | 83 | model = Ridge(random_state=123) 84 | model_wrapped = SciKitLearnEstimatorWrapper(model) 85 | 86 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 87 | model_wrapped, 88 | X_train, 89 | y_train, 90 | X_test, 91 | y_test, 92 | iterations=10, 93 | decomp_fn=bias_variance_mse, 94 | ) 95 | 96 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 97 | assert avg_var == net_var 98 | 99 | 100 | def test_bias_variance_calc_parallel_0_1_no_random_state(): 101 | X_train, y_train, X_test, y_test = create_data() 102 | 103 | model = DecisionTreeClassifier(random_state=123) 104 | model_wrapped = SciKitLearnEstimatorWrapper(model) 105 | 106 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 107 | model_wrapped, 108 | X_train, 109 | 
y_train, 110 | X_test, 111 | y_test, 112 | iterations=10, 113 | decomp_fn=bias_variance_0_1_loss, 114 | ) 115 | 116 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 117 | -------------------------------------------------------------------------------- /docs/notebooks/interprenet/.ipynb_checkpoints/Periodic-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 160, 6 | "id": "699ea1b7", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import jax\n", 11 | "from sklearn.model_selection import train_test_split\n", 12 | "\n", 13 | "f = jax.numpy.cos\n", 14 | "n = 1000\n", 15 | "X_train = jax.numpy.linspace(-n * jax.numpy.pi, 0, 100 * n).reshape(-1, 1)\n", 16 | "y_train = f(X_train)\n", 17 | "\n", 18 | "\n", 19 | "X_test = jax.numpy.linspace(0, n * jax.numpy.pi, 100 * n).reshape(-1, 1)\n", 20 | "y_test = f(X_test)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 161, 26 | "id": "f2d7f659", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import jax\n", 31 | "\n", 32 | "from mvtk import interprenet\n", 33 | "\n", 34 | "init_params, model = interprenet.constrained_model(\n", 35 | " (frozenset([interprenet.monotonic_constraint]),),\n", 36 | " get_layers=lambda n: [n + 1],\n", 37 | " preprocess=interprenet.identity,\n", 38 | " postprocess=interprenet.identity)\n", 39 | "\n", 40 | "init_params = ((jax.numpy.asarray([0.]), jax.numpy.asarray([0.]),),\n", 41 | " init_params)\n", 42 | "def scaled_model(params, x):\n", 43 | " (m, b), model_params = params\n", 44 | " u = jax.numpy.sin(x * jax.numpy.exp(m) + jax.numpy.arctan(b))\n", 45 | " return model(model_params, u)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "f7a0800c", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "def loss(y, y_pred):\n", 56 | " return ((y - y_pred) ** 2).mean()\n", 57 | "\n", 58 | "trained_params = interprenet.train((X_train, y_train),\n", 59 | " (X_test, y_test),\n", 60 | " (init_params, scaled_model),\n", 61 | " metric=lambda y, y_pred: loss(y, y_pred),\n", 62 | " step_size=0.01,\n", 63 | " mini_batch_size=32,\n", 64 | " loss_fn=loss,\n", 65 | " num_epochs=128)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "1e7dad5d", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "loss(y_test, y_test)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "ebae2803", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "trained_model = lambda X: scaled_model(trained_params, X)\n", 86 | "y_pred = trained_model(X_test)\n", 87 | "loss(y_test, y_pred)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "5d4554e7", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "import matplotlib\n", 98 | "import matplotlib.pyplot as pylab\n", 99 | "\n", 100 | "q = 1000\n", 101 | "pylab.plot(X_test[:q], y_test[:q])\n", 102 | "pylab.plot(X_test[:q], y_pred[:q])\n", 103 | "\n", 104 | "pylab.show()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "b17de2c6", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | 
"codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.8.8" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 5 137 | } 138 | -------------------------------------------------------------------------------- /mvtk/supervisor/divergence/nn.py: -------------------------------------------------------------------------------- 1 | import jax 2 | import public 3 | 4 | from jax.example_libraries import stax 5 | from jax._src.nn.initializers import glorot_normal, normal 6 | from jax.example_libraries.stax import ( 7 | Dense, 8 | FanInSum, 9 | FanOut, 10 | Identity, 11 | Relu, 12 | elementwise, 13 | ) 14 | 15 | 16 | def ResBlock(*layers, fan_in=FanInSum, tail=Identity): 17 | """Split input, feed it through one or more layers in parallel, recombine 18 | them with a fan-in, apply a trailing layer (i.e. an activation) 19 | 20 | Args: 21 | *layers: a sequence of layers, each an (init_fun, apply_fun) pair. 22 | fan_in, optional: a fan-in to recombine the outputs of each layer 23 | tail, optional: a final layer to apply after recombination 24 | 25 | 26 | Returns: 27 | A new layer, meaning an (init_fun, apply_fun) pair, representing the 28 | parallel composition of the given sequence of layers fed into fan_in 29 | and then tail. In particular, the returned layer takes a sequence of 30 | inputs and returns a sequence of outputs with the same length as the 31 | argument `layers`. 32 | """ 33 | return stax.serial(FanOut(len(layers)), stax.parallel(*layers), fan_in, tail) 34 | 35 | 36 | @public.add 37 | def Approximator( 38 | input_size, 39 | depth=3, 40 | width=None, 41 | output_size=1, 42 | linear=Dense, 43 | residual=True, 44 | activation=lambda x: x, 45 | rng=jax.random.PRNGKey(0), 46 | ): 47 | r"""Basic Neural network based function 48 | :math:`\mathbb{R}^N\rightarrow\mathbb{R}^M` function approximator. 49 | 50 | Args: 51 | input_size (int): Size of input dimension. 52 | depth (int, optional): Depth of network. Defaults to ``3``. 53 | width (int, optional): Width of network. Defaults to ``input_size + 1``. 54 | output_size (int, optional): Number of outputs. Defaults to ``1``. 55 | linear (``torch.nn.Module``, optional): Linear layer drop in 56 | replacement. Defaults to ``jax.example_libraries.stax.Dense``. 57 | residual (bool, optional): Turn on ResNet blocks. Defaults to ``True``. 58 | activation (optional): A map from :math:`(-\infty, \infty)` to an 59 | appropriate domain (such as the domain of a convex conjugate). 60 | Defaults to the identity. 61 | rng (optional): Jax ``PRNGKey`` key. Defaults to `jax.random.PRNGKey(0)``. 62 | 63 | Returns: 64 | initial parameter values, neural network function 65 | """ 66 | # input_size + output_size hidden hidden units is sufficient for universal 67 | # approximation given unconstrained depth even without ResBlocks. 68 | # https://arxiv.org/abs/1710.112780. With ResBlocks (as used below), only 69 | # one hidden unit is needed for Relu activation 70 | # https://arxiv.org/abs/1806.10909. 
71 | if width is None: 72 | hidden = input_size + 1 73 | else: 74 | hidden = width 75 | if depth > 2: 76 | layers = [linear(hidden), Relu] 77 | else: 78 | layers = [] 79 | for _ in range(depth - 2): 80 | if residual: 81 | layers.append( 82 | ResBlock(stax.serial(linear(hidden), Relu), linear(hidden), tail=Relu) 83 | ) 84 | else: 85 | layers.append(linear(hidden)) 86 | layers.append(linear(output_size)) 87 | layers.append(elementwise(activation)) 88 | init_approximator_params, approximator = stax.serial(*layers) 89 | _, init_params = init_approximator_params(rng, (-1, input_size)) 90 | return init_params, approximator 91 | 92 | 93 | @public.add 94 | def NormalizedLinear(out_dim, W_init=glorot_normal(), b_init=normal()): 95 | r"""Linear layer whose weight columns are normalized to unit :math:`L^1` norm, so the absolute values of each column sum to one.""" 96 | 97 | def init_fun(rng, input_shape): 98 | output_shape = input_shape[:-1] + (out_dim,) 99 | k1, k2 = jax.random.split(rng) 100 | W, b = W_init(k1, (input_shape[-1], out_dim)), b_init(k2, (out_dim,)) 101 | return output_shape, (W, b) 102 | 103 | def apply_fun(params, inputs, **kwargs): 104 | W, b = params 105 | W_normalized = W / jax.numpy.abs(W).sum(0) 106 | return jax.numpy.dot(inputs, W_normalized) + b 107 | 108 | return init_fun, apply_fun 109 | -------------------------------------------------------------------------------- /mvtk/credibility.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import public 3 | 4 | from sklearn.metrics import roc_auc_score 5 | from scipy.stats import beta 6 | 7 | 8 | @public.add 9 | def credible_interval(positive, negative, credibility=0.5, prior=(1, 1)): 10 | """What is the shortest interval that contains probability(positive) with 11 | `credibility`% probability? 12 | 13 | Args: 14 | positive (int): number of times the first possible outcome has been seen 15 | negative (int): number of times the second possible outcome has been seen 16 | credibility (float): The probability that the true p(positive) is 17 | contained within the reported interval 18 | prior (tuple): pseudocount for positives and negatives 19 | 20 | returns: 21 | (lower bound, upper bound) 22 | """ 23 | positive += prior[0] 24 | negative += prior[1] 25 | if not (positive > 1 or negative > 1): 26 | raise ValueError( 27 | "Credible intervals are only defined when at least one count + pseudocount" 28 | " is greater than 1" 29 | ) 30 | distribution = beta(positive, negative) 31 | mean = positive / (positive + negative) 32 | cdf_mean = distribution.cdf(mean) 33 | cred_2 = credibility / 2 34 | lower = cdf_mean - cred_2 35 | true_lower = max(lower, 0) 36 | excess = true_lower - lower 37 | upper = cdf_mean + cred_2 + excess 38 | true_upper = min(upper, 1) 39 | excess = upper - true_upper 40 | true_lower -= excess 41 | assert numpy.isclose((true_upper - true_lower), credibility) 42 | return distribution.ppf(true_lower), distribution.ppf(true_upper) 43 | 44 | 45 | @public.add 46 | def prob_below(positive, negative, cutoff, prior=(1, 1)): 47 | """What is the probability P(positive) is unacceptably low?
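Concretely, this is the cumulative distribution function of the Beta posterior implied by the observed counts and the pseudocount prior, evaluated at ``cutoff``.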
48 | 49 | Args: 50 | positive (int): number of times the positive outcome has been seen 51 | negative (int): number of times the negative outcome has been seen 52 | cutoff (float): lowest acceptable value of P(positive) 53 | prior (tuple): pseudocount for positives and negatives 54 | returns: 55 | Probability that P(positive) < cutoff 56 | """ 57 | return beta(prior[0] + positive, prior[1] + negative).cdf(cutoff) 58 | 59 | 60 | @public.add 61 | def roc_auc_preprocess(positives, negatives, roc_auc): 62 | """ROC AUC analysis must be preprocessed using the number of positive and 63 | negative instances in the entire dataset and the AUC itself. 64 | 65 | Args: 66 | positives (int): number of positive instances in the dataset 67 | negatives (int): number of negative instances in the dataset 68 | roc_auc (float): ROC AUC 69 | returns: 70 | (positive, negative) tuple that can be used for `prob_below` and 71 | `credible_interval` 72 | """ 73 | unique_combinations = positives * negatives 74 | # correctly ranked combinations are pairs of positive and negative 75 | # instances where the model scored the positive instance higher than the 76 | # negative instance 77 | correctly_ranked_combinations = roc_auc * unique_combinations 78 | # the number of incorrectly ranked combinations is the number of 79 | # combinations that aren't correctly ranked 80 | incorrectly_ranked_combinations = ( 81 | unique_combinations - correctly_ranked_combinations 82 | ) 83 | return correctly_ranked_combinations, incorrectly_ranked_combinations 84 | 85 | 86 | @public.add 87 | def prob_greater_cmp( 88 | positive1, 89 | negative1, 90 | positive2, 91 | negative2, 92 | prior1=(1, 1), 93 | prior2=(1, 1), 94 | err=10**-5, 95 | ): 96 | """Probability the first set comes from a distribution with a greater 97 | proportion of positives than the other. 98 | 99 | Args: 100 | positive1 (int): number of positive instances in the first dataset 101 | negative1 (int): number of negative instances in the first dataset 102 | positive2 (int): number of positive instances in the second dataset 103 | negative2 (int): number of negative instances in the second dataset 104 | prior1 (tuple): pseudocount for positives and negatives in the first dataset 105 | prior2 (tuple): pseudocount for positives and negatives in the second dataset 106 | err (float): upper bound on the frequentist sample std from the Monte Carlo simulation.
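Returns:
    Monte Carlo estimate (via ROC AUC over posterior samples) of the
    probability that the first dataset's underlying proportion of
    positives exceeds the second's.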
107 | """ 108 | nprng = numpy.random.RandomState(0) 109 | distribution1 = beta(positive1 + prior1[0], negative1 + prior1[1]) 110 | distribution2 = beta(positive2 + prior2[0], negative2 + prior2[1]) 111 | # CLT implies ROC AUC error shrinks like 1/PN 112 | # for P positives and N negatives 113 | N = int(1 + 1 / (2 * err)) 114 | sample1 = distribution1.rvs(N, random_state=nprng) 115 | sample2 = distribution2.rvs(N, random_state=nprng) 116 | y = numpy.ones(2 * N) 117 | y[N:] = 0 118 | return roc_auc_score(y, numpy.concatenate((sample1, sample2))) 119 | -------------------------------------------------------------------------------- /tests/supervisor/test_divergence.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import mvtk.supervisor.divergence as divergence 3 | 4 | from functools import partial 5 | 6 | 7 | def mutually_exclusive_support_tester(metric, num_features=4, eps=0.1): 8 | data1 = numpy.ones((4, num_features)) 9 | data1[:, :2] = 0 10 | data2 = 1 - data1 11 | assert numpy.isclose(metric([data1], [data1]), 0, atol=eps) 12 | assert numpy.isclose(metric([data2], [data2]), 0, atol=eps) 13 | assert numpy.isclose(metric([data1], [data2]), 1, atol=eps) 14 | 15 | 16 | def get_batches(nprng, batch_size, n=2): 17 | """Pick a random binomial distribution Sample batch_size samples from 18 | it.""" 19 | choices = numpy.arange(n) 20 | x = [] 21 | alpha = nprng.rand(n) 22 | alpha /= alpha.sum() 23 | for d in range(batch_size): 24 | choice = nprng.choice(choices, p=alpha) 25 | z = numpy.zeros_like(choices) 26 | z[choice] = 1 27 | x.append(z) 28 | x = numpy.asarray(x).reshape(batch_size, n) 29 | return x, alpha 30 | 31 | 32 | def divergence_tester( 33 | approximate_metric, analytical_metric, batch_sizes=[256] * 8, thresh=0.85 34 | ): 35 | nprng = numpy.random.RandomState(0) 36 | batches, alphas = zip(*map(partial(get_batches, nprng), batch_sizes)) 37 | assert ( 38 | numpy.corrcoef( 39 | numpy.asarray([analytical_metric(alphas, alpha) for alpha in alphas]), 40 | divergence.utils.get_drift_series( 41 | approximate_metric, batches, [[batch] for batch in batches] 42 | ), 43 | )[0, 1] 44 | > thresh 45 | ) 46 | 47 | 48 | def gaussian_test(approximate_metric, dim=1, N=1024, thresh=0.05): 49 | nprng = numpy.random.RandomState(0) 50 | m = approximate_metric(*nprng.normal(size=(2, 1, N, dim))) 51 | assert m < thresh 52 | assert m >= 0 53 | 54 | 55 | def test_hl_gaussian(): 56 | for dim in range(1, 4): 57 | gaussian_test(partial(divergence.calc_hl, train_test_split=0.5), dim) 58 | 59 | 60 | def test_tv_gaussian(): 61 | for dim in range(1, 4): 62 | gaussian_test(partial(divergence.calc_tv, train_test_split=0.5), dim) 63 | gaussian_test( 64 | partial(divergence.calc_tv_knn, k=64 * 2**dim), 65 | dim, 66 | N=1024 * 2**dim, 67 | thresh=0.1, 68 | ) 69 | 70 | 71 | def test_js_gaussian(): 72 | for dim in range(1, 4): 73 | gaussian_test(partial(divergence.calc_js, train_test_split=0.5), dim) 74 | 75 | 76 | def test_em_gaussian(): 77 | for dim in range(1, 4): 78 | gaussian_test( 79 | partial(divergence.calc_em, train_test_split=0.5), dim, thresh=0.11 80 | ) 81 | 82 | 83 | def test_js_by_corr(): 84 | def kl(alpha1, alpha2): 85 | return numpy.sum(alpha1 * numpy.log2(alpha1 / alpha2)) 86 | 87 | def js(alpha1, alpha2): 88 | mean = alpha1 + alpha2 89 | mean /= 2 90 | ret = kl(alpha1, mean) + kl(alpha2, mean) 91 | return ret / 2 92 | 93 | divergence_tester( 94 | lambda *x: numpy.sqrt(divergence.calc_js_mle(*x)), lambda *x: numpy.sqrt(js(*x)) 95 | ) 96 | 
divergence_tester( 97 | lambda *x: numpy.sqrt(divergence.calc_js(*x)), lambda *x: numpy.sqrt(js(*x)) 98 | ) 99 | 100 | 101 | def test_js_by_support(): 102 | mutually_exclusive_support_tester(divergence.calc_js_mle) 103 | mutually_exclusive_support_tester(divergence.calc_js) 104 | 105 | 106 | def test_hl_by_corr(): 107 | def hl(alpha1, alpha2): 108 | return numpy.sqrt(numpy.sum((numpy.sqrt(alpha1) - numpy.sqrt(alpha2)) ** 2) / 2) 109 | 110 | divergence_tester(divergence.calc_hl_mle, hl) 111 | divergence_tester(divergence.calc_hl, hl) 112 | 113 | 114 | def test_hl_by_support(): 115 | mutually_exclusive_support_tester(divergence.calc_hl) 116 | mutually_exclusive_support_tester(divergence.calc_hl_mle) 117 | 118 | 119 | def test_tv_by_corr(): 120 | def tv(alpha1, alpha2): 121 | return numpy.abs(alpha1 - alpha2).sum() / 2 122 | 123 | divergence_tester(divergence.calc_tv_mle, tv) 124 | divergence_tester(divergence.calc_tv, tv) 125 | 126 | 127 | def test_tv_by_support(): 128 | mutually_exclusive_support_tester(divergence.calc_tv_mle) 129 | mutually_exclusive_support_tester(divergence.calc_tv) 130 | 131 | 132 | def test_em_by_support(): 133 | for num_features in range(1, 3): 134 | data1 = numpy.zeros((4, num_features)) 135 | data2 = 1 - data1 136 | eps = 0.125 137 | assert numpy.isclose(divergence.calc_em([data1], [data1]), 0, atol=eps) 138 | assert numpy.isclose(divergence.calc_em([data2], [data2]), 0, atol=eps) 139 | assert numpy.isclose(divergence.calc_em([data1], [data2]), 1, atol=eps) 140 | assert numpy.isclose(divergence.calc_em([data1], [2 * data2]), 2, atol=eps) 141 | 142 | 143 | def test_calc_tv_lower_bound(): 144 | a = numpy.asarray([0, 1, 0, 0, 1]) 145 | b = numpy.asarray([0.01, 0.98, 0.03, 0.04, 0.99]) 146 | log_loss = divergence.metrics.balanced_binary_cross_entropy(a, b) 147 | tv = divergence.metrics.calc_tv_lower_bound(log_loss) 148 | assert tv < 1 and tv > 0 149 | -------------------------------------------------------------------------------- /docs/interprenet_user_guide.rst: -------------------------------------------------------------------------------- 1 | ###################### 2 | Interprenet User Guide 3 | ###################### 4 | 5 | ********** 6 | Motivation 7 | ********** 8 | 9 | Neural networks are generally difficult to interpret. While there 10 | are tools that can help to interpret certain types of neural 11 | networks such as image classifiers and language models, 12 | interpretation of neural networks that simply ingest tabular data 13 | and return a scalar value is generally limited to various measures of feature 14 | importance. This can be problematic as what makes a feature "important" can 15 | vary between use cases. 16 | 17 | Rather than interpret a neural network as a black 18 | box, we seek to constrain neural networks in ways we 19 | consider useful and interpretable. In particular, 20 | the interprenet module currently has two such 21 | constraints implemented: 22 | 23 | * Monotonicity 24 | * Lipschitz constraint 25 | 26 | `Monotonic functions <https://en.wikipedia.org/wiki/Monotonic_function>`_ 27 | either always increase or decrease with their arguments but never both. This is 28 | often an expected relationship between features and the model output. For 29 | example, we may believe that increasing blood pressure increases risk of 30 | cardiovascular disease. The exact relationship is not known, but we may believe 31 | that it is monotonic. 32 | 33 | `Lipschitz constraints 34 | <https://en.wikipedia.org/wiki/Lipschitz_continuity>`_ constrain the 35 | maximum rate of change of the model.
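Concretely, a function :math:`f` is :math:`L`-Lipschitz when

.. math::

   \|f(x_1) - f(x_2)\| \leq L \|x_1 - x_2\|

for all inputs :math:`x_1` and :math:`x_2`, so the Lipschitz constant :math:`L` bounds how quickly the output can change.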
This can make the model arbitrarily robust 36 | `against adversarial perturbations 37 | `_ 38 | :cite:`anil2019sorting`. 39 | 40 | 41 | How? 42 | ==== 43 | 44 | All constraints are currently implemented as weight constraints. While 45 | arbitrary weights are stored within each linear layer, the weights are 46 | transformed before application so the network can satisfy its prescribed 47 | constraints. Changes are backpropagated through this transformation. 48 | Monotonic increasing neural networks are implemented by taking the absolute 49 | value of weight matrices before applying them. When paired with a monotonically 50 | increasing activation (such as ReLU, Sigmoid, or Tanh), this ensures the 51 | gradient of the output with respect to any features is positive. This is 52 | sufficient to ensure monotonicity with respect to the features. 53 | 54 | Lipschitz constraints are enforced by dividing each weight vector by 55 | its :math:`L^\infty` norm as described in :cite:`anil2019sorting`. This 56 | constrains the :math:`L^\infty`-:math:`L^\infty` `operator norm 57 | <https://en.wikipedia.org/wiki/Operator_norm>`_ 58 | of the weight matrix :cite:`tropp2004topics`. Constraining the 59 | :math:`L^\infty`-:math:`L^\infty` operator norm of the weight 60 | matrix ensures every element of the Jacobian of the linear layers is less than 61 | or equal to :math:`1`. Meanwhile, using activation functions with Lipschitz 62 | constants of :math:`1` ensures the entire network is constrained to never have a 63 | slope greater than :math:`1` for any of its features. 64 | 65 | ******************************************* 66 | Different Constraints on Different Features 67 | ******************************************* 68 | 69 | .. currentmodule:: mvtk.interprenet 70 | 71 | :meth:`constrained_model` generates a neural network with one set of 72 | constraints per feature. Constraints currently available are: 73 | 74 | - :meth:`identity` (for no constraint) 75 | - :meth:`monotonic_constraint` 76 | - :meth:`lipschitz_constraint` 77 | 78 | Features are grouped by the set of constraints applied to them, and 79 | different constrained neural networks are generated for each group 80 | of features. The outputs of those neural networks are concatenated 81 | and fed into a final neural network constrained using all 82 | constraints applied to all features. Since constraints on weight 83 | matrices compose, they can be applied as a series of transformations 84 | on the weights before application. 85 | 86 | .. figure:: images/interprenet.png 87 | :width: 500px 88 | :align: center 89 | :height: 400px 90 | :alt: alternate text 91 | :figclass: align-center 92 | 93 | 4 features with Lipschitz constraints and 4 features with 94 | monotonic constraints are fed to their respectively constrained 95 | neural networks. Intermediate outputs are concatenated and fed into a neural 96 | network with monotonic and Lipschitz constraints. 97 | 98 | We use the Sort function as a nonlinear activation as described in 99 | :cite:`anil2019sorting`. The Jacobian of this activation is always a 100 | permutation matrix, which retains any Lipschitz and monotonicity 101 | constraints. 102 | 103 | ************* 104 | Preprocessing 105 | ************* 106 | 107 | Thus far, we have left out two important details: How to constrain 108 | the Lipschitz constant to be something other than :math:`1`, and how 109 | to create monotonically decreasing networks. Both are a simple 110 | matter of preprocessing. The ``preprocess`` argument (defaulting to 111 | ``identity``) specifies a function to be applied to the feature 112 | vector before passing it to the neural network.
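For example, the sketch below (assuming the ``constrained_model`` API as it is called in this repository's Periodic notebook; the feature count and scaling constants are illustrative) applies a decreasing monotonic constraint to the first feature and a Lipschitz constant of :math:`5` to the second:

.. code-block:: python

    import jax
    from mvtk import interprenet

    # Flip the first feature for a *decreasing* monotonic constraint and
    # scale the second by L = 5 for a Lipschitz constant of 5.
    preprocess = lambda x: x * jax.numpy.asarray([-1.0, 5.0])

    init_params, model = interprenet.constrained_model(
        (frozenset([interprenet.monotonic_constraint]),
         frozenset([interprenet.lipschitz_constraint])),
        get_layers=lambda n: [n + 1],
        preprocess=preprocess,
        postprocess=interprenet.identity)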
In general, for decreasing 113 | monotonic constraints, multiply the respective features by 114 | :math:`-1`. For a Lipschitz constant of :math:`L`, multiply the 115 | respective features by 116 | :math:`L`. 117 | 118 | .. topic:: Tutorials: 119 | 120 | * :doc:`Interprenet ` 121 | 122 | .. bibliography:: refs.bib 123 | :cited: 124 | -------------------------------------------------------------------------------- /mvtk/metrics.py: -------------------------------------------------------------------------------- 1 | import public 2 | import numpy 3 | import pandas 4 | 5 | from scipy.stats import entropy 6 | from sklearn.feature_selection import mutual_info_classif 7 | 8 | 9 | def binarize(data, t): 10 | y_true, y_pred = data.values.T 11 | return y_true > t, y_pred 12 | 13 | 14 | @public.add 15 | def monotonicity(y_true, y_pred, weights=None): 16 | r"""Generalizes ROC AUC by computing 17 | :math:`P\left(\frac{\Delta\mathrm{y_pred}}{\Delta\mathrm{y_true}} > 18 | 0\right)`, the probability incrementing ``y_true`` increases ``y_pred`` for 19 | a randomly chosen pair of instances. This reduces to ROC AUC when 20 | ``y_true`` has two unique values. Adapted from Algorithm 2 in `Fawcett, T. 21 | (2006). An introduction to ROC analysis. Pattern Recognition Letters, 22 | 27(8), 861-874. 23 | `_ 24 | 25 | Args: 26 | y_true (list-like): Ground truth ordinal values 27 | y_pred (list-like): Predicted ordinal values 28 | weights (list-like): Sample weights. Will be normalized to one 29 | across each unique value of ``y_true``. If ``None`` (default) all 30 | samples are weighed equally. 31 | 32 | Returns: 33 | Float between 0 and 1. 0 indicates 100% chance of ``y_pred`` 34 | decreasing upon incrementing ``y_true`` up to its next 35 | highest value in the dataset. 1 indicates a 100% chance of 36 | ``y_pred`` increasing for the same scenario. 0.5 would be 50% 37 | chance of either. 38 | """ 39 | if weights is None: 40 | weights = numpy.ones(len(y_true)) 41 | unique = numpy.unique(y_true) 42 | n = len(unique) - 1 43 | true_lookup = {u: i + 1 for i, u in enumerate(unique)} 44 | idx = numpy.argsort(-y_pred) 45 | y_true = y_true[idx] 46 | y_pred = y_pred[idx] 47 | weights = weights[idx] 48 | # fp, fp_prev, tp, tp_prev, auc 49 | data = numpy.zeros((5, n)) 50 | prev_pred = numpy.full(n, numpy.nan) 51 | for true, pred, weight in zip(y_true, y_pred, weights): 52 | i = true_lookup[true] 53 | j = max(i - 2, 0) 54 | mask = pred != prev_pred[j:i] 55 | data[4, j:i][mask] += trap(*data[:4, j:i][:, mask]) 56 | data[1:4:2, j:i][:, mask] = data[:4:2, j:i][:, mask] 57 | prev_pred[j:i] = pred 58 | i -= 1 59 | if i: 60 | data[2, j] += weight 61 | if i < n: 62 | data[0, i] += weight 63 | data[4] += trap(*data[:4]) 64 | return numpy.sum(data[4]) / 2 / data[0].dot(data[2]) 65 | 66 | 67 | def trap(x2, x1, y2, y1): 68 | return (x2 - x1) * (y2 + y1) 69 | 70 | 71 | @public.add 72 | def rank_auc(y_true, y_pred, weights=None): 73 | r"""Generalizes ROC AUC by computing probability that two randomly chosen 74 | data points would be ranked consistently with ground truth labels. This 75 | reduces to ROC AUC when ``y_true`` has two unique values. 76 | Adapted from Algorithm 2 in `Fawcett, T. (2006). An introduction 77 | to ROC analysis. Pattern Recognition Letters, 27(8), 861-874. 78 | `_ 79 | 80 | Args: 81 | y_true (list-like): Ground truth ordinal values 82 | y_pred (list-like): Predicted ordinal values 83 | weights (list-like): Sample weights. Will be normalized to one 84 | across each unique value of ``y_true``.
If ``None`` (default) all 85 | samples are weighed equally. 86 | 87 | Returns: 88 | Float between 0 and 1. 1 indicates a 100% chance of ``y_pred`` 89 | matching the order of ``y_true``; 0 indicates a 100% chance of 90 | ``y_pred`` having the opposite order of ``y_true``. 0.5 would be 50% 91 | chance of either. 92 | """ 93 | if weights is None: 94 | weights = numpy.ones(len(y_true)) 95 | unique = numpy.unique(y_true) 96 | n = len(unique) - 1 97 | true_lookup = {u: i + 1 for i, u in enumerate(unique)} 98 | idx = numpy.argsort(-y_pred) 99 | y_true = y_true[idx] 100 | y_pred = y_pred[idx] 101 | weights = weights[idx] 102 | # fp, fp_prev, tp, tp_prev, auc 103 | data = numpy.zeros((5, n)) 104 | prev_pred = numpy.full(n, numpy.nan) 105 | for true, pred, weight in zip(y_true, y_pred, weights): 106 | i = true_lookup[true] 107 | mask = pred != prev_pred[:i] 108 | data[4, :i][mask] += trap(*data[:4, :i][:, mask]) 109 | data[1:4:2, :i][:, mask] = data[:4:2, :i][:, mask] 110 | prev_pred[:i] = pred 111 | i -= 1 112 | data[2, :i] += weight 113 | if i < n: 114 | data[0, i] += weight 115 | data[4] += trap(*data[:4]) 116 | return numpy.sum(data[4]) / 2 / data[0].dot(data[2]) 117 | 118 | 119 | @public.add 120 | def normalized_mutual_info(X, y, **kwargs): 121 | """Thin wrapper around `sklearn's mutual information 122 | <https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.mutual_info_classif.html>`_. 123 | This normalizes the result to a 0-1 scale. ``y`` is assumed categorical. 124 | """ 125 | _, counts = numpy.unique(y, return_counts=True) 126 | return pandas.Series( 127 | dict( 128 | zip( 129 | X.columns, 130 | mutual_info_classif(X, y, **kwargs) / entropy(counts / counts.sum()), 131 | ) 132 | ) 133 | ) 134 | -------------------------------------------------------------------------------- /mvtk/supervisor/utils.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import sys 3 | import time 4 | import pandas as pd 5 | import numpy as np 6 | import public 7 | 8 | from concurrent.futures import ThreadPoolExecutor, as_completed 9 | from datetime import datetime 10 | from typing import Collection 11 | from typing import List 12 | from itertools import combinations 13 | from fastcore.imports import in_notebook 14 | 15 | if in_notebook(): 16 | from tqdm import tqdm_notebook as tqdm 17 | else: 18 | from tqdm import tqdm 19 | 20 | 21 | @public.add 22 | def parallel(func, arr: Collection, max_workers=None, show_progress: bool = False): 23 | """ 24 | NOTE: This code was adapted from the ``parallel`` function 25 | within Fastai's Fastcore library. Key differences include 26 | returning a list with order preserved. 27 | 28 | Run a function on a collection (list, set etc) of items 29 | :param func: The function to run 30 | :param arr: The collection to run on 31 | :param max_workers: How many workers to use.
Will use 32 | multiprocessing.cpu_count() if this is not provided 33 | :return: a list of the results 34 | """ 35 | if show_progress: 36 | progress_bar = tqdm(arr, smoothing=0, file=sys.stdout) 37 | results = [] 38 | max_workers = multiprocessing.cpu_count() if max_workers is None else max_workers 39 | with ThreadPoolExecutor(max_workers=max_workers) as ex: 40 | future_to_index = {ex.submit(func, o): i for i, o in enumerate(arr)} 41 | for future in as_completed(future_to_index): 42 | results.append((future_to_index[future], future.result())) 43 | if show_progress: 44 | progress_bar.update() 45 | results.sort() 46 | 47 | # Complete the progress bar if not complete 48 | if show_progress: 49 | for n in range(progress_bar.n, len(list(arr))): 50 | time.sleep(0.1) 51 | progress_bar.update() 52 | return [result for i, result in results] 53 | 54 | 55 | @public.add 56 | def column_indexes(df: pd.DataFrame, cols: List[str]): 57 | """ 58 | 59 | :param df: The dataframe 60 | :param cols: a list of column names 61 | :return: The column indexes of the column names 62 | """ 63 | return [df.columns.get_loc(col) for col in cols if col in df] 64 | 65 | 66 | def format_date(date_str, dateformat="%b%d"): 67 | date = pd.to_datetime(date_str) 68 | return datetime.strftime(date, dateformat) 69 | 70 | 71 | @public.add 72 | def compute_divergence_crosstabs( 73 | data, datecol=None, format=None, show_progress=True, divergence=None 74 | ): 75 | """Compute the divergence crosstabs. 76 | 77 | :param data: The data to compute the divergences on 78 | :param datecol: The column representing the date. If None, will 79 | use the index, if the index is a datetimeindex 80 | :param format: A function applied to datecol values for formatting 81 | e.g. ``format_date`` 82 | :param show_progress: Whether the progress bar will be shown 83 | :param divergence: The divergence function to use 84 | """ 85 | if datecol is None: 86 | datecol = data.index 87 | dates, subsets = zip(*data.groupby(datecol)) 88 | dates = list(dates) 89 | subsets = (subset.drop(columns=[datecol]) for subset in subsets) 90 | 91 | return compute_divergence_crosstabs_split( 92 | subsets, dates, format, show_progress, divergence 93 | ) 94 | 95 | 96 | @public.add 97 | def compute_divergence_crosstabs_split( 98 | subsets, dates, format=None, show_progress=True, divergence=None 99 | ): 100 | """Compute the divergence crosstabs. 101 | 102 | :param subsets: The data to compute the divergences on 103 | :param dates: The list of dates for the subsets 104 | :param format: A function applied to datecol values for formatting 105 | e.g. 
``format_date`` 106 | :param show_progress: Whether the progress bar will be shown 107 | :param divergence: The divergence function to use 108 | """ 109 | 110 | # Create a divergence matrix 111 | divergences = np.zeros((len(dates), len(dates))) 112 | if not divergence: 113 | from mvtk.supervisor.divergence import calc_tv 114 | 115 | divergence = calc_tv 116 | 117 | def compute_divergence(args): 118 | return divergence(*args) 119 | 120 | for (i, j), v in zip( 121 | combinations(range(len(dates)), 2), 122 | parallel( 123 | compute_divergence, combinations(subsets, 2), show_progress=show_progress 124 | ), 125 | ): 126 | divergences[i, j] = divergences[j, i] = v 127 | if format is None: 128 | formatted = dates 129 | else: 130 | formatted = [format(d) for d in dates] 131 | return pd.DataFrame(divergences, columns=formatted, index=formatted) 132 | 133 | 134 | @public.add 135 | def plot_divergence_crosstabs_3d(divergences): 136 | """Plot the divergences in 3d. 137 | 138 | :params divergences: The list of divergences 139 | """ 140 | import matplotlib.pyplot as plt 141 | from mpl_toolkits.mplot3d import Axes3D # noqa F401 142 | 143 | fig = plt.figure() 144 | ax = fig.add_subplot(111, projection="3d") 145 | 146 | keys = list(divergences.keys()) 147 | indexes = range(len(keys)) 148 | 149 | for i in indexes: 150 | y = [x[1] for x in list(divergences[keys[i]].items())] 151 | ax.bar(indexes, y, i, zdir="y", alpha=0.8) 152 | 153 | ax.set(xticks=indexes, xticklabels=keys, yticks=indexes, yticklabels=keys) 154 | 155 | return fig 156 | 157 | 158 | @public.add 159 | def split(x, train_ratio=0.5, nprng=np.random.RandomState(0)): 160 | i = int(len(x) * train_ratio) 161 | if hasattr(x, "shape"): 162 | idx = np.arange(x.shape[0]) 163 | nprng.shuffle(idx) 164 | x = x[idx] 165 | else: 166 | nprng.shuffle(x) 167 | return x[:i], x[i:] 168 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | [comment]: <> (Adapted from JAX's contribution guide) 2 | 3 | # Contributing 4 | 5 | Everyone can contribute to Model Validation Toolkit, and we value everyone's contributions. There are several 6 | ways to contribute, including: 7 | 8 | 9 | - Answering questions on Model Validation Toolkit's [Gitter channel](https://gitter.im/FINRAOS/model-validation-toolkit) 10 | - Improving or expanding Model Validation Toolkit's [documentation](https://finraos.github.io/model-validation-toolkit/docs/html/index.html) 11 | - Contributing to Model Validation Toolkit's [code-base](https://github.com/FINRAOS/model-validation-toolkit/) 12 | 13 | ## Ways to contribute 14 | 15 | We welcome pull requests, in particular for those issues marked with 16 | [contributions welcome](https://github.com/FINRAOS/model-validation-toolkit/issues?q=is%3Aopen+is%3Aissue+label%3A%22contributions+welcome%22) or 17 | [good first issue](https://github.com/FINRAOS/model-validation-toolkit/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). 18 | 19 | For other proposals, we ask that you first open a GitHub 20 | [Issue](https://github.com/FINRAOS/model-validation-toolkit/issues/new/choose) or 21 | [Gitter channel](https://gitter.im/FINRAOS/model-validation-toolkit) 22 | 23 | to seek feedback on your planned contribution. 24 | 25 | ## Contributing code using pull requests 26 | 27 | We do all of our development using git, so basic knowledge is assumed. 28 | 29 | Follow these steps to contribute code: 30 | 31 | 1. 
Fork the Model Validation Toolkit repository by clicking the **Fork** button on the 32 | [repository page](https://www.github.com/FINRAOS/model-validation-toolkit). This creates 33 | a copy of the Model Validation Toolkit repository in your own account. 34 | 35 | 2. Install Python >=3.6 locally in order to run tests. 36 | 37 | 3. `pip` installing your fork from source. This allows you to modify the code 38 | and immediately test it out: 39 | 40 | ```bash 41 | git clone https://github.com/YOUR_USERNAME/model-validation-toolkit 42 | cd model-validation-toolkit 43 | pip install -e . # Installs Model Validation Toolkit from the current directory in editable mode. 44 | ``` 45 | 46 | 4. Add the Model Validation Toolkit repo as an upstream remote, so you can use it to sync your 47 | changes. 48 | 49 | ```bash 50 | git remote add upstream http://www.github.com/FINRAOS/model-validation-toolkit 51 | ``` 52 | 53 | 5. Create a branch where you will develop from: 54 | 55 | ```bash 56 | git checkout -b name-of-change 57 | ``` 58 | 59 | And implement your changes using your favorite editor. 60 | 61 | 6. Make sure the tests pass by running the following command from the top of 62 | the repository: 63 | 64 | ```bash 65 | pytest tests/ 66 | ``` 67 | 68 | If you know the specific test file that covers your changes, you can limit the tests to that; for example: 69 | 70 | ```bash 71 | pytest tests/supervisor 72 | ``` 73 | 74 | Model Validation Toolkit also offers more fine-grained control over which particular tests are run; 75 | see {ref}`running-tests` for more information. 76 | 77 | 7. Once you are satisfied with your change, create a commit as follows ([how to write a commit message](https://chris.beams.io/posts/git-commit/)): 78 | 79 | ```bash 80 | git add file1.py file2.py ... 81 | git commit -s -m "Your commit message" 82 | ``` 83 | 84 | Please be sure to sign off your work when you commit it with the `-s` or, equivalently `--sign-off` flag to agree to our [DCO](https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/main/DCO). 85 | 86 | Then sync your code with the main repo: 87 | 88 | ```bash 89 | git fetch upstream 90 | git rebase upstream/main 91 | ``` 92 | 93 | Finally, push your commit on your development branch and create a remote 94 | branch in your fork that you can use to create a pull request from: 95 | 96 | ```bash 97 | git push --set-upstream origin name-of-change 98 | ``` 99 | 100 | 8. Create a pull request from the Model Validation Toolkit repository and send it for review. 101 | Check the {ref}`pr-checklist` for considerations when preparing your PR, and 102 | consult [GitHub Help](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) 103 | if you need more information on using pull requests. 104 | 105 | ## Model Validation Toolkit pull request checklist 106 | 107 | As you prepare a Model Validation Toolkit pull request, here are a few things to keep in mind: 108 | 109 | ### DCO 110 | 111 | By contributing to this project, you agree to our [DCO](https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/main/DCO). 112 | 113 | ### Single-change commits and pull requests 114 | 115 | A git commit ought to be a self-contained, single change with a descriptive 116 | message. This helps with review and with identifying or reverting changes if 117 | issues are uncovered later on. 118 | 119 | Pull requests typically comprise a single git commit. 
In preparing a pull 120 | request for review, you may need to squash together multiple commits. We ask 121 | that you do this prior to sending the PR for review if possible. The `git 122 | rebase -i` command might be useful to this end. 123 | 124 | ### Linting and Type-checking 125 | 126 | Model Validation Toolkit uses [mypy](https://mypy.readthedocs.io/) and [flake8](https://flake8.pycqa.org/) 127 | to statically test code quality; the easiest way to run these checks locally is via 128 | the [pre-commit](https://pre-commit.com/) framework: 129 | 130 | ```bash 131 | pip install pre-commit 132 | pre-commit run --all 133 | ``` 134 | 135 | ### Full GitHub test suite 136 | 137 | Your PR will automatically be run through a full test suite on GitHub CI, which 138 | covers a range of Python versions, dependency versions, and configuration options. 139 | It's normal for these tests to turn up failures that you didn't catch locally; to 140 | fix the issues you can push new commits to your branch. 141 | -------------------------------------------------------------------------------- /docs/refs.bib: -------------------------------------------------------------------------------- 1 | @article{sriperumbudur2009integral, 2 | title={On integral probability metrics,$\backslash$phi-divergences and binary classification}, 3 | author={Sriperumbudur, Bharath K and Fukumizu, Kenji and Gretton, Arthur and Sch{\"o}lkopf, Bernhard and Lanckriet, Gert RG}, 4 | journal={arXiv preprint arXiv:0901.2698}, 5 | year={2009} 6 | } 7 | @inproceedings{nowozin2016f, 8 | title={f-gan: Training generative neural samplers using variational divergence minimization}, 9 | author={Nowozin, Sebastian and Cseke, Botond and Tomioka, Ryota}, 10 | booktitle={Advances in neural information processing systems}, 11 | pages={271--279}, 12 | year={2016} 13 | } 14 | @misc{yale598, 15 | author={Yihong Wu}, 16 | title={Variational representation, HCR and CR lower bounds.}, 17 | month={February}, 18 | year={2016}, 19 | publisher={Yale}, 20 | url={http://www.stat.yale.edu/~yw562/teaching/598/lec06.pdf} 21 | } 22 | @article{csiszar2004information, 23 | title={Information theory and statistics: A tutorial}, 24 | author={Csisz{\'a}r, Imre and Shields, Paul C and others}, 25 | journal={Foundations and Trends{\textregistered} in Communications and Information Theory}, 26 | volume={1}, 27 | number={4}, 28 | pages={417--528}, 29 | year={2004}, 30 | publisher={Now Publishers, Inc.} 31 | } 32 | @article{nguyen2010estimating, 33 | title={Estimating divergence functionals and the likelihood ratio by convex risk minimization}, 34 | author={Nguyen, XuanLong and Wainwright, Martin J and Jordan, Michael I}, 35 | journal={IEEE Transactions on Information Theory}, 36 | volume={56}, 37 | number={11}, 38 | pages={5847--5861}, 39 | year={2010}, 40 | publisher={IEEE} 41 | } 42 | @article{topsoe2000some, 43 | title={Some inequalities for information divergence and related measures of discrimination}, 44 | author={Topsoe, Flemming}, 45 | journal={IEEE Transactions on information theory}, 46 | volume={46}, 47 | number={4}, 48 | pages={1602--1609}, 49 | year={2000}, 50 | publisher={IEEE} 51 | } 52 | @article{gretton2012kernel, 53 | title={A kernel two-sample test}, 54 | author={Gretton, Arthur and Borgwardt, Karsten M and Rasch, Malte J and Sch{\"o}lkopf, Bernhard and Smola, Alexander}, 55 | journal={Journal of Machine Learning Research}, 56 | volume={13}, 57 | number={Mar}, 58 | pages={723--773}, 59 | year={2012} 60 | } 61 | @article{webb2016characterizing, 62 | 
title={Characterizing concept drift}, 63 | author={Webb, Geoffrey I and Hyde, Roy and Cao, Hong and Nguyen, Hai Long and Petitjean, Francois}, 64 | journal={Data Mining and Knowledge Discovery}, 65 | volume={30}, 66 | number={4}, 67 | pages={964--994}, 68 | year={2016}, 69 | publisher={Springer} 70 | } 71 | @misc{vherrmann, 72 | author={Vincent Herrmann}, 73 | title={Wasserstein GAN and the Kantorovich-Rubinstein Duality}, 74 | month={February}, 75 | year={2017}, 76 | url={https://vincentherrmann.github.io/blog/wasserstein/} 77 | } 78 | @article{bellemare2017cramer, 79 | title={The cramer distance as a solution to biased wasserstein gradients}, 80 | author={Bellemare, Marc G and Danihelka, Ivo and Dabney, Will and Mohamed, Shakir and Lakshminarayanan, Balaji and Hoyer, Stephan and Munos, R{\'e}mi}, 81 | journal={arXiv preprint arXiv:1705.10743}, 82 | year={2017} 83 | } 84 | @inproceedings{gulrajani2017improved, 85 | title={Improved training of wasserstein gans}, 86 | author={Gulrajani, Ishaan and Ahmed, Faruk and Arjovsky, Martin and Dumoulin, Vincent and Courville, Aaron C}, 87 | booktitle={Advances in neural information processing systems}, 88 | pages={5767--5777}, 89 | year={2017} 90 | } 91 | @article{arjovsky2017wasserstein, 92 | title={Wasserstein gan}, 93 | author={Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on}, 94 | journal={arXiv preprint arXiv:1701.07875}, 95 | year={2017} 96 | } 97 | @phdthesis{tropp2004topics, 98 | title={Topics in sparse approximation}, 99 | author={Tropp, Joel Aaron}, 100 | school={University of Texas at Austin}, 101 | year={2004} 102 | } 103 | @inproceedings{anil2019sorting, 104 | title={Sorting out Lipschitz function approximation}, 105 | author={Anil, Cem and Lucas, James and Grosse, Roger}, 106 | booktitle={International Conference on Machine Learning}, 107 | pages={291--301}, 108 | year={2019}, 109 | organization={PMLR} 110 | } 111 | @article{sobol2001global, 112 | title={Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates}, 113 | author={Sobol, Ilya M}, 114 | journal={Mathematics and computers in simulation}, 115 | volume={55}, 116 | number={1-3}, 117 | pages={271--280}, 118 | year={2001}, 119 | publisher={Elsevier} 120 | } 121 | @book{saltelli2008global, 122 | title={Global sensitivity analysis: the primer}, 123 | author={Saltelli, Andrea and Ratto, Marco and Andres, Terry and Campolongo, Francesca and Cariboni, Jessica and Gatelli, Debora and Saisana, Michaela and Tarantola, Stefano}, 124 | year={2008}, 125 | publisher={John Wiley \& Sons} 126 | } 127 | @article{im1993sensitivity, 128 | title={Sensitivity estimates for nonlinear mathematical models}, 129 | author={IM, Sobol’}, 130 | journal={Math. Model. Comput. 
Exp}, 131 | volume={1}, 132 | number={4}, 133 | pages={407--414}, 134 | year={1993} 135 | } 136 | @article{reid2009generalised, 137 | title={Generalised pinsker inequalities}, 138 | author={Reid, Mark D and Williamson, Robert C}, 139 | journal={arXiv preprint arXiv:0906.1244}, 140 | year={2009} 141 | } 142 | @article{lin1991divergence, 143 | title={Divergence measures based on the Shannon entropy}, 144 | author={Lin, Jianhua}, 145 | journal={IEEE Transactions on Information theory}, 146 | volume={37}, 147 | number={1}, 148 | pages={145--151}, 149 | year={1991}, 150 | publisher={IEEE} 151 | } 152 | @techreport{domingos2000decomp, 153 | author={Domingos, Pedro}, 154 | title={A Unified Bias-Variance Decomposition and its Applications}, 155 | institution={University of Washington}, 156 | address={Seattle, WA}, 157 | month={January}, 158 | year={2000}, 159 | url={https://homes.cs.washington.edu/~pedrod/papers/mlc00a.pdf} 160 | } 161 | @misc{mlxtenddecomp, 162 | author={Sebastian Raschka}, 163 | title={bias_variance_decomp: Bias-variance decomposition for classification and regression losses}, 164 | year={2014-2023}, 165 | url={https://rasbt.github.io/mlxtend/user_guide/evaluate/bias_variance_decomp/} 166 | } -------------------------------------------------------------------------------- /mvtk/bias_variance/bias_variance_parallel.py: -------------------------------------------------------------------------------- 1 | import ray 2 | import numpy as np 3 | import public 4 | 5 | from sklearn.utils import resample 6 | 7 | from . import bias_variance_mse, get_values, train_and_predict 8 | 9 | 10 | def _prepare_X_and_y(X_train_values, y_train_values, prepare_X, prepare_y_train): 11 | return prepare_X(X_train_values), prepare_y_train(y_train_values) 12 | 13 | 14 | @public.add 15 | def bias_variance_compute_parallel( 16 | estimator, 17 | X_train, 18 | y_train, 19 | X_test, 20 | y_test, 21 | prepare_X=lambda x: x, 22 | prepare_y_train=lambda x: x, 23 | iterations=200, 24 | random_state=None, 25 | decomp_fn=bias_variance_mse, 26 | fit_kwargs=None, 27 | predict_kwargs=None, 28 | ): 29 | r"""Compute the bias-variance decomposition in parallel 30 | 31 | Args: 32 | estimator (EstimatorWrapper): estimator wrapped with a class extending 33 | EstimatorWrapper 34 | X_train: features for training 35 | y_train: ground truth labels for training 36 | X_test: features for testing 37 | y_test: ground truth labels for testing 38 | prepare_X (function, optional): function to transform feature datasets before 39 | calling fit and predict methods 40 | prepare_y_train (function, optional): function to transform training ground 41 | truth labels before calling fit method 42 | iterations (int, optional): number of iterations for the training/testing 43 | random_state (int, optional): random state for bootstrap sampling 44 | decomp_fn (function, optional): bias-variance decomposition function 45 | fit_kwargs (dict, optional): kwargs to pass to the fit method 46 | predict_kwargs (dict, optional): kwargs to pass to the predict method 47 | 48 | Returns: 49 | (average loss, average bias, average variance, net variance)""" 50 | if predict_kwargs is None: 51 | predict_kwargs = {} 52 | if fit_kwargs is None: 53 | fit_kwargs = {} 54 | 55 | if isinstance(random_state, int): 56 | random_state = np.random.RandomState(seed=random_state) 57 | 58 | X_train_values = get_values(X_train) 59 | y_train_values = get_values(y_train) 60 | X_test_values = get_values(X_test) 61 | X_test_prepared = prepare_X(X_test_values) 62 | 63 | if random_state 
is None: 64 | result = [ 65 | bootstrap_train_and_predict_ray.remote( 66 | estimator, 67 | X_train_values, 68 | y_train_values, 69 | X_test_prepared, 70 | prepare_X, 71 | prepare_y_train, 72 | fit_kwargs, 73 | predict_kwargs, 74 | ) 75 | for _ in range(iterations) 76 | ] 77 | else: 78 | result = [ 79 | train_and_predict_ray.remote( 80 | estimator, 81 | *_prepare_X_and_y( 82 | *resample( 83 | X_train_values, y_train_values, random_state=random_state 84 | ), 85 | prepare_X, 86 | prepare_y_train 87 | ), 88 | X_test_prepared, 89 | fit_kwargs, 90 | predict_kwargs 91 | ) 92 | for _ in range(iterations) 93 | ] 94 | 95 | predictions = np.array(ray.get(result)) 96 | 97 | y_test_values = get_values(y_test) 98 | 99 | return decomp_fn(predictions, y_test_values) 100 | 101 | 102 | @ray.remote 103 | def train_and_predict_ray( 104 | estimator, 105 | X_train_values, 106 | y_train_values, 107 | X_test_prepared, 108 | fit_kwargs=None, 109 | predict_kwargs=None, 110 | ): 111 | r"""Train an estimator and get predictions from it 112 | 113 | Args: 114 | estimator (EstimatorWrapper): estimator wrapped with a class extending 115 | EstimatorWrapper 116 | X_train_values: numpy array of features for training 117 | y_train_values: numpy array of ground truth labels for training 118 | X_test_prepared: features for testing which has been processed by prepare_X 119 | function 120 | fit_kwargs (dict, optional): kwargs to pass to the fit method 121 | predict_kwargs (dict, optional): kwargs to pass to the predict method 122 | 123 | Returns: 124 | predictions""" 125 | return train_and_predict( 126 | estimator, 127 | X_train_values, 128 | y_train_values, 129 | X_test_prepared, 130 | fit_kwargs=fit_kwargs, 131 | predict_kwargs=predict_kwargs, 132 | ) 133 | 134 | 135 | @ray.remote 136 | def bootstrap_train_and_predict_ray( 137 | estimator, 138 | X_train_values, 139 | y_train_values, 140 | X_test_prepared, 141 | prepare_X=lambda x: x, 142 | prepare_y_train=lambda x: x, 143 | fit_kwargs=None, 144 | predict_kwargs=None, 145 | ): 146 | r"""Train an estimator using a bootstrap sample of the training data and get 147 | predictions from it 148 | 149 | Args: 150 | estimator (EstimatorWrapper): estimator wrapped with a class extending 151 | EstimatorWrapper 152 | X_train_values: numpy array of features for training 153 | y_train_values: numpy array of ground truth labels for training 154 | X_test_prepared: features for testing which has been processed by prepare_X 155 | function 156 | prepare_X (function, optional): function to transform feature datasets before 157 | calling fit and predict methods 158 | prepare_y_train (function, optional): function to transform train ground truth 159 | labels before calling fit method 160 | fit_kwargs (dict, optional): kwargs to pass to the fit method 161 | predict_kwargs (dict, optional): kwargs to pass to the predict method 162 | 163 | Returns: 164 | predictions""" 165 | if predict_kwargs is None: 166 | predict_kwargs = {} 167 | if fit_kwargs is None: 168 | fit_kwargs = {} 169 | 170 | X_sample, y_sample = resample(X_train_values, y_train_values) 171 | 172 | return train_and_predict( 173 | estimator, 174 | X_sample, 175 | y_sample, 176 | X_test_prepared, 177 | prepare_X, 178 | prepare_y_train, 179 | fit_kwargs, 180 | predict_kwargs, 181 | ) 182 | -------------------------------------------------------------------------------- /docs/credibility_user_guide.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | Credibility User Guide 3 | ########### 4 
| 
5 | **********
6 | Motivation
7 | **********
8 | 
9 | Let's say we are training a model for medical diagnoses. Avoiding false negatives
10 | is important, and we have a hard requirement that a model's recall (proportion
11 | of positive instances identified) must not fall below 70%. If someone validates
12 | a model and reports a recall of 80%, are we clear? Well, maybe. It turns out
13 | this data scientist had a validation set with 5 positive instances. The model
14 | correctly identified 4 of them, giving it a recall of 80%. Would you trust
15 | that? Of course not! You say that a larger sample size is needed. "How many do we
16 | need?" they ask. This module will help answer that question.
17 | 
18 | How?
19 | ====
20 | 
21 | There are two schools of thought on this problem: the `frequentist
22 | `_ and the
23 | `Bayesian `_ approaches.
24 | In practice they tend to give similar results. Going back to our 5 sample
25 | validation set, the frequentist would be concerned with how much our recall
26 | would be expected to vary from one 5 sample hold out set to another. They would
27 | want the hold out set to be large enough that you would not expect much change
28 | in the estimated recall from one hold out set to another. The Bayesian approach
29 | seeks to directly identify the probability that the recall would be lower than
30 | 70% if the validation set were infinitely large. We believe this is a better
31 | representation of the problem at hand, and designed the library around this
32 | Bayesian approach.
33 | 
34 | 
35 | ******************
36 | Beta Distributions
37 | ******************
38 | 
39 | Probability of Low Performance
40 | ==============================
41 | 
42 | .. currentmodule:: mvtk.credibility
43 | 
44 | If you flip a coin 100 times, and it comes up heads 99 times, would you suspect
45 | a biased coin? Probably. What if you flipped it 5 times and saw 4 heads?
46 | This is much less strange. Determining the bias of a coin embodies the core
47 | principles behind determining whether many performance metrics are unacceptably
48 | low.
49 | 
50 | If the coin *is* biased, how biased is it? In general, we'd say there's some
51 | probability distribution over all possible biases. We would generally use a
52 | `beta distribution `_ to
53 | model this distribution for good reasons. This distribution has two free
54 | parameters: the number of heads and the number of tails. However, we generally
55 | offset both of those numbers by 1, so the distribution for zero observed flips is
56 | :math:`B(1, 1)` (with :math:`B` representing our beta distribution as a
57 | function of heads and tails plus respective offsets), which as it turns out is
58 | exactly a uniform distribution over all possible biases. In this sense, we can
59 | express total uncertainty before taking measurements. The beta distribution
60 | becomes more concentrated around the empirical proportion of heads as you take
61 | more and more measurements. If we were reasonably certain of a 60% bias, we
62 | might offset the number of heads with a 6 and the number of tails with a 4.
63 | Then we would start to expect an unbiased coin after observing 2 tails. This
64 | offset is called the *prior* in Bayesian inference, and represents our
65 | understanding before making any observations.
66 | 
67 | .. math::
68 |     B(\alpha, \beta)
69 | 
70 | .. figure:: images/Beta_distribution_pdf.svg
71 |     :width: 800px
72 |     :align: center
73 |     :height: 400px
74 |     :alt: alternate text
75 |     :figclass: align-center
76 | 
77 |     Beta distribution for different :math:`\alpha` (for heads plus offset) and
78 |     :math:`\beta` (tails plus offset).
79 | 
80 | We integrate the area under :math:`B(\alpha,\beta)` from 0 to
81 | :math:`p` to determine the probability that a coin's bias is less
82 | than :math:`p`. This is effectively how :meth:`prob_below` works.
83 | 
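As a minimal sketch of these mechanics (written against ``scipy``, which is certain; the ``prob_below`` call shown in the comment assumes a ``(heads, tails, p)``-style signature, which may differ from the actual API):

.. code-block:: python

    # Probability that the coin's bias is below 0.7 after seeing
    # 4 heads and 1 tail, starting from a uniform B(1, 1) prior.
    from scipy.stats import beta

    heads, tails = 4, 1
    # The posterior is B(heads + 1, tails + 1); integrating its density
    # from 0 to 0.7 gives the probability of a bias below 70%.
    print(beta.cdf(0.7, heads + 1, tails + 1))

    # Assumed equivalent call (illustrative signature, not authoritative):
    # from mvtk import credibility
    # credibility.prob_below(heads, tails, 0.7)
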
84 | 
85 | Credible Intervals
86 | ==================
87 | 
88 | Sometimes you just want a general sense of uncertainty for your sample
89 | estimates. We use :meth:`credible_interval` to compute a `credible interval `_. This will give you the
90 | smallest interval for which there is a `credibility` (keyword argument that
91 | defaults to :math:`0.5`) chance of the bias being within that region. It will
92 | return a lower bound no less than :math:`0` and an upper bound no greater than :math:`1`.
93 | This is subtly different from frequentist `confidence intervals
94 | `_. In our 5 sample
95 | example, the latter reports an interval constructed so that `p` (often
96 | chosen to be 95%) of all such 5 sample intervals would contain the true mean.
97 | 
98 | **************
99 | Common Metrics
100 | **************
101 | Many performance metrics used for binary
102 | classification follow the same mechanics as the
103 | analysis above. The following is a non-exhaustive
104 | list of performance metrics that can be readily
105 | translated into a biased coin scenario in which we
106 | wish to determine heads / (heads + tails).
107 | 
108 | * Precision: true positive / (true positive + false positive)
109 | * Recall: true positive / (true positive + false negative)
110 | * Accuracy: correctly identified / (correctly identified + incorrectly identified)
111 | 
112 | 
113 | ROC AUC
114 | =================
115 | 
116 | `ROC AUC
117 | `_
118 | is an extremely useful measure for binary classification. Like many
119 | other measures of performance for binary classification, it can be
120 | expressed as a proportion of outcomes. However,
121 | unlike other measures of performance, it does not
122 | make use of a threshold. This ultimately makes it a
123 | ranking metric, as it characterizes the degree to
124 | which positive instances are scored higher than
125 | negative instances. However, like other metrics, it
126 | can be expressed as an empirical measure of a
127 | proportion. Specifically, ROC AUC is the proportion
128 | of pairs of positive and negative examples such
129 | that the positive example is scored higher than the
130 | negative one. This can be expressed as
131 | 
132 | .. math::
133 |     \frac{1}{NM}\sum\limits_{n,m}^{N,M} \left[\mathrm{score}(\mathrm{Positive}_n) > \mathrm{score}(\mathrm{Negative}_m)\right]
134 | 
135 | where the bracketed comparison counts 1 when true and 0 otherwise. However, computing the area under the receiver
136 | operating characteristic is a more computationally
137 | efficient means of computing the same quantity.
138 | :meth:`roc_auc_preprocess` will convert a positive and negative
139 | sample count to an associated count of correctly and incorrectly
140 | ranked pairs of positive and negative instances using the ROC AUC
141 | score. This pair of numbers can be used as arguments for
142 | :meth:`prob_below` and :meth:`credible_interval`.
143 | 
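The sketch below ties these pieces together. The argument order for :meth:`roc_auc_preprocess`, :meth:`prob_below`, and :meth:`credible_interval` is assumed for illustration; consult the API reference for the exact signatures.

.. code-block:: python

    from mvtk import credibility

    n_positive, n_negative = 100, 400  # validation sample counts
    roc_auc = 0.9                      # measured ROC AUC score

    # Convert sample counts plus the ROC AUC score into counts of
    # correctly and incorrectly ranked positive/negative pairs
    # (assumed argument order).
    correct, incorrect = credibility.roc_auc_preprocess(
        n_positive, n_negative, roc_auc
    )

    # Probability that the "true" ROC AUC falls below 0.85, and the
    # smallest interval with a 50% chance of containing it.
    credibility.prob_below(correct, incorrect, 0.85)
    credibility.credible_interval(correct, incorrect, credibility=0.5)
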
144 | .. topic:: Tutorials:
145 | 
146 |     * :doc:`Credibility `
147 | 
-------------------------------------------------------------------------------- /tests/bias_variance/estimators/test_pytorch_estimator_wrapper.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | 
5 | from mvtk.bias_variance.estimators import PyTorchEstimatorWrapper
6 | 
7 | 
8 | class ModelPyTorch(nn.Module):
9 |     def __init__(self):
10 |         super().__init__()
11 |         self.linear1 = nn.Linear(2, 8)
12 |         self.linear2 = nn.Linear(8, 1)
13 | 
14 |     def forward(self, x):
15 |         x = self.linear1(x)
16 |         x = self.linear2(x)
17 |         return x
18 | 
19 | 
20 | def create_data():
21 |     X_train = np.arange(12).reshape(6, 2)
22 |     y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
23 |     X_test = np.arange(6).reshape(3, 2)
24 |     y_test = np.array([0, 1, 1])
25 | 
26 |     return X_train, y_train, X_test, y_test
27 | 
28 | 
29 | def create_model():
30 |     model_pytorch = ModelPyTorch()
31 |     optimizer = torch.optim.Adam(model_pytorch.parameters(), lr=0.001)
32 |     loss_fn = nn.MSELoss()
33 | 
34 |     return model_pytorch, optimizer, loss_fn
35 | 
36 | 
37 | def optimizer_gen(x):
38 |     return torch.optim.Adam(x.parameters(), lr=0.001)
39 | 
40 | 
41 | def reset_parameters(x):
42 |     if hasattr(x, "reset_parameters"):
43 |         x.reset_parameters()
44 | 
45 | 
46 | def fit(estimator, optimizer, loss_fn, X, y, epochs=10, batch_size=None):
47 |     for i in range(epochs):
48 |         if batch_size is None:
49 |             batch_size = len(y)
50 |         for j in range(0, len(y), batch_size):
51 |             batch_start = j
52 |             batch_end = j + batch_size
53 |             X_batch = X[batch_start:batch_end]
54 |             y_batch = y[batch_start:batch_end]
55 |             prediction = estimator(X_batch)
56 |             loss = loss_fn(prediction, y_batch)
57 | 
58 |             optimizer.zero_grad()
59 |             loss.backward()
60 |             optimizer.step()
61 | 
62 | 
63 | def custom_fit(self, X, y, epochs=10, batch_size=None):
64 |     for i in range(epochs):
65 |         if batch_size is None:
66 |             batch_size = len(y)
67 |         for j in range(0, len(y), batch_size):
68 |             batch_start = j
69 |             batch_end = j + batch_size
70 |             X_batch = X[batch_start:batch_end]
71 |             y_batch = y[batch_start:batch_end]
72 |             prediction = self.estimator(X_batch)
73 |             loss = self.loss_fn(prediction, y_batch)
74 | 
75 |             self.optimizer.zero_grad()
76 |             loss.backward()
77 |             self.optimizer.step()
78 | 
79 | 
80 | def predict(estimator, X, custom_test=False):
81 |     if custom_test:
82 |         return [1, 0, 1]
83 | 
84 |     prediction_list = []
85 |     with torch.no_grad():
86 |         for value in X:
87 |             prediction = estimator(value)
88 |             if len(prediction) > 1:
89 |                 prediction_list.append(prediction.argmax().item())
90 |             else:
91 |                 prediction_list.append(prediction.item())
92 |     return prediction_list
93 | 
94 | 
95 | def custom_predict(estimator, X):
96 |     return [1, 0, 1]
97 | 
98 | 
99 | def test_pytorch_estimator_wrapper():
100 |     torch.use_deterministic_algorithms(True)
101 | 
102 |     X_train, y_train, X_test, y_test = create_data()
103 | 
104 |     X_train_torch = torch.FloatTensor(X_train)
105 |     X_test_torch = torch.FloatTensor(X_test)
106 |     y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1)
107 | 
108 |     torch.manual_seed(123)
109 |     model, optimizer, loss_fn = create_model()
110 | 
111 |     model.apply(reset_parameters)
112 |     fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=100)
113 |     pred = predict(model, X_test_torch)
114 | 
115 |     torch.manual_seed(123)
116 |     model_test, optimizer_test, loss_fn_test = create_model()
117 |     model_wrapped =
PyTorchEstimatorWrapper(model_test, optimizer_gen, loss_fn_test) 118 | 119 | model_wrapped.fit(X_train_torch, y_train_torch) 120 | pred_wrapped = model_wrapped.predict(X_test_torch) 121 | 122 | assert np.array_equal(pred, pred_wrapped) 123 | 124 | 125 | def test_pytorch_estimator_wrapper_kwargs_fit(): 126 | torch.use_deterministic_algorithms(True) 127 | 128 | X_train, y_train, X_test, y_test = create_data() 129 | 130 | X_train_torch = torch.FloatTensor(X_train) 131 | X_test_torch = torch.FloatTensor(X_test) 132 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1) 133 | 134 | torch.manual_seed(123) 135 | model, optimizer, loss_fn = create_model() 136 | 137 | model.apply(reset_parameters) 138 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=5) 139 | pred = predict(model, X_test_torch) 140 | 141 | torch.manual_seed(123) 142 | model_test, optimizer_test, loss_fn_test = create_model() 143 | model_wrapped = PyTorchEstimatorWrapper(model_test, optimizer_gen, loss_fn_test) 144 | 145 | model_wrapped.fit(X_train_torch, y_train_torch, epochs=5) 146 | pred_wrapped = model_wrapped.predict(X_test_torch) 147 | 148 | assert np.array_equal(pred, pred_wrapped) 149 | 150 | 151 | def test_pytorch_estimator_wrapper_custom_fit(): 152 | torch.use_deterministic_algorithms(True) 153 | 154 | X_train, y_train, X_test, y_test = create_data() 155 | 156 | X_train_torch = torch.FloatTensor(X_train) 157 | X_test_torch = torch.FloatTensor(X_test) 158 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1) 159 | 160 | torch.manual_seed(123) 161 | model, optimizer, loss_fn = create_model() 162 | 163 | model.apply(reset_parameters) 164 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=10) 165 | pred = predict(model, X_test_torch) 166 | 167 | torch.manual_seed(123) 168 | model_test, optimizer_test, loss_fn_test = create_model() 169 | model_wrapped = PyTorchEstimatorWrapper( 170 | model_test, optimizer_gen, loss_fn_test, fit_fn=custom_fit 171 | ) 172 | 173 | model_wrapped.fit(X_train_torch, y_train_torch) 174 | pred_wrapped = model_wrapped.predict(X_test_torch) 175 | 176 | assert np.array_equal(pred, pred_wrapped) 177 | 178 | 179 | def test_pytorch_estimator_wrapper_custom_predict(): 180 | torch.use_deterministic_algorithms(True) 181 | 182 | X_train, y_train, X_test, y_test = create_data() 183 | 184 | X_train_torch = torch.FloatTensor(X_train) 185 | X_test_torch = torch.FloatTensor(X_test) 186 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1) 187 | 188 | torch.manual_seed(123) 189 | model, optimizer, loss_fn = create_model() 190 | 191 | model.apply(reset_parameters) 192 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=100) 193 | pred = predict(model, X_test_torch, custom_test=True) 194 | 195 | torch.manual_seed(123) 196 | model_test, optimizer_test, loss_fn_test = create_model() 197 | model_wrapped = PyTorchEstimatorWrapper( 198 | model_test, optimizer_gen, loss_fn_test, predict_fn=custom_predict 199 | ) 200 | 201 | model_wrapped.fit(X_train_torch, y_train_torch) 202 | pred_wrapped = model_wrapped.predict(X_test_torch) 203 | 204 | assert np.array_equal(pred, pred_wrapped) 205 | -------------------------------------------------------------------------------- /mvtk/thresholding.py: -------------------------------------------------------------------------------- 1 | import public 2 | import bisect 3 | import numpy 4 | import matplotlib.pylab as plt 5 | 6 | from functools import reduce 7 | 8 | 9 | @public.add 10 | def plot_err(scores, utility_mean, 
utility_err, color=None, label=None, alpha=0.5):
11 |     plt.plot(scores, utility_mean, color=color)
12 |     plt.fill_between(scores, *utility_err, alpha=alpha, color=color, label=label)
13 | 
14 | 
15 | @public.add
16 | def expected_utility(utility, data, N=4096, credibility=0.5):
17 |     """Get the utility distribution over possible thresholds.
18 | 
19 |     Args:
20 |         utility (function): utility function that ingests true/false
21 |             positive/negative rates.
22 |         data (list-like): iterable of list-likes of the form (ground truth,
23 |             score). Feedback is null when an alert is not triggered.
24 |         credibility (float): Credibility level for a credible interval. This
25 |             interval will be centered about the mean and have a `credibility`
26 |             chance of containing the true utility.
27 | 
28 |     Returns:
29 |         tuple of three elements:
30 |         - candidate thresholds
31 |         - mean expected utility
32 |         - upper and lower quantile of estimate of expected utility associated
33 |           with each threshold
34 |     """
35 |     credibility /= 2
36 |     scores, utilities = sample_utilities(utility, data, N=N)
37 |     low = int(N * credibility)
38 |     high = int(N * (1 - credibility))
39 |     utilities = numpy.asarray(utilities)
40 |     utilities.sort(axis=1)
41 |     return scores, utilities.mean(1), numpy.asarray(utilities[:, [low, high]]).T
42 | 
43 | 
44 | @public.add
45 | def optimal_threshold(utility, data, N=4096):
46 |     scores, utilities = sample_utilities(utility, data, N=N)
47 |     means = utilities.mean(1)
48 |     idx = means.argmax()
49 |     return scores[idx], means[idx]
50 | 
51 | 
52 | @public.add
53 | def sample_utilities(utility, data, N=4096):
54 |     """Get distribution of utilities.
55 | 
56 |     Args:
57 |         utility (function): utility function that ingests true/false
58 |             positive/negative rates.
59 |         data (list-like): iterable of iterables of the form (ground truth, score).
60 |             Feedback is null when an alert is not triggered.
61 | 
62 |     Returns: thresholds, utilities
63 |     """
64 |     if not len(data):
65 |         return data, numpy.asarray([])
66 |     nprng = numpy.random.RandomState(0)
67 |     data = numpy.asarray(data)
68 |     num_positives = data[:, 0].sum()
69 |     rates = [1 + num_positives, 1 + len(data) - num_positives, 1, 1]
70 |     utilities = []
71 |     data = data[numpy.argsort(data[:, 1])]
72 |     for ground_truth, score in data:
73 |         update_rates(rates, ground_truth)
74 |         utilities.append(utility(*nprng.dirichlet(rates, size=N).T))
75 |     return data[:, 1], numpy.asarray(utilities)
76 | 
77 | 
78 | @public.add
79 | def thompson_sample(utility, data, N=1024, quantile=False):
80 |     scores, utilities = sample_utilities(utility, data, N)
81 |     if quantile:
82 |         return utilities.argmax(axis=0) / (len(utilities) - 1)
83 |     return scores[utilities.argmax(axis=0)]
84 | 
85 | 
86 | @public.add
87 | def update_rates(rates, ground_truth):
88 |     rates[0] -= ground_truth
89 |     rates[1] -= not ground_truth
90 |     rates[2] += not ground_truth
91 |     rates[3] += ground_truth
92 | 
93 | 
94 | @public.add
95 | class AdaptiveThreshold:
96 |     """Adaptive agent that balances exploration with exploitation with respect
97 |     to setting and adjusting thresholds.
98 | 
99 |     When exploring, the threshold is 0, effectively letting anything
100 |     through. This produces unbiased data that can then be used to set a
101 |     more optimal threshold in subsequent rounds. The agent seeks to
102 |     balance the opportunity cost of running an experiment with the
103 |     utility gained over subsequent rounds using the information gained
104 |     from this experiment.
105 |     """
106 | 
107 |     def __init__(self, utility):
108 |         """
109 |         Args:
110 |             utility (function): Function that takes in true/false
111 |                 positive/negative rates. Specifically (tp, fp, tn, fn) -> float
112 |                 representing utility."""
113 | 
114 |         self.utility = utility
115 |         self.results = []
116 |         self.unbiased_positives = 1
117 |         self.unbiased_negatives = 1
118 |         self.previous_threshold = 0
119 |         self.nprng = numpy.random.RandomState(0)
120 | 
121 |     def get_best_threshold(self):
122 |         # true positives, false positives, true negatives, false negatives
123 |         rates = [self.unbiased_positives, self.unbiased_negatives, 1, 1]
124 |         experiment_utility = self.utility(*self.nprng.dirichlet(rates))
125 |         hypothetical_rates = [
126 |             self.unbiased_positives - self.last_experiment_outcome,
127 |             self.unbiased_negatives - (1 - self.last_experiment_outcome),
128 |             1,
129 |             1,
130 |         ]
131 |         best_hypothetical_utility = -numpy.inf
132 |         best_utility = -numpy.inf
133 |         for score, ground_truth, idx in self.results:
134 |             update_rates(rates, ground_truth)
135 |             utility = self.utility(*self.nprng.dirichlet(rates))
136 |             if utility > best_utility:
137 |                 best_utility = utility
138 |                 best_threshold = score
139 |             if idx >= self.last_experiment_idx:
140 |                 continue
141 |             update_rates(hypothetical_rates, ground_truth)
142 |             hypothetical_utility = self.utility(
143 |                 *self.nprng.dirichlet(hypothetical_rates)
144 |             )
145 |             if hypothetical_utility > best_hypothetical_utility:
146 |                 best_hypothetical_utility = hypothetical_utility
147 |                 hindsight_utility = utility
148 |         return best_threshold, experiment_utility, best_utility, hindsight_utility
149 | 
150 |     def __call__(self, ground_truth, score):
151 |         """Args are ignored if previous threshold was not 0. Otherwise, the
152 |         score is added as a potential threshold and ground_truth noted to help
153 |         identify the optimal threshold.
154 | 
155 |         Args:
156 |             ground_truth (bool)
157 |             score (float)
158 |         """
159 |         idx = len(self.results)
160 |         if self.previous_threshold == 0:
161 |             bisect.insort(self.results, (score, ground_truth, idx))
162 |             self.unbiased_positives += ground_truth
163 |             self.unbiased_negatives += 1 - ground_truth
164 |             self.last_experiment_idx = idx
165 |             self.last_experiment_outcome = ground_truth
166 |         if len(self.results) < 2:
167 |             return self.previous_threshold
168 |         (
169 |             best_threshold,
170 |             experiment_utility,
171 |             best_utility,
172 |             hindsight_utility,
173 |         ) = self.get_best_threshold()
174 |         total_utility_gained = (best_utility - hindsight_utility) * (
175 |             idx - self.last_experiment_idx
176 |         )
177 |         opportunity_cost = hindsight_utility - experiment_utility
178 |         if opportunity_cost <= total_utility_gained:
179 |             self.previous_threshold = 0
180 |         else:
181 |             self.previous_threshold = best_threshold
182 |         return self.previous_threshold
183 | 
184 | 
185 | @public.add
186 | def exploration_proportion(thresholds, N):
187 |     exploration = thresholds == 0
188 |     alpha = 1 - 1.0 / N
189 |     return reduce(
190 |         lambda accum, elem: accum + [accum[-1] * alpha + elem * (1 - alpha)],
191 |         exploration[N:],
192 |         [exploration[:N].mean()],
193 |     )
194 | 
-------------------------------------------------------------------------------- /docs/thresholding_user_guide.rst: --------------------------------------------------------------------------------
1 | #######################
2 | Thresholding User Guide
3 | #######################
4 | 
5 | **********
6 | Motivation
7 | **********
8 | 
9 | Let's say you're monitoring some process for alerts. Maybe it's model
10 | performance.
Maybe it's model drift. In any case, let's say you have a score
11 | that increases with the likelihood that something is wrong and needs to be
12 | investigated. You still need to decide whether to actually launch an
13 | investigation or not for each of these scores. This is known as thresholding.
14 | But where to put the threshold? Set it too high and you'll miss important
15 | alerts. Set it too low and you'll be flooded with noise. This module comes with
16 | tools and techniques to experimentally determine where to set your threshold
17 | given your tolerance for noise.
18 | 
19 | How?
20 | ====
21 | 
22 | Let's say the scores associated with good alerts look like this.
23 | 
24 | .. figure:: images/thresholding_positive_scores.png
25 |     :width: 500px
26 |     :align: center
27 |     :height: 500px
28 |     :alt: alternate text
29 |     :figclass: align-center
30 | 
31 | Moreover, scores associated with negative alerts look like this.
32 | 
33 | .. figure:: images/thresholding_negative_scores.png
34 |     :width: 500px
35 |     :align: center
36 |     :height: 500px
37 |     :alt: alternate text
38 |     :figclass: align-center
39 | 
40 | Clearly the likelihood of finding a good alert increases with model score, but
41 | any choice of threshold will imply a trade-off between true/false
42 | positives/negatives. In general, you need to decide on a utility function of
43 | true/false positives/negatives.
44 | 
45 | .. code-block:: python
 | 
46 |     def utility(tp, fp, tn, fn):
47 |         return tp - 20 * fn - fp
48 | 
49 | The utility function would increase with true positives and/or true negatives,
50 | and decrease with false positives and/or false negatives. A risk-averse utility
51 | function is shown above with a 20-fold preference for avoiding false negatives
52 | over false positives. In general, we will assume the utility function is a
53 | function of the *proportions* of true/false positives/negatives in a data set.
54 | In this sense, the utility function is a function of a categorical distribution
55 | over true/false positives/negatives.
56 | 
57 | Now that we have a utility function and a sample of positive and negative alert
58 | scores, we can plot expected utility as a function of threshold.
59 | 
60 | .. figure:: images/thresholding_expected_utility.png
61 |     :width: 500px
62 |     :align: center
63 |     :height: 400px
64 |     :alt: alternate text
65 |     :figclass: align-center
66 | 
67 |     Expected utility as a function of threshold (solid) and 50%
68 |     `credible interval
69 |     `_ (shaded
70 |     region).
71 | 
72 | Note that we don't actually have the true distribution of positive
73 | and negative scores in practice. Rather, we have examples. If we
74 | only had 4 positive scores and 4 negative scores, we could not be very
75 | certain of the results. More on this in the `credibility user guide
76 | `__. We model the distribution of true/false
77 | positives/negatives as a `Dirichlet-multinomial distribution
78 | `_ with
79 | a `maximum entropy prior
80 | `_.
81 | 
82 | This shows a particularly apparent peak in utility, but only after (in this
83 | case) a few thousand example scores. In practice, we could well be starting
84 | with *no* examples and building up our knowledge as we go. To make things
85 | worse, we will only find out if an alert was good or not if we investigate it.
86 | Anything that falls below our threshold forever remains unlabeled. We developed
87 | a specific algorithm to tackle this problem that we call *adaptive
88 | thresholding*.
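Before turning to adaptive thresholding, here is a minimal sketch of how a plot like the one above can be produced with :meth:`expected_utility` and :meth:`plot_err`. The beta-distributed scores are synthetic stand-ins for real alert data.

.. code-block:: python

    import numpy
    from mvtk import thresholding

    nprng = numpy.random.RandomState(0)
    # Synthetic (ground truth, score) pairs: good alerts tend to score high.
    positives = [(1, score) for score in nprng.beta(4, 2, size=500)]
    negatives = [(0, score) for score in nprng.beta(2, 4, size=2000)]
    data = positives + negatives

    def utility(tp, fp, tn, fn):
        return tp - 20 * fn - fp

    # Candidate thresholds, mean expected utility, and a 50% credible band.
    scores, utility_mean, utility_err = thresholding.expected_utility(
        utility, data, credibility=0.5
    )
    thresholding.plot_err(scores, utility_mean, utility_err, label="expected utility")
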
89 | 
90 | *********************
91 | Adaptive Thresholding
92 | *********************
93 | 
94 | We face a classic `exploitation/exploration dilemma
95 | `_. We can either choose
96 | to *exploit* the information we have so far about positive and negative score
97 | distributions to set a threshold or *explore* what may lie below that threshold
98 | by labeling whatever comes in next. Unfortunately, the labels obtained from
99 | scores greater than a threshold chosen at the time pose a challenge in that
100 | they yield heavily biased estimates of positive and negative score
101 | distributions (since they don't include anything below the threshold set at the
102 | time). We have not found a good way to compensate for that bias in practice.
103 | Rather, we must switch between an optimally set threshold and labeling
104 | whatever comes next. This produces a series of *unbiased labels*.
105 | 
106 | Our adaptive thresholding algorithm seeks to balance the
107 | opportunity cost of labeling data against the utility gained over subsequent
108 | rounds from the resulting change in threshold. Each score with an unbiased label is a
109 | potential threshold. For each of those options, we sample a possible
110 | distribution of true/false positives/negatives (with a Dirichlet-multinomial
111 | distribution with a maximum entropy prior) using the other unbiased labels.
112 | Utilities are calculated for each sampled distribution of true/false
113 | positives/negatives. The highest utility is noted, as well as the utility of
114 | setting the threshold to 0 (exploration). Next, this process is repeated using
115 | all but the most recent unbiased label. We locate the optimal threshold
116 | computed using all but the most recent unbiased label, and then compute the
117 | utility of that threshold using the utilities calculated using *all* unbiased
118 | labels. The difference between this utility and the utility of the true optimal
119 | threshold is the expected utility gained from the last round of exploration.
120 | This expected utility gained per round times the number of rounds since the
121 | last round of exploration is the net utility gained since the last round of
122 | experimentation. Meanwhile, the difference between the utility of the true
123 | optimal threshold and the utility of exploration is the opportunity cost of
124 | exploration. When the net utility gained exceeds the opportunity cost of
125 | exploration, exploration is chosen over exploitation.
126 | 
127 | Note that we stochastically sample utilities at the score associated with each
128 | unbiased label at each round. This is necessary to prevent deadlocks in which
129 | the optimal threshold is identical before and after experimentation, leaving
130 | the expected utility gained per round 0 forever (thus ending any possibility of
131 | subsequent rounds of exploration). Rather, exploration is chosen according to
132 | the *probability* that the net utility gained has in fact caught up with the
133 | opportunity cost of the last round of exploration.
134 | 
135 | However, as we gain a more accurate picture of the distribution of positive and
136 | negative scores, we make smaller changes to our best guess at the location of
137 | the optimal threshold after exploration. As a result, the expected utility
138 | gained per round of exploitation will gradually decrease over time, and we will
139 | need more and more rounds of exploitation to make up for the opportunity cost
140 | of exploration (shown below).
141 | 
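Here is a minimal usage sketch of :class:`AdaptiveThreshold`. The ``alert_stream`` iterable of ``(ground_truth, score)`` pairs is a hypothetical stand-in for your monitoring feed; in practice, ground truth is only observed for alerts you actually investigate.

.. code-block:: python

    from mvtk import thresholding

    def utility(tp, fp, tn, fn):
        return tp - 20 * fn - fp

    agent = thresholding.AdaptiveThreshold(utility)
    threshold = 0  # start fully exploring
    for ground_truth, score in alert_stream:  # hypothetical feed
        if score >= threshold:
            # The alert was investigated, so its label is known; the agent
            # decides whether the next round explores (threshold 0) or
            # exploits the current best threshold.
            threshold = agent(ground_truth, score)
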
142 | .. figure:: images/thresholding_exploration_proportion.png
143 |     :width: 500px
144 |     :align: center
145 |     :height: 500px
146 |     :alt: alternate text
147 |     :figclass: align-center
148 | 
149 |     Probability of choosing exploration decreases from about 45% at the
150 |     beginning to about 5% after 3600 rounds.
151 | 
152 | 
153 | .. topic:: Tutorials:
154 | 
155 |     * :doc:`Thresholding `
156 | 
157 | .. bibliography:: refs.bib
158 |     :cited:
159 | 
-------------------------------------------------------------------------------- /mvtk/supervisor/divergence/generators.py: --------------------------------------------------------------------------------
1 | import numpy
2 | import public
3 | 
4 | from collections import defaultdict
5 | from functools import reduce
6 | 
7 | 
8 | @public.add
9 | def js_data_stream(
10 |     nprng, batch_size, sample_distributions, categorical_columns=tuple()
11 | ):
12 |     r"""Data stream generator for Jensen-Shannon divergence of N distributions.
13 |     Jensen-Shannon divergence measures the information of knowing which of
14 |     those N distributions a sample will be drawn from before it is drawn. So if
15 |     we rolled a fair N sided die to determine which distribution we will draw a
16 |     sample from, JS divergence reports how many bits of information will be
17 |     revealed from the die. This scenario is ultimately simulated in this
18 |     function. However, in real life, we may only have examples of samples from
19 |     each distribution we wish to compare. In the most general case, each
20 |     distribution we wish to compare is represented by M batches of samples
21 |     (with potentially different sizes) from M similar distributions whose
22 |     average is of most interest. Just as we might simulate sampling from a
23 |     single distribution by randomly sampling a batch of examples with
24 |     replacement, we can effectively sample from an average of distributions by
25 |     randomly sampling each batch (which may be representative of a single
26 |     distribution), then randomly sampling elements of the chosen batch. This
27 |     can ultimately be thought of as a more data-efficient means to the same end
28 |     as downsampling large batch sizes.
29 | 
30 |     Args:
31 |         nprng: Numpy ``RandomState`` used to generate random samples
32 |         batch_size: size of batch
33 |         sample_distributions: list of lists of samples to compare.
34 |             For example, ``[[batch1, batch2, batch3], [batch4, batch5],
35 |             [batch6, batch7]]`` Assuming ``batch1`` came from distribution
36 |             :math:`p_1`, ``batch2`` from :math:`p_2`, etc, this function will
37 |             simulate a system in which a latent `N=3` sided die roll
38 |             determines whether to draw a sample from :math:`\frac{p_1 + p_2 +
39 |             p_3}{3}`, :math:`\frac{p_4 + p_5}{2}`, or :math:`\frac{p_6 +
40 |             p_7}{2}`.
41 |         categorical_columns (tuple): list or tuple of column indices that are
42 |             considered categorical.
43 | 
44 |     Returns:
45 |         The output of this function will be two samples of size batch_size with
46 |         samples, :math:`x`, drawn from batch_size rolls, :math:`z`, of our
47 |         :math:`N` sided die. Following the example above for which :math:`N=3`,
48 |         the first of these two output samples will be of the form :math:`(x,
49 |         z)`, where x is the sample drawn and z is the die roll.
The second of
50 |         these two samples will be of the form :math:`(x, z^{\prime})` where x
51 |         is the same sample as before, but :math:`z^\prime` is a new set of
52 |         otherwise unrelated rolls of the same :math:`N=3` sided die."""
53 | 
54 |     def process_sample_distributions(sample_distributions):
55 |         z = []
56 |         out = []
57 |         for idx, count in zip(
58 |             *numpy.unique(
59 |                 nprng.randint(0, len(sample_distributions), size=batch_size),
60 |                 return_counts=True,
61 |             )
62 |         ):
63 |             sample_distribution = sample_distributions[idx]
64 |             out.extend(
65 |                 [
66 |                     sample_distribution[i][
67 |                         nprng.randint(0, len(sample_distribution[i]))
68 |                     ]
69 |                     for i in nprng.randint(0, len(sample_distribution), size=count)
70 |                 ]
71 |             )
72 |             z.extend([idx] * count)
73 |         sample_distribution = numpy.asarray(out)
74 |         catted1 = numpy.concatenate(
75 |             (sample_distribution, numpy.asarray(z)[:, numpy.newaxis]), axis=1
76 |         )
77 |         z = nprng.randint(0, len(sample_distributions), size=batch_size)
78 |         catted2 = numpy.concatenate((sample_distribution, z[:, numpy.newaxis]), axis=1)
79 |         return numpy.asarray((catted2, catted1))
80 | 
81 |     while True:
82 |         yield groupby(
83 |             categorical_columns, *process_sample_distributions(sample_distributions)
84 |         )
85 | 
86 | 
87 | @public.add
88 | def fdiv_data_stream(
89 |     nprng, batch_size, sample_distributions, categorical_columns=tuple()
90 | ):
91 |     r"""Data stream generator for f-divergence.
92 | 
93 |     Args:
94 |         nprng: Numpy ``RandomState`` used to generate random samples
95 |         batch_size: size of batch
96 |         sample_distributions: list of lists of samples to compare for each
97 |             partition of the data. For example, ``[[batch1, batch2, batch3],
98 |             [batch4, batch5], [batch6, batch7]]``
99 |         categorical_columns (tuple): list or tuple of column indices that are
100 |             considered categorical.
101 | 
102 |     Returns:
103 |         The output of this function will be ``N`` samples of size
104 |         ``batch_size``, where ``N = len(sample_distributions)``. Following the
105 |         example above, assuming ``batch1`` came from distribution :math:`p_1`,
106 |         ``batch2`` from :math:`p_2`, etc, this function will output a tuple of
107 |         ``N = 3`` samples of size ``batch_size``, where the first is sampled
108 |         from :math:`\frac{p_1 + p_2 + p_3}{3}`, the second is sampled from
109 |         :math:`\frac{p_4 + p_5}{2}`, and the third is sampled from
110 |         :math:`\frac{p_6 + p_7}{2}`."""
111 | 
112 |     def process_sample_distributions(sample_distributions):
113 |         return numpy.asarray(
114 |             [
115 |                 [
116 |                     sample_distribution[i][
117 |                         nprng.randint(0, len(sample_distribution[i]))
118 |                     ]
119 |                     for i in nprng.randint(0, len(sample_distribution), size=batch_size)
120 |                 ]
121 |                 for sample_distribution in sample_distributions
122 |                 if len(sample_distribution)
123 |             ]
124 |         )
125 | 
126 |     while True:
127 |         yield groupby(
128 |             categorical_columns, *process_sample_distributions(sample_distributions)
129 |         )
130 | 
131 | 
132 | def groupby(categorical_columns, *samples):
133 |     r"""Group samples by unique values found in a subset of columns
134 |     Args:
135 |         categorical_columns: List of indices of columns which should be
136 |             treated as categorical.
137 |         *samples: A set of samples drawn from distinct distributions.
138 |             Each distribution is assumed to be defined on the same probability
139 |             space, so it would make sense to compare a sample drawn from one
140 |             distribution to a sample drawn from another.
141 | 142 | Returns: 143 | tuple of dicts that each map unique combinations of 144 | ``categorical_columns`` to a subset of samples from the 145 | ``sample_distributions`` that have these values in their 146 | ``categorical_columns``. ``categorical_columns`` are omitted from 147 | the values of these dicts.""" 148 | if not categorical_columns: 149 | return [{tuple(): sample.astype("float")} for sample in samples] 150 | # the complement of categorical_columns is assumed to be numeric 151 | numerical_columns = [ 152 | i for i in range(samples[0].shape[1]) if i not in categorical_columns 153 | ] 154 | 155 | def grouper(accum, element): 156 | accum[tuple(element[categorical_columns])].append(element[numerical_columns]) 157 | return accum 158 | 159 | return tuple( 160 | { 161 | key: numpy.asarray(value, dtype="float") 162 | for key, value in reduce(grouper, sample, defaultdict(list)).items() 163 | } 164 | for sample in samples 165 | ) 166 | -------------------------------------------------------------------------------- /mvtk/bias_variance/bias_variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import public 4 | 5 | from scipy import stats 6 | from sklearn.utils import resample 7 | 8 | 9 | @public.add 10 | def get_values(x): 11 | r"""If argument is a Pandas dataframe, return 'values' numpy array from it. 12 | 13 | Args: 14 | x (Any): pandas dataframe or anything else 15 | 16 | Returns: 17 | if pandas dataframe - return 'values' numpy array 18 | otherwise - return itself 19 | 20 | """ 21 | if isinstance(x, pd.DataFrame): 22 | return x.values 23 | else: 24 | return x 25 | 26 | 27 | @public.add 28 | def train_and_predict( 29 | estimator, 30 | X_train_values, 31 | y_train_values, 32 | X_test_prepared, 33 | prepare_X=lambda x: x, 34 | prepare_y_train=lambda x: x, 35 | fit_kwargs=None, 36 | predict_kwargs=None, 37 | ): 38 | r"""Train an estimator and get predictions from it 39 | 40 | Args: 41 | estimator (EstimatorWrapper): estimator wrapped with a class extending 42 | EstimatorWrapper 43 | X_train_values: numpy array of features for training 44 | y_train_values: numpy array of ground truth labels for training 45 | X_test_prepared: feature set for testing which has been processed by 46 | prepare_X function 47 | prepare_X (function, optional): function to transform feature datasets 48 | before calling fit and predict methods 49 | prepare_y_train (function, optional): function to transform train ground 50 | truth labels before calling fit method 51 | fit_kwargs (dict, optional): kwargs to pass to the fit method 52 | predict_kwargs (dict, optional): kwargs to pass to the predict method 53 | 54 | Returns: 55 | predictions""" 56 | if predict_kwargs is None: 57 | predict_kwargs = {} 58 | if fit_kwargs is None: 59 | fit_kwargs = {} 60 | 61 | X_sample_prepared = prepare_X(X_train_values) 62 | y_sample_prepared = prepare_y_train(y_train_values) 63 | 64 | estimator = estimator.fit(X_sample_prepared, y_sample_prepared, **fit_kwargs) 65 | predictions = estimator.predict(X_test_prepared, **predict_kwargs) 66 | 67 | return predictions 68 | 69 | 70 | @public.add 71 | def bootstrap_train_and_predict( 72 | estimator, 73 | X_train_values, 74 | y_train_values, 75 | X_test_prepared, 76 | prepare_X=lambda x: x, 77 | prepare_y_train=lambda x: x, 78 | random_state=None, 79 | fit_kwargs=None, 80 | predict_kwargs=None, 81 | ): 82 | r"""Train an estimator using a bootstrap sample of the training data and get 83 | predictions from it 
84 | 
85 |     Args:
86 |         estimator (EstimatorWrapper): estimator wrapped with a class extending
87 |             EstimatorWrapper
88 |         X_train_values: numpy array of features for training
89 |         y_train_values: numpy array of ground truth labels for training
90 |         X_test_prepared: feature set for testing which has been processed by prepare_X
91 |             function
92 |         prepare_X (function, optional): function to transform feature datasets before
93 |             calling fit and predict methods
94 |         prepare_y_train (function, optional): function to transform train ground
95 |             truth labels before calling fit method
96 |         random_state (int, optional): random state for bootstrap sampling
97 |         fit_kwargs (dict, optional): kwargs to pass to the fit method
98 |         predict_kwargs (dict, optional): kwargs to pass to the predict method
99 | 
100 |     Returns:
101 |         predictions"""
102 |     X_sample, y_sample = resample(
103 |         X_train_values, y_train_values, random_state=random_state
104 |     )
105 | 
106 |     return train_and_predict(
107 |         estimator,
108 |         X_sample,
109 |         y_sample,
110 |         X_test_prepared,
111 |         prepare_X,
112 |         prepare_y_train,
113 |         fit_kwargs,
114 |         predict_kwargs,
115 |     )
116 | 
117 | 
118 | @public.add
119 | def bias_variance_mse(predictions, y_test):
120 |     r"""Compute the bias-variance decomposition using the mean squared error loss function
121 | 
122 |     Args:
123 |         predictions: numpy array of predictions over the set of iterations
124 |         y_test: numpy array of ground truth labels
125 | 
126 |     Returns:
127 |         (average loss, average bias, average variance, net variance)"""
128 |     pred_by_x = np.swapaxes(predictions, 0, 1)
129 | 
130 |     main_predictions = np.mean(predictions, axis=0)
131 | 
132 |     avg_bias = np.mean((main_predictions - y_test) ** 2)
133 | 
134 |     arr_loss = np.zeros(pred_by_x.shape[0], dtype=np.float64)
135 |     arr_var = np.zeros(pred_by_x.shape[0], dtype=np.float64)
136 |     for i in range(pred_by_x.shape[0]):
137 |         arr_loss[i] = np.mean((pred_by_x[i] - y_test[i]) ** 2)
138 |         arr_var[i] = np.mean((pred_by_x[i] - main_predictions[i]) ** 2)
139 |     avg_loss = np.mean(arr_loss)
140 |     avg_var = np.mean(arr_var)
141 | 
142 |     return avg_loss, avg_bias, avg_var, avg_var
143 | 
144 | 
145 | @public.add
146 | def bias_variance_0_1_loss(predictions, y_test):
147 |     r"""Compute the bias-variance decomposition using the 0-1 loss function
148 | 
149 |     Args:
150 |         predictions: numpy array of predictions over the set of iterations
151 |         y_test: numpy array of ground truth labels
152 | 
153 |     Returns:
154 |         (average loss, average bias, average variance, net variance)"""
155 |     pred_by_x = np.swapaxes(predictions, 0, 1)
156 | 
157 |     main_predictions = stats.mode(predictions, axis=0, keepdims=True).mode[0]
158 | 
159 |     avg_bias = np.mean(main_predictions != y_test)
160 | 
161 |     arr_loss = np.zeros(pred_by_x.shape[0], dtype=np.float64)
162 |     arr_var = np.zeros(pred_by_x.shape[0], dtype=np.float64)
163 |     var_b = 0.0  # biased example contribution to avg_var
164 |     var_u = 0.0  # unbiased example contribution to avg_var
165 |     for i in range(pred_by_x.shape[0]):
166 |         pred_true = np.sum(pred_by_x[i] == y_test[i])
167 |         pred_not_main = np.sum(pred_by_x[i] != main_predictions[i])
168 | 
169 |         arr_loss[i] = (predictions.shape[0] - pred_true) / predictions.shape[0]
170 |         arr_var[i] = pred_not_main / predictions.shape[0]
171 | 
172 |         if main_predictions[i] != y_test[i]:
173 |             prb_true_given_not_main = (
174 |                 pred_true / pred_not_main if pred_not_main != 0 else 0
175 |             )
176 |             var_b += (pred_not_main / predictions.shape[0]) * prb_true_given_not_main
177 |         else:
178 |             var_u +=
pred_not_main / predictions.shape[0]
179 | 
180 |     var_b /= pred_by_x.shape[0]
181 |     var_u /= pred_by_x.shape[0]
182 | 
183 |     avg_loss = np.mean(arr_loss)
184 |     avg_var = np.mean(arr_var)
185 |     net_var = var_u - var_b
186 | 
187 |     return avg_loss, avg_bias, avg_var, net_var
188 | 
189 | 
190 | @public.add
191 | def bias_variance_compute(
192 |     estimator,
193 |     X_train,
194 |     y_train,
195 |     X_test,
196 |     y_test,
197 |     prepare_X=lambda x: x,
198 |     prepare_y_train=lambda x: x,
199 |     iterations=200,
200 |     random_state=None,
201 |     decomp_fn=bias_variance_mse,
202 |     fit_kwargs=None,
203 |     predict_kwargs=None,
204 | ):
205 |     r"""Compute the bias-variance decomposition in serial
206 | 
207 |     Args:
208 |         estimator (EstimatorWrapper): estimator wrapped with a class extending
209 |             EstimatorWrapper
210 |         X_train: features for training
211 |         y_train: ground truth labels for training
212 |         X_test: features for testing
213 |         y_test: ground truth labels for testing
214 |         prepare_X (function, optional): function to transform feature datasets before
215 |             calling fit and predict methods
216 |         prepare_y_train (function, optional): function to transform training ground
217 |             truth labels before calling fit method
218 |         iterations (int, optional): number of iterations for the training/testing
219 |         random_state (int, optional): random state for bootstrap sampling
220 |         decomp_fn (function, optional): bias-variance decomposition function
221 |         fit_kwargs (dict, optional): kwargs to pass to the fit method
222 |         predict_kwargs (dict, optional): kwargs to pass to the predict method
223 | 
224 |     Returns:
225 |         (average loss, average bias, average variance, net variance)"""
226 |     if fit_kwargs is None:
227 |         fit_kwargs = {}
228 |     if predict_kwargs is None:
229 |         predict_kwargs = {}
230 | 
231 |     if isinstance(random_state, int):
232 |         random_state = np.random.RandomState(seed=random_state)
233 | 
234 |     predictions = np.zeros((iterations, y_test.shape[0]))
235 | 
236 |     X_train_values = get_values(X_train)
237 |     y_train_values = get_values(y_train)
238 |     X_test_values = get_values(X_test)
239 |     X_test_prepared = prepare_X(X_test_values)
240 | 
241 |     for i in range(iterations):
242 |         predictions[i] = bootstrap_train_and_predict(
243 |             estimator,
244 |             X_train_values,
245 |             y_train_values,
246 |             X_test_prepared,
247 |             prepare_X,
248 |             prepare_y_train,
249 |             random_state,
250 |             fit_kwargs,
251 |             predict_kwargs,
252 |         )
253 | 
254 |     y_test_values = get_values(y_test)
255 | 
256 |     return decomp_fn(predictions, y_test_values)
257 | 
-------------------------------------------------------------------------------- /docs/notebooks/divergence/CategoricalColumns.ipynb: --------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Handling Categorical Data\n",
8 |     "\n",
9 |     "More often than not, a dataset is composed of both **numeric** and **categorical** data types. The supervisor divergence functions can handle both, but they need to know which columns are categorical so that they can handle them properly. This notebook shows you how to do so when using the **supervisor** divergence package."
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "markdown",
14 |    "metadata": {},
15 |    "source": [
16 |     "## Dataset with Mixed Data Types"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "markdown",
21 |    "metadata": {},
22 |    "source": [
23 |     "### Create a dataset\n",
24 |     "To demonstrate, we will create a simple dataset with a mix of categorical and numeric columns. "
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": 1,
30 |    "metadata": {},
31 |    "outputs": [
32 |     {
33 |      "data": {
105 |       "text/plain": [
106 |        "   latitude      fruit  temp         city  longitude\n",
107 |        "0       239      apple   104  Filly Downs        257\n",
108 |        "1       181      apple    11     Coldport        303\n",
109 |        "2       246  raspberry    99  Filly Downs         60\n",
110 |        "3       187  raspberry    91     Coldport         90\n",
111 |        "4        97  raspberry    26  Filly Downs        108"
112 |       ]
113 |      },
114 |      "execution_count": 1,
115 |      "metadata": {},
116 |      "output_type": "execute_result"
117 |     }
118 |    ],
119 |    "source": [
120 |     "import pandas as pd\n",
121 |     "import numpy as np\n",
122 |     "\n",
123 |     "\n",
124 |     "size = 100000\n",
125 |     "\n",
126 |     "data = pd.DataFrame()\n",
127 |     "data['latitude'] = np.random.randint(0, 360, size=size)\n",
128 |     "data['fruit'] = np.random.choice(a=['apple', 'orange', 'plum', 'raspberry', 'blueberry'],\n",
129 |     "                                 p=[0.1, 0.3, 0.3, 0.25, 0.05], size=size)\n",
130 |     "data['temp'] = np.random.randint(-10, 120, size=size)\n",
131 |     "data['city'] = np.random.choice(a=['London', 'Paris', 'Newport', 'Bradfield', 'Coldport', 'Filly Downs'],\n",
132 |     "                                p=[0.15, 0.2, 0.1, 0.1, 0.3, 0.15], size=size)\n",
133 |     "\n",
134 |     "\n",
135 |     "data['longitude'] = np.random.randint(0, 360, size=size)\n",
136 |     "\n",
137 |     "data.head(5)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "In the dataset, the **fruit** and **city** columns are *categorical*, while **latitude**, **temp** and **longitude** are *numeric*. "
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "### Create a comparison dataset\n",
152 |     "We will create a dataset to compare by taking the original dataset and modifying some of the values. In this case, we will set a couple of columns to a constant value, which would result in the new dataset being of a different distribution from the original dataset."
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 2,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "data_shifted = data.copy()\n",
162 |     "data_shifted['temp'] = 1\n",
163 |     "data_shifted.fruit = 'apple'"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "## Calculating Divergence"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 3,
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "import warnings\n",
180 |     "with warnings.catch_warnings():\n",
181 |     "    warnings.simplefilter(\"ignore\")\n",
182 |     "    from mvtk.supervisor.divergence import calc_tv_knn"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "markdown",
187 |    "metadata": {},
188 |    "source": [
189 |     "The divergence functions have a parameter called **categorical_columns** which you need to use to specify which columns are not numeric. The functions will throw an error if categorical columns are passed but not specified.\n",
190 |     "\n",
191 |     "So, if you know which columns are categorical, then you need to pass a list of the column indexes. Both datasets should have their columns in exactly the same order."
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 4,
197 |    "metadata": {},
198 |    "outputs": [
199 |     {
200 |      "data": {
201 |       "text/plain": [
202 |        "0.8506579001037404"
203 |       ]
204 |      },
205 |      "execution_count": 4,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "calc_tv_knn(data, data_shifted, categorical_columns=[1,3])"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 5,
217 |    "metadata": {},
218 |    "outputs": [
219 |     {
220 |      "data": {
221 |       "text/plain": [
222 |        "0.2598375876037403"
223 |       ]
224 |      },
225 |      "execution_count": 5,
226 |      "metadata": {},
227 |      "output_type": "execute_result"
228 |     }
229 |    ],
230 |    "source": [
231 |     "calc_tv_knn(data, data, categorical_columns=[1,3])"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "markdown",
236 |    "metadata": {},
237 |    "source": [
238 |     "## mvtk.supervisor.utils.column_indexes"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {},
244 |    "source": [
245 |     "With the utility function **column_indexes** you can get a list of the column indexes of the categorical columns in the dataframe."
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": 6,
251 |    "metadata": {},
252 |    "outputs": [
253 |     {
254 |      "data": {
255 |       "text/plain": [
256 |        "[1, 3]"
257 |       ]
258 |      },
259 |      "execution_count": 6,
260 |      "metadata": {},
261 |      "output_type": "execute_result"
262 |     }
263 |    ],
264 |    "source": [
265 |     "from mvtk.supervisor.utils import column_indexes\n",
266 |     "\n",
267 |     "column_indexes(data, cols=['fruit', 'city'])"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "metadata": {},
273 |    "source": [
274 |     "You can also run the **column_indexes** function inline as a function parameter."
275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 7, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "0.25967482718707363" 286 | ] 287 | }, 288 | "execution_count": 7, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "calc_tv_knn(data, data, \n", 295 | " categorical_columns=column_indexes(data, cols=['fruit', 'city']))" 296 | ] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "supervisor", 302 | "language": "python", 303 | "name": "supervisor" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.6.8" 316 | } 317 | }, 318 | "nbformat": 4, 319 | "nbformat_minor": 2 320 | } 321 | -------------------------------------------------------------------------------- /tests/bias_variance/test_bias_variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from sklearn.tree import DecisionTreeClassifier 5 | from sklearn.linear_model import Ridge 6 | 7 | from mvtk.bias_variance import ( 8 | bias_variance_compute, 9 | bias_variance_mse, 10 | bias_variance_0_1_loss, 11 | get_values, 12 | train_and_predict, 13 | bootstrap_train_and_predict, 14 | ) 15 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper 16 | 17 | 18 | def create_data(): 19 | X_train = np.arange(12).reshape(6, 2) 20 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 21 | X_test = np.arange(6).reshape(3, 2) 22 | y_test = np.array([0, 1, 1]) 23 | 24 | return X_train, y_train, X_test, y_test 25 | 26 | 27 | def test_get_values(): 28 | a = [1, 2] 29 | b = [3, 4] 30 | c = [1, 3] 31 | d = [2, 4] 32 | df = pd.DataFrame(data={"col_a": a, "col_b": b}) 33 | 34 | df_values = get_values(df) 35 | np_array = np.asarray([c, d]) 36 | 37 | assert isinstance(df_values, np.ndarray) 38 | assert np.array_equal(df_values, np_array) 39 | 40 | 41 | def test_train_and_predict_default(): 42 | X_train, y_train, X_test, y_test = create_data() 43 | 44 | model = Ridge(random_state=123) 45 | model_wrapped = SciKitLearnEstimatorWrapper(model) 46 | 47 | predictions = train_and_predict(model_wrapped, X_train, y_train, X_test) 48 | 49 | expected = np.array([0.4326241134751774, 0.6595744680851064, 0.8865248226950355]) 50 | 51 | assert np.array_equal( 52 | np.round(predictions, decimals=12), np.round(expected, decimals=12) 53 | ) 54 | 55 | 56 | def test_train_and_predict_prepare(): 57 | X_train, y_train, X_test, y_test = create_data() 58 | 59 | model = Ridge(random_state=123) 60 | model_wrapped = SciKitLearnEstimatorWrapper(model) 61 | 62 | predictions = train_and_predict( 63 | model_wrapped, 64 | X_train, 65 | y_train, 66 | X_test, 67 | prepare_X=lambda x: x + 1, 68 | prepare_y_train=lambda x: x + 1, 69 | ) 70 | 71 | expected = np.array([1.3191489361702131, 1.546099290780142, 1.773049645390071]) 72 | 73 | assert np.array_equal( 74 | np.round(predictions, decimals=12), np.round(expected, decimals=12) 75 | ) 76 | 77 | 78 | def test_train_and_predict_kwargs_fit(): 79 | X_train, y_train, X_test, y_test = create_data() 80 | 81 | model = DecisionTreeClassifier(random_state=123) 82 | model_wrapped = SciKitLearnEstimatorWrapper(model) 83 | 84 | predictions = 
train_and_predict( 85 | model_wrapped, 86 | X_train, 87 | y_train, 88 | X_test, 89 | fit_kwargs={"sample_weight": [0, 0, 1, 0, 1, 0]}, 90 | ) 91 | 92 | expected = np.array([2, 2, 2]) 93 | 94 | assert np.array_equal(predictions, expected) 95 | 96 | 97 | def test_train_and_predict_kwargs_predict(): 98 | X_train, y_train, X_test, y_test = create_data() 99 | 100 | model = DecisionTreeClassifier(random_state=123) 101 | model_wrapped = SciKitLearnEstimatorWrapper(model) 102 | 103 | train_and_predict(model_wrapped, X_train, y_train, X_test) 104 | 105 | try: 106 | train_and_predict( 107 | model_wrapped, 108 | X_train, 109 | y_train, 110 | X_test, 111 | predict_kwargs={"check_input": False}, 112 | ) 113 | except ValueError as e: 114 | assert e.args[0] == "X.dtype should be np.float32, got int64" 115 | return 116 | 117 | assert False 118 | 119 | 120 | def test_bootstrap_train_and_predict_default(): 121 | X_train, y_train, X_test, y_test = create_data() 122 | 123 | model = Ridge(random_state=123) 124 | model_wrapped = SciKitLearnEstimatorWrapper(model) 125 | 126 | predictions = bootstrap_train_and_predict( 127 | model_wrapped, X_train, y_train, X_test, random_state=321 128 | ) 129 | 130 | expected = np.array([0.7168141592920354, 0.8584070796460177, 1.0]) 131 | 132 | assert np.array_equal(predictions, expected) 133 | 134 | 135 | def test_bootstrap_train_and_predict_kwargs_fit(): 136 | X_train, y_train, X_test, y_test = create_data() 137 | 138 | model = DecisionTreeClassifier(random_state=123) 139 | model_wrapped = SciKitLearnEstimatorWrapper(model) 140 | 141 | predictions = bootstrap_train_and_predict( 142 | model_wrapped, 143 | X_train, 144 | y_train, 145 | X_test, 146 | random_state=321, 147 | fit_kwargs={"sample_weight": [0, 0, 1, 0, 1, 0]}, 148 | ) 149 | 150 | expected = np.array([0, 0, 0]) 151 | 152 | assert np.array_equal(predictions, expected) 153 | 154 | 155 | def test_bootstrap_train_and_predict_kwargs_predict(): 156 | X_train, y_train, X_test, y_test = create_data() 157 | 158 | model = DecisionTreeClassifier(random_state=123) 159 | model_wrapped = SciKitLearnEstimatorWrapper(model) 160 | 161 | bootstrap_train_and_predict( 162 | model_wrapped, X_train, y_train, X_test, random_state=321 163 | ) 164 | 165 | try: 166 | bootstrap_train_and_predict( 167 | model_wrapped, 168 | X_train, 169 | y_train, 170 | X_test, 171 | random_state=321, 172 | predict_kwargs={"check_input": False}, 173 | ) 174 | except ValueError as e: 175 | assert e.args[0] == "X.dtype should be np.float32, got int64" 176 | return 177 | 178 | assert False 179 | 180 | 181 | def test_bias_variance_compute_mse(): 182 | X_train, y_train, X_test, y_test = create_data() 183 | 184 | model = Ridge(random_state=123) 185 | model_wrapped = SciKitLearnEstimatorWrapper(model) 186 | 187 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute( 188 | model_wrapped, 189 | X_train, 190 | y_train, 191 | X_test, 192 | y_test, 193 | iterations=10, 194 | random_state=123, 195 | decomp_fn=bias_variance_mse, 196 | ) 197 | 198 | assert np.round(avg_loss, decimals=12) == np.round( 199 | np.float64(1.1158203908105646), decimals=12 200 | ) 201 | assert np.round(avg_bias, decimals=12) == np.round( 202 | np.float64(0.1191924176014536), decimals=12 203 | ) 204 | assert np.round(avg_var, decimals=12) == np.round( 205 | np.float64(0.9966279732091108), decimals=12 206 | ) 207 | assert np.round(net_var, decimals=12) == np.round( 208 | np.float64(0.9966279732091108), decimals=12 209 | ) 210 | 211 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + 
net_var, decimals=12) 212 | assert avg_var == net_var 213 | 214 | 215 | def test_bias_variance_compute_0_1(): 216 | X_train, y_train, X_test, y_test = create_data() 217 | 218 | model = DecisionTreeClassifier(random_state=123) 219 | model_wrapped = SciKitLearnEstimatorWrapper(model) 220 | 221 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute( 222 | model_wrapped, 223 | X_train, 224 | y_train, 225 | X_test, 226 | y_test, 227 | iterations=10, 228 | random_state=123, 229 | decomp_fn=bias_variance_0_1_loss, 230 | ) 231 | 232 | assert avg_loss == np.float64(0.4666666666666666) 233 | assert avg_bias == np.float64(0.3333333333333333) 234 | assert avg_var == np.float64(0.3666666666666667) 235 | assert net_var == np.float64(0.1333333333333333) 236 | 237 | assert avg_loss == avg_bias + net_var 238 | 239 | 240 | def test_bias_variance_mse_no_loss(): 241 | predictions = np.zeros((3, 5)) 242 | y_test = np.zeros(5) 243 | 244 | avg_loss, avg_bias, avg_var, net_var = bias_variance_mse(predictions, y_test) 245 | 246 | assert avg_loss == np.float64(0.0) 247 | assert avg_bias == np.float64(0.0) 248 | assert avg_var == np.float64(0.0) 249 | assert net_var == np.float64(0.0) 250 | 251 | assert avg_loss == avg_bias + net_var 252 | assert avg_var == net_var 253 | 254 | 255 | def test_bias_variance_mse(): 256 | predictions = np.zeros((3, 5)) 257 | predictions[0] += 0.5 258 | y_test = np.zeros(5) 259 | 260 | avg_loss, avg_bias, avg_var, net_var = bias_variance_mse(predictions, y_test) 261 | 262 | assert avg_loss == np.float64(0.08333333333333333) 263 | assert avg_bias == np.float64(0.02777777777777778) 264 | assert avg_var == np.float64(0.05555555555555556) 265 | assert net_var == np.float64(0.05555555555555556) 266 | 267 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 268 | assert avg_var == net_var 269 | 270 | 271 | def test_bias_variance_0_1_loss_no_loss(): 272 | predictions = np.zeros((3, 5)) 273 | y_test = np.zeros(5) 274 | 275 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 276 | 277 | assert avg_loss == np.float64(0.0) 278 | assert avg_bias == np.float64(0.0) 279 | assert avg_var == np.float64(0.0) 280 | assert net_var == np.float64(0.0) 281 | 282 | assert avg_loss == avg_bias + net_var 283 | 284 | 285 | def test_bias_variance_0_1_loss_no_bias(): 286 | predictions = np.zeros((3, 5)) 287 | predictions[0] += 1 288 | y_test = np.zeros(5) 289 | 290 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 291 | 292 | assert avg_loss == np.float64(0.3333333333333333) 293 | assert avg_bias == np.float64(0.0) 294 | assert avg_var == np.float64(0.3333333333333333) 295 | assert net_var == np.float64(0.3333333333333333) 296 | 297 | assert avg_loss == avg_bias + net_var 298 | 299 | 300 | def test_bias_variance_0_1_loss_var_diff(): 301 | predictions = np.zeros((3, 5)) 302 | predictions[0] += 1 303 | predictions[1][0] += 1 304 | y_test = np.zeros(5) 305 | y_test[1] += 1 306 | 307 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 308 | 309 | assert avg_loss == np.float64(0.4666666666666666) 310 | assert avg_bias == np.float64(0.4) 311 | assert avg_var == np.float64(0.3333333333333333) 312 | assert net_var == np.float64(0.06666666666666668) 313 | 314 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 315 | 316 | 317 | def test_bias_variance_0_1_loss_div_by_0(): 318 | predictions = np.ones((3, 5)) 319 | y_test = np.zeros(5) 320 | 321 | 
avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 322 | 323 | assert avg_loss == np.float64(1.0) 324 | assert avg_bias == np.float64(1.0) 325 | assert avg_var == np.float64(0.0) 326 | assert net_var == np.float64(0.0) 327 | 328 | assert avg_loss == avg_bias + net_var 329 | -------------------------------------------------------------------------------- /docs/bias_variance_user_guide.rst: -------------------------------------------------------------------------------- 1 | ######################## 2 | Bias-Variance User Guide 3 | ######################## 4 | 5 | ********** 6 | Motivation 7 | ********** 8 | 9 | Statistical Bias vs. "Fairness" 10 | =============================== 11 | 12 | For this user guide and associated submodule, we are referring to 13 | `statistical bias `_ rather 14 | than the "fairness" type of bias. 15 | 16 | Why should we care about bias and variance? 17 | =========================================== 18 | 19 | Bias and variance are two indicators of model performance, and together they account 20 | for two of the three components of model error (the third is irreducible "noise" error 21 | that comes from the data set itself). We can define bias and variance as follows 22 | by training a model with multiple `bootstrap sampled 23 | `_ training sets, resulting in 24 | multiple instances of the model. 25 | 26 | .. topic:: Bias and variance defined over multiple training sets: 27 | 28 | * Bias represents the average difference between the prediction a model makes and the correct prediction. 29 | * Variance represents the average variability of the prediction a model makes. 30 | 31 | Typically, a model with high bias is "underfit" and a model with high variance is 32 | "overfit," but keep in mind this is not always the case and there can be many reasons 33 | why a model has high bias or high variance. An "underfit" model is oversimplified and 34 | performs poorly on the training data, whereas an "overfit" model sticks too closely to 35 | the training data and performs poorly on unseen examples. See Scikit-Learn's 36 | `Underfitting vs. Overfitting 37 | `_ 38 | for a clear example of an "underfit" model vs. an "overfit" model. 39 | 40 | There is a concept 41 | known as the `"bias-variance tradeoff" 42 | `_ that describes 43 | the relationship between high bias and high variance in a model. Our ultimate goal 44 | is to find the balance at which the combined bias and variance is at a minimum. 45 | From a business standpoint, it also matters whether the model 46 | error that we are unable to reduce should favor bias or variance. 47 | 48 | ***************************************** 49 | Visualize Bias and Variance With Examples 50 | ***************************************** 51 | 52 | To make the concepts of bias and variance easier to understand, we will show 53 | four example models, one for each combination of high and low bias and 54 | variance. These are extreme, engineered cases chosen so that the bias and 55 | variance are easy to see. 56 | 57 | Before we begin, let's take a look at the distribution of the labels. Notice 58 | that the majority of label values are around 1 and 2, with far fewer around 5. 59 | 60 | .. figure:: images/bias_variance_label_distribution.png 61 | :align: center 62 | :alt: alternate text 63 | :figclass: align-center 64 | 65 | First we have a model with high bias and low variance.
We artificially 66 | introduce bias to the model by adding 10 to every training label, but leaving 67 | the test labels as is. Given that values greater than 5 in the entire label 68 | set are considered outliers, we are effectively fitting the model to outliers. 69 | 70 | .. figure:: images/high_bias_low_variance.png 71 | :align: center 72 | :alt: alternate text 73 | :figclass: align-center 74 | 75 | Five sets of mean squared error results on the test set, one from each of 76 | the five bootstrap-sample trainings of the model. Notice the model error is very 77 | consistent among the trials and is not centered around 0. 78 | 79 | Next we have a model with low bias and high variance. We simulate this by 80 | introducing 8 random "noise" features to the data set. We also reduce the size 81 | of the training set and train a neural network over a low number of epochs. 82 | 83 | .. figure:: images/low_bias_high_variance.png 84 | :align: center 85 | :alt: alternate text 86 | :figclass: align-center 87 | 88 | Five sets of mean squared error results on the test set, one from each of 89 | the five bootstrap-sample trainings of the model. Notice the model error has 90 | different distributions among the trials and centers mainly around 0. 91 | 92 | Next we have a model with high bias and high variance. We simulate this through 93 | a combination of the techniques from the high-bias/low-variance example and 94 | the low-bias/high-variance example and train another neural network. 95 | 96 | .. figure:: images/high_bias_high_variance.png 97 | :align: center 98 | :alt: alternate text 99 | :figclass: align-center 100 | 101 | Five sets of mean squared error results on the test set, one from each of 102 | the five bootstrap-sample trainings of the model. Notice the model error has 103 | different distributions among the trials and is not centered around 0. 104 | 105 | Finally we have a model with low bias and low variance. This is a simple 106 | linear regression model with no modifications to the training or test labels. 107 | 108 | .. figure:: images/low_bias_low_variance.png 109 | :align: center 110 | :alt: alternate text 111 | :figclass: align-center 112 | 113 | Five sets of mean squared error results on the test set, one from each of 114 | the five bootstrap-sample trainings of the model. Notice the model error is very 115 | consistent among the trials and centers mainly around 0. 116 | 117 | *************************** 118 | Bias-Variance Decomposition 119 | *************************** 120 | 121 | .. currentmodule:: mvtk.bias_variance 122 | 123 | There are formulas for breaking down total model error into three parts: bias, 124 | variance, and noise. These can be applied to both regression problem loss 125 | functions (mean squared error) and classification problem loss functions 126 | (0-1 loss). In a paper by Pedro Domingos, a unified 127 | decomposition was proposed for both types of problems :cite:`domingos2000decomp`. 128 | 129 | First, let's define :math:`y` as a single prediction, :math:`D` as the set of 130 | training sets used to train the models, :math:`Y` as the set of predictions 131 | from the models trained on :math:`D`, and a loss function :math:`L` that 132 | calculates the error between our prediction :math:`y` and the correct 133 | prediction. 134 | The main prediction :math:`y_m` is the prediction with the smallest average loss 135 | when compared to the set of predictions :math:`Y`. The main prediction is 136 | the mean of :math:`Y` for mean squared error and the mode of :math:`Y` for 137 | 0-1 loss :cite:`domingos2000decomp`.
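To make these quantities concrete before defining bias and variance formally, here is a
minimal NumPy sketch of the mean squared error case. It is an illustration of the math
below, not the toolkit's implementation, and the ``predictions`` array is a made-up
example.

.. code-block:: python

    import numpy as np

    # predictions[i, j]: prediction of the i-th model (trained on the i-th
    # training set in D) for the j-th test example.
    predictions = np.array([[0.5, 0.5],
                            [0.0, 1.0],
                            [0.1, 0.6]])
    y_star = np.array([0.0, 1.0])  # correct predictions y_*

    y_m = predictions.mean(axis=0)                   # main prediction (mean for MSE)
    avg_loss = ((predictions - y_star) ** 2).mean()  # average loss over D and x
    avg_bias = ((y_m - y_star) ** 2).mean()          # E_x[B(x)]
    avg_var = ((predictions - y_m) ** 2).mean()      # E_x[V(x)]

    # For squared loss c = 1, so ignoring noise the decomposition is exact.
    assert np.isclose(avg_loss, avg_bias + avg_var)

For 0-1 loss, the main prediction would instead be the per-example mode of the
predictions.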
138 | 139 | Bias can now be defined for a single example :math:`x` over the set of models 140 | trained on :math:`D` as the loss calculated between the main prediction 141 | :math:`y_m` and the correct prediction :math:`y_*` :cite:`domingos2000decomp`. 142 | 143 | .. math:: 144 | B(x) = L(y_*, y_m) 145 | 146 | Variance can now be defined for a single example :math:`x` over the set of 147 | models trained on :math:`D` as the average loss calculated between all predictions 148 | and the main prediction :math:`y_m` :cite:`domingos2000decomp`. 149 | 150 | .. math:: 151 | V(x) = E_D[L(y_m, y)] 152 | 153 | We will need to take the average of the bias over all examples as 154 | :math:`E_x[B(x)]` and the average of the variance over all examples as 155 | :math:`E_x[V(x)]` :cite:`domingos2000decomp`. 156 | 157 | With :math:`N(x)` representing the irreducible error from observation noise, we 158 | can decompose the average expected loss as :cite:`domingos2000decomp` 159 | 160 | .. math:: 161 | E_x[N(x)] + E_x[B(x)] + E_x[cV(x)] 162 | 163 | In other words, the average loss over all examples is equal to the noise plus the 164 | average bias plus the net variance (the :math:`c` factor applied to the variance 165 | when averaging turns the average variance into the net variance). 166 | 167 | .. note:: 168 | We are generalizing the actual value of :math:`N(x)`, as the Model Validation 169 | Toolkit's implementation of bias-variance decomposition does not include noise 170 | in the average expected loss. This noise represents error in the actual data 171 | and not error related to the model itself. If you would like to dive deeper 172 | into the noise representation, please consult the `Pedro Domingos paper 173 | `_. 174 | 175 | For mean squared loss functions, :math:`c = 1`, meaning that average variance 176 | is equal to net variance. 177 | 178 | For zero-one loss functions, :math:`c = 1` when :math:`y_m = y_*`; otherwise 179 | :math:`c = -P_D(y = y_* \mid y \neq y_m)` :cite:`domingos2000decomp`. In other words, 180 | :math:`c` is 1 when the main prediction is the correct prediction. If the main 181 | prediction is not the correct prediction, then :math:`c` is equal to the negative of 182 | the probability of a single prediction being the correct prediction given that the 183 | single prediction is not the main prediction. 184 | 185 | Usage 186 | ===== 187 | 188 | :meth:`bias_variance_compute` will return the average loss, average bias, average 189 | variance, and net variance for an estimator trained and tested over a specified number 190 | of training sets. This was inspired by and modeled after Sebastian Raschka's 191 | `bias_variance_decomp 192 | `_ 193 | function :cite:`mlxtenddecomp`. 194 | We use the `bootstrapping `_ 195 | method to produce our sets of training data from the original training set. By default 196 | it will use mean squared error as the loss function, but it will accept either of the following 197 | functions for calculating loss. 198 | 199 | * :meth:`bias_variance_mse` for mean squared error 200 | * :meth:`bias_variance_0_1_loss` for 0-1 loss 201 | 202 | Since :meth:`bias_variance_compute` trains an estimator over multiple iterations, it also 203 | expects the estimator to be wrapped in a class that extends the 204 | :class:`estimators.EstimatorWrapper` class, which provides the uniform fit and predict 205 | methods that not all estimator implementations natively conform to. A short usage 206 | sketch follows, and the available wrappers are listed after it.
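Below is a sketch of a typical call, assuming ``X_train``, ``y_train``, ``X_test``, and
``y_test`` are already defined; it mirrors the calls in the toolkit's test suite rather
than prescribing the only way to use the API.

.. code-block:: python

    from sklearn.linear_model import Ridge

    from mvtk.bias_variance import bias_variance_compute, bias_variance_mse
    from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper

    # Wrap the estimator so bias_variance_compute can fit and predict uniformly.
    model_wrapped = SciKitLearnEstimatorWrapper(Ridge())

    avg_loss, avg_bias, avg_var, net_var = bias_variance_compute(
        model_wrapped,
        X_train,
        y_train,
        X_test,
        y_test,
        iterations=10,  # number of bootstrap training sets
        random_state=123,
        decomp_fn=bias_variance_mse,
    )

The following estimator wrappers are provided.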
207 | 208 | * :class:`estimators.PyTorchEstimatorWrapper` for `PyTorch `_ 209 | * :class:`estimators.SciKitLearnEstimatorWrapper` for `Scikit-Learn `_ 210 | * :class:`estimators.TensorFlowEstimatorWrapper` for `TensorFlow `_ 211 | 212 | :meth:`bias_variance_compute` works well for smaller data sets and less complex models, but what 213 | happens when you have a very large set of data, a very complex model, or both? 214 | :meth:`bias_variance_compute_parallel` does the same calculation, but leverages `Ray 215 | `_ for parallelization of bootstrapping, training, and predicting. 216 | This allows for faster calculation by distributing the computation across a cluster. 217 | 218 | .. topic:: Tutorials: 219 | 220 | * :doc:`Bias-Variance Visualization ` 221 | * :doc:`Bias-Variance Regression ` 222 | * :doc:`Bias-Variance Classification ` 223 | 224 | .. bibliography:: refs.bib 225 | :cited: 226 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------