├── mvtk
    ├── version.py
    ├── bias_variance
    │   ├── __init__.py
    │   ├── estimators
    │   │   ├── __init__.py
    │   │   ├── estimator_wrapper.py
    │   │   ├── sklearn_estimator_wrapper.py
    │   │   ├── tensorflow_estimator_wrapper.py
    │   │   └── pytorch_estimator_wrapper.py
    │   ├── bias_variance_parallel.py
    │   └── bias_variance.py
    ├── supervisor
    │   ├── divergence
    │   │   ├── __init__.py
    │   │   ├── utils.py
    │   │   ├── nn.py
    │   │   └── generators.py
    │   ├── __init__.py
    │   ├── processing.py
    │   └── utils.py
    ├── __init__.py
    ├── sobol.py
    ├── credibility.py
    ├── metrics.py
    └── thresholding.py
├── tests
    ├── package.py
    ├── test_sobol.py
    ├── credibility
    │   └── test_credibility.py
    ├── supervisor
    │   ├── test_divergence_utils.py
    │   ├── test_processing.py
    │   └── test_divergence.py
    ├── test_metrics.py
    └── bias_variance
    │   ├── estimators
    │   │   ├── test_sklearn_estimator_wrapper.py
    │   │   ├── test_tensorflow_estimator_wrapper.py
    │   │   └── test_pytorch_estimator_wrapper.py
    │   ├── test_bias_variance_parallel.py
    │   └── test_bias_variance.py
├── docs
    ├── images
    │   ├── interprenet.png
    │   ├── pdf_total_variation.png
    │   ├── low_bias_low_variance.png
    │   ├── high_bias_high_variance.png
    │   ├── high_bias_low_variance.png
    │   ├── low_bias_high_variance.png
    │   ├── thresholding_expected_utility.png
    │   ├── thresholding_negative_scores.png
    │   ├── thresholding_positive_scores.png
    │   ├── bias_variance_label_distribution.png
    │   ├── thresholding_exploration_proportion.png
    │   └── logo.svg
    ├── notebooks
    │   ├── thresholding
    │   │   └── threshold_distribution_evolution.gif
    │   ├── interprenet
    │   │   └── .ipynb_checkpoints
    │   │       └── Periodic-checkpoint.ipynb
    │   └── divergence
    │       └── CategoricalColumns.ipynb
    ├── sobol.rst
    ├── metrics.rst
    ├── credibility.rst
    ├── interprenet.rst
    ├── supervisor.rst
    ├── supervisor.utils.rst
    ├── thresholding.rst
    ├── bias_variance.bias_variance.rst
    ├── bias_variance.estimators.rst
    ├── supervisor.processing.rst
    ├── bias_variance.rst
    ├── supervisor.divergence.rst
    ├── bias_variance.bias_variance_parallel.rst
    ├── _templates
    │   └── layout.html
    ├── Makefile
    ├── authors.rst
    ├── make.bat
    ├── css
    │   └── custom.css
    ├── about.rst
    ├── index.rst
    ├── sobol_user_guide.rst
    ├── conf.py
    ├── quickstart.rst
    ├── interprenet_user_guide.rst
    ├── contributing.md
    ├── refs.bib
    ├── credibility_user_guide.rst
    ├── thresholding_user_guide.rst
    └── bias_variance_user_guide.rst
├── tox.ini
├── .pre-commit-config.yaml
├── DCO
├── setup.py
├── README.md
├── .circleci
    └── config.yml
└── LICENSE
/mvtk/version.py: -------------------------------------------------------------------------------- 1 | __version__ = "0.2.0" 2 | -------------------------------------------------------------------------------- /mvtk/bias_variance/__init__.py: -------------------------------------------------------------------------------- 1 | from .bias_variance import * 2 | from .bias_variance_parallel import * 3 | -------------------------------------------------------------------------------- /tests/package.py: -------------------------------------------------------------------------------- 1 | import mvtk 2 | 3 | 4 | def test_version(): 5 | assert isinstance(mvtk.__version__, str) 6 | -------------------------------------------------------------------------------- /docs/images/interprenet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/interprenet.png -------------------------------------------------------------------------------- /docs/images/pdf_total_variation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/pdf_total_variation.png -------------------------------------------------------------------------------- /docs/images/low_bias_low_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/low_bias_low_variance.png -------------------------------------------------------------------------------- /mvtk/supervisor/divergence/__init__.py: -------------------------------------------------------------------------------- 1 | from .generators import * 2 | from .nn import * 3 | from .utils import * 4 | from .metrics import * 5 | -------------------------------------------------------------------------------- /docs/images/high_bias_high_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/high_bias_high_variance.png -------------------------------------------------------------------------------- /docs/images/high_bias_low_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/high_bias_low_variance.png -------------------------------------------------------------------------------- /docs/images/low_bias_high_variance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/low_bias_high_variance.png -------------------------------------------------------------------------------- /docs/images/thresholding_expected_utility.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_expected_utility.png -------------------------------------------------------------------------------- /docs/images/thresholding_negative_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_negative_scores.png -------------------------------------------------------------------------------- /docs/images/thresholding_positive_scores.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_positive_scores.png -------------------------------------------------------------------------------- /docs/images/bias_variance_label_distribution.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/bias_variance_label_distribution.png -------------------------------------------------------------------------------- /docs/images/thresholding_exploration_proportion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/images/thresholding_exploration_proportion.png -------------------------------------------------------------------------------- /mvtk/__init__.py: 
-------------------------------------------------------------------------------- 1 | from . import metrics 2 | from . import supervisor 3 | from . import credibility 4 | from . import interprenet 5 | from mvtk.version import __version__ as __version__ 6 | -------------------------------------------------------------------------------- /docs/notebooks/thresholding/threshold_distribution_evolution.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/HEAD/docs/notebooks/thresholding/threshold_distribution_evolution.gif -------------------------------------------------------------------------------- /docs/sobol.rst: -------------------------------------------------------------------------------- 1 | sobol 2 | ================== 3 | 4 | .. automodule:: mvtk.sobol 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/metrics.rst: -------------------------------------------------------------------------------- 1 | metrics 2 | ================== 3 | 4 | .. automodule:: mvtk.metrics 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/credibility.rst: -------------------------------------------------------------------------------- 1 | credibility 2 | ================== 3 | 4 | .. automodule:: mvtk.credibility 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/interprenet.rst: -------------------------------------------------------------------------------- 1 | interprenet 2 | ================== 3 | 4 | .. automodule:: mvtk.interprenet 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/supervisor.rst: -------------------------------------------------------------------------------- 1 | supervisor 2 | ================== 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | supervisor.divergence 10 | supervisor.processing 11 | supervisor.utils 12 | -------------------------------------------------------------------------------- /docs/supervisor.utils.rst: -------------------------------------------------------------------------------- 1 | utils 2 | ================================== 3 | 4 | .. automodule:: mvtk.supervisor.utils 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/thresholding.rst: -------------------------------------------------------------------------------- 1 | thresholding 2 | ================== 3 | 4 | .. automodule:: mvtk.thresholding 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/bias_variance.bias_variance.rst: -------------------------------------------------------------------------------- 1 | bias_variance 2 | ============= 3 | 4 | .. 
automodule:: mvtk.bias_variance.bias_variance 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/bias_variance.estimators.rst: -------------------------------------------------------------------------------- 1 | estimators 2 | ========== 3 | 4 | .. automodule:: mvtk.bias_variance.estimators 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /docs/supervisor.processing.rst: -------------------------------------------------------------------------------- 1 | processing 2 | ================================== 3 | 4 | .. automodule:: mvtk.supervisor.processing 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /docs/bias_variance.rst: -------------------------------------------------------------------------------- 1 | bias_variance 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | 9 | bias_variance.estimators 10 | bias_variance.bias_variance 11 | bias_variance.bias_variance_parallel 12 | -------------------------------------------------------------------------------- /docs/supervisor.divergence.rst: -------------------------------------------------------------------------------- 1 | divergence 2 | ================================== 3 | 4 | .. automodule:: mvtk.supervisor.divergence 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :special-members: __init__, __call__ 9 | -------------------------------------------------------------------------------- /mvtk/supervisor/__init__.py: -------------------------------------------------------------------------------- 1 | from importlib import util 2 | 3 | if util.find_spec("pyspark") is not None: 4 | del util 5 | from .processing import * 6 | else: 7 | del util 8 | from .utils import * 9 | from .divergence import * 10 | -------------------------------------------------------------------------------- /docs/bias_variance.bias_variance_parallel.rst: -------------------------------------------------------------------------------- 1 | bias_variance_parallel 2 | ====================== 3 | 4 | .. 
automodule:: mvtk.bias_variance.bias_variance_parallel 5 | :members: 6 | :imported-members: 7 | :undoc-members: 8 | :show-inheritance: 9 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/__init__.py: -------------------------------------------------------------------------------- 1 | from .estimator_wrapper import EstimatorWrapper 2 | from .pytorch_estimator_wrapper import PyTorchEstimatorWrapper 3 | from .sklearn_estimator_wrapper import SciKitLearnEstimatorWrapper 4 | from .tensorflow_estimator_wrapper import TensorFlowEstimatorWrapper 5 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% block extrahead %} 4 | {{ super() }} 5 | 6 | 13 | {% endblock %} 14 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | # content of: tox.ini , put in same dir as setup.py 2 | [tox] 3 | envlist = py38 4 | 5 | [testenv] 6 | # install pytest in the virtualenv where commands will be executed 7 | recreate = true 8 | deps = 9 | pytest 10 | pre-commit 11 | extras = 12 | doc 13 | commands = 14 | pre-commit run --all 15 | pytest tests 16 | sphinx-build -b linkcheck docs docs/linkcheck 17 | sphinx-build -b html docs docs/html 18 | -------------------------------------------------------------------------------- /tests/test_sobol.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | from mvtk import sobol 4 | 5 | 6 | def test_sobol(): 7 | nprng = numpy.random.RandomState(0) 8 | data = nprng.uniform(size=(1000000, 4)) 9 | coefficients = numpy.arange(1, 5) 10 | 11 | def model(x): 12 | return x.dot(coefficients) 13 | 14 | first_order, total = sobol.sobol(model, data) 15 | variance = model(data).std() ** 2 16 | V = coefficients**2 / 12 17 | assert numpy.allclose(first_order * variance, V, rtol=0.01) 18 | assert numpy.allclose(total.sum(), 1, rtol=0.01) 19 | assert numpy.allclose(total, first_order, rtol=0.01) 20 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | class EstimatorWrapper: 2 | r"""This is a wrapper class that can be inherited to conform any estimator 3 | to the fit/predict interface""" 4 | 5 | def fit(self, X, y, **kwargs): 6 | r"""Train the estimator 7 | 8 | Args: 9 | X: features 10 | y: ground truth labels 11 | kwargs (optional): kwargs for use in training 12 | """ 13 | pass 14 | 15 | def predict(self, X, **kwargs): 16 | r"""Get predictions from the estimator 17 | 18 | Args: 19 | X: features 20 | kwargs (optional): kwargs for use in predicting 21 | """ 22 | pass 23 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 
12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # Install the pre-commit hooks below with 2 | # 'pre-commit install' 3 | 4 | # Auto-update the version of the hooks with 5 | # 'pre-commit autoupdate' 6 | 7 | # Run the hooks on all files with 8 | # 'pre-commit run --all' 9 | 10 | repos: 11 | 12 | - repo: https://github.com/psf/black 13 | rev: 23.11.0 14 | hooks: 15 | - id: black 16 | language_version: python3.8 17 | args: [--line-length=88, tests, mvtk] 18 | 19 | - repo: https://github.com/pycqa/flake8 20 | rev: 6.1.0 21 | hooks: 22 | - id: flake8 23 | args: [--max-line-length=88, '--per-file-ignores=__init__.py:F401,F403', tests, mvtk] 24 | - repo: https://github.com/pre-commit/mirrors-mypy 25 | rev: v1.7.1 26 | hooks: 27 | - id: mypy 28 | files: mvtk/ 29 | -------------------------------------------------------------------------------- /docs/authors.rst: -------------------------------------------------------------------------------- 1 | .. raw :: html 2 | 3 | 4 | 5 | 6 | 7 |
Team
Alex Eftimiades - Lead
Website    Linkedin
Matthew Gillett - Developer
Website    Linkedin
8 |
9 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/css/custom.css: -------------------------------------------------------------------------------- 1 | /* unvisited link */ 2 | .wy-side-nav-search a:link { 3 | color: #000000; 4 | } 5 | 6 | /* unvisited link */ 7 | .wy-nav-content a:link, .section a:link { 8 | color: #0070B7; 9 | } 10 | 11 | .highlight .c1 { 12 | color: #097B79; 13 | } 14 | 15 | .highlight .si { 16 | color: #D90E39; 17 | } 18 | 19 | .section .nbinput.docutils.container .prompt.highlight-none.notranslate pre { 20 | color: #0070B7; 21 | } 22 | 23 | .section .sig.sig-object.py { 24 | color: #000000; 25 | background-color: rgb(255, 255, 255); 26 | } 27 | 28 | .section .sig.sig-object.py .sig-paren { 29 | color: #0070B7; 30 | } 31 | 32 | .section .nboutput.docutils.container .prompt.highlight-none.notranslate pre { 33 | color: #D90E39; 34 | } 35 | 36 | .section .pre { 37 | color: #D90E39; 38 | } 39 | 40 | .section .admonition .admonition-title { 41 | background-color: #0070B7; 42 | } 43 | 44 | .section .brackets, .section .fn-backref { 45 | color: #0070B7; 46 | } 47 | 48 | /* text */ 49 | footer { 50 | color: #6B6B6B; 51 | } -------------------------------------------------------------------------------- /DCO: -------------------------------------------------------------------------------- 1 | Developer's Certificate of Origin (adapted from the linux kernel) 2 | 3 | By making a contribution to this project, I certify that: 4 | 5 | The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or 6 | The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or 7 | The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it. 
8 | I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved. 9 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/sklearn_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | from . import EstimatorWrapper 2 | 3 | 4 | class SciKitLearnEstimatorWrapper(EstimatorWrapper): 5 | def __init__(self, estimator): 6 | r"""Create a wrapper for a Scikit-Learn estimator 7 | 8 | Args: 9 | estimator: Scikit-Learn estimator instance 10 | 11 | Returns: 12 | self 13 | """ 14 | self.estimator = estimator 15 | 16 | def fit(self, X, y, **kwargs): 17 | r"""Train the estimator 18 | 19 | Args: 20 | X: features 21 | y: ground truth labels 22 | kwargs (optional): kwargs for use in training 23 | 24 | Returns: 25 | self 26 | """ 27 | self.estimator.fit(X, y, **kwargs) 28 | return self 29 | 30 | def predict(self, X, **kwargs): 31 | r"""Get predictions from the estimator 32 | 33 | Args: 34 | X: features 35 | kwargs (optional): kwargs for use in predicting 36 | 37 | Returns: 38 | predictions 39 | """ 40 | return self.estimator.predict(X, **kwargs) 41 | -------------------------------------------------------------------------------- /mvtk/supervisor/processing.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | import public 4 | 5 | 6 | @public.add 7 | def replace_nulls(df, replace, column_names): 8 | return df.fillna({k: replace for k in column_names}) 9 | 10 | 11 | @public.add 12 | # Normalize timestamp column values. 13 | def normalize_ts_columns(df, column_names): 14 | for column_name in column_names: 15 | normalize_ts_column(df, column_name) 16 | return df 17 | 18 | 19 | # convert timestamp in HH:mm:ss to seconds - 20 | # pandas timedelta takes the time format and converts it to seconds. 21 | # divide the result by the total number of seconds in a day. 22 | # this normalizes the timestamp to a number between 0 and 1. 23 | # round off the value to 5 decimal places. 24 | @public.add 25 | def normalize_ts_column(df, column_name): 26 | df[column_name] = pd.to_timedelta( 27 | df[column_name].dt.strftime("%H:%M:%S") 28 | ).dt.total_seconds() 29 | df[column_name] = df[column_name].replace(np.nan, -1) 30 | df[column_name] = df[column_name].apply( 31 | lambda x: round(x / 86400, 5) if x >= 0 else x 32 | ) 33 | 34 | return df 35 | -------------------------------------------------------------------------------- /docs/about.rst: -------------------------------------------------------------------------------- 1 | .. _about: 2 | 3 | About 4 | ======== 5 | 6 | History 7 | ------- 8 | 9 | This project was started by Alex Eftimiades in 2019 as part of an 10 | internal R&D effort focused on model monitoring and sensitivity 11 | analysis. With early usage, testing, and utility contributions from 12 | Dwight Gunning, Matthew Gillett, and Mona Annaparthi, this led to the ``supervisor`` 13 | submodule and many of the initial ideas that became the 14 | ``thresholding``, ``sobol``, and ``credibility`` modules. Subsequent 15 | work on explainability led to ``interprenet`` and the normalized 16 | mutual information score within ``metrics``.
17 | 18 | Authors 19 | ------- 20 | 21 | The following people are currently core contributors to Model Validation 22 | Toolkit's development and maintenance: 23 | 24 | .. include:: authors.rst 25 | 26 | Please see :doc:`contributing <contributing>` to join us! 27 | 28 | Acknowledgements 29 | ---------------- 30 | 31 | We thank David Devakumar, Mohamad Ibrahim, Jonathan Bryant, and Ahmed Ibrahim 32 | for their support, feedback, and help allocating resources to work on this 33 | project. We thank Nil Weerasinghe for his help organizing R&D efforts. 34 | -------------------------------------------------------------------------------- /tests/credibility/test_credibility.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import itertools 3 | import pandas 4 | 5 | from mvtk import credibility 6 | 7 | 8 | def test_value_error(): 9 | try: 10 | credibility.credible_interval(0, 0, prior=(0, 0)) 11 | except ValueError: 12 | return 13 | raise Exception("Expected ValueError") 14 | 15 | 16 | def test_equivalence(): 17 | assert credibility.credible_interval(0, 1) == credibility.credible_interval( 18 | 1, 2, prior=(0, 0) 19 | ) 20 | 21 | 22 | def test_prob_greater_cmp(): 23 | nprng = numpy.random.RandomState(0) 24 | prior_sample_size = 10**6 25 | for N in range(2, 8): 26 | for prior1, prior2 in itertools.product( 27 | itertools.product(range(1, 3), repeat=2), repeat=2 28 | ): 29 | df = pandas.DataFrame() 30 | p1 = nprng.beta(*prior1, size=prior_sample_size) 31 | df["positives1"] = nprng.binomial(N, p1) 32 | p2 = nprng.beta(*prior2, size=prior_sample_size) 33 | df["positives2"] = nprng.binomial(N, p2) 34 | df["target"] = p1 > p2 35 | for (p1, p2), subset in df.groupby(["positives1", "positives2"]): 36 | p = subset["target"].mean() 37 | q = credibility.prob_greater_cmp( 38 | p1, N - p1, p2, N - p2, prior1=prior1, prior2=prior2, err=10**-5 39 | ) 40 | assert abs(q - p) < 0.05 41 | -------------------------------------------------------------------------------- /tests/supervisor/test_divergence_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scipy 4 | 5 | from mvtk.supervisor.divergence.utils import arrayify 6 | 7 | 8 | def test_arrayify_dataframes(): 9 | df_a = pd.DataFrame({"a": list(range(4))}) 10 | assert ( 11 | df_a.shape == arrayify(df_a)[0].shape 12 | ), "Dataframe shape is same after arrayify" 13 | assert ( 14 | df_a.shape == arrayify([df_a])[0].shape 15 | ), "Dataframe shape is same after arrayify" 16 | assert isinstance(arrayify([df_a])[0], np.ndarray) 17 | assert isinstance(arrayify(df_a)[0], np.ndarray) 18 | 19 | 20 | def test_arrayify_numpy(): 21 | ones = np.ones((2, 4)) 22 | ones_lst = arrayify(ones) 23 | assert ( 24 | ones.shape == ones_lst[0].shape 25 | ), "Shape should be same after arrayify_as_array" 26 | ones_lst2 = arrayify([ones]) 27 | assert ( 28 | ones_lst[0].shape == ones_lst2[0].shape 29 | ), "Shape should be same after arrayify_as_array" 30 | ones_lst3 = arrayify([ones, ones]) 31 | assert ( 32 | ones_lst[0].shape == ones_lst3[0].shape 33 | ), "Shape should be same after arrayify_as_array" 34 | 35 | 36 | def test_arrayify_csr(): 37 | ones = scipy.sparse.csr_matrix(np.ones((2, 4))) 38 | ones_lst = arrayify(ones) 39 | assert ( 40 | ones.shape == ones_lst[0].shape 41 | ), "Shape should be same after arrayify_as_array" 42 | ones_lst2 = arrayify([ones]) 43 | assert ( 44 | ones_lst[0].shape == ones_lst2[0].shape 45 | ), "Shape should be same after
arrayify_as_array" 46 | ones_lst3 = arrayify([ones, ones]) 47 | assert ( 48 | ones_lst[0].shape == ones_lst3[0].shape 49 | ), "Shape should be same after arrayify_as_array" 50 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | _dct = {} 4 | with open("mvtk/version.py") as f: 5 | exec(f.read(), _dct) 6 | __version__ = _dct["__version__"] 7 | 8 | extras_require = { 9 | "doc": [ 10 | "nbsphinx", 11 | "sphinx", 12 | "sphinx-rtd-theme", 13 | "sphinxcontrib-bibtex", 14 | "imageio", 15 | "myst-parser", 16 | "ipykernel", 17 | "torch", 18 | "tensorflow", 19 | ], 20 | "pytorch": ["torch"], 21 | "tensorflow": ["tensorflow"], 22 | } 23 | with open("README.md", "r", encoding="utf-8") as fh: 24 | long_description = fh.read() 25 | 26 | setup( 27 | name="mvtk", 28 | version=__version__, 29 | license="Apache-2.0", 30 | author="Alex Eftimiades", 31 | author_email="alexeftimiades@gmail.com", 32 | description="Model validation toolkit", 33 | long_description=long_description, 34 | long_description_content_type="text/markdown", 35 | packages=find_packages(), 36 | classifiers=[ 37 | "Programming Language :: Python :: 3", 38 | "License :: OSI Approved :: Apache Software License", 39 | "Operating System :: MacOS", 40 | "Operating System :: POSIX :: Linux", 41 | ], 42 | install_requires=[ 43 | "jax>=0.2.8,<=0.4.16", 44 | "public>=2020.12.3", 45 | "fastcore>=1.3.25", 46 | "jaxlib>=0.1.23,<=0.4.16", 47 | "scikit-learn", 48 | "numpy", 49 | "matplotlib", 50 | "scipy", 51 | "seaborn", 52 | "pandas>=0.23.4", 53 | "tqdm", 54 | "ray", 55 | ], 56 | extras_require=extras_require, 57 | url="https://finraos.github.io/model-validation-toolkit/", 58 | project_urls={ 59 | "Bug Tracker": "https://github.com/FINRAOS/model-validation-toolkit/issues", 60 | }, 61 | ) 62 | -------------------------------------------------------------------------------- /mvtk/sobol.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import public 3 | 4 | 5 | def choose(x, N, nprng=None): 6 | if nprng is None: 7 | nprng = numpy.random.RandomState(0) 8 | return x[nprng.choice(numpy.arange(len(x), dtype="int"), N)] 9 | 10 | 11 | @public.add 12 | def sobol(model, data, N=None, nprng=None): 13 | """Total and first order Sobol sensitivity indices. 14 | https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis. 15 | 16 | Args: 17 | model (function): Maps data to scores 18 | data (ndarray): Data matrix. Each row is a sample vector. 19 | N (int): sample size for monte carlo estimate of sobol 20 | indices. Should be less than or equal to the number of rows 21 | of data. If None, entire dataset is used. 22 | nprng (RandomState): Optional numpy RandomState. 23 | returns: 24 | Total and first order Sobol sensitivity indices. Each index 25 | is expressed as an array of length equal to the number of 26 | features in the supplied data matrix. 
27 | """ 28 | if nprng is None: 29 | nprng = numpy.random.RandomState(0) 30 | if N is None: 31 | A = data.copy() 32 | B = data.copy() 33 | nprng.shuffle(A) 34 | nprng.shuffle(B) 35 | N = len(data) 36 | elif N > len(data): 37 | raise ValueError("Sample size must be less than or equal to size of dataset") 38 | else: 39 | A, B = (choose(data, N, nprng) for _ in range(2)) 40 | d = data.shape[1] 41 | total = [] 42 | first_order = [] 43 | for i in range(d): 44 | C = A[:, i].copy() 45 | A[:, i] = B[:, i] 46 | diff = model(A) 47 | A[:, i] = C 48 | diff -= model(A) 49 | first_order.append(model(B).dot(diff) / N) 50 | total.append(diff.dot(diff) / (2 * N)) 51 | variance_y = model(numpy.vstack((A, B))).std() ** 2 52 | total = numpy.asarray(total) / variance_y 53 | first_order = numpy.asarray(first_order) / variance_y 54 | return total, first_order 55 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | drawing 2 | 3 | [![CircleCI](https://circleci.com/gh/FINRAOS/model-validation-toolkit/tree/main.svg?style=svg)](https://circleci.com/gh/FINRAOS/model-validation-toolkit/tree/main)[![Join the chat at https://gitter.im/FINRAOS/model-validation-toolkit](https://badges.gitter.im/FINRAOS/model-validation-toolkit.svg)](https://gitter.im/FINRAOS/model-validation-toolkit?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)[![PyPI version](https://img.shields.io/pypi/v/mvtk)](https://pypi.org/project/mvtk/) 4 | 5 | # Model Validation Tookit 6 | 7 | ## Installation 8 | 9 | Run `pip install mvtk`. 10 | 11 | **Windows users**: Until [Jaxlib is supported on windows 12 | natively](https://github.com/google/jax/issues/438) you will need to either use 13 | this library from a Linux subsystem or within a Docker container. 14 | Alternatively, you can [build jaxlib from 15 | source](https://jax.readthedocs.io/en/latest/developer.html#additional-notes-for-building-jaxlib-from-source-on-windows). 16 | 17 | ## Developers 18 | 19 | Check out this repository and `cd` into the directory. 20 | 21 | Run `pip install -e ".[doc]"`. 22 | 23 | The `[doc]` is used to install dependencies for building documentation. You 24 | will need [pandoc](https://pandoc.org/) installed. 25 | 26 | # Submodules 27 | You can import: 28 | 29 | - `mvtk.credibility` for assessing credibility from sample size. 30 | - `mvtk.interprenet` for building interpretable neural nets. 31 | - `mvtk.thresholding` for adaptive thresholding. 32 | - `mvtk.sobol` for Sobol sensitivity analysis 33 | - `mvtk.supervisor` for divergence analysis 34 | - `mvtk.metrics` for specialised metrics 35 | - `mvtk.bias_variance` for bias-variance decomposition 36 | 37 | # Documentation 38 | You can run `make -C docs html` on a Mac or `make.bat -C docs html` on a PC to just rebuild the docs. In this case, point your browser to ```docs/_build/html/index.html``` to view the homepage. If your browser was already pointing to documentation that you changed, you can refresh the page to see the changes. 39 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | Model Validation Toolkit 2 | =================================== 3 | 4 | The Model Validation Toolkit is a library for model validation, metaanalysis, and monitoring. 5 | 6 | .. toctree:: 7 | :glob: 8 | :maxdepth: 1 9 | :caption: Notes 10 | 11 | .. 
toctree:: 12 | :maxdepth: 1 13 | :caption: Overview 14 | 15 | quickstart 16 | contributing 17 | about 18 | 19 | .. toctree:: 20 | :maxdepth: 1 21 | :caption: User Guides 22 | 23 | supervisor_user_guide 24 | credibility_user_guide 25 | thresholding_user_guide 26 | interprenet_user_guide 27 | sobol_user_guide 28 | bias_variance_user_guide 29 | 30 | .. toctree:: 31 | :maxdepth: 1 32 | :caption: Divergence Tutorials 33 | 34 | notebooks/divergence/Airlines 35 | notebooks/divergence/DivergenceFunctions 36 | notebooks/divergence/CategoricalColumns 37 | notebooks/divergence/BugDetection 38 | notebooks/divergence/TrainingDatasetDrift 39 | 40 | .. toctree:: 41 | :maxdepth: 1 42 | :caption: Credibility Tutorials 43 | 44 | notebooks/credibility/Credibility 45 | 46 | .. toctree:: 47 | :maxdepth: 1 48 | :caption: Thresholding Tutorials 49 | 50 | notebooks/thresholding/Thresholding 51 | 52 | .. toctree:: 53 | :maxdepth: 1 54 | :caption: Interprenet Tutorials 55 | 56 | notebooks/interprenet/Interprenet 57 | 58 | .. toctree:: 59 | :maxdepth: 1 60 | :caption: Bias and Metrics Tutorials 61 | 62 | notebooks/metrics/CounteringSampleBias 63 | 64 | .. toctree:: 65 | :maxdepth: 1 66 | :caption: Bias-Variance Decomposition Tutorials 67 | 68 | notebooks/bias_variance/BiasVarianceClassification 69 | notebooks/bias_variance/BiasVarianceRegression 70 | notebooks/bias_variance/BiasVarianceVisualization 71 | 72 | .. toctree:: 73 | :maxdepth: 1 74 | :caption: Python API 75 | 76 | supervisor 77 | credibility 78 | thresholding 79 | interprenet 80 | sobol 81 | metrics 82 | bias_variance 83 | 84 | Indices and tables 85 | ================== 86 | 87 | * :ref:`genindex` 88 | * :ref:`modindex` 89 | -------------------------------------------------------------------------------- /mvtk/supervisor/divergence/utils.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy 3 | import scipy 4 | import public 5 | 6 | from mvtk.supervisor.utils import parallel 7 | 8 | 9 | @public.add 10 | def get_drift_series(metric, baseline, test): 11 | return numpy.asarray(parallel(lambda x: metric(x, baseline), test)) 12 | 13 | 14 | @public.add 15 | def get_distance_matrix(metric, sample_distributions, show_progress=False): 16 | distance_matrix = numpy.zeros((len(sample_distributions),) * 2) 17 | for index, d in parallel( 18 | lambda x: (x[0], metric(x[1][0], x[1][1])), 19 | [ 20 | list(zip(*x)) 21 | for x in itertools.combinations(enumerate(sample_distributions), 2) 22 | ], 23 | show_progress=show_progress, 24 | ): 25 | distance_matrix[index] = d 26 | distance_matrix += distance_matrix.T 27 | return distance_matrix 28 | 29 | 30 | @public.add 31 | def sparse_wrapper(v): 32 | class _SparseWrapper(type(v)): 33 | def __getitem__(self, i): 34 | ret = super().__getitem__(i) 35 | if isinstance(i, int): 36 | return ret.toarray()[0] 37 | return ret 38 | 39 | def __len__(self): 40 | return self.shape[0] 41 | 42 | return _SparseWrapper(v) 43 | 44 | 45 | def to_array_like(v): 46 | if hasattr(v, "values"): 47 | return v.values 48 | if isinstance(v, scipy.sparse.spmatrix): 49 | return sparse_wrapper(v) 50 | return v 51 | 52 | 53 | @public.add 54 | def arrayify(item): 55 | """Convert the value to at least dim 3. If it is a dataframe, it converts it to a 56 | list of values.
57 | 58 | :param item: ndarray or a list of ndarray, or a dataframe, a series or a 59 | list of dataframes or series 60 | :return: a list of dataframes/series or array of dim 3 61 | """ 62 | if hasattr(item, "shape"): 63 | ret = to_array_like(item) 64 | if len(ret.shape) == 2: 65 | return [ret] 66 | if len(ret.shape) == 1: 67 | return numpy.atleast_3d(ret) 68 | return list(map(to_array_like, item)) 69 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/tensorflow_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | from . import EstimatorWrapper 2 | 3 | 4 | class TensorFlowEstimatorWrapper(EstimatorWrapper): 5 | def __init__(self, estimator): 6 | r"""Create a wrapper for a TensorFlow estimator 7 | 8 | Args: 9 | estimator: TensorFlow estimator instance 10 | 11 | Returns: 12 | self 13 | """ 14 | self.estimator = estimator 15 | 16 | def fit(self, X, y, **kwargs): 17 | r"""Train the estimator 18 | 19 | Args: 20 | X: features 21 | y: ground truth labels 22 | kwargs (optional): kwargs for use in training 23 | 24 | Returns: 25 | self 26 | """ 27 | self._reset_weights() 28 | self.estimator.fit(X, y, **kwargs) 29 | return self 30 | 31 | def predict(self, X, **kwargs): 32 | r"""Get predictions from the estimator 33 | 34 | Args: 35 | X: features 36 | kwargs (optional): kwargs for use in predicting 37 | 38 | Returns: 39 | predictions 40 | """ 41 | predictions = self.estimator.predict(X, **kwargs) 42 | prediction_list = [] 43 | for prediction in predictions: 44 | if len(prediction) > 1: 45 | prediction_list.append(prediction.argmax().item()) 46 | else: 47 | prediction_list.append(prediction.item()) 48 | return prediction_list 49 | 50 | def _reset_weights(self): 51 | r"""Reset weights of the estimator""" 52 | import tensorflow as tf 53 | 54 | for layer in self.estimator.layers: 55 | if hasattr(layer, "kernel_initializer") and hasattr(layer, "kernel"): 56 | layer.kernel.assign(layer.kernel_initializer(tf.shape(layer.kernel))) 57 | if hasattr(layer, "bias_initializer") and hasattr(layer, "bias"): 58 | layer.bias.assign(layer.bias_initializer(tf.shape(layer.bias))) 59 | if hasattr(layer, "recurrent_initializer") and hasattr( 60 | layer, "recurrent_kernel" 61 | ): 62 | layer.recurrent_kernel.assign( 63 | layer.recurrent_initializer(tf.shape(layer.recurrent_kernel)) 64 | ) 65 | -------------------------------------------------------------------------------- /tests/test_metrics.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import numpy 3 | 4 | from mvtk import metrics 5 | 6 | 7 | def test_rank_auc(): 8 | nprng = numpy.random.RandomState(0) 9 | S = 32 10 | y_true, y_pred = nprng.randint(0, 5, S), nprng.uniform(size=S).round(1) 11 | N = 0 12 | auc = 0 13 | for (true1, pred1), (true2, pred2) in itertools.product( 14 | zip(y_true, y_pred), repeat=2 15 | ): 16 | if true1 > true2: 17 | if pred1 == pred2: 18 | auc += 0.5 19 | else: 20 | auc += pred1 > pred2 21 | N += 1 22 | auc /= N 23 | assert metrics.rank_auc(y_true, y_pred) == auc 24 | 25 | 26 | def test_monotonicity(): 27 | nprng = numpy.random.RandomState(0) 28 | S = 32 29 | y_true, y_pred = nprng.randint(0, 5, S), nprng.uniform(size=S).round(1) 30 | N = 0 31 | auc = 0 32 | for (true1, pred1), (true2, pred2) in itertools.product( 33 | zip(y_true, y_pred), repeat=2 34 | ): 35 | if true1 - true2 == 1: 36 | if pred1 == pred2: 37 | auc += 0.5 38 | else: 39 | auc += pred1 > pred2 40 | N += 1 41 |
auc /= N 42 | assert metrics.monotonicity(y_true, y_pred) == auc 43 | 44 | 45 | def weighted_roc_auc(y_test, y_pred, weights): 46 | def process(stuff): 47 | (pos, w_p), (neg, w_n) = stuff 48 | p = w_p * w_n 49 | return p * (0.5 if pos == neg else pos > neg), p 50 | 51 | mask = y_test == 1 52 | positives, w_pos = y_pred[mask], weights[mask] 53 | negatives, w_neg = y_pred[~mask], weights[~mask] 54 | numerator, denominator = map( 55 | sum, 56 | zip( 57 | *map( 58 | process, itertools.product(zip(positives, w_pos), zip(negatives, w_neg)) 59 | ) 60 | ), 61 | ) 62 | 63 | return numerator / denominator 64 | 65 | 66 | def test_weighted_roc_auc(): 67 | nprng = numpy.random.RandomState(0) 68 | S = 32 69 | y_true, y_pred, weights = ( 70 | nprng.randint(0, 2, S), 71 | nprng.uniform(size=S).round(1), 72 | nprng.uniform(size=S), 73 | ) 74 | assert ( 75 | abs( 76 | weighted_roc_auc(y_true, y_pred, weights) 77 | - metrics.rank_auc(y_true, y_pred, weights) 78 | ) 79 | < 2**-32 80 | ) 81 | -------------------------------------------------------------------------------- /tests/bias_variance/estimators/test_sklearn_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.linear_model import LinearRegression 3 | from sklearn.tree import DecisionTreeClassifier 4 | 5 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper 6 | 7 | 8 | def create_data(): 9 | X_train = np.arange(12).reshape(6, 2) 10 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 11 | X_test = np.arange(6).reshape(3, 2) 12 | y_test = np.array([0, 1, 1]) 13 | 14 | return X_train, y_train, X_test, y_test 15 | 16 | 17 | def test_sklearn_estimator_wrapper(): 18 | X_train, y_train, X_test, y_test = create_data() 19 | 20 | model = LinearRegression() 21 | 22 | model.fit(X_train, y_train) 23 | pred = model.predict(X_test) 24 | 25 | model_test = LinearRegression() 26 | model_wrapped = SciKitLearnEstimatorWrapper(model_test) 27 | 28 | model_wrapped.fit(X_train, y_train) 29 | pred_wrapped = model_wrapped.predict(X_test) 30 | 31 | assert np.array_equal(pred, pred_wrapped) 32 | 33 | 34 | def test_sklearn_estimator_wrapper_kwargs_fit(): 35 | X_train, y_train, X_test, y_test = create_data() 36 | 37 | model = DecisionTreeClassifier(random_state=123) 38 | 39 | model.fit(X_train, y_train, sample_weight=[0, 0, 1, 0, 1, 0]) 40 | pred = model.predict(X_test) 41 | 42 | model_test = DecisionTreeClassifier(random_state=123) 43 | model_wrapped = SciKitLearnEstimatorWrapper(model_test) 44 | 45 | model_wrapped.fit(X_train, y_train, sample_weight=[0, 0, 1, 0, 1, 0]) 46 | pred_wrapped = model_wrapped.predict(X_test) 47 | 48 | assert np.array_equal(pred, pred_wrapped) 49 | 50 | 51 | def test_sklearn_estimator_wrapper_kwargs_predict(): 52 | X_train, y_train, X_test, y_test = create_data() 53 | 54 | model = DecisionTreeClassifier(random_state=123) 55 | 56 | model.fit(X_train, y_train) 57 | try: 58 | model.predict(X_test, check_input=False) 59 | except ValueError as e: 60 | assert e.args[0] == "X.dtype should be np.float32, got int64" 61 | return 62 | 63 | model_test = DecisionTreeClassifier(random_state=123) 64 | model_wrapped = SciKitLearnEstimatorWrapper(model_test) 65 | 66 | model_wrapped.fit(X_train, y_train) 67 | try: 68 | model_wrapped.predict(X_test, check_input=False) 69 | except ValueError as e: 70 | assert e.args[0] == "X.dtype should be np.float32, got int64" 71 | return 72 | 73 | assert False 74 | 
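The wrapper tests above all exercise the same adapter pattern: ``EstimatorWrapper`` (defined earlier in this dump) standardizes any model behind a ``fit``/``predict`` interface so the ``bias_variance`` routines can retrain and score it uniformly. A minimal sketch of a custom wrapper follows; the ``MedianBaseline`` class and its ``train``/``score_rows`` method names are hypothetical stand-ins for whatever non-standard interface a third-party estimator might expose, not part of mvtk:

import numpy

from mvtk.bias_variance.estimators import EstimatorWrapper


class MedianBaseline:
    # Hypothetical third-party model with a non-standard interface.
    def train(self, features, labels):
        self.value = float(numpy.median(labels))

    def score_rows(self, features):
        return numpy.full(len(features), self.value)


class MedianBaselineWrapper(EstimatorWrapper):
    # Adapter: translate fit/predict into the wrapped object's own methods.
    def __init__(self, estimator):
        self.estimator = estimator

    def fit(self, X, y, **kwargs):
        self.estimator.train(X, y)
        return self

    def predict(self, X, **kwargs):
        return self.estimator.score_rows(X)


wrapped = MedianBaselineWrapper(MedianBaseline())
predictions = wrapped.fit(
    numpy.arange(8).reshape(4, 2), numpy.arange(4)
).predict(numpy.arange(6).reshape(3, 2))

Returning ``self`` from ``fit`` mirrors the bundled wrappers and keeps calls chainable.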
-------------------------------------------------------------------------------- /docs/sobol_user_guide.rst: -------------------------------------------------------------------------------- 1 | ################ 2 | Sobol User Guide 3 | ################ 4 | 5 | ********** 6 | Motivation 7 | ********** 8 | 9 | `Sensitivity analysis <https://en.wikipedia.org/wiki/Sensitivity_analysis>`_ is 10 | concerned with the degree to which uncertainty in the output of a model can be 11 | attributed to uncertainty in its inputs :cite:`saltelli2008global`. Variance 12 | based sensitivity analysis, commonly known as `Sobol sensitivity analysis 13 | <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_, seeks to 14 | answer this question by attributing the variance of the output to variances in 15 | one or more inputs. This breakdown is known as Sobol indices, which are typically measured 16 | in one of two ways: *first-order* indices and *total-effect* indices 17 | :cite:`sobol2001global`. 18 | 19 | The first-order Sobol index with respect to some feature is given by averaging 20 | the output of the model over all values of all other features and 21 | computing the variance of the result while varying the feature in question. 22 | This is normalized by dividing by the total variance of the output measured by 23 | varying all feature values :cite:`im1993sensitivity`. Their sum is between 0 and 1. The total-effect index is computed by first computing the variance of the 24 | model output with respect to the feature in question, and then computing the 25 | expectation of the result over values of all other 26 | features. This is again normalized by the variance 27 | of the output of the model across all features. 28 | These will sum to a number greater than 29 | or equal to 1. Both are discussed in more detail 30 | here 31 | `https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis 32 | <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_. 33 | 34 | .. currentmodule:: sobol 35 | 36 | :meth:`sobol` takes a model and dataset, and runs a 37 | Monte Carlo simulation as described in the above 38 | link to compute the first order and total order Sobol 39 | indices. Each index is expressed as a one 40 | dimensional array of length equal to the number of 41 | features in the supplied data matrix. The model is 42 | assumed to be a function that outputs one scalar 43 | for each row of the data matrix. 44 | 45 | .. code-block:: python 46 | 47 | import numpy 48 | from mvtk import sobol 49 | 50 | nprng = numpy.random.RandomState(0) 51 | 52 | data = nprng.normal(size=(1000, 4)) # 4 features 53 | model = lambda x: (x ** 2).dot([1, 2, 3, 4]) 54 | total, first_order = sobol.sobol(model, data, N=500) 55 | 56 | .. bibliography:: refs.bib 57 | :cited: 58 | -------------------------------------------------------------------------------- /docs/images/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here.
If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | 16 | sys.path.insert(0, os.path.abspath("../")) 17 | 18 | 19 | # -- Project information ----------------------------------------------------- 20 | 21 | project = "Model Validation Toolkit" 22 | copyright = "2021, Model Validation Toolkit Team" 23 | author = "Model Validation Toolkit Team" 24 | 25 | # The full version, including alpha/beta/rc tags 26 | release = "0.2.0" 27 | 28 | 29 | # -- General configuration --------------------------------------------------- 30 | 31 | # Add any Sphinx extension module names here, as strings. They can be 32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 33 | # ones. 34 | extensions = [ 35 | "sphinx.ext.autodoc", 36 | "sphinx.ext.autosummary", 37 | "sphinxcontrib.bibtex", 38 | "sphinx.ext.intersphinx", 39 | "sphinx.ext.mathjax", 40 | "sphinx.ext.napoleon", 41 | "sphinx.ext.viewcode", 42 | "nbsphinx", 43 | "myst_parser", 44 | ] 45 | 46 | # Add any paths that contain templates here, relative to this directory. 47 | templates_path = ["_templates"] 48 | 49 | # List of patterns, relative to source directory, that match files and 50 | # directories to ignore when looking for source files. 51 | # This pattern also affects html_static_path and html_extra_path. 52 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 53 | 54 | 55 | # -- Options for HTML output ------------------------------------------------- 56 | 57 | # The theme to use for HTML and HTML Help pages. See the documentation for 58 | # a list of builtin themes. 59 | # 60 | html_theme = "sphinx_rtd_theme" 61 | 62 | # Add any paths that contain custom static files (such as style sheets) here, 63 | # relative to this directory. They are copied after the builtin static files, 64 | # so a file named "default.css" will overwrite the builtin "default.css". 
65 | html_static_path = ["css", "images"] 66 | html_css_files = ["custom.css"] 67 | html_logo = "images/logo.svg" 68 | html_theme_options = { 69 | "display_version": False, 70 | } 71 | html_favicon = html_logo 72 | 73 | # A fix for Sphinx error contents.rst not found 74 | master_doc = "index" 75 | 76 | # increase the timeout for long-running notebooks 77 | nbsphinx_timeout = 900 78 | 79 | # Don't show full paths 80 | add_module_names = False 81 | 82 | # bibtex 83 | bibtex_bibfiles = ["refs.bib"] 84 | 85 | source_suffix = { 86 | ".rst": "restructuredtext", 87 | ".txt": "markdown", 88 | ".md": "markdown", 89 | } 90 | 91 | user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0" 92 | -------------------------------------------------------------------------------- /tests/bias_variance/estimators/test_tensorflow_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | 4 | from mvtk.bias_variance.estimators import TensorFlowEstimatorWrapper 5 | 6 | 7 | def create_data(): 8 | X_train = np.arange(12).reshape(6, 2) 9 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 10 | X_test = np.arange(6).reshape(3, 2) 11 | y_test = np.array([0, 1, 1]) 12 | 13 | return X_train, y_train, X_test, y_test 14 | 15 | 16 | def create_model(): 17 | model = tf.keras.Sequential( 18 | [ 19 | tf.keras.layers.Dense(64, activation="relu"), 20 | tf.keras.layers.Dense(64, activation="relu"), 21 | tf.keras.layers.Dense(1), 22 | ] 23 | ) 24 | 25 | model.compile( 26 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), 27 | loss="mean_absolute_error", 28 | metrics=["mean_squared_error"], 29 | ) 30 | 31 | return model 32 | 33 | 34 | def predict(estimator, X, **kwargs): 35 | predictions = estimator.predict(X, **kwargs) 36 | prediction_list = [] 37 | for prediction in predictions: 38 | if len(prediction) > 1: 39 | prediction_list.append(prediction.argmax().item()) 40 | else: 41 | prediction_list.append(prediction.item()) 42 | return prediction_list 43 | 44 | 45 | def test_tensorflow_estimator_wrapper(): 46 | X_train, y_train, X_test, y_test = create_data() 47 | 48 | tf.keras.utils.set_random_seed(123) 49 | model = create_model() 50 | 51 | model.fit(X_train, y_train) 52 | pred = predict(model, X_test) 53 | 54 | tf.keras.utils.set_random_seed(123) 55 | model_test = create_model() 56 | model_wrapped = TensorFlowEstimatorWrapper(model_test) 57 | 58 | model_wrapped.fit(X_train, y_train) 59 | pred_wrapped = model_wrapped.predict(X_test) 60 | 61 | assert np.array_equal(pred, pred_wrapped) 62 | 63 | 64 | def test_tensorflow_estimator_wrapper_kwargs_fit(): 65 | X_train, y_train, X_test, y_test = create_data() 66 | 67 | tf.keras.utils.set_random_seed(123) 68 | model = create_model() 69 | 70 | model.fit(X_train, y_train, epochs=10) 71 | pred = predict(model, X_test) 72 | 73 | tf.keras.utils.set_random_seed(123) 74 | model_test = create_model() 75 | model_wrapped = TensorFlowEstimatorWrapper(model_test) 76 | 77 | model_wrapped.fit(X_train, y_train, epochs=10) 78 | pred_wrapped = model_wrapped.predict(X_test) 79 | 80 | assert np.array_equal(pred, pred_wrapped) 81 | 82 | 83 | def test_tensorflow_estimator_wrapper_kwargs_predict(): 84 | X_train, y_train, X_test, y_test = create_data() 85 | 86 | tf.keras.utils.set_random_seed(123) 87 | model = create_model() 88 | 89 | model.fit(X_train, y_train) 90 | pred = predict(model, X_test, steps=10) 91 | 92 | tf.keras.utils.set_random_seed(123) 93 | model_test 
= create_model() 94 | model_wrapped = TensorFlowEstimatorWrapper(model_test) 95 | 96 | model_wrapped.fit(X_train, y_train) 97 | pred_wrapped = model_wrapped.predict(X_test, steps=10) 98 | 99 | assert np.array_equal(pred, pred_wrapped) 100 | -------------------------------------------------------------------------------- /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | version: 2.1 2 | 3 | jobs: 4 | test: 5 | docker: 6 | - image: cimg/python:3.8 7 | steps: 8 | - checkout 9 | - run: sudo apt-get update 10 | - run: sudo apt-get install pandoc 11 | - run: python -m pip install tox 12 | - run: python -m tox 13 | - run: ls -la docs 14 | - persist_to_workspace: 15 | root: docs 16 | paths: html 17 | docs-deploy: 18 | docker: 19 | - image: cimg/python:3.8 20 | steps: 21 | - run: 22 | name: add known_hosts 23 | command: | 24 | mkdir ~/.ssh 25 | printf "%s" 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ== 26 | github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg= 27 | github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl 28 | bitbucket.org ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAubiN81eDcafrgMeLzaFPsw2kNvEcqTKl/VqLat/MaB33pZy0y3rJZtnqwR2qOOvbwKZYKiEO1O6VqNEBxKvJJelCq0dTXWT5pbO2gDXC6h6QDXCaHo6pOHGPUy+YBaGQRGuSusMEASYiWunYN0vCAI8QaXnWMXNMdFP3jHAJH0eDsoiGnLPBlBp4TNm6rYI74nMzgz3B9IikW4WVK+dc8KZJZWYjAuORU3jc1c/NPskD2ASinf8v3xnfXeukU0sJ5N6m5E8VLjObPEO+mN2t/FZTMZLiFqPWc/ALSqnMnnhwrNi2rbfg/rd/IpL8Le3pSBne8+seeFVBoGqzHM9yXw== 29 | gitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY= 30 | ' > ~/.ssh/known_hosts 31 | chmod 0600 ~/.ssh/known_hosts 32 | - run: 33 | name: Checkout main 34 | command: | 35 | git clone $CIRCLE_REPOSITORY_URL --depth 1 -b main main 36 | cd main 37 | git config --global user.email "$(git log --format=%ae -n 1)" 38 | git config --global user.name "$(git log --format=%an -n 1)" 39 | echo "export msg="\"$(git log --format=%B -n 1)\" >> $BASH_ENV 40 | - run: 41 | name: Checkout website 42 | command: | 43 | git clone $CIRCLE_REPOSITORY_URL --depth 1 -b website website 44 | rm -rf website/docs/html 45 | - attach_workspace: 46 | at: website/docs 47 | - run: 48 | name: Copy CircleCI config 49 | command: | 50 | mkdir -p website/.circleci 51 | cp main/.circleci/config.yml website/.circleci/config.yml 52 | - add_ssh_keys: 53 | fingerprints: 54 | - "dd:11:5d:b8:a7:d2:be:16:47:4e:a0:66:00:96:b4:f7" 55 | - run: 56 | name: Deploy docs to website branch 57 | command: | 58 | cd website 59 | git add .circleci/config.yml 60 | git add -A -- docs/html 61 | git commit -am "$msg" 62 | git push origin website 63 | workflows: 64 | version: 2 65 | build: 66 | jobs: 67 | - test: 68 | filters: 69 | branches: 70 | ignore: website 71 | - docs-deploy: 72 | requires: 73 | - test 74 | filters: 75 | branches: 76 | only: main 77 | -------------------------------------------------------------------------------- /tests/supervisor/test_processing.py: 
-------------------------------------------------------------------------------- 1 | import copy 2 | 3 | import pandas as pd 4 | import pandas.testing 5 | 6 | from mvtk.supervisor.processing import ( 7 | replace_nulls, 8 | normalize_ts_columns, 9 | ) 10 | 11 | 12 | def test_replace_nulls(): 13 | for col_list in [["col1"], ["col2"], ["col1", "col2"]]: 14 | init_rows = [ 15 | {"col1": "test1_1", "col2": "test1_2"}, 16 | {"col1": None, "col2": "test2_2"}, 17 | {"col1": "test3_1", "col2": None}, 18 | {"col1": None, "col2": None}, 19 | ] 20 | 21 | expect_rows = copy.deepcopy(init_rows) 22 | 23 | for i in range(0, len(expect_rows)): 24 | for col in col_list: 25 | if expect_rows[i][col] is None: 26 | expect_rows[i][col] = "1" 27 | 28 | init_df = pd.DataFrame(init_rows) 29 | expect_df = pd.DataFrame(expect_rows) 30 | 31 | actual = replace_nulls(init_df, "1", col_list) 32 | expect = expect_df 33 | 34 | pandas.testing.assert_frame_equal(actual, expect) 35 | 36 | 37 | def time_to_seconds(time): 38 | return int(time[:2]) * 3600 + int(time[2:4]) * 60 + int(time[4:6]) 39 | 40 | 41 | def test_process_ts_columns(): 42 | format_map = {"col2": "%H:%M:%S.%f", "col3": "%H%M%S.%f", "col4": "%H%M%S"} 43 | 44 | for col_list in [ 45 | ["col2"], 46 | ["col3"], 47 | ["col4"], 48 | ["col2", "col3"], 49 | ["col2", "col4"], 50 | ["col3", "col4"], 51 | ["col2", "col3", "col4"], 52 | ]: 53 | init_rows = [ 54 | { 55 | "col1": "test1", 56 | "col2": "10:11:12.123456", 57 | "col3": "101112.123456", 58 | "col4": "101112", 59 | }, 60 | { 61 | "col1": "test2", 62 | "col2": None, 63 | "col3": "202123.123456", 64 | "col4": "202124", 65 | }, 66 | { 67 | "col1": "test3", 68 | "col2": "10:31:32.123456", 69 | "col3": None, 70 | "col4": "103134", 71 | }, 72 | { 73 | "col1": "test4", 74 | "col2": "20:41:42.123456", 75 | "col3": "204143.123456", 76 | "col4": None, 77 | }, 78 | ] 79 | 80 | expect_rows = copy.deepcopy(init_rows) 81 | 82 | for i in range(0, len(expect_rows)): 83 | for col in col_list: 84 | if expect_rows[i][col] is None: 85 | expect_rows[i][col] = -1 86 | else: 87 | expect_rows[i][col] = str( 88 | round( 89 | time_to_seconds(expect_rows[i][col].replace(":", "")) 90 | / 86400, 91 | 5, 92 | ) 93 | ) 94 | 95 | init_df = pd.DataFrame(init_rows) 96 | expect = pd.DataFrame(expect_rows) 97 | 98 | for col in ["col2", "col3", "col4"]: 99 | init_df[col] = pd.to_datetime(init_df[col], format=format_map[col]) 100 | if col not in col_list: 101 | expect[col] = pd.to_datetime(expect[col], format=format_map[col]) 102 | else: 103 | expect[col] = expect[col].astype(float) 104 | 105 | actual = normalize_ts_columns(init_df, col_list) 106 | 107 | pandas.testing.assert_frame_equal(actual, expect) 108 | -------------------------------------------------------------------------------- /mvtk/bias_variance/estimators/pytorch_estimator_wrapper.py: -------------------------------------------------------------------------------- 1 | from . 
import EstimatorWrapper 2 | 3 | 4 | class PyTorchEstimatorWrapper(EstimatorWrapper): 5 | def __init__( 6 | self, estimator, optimizer_generator, loss_fn, fit_fn=None, predict_fn=None 7 | ): 8 | r"""Create a wrapper for a PyTorch estimator 9 | 10 | Args: 11 | estimator: PyTorch estimator instance 12 | optimizer_generator: generator function for the optimizer 13 | loss_fn: loss function 14 | fit_fn (optional): custom fit function to be called instead of the default one 15 | predict_fn (optional): custom predict function to be called instead 16 | of the default one 17 | 18 | Returns: 19 | self 20 | """ 21 | self.estimator = estimator 22 | self.optimizer_generator = optimizer_generator 23 | self.optimizer = optimizer_generator(estimator) 24 | self.loss_fn = loss_fn 25 | self.fit_fn = fit_fn 26 | self.predict_fn = predict_fn 27 | 28 | def fit(self, X, y, **kwargs): 29 | r"""Train the estimator 30 | 31 | Args: 32 | X: features 33 | y: ground truth labels 34 | kwargs (optional): kwargs for use in training 35 | 36 | Returns: 37 | self 38 | """ 39 | self.estimator.apply(PyTorchEstimatorWrapper._reset_parameters) 40 | 41 | if self.fit_fn is not None: 42 | self.fit_fn(self, X, y, **kwargs) 43 | return self 44 | 45 | if kwargs.get("epochs") is None: 46 | epochs = 100 47 | else: 48 | epochs = kwargs.get("epochs") 49 | 50 | for i in range(epochs): 51 | loss = 0 52 | if kwargs.get("batch_size") is None: 53 | batch_size = len(y) 54 | else: 55 | batch_size = kwargs.get("batch_size") 56 | for j in range(0, len(y), batch_size): 57 | batch_start = j 58 | batch_end = j + batch_size 59 | X_batch = X[batch_start:batch_end] 60 | y_batch = y[batch_start:batch_end] 61 | prediction = self.estimator(X_batch) 62 | loss = self.loss_fn(prediction, y_batch) 63 | 64 | self.optimizer.zero_grad() 65 | loss.backward() 66 | self.optimizer.step() 67 | if kwargs.get("verbose"): 68 | print(f"epoch: {i:2} training loss: {loss.item():10.8f}") 69 | 70 | return self 71 | 72 | def predict(self, X, **kwargs): 73 | r"""Get predictions from the estimator 74 | 75 | Args: 76 | X: features 77 | kwargs (optional): kwargs for use in predicting 78 | 79 | Returns: 80 | list of predictions 81 | """ 82 | if self.predict_fn is not None: 83 | return self.predict_fn(self, X, **kwargs) 84 | 85 | import torch 86 | 87 | prediction_list = [] 88 | with torch.no_grad(): 89 | for value in X: 90 | prediction = self.estimator(value) 91 | if len(prediction) > 1: 92 | prediction_list.append(prediction.argmax().item()) 93 | else: 94 | prediction_list.append(prediction.item()) 95 | return prediction_list 96 | 97 | def _reset_parameters(self): 98 | r"""Reset parameters of a module; applied to each submodule via ``torch.nn.Module.apply``""" 99 | if hasattr(self, "reset_parameters"): 100 | self.reset_parameters() 101 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Getting Started 2 | =============== 3 | 4 | Model Validation Toolkit is an open source library that provides various 5 | tools for model validation, data quality checks, analysis of thresholding, 6 | sensitivity analysis, and interpretable model development. The purpose of this 7 | guide is to illustrate some of the main features that Model Validation Toolkit 8 | provides. Please refer to the README for installation instructions. 9 | 10 | Divergences 11 | ---------------------------------------- 12 | 13 | Model Validation Toolkit provides a fast and accurate means of assessing 14 | large scale statistical differences between datasets. Rather than checking
Rather than checking 15 | whether two samples are identical, this check asserts that they are similar in 16 | a statistical sense and can be used for data quality checks and concept drift 17 | detection. 18 | 19 | .. code-block:: python 20 | 21 | import numpy 22 | from mvtk.supervisor.divergence import calc_tv 23 | 24 | nprng = numpy.random.RandomState(0) 25 | 26 | train = nprng.uniform(size=(1000, 4)) # 4 features 27 | val = nprng.uniform(size=(1000, 4)) # 4 features 28 | 29 | # Close to 0 is similar; close to 1 is different 30 | print(calc_tv(train, val)) 31 | 32 | See the :doc:`user guide ` for more information. 33 | 34 | Credibility 35 | ---------------------------------------- 36 | 37 | .. currentmodule:: mvtk.credibility 38 | 39 | Model Validation Toolkit provides a lightweight suite to assess credibility 40 | of model performance given a finite sample. Whether your validation set has 41 | several dozen or million records, you can quantify your confidence in 42 | performance using this module. For example, if a model correctly identifies 8 43 | of 10 images, its empirical accuracy is 80%. However, that does not mean we 44 | should be confident the accuracy could turn out to be lower if we had more 45 | data. We would obviously be more confident in this assessment if it identified 46 | 800 of 1000 images, but how much more so? With a few assumptions and 47 | :meth:`prob_below`, we can estimate the probability that the true accuracy 48 | would be less than 70% if we had more data. 49 | 50 | .. code-block:: python 51 | 52 | from mvtk.credibility import prob_below 53 | print(prob_below(8, 2, 0.7)) 54 | 55 | See the :doc:`user guide ` for more information. 56 | 57 | Thresholding 58 | ---------------------------------------- 59 | 60 | Model Validation Toolkit provides a module for determining and 61 | dynamically seta nd sample thresholds for binary classifiers that maximize a 62 | utility function. The general idea is to intelligently reassess false and true 63 | negative rates in a production system. See the :doc:`user guide 64 | ` for more information. 65 | 66 | Sobol 67 | ---------------------------------------- 68 | 69 | .. currentmodule:: sobol 70 | 71 | Model Validation Toolkit provides a lightweight module for `sobol 72 | sensitivity analysis 73 | `_. This can 74 | be used to assess and quantify uncertainty of model outputs with respect to 75 | model inputs. The module currently supports first order and total sobol 76 | indexes--both which are computed and reported using :meth:`sobol`. 77 | 78 | .. code-block:: python 79 | 80 | import numpy 81 | from mvtk import sobol 82 | 83 | nprng = numpy.random.RandomState(0) 84 | 85 | data = nprng.normal(size=(1000, 4)) # 4 features 86 | model = lambda x: (x ** 2).dot([1, 2, 3, 4]) 87 | total, first_order = sobol.sobol(model, data, N=500) 88 | 89 | See the :doc:`user guide 90 | ` for more information. 
91 | -------------------------------------------------------------------------------- /tests/bias_variance/test_bias_variance_parallel.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from sklearn.tree import DecisionTreeClassifier 4 | from sklearn.linear_model import Ridge 5 | 6 | from mvtk.bias_variance import ( 7 | bias_variance_compute_parallel, 8 | bias_variance_mse, 9 | bias_variance_0_1_loss, 10 | ) 11 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper 12 | 13 | 14 | def create_data(): 15 | X_train = np.arange(12).reshape(6, 2) 16 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 17 | X_test = np.arange(6).reshape(3, 2) 18 | y_test = np.array([0, 1, 1]) 19 | 20 | return X_train, y_train, X_test, y_test 21 | 22 | 23 | def test_bias_variance_compute_parallel_mse(): 24 | X_train, y_train, X_test, y_test = create_data() 25 | 26 | model = Ridge(random_state=123) 27 | model_wrapped = SciKitLearnEstimatorWrapper(model) 28 | 29 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 30 | model_wrapped, 31 | X_train, 32 | y_train, 33 | X_test, 34 | y_test, 35 | random_state=123, 36 | decomp_fn=bias_variance_mse, 37 | ) 38 | 39 | assert np.round(avg_loss, decimals=12) == np.round( 40 | np.float64(0.3967829075484304), decimals=12 41 | ) 42 | assert np.round(avg_bias, decimals=12) == np.round( 43 | np.float64(0.13298143583764407), decimals=12 44 | ) 45 | assert np.round(avg_var, decimals=12) == np.round( 46 | np.float64(0.26380147171078644), decimals=12 47 | ) 48 | assert np.round(net_var, decimals=12) == np.round( 49 | np.float64(0.26380147171078644), decimals=12 50 | ) 51 | 52 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 53 | assert avg_var == net_var 54 | 55 | 56 | def test_bias_variance_calc_parallel_0_1(): 57 | X_train, y_train, X_test, y_test = create_data() 58 | 59 | model = DecisionTreeClassifier(random_state=123) 60 | model_wrapped = SciKitLearnEstimatorWrapper(model) 61 | 62 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 63 | model_wrapped, 64 | X_train, 65 | y_train, 66 | X_test, 67 | y_test, 68 | random_state=123, 69 | decomp_fn=bias_variance_0_1_loss, 70 | ) 71 | 72 | assert avg_loss == np.float64(0.4566666666666666) 73 | assert avg_bias == np.float64(0.3333333333333333) 74 | assert avg_var == np.float64(0.33499999999999996) 75 | assert net_var == np.float64(0.12333333333333332) 76 | 77 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 78 | 79 | 80 | def test_bias_variance_calc_parallel_mse_no_random_state(): 81 | X_train, y_train, X_test, y_test = create_data() 82 | 83 | model = Ridge(random_state=123) 84 | model_wrapped = SciKitLearnEstimatorWrapper(model) 85 | 86 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 87 | model_wrapped, 88 | X_train, 89 | y_train, 90 | X_test, 91 | y_test, 92 | iterations=10, 93 | decomp_fn=bias_variance_mse, 94 | ) 95 | 96 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 97 | assert avg_var == net_var 98 | 99 | 100 | def test_bias_variance_calc_parallel_0_1_no_random_state(): 101 | X_train, y_train, X_test, y_test = create_data() 102 | 103 | model = DecisionTreeClassifier(random_state=123) 104 | model_wrapped = SciKitLearnEstimatorWrapper(model) 105 | 106 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel( 107 | model_wrapped, 108 | X_train, 109 | 
y_train, 110 | X_test, 111 | y_test, 112 | iterations=10, 113 | decomp_fn=bias_variance_0_1_loss, 114 | ) 115 | 116 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 117 | -------------------------------------------------------------------------------- /docs/notebooks/interprenet/.ipynb_checkpoints/Periodic-checkpoint.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 160, 6 | "id": "699ea1b7", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import jax\n", 11 | "from sklearn.model_selection import train_test_split\n", 12 | "\n", 13 | "f = jax.numpy.cos\n", 14 | "n = 1000\n", 15 | "X_train = jax.numpy.linspace(-n * jax.numpy.pi, 0, 100 * n).reshape(-1, 1)\n", 16 | "y_train = f(X_train)\n", 17 | "\n", 18 | "\n", 19 | "X_test = jax.numpy.linspace(0, n * jax.numpy.pi, 100 * n).reshape(-1, 1)\n", 20 | "y_test = f(X_test)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 161, 26 | "id": "f2d7f659", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import jax\n", 31 | "\n", 32 | "from mvtk import interprenet\n", 33 | "\n", 34 | "init_params, model = interprenet.constrained_model(\n", 35 | " (frozenset([interprenet.monotonic_constraint]),),\n", 36 | " get_layers=lambda n: [n + 1],\n", 37 | " preprocess=interprenet.identity,\n", 38 | " postprocess=interprenet.identity)\n", 39 | "\n", 40 | "init_params = ((jax.numpy.asarray([0.]), jax.numpy.asarray([0.]),),\n", 41 | " init_params)\n", 42 | "def scaled_model(params, x):\n", 43 | " (m, b), model_params = params\n", 44 | " u = jax.numpy.sin(x * jax.numpy.exp(m) + jax.numpy.arctan(b))\n", 45 | " return model(model_params, u)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "f7a0800c", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "def loss(y, y_pred):\n", 56 | " return ((y - y_pred) ** 2).mean()\n", 57 | "\n", 58 | "trained_params = interprenet.train((X_train, y_train),\n", 59 | " (X_test, y_test),\n", 60 | " (init_params, scaled_model),\n", 61 | " metric=lambda y, y_pred: loss(y, y_pred),\n", 62 | " step_size=0.01,\n", 63 | " mini_batch_size=32,\n", 64 | " loss_fn=loss,\n", 65 | " num_epochs=128)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "id": "1e7dad5d", 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "loss(y_test, y_test)" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "id": "ebae2803", 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "trained_model = lambda X: scaled_model(trained_params, X)\n", 86 | "y_pred = trained_model(X_test)\n", 87 | "loss(y_test, y_pred)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "id": "5d4554e7", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "import matplotlib\n", 98 | "import matplotlib.pyplot as pylab\n", 99 | "\n", 100 | "q = 1000\n", 101 | "pylab.plot(X_test[:q], y_test[:q])\n", 102 | "pylab.plot(X_test[:q], y_pred[:q])\n", 103 | "\n", 104 | "pylab.show()" 105 | ] 106 | }, 107 | { 108 | "cell_type": "code", 109 | "execution_count": null, 110 | "id": "b17de2c6", 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [] 114 | } 115 | ], 116 | "metadata": { 117 | "kernelspec": { 118 | "display_name": "Python 3", 119 | "language": "python", 120 | "name": "python3" 121 | }, 122 | "language_info": { 123 | 
"codemirror_mode": { 124 | "name": "ipython", 125 | "version": 3 126 | }, 127 | "file_extension": ".py", 128 | "mimetype": "text/x-python", 129 | "name": "python", 130 | "nbconvert_exporter": "python", 131 | "pygments_lexer": "ipython3", 132 | "version": "3.8.8" 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 5 137 | } 138 | -------------------------------------------------------------------------------- /mvtk/supervisor/divergence/nn.py: -------------------------------------------------------------------------------- 1 | import jax 2 | import public 3 | 4 | from jax.example_libraries import stax 5 | from jax._src.nn.initializers import glorot_normal, normal 6 | from jax.example_libraries.stax import ( 7 | Dense, 8 | FanInSum, 9 | FanOut, 10 | Identity, 11 | Relu, 12 | elementwise, 13 | ) 14 | 15 | 16 | def ResBlock(*layers, fan_in=FanInSum, tail=Identity): 17 | """Split input, feed it through one or more layers in parallel, recombine 18 | them with a fan-in, apply a trailing layer (i.e. an activation) 19 | 20 | Args: 21 | *layers: a sequence of layers, each an (init_fun, apply_fun) pair. 22 | fan_in, optional: a fan-in to recombine the outputs of each layer 23 | tail, optional: a final layer to apply after recombination 24 | 25 | 26 | Returns: 27 | A new layer, meaning an (init_fun, apply_fun) pair, representing the 28 | parallel composition of the given sequence of layers fed into fan_in 29 | and then tail. In particular, the returned layer takes a sequence of 30 | inputs and returns a sequence of outputs with the same length as the 31 | argument `layers`. 32 | """ 33 | return stax.serial(FanOut(len(layers)), stax.parallel(*layers), fan_in, tail) 34 | 35 | 36 | @public.add 37 | def Approximator( 38 | input_size, 39 | depth=3, 40 | width=None, 41 | output_size=1, 42 | linear=Dense, 43 | residual=True, 44 | activation=lambda x: x, 45 | rng=jax.random.PRNGKey(0), 46 | ): 47 | r"""Basic Neural network based function 48 | :math:`\mathbb{R}^N\rightarrow\mathbb{R}^M` function approximator. 49 | 50 | Args: 51 | input_size (int): Size of input dimension. 52 | depth (int, optional): Depth of network. Defaults to ``3``. 53 | width (int, optional): Width of network. Defaults to ``input_size + 1``. 54 | output_size (int, optional): Number of outputs. Defaults to ``1``. 55 | linear (``torch.nn.Module``, optional): Linear layer drop in 56 | replacement. Defaults to ``jax.example_libraries.stax.Dense``. 57 | residual (bool, optional): Turn on ResNet blocks. Defaults to ``True``. 58 | activation (optional): A map from :math:`(-\infty, \infty)` to an 59 | appropriate domain (such as the domain of a convex conjugate). 60 | Defaults to the identity. 61 | rng (optional): Jax ``PRNGKey`` key. Defaults to `jax.random.PRNGKey(0)``. 62 | 63 | Returns: 64 | initial parameter values, neural network function 65 | """ 66 | # input_size + output_size hidden hidden units is sufficient for universal 67 | # approximation given unconstrained depth even without ResBlocks. 68 | # https://arxiv.org/abs/1710.112780. With ResBlocks (as used below), only 69 | # one hidden unit is needed for Relu activation 70 | # https://arxiv.org/abs/1806.10909. 
71 | if width is None: 72 | hidden = input_size + 1 73 | else: 74 | hidden = width 75 | if depth > 2: 76 | layers = [linear(hidden), Relu] 77 | else: 78 | layers = [] 79 | for _ in range(depth - 2): 80 | if residual: 81 | layers.append( 82 | ResBlock(stax.serial(linear(hidden), Relu), linear(hidden), tail=Relu) 83 | ) 84 | else: 85 | layers.append(linear(hidden)) 86 | layers.append(linear(output_size)) 87 | layers.append(elementwise(activation)) 88 | init_approximator_params, approximator = stax.serial(*layers) 89 | _, init_params = init_approximator_params(rng, (-1, input_size)) 90 | return init_params, approximator 91 | 92 | 93 | @public.add 94 | def NormalizedLinear(out_dim, W_init=glorot_normal(), b_init=normal()): 95 | r"""Linear layer whose weight columns are normalized to unit :math:`L^1` norm, so the absolute values of each column sum to one.""" 96 | 97 | def init_fun(rng, input_shape): 98 | output_shape = input_shape[:-1] + (out_dim,) 99 | k1, k2 = jax.random.split(rng) 100 | W, b = W_init(k1, (input_shape[-1], out_dim)), b_init(k2, (out_dim,)) 101 | return output_shape, (W, b) 102 | 103 | def apply_fun(params, inputs, **kwargs): 104 | W, b = params 105 | W_normalized = W / jax.numpy.abs(W).sum(0) 106 | return jax.numpy.dot(inputs, W_normalized) + b 107 | 108 | return init_fun, apply_fun 109 | -------------------------------------------------------------------------------- /mvtk/credibility.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import public 3 | 4 | from sklearn.metrics import roc_auc_score 5 | from scipy.stats import beta 6 | 7 | 8 | @public.add 9 | def credible_interval(positive, negative, credibility=0.5, prior=(1, 1)): 10 | """What is the shortest interval that contains probability(positive) with 11 | `credibility`% probability? 12 | 13 | Args: 14 | positive (int): number of times the first possible outcome has been seen 15 | negative (int): number of times the second possible outcome has been seen 16 | credibility (float): The probability that the true p(positive) is 17 | contained within the reported interval 18 | prior (tuple): pseudocount for positives and negatives 19 | 20 | returns: 21 | (lower bound, upper bound) 22 | """ 23 | positive += prior[0] 24 | negative += prior[1] 25 | if not (positive > 1 or negative > 1): 26 | raise ValueError( 27 | "Credible intervals are only defined when at least one count + pseudocount" 28 | " is greater than 1" 29 | ) 30 | distribution = beta(positive, negative) 31 | mean = positive / (positive + negative) 32 | cdf_mean = distribution.cdf(mean) 33 | cred_2 = credibility / 2 34 | lower = cdf_mean - cred_2 35 | true_lower = max(lower, 0) 36 | excess = true_lower - lower 37 | upper = cdf_mean + cred_2 + excess 38 | true_upper = min(upper, 1) 39 | excess = upper - true_upper 40 | true_lower -= excess 41 | assert numpy.isclose((true_upper - true_lower), credibility) 42 | return distribution.ppf(true_lower), distribution.ppf(true_upper) 43 | 44 | 45 | @public.add 46 | def prob_below(positive, negative, cutoff, prior=(1, 1)): 47 | """What is the probability P(positive) is unacceptably low?
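Concretely, this is the cumulative distribution function of the Beta posterior implied by the observed counts and the pseudocount prior, evaluated at ``cutoff``.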
48 | 49 | Args: 50 | positive (int): number of times the positive outcome has been seen 51 | negative (int): number of times the negative outcome has been seen 52 | cutoff (float): lowest acceptable value of P(positive) 53 | prior (tuple): pseudocount for positives and negatives 54 | returns: 55 | Probability that P(positive) < cutoff 56 | """ 57 | return beta(prior[0] + positive, prior[1] + negative).cdf(cutoff) 58 | 59 | 60 | @public.add 61 | def roc_auc_preprocess(positives, negatives, roc_auc): 62 | """ROC AUC analysis must be preprocessed using the number of positive and 63 | negative instances in the entire dataset and the AUC itself. 64 | 65 | Args: 66 | positives (int): number of positive instances in the dataset 67 | negatives (int): number of negative instances in the dataset 68 | roc_auc (float): ROC AUC 69 | returns: 70 | (positive, negative) tuple that can be used for `prob_below` and 71 | `credible_interval` 72 | """ 73 | unique_combinations = positives * negatives 74 | # correctly ranked combinations are pairs of positive and negative 75 | # instances where the model scored the positive instance higher than the 76 | # negative instance 77 | correctly_ranked_combinations = roc_auc * unique_combinations 78 | # the number of incorrectly ranked combinations is the number of 79 | # combinations that aren't correctly ranked 80 | incorrectly_ranked_combinations = ( 81 | unique_combinations - correctly_ranked_combinations 82 | ) 83 | return correctly_ranked_combinations, incorrectly_ranked_combinations 84 | 85 | 86 | @public.add 87 | def prob_greater_cmp( 88 | positive1, 89 | negative1, 90 | positive2, 91 | negative2, 92 | prior1=(1, 1), 93 | prior2=(1, 1), 94 | err=10**-5, 95 | ): 96 | """Probability the first set comes from a distribution with a greater 97 | proportion of positives than the other. 98 | 99 | Args: 100 | positive1 (int): number of positive instances in the first dataset 101 | negative1 (int): number of negative instances in the first dataset 102 | positive2 (int): number of positive instances in the second dataset 103 | negative2 (int): number of negative instances in the second dataset 104 | prior1 (tuple): pseudocount for positives and negatives in the first dataset 105 | prior2 (tuple): pseudocount for positives and negatives in the second dataset 106 | err (float): upper bound on the frequentist sample std from the Monte Carlo simulation.
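Returns:
    Monte Carlo estimate (via ROC AUC over posterior samples) of the
    probability that the first dataset's underlying proportion of
    positives exceeds the second's.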
107 | """ 108 | nprng = numpy.random.RandomState(0) 109 | distribution1 = beta(positive1 + prior1[0], negative1 + prior1[1]) 110 | distribution2 = beta(positive2 + prior2[0], negative2 + prior2[1]) 111 | # CLT implies ROC AUC error shrinks like 1/PN 112 | # for P positives and N negatives 113 | N = int(1 + 1 / (2 * err)) 114 | sample1 = distribution1.rvs(N, random_state=nprng) 115 | sample2 = distribution2.rvs(N, random_state=nprng) 116 | y = numpy.ones(2 * N) 117 | y[N:] = 0 118 | return roc_auc_score(y, numpy.concatenate((sample1, sample2))) 119 | -------------------------------------------------------------------------------- /tests/supervisor/test_divergence.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import mvtk.supervisor.divergence as divergence 3 | 4 | from functools import partial 5 | 6 | 7 | def mutually_exclusive_support_tester(metric, num_features=4, eps=0.1): 8 | data1 = numpy.ones((4, num_features)) 9 | data1[:, :2] = 0 10 | data2 = 1 - data1 11 | assert numpy.isclose(metric([data1], [data1]), 0, atol=eps) 12 | assert numpy.isclose(metric([data2], [data2]), 0, atol=eps) 13 | assert numpy.isclose(metric([data1], [data2]), 1, atol=eps) 14 | 15 | 16 | def get_batches(nprng, batch_size, n=2): 17 | """Pick a random binomial distribution Sample batch_size samples from 18 | it.""" 19 | choices = numpy.arange(n) 20 | x = [] 21 | alpha = nprng.rand(n) 22 | alpha /= alpha.sum() 23 | for d in range(batch_size): 24 | choice = nprng.choice(choices, p=alpha) 25 | z = numpy.zeros_like(choices) 26 | z[choice] = 1 27 | x.append(z) 28 | x = numpy.asarray(x).reshape(batch_size, n) 29 | return x, alpha 30 | 31 | 32 | def divergence_tester( 33 | approximate_metric, analytical_metric, batch_sizes=[256] * 8, thresh=0.85 34 | ): 35 | nprng = numpy.random.RandomState(0) 36 | batches, alphas = zip(*map(partial(get_batches, nprng), batch_sizes)) 37 | assert ( 38 | numpy.corrcoef( 39 | numpy.asarray([analytical_metric(alphas, alpha) for alpha in alphas]), 40 | divergence.utils.get_drift_series( 41 | approximate_metric, batches, [[batch] for batch in batches] 42 | ), 43 | )[0, 1] 44 | > thresh 45 | ) 46 | 47 | 48 | def gaussian_test(approximate_metric, dim=1, N=1024, thresh=0.05): 49 | nprng = numpy.random.RandomState(0) 50 | m = approximate_metric(*nprng.normal(size=(2, 1, N, dim))) 51 | assert m < thresh 52 | assert m >= 0 53 | 54 | 55 | def test_hl_gaussian(): 56 | for dim in range(1, 4): 57 | gaussian_test(partial(divergence.calc_hl, train_test_split=0.5), dim) 58 | 59 | 60 | def test_tv_gaussian(): 61 | for dim in range(1, 4): 62 | gaussian_test(partial(divergence.calc_tv, train_test_split=0.5), dim) 63 | gaussian_test( 64 | partial(divergence.calc_tv_knn, k=64 * 2**dim), 65 | dim, 66 | N=1024 * 2**dim, 67 | thresh=0.1, 68 | ) 69 | 70 | 71 | def test_js_gaussian(): 72 | for dim in range(1, 4): 73 | gaussian_test(partial(divergence.calc_js, train_test_split=0.5), dim) 74 | 75 | 76 | def test_em_gaussian(): 77 | for dim in range(1, 4): 78 | gaussian_test( 79 | partial(divergence.calc_em, train_test_split=0.5), dim, thresh=0.11 80 | ) 81 | 82 | 83 | def test_js_by_corr(): 84 | def kl(alpha1, alpha2): 85 | return numpy.sum(alpha1 * numpy.log2(alpha1 / alpha2)) 86 | 87 | def js(alpha1, alpha2): 88 | mean = alpha1 + alpha2 89 | mean /= 2 90 | ret = kl(alpha1, mean) + kl(alpha2, mean) 91 | return ret / 2 92 | 93 | divergence_tester( 94 | lambda *x: numpy.sqrt(divergence.calc_js_mle(*x)), lambda *x: numpy.sqrt(js(*x)) 95 | ) 96 | 
divergence_tester( 97 | lambda *x: numpy.sqrt(divergence.calc_js(*x)), lambda *x: numpy.sqrt(js(*x)) 98 | ) 99 | 100 | 101 | def test_js_by_support(): 102 | mutually_exclusive_support_tester(divergence.calc_js_mle) 103 | mutually_exclusive_support_tester(divergence.calc_js) 104 | 105 | 106 | def test_hl_by_corr(): 107 | def hl(alpha1, alpha2): 108 | return numpy.sqrt(numpy.sum((numpy.sqrt(alpha1) - numpy.sqrt(alpha2)) ** 2) / 2) 109 | 110 | divergence_tester(divergence.calc_hl_mle, hl) 111 | divergence_tester(divergence.calc_hl, hl) 112 | 113 | 114 | def test_hl_by_support(): 115 | mutually_exclusive_support_tester(divergence.calc_hl) 116 | mutually_exclusive_support_tester(divergence.calc_hl_mle) 117 | 118 | 119 | def test_tv_by_corr(): 120 | def tv(alpha1, alpha2): 121 | return numpy.abs(alpha1 - alpha2).sum() / 2 122 | 123 | divergence_tester(divergence.calc_tv_mle, tv) 124 | divergence_tester(divergence.calc_tv, tv) 125 | 126 | 127 | def test_tv_by_support(): 128 | mutually_exclusive_support_tester(divergence.calc_tv_mle) 129 | mutually_exclusive_support_tester(divergence.calc_tv) 130 | 131 | 132 | def test_em_by_support(): 133 | for num_features in range(1, 3): 134 | data1 = numpy.zeros((4, num_features)) 135 | data2 = 1 - data1 136 | eps = 0.125 137 | assert numpy.isclose(divergence.calc_em([data1], [data1]), 0, atol=eps) 138 | assert numpy.isclose(divergence.calc_em([data2], [data2]), 0, atol=eps) 139 | assert numpy.isclose(divergence.calc_em([data1], [data2]), 1, atol=eps) 140 | assert numpy.isclose(divergence.calc_em([data1], [2 * data2]), 2, atol=eps) 141 | 142 | 143 | def test_calc_tv_lower_bound(): 144 | a = numpy.asarray([0, 1, 0, 0, 1]) 145 | b = numpy.asarray([0.01, 0.98, 0.03, 0.04, 0.99]) 146 | log_loss = divergence.metrics.balanced_binary_cross_entropy(a, b) 147 | tv = divergence.metrics.calc_tv_lower_bound(log_loss) 148 | assert tv < 1 and tv > 0 149 | -------------------------------------------------------------------------------- /docs/interprenet_user_guide.rst: -------------------------------------------------------------------------------- 1 | ###################### 2 | Interprenet User Guide 3 | ###################### 4 | 5 | ********** 6 | Motivation 7 | ********** 8 | 9 | Neural networks are generally difficult to interpret. While there 10 | are tools that can help to interpret certain types of neural 11 | networks such as image classifiers and language models, 12 | interpretation of neural networks that simply ingest tabular data 13 | and return a scalar value is generally limited to various measures of feature 14 | importance. This can be problematic as what makes a feature "important" can 15 | vary between use cases. 16 | 17 | Rather than interpret a neural network as a black 18 | box, we seek to constrain neural networks in ways we 19 | consider useful and interpretable. In particular, 20 | the interprenet module currently has two such 21 | constraints implemented: 22 | 23 | * Monotonicity 24 | * Lipschitz constraint 25 | 26 | `Monotonic functions <https://en.wikipedia.org/wiki/Monotonic_function>`_ 27 | either always increase or decrease with their arguments but never both. This is 28 | often an expected relationship between features and the model output. For 29 | example, we may believe that increasing blood pressure increases risk of 30 | cardiovascular disease. The exact relationship is not known, but we may believe 31 | that it is monotonic. 32 | 33 | `Lipschitz constraints 34 | <https://en.wikipedia.org/wiki/Lipschitz_continuity>`_ constrain the 35 | maximum rate of change of the model.
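Concretely, a function :math:`f` is :math:`L`-Lipschitz when

.. math::

   \|f(x_1) - f(x_2)\| \leq L \|x_1 - x_2\|

for all inputs :math:`x_1` and :math:`x_2`, so the Lipschitz constant :math:`L` bounds how quickly the output can change.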
This can make the model arbitrarily robust 36 | `against adversarial perturbations 37 | `_ 38 | :cite:`anil2019sorting`. 39 | 40 | 41 | How? 42 | ==== 43 | 44 | All constraints are currently implemented as weight constraints. While 45 | arbitrary weights are stored within each linear layer, the weights are 46 | transformed before application so the network can satisfy its prescribed 47 | constraints. Changes are backpropagated through this transformation. 48 | Monotonic increasing neural networks are implemented by taking the absolute 49 | value of weight matrices before applying them. When paired with a monotonically 50 | increasing activation (such as ReLU, Sigmoid, or Tanh), this ensures the 51 | gradient of the output with respect to any features is positive. This is 52 | sufficient to ensure monotonicity with respect to the features. 53 | 54 | Lipschitz constraints are enforced by dividing each weight vector by 55 | its :math:`L^\infty` norm as described in :cite:`anil2019sorting`. This 56 | constrains the :math:`L^\infty`-:math:`L^\infty` `operator norm 57 | <https://en.wikipedia.org/wiki/Operator_norm>`_ 58 | of the weight matrix :cite:`tropp2004topics`. Constraining the 59 | :math:`L^\infty`-:math:`L^\infty` operator norm of the weight 60 | matrix ensures every element of the Jacobian of the linear layers is less than 61 | or equal to :math:`1`. Meanwhile, using activation functions with Lipschitz 62 | constants of :math:`1` ensures the entire network is constrained to never have a 63 | slope greater than :math:`1` for any of its features. 64 | 65 | ******************************************* 66 | Different Constraints on Different Features 67 | ******************************************* 68 | 69 | .. currentmodule:: mvtk.interprenet 70 | 71 | :meth:`constrained_model` generates a neural network with one set of 72 | constraints per feature. Constraints currently available are: 73 | 74 | - :meth:`identity` (for no constraint) 75 | - :meth:`monotonic_constraint` 76 | - :meth:`lipschitz_constraint` 77 | 78 | Features are grouped by the set of constraints applied to them, and 79 | different constrained neural networks are generated for each group 80 | of features. The outputs of those neural networks are concatenated 81 | and fed into a final neural network constrained using all 82 | constraints applied to all features. Since constraints on weight 83 | matrices compose, they can be applied as a series of transformations 84 | on the weights before application. 85 | 86 | .. figure:: images/interprenet.png 87 | :width: 500px 88 | :align: center 89 | :height: 400px 90 | :alt: alternate text 91 | :figclass: align-center 92 | 93 | 4 features with Lipschitz constraints and 4 features with 94 | monotonic constraints are fed to their respectively constrained 95 | neural networks. Intermediate outputs are concatenated and fed into a neural 96 | network with monotonic and Lipschitz constraints. 97 | 98 | We use the Sort function as a nonlinear activation as described in 99 | :cite:`anil2019sorting`. The Jacobian of this activation is always a 100 | permutation matrix, which retains any Lipschitz and monotonicity 101 | constraints. 102 | 103 | ************* 104 | Preprocessing 105 | ************* 106 | 107 | Thus far, we have left out two important details: How to constrain 108 | the Lipschitz constant to be something other than :math:`1`, and how 109 | to create monotonically decreasing networks. Both are a simple 110 | matter of preprocessing. The ``preprocess`` argument (defaulting to 111 | ``identity``) specifies a function to be applied to the feature 112 | vector before passing it to the neural network.
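For example, the sketch below (assuming the ``constrained_model`` API as it is called in this repository's Periodic notebook; the feature count and scaling constants are illustrative) applies a decreasing monotonic constraint to the first feature and a Lipschitz constant of :math:`5` to the second:

.. code-block:: python

    import jax
    from mvtk import interprenet

    # Flip the first feature for a *decreasing* monotonic constraint and
    # scale the second by L = 5 for a Lipschitz constant of 5.
    preprocess = lambda x: x * jax.numpy.asarray([-1.0, 5.0])

    init_params, model = interprenet.constrained_model(
        (frozenset([interprenet.monotonic_constraint]),
         frozenset([interprenet.lipschitz_constraint])),
        get_layers=lambda n: [n + 1],
        preprocess=preprocess,
        postprocess=interprenet.identity)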
In general, for decreasing 113 | monotonic constraints, multiply the respective features by 114 | :math:`-1`. For a Lipschitz constant of :math:`L`, multiply the 115 | respective features by 116 | :math:`L`. 117 | 118 | .. topic:: Tutorials: 119 | 120 | * :doc:`Interprenet ` 121 | 122 | .. bibliography:: refs.bib 123 | :cited: 124 | -------------------------------------------------------------------------------- /mvtk/metrics.py: -------------------------------------------------------------------------------- 1 | import public 2 | import numpy 3 | import pandas 4 | 5 | from scipy.stats import entropy 6 | from sklearn.feature_selection import mutual_info_classif 7 | 8 | 9 | def binarize(data, t): 10 | y_true, y_pred = data.values.T 11 | return y_true > t, y_pred 12 | 13 | 14 | @public.add 15 | def monotonicity(y_true, y_pred, weights=None): 16 | r"""Generalizes ROC AUC by computing 17 | :math:`P\left(\frac{\Delta\mathrm{y_pred}}{\Delta\mathrm{y_true}} > 18 | 0\right)`, the probability incrementing ``y_true`` increases ``y_pred`` for 19 | a randomly chosen pair of instances. This reduces to ROC AUC when 20 | ``y_true`` has two unique values. Adapted from Algorithm 2 in `Fawcett, T. 21 | (2006). An introduction to ROC analysis. Pattern Recognition Letters, 22 | 27(8), 861-874. 23 | `_ 24 | 25 | Args: 26 | y_true (list-like): Ground truth ordinal values 27 | y_pred (list-like): Predicted ordinal values 28 | weights (list-like): Sample weights. Will be normalized to one 29 | across each unique value of ``y_true``. If ``None`` (default) all 30 | samples are weighed equally. 31 | 32 | Returns: 33 | Float between 0 and 1. 0 indicates 100% chance of ``y_pred`` 34 | decreasing upon incrementing ``y_true`` up to its next 35 | highest value in the dataset. 1 indicates a 100% chance of 36 | ``y_pred`` increasing for the same scenario. 0.5 would be 50% 37 | chance of either. 38 | """ 39 | if weights is None: 40 | weights = numpy.ones(len(y_true)) 41 | unique = numpy.unique(y_true) 42 | n = len(unique) - 1 43 | true_lookup = {u: i + 1 for i, u in enumerate(unique)} 44 | idx = numpy.argsort(-y_pred) 45 | y_true = y_true[idx] 46 | y_pred = y_pred[idx] 47 | weights = weights[idx] 48 | # fp, fp_prev, tp, tp_prev, auc 49 | data = numpy.zeros((5, n)) 50 | prev_pred = numpy.full(n, numpy.nan) 51 | for true, pred, weight in zip(y_true, y_pred, weights): 52 | i = true_lookup[true] 53 | j = max(i - 2, 0) 54 | mask = pred != prev_pred[j:i] 55 | data[4, j:i][mask] += trap(*data[:4, j:i][:, mask]) 56 | data[1:4:2, j:i][:, mask] = data[:4:2, j:i][:, mask] 57 | prev_pred[j:i] = pred 58 | i -= 1 59 | if i: 60 | data[2, j] += weight 61 | if i < n: 62 | data[0, i] += weight 63 | data[4] += trap(*data[:4]) 64 | return numpy.sum(data[4]) / 2 / data[0].dot(data[2]) 65 | 66 | 67 | def trap(x2, x1, y2, y1): 68 | return (x2 - x1) * (y2 + y1) 69 | 70 | 71 | @public.add 72 | def rank_auc(y_true, y_pred, weights=None): 73 | r"""Generalizes ROC AUC by computing probability that two randomly chosen 74 | data points would be ranked consistently with ground truth labels. This 75 | reduces to ROC AUC when ``y_true`` has two unique values. 76 | Adapted from Algorithm 2 in `Fawcett, T. (2006). An introduction 77 | to ROC analysis. Pattern Recognition Letters, 27(8), 861-874. 78 | `_ 79 | 80 | Args: 81 | y_true (list-like): Ground truth ordinal values 82 | y_pred (list-like): Predicted ordinal values 83 | weights (list-like): Sample weights. Will be normalized to one 84 | across each unique value of ``y_true``.
If ``None`` (default) all 85 | samples are weighed equally. 86 | 87 | Returns: 88 | Float between 0 and 1. 1 indicates a 100% chance of ``y_pred`` 89 | matching the order of ``y_true``; 0 indicates a 100% chance of 90 | ``y_pred`` having the opposite order of ``y_true``. 0.5 would be 50% 91 | chance of either. 92 | """ 93 | if weights is None: 94 | weights = numpy.ones(len(y_true)) 95 | unique = numpy.unique(y_true) 96 | n = len(unique) - 1 97 | true_lookup = {u: i + 1 for i, u in enumerate(unique)} 98 | idx = numpy.argsort(-y_pred) 99 | y_true = y_true[idx] 100 | y_pred = y_pred[idx] 101 | weights = weights[idx] 102 | # fp, fp_prev, tp, tp_prev, auc 103 | data = numpy.zeros((5, n)) 104 | prev_pred = numpy.full(n, numpy.nan) 105 | for true, pred, weight in zip(y_true, y_pred, weights): 106 | i = true_lookup[true] 107 | mask = pred != prev_pred[:i] 108 | data[4, :i][mask] += trap(*data[:4, :i][:, mask]) 109 | data[1:4:2, :i][:, mask] = data[:4:2, :i][:, mask] 110 | prev_pred[:i] = pred 111 | i -= 1 112 | data[2, :i] += weight 113 | if i < n: 114 | data[0, i] += weight 115 | data[4] += trap(*data[:4]) 116 | return numpy.sum(data[4]) / 2 / data[0].dot(data[2]) 117 | 118 | 119 | @public.add 120 | def normalized_mutual_info(X, y, **kwargs): 121 | """Thin wrapper around `sklearn's mutual information 122 | <https://scikit-learn.org/stable/modules/generated/sklearn.feature_selection.mutual_info_classif.html>`_. 123 | This normalizes the result to a 0-1 scale. ``y`` is assumed categorical. 124 | """ 125 | _, counts = numpy.unique(y, return_counts=True) 126 | return pandas.Series( 127 | dict( 128 | zip( 129 | X.columns, 130 | mutual_info_classif(X, y, **kwargs) / entropy(counts / counts.sum()), 131 | ) 132 | ) 133 | ) 134 | -------------------------------------------------------------------------------- /mvtk/supervisor/utils.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import sys 3 | import time 4 | import pandas as pd 5 | import numpy as np 6 | import public 7 | 8 | from concurrent.futures import ThreadPoolExecutor, as_completed 9 | from datetime import datetime 10 | from typing import Collection 11 | from typing import List 12 | from itertools import combinations 13 | from fastcore.imports import in_notebook 14 | 15 | if in_notebook(): 16 | from tqdm import tqdm_notebook as tqdm 17 | else: 18 | from tqdm import tqdm 19 | 20 | 21 | @public.add 22 | def parallel(func, arr: Collection, max_workers=None, show_progress: bool = False): 23 | """ 24 | NOTE: This code was adapted from the ``parallel`` function 25 | within Fastai's Fastcore library. Key differences include 26 | returning a list with order preserved. 27 | 28 | Run a function on a collection (list, set etc) of items 29 | :param func: The function to run 30 | :param arr: The collection to run on 31 | :param max_workers: How many workers to use.
Will use 32 | multiprocessing.cpu_count() if this is not provided 33 | :return: a list of the results 34 | """ 35 | if show_progress: 36 | progress_bar = tqdm(arr, smoothing=0, file=sys.stdout) 37 | results = [] 38 | max_workers = multiprocessing.cpu_count() if max_workers is None else max_workers 39 | with ThreadPoolExecutor(max_workers=max_workers) as ex: 40 | future_to_index = {ex.submit(func, o): i for i, o in enumerate(arr)} 41 | for future in as_completed(future_to_index): 42 | results.append((future_to_index[future], future.result())) 43 | if show_progress: 44 | progress_bar.update() 45 | results.sort() 46 | 47 | # Complete the progress bar if not complete 48 | if show_progress: 49 | for n in range(progress_bar.n, len(list(arr))): 50 | time.sleep(0.1) 51 | progress_bar.update() 52 | return [result for i, result in results] 53 | 54 | 55 | @public.add 56 | def column_indexes(df: pd.DataFrame, cols: List[str]): 57 | """ 58 | 59 | :param df: The dataframe 60 | :param cols: a list of column names 61 | :return: The column indexes of the column names 62 | """ 63 | return [df.columns.get_loc(col) for col in cols if col in df] 64 | 65 | 66 | def format_date(date_str, dateformat="%b%d"): 67 | date = pd.to_datetime(date_str) 68 | return datetime.strftime(date, dateformat) 69 | 70 | 71 | @public.add 72 | def compute_divergence_crosstabs( 73 | data, datecol=None, format=None, show_progress=True, divergence=None 74 | ): 75 | """Compute the divergence crosstabs. 76 | 77 | :param data: The data to compute the divergences on 78 | :param datecol: The column representing the date. If None, will 79 | use the index, if the index is a datetimeindex 80 | :param format: A function applied to datecol values for formatting 81 | e.g. ``format_date`` 82 | :param show_progress: Whether the progress bar will be shown 83 | :param divergence: The divergence function to use 84 | """ 85 | if datecol is None: 86 | datecol = data.index 87 | dates, subsets = zip(*data.groupby(datecol)) 88 | dates = list(dates) 89 | subsets = (subset.drop(columns=[datecol]) for subset in subsets) 90 | 91 | return compute_divergence_crosstabs_split( 92 | subsets, dates, format, show_progress, divergence 93 | ) 94 | 95 | 96 | @public.add 97 | def compute_divergence_crosstabs_split( 98 | subsets, dates, format=None, show_progress=True, divergence=None 99 | ): 100 | """Compute the divergence crosstabs. 101 | 102 | :param subsets: The data to compute the divergences on 103 | :param dates: The list of dates for the subsets 104 | :param format: A function applied to datecol values for formatting 105 | e.g. 
``format_date`` 106 | :param show_progress: Whether the progress bar will be shown 107 | :param divergence: The divergence function to use 108 | """ 109 | 110 | # Create a divergence matrix 111 | divergences = np.zeros((len(dates), len(dates))) 112 | if not divergence: 113 | from mvtk.supervisor.divergence import calc_tv 114 | 115 | divergence = calc_tv 116 | 117 | def compute_divergence(args): 118 | return divergence(*args) 119 | 120 | for (i, j), v in zip( 121 | combinations(range(len(dates)), 2), 122 | parallel( 123 | compute_divergence, combinations(subsets, 2), show_progress=show_progress 124 | ), 125 | ): 126 | divergences[i, j] = divergences[j, i] = v 127 | if format is None: 128 | formatted = dates 129 | else: 130 | formatted = [format(d) for d in dates] 131 | return pd.DataFrame(divergences, columns=formatted, index=formatted) 132 | 133 | 134 | @public.add 135 | def plot_divergence_crosstabs_3d(divergences): 136 | """Plot the divergences in 3d. 137 | 138 | :params divergences: The list of divergences 139 | """ 140 | import matplotlib.pyplot as plt 141 | from mpl_toolkits.mplot3d import Axes3D # noqa F401 142 | 143 | fig = plt.figure() 144 | ax = fig.add_subplot(111, projection="3d") 145 | 146 | keys = list(divergences.keys()) 147 | indexes = range(len(keys)) 148 | 149 | for i in indexes: 150 | y = [x[1] for x in list(divergences[keys[i]].items())] 151 | ax.bar(indexes, y, i, zdir="y", alpha=0.8) 152 | 153 | ax.set(xticks=indexes, xticklabels=keys, yticks=indexes, yticklabels=keys) 154 | 155 | return fig 156 | 157 | 158 | @public.add 159 | def split(x, train_ratio=0.5, nprng=np.random.RandomState(0)): 160 | i = int(len(x) * train_ratio) 161 | if hasattr(x, "shape"): 162 | idx = np.arange(x.shape[0]) 163 | nprng.shuffle(idx) 164 | x = x[idx] 165 | else: 166 | nprng.shuffle(x) 167 | return x[:i], x[i:] 168 | -------------------------------------------------------------------------------- /docs/contributing.md: -------------------------------------------------------------------------------- 1 | [comment]: <> (Adapted from JAX's contribution guide) 2 | 3 | # Contributing 4 | 5 | Everyone can contribute to Model Validation Toolkit, and we value everyone's contributions. There are several 6 | ways to contribute, including: 7 | 8 | 9 | - Answering questions on Model Validation Toolkit's [Gitter channel](https://gitter.im/FINRAOS/model-validation-toolkit) 10 | - Improving or expanding Model Validation Toolkit's [documentation](https://finraos.github.io/model-validation-toolkit/docs/html/index.html) 11 | - Contributing to Model Validation Toolkit's [code-base](https://github.com/FINRAOS/model-validation-toolkit/) 12 | 13 | ## Ways to contribute 14 | 15 | We welcome pull requests, in particular for those issues marked with 16 | [contributions welcome](https://github.com/FINRAOS/model-validation-toolkit/issues?q=is%3Aopen+is%3Aissue+label%3A%22contributions+welcome%22) or 17 | [good first issue](https://github.com/FINRAOS/model-validation-toolkit/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22). 18 | 19 | For other proposals, we ask that you first open a GitHub 20 | [Issue](https://github.com/FINRAOS/model-validation-toolkit/issues/new/choose) or 21 | [Gitter channel](https://gitter.im/FINRAOS/model-validation-toolkit) 22 | 23 | to seek feedback on your planned contribution. 24 | 25 | ## Contributing code using pull requests 26 | 27 | We do all of our development using git, so basic knowledge is assumed. 28 | 29 | Follow these steps to contribute code: 30 | 31 | 1. 
Fork the Model Validation Toolkit repository by clicking the **Fork** button on the 32 | [repository page](https://www.github.com/FINRAOS/model-validation-toolkit). This creates 33 | a copy of the Model Validation Toolkit repository in your own account. 34 | 35 | 2. Install Python >=3.6 locally in order to run tests. 36 | 37 | 3. `pip` installing your fork from source. This allows you to modify the code 38 | and immediately test it out: 39 | 40 | ```bash 41 | git clone https://github.com/YOUR_USERNAME/model-validation-toolkit 42 | cd model-validation-toolkit 43 | pip install -e . # Installs Model Validation Toolkit from the current directory in editable mode. 44 | ``` 45 | 46 | 4. Add the Model Validation Toolkit repo as an upstream remote, so you can use it to sync your 47 | changes. 48 | 49 | ```bash 50 | git remote add upstream http://www.github.com/FINRAOS/model-validation-toolkit 51 | ``` 52 | 53 | 5. Create a branch where you will develop from: 54 | 55 | ```bash 56 | git checkout -b name-of-change 57 | ``` 58 | 59 | And implement your changes using your favorite editor. 60 | 61 | 6. Make sure the tests pass by running the following command from the top of 62 | the repository: 63 | 64 | ```bash 65 | pytest tests/ 66 | ``` 67 | 68 | If you know the specific test file that covers your changes, you can limit the tests to that; for example: 69 | 70 | ```bash 71 | pytest tests/supervisor 72 | ``` 73 | 74 | Model Validation Toolkit also offers more fine-grained control over which particular tests are run; 75 | see {ref}`running-tests` for more information. 76 | 77 | 7. Once you are satisfied with your change, create a commit as follows ([how to write a commit message](https://chris.beams.io/posts/git-commit/)): 78 | 79 | ```bash 80 | git add file1.py file2.py ... 81 | git commit -s -m "Your commit message" 82 | ``` 83 | 84 | Please be sure to sign off your work when you commit it with the `-s` or, equivalently `--sign-off` flag to agree to our [DCO](https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/main/DCO). 85 | 86 | Then sync your code with the main repo: 87 | 88 | ```bash 89 | git fetch upstream 90 | git rebase upstream/main 91 | ``` 92 | 93 | Finally, push your commit on your development branch and create a remote 94 | branch in your fork that you can use to create a pull request from: 95 | 96 | ```bash 97 | git push --set-upstream origin name-of-change 98 | ``` 99 | 100 | 8. Create a pull request from the Model Validation Toolkit repository and send it for review. 101 | Check the {ref}`pr-checklist` for considerations when preparing your PR, and 102 | consult [GitHub Help](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests) 103 | if you need more information on using pull requests. 104 | 105 | ## Model Validation Toolkit pull request checklist 106 | 107 | As you prepare a Model Validation Toolkit pull request, here are a few things to keep in mind: 108 | 109 | ### DCO 110 | 111 | By contributing to this project, you agree to our [DCO](https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/main/DCO). 112 | 113 | ### Single-change commits and pull requests 114 | 115 | A git commit ought to be a self-contained, single change with a descriptive 116 | message. This helps with review and with identifying or reverting changes if 117 | issues are uncovered later on. 118 | 119 | Pull requests typically comprise a single git commit. 
In preparing a pull 120 | request for review, you may need to squash together multiple commits. We ask 121 | that you do this prior to sending the PR for review if possible. The `git 122 | rebase -i` command might be useful to this end. 123 | 124 | ### Linting and Type-checking 125 | 126 | Model Validation Toolkit uses [mypy](https://mypy.readthedocs.io/) and [flake8](https://flake8.pycqa.org/) 127 | to statically test code quality; the easiest way to run these checks locally is via 128 | the [pre-commit](https://pre-commit.com/) framework: 129 | 130 | ```bash 131 | pip install pre-commit 132 | pre-commit run --all 133 | ``` 134 | 135 | ### Full GitHub test suite 136 | 137 | Your PR will automatically be run through a full test suite on GitHub CI, which 138 | covers a range of Python versions, dependency versions, and configuration options. 139 | It's normal for these tests to turn up failures that you didn't catch locally; to 140 | fix the issues you can push new commits to your branch. 141 | -------------------------------------------------------------------------------- /docs/refs.bib: -------------------------------------------------------------------------------- 1 | @article{sriperumbudur2009integral, 2 | title={On integral probability metrics,$\backslash$phi-divergences and binary classification}, 3 | author={Sriperumbudur, Bharath K and Fukumizu, Kenji and Gretton, Arthur and Sch{\"o}lkopf, Bernhard and Lanckriet, Gert RG}, 4 | journal={arXiv preprint arXiv:0901.2698}, 5 | year={2009} 6 | } 7 | @inproceedings{nowozin2016f, 8 | title={f-gan: Training generative neural samplers using variational divergence minimization}, 9 | author={Nowozin, Sebastian and Cseke, Botond and Tomioka, Ryota}, 10 | booktitle={Advances in neural information processing systems}, 11 | pages={271--279}, 12 | year={2016} 13 | } 14 | @misc{yale598, 15 | author={Yihong Wu}, 16 | title={Variational representation, HCR and CR lower bounds.}, 17 | month={February}, 18 | year={2016}, 19 | publisher={Yale}, 20 | url={http://www.stat.yale.edu/~yw562/teaching/598/lec06.pdf} 21 | } 22 | @article{csiszar2004information, 23 | title={Information theory and statistics: A tutorial}, 24 | author={Csisz{\'a}r, Imre and Shields, Paul C and others}, 25 | journal={Foundations and Trends{\textregistered} in Communications and Information Theory}, 26 | volume={1}, 27 | number={4}, 28 | pages={417--528}, 29 | year={2004}, 30 | publisher={Now Publishers, Inc.} 31 | } 32 | @article{nguyen2010estimating, 33 | title={Estimating divergence functionals and the likelihood ratio by convex risk minimization}, 34 | author={Nguyen, XuanLong and Wainwright, Martin J and Jordan, Michael I}, 35 | journal={IEEE Transactions on Information Theory}, 36 | volume={56}, 37 | number={11}, 38 | pages={5847--5861}, 39 | year={2010}, 40 | publisher={IEEE} 41 | } 42 | @article{topsoe2000some, 43 | title={Some inequalities for information divergence and related measures of discrimination}, 44 | author={Topsoe, Flemming}, 45 | journal={IEEE Transactions on information theory}, 46 | volume={46}, 47 | number={4}, 48 | pages={1602--1609}, 49 | year={2000}, 50 | publisher={IEEE} 51 | } 52 | @article{gretton2012kernel, 53 | title={A kernel two-sample test}, 54 | author={Gretton, Arthur and Borgwardt, Karsten M and Rasch, Malte J and Sch{\"o}lkopf, Bernhard and Smola, Alexander}, 55 | journal={Journal of Machine Learning Research}, 56 | volume={13}, 57 | number={Mar}, 58 | pages={723--773}, 59 | year={2012} 60 | } 61 | @article{webb2016characterizing, 62 | 
title={Characterizing concept drift}, 63 | author={Webb, Geoffrey I and Hyde, Roy and Cao, Hong and Nguyen, Hai Long and Petitjean, Francois}, 64 | journal={Data Mining and Knowledge Discovery}, 65 | volume={30}, 66 | number={4}, 67 | pages={964--994}, 68 | year={2016}, 69 | publisher={Springer} 70 | } 71 | @misc{vherrmann, 72 | author={Vincent Herrmann}, 73 | title={Wasserstein GAN and the Kantorovich-Rubinstein Duality}, 74 | month={February}, 75 | year={2017}, 76 | url={https://vincentherrmann.github.io/blog/wasserstein/} 77 | } 78 | @article{bellemare2017cramer, 79 | title={The cramer distance as a solution to biased wasserstein gradients}, 80 | author={Bellemare, Marc G and Danihelka, Ivo and Dabney, Will and Mohamed, Shakir and Lakshminarayanan, Balaji and Hoyer, Stephan and Munos, R{\'e}mi}, 81 | journal={arXiv preprint arXiv:1705.10743}, 82 | year={2017} 83 | } 84 | @inproceedings{gulrajani2017improved, 85 | title={Improved training of wasserstein gans}, 86 | author={Gulrajani, Ishaan and Ahmed, Faruk and Arjovsky, Martin and Dumoulin, Vincent and Courville, Aaron C}, 87 | booktitle={Advances in neural information processing systems}, 88 | pages={5767--5777}, 89 | year={2017} 90 | } 91 | @article{arjovsky2017wasserstein, 92 | title={Wasserstein gan}, 93 | author={Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on}, 94 | journal={arXiv preprint arXiv:1701.07875}, 95 | year={2017} 96 | } 97 | @phdthesis{tropp2004topics, 98 | title={Topics in sparse approximation}, 99 | author={Tropp, Joel Aaron}, 100 | school={University of Texas at Austin}, 101 | year={2004} 102 | } 103 | @inproceedings{anil2019sorting, 104 | title={Sorting out Lipschitz function approximation}, 105 | author={Anil, Cem and Lucas, James and Grosse, Roger}, 106 | booktitle={International Conference on Machine Learning}, 107 | pages={291--301}, 108 | year={2019}, 109 | organization={PMLR} 110 | } 111 | @article{sobol2001global, 112 | title={Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates}, 113 | author={Sobol, Ilya M}, 114 | journal={Mathematics and computers in simulation}, 115 | volume={55}, 116 | number={1-3}, 117 | pages={271--280}, 118 | year={2001}, 119 | publisher={Elsevier} 120 | } 121 | @book{saltelli2008global, 122 | title={Global sensitivity analysis: the primer}, 123 | author={Saltelli, Andrea and Ratto, Marco and Andres, Terry and Campolongo, Francesca and Cariboni, Jessica and Gatelli, Debora and Saisana, Michaela and Tarantola, Stefano}, 124 | year={2008}, 125 | publisher={John Wiley \& Sons} 126 | } 127 | @article{im1993sensitivity, 128 | title={Sensitivity estimates for nonlinear mathematical models}, 129 | author={IM, Sobol’}, 130 | journal={Math. Model. Comput. 
Exp}, 131 | volume={1}, 132 | number={4}, 133 | pages={407--414}, 134 | year={1993} 135 | } 136 | @article{reid2009generalised, 137 | title={Generalised pinsker inequalities}, 138 | author={Reid, Mark D and Williamson, Robert C}, 139 | journal={arXiv preprint arXiv:0906.1244}, 140 | year={2009} 141 | } 142 | @article{lin1991divergence, 143 | title={Divergence measures based on the Shannon entropy}, 144 | author={Lin, Jianhua}, 145 | journal={IEEE Transactions on Information theory}, 146 | volume={37}, 147 | number={1}, 148 | pages={145--151}, 149 | year={1991}, 150 | publisher={IEEE} 151 | } 152 | @techreport{domingos2000decomp, 153 | author={Domingos, Pedro}, 154 | title={A Unified Bias-Variance Decomposition and its Applications}, 155 | institution={University of Washington}, 156 | address={Seattle, WA}, 157 | month={January}, 158 | year={2000}, 159 | url={https://homes.cs.washington.edu/~pedrod/papers/mlc00a.pdf} 160 | } 161 | @misc{mlxtenddecomp, 162 | author={Sebastian Raschka}, 163 | title={bias_variance_decomp: Bias-variance decomposition for classification and regression losses}, 164 | year={2014-2023}, 165 | url={https://rasbt.github.io/mlxtend/user_guide/evaluate/bias_variance_decomp/} 166 | } -------------------------------------------------------------------------------- /mvtk/bias_variance/bias_variance_parallel.py: -------------------------------------------------------------------------------- 1 | import ray 2 | import numpy as np 3 | import public 4 | 5 | from sklearn.utils import resample 6 | 7 | from . import bias_variance_mse, get_values, train_and_predict 8 | 9 | 10 | def _prepare_X_and_y(X_train_values, y_train_values, prepare_X, prepare_y_train): 11 | return prepare_X(X_train_values), prepare_y_train(y_train_values) 12 | 13 | 14 | @public.add 15 | def bias_variance_compute_parallel( 16 | estimator, 17 | X_train, 18 | y_train, 19 | X_test, 20 | y_test, 21 | prepare_X=lambda x: x, 22 | prepare_y_train=lambda x: x, 23 | iterations=200, 24 | random_state=None, 25 | decomp_fn=bias_variance_mse, 26 | fit_kwargs=None, 27 | predict_kwargs=None, 28 | ): 29 | r"""Compute the bias-variance decomposition in parallel 30 | 31 | Args: 32 | estimator (EstimatorWrapper): estimator wrapped with a class extending 33 | EstimatorWrapper 34 | X_train: features for training 35 | y_train: ground truth labels for training 36 | X_test: features for testing 37 | y_test: ground truth labels for testing 38 | prepare_X (function, optional): function to transform feature datasets before 39 | calling fit and predict methods 40 | prepare_y_train (function, optional): function to transform training ground 41 | truth labels before calling fit method 42 | iterations (int, optional): number of iterations for the training/testing 43 | random_state (int, optional): random state for bootstrap sampling 44 | decomp_fn (function, optional): bias-variance decomposition function 45 | fit_kwargs (dict, optional): kwargs to pass to the fit method 46 | predict_kwargs (dict, optional): kwargs to pass to the predict method 47 | 48 | Returns: 49 | (average loss, average bias, average variance, net variance)""" 50 | if predict_kwargs is None: 51 | predict_kwargs = {} 52 | if fit_kwargs is None: 53 | fit_kwargs = {} 54 | 55 | if isinstance(random_state, int): 56 | random_state = np.random.RandomState(seed=random_state) 57 | 58 | X_train_values = get_values(X_train) 59 | y_train_values = get_values(y_train) 60 | X_test_values = get_values(X_test) 61 | X_test_prepared = prepare_X(X_test_values) 62 | 63 | if random_state 
is None: 64 | result = [ 65 | bootstrap_train_and_predict_ray.remote( 66 | estimator, 67 | X_train_values, 68 | y_train_values, 69 | X_test_prepared, 70 | prepare_X, 71 | prepare_y_train, 72 | fit_kwargs, 73 | predict_kwargs, 74 | ) 75 | for _ in range(iterations) 76 | ] 77 | else: 78 | result = [ 79 | train_and_predict_ray.remote( 80 | estimator, 81 | *_prepare_X_and_y( 82 | *resample( 83 | X_train_values, y_train_values, random_state=random_state 84 | ), 85 | prepare_X, 86 | prepare_y_train 87 | ), 88 | X_test_prepared, 89 | fit_kwargs, 90 | predict_kwargs 91 | ) 92 | for _ in range(iterations) 93 | ] 94 | 95 | predictions = np.array(ray.get(result)) 96 | 97 | y_test_values = get_values(y_test) 98 | 99 | return decomp_fn(predictions, y_test_values) 100 | 101 | 102 | @ray.remote 103 | def train_and_predict_ray( 104 | estimator, 105 | X_train_values, 106 | y_train_values, 107 | X_test_prepared, 108 | fit_kwargs=None, 109 | predict_kwargs=None, 110 | ): 111 | r"""Train an estimator and get predictions from it 112 | 113 | Args: 114 | estimator (EstimatorWrapper): estimator wrapped with a class extending 115 | EstimatorWrapper 116 | X_train_values: numpy array of features for training 117 | y_train_values: numpy array of ground truth labels for training 118 | X_test_prepared: features for testing which has been processed by prepare_X 119 | function 120 | fit_kwargs (dict, optional): kwargs to pass to the fit method 121 | predict_kwargs (dict, optional): kwargs to pass to the predict method 122 | 123 | Returns: 124 | predictions""" 125 | return train_and_predict( 126 | estimator, 127 | X_train_values, 128 | y_train_values, 129 | X_test_prepared, 130 | fit_kwargs=fit_kwargs, 131 | predict_kwargs=predict_kwargs, 132 | ) 133 | 134 | 135 | @ray.remote 136 | def bootstrap_train_and_predict_ray( 137 | estimator, 138 | X_train_values, 139 | y_train_values, 140 | X_test_prepared, 141 | prepare_X=lambda x: x, 142 | prepare_y_train=lambda x: x, 143 | fit_kwargs=None, 144 | predict_kwargs=None, 145 | ): 146 | r"""Train an estimator using a bootstrap sample of the training data and get 147 | predictions from it 148 | 149 | Args: 150 | estimator (EstimatorWrapper): estimator wrapped with a class extending 151 | EstimatorWrapper 152 | X_train_values: numpy array of features for training 153 | y_train_values: numpy array of ground truth labels for training 154 | X_test_prepared: features for testing which has been processed by prepare_X 155 | function 156 | prepare_X (function, optional): function to transform feature datasets before 157 | calling fit and predict methods 158 | prepare_y_train (function, optional): function to transform train ground truth 159 | labels before calling fit method 160 | fit_kwargs (dict, optional): kwargs to pass to the fit method 161 | predict_kwargs (dict, optional): kwargs to pass to the predict method 162 | 163 | Returns: 164 | predictions""" 165 | if predict_kwargs is None: 166 | predict_kwargs = {} 167 | if fit_kwargs is None: 168 | fit_kwargs = {} 169 | 170 | X_sample, y_sample = resample(X_train_values, y_train_values) 171 | 172 | return train_and_predict( 173 | estimator, 174 | X_sample, 175 | y_sample, 176 | X_test_prepared, 177 | prepare_X, 178 | prepare_y_train, 179 | fit_kwargs, 180 | predict_kwargs, 181 | ) 182 | -------------------------------------------------------------------------------- /docs/credibility_user_guide.rst: -------------------------------------------------------------------------------- 1 | ########### 2 | Credibility User Guide 3 | ########### 4 
| 
5 | **********
6 | Motivation
7 | **********
8 | 
9 | Let's say we are training a model for medical diagnoses. Avoiding false negatives
10 | is important, and we have a hard requirement that a model's recall (proportion
11 | of positive instances identified) must not fall below 70%. If someone validates
12 | a model and reports a recall of 80%, are we clear? Well, maybe. It turns out
13 | this data scientist had a validation set with 5 positive instances. The model
14 | correctly identified 4 of them, giving it a recall of 80%. Would you trust
15 | that? Of course not! You say that a larger sample size is needed. "How many do we
16 | need?" they ask. This module will help answer that question.
17 | 
18 | How?
19 | ====
20 | 
21 | There are two schools of thought on this problem: the `frequentist
22 | `_ and the
23 | `Bayesian `_ approaches.
24 | In practice they tend to give similar results. Going back to our 5 sample
25 | validation set, the frequentist would be concerned with how much our recall
26 | would be expected to vary from one 5 sample hold out set to another. They would
27 | want the hold out set to be large enough that you would not expect much change
28 | in the estimated recall from one hold out set to another. The Bayesian approach
29 | seeks to directly identify the probability that the recall would be lower than
30 | 70% if the validation set were infinitely large. We believe this is a better
31 | representation of the problem at hand, and designed the library around this
32 | Bayesian approach.
33 | 
34 | 
35 | ******************
36 | Beta Distributions
37 | ******************
38 | 
39 | Probability of Low Performance
40 | ==============================
41 | 
42 | .. currentmodule:: mvtk.credibility
43 | 
44 | If you flip a coin 100 times, and it comes up heads 99 times, would you suspect
45 | a biased coin? Probably. What if you flipped it 5 times and saw 4 heads?
46 | This is much less strange. Determining the bias of a coin embodies the core
47 | principles behind determining whether many performance metrics are unacceptably
48 | low.
49 | 
50 | If the coin *is* biased, how biased is it? In general, we'd say there's some
51 | probability distribution over all possible biases. We would generally use a
52 | `beta distribution `_ to
53 | model this distribution for good reasons. This distribution has two free
54 | parameters: the number of heads and the number of tails. However, we generally
55 | offset both of those numbers by 1, so the distribution for zero observed flips is
56 | :math:`B(1, 1)` (with :math:`B` representing our beta distribution as a
57 | function of heads and tails plus respective offsets), which as it turns out is
58 | exactly a uniform distribution over all possible biases. In this sense, we can
59 | express total uncertainty before taking measurements. The beta distribution
60 | becomes more concentrated around the empirical proportion of heads as you take
61 | more and more measurements. If we were reasonably certain of a 60% bias, we
62 | might offset the number of heads with a 6 and the number of tails with a 4.
63 | Then we would start to expect an unbiased coin after observing 2 tails. This
64 | offset is called the *prior* in Bayesian inference, and represents our
65 | understanding before making any observations.
66 | 
67 | .. math::
68 |     B(\alpha, \beta)
69 | 
70 | .. figure:: images/Beta_distribution_pdf.svg
71 |     :width: 800px
72 |     :align: center
73 |     :height: 400px
74 |     :alt: alternate text
75 |     :figclass: align-center
76 | 
77 |     Beta distribution for different :math:`\alpha` (for heads plus offset) and
78 |     :math:`\beta` (tails plus offset).
79 | 
80 | We integrate the area under :math:`B(\alpha,\beta)` from 0 to
81 | :math:`p` to determine the probability that a coin's bias is less
82 | than :math:`p`. This is effectively how :meth:`prob_below` works.
83 | 
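As a minimal sketch of these mechanics (written against ``scipy``, which is certain; the ``prob_below`` call shown in the comment assumes a ``(heads, tails, p)``-style signature, which may differ from the actual API):

.. code-block:: python

    # Probability that the coin's bias is below 0.7 after seeing
    # 4 heads and 1 tail, starting from a uniform B(1, 1) prior.
    from scipy.stats import beta

    heads, tails = 4, 1
    # The posterior is B(heads + 1, tails + 1); integrating its density
    # from 0 to 0.7 gives the probability of a bias below 70%.
    print(beta.cdf(0.7, heads + 1, tails + 1))

    # Assumed equivalent call (illustrative signature, not authoritative):
    # from mvtk import credibility
    # credibility.prob_below(heads, tails, 0.7)
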
84 | 
85 | Credible Intervals
86 | ==================
87 | 
88 | Sometimes you just want a general sense of uncertainty for your sample
89 | estimates. We use :meth:`credible_interval` to compute a `credible interval `_. This will give you the
90 | smallest interval for which there is a `credibility` (keyword argument that
91 | defaults to :math:`0.5`) chance of the bias being within that region. It will
92 | return a lower bound no less than :math:`0` and an upper bound no greater than :math:`1`.
93 | This is subtly different from frequentist `confidence intervals
94 | `_. In our 5 sample
95 | example, the latter reports an interval constructed so that `p` (often
96 | chosen to be 95%) of all such 5 sample intervals would contain the true mean.
97 | 
98 | **************
99 | Common Metrics
100 | **************
101 | Many performance metrics used for binary
102 | classification follow the same mechanics as the
103 | analysis above. The following is a non-exhaustive
104 | list of performance metrics that can be readily
105 | translated into a biased coin scenario in which we
106 | wish to determine heads / (heads + tails).
107 | 
108 | * Precision: true positive / (true positive + false positive)
109 | * Recall: true positive / (true positive + false negative)
110 | * Accuracy: correctly identified / (correctly identified + incorrectly identified)
111 | 
112 | 
113 | ROC AUC
114 | =================
115 | 
116 | `ROC AUC
117 | `_
118 | is an extremely useful measure for binary classification. Like many
119 | other measures of performance for binary classification, it can be
120 | expressed as a proportion of outcomes. However,
121 | unlike other measures of performance, it does not
122 | make use of a threshold. This ultimately makes it a
123 | ranking metric, as it characterizes the degree to
124 | which positive instances are scored higher than
125 | negative instances. However, like other metrics, it
126 | can be expressed as an empirical measure of a
127 | proportion. Specifically, ROC AUC is the proportion
128 | of pairs of positive and negative examples such
129 | that the positive example is scored higher than the
130 | negative one. This can be expressed as
131 | 
132 | .. math::
133 |     \frac{1}{NM}\sum\limits_{n,m}^{N,M} \left[\mathrm{score}(\mathrm{Positive}_n) > \mathrm{score}(\mathrm{Negative}_m)\right]
134 | 
135 | where the bracketed comparison counts 1 when true and 0 otherwise. However, computing the area under the receiver
136 | operating characteristic is a more computationally
137 | efficient means of computing the same quantity.
138 | :meth:`roc_auc_preprocess` will convert a positive and negative
139 | sample count to an associated count of correctly and incorrectly
140 | ranked pairs of positive and negative instances using the ROC AUC
141 | score. This pair of numbers can be used as arguments for
142 | :meth:`prob_below` and :meth:`credible_interval`.
143 | 
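The sketch below ties these pieces together. The argument order for :meth:`roc_auc_preprocess`, :meth:`prob_below`, and :meth:`credible_interval` is assumed for illustration; consult the API reference for the exact signatures.

.. code-block:: python

    from mvtk import credibility

    n_positive, n_negative = 100, 400  # validation sample counts
    roc_auc = 0.9                      # measured ROC AUC score

    # Convert sample counts plus the ROC AUC score into counts of
    # correctly and incorrectly ranked positive/negative pairs
    # (assumed argument order).
    correct, incorrect = credibility.roc_auc_preprocess(
        n_positive, n_negative, roc_auc
    )

    # Probability that the "true" ROC AUC falls below 0.85, and the
    # smallest interval with a 50% chance of containing it.
    credibility.prob_below(correct, incorrect, 0.85)
    credibility.credible_interval(correct, incorrect, credibility=0.5)
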
144 | .. topic:: Tutorials:
145 | 
146 |     * :doc:`Credibility `
147 | 
-------------------------------------------------------------------------------- /tests/bias_variance/estimators/test_pytorch_estimator_wrapper.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 | 
5 | from mvtk.bias_variance.estimators import PyTorchEstimatorWrapper
6 | 
7 | 
8 | class ModelPyTorch(nn.Module):
9 |     def __init__(self):
10 |         super().__init__()
11 |         self.linear1 = nn.Linear(2, 8)
12 |         self.linear2 = nn.Linear(8, 1)
13 | 
14 |     def forward(self, x):
15 |         x = self.linear1(x)
16 |         x = self.linear2(x)
17 |         return x
18 | 
19 | 
20 | def create_data():
21 |     X_train = np.arange(12).reshape(6, 2)
22 |     y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
23 |     X_test = np.arange(6).reshape(3, 2)
24 |     y_test = np.array([0, 1, 1])
25 | 
26 |     return X_train, y_train, X_test, y_test
27 | 
28 | 
29 | def create_model():
30 |     model_pytorch = ModelPyTorch()
31 |     optimizer = torch.optim.Adam(model_pytorch.parameters(), lr=0.001)
32 |     loss_fn = nn.MSELoss()
33 | 
34 |     return model_pytorch, optimizer, loss_fn
35 | 
36 | 
37 | def optimizer_gen(x):
38 |     return torch.optim.Adam(x.parameters(), lr=0.001)
39 | 
40 | 
41 | def reset_parameters(x):
42 |     if hasattr(x, "reset_parameters"):
43 |         x.reset_parameters()
44 | 
45 | 
46 | def fit(estimator, optimizer, loss_fn, X, y, epochs=10, batch_size=None):
47 |     for i in range(epochs):
48 |         if batch_size is None:
49 |             batch_size = len(y)
50 |         for j in range(0, len(y), batch_size):
51 |             batch_start = j
52 |             batch_end = j + batch_size
53 |             X_batch = X[batch_start:batch_end]
54 |             y_batch = y[batch_start:batch_end]
55 |             prediction = estimator(X_batch)
56 |             loss = loss_fn(prediction, y_batch)
57 | 
58 |             optimizer.zero_grad()
59 |             loss.backward()
60 |             optimizer.step()
61 | 
62 | 
63 | def custom_fit(self, X, y, epochs=10, batch_size=None):
64 |     for i in range(epochs):
65 |         if batch_size is None:
66 |             batch_size = len(y)
67 |         for j in range(0, len(y), batch_size):
68 |             batch_start = j
69 |             batch_end = j + batch_size
70 |             X_batch = X[batch_start:batch_end]
71 |             y_batch = y[batch_start:batch_end]
72 |             prediction = self.estimator(X_batch)
73 |             loss = self.loss_fn(prediction, y_batch)
74 | 
75 |             self.optimizer.zero_grad()
76 |             loss.backward()
77 |             self.optimizer.step()
78 | 
79 | 
80 | def predict(estimator, X, custom_test=False):
81 |     if custom_test:
82 |         return [1, 0, 1]
83 | 
84 |     prediction_list = []
85 |     with torch.no_grad():
86 |         for value in X:
87 |             prediction = estimator(value)
88 |             if len(prediction) > 1:
89 |                 prediction_list.append(prediction.argmax().item())
90 |             else:
91 |                 prediction_list.append(prediction.item())
92 |     return prediction_list
93 | 
94 | 
95 | def custom_predict(estimator, X):
96 |     return [1, 0, 1]
97 | 
98 | 
99 | def test_pytorch_estimator_wrapper():
100 |     torch.use_deterministic_algorithms(True)
101 | 
102 |     X_train, y_train, X_test, y_test = create_data()
103 | 
104 |     X_train_torch = torch.FloatTensor(X_train)
105 |     X_test_torch = torch.FloatTensor(X_test)
106 |     y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1)
107 | 
108 |     torch.manual_seed(123)
109 |     model, optimizer, loss_fn = create_model()
110 | 
111 |     model.apply(reset_parameters)
112 |     fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=100)
113 |     pred = predict(model, X_test_torch)
114 | 
115 |     torch.manual_seed(123)
116 |     model_test, optimizer_test, loss_fn_test = create_model()
117 |     model_wrapped =
PyTorchEstimatorWrapper(model_test, optimizer_gen, loss_fn_test) 118 | 119 | model_wrapped.fit(X_train_torch, y_train_torch) 120 | pred_wrapped = model_wrapped.predict(X_test_torch) 121 | 122 | assert np.array_equal(pred, pred_wrapped) 123 | 124 | 125 | def test_pytorch_estimator_wrapper_kwargs_fit(): 126 | torch.use_deterministic_algorithms(True) 127 | 128 | X_train, y_train, X_test, y_test = create_data() 129 | 130 | X_train_torch = torch.FloatTensor(X_train) 131 | X_test_torch = torch.FloatTensor(X_test) 132 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1) 133 | 134 | torch.manual_seed(123) 135 | model, optimizer, loss_fn = create_model() 136 | 137 | model.apply(reset_parameters) 138 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=5) 139 | pred = predict(model, X_test_torch) 140 | 141 | torch.manual_seed(123) 142 | model_test, optimizer_test, loss_fn_test = create_model() 143 | model_wrapped = PyTorchEstimatorWrapper(model_test, optimizer_gen, loss_fn_test) 144 | 145 | model_wrapped.fit(X_train_torch, y_train_torch, epochs=5) 146 | pred_wrapped = model_wrapped.predict(X_test_torch) 147 | 148 | assert np.array_equal(pred, pred_wrapped) 149 | 150 | 151 | def test_pytorch_estimator_wrapper_custom_fit(): 152 | torch.use_deterministic_algorithms(True) 153 | 154 | X_train, y_train, X_test, y_test = create_data() 155 | 156 | X_train_torch = torch.FloatTensor(X_train) 157 | X_test_torch = torch.FloatTensor(X_test) 158 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1) 159 | 160 | torch.manual_seed(123) 161 | model, optimizer, loss_fn = create_model() 162 | 163 | model.apply(reset_parameters) 164 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=10) 165 | pred = predict(model, X_test_torch) 166 | 167 | torch.manual_seed(123) 168 | model_test, optimizer_test, loss_fn_test = create_model() 169 | model_wrapped = PyTorchEstimatorWrapper( 170 | model_test, optimizer_gen, loss_fn_test, fit_fn=custom_fit 171 | ) 172 | 173 | model_wrapped.fit(X_train_torch, y_train_torch) 174 | pred_wrapped = model_wrapped.predict(X_test_torch) 175 | 176 | assert np.array_equal(pred, pred_wrapped) 177 | 178 | 179 | def test_pytorch_estimator_wrapper_custom_predict(): 180 | torch.use_deterministic_algorithms(True) 181 | 182 | X_train, y_train, X_test, y_test = create_data() 183 | 184 | X_train_torch = torch.FloatTensor(X_train) 185 | X_test_torch = torch.FloatTensor(X_test) 186 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1) 187 | 188 | torch.manual_seed(123) 189 | model, optimizer, loss_fn = create_model() 190 | 191 | model.apply(reset_parameters) 192 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=100) 193 | pred = predict(model, X_test_torch, custom_test=True) 194 | 195 | torch.manual_seed(123) 196 | model_test, optimizer_test, loss_fn_test = create_model() 197 | model_wrapped = PyTorchEstimatorWrapper( 198 | model_test, optimizer_gen, loss_fn_test, predict_fn=custom_predict 199 | ) 200 | 201 | model_wrapped.fit(X_train_torch, y_train_torch) 202 | pred_wrapped = model_wrapped.predict(X_test_torch) 203 | 204 | assert np.array_equal(pred, pred_wrapped) 205 | -------------------------------------------------------------------------------- /mvtk/thresholding.py: -------------------------------------------------------------------------------- 1 | import public 2 | import bisect 3 | import numpy 4 | import matplotlib.pylab as plt 5 | 6 | from functools import reduce 7 | 8 | 9 | @public.add 10 | def plot_err(scores, utility_mean, 
utility_err, color=None, label=None, alpha=0.5):
11 |     plt.plot(scores, utility_mean, color=color)
12 |     plt.fill_between(scores, *utility_err, alpha=alpha, color=color, label=label)
13 | 
14 | 
15 | @public.add
16 | def expected_utility(utility, data, N=4096, credibility=0.5):
17 |     """Get the utility distribution over possible thresholds.
18 | 
19 |     Args:
20 |         utility (function): utility function that ingests true/false
21 |             positive/negative rates.
22 |         data (list-like): iterable of list-likes of the form (ground truth,
23 |             score). Feedback is null when an alert is not triggered.
24 |         credibility (float): Credibility level for a credible interval. This
25 |             interval will be centered about the mean and have a `credibility`
26 |             chance of containing the true utility.
27 | 
28 |     Returns:
29 |         tuple of three elements:
30 |         - candidate thresholds
31 |         - mean expected utility
32 |         - upper and lower quantile of estimate of expected utility associated
33 |           with each threshold
34 |     """
35 |     credibility /= 2
36 |     scores, utilities = sample_utilities(utility, data, N=N)
37 |     low = int(N * credibility)
38 |     high = int(N * (1 - credibility))
39 |     utilities = numpy.asarray(utilities)
40 |     utilities.sort(axis=1)
41 |     return scores, utilities.mean(1), numpy.asarray(utilities[:, [low, high]]).T
42 | 
43 | 
44 | @public.add
45 | def optimal_threshold(utility, data, N=4096):
46 |     scores, utilities = sample_utilities(utility, data, N=N)
47 |     means = utilities.mean(1)
48 |     idx = means.argmax()
49 |     return scores[idx], means[idx]
50 | 
51 | 
52 | @public.add
53 | def sample_utilities(utility, data, N=4096):
54 |     """Get distribution of utilities.
55 | 
56 |     Args:
57 |         utility (function): utility function that ingests true/false
58 |             positive/negative rates.
59 |         data (list-like): iterable of iterables of the form (ground truth, score).
60 |             Feedback is null when an alert is not triggered.
61 | 
62 |     Returns: thresholds, utilities
63 |     """
64 |     if not len(data):
65 |         return data, numpy.asarray([])
66 |     nprng = numpy.random.RandomState(0)
67 |     data = numpy.asarray(data)
68 |     num_positives = data[:, 0].sum()
69 |     rates = [1 + num_positives, 1 + len(data) - num_positives, 1, 1]
70 |     utilities = []
71 |     data = data[numpy.argsort(data[:, 1])]
72 |     for ground_truth, score in data:
73 |         update_rates(rates, ground_truth)
74 |         utilities.append(utility(*nprng.dirichlet(rates, size=N).T))
75 |     return data[:, 1], numpy.asarray(utilities)
76 | 
77 | 
78 | @public.add
79 | def thompson_sample(utility, data, N=1024, quantile=False):
80 |     scores, utilities = sample_utilities(utility, data, N)
81 |     if quantile:
82 |         return utilities.argmax(axis=0) / (len(utilities) - 1)
83 |     return scores[utilities.argmax(axis=0)]
84 | 
85 | 
86 | @public.add
87 | def update_rates(rates, ground_truth):
88 |     rates[0] -= ground_truth
89 |     rates[1] -= not ground_truth
90 |     rates[2] += not ground_truth
91 |     rates[3] += ground_truth
92 | 
93 | 
94 | @public.add
95 | class AdaptiveThreshold:
96 |     """Adaptive agent that balances exploration with exploitation with respect
97 |     to setting and adjusting thresholds.
98 | 
99 |     When exploring, the threshold is 0, effectively letting anything
100 |     through. This produces unbiased data that can then be used to set a
101 |     more optimal threshold in subsequent rounds. The agent seeks to
102 |     balance the opportunity cost of running an experiment with the
103 |     utility gained over subsequent rounds using the information gained
104 |     from this experiment.
105 |     """
106 | 
107 |     def __init__(self, utility):
108 |         """
109 |         Args:
110 |             utility (function): Function that takes in true/false
111 |                 positive/negative rates. Specifically (tp, fp, tn, fn) -> float
112 |                 representing utility."""
113 | 
114 |         self.utility = utility
115 |         self.results = []
116 |         self.unbiased_positives = 1
117 |         self.unbiased_negatives = 1
118 |         self.previous_threshold = 0
119 |         self.nprng = numpy.random.RandomState(0)
120 | 
121 |     def get_best_threshold(self):
122 |         # true positives, false positives, true negatives, false negatives
123 |         rates = [self.unbiased_positives, self.unbiased_negatives, 1, 1]
124 |         experiment_utility = self.utility(*self.nprng.dirichlet(rates))
125 |         hypothetical_rates = [
126 |             self.unbiased_positives - self.last_experiment_outcome,
127 |             self.unbiased_negatives - (1 - self.last_experiment_outcome),
128 |             1,
129 |             1,
130 |         ]
131 |         best_hypothetical_utility = -numpy.inf
132 |         best_utility = -numpy.inf
133 |         for score, ground_truth, idx in self.results:
134 |             update_rates(rates, ground_truth)
135 |             utility = self.utility(*self.nprng.dirichlet(rates))
136 |             if utility > best_utility:
137 |                 best_utility = utility
138 |                 best_threshold = score
139 |             if idx >= self.last_experiment_idx:
140 |                 continue
141 |             update_rates(hypothetical_rates, ground_truth)
142 |             hypothetical_utility = self.utility(
143 |                 *self.nprng.dirichlet(hypothetical_rates)
144 |             )
145 |             if hypothetical_utility > best_hypothetical_utility:
146 |                 best_hypothetical_utility = hypothetical_utility
147 |                 hindsight_utility = utility
148 |         return best_threshold, experiment_utility, best_utility, hindsight_utility
149 | 
150 |     def __call__(self, ground_truth, score):
151 |         """Args are ignored if previous threshold was not 0. Otherwise, the
152 |         score is added as a potential threshold and ground_truth noted to help
153 |         identify the optimal threshold.
154 | 
155 |         Args:
156 |             ground_truth (bool)
157 |             score (float)
158 |         """
159 |         idx = len(self.results)
160 |         if self.previous_threshold == 0:
161 |             bisect.insort(self.results, (score, ground_truth, idx))
162 |             self.unbiased_positives += ground_truth
163 |             self.unbiased_negatives += 1 - ground_truth
164 |             self.last_experiment_idx = idx
165 |             self.last_experiment_outcome = ground_truth
166 |         if len(self.results) < 2:
167 |             return self.previous_threshold
168 |         (
169 |             best_threshold,
170 |             experiment_utility,
171 |             best_utility,
172 |             hindsight_utility,
173 |         ) = self.get_best_threshold()
174 |         total_utility_gained = (best_utility - hindsight_utility) * (
175 |             idx - self.last_experiment_idx
176 |         )
177 |         opportunity_cost = hindsight_utility - experiment_utility
178 |         if opportunity_cost <= total_utility_gained:
179 |             self.previous_threshold = 0
180 |         else:
181 |             self.previous_threshold = best_threshold
182 |         return self.previous_threshold
183 | 
184 | 
185 | @public.add
186 | def exploration_proportion(thresholds, N):
187 |     exploration = thresholds == 0
188 |     alpha = 1 - 1.0 / N
189 |     return reduce(
190 |         lambda accum, elem: accum + [accum[-1] * alpha + elem * (1 - alpha)],
191 |         exploration[N:],
192 |         [exploration[:N].mean()],
193 |     )
194 | 
-------------------------------------------------------------------------------- /docs/thresholding_user_guide.rst: --------------------------------------------------------------------------------
1 | #######################
2 | Thresholding User Guide
3 | #######################
4 | 
5 | **********
6 | Motivation
7 | **********
8 | 
9 | Let's say you're monitoring some process for alerts. Maybe it's model
10 | performance.
Maybe it's model drift. In any case, let's say you have a score
11 | that increases with the likelihood that something is wrong and needs to be
12 | investigated. You still need to decide whether to actually launch an
13 | investigation or not for each of these scores. This is known as thresholding.
14 | But where to put the threshold? Set it too high and you'll miss important
15 | alerts. Set it too low and you'll be flooded with noise. This module comes with
16 | tools and techniques to experimentally determine where to set your threshold
17 | given your tolerance for noise.
18 | 
19 | How?
20 | ====
21 | 
22 | Let's say the scores associated with good alerts look like this.
23 | 
24 | .. figure:: images/thresholding_positive_scores.png
25 |     :width: 500px
26 |     :align: center
27 |     :height: 500px
28 |     :alt: alternate text
29 |     :figclass: align-center
30 | 
31 | Moreover, scores associated with negative alerts look like this.
32 | 
33 | .. figure:: images/thresholding_negative_scores.png
34 |     :width: 500px
35 |     :align: center
36 |     :height: 500px
37 |     :alt: alternate text
38 |     :figclass: align-center
39 | 
40 | Clearly the likelihood of finding a good alert increases with model score, but
41 | any choice of threshold will imply a trade-off between true/false
42 | positives/negatives. In general, you need to decide on a utility function of
43 | true/false positives/negatives.
44 | 
45 | .. code-block:: python
 | 
46 |     def utility(tp, fp, tn, fn):
47 |         return tp - 20 * fn - fp
48 | 
49 | The utility function would increase with true positives and/or true negatives,
50 | and decrease with false positives and/or false negatives. A risk-averse utility
51 | function is shown above with a 20-fold preference for avoiding false negatives
52 | over false positives. In general, we will assume the utility function is a
53 | function of the *proportions* of true/false positives/negatives in a data set.
54 | In this sense, the utility function is a function of a categorical distribution
55 | over true/false positives/negatives.
56 | 
57 | Now that we have a utility function and a sample of positive and negative alert
58 | scores, we can plot expected utility as a function of threshold.
59 | 
60 | .. figure:: images/thresholding_expected_utility.png
61 |     :width: 500px
62 |     :align: center
63 |     :height: 400px
64 |     :alt: alternate text
65 |     :figclass: align-center
66 | 
67 |     Expected utility as a function of threshold (solid) and 50%
68 |     `credible interval
69 |     `_ (shaded
70 |     region).
71 | 
72 | Note that we don't actually have the true distribution of positive
73 | and negative scores in practice. Rather, we have examples. If we
74 | only had 4 positive scores and 4 negative scores, we could not be very
75 | certain of the results. More on this in the `credibility user guide
76 | `__. We model the distribution of true/false
77 | positives/negatives as a `Dirichlet-multinomial distribution
78 | `_ with
79 | a `maximum entropy prior
80 | `_.
81 | 
82 | This shows a particularly apparent peak in utility, but only after (in this
83 | case) a few thousand example scores. In practice, we could well be starting
84 | with *no* examples and building up our knowledge as we go. To make things
85 | worse, we will only find out if an alert was good or not if we investigate it.
86 | Anything that falls below our threshold forever remains unlabeled. We developed
87 | a specific algorithm to tackle this problem that we call *adaptive
88 | thresholding*.
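Before turning to adaptive thresholding, here is a minimal sketch of how a plot like the one above can be produced with :meth:`expected_utility` and :meth:`plot_err`. The beta-distributed scores are synthetic stand-ins for real alert data.

.. code-block:: python

    import numpy
    from mvtk import thresholding

    nprng = numpy.random.RandomState(0)
    # Synthetic (ground truth, score) pairs: good alerts tend to score high.
    positives = [(1, score) for score in nprng.beta(4, 2, size=500)]
    negatives = [(0, score) for score in nprng.beta(2, 4, size=2000)]
    data = positives + negatives

    def utility(tp, fp, tn, fn):
        return tp - 20 * fn - fp

    # Candidate thresholds, mean expected utility, and a 50% credible band.
    scores, utility_mean, utility_err = thresholding.expected_utility(
        utility, data, credibility=0.5
    )
    thresholding.plot_err(scores, utility_mean, utility_err, label="expected utility")
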
89 | 
90 | *********************
91 | Adaptive Thresholding
92 | *********************
93 | 
94 | We face a classic `exploitation/exploration dilemma
95 | `_. We can either choose
96 | to *exploit* the information we have so far about positive and negative score
97 | distributions to set a threshold or *explore* what may lie below that threshold
98 | by labeling whatever comes in next. Unfortunately, the labels obtained from
99 | scores greater than a threshold chosen at the time pose a challenge in that
100 | they yield heavily biased estimates of positive and negative score
101 | distributions (since they don't include anything below the threshold set at the
102 | time). We have not found a good way to compensate for that bias in practice.
103 | Rather, we must switch between an optimally set threshold and labeling
104 | whatever comes next. This produces a series of *unbiased labels*.
105 | 
106 | Our adaptive thresholding algorithm seeks to balance the
107 | opportunity cost of labeling data against the utility gained over subsequent
108 | rounds from the resulting change in threshold. Each score with an unbiased label is a
109 | potential threshold. For each of those options, we sample a possible
110 | distribution of true/false positives/negatives (with a Dirichlet-multinomial
111 | distribution with a maximum entropy prior) using the other unbiased labels.
112 | Utilities are calculated for each sampled distribution of true/false
113 | positives/negatives. The highest utility is noted, as well as the utility of
114 | setting the threshold to 0 (exploration). Next, this process is repeated using
115 | all but the most recent unbiased label. We locate the optimal threshold
116 | computed using all but the most recent unbiased label, and then compute the
117 | utility of that threshold using the utilities calculated using *all* unbiased
118 | labels. The difference between this utility and the utility of the true optimal
119 | threshold is the expected utility gained from the last round of exploration.
120 | This expected utility gained per round times the number of rounds since the
121 | last round of exploration is the net utility gained since the last round of
122 | experimentation. Meanwhile, the difference between the utility of the true
123 | optimal threshold and the utility of exploration is the opportunity cost of
124 | exploration. When the net utility gained exceeds the opportunity cost of
125 | exploration, exploration is chosen over exploitation.
126 | 
127 | Note that we stochastically sample utilities at the score associated with each
128 | unbiased label at each round. This is necessary to prevent deadlocks in which
129 | the optimal threshold is identical before and after experimentation, leaving
130 | the expected utility gained per round 0 forever (thus ending any possibility of
131 | subsequent rounds of exploration). Rather, exploration is chosen according to
132 | the *probability* that the net utility gained has in fact caught up with the
133 | opportunity cost of the last round of exploration.
134 | 
135 | However, as we gain a more accurate picture of the distribution of positive and
136 | negative scores, we make smaller changes to our best guess at the location of
137 | the optimal threshold after exploration. As a result, the expected utility
138 | gained per round of exploitation will gradually decrease over time, and we will
139 | need more and more rounds of exploitation to make up for the opportunity cost
140 | of exploration (shown below).
141 | 
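Here is a minimal usage sketch of :class:`AdaptiveThreshold`. The ``alert_stream`` iterable of ``(ground_truth, score)`` pairs is a hypothetical stand-in for your monitoring feed; in practice, ground truth is only observed for alerts you actually investigate.

.. code-block:: python

    from mvtk import thresholding

    def utility(tp, fp, tn, fn):
        return tp - 20 * fn - fp

    agent = thresholding.AdaptiveThreshold(utility)
    threshold = 0  # start fully exploring
    for ground_truth, score in alert_stream:  # hypothetical feed
        if score >= threshold:
            # The alert was investigated, so its label is known; the agent
            # decides whether the next round explores (threshold 0) or
            # exploits the current best threshold.
            threshold = agent(ground_truth, score)
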
142 | .. figure:: images/thresholding_exploration_proportion.png
143 |     :width: 500px
144 |     :align: center
145 |     :height: 500px
146 |     :alt: alternate text
147 |     :figclass: align-center
148 | 
149 |     Probability of choosing exploration decreases from about 45% at the
150 |     beginning to about 5% after 3600 rounds.
151 | 
152 | 
153 | .. topic:: Tutorials:
154 | 
155 |     * :doc:`Thresholding `
156 | 
157 | .. bibliography:: refs.bib
158 |     :cited:
159 | 
-------------------------------------------------------------------------------- /mvtk/supervisor/divergence/generators.py: --------------------------------------------------------------------------------
1 | import numpy
2 | import public
3 | 
4 | from collections import defaultdict
5 | from functools import reduce
6 | 
7 | 
8 | @public.add
9 | def js_data_stream(
10 |     nprng, batch_size, sample_distributions, categorical_columns=tuple()
11 | ):
12 |     r"""Data stream generator for Jensen-Shannon divergence of N distributions.
13 |     Jensen-Shannon divergence measures the information of knowing which of
14 |     those N distributions a sample will be drawn from before it is drawn. So if
15 |     we rolled a fair N sided die to determine which distribution we will draw a
16 |     sample from, JS divergence reports how many bits of information will be
17 |     revealed from the die. This scenario is ultimately simulated in this
18 |     function. However, in real life, we may only have examples of samples from
19 |     each distribution we wish to compare. In the most general case, each
20 |     distribution we wish to compare is represented by M batches of samples
21 |     (with potentially different sizes) from M similar distributions whose
22 |     average is of most interest. Just as we might simulate sampling from a
23 |     single distribution by randomly sampling a batch of examples with
24 |     replacement, we can effectively sample from an average of distributions by
25 |     randomly sampling each batch (which may be representative of a single
26 |     distribution), then randomly sampling elements of the chosen batch. This
27 |     can ultimately be thought of as a more data-efficient means to the same end
28 |     as downsampling large batch sizes.
29 | 
30 |     Args:
31 |         nprng: Numpy ``RandomState`` used to generate random samples
32 |         batch_size: size of batch
33 |         sample_distributions: list of lists of samples to compare.
34 |             For example, ``[[batch1, batch2, batch3], [batch4, batch5],
35 |             [batch6, batch7]]`` Assuming ``batch1`` came from distribution
36 |             :math:`p_1`, ``batch2`` from :math:`p_2`, etc, this function will
37 |             simulate a system in which a latent `N=3` sided die roll
38 |             determines whether to draw a sample from :math:`\frac{p_1 + p_2 +
39 |             p_3}{3}`, :math:`\frac{p_4 + p_5}{2}`, or :math:`\frac{p_6 +
40 |             p_7}{2}`.
41 |         categorical_columns (tuple): list or tuple of column indices that are
42 |             considered categorical.
43 | 
44 |     Returns:
45 |         The output of this function will be two samples of size batch_size with
46 |         samples, :math:`x`, drawn from batch_size rolls, :math:`z`, of our
47 |         :math:`N` sided die. Following the example above for which :math:`N=3`,
48 |         the first of these two output samples will be of the form :math:`(x,
49 |         z)`, where x is the sample drawn and z is the die roll.
The second of
50 |         these two samples will be of the form :math:`(x, z^{\prime})` where x
51 |         is the same sample as before, but :math:`z^\prime` is a new set of
52 |         otherwise unrelated rolls of the same :math:`N=3` sided die."""
53 | 
54 |     def process_sample_distributions(sample_distributions):
55 |         z = []
56 |         out = []
57 |         for idx, count in zip(
58 |             *numpy.unique(
59 |                 nprng.randint(0, len(sample_distributions), size=batch_size),
60 |                 return_counts=True,
61 |             )
62 |         ):
63 |             sample_distribution = sample_distributions[idx]
64 |             out.extend(
65 |                 [
66 |                     sample_distribution[i][
67 |                         nprng.randint(0, len(sample_distribution[i]))
68 |                     ]
69 |                     for i in nprng.randint(0, len(sample_distribution), size=count)
70 |                 ]
71 |             )
72 |             z.extend([idx] * count)
73 |         sample_distribution = numpy.asarray(out)
74 |         catted1 = numpy.concatenate(
75 |             (sample_distribution, numpy.asarray(z)[:, numpy.newaxis]), axis=1
76 |         )
77 |         z = nprng.randint(0, len(sample_distributions), size=batch_size)
78 |         catted2 = numpy.concatenate((sample_distribution, z[:, numpy.newaxis]), axis=1)
79 |         return numpy.asarray((catted2, catted1))
80 | 
81 |     while True:
82 |         yield groupby(
83 |             categorical_columns, *process_sample_distributions(sample_distributions)
84 |         )
85 | 
86 | 
87 | @public.add
88 | def fdiv_data_stream(
89 |     nprng, batch_size, sample_distributions, categorical_columns=tuple()
90 | ):
91 |     r"""Data stream generator for f-divergence.
92 | 
93 |     Args:
94 |         nprng: Numpy ``RandomState`` used to generate random samples
95 |         batch_size: size of batch
96 |         sample_distributions: list of lists of samples to compare for each
97 |             partition of the data. For example, ``[[batch1, batch2, batch3],
98 |             [batch4, batch5], [batch6, batch7]]``
99 |         categorical_columns (tuple): list or tuple of column indices that are
100 |             considered categorical.
101 | 
102 |     Returns:
103 |         The output of this function will be ``N`` samples of size
104 |         ``batch_size``, where ``N = len(sample_distributions)``. Following the
105 |         example above, assuming ``batch1`` came from distribution :math:`p_1`,
106 |         ``batch2`` from :math:`p_2`, etc, this function will output a tuple of
107 |         ``N = 3`` samples of size ``batch_size``, where the first is sampled
108 |         from :math:`\frac{p_1 + p_2 + p_3}{3}`, the second is sampled from
109 |         :math:`\frac{p_4 + p_5}{2}`, and the third is sampled from
110 |         :math:`\frac{p_6 + p_7}{2}`."""
111 | 
112 |     def process_sample_distributions(sample_distributions):
113 |         return numpy.asarray(
114 |             [
115 |                 [
116 |                     sample_distribution[i][
117 |                         nprng.randint(0, len(sample_distribution[i]))
118 |                     ]
119 |                     for i in nprng.randint(0, len(sample_distribution), size=batch_size)
120 |                 ]
121 |                 for sample_distribution in sample_distributions
122 |                 if len(sample_distribution)
123 |             ]
124 |         )
125 | 
126 |     while True:
127 |         yield groupby(
128 |             categorical_columns, *process_sample_distributions(sample_distributions)
129 |         )
130 | 
131 | 
132 | def groupby(categorical_columns, *samples):
133 |     r"""Group samples by unique values found in a subset of columns
134 |     Args:
135 |         categorical_columns: List of indices of columns which should be
136 |             treated as categorical.
137 |         *samples: A set of samples drawn from distinct distributions.
138 |             Each distribution is assumed to be defined on the same probability
139 |             space, so it would make sense to compare a sample drawn from one
140 |             distribution to a sample drawn from another.
141 | 142 | Returns: 143 | tuple of dicts that each map unique combinations of 144 | ``categorical_columns`` to a subset of samples from the 145 | ``sample_distributions`` that have these values in their 146 | ``categorical_columns``. ``categorical_columns`` are omitted from 147 | the values of these dicts.""" 148 | if not categorical_columns: 149 | return [{tuple(): sample.astype("float")} for sample in samples] 150 | # the complement of categorical_columns is assumed to be numeric 151 | numerical_columns = [ 152 | i for i in range(samples[0].shape[1]) if i not in categorical_columns 153 | ] 154 | 155 | def grouper(accum, element): 156 | accum[tuple(element[categorical_columns])].append(element[numerical_columns]) 157 | return accum 158 | 159 | return tuple( 160 | { 161 | key: numpy.asarray(value, dtype="float") 162 | for key, value in reduce(grouper, sample, defaultdict(list)).items() 163 | } 164 | for sample in samples 165 | ) 166 | -------------------------------------------------------------------------------- /mvtk/bias_variance/bias_variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import public 4 | 5 | from scipy import stats 6 | from sklearn.utils import resample 7 | 8 | 9 | @public.add 10 | def get_values(x): 11 | r"""If argument is a Pandas dataframe, return 'values' numpy array from it. 12 | 13 | Args: 14 | x (Any): pandas dataframe or anything else 15 | 16 | Returns: 17 | if pandas dataframe - return 'values' numpy array 18 | otherwise - return itself 19 | 20 | """ 21 | if isinstance(x, pd.DataFrame): 22 | return x.values 23 | else: 24 | return x 25 | 26 | 27 | @public.add 28 | def train_and_predict( 29 | estimator, 30 | X_train_values, 31 | y_train_values, 32 | X_test_prepared, 33 | prepare_X=lambda x: x, 34 | prepare_y_train=lambda x: x, 35 | fit_kwargs=None, 36 | predict_kwargs=None, 37 | ): 38 | r"""Train an estimator and get predictions from it 39 | 40 | Args: 41 | estimator (EstimatorWrapper): estimator wrapped with a class extending 42 | EstimatorWrapper 43 | X_train_values: numpy array of features for training 44 | y_train_values: numpy array of ground truth labels for training 45 | X_test_prepared: feature set for testing which has been processed by 46 | prepare_X function 47 | prepare_X (function, optional): function to transform feature datasets 48 | before calling fit and predict methods 49 | prepare_y_train (function, optional): function to transform train ground 50 | truth labels before calling fit method 51 | fit_kwargs (dict, optional): kwargs to pass to the fit method 52 | predict_kwargs (dict, optional): kwargs to pass to the predict method 53 | 54 | Returns: 55 | predictions""" 56 | if predict_kwargs is None: 57 | predict_kwargs = {} 58 | if fit_kwargs is None: 59 | fit_kwargs = {} 60 | 61 | X_sample_prepared = prepare_X(X_train_values) 62 | y_sample_prepared = prepare_y_train(y_train_values) 63 | 64 | estimator = estimator.fit(X_sample_prepared, y_sample_prepared, **fit_kwargs) 65 | predictions = estimator.predict(X_test_prepared, **predict_kwargs) 66 | 67 | return predictions 68 | 69 | 70 | @public.add 71 | def bootstrap_train_and_predict( 72 | estimator, 73 | X_train_values, 74 | y_train_values, 75 | X_test_prepared, 76 | prepare_X=lambda x: x, 77 | prepare_y_train=lambda x: x, 78 | random_state=None, 79 | fit_kwargs=None, 80 | predict_kwargs=None, 81 | ): 82 | r"""Train an estimator using a bootstrap sample of the training data and get 83 | predictions from it 
84 | 
85 |     Args:
86 |         estimator (EstimatorWrapper): estimator wrapped with a class extending
87 |             EstimatorWrapper
88 |         X_train_values: numpy array of features for training
89 |         y_train_values: numpy array of ground truth labels for training
90 |         X_test_prepared: feature set for testing which has been processed by prepare_X
91 |             function
92 |         prepare_X (function, optional): function to transform feature datasets before
93 |             calling fit and predict methods
94 |         prepare_y_train (function, optional): function to transform train ground
95 |             truth labels before calling fit method
96 |         random_state (int, optional): random state for bootstrap sampling
97 |         fit_kwargs (dict, optional): kwargs to pass to the fit method
98 |         predict_kwargs (dict, optional): kwargs to pass to the predict method
99 | 
100 |     Returns:
101 |         predictions"""
102 |     X_sample, y_sample = resample(
103 |         X_train_values, y_train_values, random_state=random_state
104 |     )
105 | 
106 |     return train_and_predict(
107 |         estimator,
108 |         X_sample,
109 |         y_sample,
110 |         X_test_prepared,
111 |         prepare_X,
112 |         prepare_y_train,
113 |         fit_kwargs,
114 |         predict_kwargs,
115 |     )
116 | 
117 | 
118 | @public.add
119 | def bias_variance_mse(predictions, y_test):
120 |     r"""Compute the bias-variance decomposition using the mean squared error loss function
121 | 
122 |     Args:
123 |         predictions: numpy array of predictions over the set of iterations
124 |         y_test: numpy array of ground truth labels
125 | 
126 |     Returns:
127 |         (average loss, average bias, average variance, net variance)"""
128 |     pred_by_x = np.swapaxes(predictions, 0, 1)
129 | 
130 |     main_predictions = np.mean(predictions, axis=0)
131 | 
132 |     avg_bias = np.mean((main_predictions - y_test) ** 2)
133 | 
134 |     arr_loss = np.zeros(pred_by_x.shape[0], dtype=np.float64)
135 |     arr_var = np.zeros(pred_by_x.shape[0], dtype=np.float64)
136 |     for i in range(pred_by_x.shape[0]):
137 |         arr_loss[i] = np.mean((pred_by_x[i] - y_test[i]) ** 2)
138 |         arr_var[i] = np.mean((pred_by_x[i] - main_predictions[i]) ** 2)
139 |     avg_loss = np.mean(arr_loss)
140 |     avg_var = np.mean(arr_var)
141 | 
142 |     return avg_loss, avg_bias, avg_var, avg_var
143 | 
144 | 
145 | @public.add
146 | def bias_variance_0_1_loss(predictions, y_test):
147 |     r"""Compute the bias-variance decomposition using the 0-1 loss function
148 | 
149 |     Args:
150 |         predictions: numpy array of predictions over the set of iterations
151 |         y_test: numpy array of ground truth labels
152 | 
153 |     Returns:
154 |         (average loss, average bias, average variance, net variance)"""
155 |     pred_by_x = np.swapaxes(predictions, 0, 1)
156 | 
157 |     main_predictions = stats.mode(predictions, axis=0, keepdims=True).mode[0]
158 | 
159 |     avg_bias = np.mean(main_predictions != y_test)
160 | 
161 |     arr_loss = np.zeros(pred_by_x.shape[0], dtype=np.float64)
162 |     arr_var = np.zeros(pred_by_x.shape[0], dtype=np.float64)
163 |     var_b = 0.0  # biased example contribution to avg_var
164 |     var_u = 0.0  # unbiased example contribution to avg_var
165 |     for i in range(pred_by_x.shape[0]):
166 |         pred_true = np.sum(pred_by_x[i] == y_test[i])
167 |         pred_not_main = np.sum(pred_by_x[i] != main_predictions[i])
168 | 
169 |         arr_loss[i] = (predictions.shape[0] - pred_true) / predictions.shape[0]
170 |         arr_var[i] = pred_not_main / predictions.shape[0]
171 | 
172 |         if main_predictions[i] != y_test[i]:
173 |             prb_true_given_not_main = (
174 |                 pred_true / pred_not_main if pred_not_main != 0 else 0
175 |             )
176 |             var_b += (pred_not_main / predictions.shape[0]) * prb_true_given_not_main
177 |         else:
178 |             var_u +=
pred_not_main / predictions.shape[0]
179 | 
180 |     var_b /= pred_by_x.shape[0]
181 |     var_u /= pred_by_x.shape[0]
182 | 
183 |     avg_loss = np.mean(arr_loss)
184 |     avg_var = np.mean(arr_var)
185 |     net_var = var_u - var_b
186 | 
187 |     return avg_loss, avg_bias, avg_var, net_var
188 | 
189 | 
190 | @public.add
191 | def bias_variance_compute(
192 |     estimator,
193 |     X_train,
194 |     y_train,
195 |     X_test,
196 |     y_test,
197 |     prepare_X=lambda x: x,
198 |     prepare_y_train=lambda x: x,
199 |     iterations=200,
200 |     random_state=None,
201 |     decomp_fn=bias_variance_mse,
202 |     fit_kwargs=None,
203 |     predict_kwargs=None,
204 | ):
205 |     r"""Compute the bias-variance decomposition in serial
206 | 
207 |     Args:
208 |         estimator (EstimatorWrapper): estimator wrapped with a class extending
209 |             EstimatorWrapper
210 |         X_train: features for training
211 |         y_train: ground truth labels for training
212 |         X_test: features for testing
213 |         y_test: ground truth labels for testing
214 |         prepare_X (function, optional): function to transform feature datasets before
215 |             calling fit and predict methods
216 |         prepare_y_train (function, optional): function to transform training ground
217 |             truth labels before calling fit method
218 |         iterations (int, optional): number of iterations for the training/testing
219 |         random_state (int, optional): random state for bootstrap sampling
220 |         decomp_fn (function, optional): bias-variance decomposition function
221 |         fit_kwargs (dict, optional): kwargs to pass to the fit method
222 |         predict_kwargs (dict, optional): kwargs to pass to the predict method
223 | 
224 |     Returns:
225 |         (average loss, average bias, average variance, net variance)"""
226 |     if fit_kwargs is None:
227 |         fit_kwargs = {}
228 |     if predict_kwargs is None:
229 |         predict_kwargs = {}
230 | 
231 |     if isinstance(random_state, int):
232 |         random_state = np.random.RandomState(seed=random_state)
233 | 
234 |     predictions = np.zeros((iterations, y_test.shape[0]))
235 | 
236 |     X_train_values = get_values(X_train)
237 |     y_train_values = get_values(y_train)
238 |     X_test_values = get_values(X_test)
239 |     X_test_prepared = prepare_X(X_test_values)
240 | 
241 |     for i in range(iterations):
242 |         predictions[i] = bootstrap_train_and_predict(
243 |             estimator,
244 |             X_train_values,
245 |             y_train_values,
246 |             X_test_prepared,
247 |             prepare_X,
248 |             prepare_y_train,
249 |             random_state,
250 |             fit_kwargs,
251 |             predict_kwargs,
252 |         )
253 | 
254 |     y_test_values = get_values(y_test)
255 | 
256 |     return decomp_fn(predictions, y_test_values)
257 | 
-------------------------------------------------------------------------------- /docs/notebooks/divergence/CategoricalColumns.ipynb: --------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# Handling Categorical Data\n",
8 |     "\n",
9 |     "More often than not, a dataset is composed of both **numeric** and **categorical** data types. The supervisor divergence functions can handle both, but they need to know which columns are categorical so that they can handle them properly. This notebook shows you how to do so when using the **supervisor** divergence package."
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "markdown",
14 |    "metadata": {},
15 |    "source": [
16 |     "## Dataset with Mixed Data Types"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "markdown",
21 |    "metadata": {},
22 |    "source": [
23 |     "### Create a dataset\n",
24 |     "To demonstrate, we will create a simple dataset with a mix of categorical and numeric columns. "
25 |    ]
26 |   },
27 |   {
28 |    "cell_type": "code",
29 |    "execution_count": 1,
30 |    "metadata": {},
31 |    "outputs": [
32 |     {
33 |      "data": {
105 |       "text/plain": [
106 |        "   latitude      fruit  temp         city  longitude\n",
107 |        "0       239      apple   104  Filly Downs        257\n",
108 |        "1       181      apple    11     Coldport        303\n",
109 |        "2       246  raspberry    99  Filly Downs         60\n",
110 |        "3       187  raspberry    91     Coldport         90\n",
111 |        "4        97  raspberry    26  Filly Downs        108"
112 |       ]
113 |      },
114 |      "execution_count": 1,
115 |      "metadata": {},
116 |      "output_type": "execute_result"
117 |     }
118 |    ],
119 |    "source": [
120 |     "import pandas as pd\n",
121 |     "import numpy as np\n",
122 |     "\n",
123 |     "\n",
124 |     "size = 100000\n",
125 |     "\n",
126 |     "data = pd.DataFrame()\n",
127 |     "data['latitude'] = np.random.randint(0, 360, size=size)\n",
128 |     "data['fruit'] = np.random.choice(a=['apple', 'orange', 'plum', 'raspberry', 'blueberry'],\n",
129 |     "                                 p=[0.1, 0.3, 0.3, 0.25, 0.05], size=size)\n",
130 |     "data['temp'] = np.random.randint(-10, 120, size=size)\n",
131 |     "data['city'] = np.random.choice(a=['London', 'Paris', 'Newport', 'Bradfield', 'Coldport', 'Filly Downs'],\n",
132 |     "                                p=[0.15, 0.2, 0.1, 0.1, 0.3, 0.15], size=size)\n",
133 |     "\n",
134 |     "\n",
135 |     "data['longitude'] = np.random.randint(0, 360, size=size)\n",
136 |     "\n",
137 |     "data.head(5)"
138 |    ]
139 |   },
140 |   {
141 |    "cell_type": "markdown",
142 |    "metadata": {},
143 |    "source": [
144 |     "In the dataset, the **fruit** and **city** columns are *categorical*, while **latitude**, **temp** and **longitude** are *numeric*. "
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "markdown",
149 |    "metadata": {},
150 |    "source": [
151 |     "### Create a comparison dataset\n",
152 |     "We will create a dataset to compare by taking the original dataset and modifying some of the values. In this case, we will set a couple of columns to a constant value, which would result in the new dataset being of a different distribution from the original dataset."
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 2,
158 |    "metadata": {},
159 |    "outputs": [],
160 |    "source": [
161 |     "data_shifted = data.copy()\n",
162 |     "data_shifted['temp'] = 1\n",
163 |     "data_shifted.fruit = 'apple'"
164 |    ]
165 |   },
166 |   {
167 |    "cell_type": "markdown",
168 |    "metadata": {},
169 |    "source": [
170 |     "## Calculating Divergence"
171 |    ]
172 |   },
173 |   {
174 |    "cell_type": "code",
175 |    "execution_count": 3,
176 |    "metadata": {},
177 |    "outputs": [],
178 |    "source": [
179 |     "import warnings\n",
180 |     "with warnings.catch_warnings():\n",
181 |     "    warnings.simplefilter(\"ignore\")\n",
182 |     "    from mvtk.supervisor.divergence import calc_tv_knn"
183 |    ]
184 |   },
185 |   {
186 |    "cell_type": "markdown",
187 |    "metadata": {},
188 |    "source": [
189 |     "The divergence functions have a parameter called **categorical_columns** which you need to use to specify which columns are not numeric. The functions will throw an error if categorical columns are passed but not specified.\n",
190 |     "\n",
191 |     "So, if you know which columns are categorical, then you need to pass a list of the column indexes. Both datasets should have their columns in exactly the same order."
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 4,
197 |    "metadata": {},
198 |    "outputs": [
199 |     {
200 |      "data": {
201 |       "text/plain": [
202 |        "0.8506579001037404"
203 |       ]
204 |      },
205 |      "execution_count": 4,
206 |      "metadata": {},
207 |      "output_type": "execute_result"
208 |     }
209 |    ],
210 |    "source": [
211 |     "calc_tv_knn(data, data_shifted, categorical_columns=[1,3])"
212 |    ]
213 |   },
214 |   {
215 |    "cell_type": "code",
216 |    "execution_count": 5,
217 |    "metadata": {},
218 |    "outputs": [
219 |     {
220 |      "data": {
221 |       "text/plain": [
222 |        "0.2598375876037403"
223 |       ]
224 |      },
225 |      "execution_count": 5,
226 |      "metadata": {},
227 |      "output_type": "execute_result"
228 |     }
229 |    ],
230 |    "source": [
231 |     "calc_tv_knn(data, data, categorical_columns=[1,3])"
232 |    ]
233 |   },
234 |   {
235 |    "cell_type": "markdown",
236 |    "metadata": {},
237 |    "source": [
238 |     "## mvtk.supervisor.utils.column_indexes"
239 |    ]
240 |   },
241 |   {
242 |    "cell_type": "markdown",
243 |    "metadata": {},
244 |    "source": [
245 |     "With the utility function **column_indexes** you can get a list of the column indexes of the categorical columns in the dataframe."
246 |    ]
247 |   },
248 |   {
249 |    "cell_type": "code",
250 |    "execution_count": 6,
251 |    "metadata": {},
252 |    "outputs": [
253 |     {
254 |      "data": {
255 |       "text/plain": [
256 |        "[1, 3]"
257 |       ]
258 |      },
259 |      "execution_count": 6,
260 |      "metadata": {},
261 |      "output_type": "execute_result"
262 |     }
263 |    ],
264 |    "source": [
265 |     "from mvtk.supervisor.utils import column_indexes\n",
266 |     "\n",
267 |     "column_indexes(data, cols=['fruit', 'city'])"
268 |    ]
269 |   },
270 |   {
271 |    "cell_type": "markdown",
272 |    "metadata": {},
273 |    "source": [
274 |     "You can also run the **column_indexes** function inline as a function parameter."
275 | ] 276 | }, 277 | { 278 | "cell_type": "code", 279 | "execution_count": 7, 280 | "metadata": {}, 281 | "outputs": [ 282 | { 283 | "data": { 284 | "text/plain": [ 285 | "0.25967482718707363" 286 | ] 287 | }, 288 | "execution_count": 7, 289 | "metadata": {}, 290 | "output_type": "execute_result" 291 | } 292 | ], 293 | "source": [ 294 | "calc_tv_knn(data, data, \n", 295 | " categorical_columns=column_indexes(data, cols=['fruit', 'city']))" 296 | ] 297 | } 298 | ], 299 | "metadata": { 300 | "kernelspec": { 301 | "display_name": "supervisor", 302 | "language": "python", 303 | "name": "supervisor" 304 | }, 305 | "language_info": { 306 | "codemirror_mode": { 307 | "name": "ipython", 308 | "version": 3 309 | }, 310 | "file_extension": ".py", 311 | "mimetype": "text/x-python", 312 | "name": "python", 313 | "nbconvert_exporter": "python", 314 | "pygments_lexer": "ipython3", 315 | "version": "3.6.8" 316 | } 317 | }, 318 | "nbformat": 4, 319 | "nbformat_minor": 2 320 | } 321 | -------------------------------------------------------------------------------- /tests/bias_variance/test_bias_variance.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from sklearn.tree import DecisionTreeClassifier 5 | from sklearn.linear_model import Ridge 6 | 7 | from mvtk.bias_variance import ( 8 | bias_variance_compute, 9 | bias_variance_mse, 10 | bias_variance_0_1_loss, 11 | get_values, 12 | train_and_predict, 13 | bootstrap_train_and_predict, 14 | ) 15 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper 16 | 17 | 18 | def create_data(): 19 | X_train = np.arange(12).reshape(6, 2) 20 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None) 21 | X_test = np.arange(6).reshape(3, 2) 22 | y_test = np.array([0, 1, 1]) 23 | 24 | return X_train, y_train, X_test, y_test 25 | 26 | 27 | def test_get_values(): 28 | a = [1, 2] 29 | b = [3, 4] 30 | c = [1, 3] 31 | d = [2, 4] 32 | df = pd.DataFrame(data={"col_a": a, "col_b": b}) 33 | 34 | df_values = get_values(df) 35 | np_array = np.asarray([c, d]) 36 | 37 | assert isinstance(df_values, np.ndarray) 38 | assert np.array_equal(df_values, np_array) 39 | 40 | 41 | def test_train_and_predict_default(): 42 | X_train, y_train, X_test, y_test = create_data() 43 | 44 | model = Ridge(random_state=123) 45 | model_wrapped = SciKitLearnEstimatorWrapper(model) 46 | 47 | predictions = train_and_predict(model_wrapped, X_train, y_train, X_test) 48 | 49 | expected = np.array([0.4326241134751774, 0.6595744680851064, 0.8865248226950355]) 50 | 51 | assert np.array_equal( 52 | np.round(predictions, decimals=12), np.round(expected, decimals=12) 53 | ) 54 | 55 | 56 | def test_train_and_predict_prepare(): 57 | X_train, y_train, X_test, y_test = create_data() 58 | 59 | model = Ridge(random_state=123) 60 | model_wrapped = SciKitLearnEstimatorWrapper(model) 61 | 62 | predictions = train_and_predict( 63 | model_wrapped, 64 | X_train, 65 | y_train, 66 | X_test, 67 | prepare_X=lambda x: x + 1, 68 | prepare_y_train=lambda x: x + 1, 69 | ) 70 | 71 | expected = np.array([1.3191489361702131, 1.546099290780142, 1.773049645390071]) 72 | 73 | assert np.array_equal( 74 | np.round(predictions, decimals=12), np.round(expected, decimals=12) 75 | ) 76 | 77 | 78 | def test_train_and_predict_kwargs_fit(): 79 | X_train, y_train, X_test, y_test = create_data() 80 | 81 | model = DecisionTreeClassifier(random_state=123) 82 | model_wrapped = SciKitLearnEstimatorWrapper(model) 83 | 84 | predictions = 
train_and_predict( 85 | model_wrapped, 86 | X_train, 87 | y_train, 88 | X_test, 89 | fit_kwargs={"sample_weight": [0, 0, 1, 0, 1, 0]}, 90 | ) 91 | 92 | expected = np.array([2, 2, 2]) 93 | 94 | assert np.array_equal(predictions, expected) 95 | 96 | 97 | def test_train_and_predict_kwargs_predict(): 98 | X_train, y_train, X_test, y_test = create_data() 99 | 100 | model = DecisionTreeClassifier(random_state=123) 101 | model_wrapped = SciKitLearnEstimatorWrapper(model) 102 | 103 | train_and_predict(model_wrapped, X_train, y_train, X_test) 104 | 105 | try: 106 | train_and_predict( 107 | model_wrapped, 108 | X_train, 109 | y_train, 110 | X_test, 111 | predict_kwargs={"check_input": False}, 112 | ) 113 | except ValueError as e: 114 | assert e.args[0] == "X.dtype should be np.float32, got int64" 115 | return 116 | 117 | assert False 118 | 119 | 120 | def test_bootstrap_train_and_predict_default(): 121 | X_train, y_train, X_test, y_test = create_data() 122 | 123 | model = Ridge(random_state=123) 124 | model_wrapped = SciKitLearnEstimatorWrapper(model) 125 | 126 | predictions = bootstrap_train_and_predict( 127 | model_wrapped, X_train, y_train, X_test, random_state=321 128 | ) 129 | 130 | expected = np.array([0.7168141592920354, 0.8584070796460177, 1.0]) 131 | 132 | assert np.array_equal(predictions, expected) 133 | 134 | 135 | def test_bootstrap_train_and_predict_kwargs_fit(): 136 | X_train, y_train, X_test, y_test = create_data() 137 | 138 | model = DecisionTreeClassifier(random_state=123) 139 | model_wrapped = SciKitLearnEstimatorWrapper(model) 140 | 141 | predictions = bootstrap_train_and_predict( 142 | model_wrapped, 143 | X_train, 144 | y_train, 145 | X_test, 146 | random_state=321, 147 | fit_kwargs={"sample_weight": [0, 0, 1, 0, 1, 0]}, 148 | ) 149 | 150 | expected = np.array([0, 0, 0]) 151 | 152 | assert np.array_equal(predictions, expected) 153 | 154 | 155 | def test_bootstrap_train_and_predict_kwargs_predict(): 156 | X_train, y_train, X_test, y_test = create_data() 157 | 158 | model = DecisionTreeClassifier(random_state=123) 159 | model_wrapped = SciKitLearnEstimatorWrapper(model) 160 | 161 | bootstrap_train_and_predict( 162 | model_wrapped, X_train, y_train, X_test, random_state=321 163 | ) 164 | 165 | try: 166 | bootstrap_train_and_predict( 167 | model_wrapped, 168 | X_train, 169 | y_train, 170 | X_test, 171 | random_state=321, 172 | predict_kwargs={"check_input": False}, 173 | ) 174 | except ValueError as e: 175 | assert e.args[0] == "X.dtype should be np.float32, got int64" 176 | return 177 | 178 | assert False 179 | 180 | 181 | def test_bias_variance_compute_mse(): 182 | X_train, y_train, X_test, y_test = create_data() 183 | 184 | model = Ridge(random_state=123) 185 | model_wrapped = SciKitLearnEstimatorWrapper(model) 186 | 187 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute( 188 | model_wrapped, 189 | X_train, 190 | y_train, 191 | X_test, 192 | y_test, 193 | iterations=10, 194 | random_state=123, 195 | decomp_fn=bias_variance_mse, 196 | ) 197 | 198 | assert np.round(avg_loss, decimals=12) == np.round( 199 | np.float64(1.1158203908105646), decimals=12 200 | ) 201 | assert np.round(avg_bias, decimals=12) == np.round( 202 | np.float64(0.1191924176014536), decimals=12 203 | ) 204 | assert np.round(avg_var, decimals=12) == np.round( 205 | np.float64(0.9966279732091108), decimals=12 206 | ) 207 | assert np.round(net_var, decimals=12) == np.round( 208 | np.float64(0.9966279732091108), decimals=12 209 | ) 210 | 211 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + 
net_var, decimals=12) 212 | assert avg_var == net_var 213 | 214 | 215 | def test_bias_variance_compute_0_1(): 216 | X_train, y_train, X_test, y_test = create_data() 217 | 218 | model = DecisionTreeClassifier(random_state=123) 219 | model_wrapped = SciKitLearnEstimatorWrapper(model) 220 | 221 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute( 222 | model_wrapped, 223 | X_train, 224 | y_train, 225 | X_test, 226 | y_test, 227 | iterations=10, 228 | random_state=123, 229 | decomp_fn=bias_variance_0_1_loss, 230 | ) 231 | 232 | assert avg_loss == np.float64(0.4666666666666666) 233 | assert avg_bias == np.float64(0.3333333333333333) 234 | assert avg_var == np.float64(0.3666666666666667) 235 | assert net_var == np.float64(0.1333333333333333) 236 | 237 | assert avg_loss == avg_bias + net_var 238 | 239 | 240 | def test_bias_variance_mse_no_loss(): 241 | predictions = np.zeros((3, 5)) 242 | y_test = np.zeros(5) 243 | 244 | avg_loss, avg_bias, avg_var, net_var = bias_variance_mse(predictions, y_test) 245 | 246 | assert avg_loss == np.float64(0.0) 247 | assert avg_bias == np.float64(0.0) 248 | assert avg_var == np.float64(0.0) 249 | assert net_var == np.float64(0.0) 250 | 251 | assert avg_loss == avg_bias + net_var 252 | assert avg_var == net_var 253 | 254 | 255 | def test_bias_variance_mse(): 256 | predictions = np.zeros((3, 5)) 257 | predictions[0] += 0.5 258 | y_test = np.zeros(5) 259 | 260 | avg_loss, avg_bias, avg_var, net_var = bias_variance_mse(predictions, y_test) 261 | 262 | assert avg_loss == np.float64(0.08333333333333333) 263 | assert avg_bias == np.float64(0.02777777777777778) 264 | assert avg_var == np.float64(0.05555555555555556) 265 | assert net_var == np.float64(0.05555555555555556) 266 | 267 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 268 | assert avg_var == net_var 269 | 270 | 271 | def test_bias_variance_0_1_loss_no_loss(): 272 | predictions = np.zeros((3, 5)) 273 | y_test = np.zeros(5) 274 | 275 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 276 | 277 | assert avg_loss == np.float64(0.0) 278 | assert avg_bias == np.float64(0.0) 279 | assert avg_var == np.float64(0.0) 280 | assert net_var == np.float64(0.0) 281 | 282 | assert avg_loss == avg_bias + net_var 283 | 284 | 285 | def test_bias_variance_0_1_loss_no_bias(): 286 | predictions = np.zeros((3, 5)) 287 | predictions[0] += 1 288 | y_test = np.zeros(5) 289 | 290 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 291 | 292 | assert avg_loss == np.float64(0.3333333333333333) 293 | assert avg_bias == np.float64(0.0) 294 | assert avg_var == np.float64(0.3333333333333333) 295 | assert net_var == np.float64(0.3333333333333333) 296 | 297 | assert avg_loss == avg_bias + net_var 298 | 299 | 300 | def test_bias_variance_0_1_loss_var_diff(): 301 | predictions = np.zeros((3, 5)) 302 | predictions[0] += 1 303 | predictions[1][0] += 1 304 | y_test = np.zeros(5) 305 | y_test[1] += 1 306 | 307 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 308 | 309 | assert avg_loss == np.float64(0.4666666666666666) 310 | assert avg_bias == np.float64(0.4) 311 | assert avg_var == np.float64(0.3333333333333333) 312 | assert net_var == np.float64(0.06666666666666668) 313 | 314 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12) 315 | 316 | 317 | def test_bias_variance_0_1_loss_div_by_0(): 318 | predictions = np.ones((3, 5)) 319 | y_test = np.zeros(5) 320 | 321 | 
avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test) 322 | 323 | assert avg_loss == np.float64(1.0) 324 | assert avg_bias == np.float64(1.0) 325 | assert avg_var == np.float64(0.0) 326 | assert net_var == np.float64(0.0) 327 | 328 | assert avg_loss == avg_bias + net_var 329 | -------------------------------------------------------------------------------- /docs/bias_variance_user_guide.rst: -------------------------------------------------------------------------------- 1 | ######################## 2 | Bias-Variance User Guide 3 | ######################## 4 | 5 | ********** 6 | Motivation 7 | ********** 8 | 9 | Statistical Bias vs. "Fairness" 10 | =============================== 11 | 12 | For this user guide and associated submodule, we are referring to 13 | `statistical bias `_ rather 14 | than the "fairness" type of bias. 15 | 16 | Why should we care about bias and variance? 17 | =========================================== 18 | 19 | Bias and variance are two indicators of model performance, and together they account 20 | for two of the three components of model error (the third is irreducible "noise" error 21 | that comes from the data set itself). We can define bias and variance as follows 22 | by training a model with multiple `bootstrap sampled 23 | `_ training sets, resulting in 24 | multiple instances of the model. 25 | 26 | .. topic:: Bias and variance defined over multiple training sets: 27 | 28 | * Bias represents the average difference between the prediction a model makes and the correct prediction. 29 | * Variance represents the average variability of the prediction a model makes. 30 | 31 | Typically, a model with high bias is "underfit" and a model with high variance is 32 | "overfit," but keep in mind this is not always the case and there can be many reasons 33 | why a model has high bias or high variance. An "underfit" model is oversimplified and 34 | performs poorly on the training data, whereas an "overfit" model sticks too closely to 35 | the training data and performs poorly on unseen examples. See Scikit-Learn's 36 | `Underfitting vs. Overfitting 37 | `_ 38 | for a clear example of an "underfit" model vs. an "overfit" model. 39 | 40 | There is a concept 41 | known as the `"bias-variance tradeoff" 42 | `_ that describes 43 | the relationship between high bias and high variance in a model. Our ultimate goal 44 | is to find the balance at which the combined bias and variance is at a minimum. 45 | From a business standpoint, it also matters whether the model 46 | error that we are unable to reduce should favor bias or variance. 47 | 48 | ***************************************** 49 | Visualize Bias and Variance With Examples 50 | ***************************************** 51 | 52 | To make the concepts of bias and variance easier to understand, we will show 53 | four example models, one for each combination of high and low bias and 54 | variance. These are extreme, engineered cases chosen so that the bias and 55 | variance are easy to see. 56 | 57 | Before we begin, let's take a look at the distribution of the labels. Notice 58 | that the majority of label values are around 1 and 2, with far fewer around 5. 59 | 60 | .. figure:: images/bias_variance_label_distribution.png 61 | :align: center 62 | :alt: alternate text 63 | :figclass: align-center 64 | 65 | First we have a model with high bias and low variance.
We artificially 66 | introduce bias to the model by adding 10 to every training label, but leaving 67 | the test labels as is. Given that values greater than 5 in the entire label 68 | set are considered outliers, we are effectively fitting the model to outliers. 69 | 70 | .. figure:: images/high_bias_low_variance.png 71 | :align: center 72 | :alt: alternate text 73 | :figclass: align-center 74 | 75 | Five sets of mean squared error results on the test set, one from each of 76 | the five bootstrap-sample trainings of the model. Notice the model error is very 77 | consistent among the trials and is not centered around 0. 78 | 79 | Next we have a model with low bias and high variance. We simulate this by 80 | introducing 8 random "noise" features to the data set. We also reduce the size 81 | of the training set and train a neural network over a low number of epochs. 82 | 83 | .. figure:: images/low_bias_high_variance.png 84 | :align: center 85 | :alt: alternate text 86 | :figclass: align-center 87 | 88 | Five sets of mean squared error results on the test set, one from each of 89 | the five bootstrap-sample trainings of the model. Notice the model error has 90 | different distributions among the trials and centers mainly around 0. 91 | 92 | Next we have a model with high bias and high variance. We simulate this through 93 | a combination of the techniques from the high-bias/low-variance example and 94 | the low-bias/high-variance example and train another neural network. 95 | 96 | .. figure:: images/high_bias_high_variance.png 97 | :align: center 98 | :alt: alternate text 99 | :figclass: align-center 100 | 101 | Five sets of mean squared error results on the test set, one from each of 102 | the five bootstrap-sample trainings of the model. Notice the model error has 103 | different distributions among the trials and is not centered around 0. 104 | 105 | Finally we have a model with low bias and low variance. This is a simple 106 | linear regression model with no modifications to the training or test labels. 107 | 108 | .. figure:: images/low_bias_low_variance.png 109 | :align: center 110 | :alt: alternate text 111 | :figclass: align-center 112 | 113 | Five sets of mean squared error results on the test set, one from each of 114 | the five bootstrap-sample trainings of the model. Notice the model error is very 115 | consistent among the trials and centers mainly around 0. 116 | 117 | *************************** 118 | Bias-Variance Decomposition 119 | *************************** 120 | 121 | .. currentmodule:: mvtk.bias_variance 122 | 123 | There are formulas for breaking down total model error into three parts: bias, 124 | variance, and noise. These can be applied to both regression problem loss 125 | functions (mean squared error) and classification problem loss functions 126 | (0-1 loss). In a paper by Pedro Domingos, a unified 127 | decomposition was proposed for both types of problems :cite:`domingos2000decomp`. 128 | 129 | First, let's define :math:`y` as a single prediction, :math:`D` as the set of 130 | training sets used to train the models, :math:`Y` as the set of predictions 131 | from the models trained on :math:`D`, and a loss function :math:`L` that 132 | calculates the error between our prediction :math:`y` and the correct 133 | prediction. 134 | The main prediction :math:`y_m` is the prediction with the smallest average loss 135 | when compared to the set of predictions :math:`Y`. The main prediction is 136 | the mean of :math:`Y` for mean squared error and the mode of :math:`Y` for 137 | 0-1 loss :cite:`domingos2000decomp`.
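To make these quantities concrete before defining bias and variance formally, here is a
minimal NumPy sketch of the mean squared error case. It is an illustration of the math
below, not the toolkit's implementation, and the ``predictions`` array is a made-up
example.

.. code-block:: python

    import numpy as np

    # predictions[i, j]: prediction of the i-th model (trained on the i-th
    # training set in D) for the j-th test example.
    predictions = np.array([[0.5, 0.5],
                            [0.0, 1.0],
                            [0.1, 0.6]])
    y_star = np.array([0.0, 1.0])  # correct predictions y_*

    y_m = predictions.mean(axis=0)                   # main prediction (mean for MSE)
    avg_loss = ((predictions - y_star) ** 2).mean()  # average loss over D and x
    avg_bias = ((y_m - y_star) ** 2).mean()          # E_x[B(x)]
    avg_var = ((predictions - y_m) ** 2).mean()      # E_x[V(x)]

    # For squared loss c = 1, so ignoring noise the decomposition is exact.
    assert np.isclose(avg_loss, avg_bias + avg_var)

For 0-1 loss, the main prediction would instead be the per-example mode of the
predictions.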
138 | 139 | Bias can now be defined for a single example :math:`x` over the set of models 140 | trained on :math:`D` as the loss calculated between the main prediction 141 | :math:`y_m` and the correct prediction :math:`y_*` :cite:`domingos2000decomp`. 142 | 143 | .. math:: 144 | B(x) = L(y_*, y_m) 145 | 146 | Variance can now be defined for a single example :math:`x` over the set of 147 | models trained on :math:`D` as the average loss calculated between all predictions 148 | and the main prediction :math:`y_m` :cite:`domingos2000decomp`. 149 | 150 | .. math:: 151 | V(x) = E_D[L(y_m, y)] 152 | 153 | We will need to take the average of the bias over all examples as 154 | :math:`E_x[B(x)]` and the average of the variance over all examples as 155 | :math:`E_x[V(x)]` :cite:`domingos2000decomp`. 156 | 157 | With :math:`N(x)` representing the irreducible error from observation noise, we 158 | can decompose the average expected loss as :cite:`domingos2000decomp` 159 | 160 | .. math:: 161 | E_x[N(x)] + E_x[B(x)] + E_x[cV(x)] 162 | 163 | In other words, the average loss over all examples is equal to the noise plus the 164 | average bias plus the net variance (the :math:`c` factor applied to the variance 165 | when averaging turns the average variance into the net variance). 166 | 167 | .. note:: 168 | We are generalizing the actual value of :math:`N(x)`, as the Model Validation 169 | Toolkit's implementation of bias-variance decomposition does not include noise 170 | in the average expected loss. This noise represents error in the actual data 171 | and not error related to the model itself. If you would like to dive deeper 172 | into the noise representation, please consult the `Pedro Domingos paper 173 | `_. 174 | 175 | For mean squared loss functions, :math:`c = 1`, meaning that average variance 176 | is equal to net variance. 177 | 178 | For zero-one loss functions, :math:`c = 1` when :math:`y_m = y_*`; otherwise 179 | :math:`c = -P_D(y = y_* \mid y \neq y_m)` :cite:`domingos2000decomp`. In other words, 180 | :math:`c` is 1 when the main prediction is the correct prediction. If the main 181 | prediction is not the correct prediction, then :math:`c` is equal to the negative of 182 | the probability of a single prediction being the correct prediction given that the 183 | single prediction is not the main prediction. 184 | 185 | Usage 186 | ===== 187 | 188 | :meth:`bias_variance_compute` will return the average loss, average bias, average 189 | variance, and net variance for an estimator trained and tested over a specified number 190 | of training sets. This was inspired by and modeled after Sebastian Raschka's 191 | `bias_variance_decomp 192 | `_ 193 | function :cite:`mlxtenddecomp`. 194 | We use the `bootstrapping `_ 195 | method to produce our sets of training data from the original training set. By default 196 | it will use mean squared error as the loss function, but it will accept either of the following 197 | functions for calculating loss. 198 | 199 | * :meth:`bias_variance_mse` for mean squared error 200 | * :meth:`bias_variance_0_1_loss` for 0-1 loss 201 | 202 | Since :meth:`bias_variance_compute` trains an estimator over multiple iterations, it also 203 | expects the estimator to be wrapped in a class that extends the 204 | :class:`estimators.EstimatorWrapper` class, which provides the uniform fit and predict 205 | methods that not all estimator implementations natively conform to. A short usage 206 | sketch follows, and the available wrappers are listed after it.
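Below is a sketch of a typical call, assuming ``X_train``, ``y_train``, ``X_test``, and
``y_test`` are already defined; it mirrors the calls in the toolkit's test suite rather
than prescribing the only way to use the API.

.. code-block:: python

    from sklearn.linear_model import Ridge

    from mvtk.bias_variance import bias_variance_compute, bias_variance_mse
    from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper

    # Wrap the estimator so bias_variance_compute can fit and predict uniformly.
    model_wrapped = SciKitLearnEstimatorWrapper(Ridge())

    avg_loss, avg_bias, avg_var, net_var = bias_variance_compute(
        model_wrapped,
        X_train,
        y_train,
        X_test,
        y_test,
        iterations=10,  # number of bootstrap training sets
        random_state=123,
        decomp_fn=bias_variance_mse,
    )

The following estimator wrappers are provided.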
207 | 208 | * :class:`estimators.PyTorchEstimatorWrapper` for `PyTorch `_ 209 | * :class:`estimators.SciKitLearnEstimatorWrapper` for `Scikit-Learn `_ 210 | * :class:`estimators.TensorFlowEstimatorWrapper` for `TensorFlow `_ 211 | 212 | :meth:`bias_variance_compute` works well for smaller data sets and less complex models, but what 213 | happens when you have a very large set of data, a very complex model, or both? 214 | :meth:`bias_variance_compute_parallel` does the same calculation, but leverages `Ray 215 | `_ for parallelization of bootstrapping, training, and predicting. 216 | This allows for faster calculation by distributing the computation across a cluster. 217 | 218 | .. topic:: Tutorials: 219 | 220 | * :doc:`Bias-Variance Visualization ` 221 | * :doc:`Bias-Variance Regression ` 222 | * :doc:`Bias-Variance Classification ` 223 | 224 | .. bibliography:: refs.bib 225 | :cited: 226 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof.
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------