8 |
9 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | pushd %~dp0
4 |
5 | REM Command file for Sphinx documentation
6 |
7 | if "%SPHINXBUILD%" == "" (
8 | set SPHINXBUILD=sphinx-build
9 | )
10 | set SOURCEDIR=.
11 | set BUILDDIR=_build
12 |
13 | if "%1" == "" goto help
14 |
15 | %SPHINXBUILD% >NUL 2>NUL
16 | if errorlevel 9009 (
17 | echo.
18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
19 | echo.installed, then set the SPHINXBUILD environment variable to point
20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
21 | echo.may add the Sphinx directory to PATH.
22 | echo.
23 | echo.If you don't have Sphinx installed, grab it from
24 | echo.http://sphinx-doc.org/
25 | exit /b 1
26 | )
27 |
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 |
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 |
34 | :end
35 | popd
36 |
--------------------------------------------------------------------------------
/docs/css/custom.css:
--------------------------------------------------------------------------------
1 | /* unvisited link */
2 | .wy-side-nav-search a:link {
3 | color: #000000;
4 | }
5 |
6 | /* unvisited link */
7 | .wy-nav-content a:link, .section a:link {
8 | color: #0070B7;
9 | }
10 |
11 | .highlight .c1 {
12 | color: #097B79;
13 | }
14 |
15 | .highlight .si {
16 | color: #D90E39;
17 | }
18 |
19 | .section .nbinput.docutils.container .prompt.highlight-none.notranslate pre {
20 | color: #0070B7;
21 | }
22 |
23 | .section .sig.sig-object.py {
24 | color: #000000;
25 | background-color: rgb(255, 255, 255);
26 | }
27 |
28 | .section .sig.sig-object.py .sig-paren {
29 | color: #0070B7;
30 | }
31 |
32 | .section .nboutput.docutils.container .prompt.highlight-none.notranslate pre {
33 | color: #D90E39;
34 | }
35 |
36 | .section .pre {
37 | color: #D90E39;
38 | }
39 |
40 | .section .admonition .admonition-title {
41 | background-color: #0070B7;
42 | }
43 |
44 | .section .brackets, .section .fn-backref {
45 | color: #0070B7;
46 | }
47 |
48 | /* text */
49 | footer {
50 | color: #6B6B6B;
51 | }
--------------------------------------------------------------------------------
/DCO:
--------------------------------------------------------------------------------
1 | Developer's Certificate of Origin (adapted from the Linux kernel)
2 |
3 | By making a contribution to this project, I certify that:
4 |
5 | (a) The contribution was created in whole or in part by me and I have the right to submit it under the open source license indicated in the file; or
6 | (b) The contribution is based upon previous work that, to the best of my knowledge, is covered under an appropriate open source license and I have the right under that license to submit that work with modifications, whether created in whole or in part by me, under the same open source license (unless I am permitted to submit under a different license), as indicated in the file; or
7 | (c) The contribution was provided directly to me by some other person who certified (a), (b) or (c) and I have not modified it.
8 | (d) I understand and agree that this project and the contribution are public and that a record of the contribution (including all personal information I submit with it, including my sign-off) is maintained indefinitely and may be redistributed consistent with this project or the open source license(s) involved.
9 |
--------------------------------------------------------------------------------
/mvtk/bias_variance/estimators/sklearn_estimator_wrapper.py:
--------------------------------------------------------------------------------
1 | from . import EstimatorWrapper
2 |
3 |
4 | class SciKitLearnEstimatorWrapper(EstimatorWrapper):
5 | def __init__(self, estimator):
6 | r"""Create a wrapper for a Scikit-Learn estimator
7 |
8 | Args:
9 | estimator: Scikit-Learn estimator instance
10 |
11 | Returns:
12 | self
13 | """
14 | self.estimator = estimator
15 |
16 | def fit(self, X, y, **kwargs):
17 | r"""Train the estimator
18 |
19 | Args:
20 | X: features
21 | y: ground truth labels
22 | kwargs (optional): kwargs for use in training
23 |
24 | Returns:
25 | self
26 | """
27 | self.estimator.fit(X, y, **kwargs)
28 | return self
29 |
30 | def predict(self, X, **kwargs):
31 | r"""Get predictions from the estimator
32 |
33 | Args:
34 | X: features
35 | kwargs (optional): kwargs for use in predicting
36 |
37 | Returns:
38 |             predictions
39 | """
40 | return self.estimator.predict(X, **kwargs)
41 |
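# Example usage (a minimal sketch, mirroring
# tests/bias_variance/estimators/test_sklearn_estimator_wrapper.py):
#
#     from sklearn.linear_model import LinearRegression
#
#     wrapped = SciKitLearnEstimatorWrapper(LinearRegression())
#     wrapped.fit(X_train, y_train)
#     y_pred = wrapped.predict(X_test)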
--------------------------------------------------------------------------------
/mvtk/supervisor/processing.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | import public
4 |
5 |
6 | @public.add
7 | def replace_nulls(df, replace, column_names):
8 | return df.fillna({k: replace for k in column_names})
9 |
10 |
11 | # Normalize timestamp column values.
12 | @public.add
13 | def normalize_ts_columns(df, column_names):
14 | for column_name in column_names:
15 | normalize_ts_column(df, column_name)
16 | return df
17 |
18 |
19 | # Convert a timestamp in HH:mm:ss to seconds -
20 | # pandas to_timedelta takes the time format and converts it to seconds.
21 | # Divide the result by the total number of seconds in a day (86400);
22 | # this normalizes the timestamp to a number between 0 and 1.
23 | # Round the value to 5 decimal places.
24 | @public.add
25 | def normalize_ts_column(df, column_name):
26 | df[column_name] = pd.to_timedelta(
27 | df[column_name].dt.strftime("%H:%M:%S")
28 | ).dt.total_seconds()
29 | df[column_name] = df[column_name].replace(np.nan, -1)
30 | df[column_name] = df[column_name].apply(
31 | lambda x: round(x / 86400, 5) if x >= 0 else x
32 | )
33 |
34 | return df
35 |
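# Worked example (a sketch): a timestamp of "10:11:12" is
# 10 * 3600 + 11 * 60 + 12 = 36672 seconds, and 36672 / 86400
# rounds to 0.42444; nulls are mapped to -1.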
--------------------------------------------------------------------------------
/docs/about.rst:
--------------------------------------------------------------------------------
1 | .. _about:
2 |
3 | About
4 | ========
5 |
6 | History
7 | -------
8 |
9 | This project was started by Alex Eftimiades in 2019 as part of an
10 | internal R&D effort focused on model monitoring and sensitivity
11 | analysis. With early usage, testing, and utility contributions from
12 | Dwight Gunning, Matthew Gillett, and Mona Annaparthi, this led to the ``supervisor``
13 | submodule and many of the initial ideas that became the
14 | ``thresholding``, ``sobol``, and ``credibility`` modules. Subsequent
15 | work on explainability led to ``interprenet`` and the normalized
16 | mutual information score within ``metrics``.
17 |
18 | Authors
19 | -------
20 |
21 | The following people are currently core contributors to Model Validation
22 | Toolkit's development and maintenance:
23 |
24 | .. include:: authors.rst
25 |
26 | Please see :doc:`contributing <contributing>` to join us!
27 |
28 | Acknowledgements
29 | ----------------
30 |
31 | We thank David Devakumar, Mohamad Ibrahim, Jonathan Bryant, and Ahmed Ibrahim
32 | for their support, feedback, and help allocating resources to work on this
33 | project. We thank Nil Weerasinghe for his help organizing R&D efforts.
34 |
--------------------------------------------------------------------------------
/tests/credibility/test_credibility.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import itertools
3 | import pandas
4 |
5 | from mvtk import credibility
6 |
7 |
8 | def test_value_error():
9 | try:
10 | credibility.credible_interval(0, 0, prior=(0, 0))
11 | except ValueError:
12 | return
13 | raise Exception("Expected ValueError")
14 |
15 |
16 | def test_equivalence():
17 | assert credibility.credible_interval(0, 1) == credibility.credible_interval(
18 | 1, 2, prior=(0, 0)
19 | )
20 |
21 |
22 | def test_prob_greater_cmp():
23 | nprng = numpy.random.RandomState(0)
24 | prior_sample_size = 10**6
25 | for N in range(2, 8):
26 | for prior1, prior2 in itertools.product(
27 | itertools.product(range(1, 3), repeat=2), repeat=2
28 | ):
29 | df = pandas.DataFrame()
30 | p1 = nprng.beta(*prior1, size=prior_sample_size)
31 | df["positives1"] = nprng.binomial(N, p1)
32 | p2 = nprng.beta(*prior2, size=prior_sample_size)
33 | df["positives2"] = nprng.binomial(N, p2)
34 | df["target"] = p1 > p2
35 | for (p1, p2), subset in df.groupby(["positives1", "positives2"]):
36 | p = subset["target"].mean()
37 | q = credibility.prob_greater_cmp(
38 | p1, N - p1, p2, N - p2, prior1=prior1, prior2=prior2, err=10**-5
39 | )
40 | assert abs(q - p) < 0.05
41 |
--------------------------------------------------------------------------------
/tests/supervisor/test_divergence_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import scipy.sparse
4 |
5 | from mvtk.supervisor.divergence.utils import arrayify
6 |
7 |
8 | def test_arrayify_dataframes():
9 | df_a = pd.DataFrame({"a": list(range(4))})
10 | assert (
11 | df_a.shape == arrayify(df_a)[0].shape
12 | ), "Dataframe shape is same after arrayify"
13 | assert (
14 | df_a.shape == arrayify([df_a])[0].shape
15 | ), "Dataframe shape is same after arrayify"
16 | assert isinstance(arrayify([df_a])[0], np.ndarray)
17 | assert isinstance(arrayify(df_a)[0], np.ndarray)
18 |
19 |
20 | def test_arrayify_numpy():
21 | ones = np.ones((2, 4))
22 | ones_lst = arrayify(ones)
23 | assert (
24 | ones.shape == ones_lst[0].shape
25 |     ), "Shape should be same after arrayify"
26 | ones_lst2 = arrayify([ones])
27 | assert (
28 | ones_lst[0].shape == ones_lst2[0].shape
29 |     ), "Shape should be same after arrayify"
30 | ones_lst3 = arrayify([ones, ones])
31 | assert (
32 | ones_lst[0].shape == ones_lst3[0].shape
33 |     ), "Shape should be same after arrayify"
34 |
35 |
36 | def test_arrayify_csr():
37 | ones = scipy.sparse.csr_matrix(np.ones((2, 4)))
38 | ones_lst = arrayify(ones)
39 | assert (
40 | ones.shape == ones_lst[0].shape
41 |     ), "Shape should be same after arrayify"
42 | ones_lst2 = arrayify([ones])
43 | assert (
44 | ones_lst[0].shape == ones_lst2[0].shape
45 |     ), "Shape should be same after arrayify"
46 | ones_lst3 = arrayify([ones, ones])
47 | assert (
48 | ones_lst[0].shape == ones_lst3[0].shape
49 |     ), "Shape should be same after arrayify"
50 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import find_packages, setup
2 |
3 | _dct = {}
4 | with open("mvtk/version.py") as f:
5 | exec(f.read(), _dct)
6 | __version__ = _dct["__version__"]
7 |
8 | extras_require = {
9 | "doc": [
10 | "nbsphinx",
11 | "sphinx",
12 | "sphinx-rtd-theme",
13 | "sphinxcontrib-bibtex",
14 | "imageio",
15 | "myst-parser",
16 | "ipykernel",
17 | "torch",
18 | "tensorflow",
19 | ],
20 | "pytorch": ["torch"],
21 | "tensorflow": ["tensorflow"],
22 | }
23 | with open("README.md", "r", encoding="utf-8") as fh:
24 | long_description = fh.read()
25 |
26 | setup(
27 | name="mvtk",
28 | version=__version__,
29 | license="Apache-2.0",
30 | author="Alex Eftimiades",
31 | author_email="alexeftimiades@gmail.com",
32 | description="Model validation toolkit",
33 | long_description=long_description,
34 | long_description_content_type="text/markdown",
35 | packages=find_packages(),
36 | classifiers=[
37 | "Programming Language :: Python :: 3",
38 | "License :: OSI Approved :: Apache Software License",
39 | "Operating System :: MacOS",
40 | "Operating System :: POSIX :: Linux",
41 | ],
42 | install_requires=[
43 | "jax>=0.2.8,<=0.4.16",
44 | "public>=2020.12.3",
45 | "fastcore>=1.3.25",
46 | "jaxlib>=0.1.23,<=0.4.16",
47 | "scikit-learn",
48 | "numpy",
49 | "matplotlib",
50 | "scipy",
51 | "seaborn",
52 | "pandas>=0.23.4",
53 | "tqdm",
54 | "ray",
55 | ],
56 | extras_require=extras_require,
57 | url="https://finraos.github.io/model-validation-toolkit/",
58 | project_urls={
59 | "Bug Tracker": "https://github.com/FINRAOS/model-validation-toolkit/issues",
60 | },
61 | )
62 |
--------------------------------------------------------------------------------
/mvtk/sobol.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import public
3 |
4 |
5 | def choose(x, N, nprng=None):
6 | if nprng is None:
7 | nprng = numpy.random.RandomState(0)
8 | return x[nprng.choice(numpy.arange(len(x), dtype="int"), N)]
9 |
10 |
11 | @public.add
12 | def sobol(model, data, N=None, nprng=None):
13 | """Total and first order Sobol sensitivity indices.
14 | https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis.
15 |
16 | Args:
17 | model (function): Maps data to scores
18 | data (ndarray): Data matrix. Each row is a sample vector.
19 |         N (int): sample size for Monte Carlo estimate of Sobol
20 | indices. Should be less than or equal to the number of rows
21 | of data. If None, entire dataset is used.
22 | nprng (RandomState): Optional numpy RandomState.
23 |     Returns:
24 | Total and first order Sobol sensitivity indices. Each index
25 | is expressed as an array of length equal to the number of
26 | features in the supplied data matrix.
27 | """
28 | if nprng is None:
29 | nprng = numpy.random.RandomState(0)
30 | if N is None:
31 | A = data.copy()
32 | B = data.copy()
33 | nprng.shuffle(A)
34 | nprng.shuffle(B)
35 | N = len(data)
36 | elif N > len(data):
37 | raise ValueError("Sample size must be less than or equal to size of dataset")
38 | else:
39 | A, B = (choose(data, N, nprng) for _ in range(2))
40 | d = data.shape[1]
41 | total = []
42 | first_order = []
43 | for i in range(d):
44 | C = A[:, i].copy()
45 | A[:, i] = B[:, i]
46 | diff = model(A)
47 | A[:, i] = C
48 | diff -= model(A)
49 | first_order.append(model(B).dot(diff) / N)
50 | total.append(diff.dot(diff) / (2 * N))
51 | variance_y = model(numpy.vstack((A, B))).std() ** 2
52 | total = numpy.asarray(total) / variance_y
53 | first_order = numpy.asarray(first_order) / variance_y
54 | return total, first_order
55 |
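# Illustration (a sketch): for model = lambda x: (x ** 2).dot([1, 2, 3, 4])
# on independent standard normal features, the model is additive, so the
# first-order and total indices roughly coincide and grow with the square
# of each coefficient, making feature 4 the most influential.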
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | [](https://circleci.com/gh/FINRAOS/model-validation-toolkit/tree/main)[](https://gitter.im/FINRAOS/model-validation-toolkit?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)[](https://pypi.org/project/mvtk/)
4 |
5 | # Model Validation Toolkit
6 |
7 | ## Installation
8 |
9 | Run `pip install mvtk`.
10 |
11 | **Windows users**: Until [Jaxlib is supported on Windows
12 | natively](https://github.com/google/jax/issues/438) you will need to either use
13 | this library from a Linux subsystem or within a Docker container.
14 | Alternatively, you can [build jaxlib from
15 | source](https://jax.readthedocs.io/en/latest/developer.html#additional-notes-for-building-jaxlib-from-source-on-windows).
16 |
17 | ## Developers
18 |
19 | Check out this repository and `cd` into the directory.
20 |
21 | Run `pip install -e ".[doc]"`.
22 |
23 | The `[doc]` is used to install dependencies for building documentation. You
24 | will need [pandoc](https://pandoc.org/) installed.
25 |
26 | # Submodules
27 | You can import:
28 |
29 | - `mvtk.credibility` for assessing credibility from sample size.
30 | - `mvtk.interprenet` for building interpretable neural nets.
31 | - `mvtk.thresholding` for adaptive thresholding.
32 | - `mvtk.sobol` for Sobol sensitivity analysis.
33 | - `mvtk.supervisor` for divergence analysis.
34 | - `mvtk.metrics` for specialised metrics.
35 | - `mvtk.bias_variance` for bias-variance decomposition.
36 |
37 | # Documentation
38 | You can run `make -C docs html` on a Mac or Linux, or `cd docs && make.bat html` on Windows, to rebuild just the docs. In this case, point your browser to `docs/_build/html/index.html` to view the homepage. If your browser was already pointing to documentation that you changed, you can refresh the page to see the changes.
39 |
--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
1 | Model Validation Toolkit
2 | ===================================
3 |
4 | The Model Validation Toolkit is a library for model validation, meta-analysis, and monitoring.
5 |
6 | .. toctree::
7 | :glob:
8 | :maxdepth: 1
9 | :caption: Notes
10 |
11 | .. toctree::
12 | :maxdepth: 1
13 | :caption: Overview
14 |
15 | quickstart
16 | contributing
17 | about
18 |
19 | .. toctree::
20 | :maxdepth: 1
21 | :caption: User Guides
22 |
23 | supervisor_user_guide
24 | credibility_user_guide
25 | thresholding_user_guide
26 | interprenet_user_guide
27 | sobol_user_guide
28 | bias_variance_user_guide
29 |
30 | .. toctree::
31 | :maxdepth: 1
32 | :caption: Divergence Tutorials
33 |
34 | notebooks/divergence/Airlines
35 | notebooks/divergence/DivergenceFunctions
36 | notebooks/divergence/CategoricalColumns
37 | notebooks/divergence/BugDetection
38 | notebooks/divergence/TrainingDatasetDrift
39 |
40 | .. toctree::
41 | :maxdepth: 1
42 | :caption: Credibility Tutorials
43 |
44 | notebooks/credibility/Credibility
45 |
46 | .. toctree::
47 | :maxdepth: 1
48 | :caption: Thresholding Tutorials
49 |
50 | notebooks/thresholding/Thresholding
51 |
52 | .. toctree::
53 | :maxdepth: 1
54 | :caption: Interprenet Tutorials
55 |
56 | notebooks/interprenet/Interprenet
57 |
58 | .. toctree::
59 | :maxdepth: 1
60 | :caption: Bias and Metrics Tutorials
61 |
62 | notebooks/metrics/CounteringSampleBias
63 |
64 | .. toctree::
65 | :maxdepth: 1
66 | :caption: Bias-Variance Decomposition Tutorials
67 |
68 | notebooks/bias_variance/BiasVarianceClassification
69 | notebooks/bias_variance/BiasVarianceRegression
70 | notebooks/bias_variance/BiasVarianceVisualization
71 |
72 | .. toctree::
73 | :maxdepth: 1
74 | :caption: Python API
75 |
76 | supervisor
77 | credibility
78 | thresholding
79 | interprenet
80 | sobol
81 | metrics
82 | bias_variance
83 |
84 | Indices and tables
85 | ==================
86 |
87 | * :ref:`genindex`
88 | * :ref:`modindex`
89 |
--------------------------------------------------------------------------------
/mvtk/supervisor/divergence/utils.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import numpy
3 | import scipy.sparse
4 | import public
5 |
6 | from mvtk.supervisor.utils import parallel
7 |
8 |
9 | @public.add
10 | def get_drift_series(metric, baseline, test):
11 | return numpy.asarray(parallel(lambda x: metric(x, baseline), test))
12 |
13 |
14 | @public.add
15 | def get_distance_matrix(metric, sample_distributions, show_progress=False):
16 | distance_matrix = numpy.zeros((len(sample_distributions),) * 2)
17 | for index, d in parallel(
18 | lambda x: (x[0], metric(x[1][0], x[1][1])),
19 | [
20 | list(zip(*x))
21 | for x in itertools.combinations(enumerate(sample_distributions), 2)
22 | ],
23 | show_progress=show_progress,
24 | ):
25 | distance_matrix[index] = d
26 | distance_matrix += distance_matrix.T
27 | return distance_matrix
28 |
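# Example (a sketch): given sample sets s1, s2, s3 and a divergence metric
# such as mvtk.supervisor.divergence.calc_tv,
#
#     get_distance_matrix(calc_tv, [s1, s2, s3])
#
# returns a symmetric 3x3 matrix of pairwise divergences with zeros on the
# diagonal.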
29 |
30 | @public.add
31 | def sparse_wrapper(v):
32 | class _SparseWrapper(type(v)):
33 | def __getitem__(self, i):
34 | ret = super().__getitem__(i)
35 | if isinstance(i, int):
36 | return ret.toarray()[0]
37 | return ret
38 |
39 | def __len__(self):
40 | return self.shape[0]
41 |
42 | return _SparseWrapper(v)
43 |
44 |
45 | def to_array_like(v):
46 | if hasattr(v, "values"):
47 | return v.values
48 | if isinstance(v, scipy.sparse.spmatrix):
49 | return sparse_wrapper(v)
50 | return v
51 |
52 |
53 | @public.add
54 | def arrayify(item):
55 | """Convert the value to at least dim 3. If is dataframe it converts it to a
56 | list of values.
57 |
58 | :param item: ndarray or a list of ndarray, or a dataframe, a series or a
59 | list of dataframes or series
60 | :return: a list of dataframes/series or array of dim 3
61 | """
62 | if hasattr(item, "shape"):
63 | ret = to_array_like(item)
64 | if len(ret.shape) == 2:
65 | return [ret]
66 | if len(ret.shape) == 1:
67 | return numpy.atleast_3d(ret)
68 | return list(map(to_array_like, item))
69 |
--------------------------------------------------------------------------------
/mvtk/bias_variance/estimators/tensorflow_estimator_wrapper.py:
--------------------------------------------------------------------------------
1 | from . import EstimatorWrapper
2 |
3 |
4 | class TensorFlowEstimatorWrapper(EstimatorWrapper):
5 | def __init__(self, estimator):
6 | r"""Create a wrapper for a TensorFlow estimator
7 |
8 | Args:
9 | estimator: TensorFlow estimator instance
10 |
11 | Returns:
12 | self
13 | """
14 | self.estimator = estimator
15 |
16 | def fit(self, X, y, **kwargs):
17 | r"""Train the estimator
18 |
19 | Args:
20 | X: features
21 | y: ground truth labels
22 | kwargs (optional): kwargs for use in training
23 |
24 | Returns:
25 | self
26 | """
27 | self._reset_weights()
28 | self.estimator.fit(X, y, **kwargs)
29 | return self
30 |
31 | def predict(self, X, **kwargs):
32 | r"""Get predictions from the estimator
33 |
34 | Args:
35 | X: features
36 | kwargs (optional): kwargs for use in predicting
37 |
38 | Returns:
39 |             predictions
40 | """
41 | predictions = self.estimator.predict(X, **kwargs)
42 | prediction_list = []
43 | for prediction in predictions:
44 | if len(prediction) > 1:
45 | prediction_list.append(prediction.argmax().item())
46 | else:
47 | prediction_list.append(prediction.item())
48 | return prediction_list
49 |
50 | def _reset_weights(self):
51 | r"""Reset weights of the estimator"""
52 | import tensorflow as tf
53 |
54 | for layer in self.estimator.layers:
55 | if hasattr(layer, "kernel_initializer") and hasattr(layer, "kernel"):
56 | layer.kernel.assign(layer.kernel_initializer(tf.shape(layer.kernel)))
57 | if hasattr(layer, "bias_initializer") and hasattr(layer, "bias"):
58 | layer.bias.assign(layer.bias_initializer(tf.shape(layer.bias)))
59 |             if hasattr(layer, "recurrent_initializer") and hasattr(
60 |                 layer, "recurrent_kernel"
61 |             ):
62 |                 layer.recurrent_kernel.assign(
63 |                     layer.recurrent_initializer(tf.shape(layer.recurrent_kernel))
64 |                 )
65 |
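# Example usage (a minimal sketch; a compiled tf.keras model is assumed, as
# in tests/bias_variance/estimators/test_tensorflow_estimator_wrapper.py):
#
#     wrapped = TensorFlowEstimatorWrapper(model)
#     wrapped.fit(X_train, y_train, epochs=10)
#     preds = wrapped.predict(X_test)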
--------------------------------------------------------------------------------
/tests/test_metrics.py:
--------------------------------------------------------------------------------
1 | import itertools
2 | import numpy
3 |
4 | from mvtk import metrics
5 |
6 |
7 | def test_rank_auc():
8 | nprng = numpy.random.RandomState(0)
9 | S = 32
10 | y_true, y_pred = nprng.randint(0, 5, S), nprng.uniform(size=S).round(1)
11 | N = 0
12 | auc = 0
13 | for (true1, pred1), (true2, pred2) in itertools.product(
14 | zip(y_true, y_pred), repeat=2
15 | ):
16 | if true1 > true2:
17 | if pred1 == pred2:
18 | auc += 0.5
19 | else:
20 | auc += pred1 > pred2
21 | N += 1
22 | auc /= N
23 | assert metrics.rank_auc(y_true, y_pred) == auc
24 |
25 |
26 | def test_monotonicity():
27 | nprng = numpy.random.RandomState(0)
28 | S = 32
29 | y_true, y_pred = nprng.randint(0, 5, S), nprng.uniform(size=S).round(1)
30 | N = 0
31 | auc = 0
32 | for (true1, pred1), (true2, pred2) in itertools.product(
33 | zip(y_true, y_pred), repeat=2
34 | ):
35 | if true1 - true2 == 1:
36 | if pred1 == pred2:
37 | auc += 0.5
38 | else:
39 | auc += pred1 > pred2
40 | N += 1
41 | auc /= N
42 | assert metrics.monotonicity(y_true, y_pred) == auc
43 |
44 |
45 | def weighted_roc_auc(y_test, y_pred, weights):
46 | def process(stuff):
47 | (pos, w_p), (neg, w_n) = stuff
48 | p = w_p * w_n
49 | return p * (0.5 if pos == neg else pos > neg), p
50 |
51 | mask = y_test == 1
52 | positives, w_pos = y_pred[mask], weights[mask]
53 | negatives, w_neg = y_pred[~mask], weights[~mask]
54 | numerator, denominator = map(
55 | sum,
56 | zip(
57 | *map(
58 | process, itertools.product(zip(positives, w_pos), zip(negatives, w_neg))
59 | )
60 | ),
61 | )
62 |
63 | return numerator / denominator
64 |
65 |
66 | def test_weighted_roc_auc():
67 | nprng = numpy.random.RandomState(0)
68 | S = 32
69 | y_true, y_pred, weights = (
70 | nprng.randint(0, 2, S),
71 | nprng.uniform(size=S).round(1),
72 | nprng.uniform(size=S),
73 | )
74 | assert (
75 | abs(
76 | weighted_roc_auc(y_true, y_pred, weights)
77 | - metrics.rank_auc(y_true, y_pred, weights)
78 | )
79 | < 2**-32
80 | )
81 |
--------------------------------------------------------------------------------
/tests/bias_variance/estimators/test_sklearn_estimator_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.linear_model import LinearRegression
3 | from sklearn.tree import DecisionTreeClassifier
4 |
5 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper
6 |
7 |
8 | def create_data():
9 | X_train = np.arange(12).reshape(6, 2)
10 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
11 | X_test = np.arange(6).reshape(3, 2)
12 | y_test = np.array([0, 1, 1])
13 |
14 | return X_train, y_train, X_test, y_test
15 |
16 |
17 | def test_sklearn_estimator_wrapper():
18 | X_train, y_train, X_test, y_test = create_data()
19 |
20 | model = LinearRegression()
21 |
22 | model.fit(X_train, y_train)
23 | pred = model.predict(X_test)
24 |
25 | model_test = LinearRegression()
26 | model_wrapped = SciKitLearnEstimatorWrapper(model_test)
27 |
28 | model_wrapped.fit(X_train, y_train)
29 | pred_wrapped = model_wrapped.predict(X_test)
30 |
31 | assert np.array_equal(pred, pred_wrapped)
32 |
33 |
34 | def test_sklearn_estimator_wrapper_kwargs_fit():
35 | X_train, y_train, X_test, y_test = create_data()
36 |
37 | model = DecisionTreeClassifier(random_state=123)
38 |
39 | model.fit(X_train, y_train, sample_weight=[0, 0, 1, 0, 1, 0])
40 | pred = model.predict(X_test)
41 |
42 | model_test = DecisionTreeClassifier(random_state=123)
43 | model_wrapped = SciKitLearnEstimatorWrapper(model_test)
44 |
45 | model_wrapped.fit(X_train, y_train, sample_weight=[0, 0, 1, 0, 1, 0])
46 | pred_wrapped = model_wrapped.predict(X_test)
47 |
48 | assert np.array_equal(pred, pred_wrapped)
49 |
50 |
51 | def test_sklearn_estimator_wrapper_kwargs_predict():
52 | X_train, y_train, X_test, y_test = create_data()
53 |
54 | model = DecisionTreeClassifier(random_state=123)
55 |
56 |     model.fit(X_train, y_train)
57 |     # Both the raw estimator and the wrapper should forward the kwarg and
58 |     # raise the same ValueError on integer input when check_input=False.
59 |     try:
60 |         model.predict(X_test, check_input=False)
61 |         assert False
62 |     except ValueError as e:
63 |         assert e.args[0] == "X.dtype should be np.float32, got int64"
64 |
65 |     model_test = DecisionTreeClassifier(random_state=123)
66 |     model_wrapped = SciKitLearnEstimatorWrapper(model_test)
67 |
68 |     model_wrapped.fit(X_train, y_train)
69 |     try:
70 |         model_wrapped.predict(X_test, check_input=False)
71 |         assert False
72 |     except ValueError as e:
73 |         assert e.args[0] == "X.dtype should be np.float32, got int64"
74 |
--------------------------------------------------------------------------------
/docs/sobol_user_guide.rst:
--------------------------------------------------------------------------------
1 | ################
2 | Sobol User Guide
3 | ################
4 |
5 | **********
6 | Motivation
7 | **********
8 |
9 | `Sensitivity analysis <https://en.wikipedia.org/wiki/Sensitivity_analysis>`_ is
10 | concerned with the degree to which uncertainty in the output of a model can be
11 | attributed to uncertainty in its inputs :cite:`saltelli2008global`. Variance
12 | based sensitivity analysis, commonly known as `Sobol sensitivity analysis
13 | <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_, seeks to
14 | answer this question by attributing the variance of the output to variances in
15 | one or more inputs. This breakdown is known as the Sobol indices, which are
16 | typically measured in one of two ways: *first-order* indices and *total-effect*
17 | indices :cite:`sobol2001global`.
18 |
19 | The first-order Sobol index with respect to some feature is given by averaging
20 | the output of the model over all values of all other features and
21 | computing the variance of the result as the feature in question varies.
22 | This is normalized by dividing by the total variance of the output
23 | measured by varying all feature values :cite:`im1993sensitivity`. The
24 | first-order indices sum to a number between 0 and 1. The total-effect
25 | index is computed by first computing the variance of the
26 | model output with respect to the feature in question, and then computing the
27 | expectation of the result over values of all other
28 | features. This is again normalized by the variance
29 | of the output of the model across all features.
30 | The total-effect indices sum to a number greater than
31 | or equal to 1. Both are discussed in more detail
32 | `here <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_.
33 |
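In symbols (a standard formulation; :math:`Y` denotes the model output,
:math:`X_i` the feature in question, and :math:`X_{\sim i}` all other
features):

.. math::

   S_i = \frac{\operatorname{Var}_{X_i}\left(E_{X_{\sim i}}\left[Y \mid X_i\right]\right)}{\operatorname{Var}(Y)},
   \qquad
   S_{T_i} = \frac{E_{X_{\sim i}}\left[\operatorname{Var}_{X_i}\left(Y \mid X_{\sim i}\right)\right]}{\operatorname{Var}(Y)}
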
34 | .. currentmodule:: sobol
35 |
36 | :meth:`sobol` takes a model and dataset, and runs a
37 | Monte Carlo simulation as described in the above
38 | link to compute the first-order and total-effect
39 | Sobol indices. Each index is expressed as a one
40 | dimensional array of length equal to the number of
41 | features in the supplied data matrix. The model is
42 | assumed to be a function that outputs one scalar
43 | for each row of the data matrix.
44 |
45 | .. code-block:: python
46 |
47 | import numpy
48 | from mvtk import sobol
49 |
50 | nprng = numpy.random.RandomState(0)
51 |
52 | data = nprng.normal(size=(1000, 4)) # 4 features
53 | model = lambda x: (x ** 2).dot([1, 2, 3, 4])
54 | total, first_order = sobol.sobol(model, data, N=500)
55 |
56 | .. bibliography:: refs.bib
57 | :cited:
58 |
--------------------------------------------------------------------------------
/docs/images/logo.svg:
--------------------------------------------------------------------------------
1 |
6 |
--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
1 | # Configuration file for the Sphinx documentation builder.
2 | #
3 | # This file only contains a selection of the most common options. For a full
4 | # list see the documentation:
5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
6 |
7 | # -- Path setup --------------------------------------------------------------
8 |
9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | import os
14 | import sys
15 |
16 | sys.path.insert(0, os.path.abspath("../"))
17 |
18 |
19 | # -- Project information -----------------------------------------------------
20 |
21 | project = "Model Validation Toolkit"
22 | copyright = "2021, Model Validation Toolkit Team"
23 | author = "Model Validation Toolkit Team"
24 |
25 | # The full version, including alpha/beta/rc tags
26 | release = "0.2.0"
27 |
28 |
29 | # -- General configuration ---------------------------------------------------
30 |
31 | # Add any Sphinx extension module names here, as strings. They can be
32 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
33 | # ones.
34 | extensions = [
35 | "sphinx.ext.autodoc",
36 | "sphinx.ext.autosummary",
37 | "sphinxcontrib.bibtex",
38 | "sphinx.ext.intersphinx",
39 | "sphinx.ext.mathjax",
40 | "sphinx.ext.napoleon",
41 | "sphinx.ext.viewcode",
42 | "nbsphinx",
43 | "myst_parser",
44 | ]
45 |
46 | # Add any paths that contain templates here, relative to this directory.
47 | templates_path = ["_templates"]
48 |
49 | # List of patterns, relative to source directory, that match files and
50 | # directories to ignore when looking for source files.
51 | # This pattern also affects html_static_path and html_extra_path.
52 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
53 |
54 |
55 | # -- Options for HTML output -------------------------------------------------
56 |
57 | # The theme to use for HTML and HTML Help pages. See the documentation for
58 | # a list of builtin themes.
59 | #
60 | html_theme = "sphinx_rtd_theme"
61 |
62 | # Add any paths that contain custom static files (such as style sheets) here,
63 | # relative to this directory. They are copied after the builtin static files,
64 | # so a file named "default.css" will overwrite the builtin "default.css".
65 | html_static_path = ["css", "images"]
66 | html_css_files = ["custom.css"]
67 | html_logo = "images/logo.svg"
68 | html_theme_options = {
69 | "display_version": False,
70 | }
71 | html_favicon = html_logo
72 |
73 | # A fix for Sphinx error contents.rst not found
74 | master_doc = "index"
75 |
76 | # increase the timeout for long-running notebooks
77 | nbsphinx_timeout = 900
78 |
79 | # Don't show full paths
80 | add_module_names = False
81 |
82 | # bibtex
83 | bibtex_bibfiles = ["refs.bib"]
84 |
85 | source_suffix = {
86 | ".rst": "restructuredtext",
87 | ".txt": "markdown",
88 | ".md": "markdown",
89 | }
90 |
91 | user_agent = "Mozilla/5.0 (X11; Linux x86_64; rv:25.0) Gecko/20100101 Firefox/25.0"
92 |
--------------------------------------------------------------------------------
/tests/bias_variance/estimators/test_tensorflow_estimator_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 |
4 | from mvtk.bias_variance.estimators import TensorFlowEstimatorWrapper
5 |
6 |
7 | def create_data():
8 | X_train = np.arange(12).reshape(6, 2)
9 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
10 | X_test = np.arange(6).reshape(3, 2)
11 | y_test = np.array([0, 1, 1])
12 |
13 | return X_train, y_train, X_test, y_test
14 |
15 |
16 | def create_model():
17 | model = tf.keras.Sequential(
18 | [
19 | tf.keras.layers.Dense(64, activation="relu"),
20 | tf.keras.layers.Dense(64, activation="relu"),
21 | tf.keras.layers.Dense(1),
22 | ]
23 | )
24 |
25 | model.compile(
26 | optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
27 | loss="mean_absolute_error",
28 | metrics=["mean_squared_error"],
29 | )
30 |
31 | return model
32 |
33 |
34 | def predict(estimator, X, **kwargs):
35 | predictions = estimator.predict(X, **kwargs)
36 | prediction_list = []
37 | for prediction in predictions:
38 | if len(prediction) > 1:
39 | prediction_list.append(prediction.argmax().item())
40 | else:
41 | prediction_list.append(prediction.item())
42 | return prediction_list
43 |
44 |
45 | def test_tensorflow_estimator_wrapper():
46 | X_train, y_train, X_test, y_test = create_data()
47 |
48 | tf.keras.utils.set_random_seed(123)
49 | model = create_model()
50 |
51 | model.fit(X_train, y_train)
52 | pred = predict(model, X_test)
53 |
54 | tf.keras.utils.set_random_seed(123)
55 | model_test = create_model()
56 | model_wrapped = TensorFlowEstimatorWrapper(model_test)
57 |
58 | model_wrapped.fit(X_train, y_train)
59 | pred_wrapped = model_wrapped.predict(X_test)
60 |
61 | assert np.array_equal(pred, pred_wrapped)
62 |
63 |
64 | def test_tensorflow_estimator_wrapper_kwargs_fit():
65 | X_train, y_train, X_test, y_test = create_data()
66 |
67 | tf.keras.utils.set_random_seed(123)
68 | model = create_model()
69 |
70 | model.fit(X_train, y_train, epochs=10)
71 | pred = predict(model, X_test)
72 |
73 | tf.keras.utils.set_random_seed(123)
74 | model_test = create_model()
75 | model_wrapped = TensorFlowEstimatorWrapper(model_test)
76 |
77 | model_wrapped.fit(X_train, y_train, epochs=10)
78 | pred_wrapped = model_wrapped.predict(X_test)
79 |
80 | assert np.array_equal(pred, pred_wrapped)
81 |
82 |
83 | def test_tensorflow_estimator_wrapper_kwargs_predict():
84 | X_train, y_train, X_test, y_test = create_data()
85 |
86 | tf.keras.utils.set_random_seed(123)
87 | model = create_model()
88 |
89 | model.fit(X_train, y_train)
90 | pred = predict(model, X_test, steps=10)
91 |
92 | tf.keras.utils.set_random_seed(123)
93 | model_test = create_model()
94 | model_wrapped = TensorFlowEstimatorWrapper(model_test)
95 |
96 | model_wrapped.fit(X_train, y_train)
97 | pred_wrapped = model_wrapped.predict(X_test, steps=10)
98 |
99 | assert np.array_equal(pred, pred_wrapped)
100 |
--------------------------------------------------------------------------------
/.circleci/config.yml:
--------------------------------------------------------------------------------
1 | version: 2.1
2 |
3 | jobs:
4 | test:
5 | docker:
6 | - image: cimg/python:3.8
7 | steps:
8 | - checkout
9 | - run: sudo apt-get update
10 | - run: sudo apt-get install pandoc
11 | - run: python -m pip install tox
12 | - run: python -m tox
13 | - run: ls -la docs
14 | - persist_to_workspace:
15 | root: docs
16 | paths: html
17 | docs-deploy:
18 | docker:
19 | - image: cimg/python:3.8
20 | steps:
21 | - run:
22 | name: add known_hosts
23 | command: |
24 | mkdir ~/.ssh
25 | printf "%s" 'github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ==
26 | github.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBEmKSENjQEezOmxkZMy7opKgwFB9nkt5YRrYMjNuG5N87uRgg6CLrbo5wAdT/y6v0mKV0U2w0WZ2YB/++Tpockg=
27 | github.com ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOMqqnkVzrm0SdG6UOoqKLsabgH5C9okWi0dh2l9GKJl
28 | bitbucket.org ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAubiN81eDcafrgMeLzaFPsw2kNvEcqTKl/VqLat/MaB33pZy0y3rJZtnqwR2qOOvbwKZYKiEO1O6VqNEBxKvJJelCq0dTXWT5pbO2gDXC6h6QDXCaHo6pOHGPUy+YBaGQRGuSusMEASYiWunYN0vCAI8QaXnWMXNMdFP3jHAJH0eDsoiGnLPBlBp4TNm6rYI74nMzgz3B9IikW4WVK+dc8KZJZWYjAuORU3jc1c/NPskD2ASinf8v3xnfXeukU0sJ5N6m5E8VLjObPEO+mN2t/FZTMZLiFqPWc/ALSqnMnnhwrNi2rbfg/rd/IpL8Le3pSBne8+seeFVBoGqzHM9yXw==
29 | gitlab.com ecdsa-sha2-nistp256 AAAAE2VjZHNhLXNoYTItbmlzdHAyNTYAAAAIbmlzdHAyNTYAAABBBFSMqzJeV9rUzU4kWitGjeR4PWSa29SPqJ1fVkhtj3Hw9xjLVXVYrU9QlYWrOLXBpQ6KWjbjTDTdDkoohFzgbEY=
30 | ' > ~/.ssh/known_hosts
31 | chmod 0600 ~/.ssh/known_hosts
32 | - run:
33 | name: Checkout main
34 | command: |
35 | git clone $CIRCLE_REPOSITORY_URL --depth 1 -b main main
36 | cd main
37 | git config --global user.email "$(git log --format=%ae -n 1)"
38 | git config --global user.name "$(git log --format=%an -n 1)"
39 | echo "export msg="\"$(git log --format=%B -n 1)\" >> $BASH_ENV
40 | - run:
41 | name: Checkout website
42 | command: |
43 | git clone $CIRCLE_REPOSITORY_URL --depth 1 -b website website
44 | rm -rf website/docs/html
45 | - attach_workspace:
46 | at: website/docs
47 | - run:
48 | name: Copy CircleCI config
49 | command: |
50 | mkdir -p website/.circleci
51 | cp main/.circleci/config.yml website/.circleci/config.yml
52 | - add_ssh_keys:
53 | fingerprints:
54 | - "dd:11:5d:b8:a7:d2:be:16:47:4e:a0:66:00:96:b4:f7"
55 | - run:
56 | name: Deploy docs to website branch
57 | command: |
58 | cd website
59 | git add .circleci/config.yml
60 | git add -A -- docs/html
61 | git commit -am "$msg"
62 | git push origin website
63 | workflows:
64 | version: 2
65 | build:
66 | jobs:
67 | - test:
68 | filters:
69 | branches:
70 | ignore: website
71 | - docs-deploy:
72 | requires:
73 | - test
74 | filters:
75 | branches:
76 | only: main
77 |
--------------------------------------------------------------------------------
/tests/supervisor/test_processing.py:
--------------------------------------------------------------------------------
1 | import copy
2 |
3 | import pandas as pd
4 | import pandas.testing
5 |
6 | from mvtk.supervisor.processing import (
7 | replace_nulls,
8 | normalize_ts_columns,
9 | )
10 |
11 |
12 | def test_replace_nulls():
13 | for col_list in [["col1"], ["col2"], ["col1", "col2"]]:
14 | init_rows = [
15 | {"col1": "test1_1", "col2": "test1_2"},
16 | {"col1": None, "col2": "test2_2"},
17 | {"col1": "test3_1", "col2": None},
18 | {"col1": None, "col2": None},
19 | ]
20 |
21 | expect_rows = copy.deepcopy(init_rows)
22 |
23 | for i in range(0, len(expect_rows)):
24 | for col in col_list:
25 | if expect_rows[i][col] is None:
26 | expect_rows[i][col] = "1"
27 |
28 | init_df = pd.DataFrame(init_rows)
29 | expect_df = pd.DataFrame(expect_rows)
30 |
31 | actual = replace_nulls(init_df, "1", col_list)
32 | expect = expect_df
33 |
34 | pandas.testing.assert_frame_equal(actual, expect)
35 |
36 |
37 | def time_to_seconds(time):
38 | return int(time[:2]) * 3600 + int(time[2:4]) * 60 + int(time[4:6])
39 |
40 |
41 | def test_process_ts_columns():
42 | format_map = {"col2": "%H:%M:%S.%f", "col3": "%H%M%S.%f", "col4": "%H%M%S"}
43 |
44 | for col_list in [
45 | ["col2"],
46 | ["col3"],
47 | ["col4"],
48 | ["col2", "col3"],
49 | ["col2", "col4"],
50 | ["col3", "col4"],
51 | ["col2", "col3", "col4"],
52 | ]:
53 | init_rows = [
54 | {
55 | "col1": "test1",
56 | "col2": "10:11:12.123456",
57 | "col3": "101112.123456",
58 | "col4": "101112",
59 | },
60 | {
61 | "col1": "test2",
62 | "col2": None,
63 | "col3": "202123.123456",
64 | "col4": "202124",
65 | },
66 | {
67 | "col1": "test3",
68 | "col2": "10:31:32.123456",
69 | "col3": None,
70 | "col4": "103134",
71 | },
72 | {
73 | "col1": "test4",
74 | "col2": "20:41:42.123456",
75 | "col3": "204143.123456",
76 | "col4": None,
77 | },
78 | ]
79 |
80 | expect_rows = copy.deepcopy(init_rows)
81 |
82 | for i in range(0, len(expect_rows)):
83 | for col in col_list:
84 | if expect_rows[i][col] is None:
85 | expect_rows[i][col] = -1
86 | else:
87 | expect_rows[i][col] = str(
88 | round(
89 | time_to_seconds(expect_rows[i][col].replace(":", ""))
90 | / 86400,
91 | 5,
92 | )
93 | )
94 |
95 | init_df = pd.DataFrame(init_rows)
96 | expect = pd.DataFrame(expect_rows)
97 |
98 | for col in ["col2", "col3", "col4"]:
99 | init_df[col] = pd.to_datetime(init_df[col], format=format_map[col])
100 | if col not in col_list:
101 | expect[col] = pd.to_datetime(expect[col], format=format_map[col])
102 | else:
103 | expect[col] = expect[col].astype(float)
104 |
105 | actual = normalize_ts_columns(init_df, col_list)
106 |
107 | pandas.testing.assert_frame_equal(actual, expect)
108 |
--------------------------------------------------------------------------------
/mvtk/bias_variance/estimators/pytorch_estimator_wrapper.py:
--------------------------------------------------------------------------------
1 | from . import EstimatorWrapper
2 |
3 |
4 | class PyTorchEstimatorWrapper(EstimatorWrapper):
5 | def __init__(
6 | self, estimator, optimizer_generator, loss_fn, fit_fn=None, predict_fn=None
7 | ):
8 | r"""Create a wrapper for a PyTorch estimator
9 |
10 | Args:
11 | estimator: PyTorch estimator instance
12 | optimizer_generator: generator function for the optimizer
13 | loss_fn: loss function
14 | fit_fn (optional): custom fit function to be called instead of default one
15 | predict_fn (optional): custom predict function to be called instead
16 | of default one
17 |
18 | Returns:
19 | self
20 | """
21 | self.estimator = estimator
22 | self.optimizer_generator = optimizer_generator
23 | self.optimizer = optimizer_generator(estimator)
24 | self.loss_fn = loss_fn
25 | self.fit_fn = fit_fn
26 | self.predict_fn = predict_fn
27 |
28 | def fit(self, X, y, **kwargs):
29 | r"""Train the estimator
30 |
31 | Args:
32 | X: features
33 | y: ground truth labels
34 | kwargs (optional): kwargs for use in training
35 |
36 | Returns:
37 | self
38 | """
39 | self.estimator.apply(PyTorchEstimatorWrapper._reset_parameters)
40 |
41 | if self.fit_fn is not None:
42 | self.fit_fn(self, X, y, **kwargs)
43 | return self
44 |
45 |         if kwargs.get("epochs") is None:
46 |             epochs = 100
47 |         else:
48 |             epochs = kwargs.get("epochs")
49 |         if kwargs.get("batch_size") is None:
50 |             batch_size = len(y)
51 |         else:
52 |             batch_size = kwargs.get("batch_size")
53 |
54 |         for i in range(epochs):
55 |             loss = 0
56 |             for j in range(0, len(y), batch_size):
57 | batch_start = j
58 | batch_end = j + batch_size
59 | X_batch = X[batch_start:batch_end]
60 | y_batch = y[batch_start:batch_end]
61 | prediction = self.estimator(X_batch)
62 | loss = self.loss_fn(prediction, y_batch)
63 |
64 | self.optimizer.zero_grad()
65 | loss.backward()
66 | self.optimizer.step()
67 | if kwargs.get("verbose"):
68 | print(f"epoch: {i:2} training loss: {loss.item():10.8f}")
69 |
70 | return self
71 |
72 | def predict(self, X, **kwargs):
73 | r"""Get predictions from the estimator
74 |
75 | Args:
76 | X: features
77 | kwargs (optional): kwargs for use in predicting
78 |
79 | Returns:
80 |             predictions
81 | """
82 | if self.predict_fn is not None:
83 | return self.predict_fn(self, X, **kwargs)
84 |
85 | import torch
86 |
87 | prediction_list = []
88 | with torch.no_grad():
89 | for value in X:
90 | prediction = self.estimator(value)
91 | if len(prediction) > 1:
92 | prediction_list.append(prediction.argmax().item())
93 | else:
94 | prediction_list.append(prediction.item())
95 | return prediction_list
96 |
97 | def _reset_parameters(self):
98 | r"""Reset parameters of the estimator"""
99 | if hasattr(self, "reset_parameters"):
100 | self.reset_parameters()
101 |
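
# Example usage (a minimal sketch; the network, data tensors, and
# hyperparameters are illustrative only):
#
#     import torch
#
#     net = torch.nn.Sequential(
#         torch.nn.Linear(2, 8), torch.nn.ReLU(), torch.nn.Linear(8, 1)
#     )
#     wrapped = PyTorchEstimatorWrapper(
#         net,
#         optimizer_generator=lambda est: torch.optim.Adam(est.parameters()),
#         loss_fn=torch.nn.MSELoss(),
#     )
#     wrapped.fit(X_tensor, y_tensor, epochs=50, batch_size=16)
#     preds = wrapped.predict(X_tensor)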
--------------------------------------------------------------------------------
/docs/quickstart.rst:
--------------------------------------------------------------------------------
1 | Getting Started
2 | ===============
3 |
4 | Model Validation Toolkit is an open source library that provides various
5 | tools for model validation, data quality checks, analysis of thresholding,
6 | sensitivity analysis, and interpretable model development. The purpose of this
7 | guide is to illustrate some of the main features that Model Validation Toolkit
8 | provides. Please refer to the README for installation instructions.
9 |
10 | Divergences
11 | ----------------------------------------
12 |
13 | Model Validation Toolkit provides a fast and accurate means of assessing
14 | large scale statistical differences between datasets. Rather than checking
15 | whether two samples are identical, this check asserts that they are similar in
16 | a statistical sense and can be used for data quality checks and concept drift
17 | detection.
18 |
19 | .. code-block:: python
20 |
21 | import numpy
22 | from mvtk.supervisor.divergence import calc_tv
23 |
24 | nprng = numpy.random.RandomState(0)
25 |
26 | train = nprng.uniform(size=(1000, 4)) # 4 features
27 | val = nprng.uniform(size=(1000, 4)) # 4 features
28 |
29 | # Close to 0 is similar; close to 1 is different
30 | print(calc_tv(train, val))
31 |
32 | See the :doc:`user guide <supervisor_user_guide>` for more information.
33 |
34 | Credibility
35 | ----------------------------------------
36 |
37 | .. currentmodule:: mvtk.credibility
38 |
39 | Model Validation Toolkit provides a lightweight suite to assess credibility
40 | of model performance given a finite sample. Whether your validation set has
41 | several dozen or several million records, you can quantify your confidence in
42 | performance using this module. For example, if a model correctly identifies 8
43 | of 10 images, its empirical accuracy is 80%. However, that does not mean the
44 | true accuracy is 80%; it could turn out to be lower if we had more data. We
45 | would obviously be more confident in this assessment if it identified
46 | 800 of 1000 images, but how much more so? With a few assumptions and
47 | :meth:`prob_below`, we can estimate the probability that the true accuracy
48 | would be less than 70% if we had more data.
49 |
50 | .. code-block:: python
51 |
52 | from mvtk.credibility import prob_below
53 | print(prob_below(8, 2, 0.7))
54 |
55 | See the :doc:`user guide <credibility_user_guide>` for more information.
56 |
57 | Thresholding
58 | ----------------------------------------
59 |
60 | Model Validation Toolkit provides a module for determining and
61 | dynamically setting and sampling thresholds for binary classifiers that
62 | maximize a utility function. The general idea is to intelligently reassess
63 | false and true negative rates in a production system. See the :doc:`user guide
64 | <thresholding_user_guide>` for more information.
65 |
66 | Sobol
67 | ----------------------------------------
68 |
69 | .. currentmodule:: sobol
70 |
71 | Model Validation Toolkit provides a lightweight module for `Sobol
72 | sensitivity analysis
73 | <https://en.wikipedia.org/wiki/Variance-based_sensitivity_analysis>`_. This can
74 | be used to assess and quantify uncertainty of model outputs with respect to
75 | model inputs. The module currently supports first-order and total-effect Sobol
76 | indices, both of which are computed and reported using :meth:`sobol`.
77 |
78 | .. code-block:: python
79 |
80 | import numpy
81 | from mvtk import sobol
82 |
83 | nprng = numpy.random.RandomState(0)
84 |
85 | data = nprng.normal(size=(1000, 4)) # 4 features
86 | model = lambda x: (x ** 2).dot([1, 2, 3, 4])
87 | total, first_order = sobol.sobol(model, data, N=500)
88 |
89 | See the :doc:`user guide
90 | <sobol_user_guide>` for more information.
91 |
--------------------------------------------------------------------------------
/tests/bias_variance/test_bias_variance_parallel.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 |
3 | from sklearn.tree import DecisionTreeClassifier
4 | from sklearn.linear_model import Ridge
5 |
6 | from mvtk.bias_variance import (
7 | bias_variance_compute_parallel,
8 | bias_variance_mse,
9 | bias_variance_0_1_loss,
10 | )
11 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper
12 |
13 |
14 | def create_data():
15 | X_train = np.arange(12).reshape(6, 2)
16 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
17 | X_test = np.arange(6).reshape(3, 2)
18 | y_test = np.array([0, 1, 1])
19 |
20 | return X_train, y_train, X_test, y_test
21 |
22 |
23 | def test_bias_variance_compute_parallel_mse():
24 | X_train, y_train, X_test, y_test = create_data()
25 |
26 | model = Ridge(random_state=123)
27 | model_wrapped = SciKitLearnEstimatorWrapper(model)
28 |
29 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel(
30 | model_wrapped,
31 | X_train,
32 | y_train,
33 | X_test,
34 | y_test,
35 | random_state=123,
36 | decomp_fn=bias_variance_mse,
37 | )
38 |
39 | assert np.round(avg_loss, decimals=12) == np.round(
40 | np.float64(0.3967829075484304), decimals=12
41 | )
42 | assert np.round(avg_bias, decimals=12) == np.round(
43 | np.float64(0.13298143583764407), decimals=12
44 | )
45 | assert np.round(avg_var, decimals=12) == np.round(
46 | np.float64(0.26380147171078644), decimals=12
47 | )
48 | assert np.round(net_var, decimals=12) == np.round(
49 | np.float64(0.26380147171078644), decimals=12
50 | )
51 |
52 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
53 | assert avg_var == net_var
54 |
55 |
56 | def test_bias_variance_calc_parallel_0_1():
57 | X_train, y_train, X_test, y_test = create_data()
58 |
59 | model = DecisionTreeClassifier(random_state=123)
60 | model_wrapped = SciKitLearnEstimatorWrapper(model)
61 |
62 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel(
63 | model_wrapped,
64 | X_train,
65 | y_train,
66 | X_test,
67 | y_test,
68 | random_state=123,
69 | decomp_fn=bias_variance_0_1_loss,
70 | )
71 |
72 | assert avg_loss == np.float64(0.4566666666666666)
73 | assert avg_bias == np.float64(0.3333333333333333)
74 | assert avg_var == np.float64(0.33499999999999996)
75 | assert net_var == np.float64(0.12333333333333332)
76 |
77 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
78 |
79 |
80 | def test_bias_variance_calc_parallel_mse_no_random_state():
81 | X_train, y_train, X_test, y_test = create_data()
82 |
83 | model = Ridge(random_state=123)
84 | model_wrapped = SciKitLearnEstimatorWrapper(model)
85 |
86 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel(
87 | model_wrapped,
88 | X_train,
89 | y_train,
90 | X_test,
91 | y_test,
92 | iterations=10,
93 | decomp_fn=bias_variance_mse,
94 | )
95 |
96 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
97 | assert avg_var == net_var
98 |
99 |
100 | def test_bias_variance_calc_parallel_0_1_no_random_state():
101 | X_train, y_train, X_test, y_test = create_data()
102 |
103 | model = DecisionTreeClassifier(random_state=123)
104 | model_wrapped = SciKitLearnEstimatorWrapper(model)
105 |
106 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute_parallel(
107 | model_wrapped,
108 | X_train,
109 | y_train,
110 | X_test,
111 | y_test,
112 | iterations=10,
113 | decomp_fn=bias_variance_0_1_loss,
114 | )
115 |
116 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
117 |
--------------------------------------------------------------------------------
/docs/notebooks/interprenet/.ipynb_checkpoints/Periodic-checkpoint.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 160,
6 | "id": "699ea1b7",
7 | "metadata": {},
8 | "outputs": [],
9 | "source": [
10 | "import jax\n",
11 | "from sklearn.model_selection import train_test_split\n",
12 | "\n",
13 | "f = jax.numpy.cos\n",
14 | "n = 1000\n",
15 | "X_train = jax.numpy.linspace(-n * jax.numpy.pi, 0, 100 * n).reshape(-1, 1)\n",
16 | "y_train = f(X_train)\n",
17 | "\n",
18 | "\n",
19 | "X_test = jax.numpy.linspace(0, n * jax.numpy.pi, 100 * n).reshape(-1, 1)\n",
20 | "y_test = f(X_test)"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 161,
26 | "id": "f2d7f659",
27 | "metadata": {},
28 | "outputs": [],
29 | "source": [
30 | "import jax\n",
31 | "\n",
32 | "from mvtk import interprenet\n",
33 | "\n",
34 | "init_params, model = interprenet.constrained_model(\n",
35 | " (frozenset([interprenet.monotonic_constraint]),),\n",
36 | " get_layers=lambda n: [n + 1],\n",
37 | " preprocess=interprenet.identity,\n",
38 | " postprocess=interprenet.identity)\n",
39 | "\n",
40 | "init_params = ((jax.numpy.asarray([0.]), jax.numpy.asarray([0.]),),\n",
41 | " init_params)\n",
42 | "def scaled_model(params, x):\n",
43 | " (m, b), model_params = params\n",
44 | " u = jax.numpy.sin(x * jax.numpy.exp(m) + jax.numpy.arctan(b))\n",
45 | " return model(model_params, u)"
46 | ]
47 | },
48 | {
49 | "cell_type": "code",
50 | "execution_count": null,
51 | "id": "f7a0800c",
52 | "metadata": {},
53 | "outputs": [],
54 | "source": [
55 | "def loss(y, y_pred):\n",
56 | " return ((y - y_pred) ** 2).mean()\n",
57 | "\n",
58 | "trained_params = interprenet.train((X_train, y_train),\n",
59 | " (X_test, y_test),\n",
60 | " (init_params, scaled_model),\n",
61 | " metric=lambda y, y_pred: loss(y, y_pred),\n",
62 | " step_size=0.01,\n",
63 | " mini_batch_size=32,\n",
64 | " loss_fn=loss,\n",
65 | " num_epochs=128)"
66 | ]
67 | },
68 | {
69 | "cell_type": "code",
70 | "execution_count": null,
71 | "id": "1e7dad5d",
72 | "metadata": {},
73 | "outputs": [],
74 | "source": [
75 | "loss(y_test, y_test)"
76 | ]
77 | },
78 | {
79 | "cell_type": "code",
80 | "execution_count": null,
81 | "id": "ebae2803",
82 | "metadata": {},
83 | "outputs": [],
84 | "source": [
85 | "trained_model = lambda X: scaled_model(trained_params, X)\n",
86 | "y_pred = trained_model(X_test)\n",
87 | "loss(y_test, y_pred)"
88 | ]
89 | },
90 | {
91 | "cell_type": "code",
92 | "execution_count": null,
93 | "id": "5d4554e7",
94 | "metadata": {},
95 | "outputs": [],
96 | "source": [
97 | "import matplotlib\n",
98 | "import matplotlib.pyplot as pylab\n",
99 | "\n",
100 | "q = 1000\n",
101 | "pylab.plot(X_test[:q], y_test[:q])\n",
102 | "pylab.plot(X_test[:q], y_pred[:q])\n",
103 | "\n",
104 | "pylab.show()"
105 | ]
106 | },
107 | {
108 | "cell_type": "code",
109 | "execution_count": null,
110 | "id": "b17de2c6",
111 | "metadata": {},
112 | "outputs": [],
113 | "source": []
114 | }
115 | ],
116 | "metadata": {
117 | "kernelspec": {
118 | "display_name": "Python 3",
119 | "language": "python",
120 | "name": "python3"
121 | },
122 | "language_info": {
123 | "codemirror_mode": {
124 | "name": "ipython",
125 | "version": 3
126 | },
127 | "file_extension": ".py",
128 | "mimetype": "text/x-python",
129 | "name": "python",
130 | "nbconvert_exporter": "python",
131 | "pygments_lexer": "ipython3",
132 | "version": "3.8.8"
133 | }
134 | },
135 | "nbformat": 4,
136 | "nbformat_minor": 5
137 | }
138 |
--------------------------------------------------------------------------------
/mvtk/supervisor/divergence/nn.py:
--------------------------------------------------------------------------------
1 | import jax
2 | import public
3 |
4 | from jax.example_libraries import stax
5 | from jax.nn.initializers import glorot_normal, normal
6 | from jax.example_libraries.stax import (
7 | Dense,
8 | FanInSum,
9 | FanOut,
10 | Identity,
11 | Relu,
12 | elementwise,
13 | )
14 |
15 |
16 | def ResBlock(*layers, fan_in=FanInSum, tail=Identity):
17 | """Split input, feed it through one or more layers in parallel, recombine
18 |     them with a fan-in, apply a trailing layer (e.g. an activation)
19 |
20 | Args:
21 | *layers: a sequence of layers, each an (init_fun, apply_fun) pair.
22 | fan_in, optional: a fan-in to recombine the outputs of each layer
23 | tail, optional: a final layer to apply after recombination
24 |
25 |
26 | Returns:
27 | A new layer, meaning an (init_fun, apply_fun) pair, representing the
28 | parallel composition of the given sequence of layers fed into fan_in
29 |         and then tail. In particular, the returned layer takes a single
30 |         input, copies it to each of the given layers, and recombines their
31 |         outputs into a single output via `fan_in`.
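
    Example (an illustrative sketch)::

        >>> init_fun, apply_fun = ResBlock(
        ...     stax.serial(Dense(8), Relu), Dense(8), tail=Relu)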
32 | """
33 | return stax.serial(FanOut(len(layers)), stax.parallel(*layers), fan_in, tail)
34 |
35 |
36 | @public.add
37 | def Approximator(
38 | input_size,
39 | depth=3,
40 | width=None,
41 | output_size=1,
42 | linear=Dense,
43 | residual=True,
44 | activation=lambda x: x,
45 | rng=jax.random.PRNGKey(0),
46 | ):
47 |     r"""Basic neural network based
48 |     :math:`\mathbb{R}^N\rightarrow\mathbb{R}^M` function approximator.
49 |
50 | Args:
51 | input_size (int): Size of input dimension.
52 | depth (int, optional): Depth of network. Defaults to ``3``.
53 | width (int, optional): Width of network. Defaults to ``input_size + 1``.
54 | output_size (int, optional): Number of outputs. Defaults to ``1``.
55 |         linear (optional): Linear layer drop-in
56 |             replacement. Defaults to ``jax.example_libraries.stax.Dense``.
57 | residual (bool, optional): Turn on ResNet blocks. Defaults to ``True``.
58 | activation (optional): A map from :math:`(-\infty, \infty)` to an
59 | appropriate domain (such as the domain of a convex conjugate).
60 | Defaults to the identity.
61 |         rng (optional): Jax ``PRNGKey``. Defaults to ``jax.random.PRNGKey(0)``.
62 |
63 | Returns:
64 | initial parameter values, neural network function
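
    Example (an illustrative sketch)::

        >>> init_params, f = Approximator(4, depth=3)
        >>> y = f(init_params, jax.numpy.ones((10, 4)))  # y has shape (10, 1)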
65 | """
66 |     # input_size + output_size hidden units is sufficient for universal
67 |     # approximation given unconstrained depth even without ResBlocks.
68 |     # https://arxiv.org/abs/1710.11278. With ResBlocks (as used below), only
69 |     # one hidden unit is needed for Relu activation
70 |     # https://arxiv.org/abs/1806.10909.
71 | if width is None:
72 | hidden = input_size + 1
73 | else:
74 | hidden = width
75 | if depth > 2:
76 | layers = [linear(hidden), Relu]
77 | else:
78 | layers = []
79 | for _ in range(depth - 2):
80 | if residual:
81 | layers.append(
82 | ResBlock(stax.serial(linear(hidden), Relu), linear(hidden), tail=Relu)
83 | )
84 | else:
85 | layers.append(linear(hidden))
86 | layers.append(linear(output_size))
87 | layers.append(elementwise(activation))
88 | init_approximator_params, approximator = stax.serial(*layers)
89 | _, init_params = init_approximator_params(rng, (-1, input_size))
90 | return init_params, approximator
91 |
92 |
93 | @public.add
94 | def NormalizedLinear(out_dim, W_init=glorot_normal(), b_init=normal()):
95 |     r"""Linear layer whose weight columns are rescaled to unit :math:`L^1` norm."""
96 |
97 | def init_fun(rng, input_shape):
98 | output_shape = input_shape[:-1] + (out_dim,)
99 | k1, k2 = jax.random.split(rng)
100 | W, b = W_init(k1, (input_shape[-1], out_dim)), b_init(k2, (out_dim,))
101 | return output_shape, (W, b)
102 |
103 | def apply_fun(params, inputs, **kwargs):
104 | W, b = params
105 | W_normalized = W / jax.numpy.abs(W).sum(0)
106 | return jax.numpy.dot(inputs, W_normalized) + b
107 |
108 | return init_fun, apply_fun
109 |
--------------------------------------------------------------------------------
/mvtk/credibility.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import public
3 |
4 | from sklearn.metrics import roc_auc_score
5 | from scipy.stats import beta
6 |
7 |
8 | @public.add
9 | def credible_interval(positive, negative, credibility=0.5, prior=(1, 1)):
10 |     """What is the shortest interval that contains P(positive) with
11 |     probability ``credibility``?
12 |
13 | Args:
14 | positive (int): number of times the first possible outcome has been seen
15 | negative (int): number of times the second possible outcome has been seen
16 | credibility (float): The probability that the true p(positive) is
17 | contained within the reported interval
18 |         prior (tuple): pseudocounts for positives and negatives
19 |
20 |     Returns:
21 | (lower bound, upper bound)
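
    Example (an illustrative sketch; 4 positives, 1 negative)::

        >>> lower, upper = credible_interval(4, 1, credibility=0.5)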
22 | """
23 | positive += prior[0]
24 | negative += prior[1]
25 | if not (positive > 1 or negative > 1):
26 | raise ValueError(
27 |             "Credible intervals are only defined when at least one count + pseudocount"
28 | " is greater than 1"
29 | )
30 | distribution = beta(positive, negative)
31 | mode = positive / (positive + negative)
32 | cdf_mode = distribution.cdf(mode)
33 | cred_2 = credibility / 2
34 | lower = cdf_mode - cred_2
35 | true_lower = max(lower, 0)
36 | excess = true_lower - lower
37 | upper = cdf_mode + cred_2 + excess
38 | true_upper = min(upper, 1)
39 | excess = upper - true_upper
40 | true_lower -= excess
41 | assert numpy.isclose((true_upper - true_lower), credibility)
42 | return distribution.ppf(true_lower), distribution.ppf(true_upper)
43 |
44 |
45 | @public.add
46 | def prob_below(positive, negative, cutoff, prior=(1, 1)):
47 | """What is the probability P(positive) is unacceptably low?
48 |
49 | Args:
50 | positive (int): number of times the positive outcome has been seen
51 | negative (int): number of times the negative outcome has been seen
52 | cutoff (float): lowest acceptable value of P(positive)
53 |         prior (tuple): pseudocounts for positives and negatives
54 |     Returns:
55 | Probability that P(positive) < cutoff
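
    Example (an illustrative sketch; 4 positives, 1 negative)::

        >>> p = prob_below(4, 1, cutoff=0.7)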
56 | """
57 | return beta(prior[0] + positive, prior[1] + negative).cdf(cutoff)
58 |
59 |
60 | @public.add
61 | def roc_auc_preprocess(positives, negatives, roc_auc):
62 | """ROC AUC analysis must be preprocessed using the number of positive and
63 | negative instances in the entire dataset and the AUC itself.
64 |
65 | Args:
66 | positives (int): number of positive instances in the dataset
67 | negatives (int): number of negative instances in the dataset
68 | roc_auc (float): ROC AUC
69 |     Returns:
70 | (positive, negative) tuple that can be used for `prob_below` and
71 | `credible_interval`
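
    Example (an illustrative sketch)::

        >>> pos, neg = roc_auc_preprocess(50, 100, 0.9)
        >>> p = prob_below(pos, neg, cutoff=0.8)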
72 | """
73 | unique_combinations = positives * negatives
74 | # correctly ranked combinations are pairs of positives and negatives
75 | # instances where the model scored the positive instance higher than the
76 | # negative instance
77 | correctly_ranked_combinations = roc_auc * unique_combinations
78 | # the number of incorrectly ranked combinations is the number of
79 | # combinations that aren't correctly ranked
80 | incorrectly_ranked_combinations = (
81 | unique_combinations - correctly_ranked_combinations
82 | )
83 | return correctly_ranked_combinations, incorrectly_ranked_combinations
84 |
85 |
86 | @public.add
87 | def prob_greater_cmp(
88 | positive1,
89 | negative1,
90 | positive2,
91 | negative2,
92 | prior1=(1, 1),
93 | prior2=(1, 1),
94 | err=10**-5,
95 | ):
96 | """Probability the first set comes from a distribution with a greater
97 | proportion of positive than the other.
98 |
99 | Args:
100 | positive1 (int): number of positive instances in the first dataset
101 | negative1 (int): number of negative instances in the first dataset
102 |         positive2 (int): number of positive instances in the second dataset
103 |         negative2 (int): number of negative instances in the second dataset
104 |         prior1 (tuple): pseudocounts for the first dataset's positives and negatives
105 |         prior2 (tuple): pseudocounts for the second dataset's positives and negatives
106 |         err (float): upper bound on the frequentist sample std from the Monte Carlo simulation.
107 | """
108 | nprng = numpy.random.RandomState(0)
109 | distribution1 = beta(positive1 + prior1[0], negative1 + prior1[1])
110 | distribution2 = beta(positive2 + prior2[0], negative2 + prior2[1])
111 | # CLT implies ROC AUC error shrinks like 1/PN
112 | # for P positives and N negatives
113 | N = int(1 + 1 / (2 * err))
114 | sample1 = distribution1.rvs(N, random_state=nprng)
115 | sample2 = distribution2.rvs(N, random_state=nprng)
116 | y = numpy.ones(2 * N)
117 | y[N:] = 0
118 | return roc_auc_score(y, numpy.concatenate((sample1, sample2)))
119 |
--------------------------------------------------------------------------------
/tests/supervisor/test_divergence.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import mvtk.supervisor.divergence as divergence
3 |
4 | from functools import partial
5 |
6 |
7 | def mutually_exclusive_support_tester(metric, num_features=4, eps=0.1):
8 | data1 = numpy.ones((4, num_features))
9 | data1[:, :2] = 0
10 | data2 = 1 - data1
11 | assert numpy.isclose(metric([data1], [data1]), 0, atol=eps)
12 | assert numpy.isclose(metric([data2], [data2]), 0, atol=eps)
13 | assert numpy.isclose(metric([data1], [data2]), 1, atol=eps)
14 |
15 |
16 | def get_batches(nprng, batch_size, n=2):
17 |     """Pick a random binomial distribution. Sample ``batch_size`` samples
18 |     from it."""
19 | choices = numpy.arange(n)
20 | x = []
21 | alpha = nprng.rand(n)
22 | alpha /= alpha.sum()
23 | for d in range(batch_size):
24 | choice = nprng.choice(choices, p=alpha)
25 | z = numpy.zeros_like(choices)
26 | z[choice] = 1
27 | x.append(z)
28 | x = numpy.asarray(x).reshape(batch_size, n)
29 | return x, alpha
30 |
31 |
32 | def divergence_tester(
33 | approximate_metric, analytical_metric, batch_sizes=[256] * 8, thresh=0.85
34 | ):
35 | nprng = numpy.random.RandomState(0)
36 | batches, alphas = zip(*map(partial(get_batches, nprng), batch_sizes))
37 | assert (
38 | numpy.corrcoef(
39 | numpy.asarray([analytical_metric(alphas, alpha) for alpha in alphas]),
40 | divergence.utils.get_drift_series(
41 | approximate_metric, batches, [[batch] for batch in batches]
42 | ),
43 | )[0, 1]
44 | > thresh
45 | )
46 |
47 |
48 | def gaussian_test(approximate_metric, dim=1, N=1024, thresh=0.05):
49 | nprng = numpy.random.RandomState(0)
50 | m = approximate_metric(*nprng.normal(size=(2, 1, N, dim)))
51 | assert m < thresh
52 | assert m >= 0
53 |
54 |
55 | def test_hl_gaussian():
56 | for dim in range(1, 4):
57 | gaussian_test(partial(divergence.calc_hl, train_test_split=0.5), dim)
58 |
59 |
60 | def test_tv_gaussian():
61 | for dim in range(1, 4):
62 | gaussian_test(partial(divergence.calc_tv, train_test_split=0.5), dim)
63 | gaussian_test(
64 | partial(divergence.calc_tv_knn, k=64 * 2**dim),
65 | dim,
66 | N=1024 * 2**dim,
67 | thresh=0.1,
68 | )
69 |
70 |
71 | def test_js_gaussian():
72 | for dim in range(1, 4):
73 | gaussian_test(partial(divergence.calc_js, train_test_split=0.5), dim)
74 |
75 |
76 | def test_em_gaussian():
77 | for dim in range(1, 4):
78 | gaussian_test(
79 | partial(divergence.calc_em, train_test_split=0.5), dim, thresh=0.11
80 | )
81 |
82 |
83 | def test_js_by_corr():
84 | def kl(alpha1, alpha2):
85 | return numpy.sum(alpha1 * numpy.log2(alpha1 / alpha2))
86 |
87 | def js(alpha1, alpha2):
88 | mean = alpha1 + alpha2
89 | mean /= 2
90 | ret = kl(alpha1, mean) + kl(alpha2, mean)
91 | return ret / 2
92 |
93 | divergence_tester(
94 | lambda *x: numpy.sqrt(divergence.calc_js_mle(*x)), lambda *x: numpy.sqrt(js(*x))
95 | )
96 | divergence_tester(
97 | lambda *x: numpy.sqrt(divergence.calc_js(*x)), lambda *x: numpy.sqrt(js(*x))
98 | )
99 |
100 |
101 | def test_js_by_support():
102 | mutually_exclusive_support_tester(divergence.calc_js_mle)
103 | mutually_exclusive_support_tester(divergence.calc_js)
104 |
105 |
106 | def test_hl_by_corr():
107 | def hl(alpha1, alpha2):
108 | return numpy.sqrt(numpy.sum((numpy.sqrt(alpha1) - numpy.sqrt(alpha2)) ** 2) / 2)
109 |
110 | divergence_tester(divergence.calc_hl_mle, hl)
111 | divergence_tester(divergence.calc_hl, hl)
112 |
113 |
114 | def test_hl_by_support():
115 | mutually_exclusive_support_tester(divergence.calc_hl)
116 | mutually_exclusive_support_tester(divergence.calc_hl_mle)
117 |
118 |
119 | def test_tv_by_corr():
120 | def tv(alpha1, alpha2):
121 | return numpy.abs(alpha1 - alpha2).sum() / 2
122 |
123 | divergence_tester(divergence.calc_tv_mle, tv)
124 | divergence_tester(divergence.calc_tv, tv)
125 |
126 |
127 | def test_tv_by_support():
128 | mutually_exclusive_support_tester(divergence.calc_tv_mle)
129 | mutually_exclusive_support_tester(divergence.calc_tv)
130 |
131 |
132 | def test_em_by_support():
133 | for num_features in range(1, 3):
134 | data1 = numpy.zeros((4, num_features))
135 | data2 = 1 - data1
136 | eps = 0.125
137 | assert numpy.isclose(divergence.calc_em([data1], [data1]), 0, atol=eps)
138 | assert numpy.isclose(divergence.calc_em([data2], [data2]), 0, atol=eps)
139 | assert numpy.isclose(divergence.calc_em([data1], [data2]), 1, atol=eps)
140 | assert numpy.isclose(divergence.calc_em([data1], [2 * data2]), 2, atol=eps)
141 |
142 |
143 | def test_calc_tv_lower_bound():
144 | a = numpy.asarray([0, 1, 0, 0, 1])
145 | b = numpy.asarray([0.01, 0.98, 0.03, 0.04, 0.99])
146 | log_loss = divergence.metrics.balanced_binary_cross_entropy(a, b)
147 | tv = divergence.metrics.calc_tv_lower_bound(log_loss)
148 | assert tv < 1 and tv > 0
149 |
--------------------------------------------------------------------------------
/docs/interprenet_user_guide.rst:
--------------------------------------------------------------------------------
1 | ######################
2 | Interprenet User Guide
3 | ######################
4 |
5 | **********
6 | Motivation
7 | **********
8 |
9 | Neural networks are generally difficult to interpret. While there
10 | are tools that can help to interpret certain types of neural
11 | networks such as image classifiers and language models,
12 | interpretation of neural networks that simply ingest tabular data
13 | and return a scalar value is generally limited to various measures of feature
14 | importance. This can be problematic as what makes a feature "important" can
15 | vary between use cases.
16 |
17 | Rather than interpret a neural network as a black
18 | box, we seek to constrain neural networks in ways we
19 | consider useful and interpretable. In particular,
20 | the interprenet module currently has two such
21 | constraints implemented:
22 |
23 | * Monotonicity
24 | * Lipschitz constraint
25 |
26 | `Monotonic functions <https://en.wikipedia.org/wiki/Monotonic_function>`_
27 | either always increase or decrease with their arguments but never both. This is
28 | often an expected relationship between features and the model output. For
29 | example, we may believe that increasing blood pressure increases risk of
30 | cardiovascular disease. The exact relationship is not known, but we may believe
31 | that it is monotonic.
32 |
33 | `Lipschitz constraints
34 | <https://en.wikipedia.org/wiki/Lipschitz_continuity>`_ constrain the
35 | maximum rate of change of the model. This can make the model arbitrarily robust
36 | `against adversarial perturbations
37 | <https://en.wikipedia.org/wiki/Adversarial_machine_learning>`_
38 | :cite:`anil2019sorting`.
39 |
40 |
41 | How?
42 | ====
43 |
44 | All constraints are currently implemented as weight constraints. While
45 | arbitrary weights are stored within each linear layer, the weights are
46 | transformed before application so the network can satisfy its prescribed
47 | constraints. Gradients are backpropagated through this transformation.
48 | Monotonic increasing neural networks are implemented by taking the absolute
49 | value of weight matrices before applying them. When paired with a monotonically
50 | increasing activation (such as ReLU, Sigmoid, or Tanh), this ensures the
51 | gradient of the output with respect to each feature is non-negative. This is
52 | sufficient to ensure monotonicity with respect to the features.
53 |
54 | Lipschitz constraints are enforced by dividing each weight vector by
55 | its :math:`L^\infty` norm as described in :cite:`anil2019sorting`. This
56 | constrains the :math:`L^\infty`-:math:`L^\infty` `operator norm
57 | <https://en.wikipedia.org/wiki/Operator_norm>`_
58 | of the weight matrix :cite:`tropp2004topics`. Constraining the
59 | :math:`L^\infty`-:math:`L^\infty` operator norm of the weight
60 | matrix ensures every element of the Jacobian of the linear layers is less than
61 | or equal to :math:`1`. Meanwhile, using activation functions with Lipschitz
62 | constants of :math:`1` ensures the entire network is constrained to never have a
63 | slope greater than :math:`1` for any of its features.
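
As a rough sketch (not the library's exact implementation), the two weight
transformations could look like the following in JAX; the Lipschitz version
mirrors ``NormalizedLinear`` from ``mvtk.supervisor.divergence.nn``:

.. code-block:: python

    import jax.numpy as jnp

    def monotonic_weights(W):
        # Non-negative weights combined with monotonically increasing
        # activations yield a monotonically increasing network.
        return jnp.abs(W)

    def lipschitz_weights(W):
        # Rescaling each column to unit L1 norm bounds the
        # L-infinity -> L-infinity operator norm of x @ W by 1.
        return W / jnp.abs(W).sum(axis=0)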
64 |
65 | *******************************************
66 | Different Constraints on Different Features
67 | *******************************************
68 |
69 | .. currentmodule:: mvtk.interprenet
70 |
71 | :meth:`constrained_model` generates a neural network with one set of
72 | constraints per feature. Constraints currently available are:
73 |
74 | - :meth:`identity` (for no constraint)
75 | - :meth:`monotonic_constraint`
76 | - :meth:`lipschitz_constraint`
77 |
78 | Features are grouped by the set of constraints applied to them, and
79 | different constrained neural networks are generated for each group
80 | of features. The outputs of those neural networks are concatenated
81 | and fed into a final neural network constrained using all
82 | constraints applied to all features. Since constraints on weight
83 | matrices compose, they can be applied as a series of transformations
84 | on the weights before application.
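
For example (an illustrative sketch assuming the remaining keyword arguments
keep their defaults), a model with one monotonic feature and one
Lipschitz-constrained feature might be declared as:

.. code-block:: python

    from mvtk import interprenet

    init_params, model = interprenet.constrained_model(
        (frozenset([interprenet.monotonic_constraint]),
         frozenset([interprenet.lipschitz_constraint])))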
85 |
86 | .. figure:: images/interprenet.png
87 | :width: 500px
88 | :align: center
89 | :height: 400px
90 | :alt: alternate text
91 | :figclass: align-center
92 |
93 |     4 features with Lipschitz constraints and 4 features with
94 | monotonic constraints are fed to their respectively constrained
95 | neural networks. Intermediate outputs are concatenated and fed into a neural
96 |     network with monotonic and Lipschitz constraints.
97 |
98 | We use the Sort function as a nonlinear activation as described in
99 | :cite:`anil2019sorting`. The Jacobian of this activation is always a
100 | permutation matrix, which preserves any Lipschitz and monotonicity
101 | constraints.
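
For intuition, a minimal sketch of a sorting activation in JAX:

.. code-block:: python

    import jax.numpy as jnp

    def sort_activation(x):
        # Sorting only permutes entries, so slopes of exactly 1 (and with
        # them the Lipschitz and monotonicity constraints) are preserved.
        return jnp.sort(x, axis=-1)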
102 |
103 | *************
104 | Preprocessing
105 | *************
106 |
107 | Thus far, we have left out two important details: how to constrain
108 | the Lipschitz constant to be something other than :math:`1`, and how
109 | to create monotonically decreasing networks. Both are a simple
110 | matter of preprocessing. The ``preprocess`` argument (defaulting to
111 | ``identity``) specifies a function to be applied to the feature
112 | vector before passing it to the neural network. For decreasing
113 | monotonic constraints, multiply the respective features by
114 | :math:`-1`. For a Lipschitz constant of :math:`L`, multiply the
115 | respective features by :math:`L`.
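
For example (an illustrative sketch; the column layout and the Lipschitz
constant :math:`L = 2` are hypothetical):

.. code-block:: python

    import jax.numpy as jnp

    def preprocess(X):
        # Column 0: monotonically decreasing, so flip its sign.
        # Column 1: Lipschitz constant of 2, so scale it by 2.
        return X * jnp.asarray([-1.0, 2.0])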
116 |
117 | .. topic:: Tutorials:
118 |
119 | * :doc:`Interprenet `
120 |
121 | .. bibliography:: refs.bib
122 | :cited:
123 |
--------------------------------------------------------------------------------
/mvtk/metrics.py:
--------------------------------------------------------------------------------
1 | import public
2 | import numpy
3 | import pandas
4 |
5 | from scipy.stats import entropy
6 | from sklearn.feature_selection import mutual_info_classif
7 |
8 |
9 | def binarize(data, t):
10 | y_true, y_pred = data.values.T
11 | return y_true > t, y_pred
12 |
13 |
14 | @public.add
15 | def monotonicity(y_true, y_pred, weights=None):
16 | r"""Generalizes ROC AUC by computing
17 | :math:`P\left(\frac{\Delta\mathrm{y_pred}}{\Delta\mathrm{y_true}} >
18 |     0\right)`, the probability that incrementing ``y_true`` increases ``y_pred`` for
19 | a randomly chosen pair of instances. This reduces to ROC AUC when
20 | ``y_true`` has two unique values. Adapted from Algorithm 2 in `Fawcett, T.
21 | (2006). An introduction to ROC analysis. Pattern Recognition Letters,
22 | 27(8), 861-874.
23 | `_
24 |
25 | Args:
26 | y_true (list-like): Ground truth ordinal values
27 | y_pred (list-like): Predicted ordinal values
28 | weights (list-like): Sample weights. Will be normalized to one
29 |             across each unique value of ``y_true``. If ``None`` (default) all
30 | samples are weighed equally.
31 |
32 | Returns:
33 | Float between 0 and 1. 0 indicates 100% chance of ``y_pred``
34 | decreasing upon incrementing ``y_true`` up to its next
35 | highest value in the dataset. 1 being a 100% chance of
36 | ``y_pred`` increasing for the same scenario. 0.5 would be 50%
37 | chance of either.
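
    Example (an illustrative sketch)::

        >>> score = monotonicity(numpy.asarray([0, 1, 2, 2]),
        ...                      numpy.asarray([0.1, 0.5, 0.9, 0.8]))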
38 | """
39 | if weights is None:
40 | weights = numpy.ones(len(y_true))
41 | unique = numpy.unique(y_true)
42 | n = len(unique) - 1
43 | true_lookup = {u: i + 1 for i, u in enumerate(unique)}
44 | idx = numpy.argsort(-y_pred)
45 | y_true = y_true[idx]
46 | y_pred = y_pred[idx]
47 | weights = weights[idx]
48 | # fp, fp_prev, tp, tp_prev, auc
49 | data = numpy.zeros((5, n))
50 | prev_pred = numpy.full(n, numpy.nan)
51 | for true, pred, weight in zip(y_true, y_pred, weights):
52 | i = true_lookup[true]
53 | j = max(i - 2, 0)
54 | mask = pred != prev_pred[j:i]
55 | data[4, j:i][mask] += trap(*data[:4, j:i][:, mask])
56 | data[1:4:2, j:i][:, mask] = data[:4:2, j:i][:, mask]
57 | prev_pred[j:i] = pred
58 | i -= 1
59 | if i:
60 | data[2, j] += weight
61 | if i < n:
62 | data[0, i] += weight
63 | data[4] += trap(*data[:4])
64 | return numpy.sum(data[4]) / 2 / data[0].dot(data[2])
65 |
66 |
67 | def trap(x2, x1, y2, y1):
68 | return (x2 - x1) * (y2 + y1)
69 |
70 |
71 | @public.add
72 | def rank_auc(y_true, y_pred, weights=None):
73 | r"""Generalizes ROC AUC by computing probability that two randomly chosen
74 | data points would be ranked consistently with ground truth labels. This
75 | reduces to ROC AUC when ``y_true`` has two unique values.
76 | Adapted from Algorithm 2 in `Fawcett, T. (2006). An introduction
77 | to ROC analysis. Pattern Recognition Letters, 27(8), 861-874.
78 | `_
79 |
80 | Args:
81 | y_true (list-like): Ground truth ordinal values
82 | y_pred (list-like): Predicted ordinal values
83 | weights (list-like): Sample weights. Will be normalized to one
84 |             across each unique value of ``y_true``. If ``None`` (default) all
85 | samples are weighed equally.
86 |
87 | Returns:
88 |         Float between 0 and 1. 0 indicates a 100% chance of ``y_pred``
89 |         having the opposite order of ``y_true``. 1 being a 100% chance of
90 |         ``y_pred`` matching the order of ``y_true``. 0.5 would be a 50%
91 |         chance of either.
92 | """
93 | if weights is None:
94 | weights = numpy.ones(len(y_true))
95 | unique = numpy.unique(y_true)
96 | n = len(unique) - 1
97 | true_lookup = {u: i + 1 for i, u in enumerate(unique)}
98 | idx = numpy.argsort(-y_pred)
99 | y_true = y_true[idx]
100 | y_pred = y_pred[idx]
101 | weights = weights[idx]
102 | # fp, fp_prev, tp, tp_prev, auc
103 | data = numpy.zeros((5, n))
104 | prev_pred = numpy.full(n, numpy.nan)
105 | for true, pred, weight in zip(y_true, y_pred, weights):
106 | i = true_lookup[true]
107 | mask = pred != prev_pred[:i]
108 | data[4, :i][mask] += trap(*data[:4, :i][:, mask])
109 | data[1:4:2, :i][:, mask] = data[:4:2, :i][:, mask]
110 | prev_pred[:i] = pred
111 | i -= 1
112 | data[2, :i] += weight
113 | if i < n:
114 | data[0, i] += weight
115 | data[4] += trap(*data[:4])
116 | return numpy.sum(data[4]) / 2 / data[0].dot(data[2])
117 |
118 |
119 | @public.add
120 | def normalized_mutual_info(X, y, **kwargs):
121 | """Thin wrapper around `sklearn's mutual information
122 | `_.
123 | This normalizes the result to 0-1 scale. ``y`` is assumed categorical.
124 | """
125 | _, counts = numpy.unique(y, return_counts=True)
126 | return pandas.Series(
127 | dict(
128 | zip(
129 | X.columns,
130 | mutual_info_classif(X, y, **kwargs) / entropy(counts / counts.sum()),
131 | )
132 | )
133 | )
134 |
--------------------------------------------------------------------------------
/mvtk/supervisor/utils.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import sys
3 | import time
4 | import pandas as pd
5 | import numpy as np
6 | import public
7 |
8 | from concurrent.futures import ThreadPoolExecutor, as_completed
9 | from datetime import datetime
10 | from typing import Collection
11 | from typing import List
12 | from itertools import combinations
13 | from fastcore.imports import in_notebook
14 |
15 | if in_notebook():
16 | from tqdm import tqdm_notebook as tqdm
17 | else:
18 | from tqdm import tqdm
19 |
20 |
21 | @public.add
22 | def parallel(func, arr: Collection, max_workers=None, show_progress: bool = False):
23 | """
24 | NOTE: This code was adapted from the ``parallel`` function
25 | within Fastai's Fastcore library. Key differences include
26 | returning a list with order preserved.
27 |
28 | Run a function on a collection (list, set etc) of items
29 | :param func: The function to run
30 | :param arr: The collection to run on
31 | :param max_workers: How many workers to use. Will use
32 | multiprocessing.cpu_count() if this is not provided
33 | :return: a list of the results
34 | """
35 | if show_progress:
36 | progress_bar = tqdm(arr, smoothing=0, file=sys.stdout)
37 | results = []
38 | max_workers = multiprocessing.cpu_count() if max_workers is None else max_workers
39 | with ThreadPoolExecutor(max_workers=max_workers) as ex:
40 | future_to_index = {ex.submit(func, o): i for i, o in enumerate(arr)}
41 | for future in as_completed(future_to_index):
42 | results.append((future_to_index[future], future.result()))
43 | if show_progress:
44 | progress_bar.update()
45 | results.sort()
46 |
47 | # Complete the progress bar if not complete
48 | if show_progress:
49 | for n in range(progress_bar.n, len(list(arr))):
50 | time.sleep(0.1)
51 | progress_bar.update()
52 | return [result for i, result in results]
53 |
54 |
55 | @public.add
56 | def column_indexes(df: pd.DataFrame, cols: List[str]):
57 | """
58 |
59 | :param df: The dataframe
60 | :param cols: a list of column names
61 | :return: The column indexes of the column names
62 | """
63 | return [df.columns.get_loc(col) for col in cols if col in df]
64 |
65 |
66 | def format_date(date_str, dateformat="%b%d"):
67 | date = pd.to_datetime(date_str)
68 | return datetime.strftime(date, dateformat)
69 |
70 |
71 | @public.add
72 | def compute_divergence_crosstabs(
73 | data, datecol=None, format=None, show_progress=True, divergence=None
74 | ):
75 | """Compute the divergence crosstabs.
76 |
77 | :param data: The data to compute the divergences on
78 | :param datecol: The column representing the date. If None, will
79 | use the index, if the index is a datetimeindex
80 | :param format: A function applied to datecol values for formatting
81 | e.g. ``format_date``
82 | :param show_progress: Whether the progress bar will be shown
83 | :param divergence: The divergence function to use
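
    Example (an illustrative sketch; ``df`` is a hypothetical dataframe
    with a ``date`` column)::

        >>> crosstab = compute_divergence_crosstabs(df, datecol="date")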
84 | """
85 | if datecol is None:
86 | datecol = data.index
87 | dates, subsets = zip(*data.groupby(datecol))
88 | dates = list(dates)
89 | subsets = (subset.drop(columns=[datecol]) for subset in subsets)
90 |
91 | return compute_divergence_crosstabs_split(
92 | subsets, dates, format, show_progress, divergence
93 | )
94 |
95 |
96 | @public.add
97 | def compute_divergence_crosstabs_split(
98 | subsets, dates, format=None, show_progress=True, divergence=None
99 | ):
100 | """Compute the divergence crosstabs.
101 |
102 | :param subsets: The data to compute the divergences on
103 | :param dates: The list of dates for the subsets
104 | :param format: A function applied to datecol values for formatting
105 | e.g. ``format_date``
106 | :param show_progress: Whether the progress bar will be shown
107 | :param divergence: The divergence function to use
108 | """
109 |
110 | # Create a divergence matrix
111 | divergences = np.zeros((len(dates), len(dates)))
112 | if not divergence:
113 | from mvtk.supervisor.divergence import calc_tv
114 |
115 | divergence = calc_tv
116 |
117 | def compute_divergence(args):
118 | return divergence(*args)
119 |
120 | for (i, j), v in zip(
121 | combinations(range(len(dates)), 2),
122 | parallel(
123 | compute_divergence, combinations(subsets, 2), show_progress=show_progress
124 | ),
125 | ):
126 | divergences[i, j] = divergences[j, i] = v
127 | if format is None:
128 | formatted = dates
129 | else:
130 | formatted = [format(d) for d in dates]
131 | return pd.DataFrame(divergences, columns=formatted, index=formatted)
132 |
133 |
134 | @public.add
135 | def plot_divergence_crosstabs_3d(divergences):
136 | """Plot the divergences in 3d.
137 |
138 | :params divergences: The list of divergences
139 | """
140 | import matplotlib.pyplot as plt
141 | from mpl_toolkits.mplot3d import Axes3D # noqa F401
142 |
143 | fig = plt.figure()
144 | ax = fig.add_subplot(111, projection="3d")
145 |
146 | keys = list(divergences.keys())
147 | indexes = range(len(keys))
148 |
149 | for i in indexes:
150 | y = [x[1] for x in list(divergences[keys[i]].items())]
151 | ax.bar(indexes, y, i, zdir="y", alpha=0.8)
152 |
153 | ax.set(xticks=indexes, xticklabels=keys, yticks=indexes, yticklabels=keys)
154 |
155 | return fig
156 |
157 |
158 | @public.add
159 | def split(x, train_ratio=0.5, nprng=np.random.RandomState(0)):
160 | i = int(len(x) * train_ratio)
161 | if hasattr(x, "shape"):
162 | idx = np.arange(x.shape[0])
163 | nprng.shuffle(idx)
164 | x = x[idx]
165 | else:
166 | nprng.shuffle(x)
167 | return x[:i], x[i:]
168 |
--------------------------------------------------------------------------------
/docs/contributing.md:
--------------------------------------------------------------------------------
1 | [comment]: <> (Adapted from JAX's contribution guide)
2 |
3 | # Contributing
4 |
5 | Everyone can contribute to Model Validation Toolkit, and we value everyone's contributions. There are several
6 | ways to contribute, including:
7 |
8 |
9 | - Answering questions on Model Validation Toolkit's [Gitter channel](https://gitter.im/FINRAOS/model-validation-toolkit)
10 | - Improving or expanding Model Validation Toolkit's [documentation](https://finraos.github.io/model-validation-toolkit/docs/html/index.html)
11 | - Contributing to Model Validation Toolkit's [code-base](https://github.com/FINRAOS/model-validation-toolkit/)
12 |
13 | ## Ways to contribute
14 |
15 | We welcome pull requests, in particular for those issues marked with
16 | [contributions welcome](https://github.com/FINRAOS/model-validation-toolkit/issues?q=is%3Aopen+is%3Aissue+label%3A%22contributions+welcome%22) or
17 | [good first issue](https://github.com/FINRAOS/model-validation-toolkit/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22).
18 |
19 | For other proposals, we ask that you first open a GitHub
20 | [Issue](https://github.com/FINRAOS/model-validation-toolkit/issues/new/choose)
21 | or reach out on the
22 | [Gitter channel](https://gitter.im/FINRAOS/model-validation-toolkit)
23 | to seek feedback on your planned contribution.
24 |
25 | ## Contributing code using pull requests
26 |
27 | We do all of our development using git, so basic knowledge is assumed.
28 |
29 | Follow these steps to contribute code:
30 |
31 | 1. Fork the Model Validation Toolkit repository by clicking the **Fork** button on the
32 | [repository page](https://www.github.com/FINRAOS/model-validation-toolkit). This creates
33 | a copy of the Model Validation Toolkit repository in your own account.
34 |
35 | 2. Install Python >=3.6 locally in order to run tests.
36 |
37 | 3. `pip` install your fork from source. This allows you to modify the code
38 | and immediately test it out:
39 |
40 | ```bash
41 | git clone https://github.com/YOUR_USERNAME/model-validation-toolkit
42 | cd model-validation-toolkit
43 | pip install -e . # Installs Model Validation Toolkit from the current directory in editable mode.
44 | ```
45 |
46 | 4. Add the Model Validation Toolkit repo as an upstream remote, so you can use it to sync your
47 | changes.
48 |
49 | ```bash
50 | git remote add upstream http://www.github.com/FINRAOS/model-validation-toolkit
51 | ```
52 |
53 | 5. Create a branch where you will develop from:
54 |
55 | ```bash
56 | git checkout -b name-of-change
57 | ```
58 |
59 | And implement your changes using your favorite editor.
60 |
61 | 6. Make sure the tests pass by running the following command from the top of
62 | the repository:
63 |
64 | ```bash
65 | pytest tests/
66 | ```
67 |
68 | If you know the specific test file that covers your changes, you can limit the tests to that; for example:
69 |
70 | ```bash
71 | pytest tests/supervisor
72 | ```
73 |
74 | Model Validation Toolkit also offers more fine-grained control over which particular tests are run;
75 | see {ref}`running-tests` for more information.
76 |
77 | 7. Once you are satisfied with your change, create a commit as follows ([how to write a commit message](https://chris.beams.io/posts/git-commit/)):
78 |
79 | ```bash
80 | git add file1.py file2.py ...
81 | git commit -s -m "Your commit message"
82 | ```
83 |
84 | Please be sure to sign off on your work when you commit it with the `-s` or, equivalently, `--signoff` flag to agree to our [DCO](https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/main/DCO).
85 |
86 | Then sync your code with the main repo:
87 |
88 | ```bash
89 | git fetch upstream
90 | git rebase upstream/main
91 | ```
92 |
93 | Finally, push your commit on your development branch and create a remote
94 | branch in your fork that you can use to create a pull request from:
95 |
96 | ```bash
97 | git push --set-upstream origin name-of-change
98 | ```
99 |
100 | 8. Create a pull request from the Model Validation Toolkit repository and send it for review.
101 | Check the {ref}`pr-checklist` for considerations when preparing your PR, and
102 | consult [GitHub Help](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/about-pull-requests)
103 | if you need more information on using pull requests.
104 |
105 | ## Model Validation Toolkit pull request checklist
106 |
107 | As you prepare a Model Validation Toolkit pull request, here are a few things to keep in mind:
108 |
109 | ### DCO
110 |
111 | By contributing to this project, you agree to our [DCO](https://raw.githubusercontent.com/FINRAOS/model-validation-toolkit/main/DCO).
112 |
113 | ### Single-change commits and pull requests
114 |
115 | A git commit ought to be a self-contained, single change with a descriptive
116 | message. This helps with review and with identifying or reverting changes if
117 | issues are uncovered later on.
118 |
119 | Pull requests typically comprise a single git commit. In preparing a pull
120 | request for review, you may need to squash together multiple commits. We ask
121 | that you do this prior to sending the PR for review if possible. The `git
122 | rebase -i` command might be useful to this end.
123 |
124 | ### Linting and Type-checking
125 |
126 | Model Validation Toolkit uses [mypy](https://mypy.readthedocs.io/) and [flake8](https://flake8.pycqa.org/)
127 | to statically test code quality; the easiest way to run these checks locally is via
128 | the [pre-commit](https://pre-commit.com/) framework:
129 |
130 | ```bash
131 | pip install pre-commit
132 | pre-commit run --all
133 | ```
134 |
135 | ### Full GitHub test suite
136 |
137 | Your PR will automatically be run through a full test suite on GitHub CI, which
138 | covers a range of Python versions, dependency versions, and configuration options.
139 | It's normal for these tests to turn up failures that you didn't catch locally; to
140 | fix the issues you can push new commits to your branch.
141 |
--------------------------------------------------------------------------------
/docs/refs.bib:
--------------------------------------------------------------------------------
1 | @article{sriperumbudur2009integral,
2 |   title={On integral probability metrics, $\phi$-divergences and binary classification},
3 | author={Sriperumbudur, Bharath K and Fukumizu, Kenji and Gretton, Arthur and Sch{\"o}lkopf, Bernhard and Lanckriet, Gert RG},
4 | journal={arXiv preprint arXiv:0901.2698},
5 | year={2009}
6 | }
7 | @inproceedings{nowozin2016f,
8 | title={f-gan: Training generative neural samplers using variational divergence minimization},
9 | author={Nowozin, Sebastian and Cseke, Botond and Tomioka, Ryota},
10 | booktitle={Advances in neural information processing systems},
11 | pages={271--279},
12 | year={2016}
13 | }
14 | @misc{yale598,
15 | author={Yihong Wu},
16 | title={Variational representation, HCR and CR lower bounds.},
17 | month={February},
18 | year={2016},
19 | publisher={Yale},
20 | url={http://www.stat.yale.edu/~yw562/teaching/598/lec06.pdf}
21 | }
22 | @article{csiszar2004information,
23 | title={Information theory and statistics: A tutorial},
24 | author={Csisz{\'a}r, Imre and Shields, Paul C and others},
25 | journal={Foundations and Trends{\textregistered} in Communications and Information Theory},
26 | volume={1},
27 | number={4},
28 | pages={417--528},
29 | year={2004},
30 | publisher={Now Publishers, Inc.}
31 | }
32 | @article{nguyen2010estimating,
33 | title={Estimating divergence functionals and the likelihood ratio by convex risk minimization},
34 | author={Nguyen, XuanLong and Wainwright, Martin J and Jordan, Michael I},
35 | journal={IEEE Transactions on Information Theory},
36 | volume={56},
37 | number={11},
38 | pages={5847--5861},
39 | year={2010},
40 | publisher={IEEE}
41 | }
42 | @article{topsoe2000some,
43 | title={Some inequalities for information divergence and related measures of discrimination},
44 | author={Topsoe, Flemming},
45 | journal={IEEE Transactions on information theory},
46 | volume={46},
47 | number={4},
48 | pages={1602--1609},
49 | year={2000},
50 | publisher={IEEE}
51 | }
52 | @article{gretton2012kernel,
53 | title={A kernel two-sample test},
54 | author={Gretton, Arthur and Borgwardt, Karsten M and Rasch, Malte J and Sch{\"o}lkopf, Bernhard and Smola, Alexander},
55 | journal={Journal of Machine Learning Research},
56 | volume={13},
57 | number={Mar},
58 | pages={723--773},
59 | year={2012}
60 | }
61 | @article{webb2016characterizing,
62 | title={Characterizing concept drift},
63 | author={Webb, Geoffrey I and Hyde, Roy and Cao, Hong and Nguyen, Hai Long and Petitjean, Francois},
64 | journal={Data Mining and Knowledge Discovery},
65 | volume={30},
66 | number={4},
67 | pages={964--994},
68 | year={2016},
69 | publisher={Springer}
70 | }
71 | @misc{vherrmann,
72 | author={Vincent Herrmann},
73 | title={Wasserstein GAN and the Kantorovich-Rubinstein Duality},
74 | month={February},
75 | year={2017},
76 | url={https://vincentherrmann.github.io/blog/wasserstein/}
77 | }
78 | @article{bellemare2017cramer,
79 | title={The cramer distance as a solution to biased wasserstein gradients},
80 | author={Bellemare, Marc G and Danihelka, Ivo and Dabney, Will and Mohamed, Shakir and Lakshminarayanan, Balaji and Hoyer, Stephan and Munos, R{\'e}mi},
81 | journal={arXiv preprint arXiv:1705.10743},
82 | year={2017}
83 | }
84 | @inproceedings{gulrajani2017improved,
85 | title={Improved training of wasserstein gans},
86 | author={Gulrajani, Ishaan and Ahmed, Faruk and Arjovsky, Martin and Dumoulin, Vincent and Courville, Aaron C},
87 | booktitle={Advances in neural information processing systems},
88 | pages={5767--5777},
89 | year={2017}
90 | }
91 | @article{arjovsky2017wasserstein,
92 | title={Wasserstein gan},
93 | author={Arjovsky, Martin and Chintala, Soumith and Bottou, L{\'e}on},
94 | journal={arXiv preprint arXiv:1701.07875},
95 | year={2017}
96 | }
97 | @phdthesis{tropp2004topics,
98 | title={Topics in sparse approximation},
99 | author={Tropp, Joel Aaron},
100 | school={University of Texas at Austin},
101 | year={2004}
102 | }
103 | @inproceedings{anil2019sorting,
104 | title={Sorting out Lipschitz function approximation},
105 | author={Anil, Cem and Lucas, James and Grosse, Roger},
106 | booktitle={International Conference on Machine Learning},
107 | pages={291--301},
108 | year={2019},
109 | organization={PMLR}
110 | }
111 | @article{sobol2001global,
112 | title={Global sensitivity indices for nonlinear mathematical models and their Monte Carlo estimates},
113 | author={Sobol, Ilya M},
114 | journal={Mathematics and computers in simulation},
115 | volume={55},
116 | number={1-3},
117 | pages={271--280},
118 | year={2001},
119 | publisher={Elsevier}
120 | }
121 | @book{saltelli2008global,
122 | title={Global sensitivity analysis: the primer},
123 | author={Saltelli, Andrea and Ratto, Marco and Andres, Terry and Campolongo, Francesca and Cariboni, Jessica and Gatelli, Debora and Saisana, Michaela and Tarantola, Stefano},
124 | year={2008},
125 | publisher={John Wiley \& Sons}
126 | }
127 | @article{im1993sensitivity,
128 | title={Sensitivity estimates for nonlinear mathematical models},
129 |   author={Sobol', Ilya M},
130 | journal={Math. Model. Comput. Exp},
131 | volume={1},
132 | number={4},
133 | pages={407--414},
134 | year={1993}
135 | }
136 | @article{reid2009generalised,
137 | title={Generalised pinsker inequalities},
138 | author={Reid, Mark D and Williamson, Robert C},
139 | journal={arXiv preprint arXiv:0906.1244},
140 | year={2009}
141 | }
142 | @article{lin1991divergence,
143 | title={Divergence measures based on the Shannon entropy},
144 | author={Lin, Jianhua},
145 | journal={IEEE Transactions on Information theory},
146 | volume={37},
147 | number={1},
148 | pages={145--151},
149 | year={1991},
150 | publisher={IEEE}
151 | }
152 | @techreport{domingos2000decomp,
153 | author={Domingos, Pedro},
154 | title={A Unified Bias-Variance Decomposition and its Applications},
155 | institution={University of Washington},
156 | address={Seattle, WA},
157 | month={January},
158 | year={2000},
159 | url={https://homes.cs.washington.edu/~pedrod/papers/mlc00a.pdf}
160 | }
161 | @misc{mlxtenddecomp,
162 | author={Sebastian Raschka},
163 | title={bias_variance_decomp: Bias-variance decomposition for classification and regression losses},
164 | year={2014-2023},
165 | url={https://rasbt.github.io/mlxtend/user_guide/evaluate/bias_variance_decomp/}
166 | }
--------------------------------------------------------------------------------
/mvtk/bias_variance/bias_variance_parallel.py:
--------------------------------------------------------------------------------
1 | import ray
2 | import numpy as np
3 | import public
4 |
5 | from sklearn.utils import resample
6 |
7 | from . import bias_variance_mse, get_values, train_and_predict
8 |
9 |
10 | def _prepare_X_and_y(X_train_values, y_train_values, prepare_X, prepare_y_train):
11 | return prepare_X(X_train_values), prepare_y_train(y_train_values)
12 |
13 |
14 | @public.add
15 | def bias_variance_compute_parallel(
16 | estimator,
17 | X_train,
18 | y_train,
19 | X_test,
20 | y_test,
21 | prepare_X=lambda x: x,
22 | prepare_y_train=lambda x: x,
23 | iterations=200,
24 | random_state=None,
25 | decomp_fn=bias_variance_mse,
26 | fit_kwargs=None,
27 | predict_kwargs=None,
28 | ):
29 | r"""Compute the bias-variance decomposition in parallel
30 |
31 | Args:
32 | estimator (EstimatorWrapper): estimator wrapped with a class extending
33 | EstimatorWrapper
34 | X_train: features for training
35 | y_train: ground truth labels for training
36 | X_test: features for testing
37 | y_test: ground truth labels for testing
38 | prepare_X (function, optional): function to transform feature datasets before
39 | calling fit and predict methods
40 | prepare_y_train (function, optional): function to transform training ground
41 | truth labels before calling fit method
42 | iterations (int, optional): number of iterations for the training/testing
43 | random_state (int, optional): random state for bootstrap sampling
44 | decomp_fn (function, optional): bias-variance decomposition function
45 | fit_kwargs (dict, optional): kwargs to pass to the fit method
46 | predict_kwargs (dict, optional): kwargs to pass to the predict method
47 |
48 | Returns:
49 | (average loss, average bias, average variance, net variance)"""
50 | if predict_kwargs is None:
51 | predict_kwargs = {}
52 | if fit_kwargs is None:
53 | fit_kwargs = {}
54 |
55 | if isinstance(random_state, int):
56 | random_state = np.random.RandomState(seed=random_state)
57 |
58 | X_train_values = get_values(X_train)
59 | y_train_values = get_values(y_train)
60 | X_test_values = get_values(X_test)
61 | X_test_prepared = prepare_X(X_test_values)
62 |
63 | if random_state is None:
64 | result = [
65 | bootstrap_train_and_predict_ray.remote(
66 | estimator,
67 | X_train_values,
68 | y_train_values,
69 | X_test_prepared,
70 | prepare_X,
71 | prepare_y_train,
72 | fit_kwargs,
73 | predict_kwargs,
74 | )
75 | for _ in range(iterations)
76 | ]
77 | else:
78 | result = [
79 | train_and_predict_ray.remote(
80 | estimator,
81 | *_prepare_X_and_y(
82 | *resample(
83 | X_train_values, y_train_values, random_state=random_state
84 | ),
85 | prepare_X,
86 | prepare_y_train
87 | ),
88 | X_test_prepared,
89 | fit_kwargs,
90 | predict_kwargs
91 | )
92 | for _ in range(iterations)
93 | ]
94 |
95 | predictions = np.array(ray.get(result))
96 |
97 | y_test_values = get_values(y_test)
98 |
99 | return decomp_fn(predictions, y_test_values)
100 |
101 |
102 | @ray.remote
103 | def train_and_predict_ray(
104 | estimator,
105 | X_train_values,
106 | y_train_values,
107 | X_test_prepared,
108 | fit_kwargs=None,
109 | predict_kwargs=None,
110 | ):
111 | r"""Train an estimator and get predictions from it
112 |
113 | Args:
114 | estimator (EstimatorWrapper): estimator wrapped with a class extending
115 | EstimatorWrapper
116 | X_train_values: numpy array of features for training
117 | y_train_values: numpy array of ground truth labels for training
118 | X_test_prepared: features for testing which has been processed by prepare_X
119 | function
120 | fit_kwargs (dict, optional): kwargs to pass to the fit method
121 | predict_kwargs (dict, optional): kwargs to pass to the predict method
122 |
123 | Returns:
124 | predictions"""
125 | return train_and_predict(
126 | estimator,
127 | X_train_values,
128 | y_train_values,
129 | X_test_prepared,
130 | fit_kwargs=fit_kwargs,
131 | predict_kwargs=predict_kwargs,
132 | )
133 |
134 |
135 | @ray.remote
136 | def bootstrap_train_and_predict_ray(
137 | estimator,
138 | X_train_values,
139 | y_train_values,
140 | X_test_prepared,
141 | prepare_X=lambda x: x,
142 | prepare_y_train=lambda x: x,
143 | fit_kwargs=None,
144 | predict_kwargs=None,
145 | ):
146 | r"""Train an estimator using a bootstrap sample of the training data and get
147 | predictions from it
148 |
149 | Args:
150 | estimator (EstimatorWrapper): estimator wrapped with a class extending
151 | EstimatorWrapper
152 | X_train_values: numpy array of features for training
153 | y_train_values: numpy array of ground truth labels for training
154 | X_test_prepared: features for testing which has been processed by prepare_X
155 | function
156 | prepare_X (function, optional): function to transform feature datasets before
157 | calling fit and predict methods
158 | prepare_y_train (function, optional): function to transform train ground truth
159 | labels before calling fit method
160 | fit_kwargs (dict, optional): kwargs to pass to the fit method
161 | predict_kwargs (dict, optional): kwargs to pass to the predict method
162 |
163 | Returns:
164 | predictions"""
165 | if predict_kwargs is None:
166 | predict_kwargs = {}
167 | if fit_kwargs is None:
168 | fit_kwargs = {}
169 |
170 | X_sample, y_sample = resample(X_train_values, y_train_values)
171 |
172 | return train_and_predict(
173 | estimator,
174 | X_sample,
175 | y_sample,
176 | X_test_prepared,
177 | prepare_X,
178 | prepare_y_train,
179 | fit_kwargs,
180 | predict_kwargs,
181 | )
182 |
--------------------------------------------------------------------------------
/docs/credibility_user_guide.rst:
--------------------------------------------------------------------------------
1 | ######################
2 | Credibility User Guide
3 | ######################
4 |
5 | **********
6 | Motivation
7 | **********
8 |
9 | Let's say we are training a model for medical diagnoses. Avoiding false negatives
10 | is important, and we have a hard requirement that a model's recall (proportion
11 | of positive instances identified) must not fall below 70%. If someone validates
12 | a model and reports a recall of 80%, are we clear? Well, maybe. It turns out
13 | this data scientist had a validation set with 5 positive instances. The model
14 | correctly identified 4 of them, giving it a recall of 80%. Would you trust
15 | that? Of course not! You say that a larger sample size is needed. "How many do we
16 | need?" they ask. This module will help answer that question.
17 |
18 | How?
19 | ====
20 |
21 | There are two schools of thought for this problem: the `frequentist
22 | <https://en.wikipedia.org/wiki/Frequentist_inference>`_ and the
23 | `Bayesian <https://en.wikipedia.org/wiki/Bayesian_inference>`_ approaches.
24 | In practice they tend to give similar results. Going back to our 5 sample
25 | validation set, the frequentist would be concerned with how much our recall
26 | would be expected to vary from one 5 sample hold out set to another. They would
27 | want the hold out set to be large enough that you would not expect much change
28 | in the estimated recall from one hold out set to another. The Bayesian approach
29 | seeks to directly identify the probability that the recall would be lower than
30 | 70% if the validation set were infinitely large. We believe this is a better
31 | representation of the problem at hand, and designed the library around this
32 | Bayesian approach.
33 |
34 |
35 | ******************
36 | Beta Distributions
37 | ******************
38 |
39 | Probability of Low Performance
40 | ==============================
41 |
42 | .. currentmodule:: mvtk.credibility
43 |
44 | If you flip a coin 100 times, and it comes up heads 99 times, would you suspect
45 | a biased coin? Probably. What if you flipped it 5 times and saw 4 heads?
46 | That is much less strange. Determining the bias of a coin embodies the core
47 | principles behind determining whether many performance metrics are unacceptably
48 | low.
49 |
50 | If the coin *is* biased, how biased is it? In general, we'd say there's some
51 | probability distribution over all possible biases. We would generally use a
52 | `beta distribution `_ to
53 | model this distribution for good reasons. This distribution has two free
54 | parameters: the number of heads and the number of tails. However, we generally
55 | offset both of those numbers by 1 so the distribution with no observed flips is
56 | :math:`B(1, 1)` (with :math:`B` representing our beta distribution as a
57 | function of heads and tails plus respective offsets), which as it turns out is
58 | exactly a uniform distribution over all possible biases. In this sense, we can
59 | express total uncertainty before taking measurements. The beta distribution
60 | becomes more concentrated around the empirical proportion of heads as you take
61 | more and more measurements. If we were reasonably certain of a 60% bias, we
62 | might offset the number of heads with a 6 and the number of tails with a 4.
63 | Then we would start to expect an unbiased coin after observing 2 tails. This
64 | offset is called the *prior* in Bayesian inference, and represents our
65 | understanding before making any observations.
66 |
67 | .. math::
68 | B(\alpha, \beta)
69 |
70 | .. figure:: images/Beta_distribution_pdf.svg
71 | :width: 800px
72 | :align: center
73 | :height: 400px
74 | :alt: alternate text
75 | :figclass: align-center
76 |
77 |     Beta distribution for different :math:`\alpha` (for heads plus offset) and
78 | :math:`\beta` (tails plus offset).
79 |
80 | We integrate the area under :math:`B(\alpha,\beta)` from 0 to
81 | :math:`p` to determine the probability that a coin's bias is less
82 | than :math:`p`. This is effectively how :meth:`prob_below` works.
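
For example (an illustrative sketch), with 99 observed heads, 1 observed
tail, and a uniform :math:`B(1, 1)` prior:

.. code-block:: python

    from mvtk.credibility import prob_below

    # Probability that the coin's bias toward heads is below 0.5.
    p = prob_below(99, 1, cutoff=0.5, prior=(1, 1))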
83 |
84 |
85 | Credible Intervals
86 | ==================
87 |
88 | Sometimes you just want a general sense of uncertainty for your sample
89 | estimates. We use :meth:`credible_interval` to compute a `credible interval
90 | <https://en.wikipedia.org/wiki/Credible_interval>`_. This will give you the
91 | smallest interval for which there is a ``credibility`` (keyword argument that
92 | defaults to :math:`0.5`) chance of the bias being within that region. It will
93 | return a lower bound no less than :math:`0` and an upper bound no greater than :math:`1`.
94 | This is subtly different from frequentist `confidence intervals
95 | <https://en.wikipedia.org/wiki/Confidence_interval>`_. In our 5 sample
96 | example, the latter reports an interval expected to contain the true proportion in a fraction `p` (often chosen to be 95%) of all such 5 sample experiments.
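
Returning to the recall example (4 of 5 positives identified), a sketch:

.. code-block:: python

    from mvtk.credibility import credible_interval

    # Smallest interval with a 50% chance of containing the true recall.
    lower, upper = credible_interval(4, 1, credibility=0.5)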
97 |
98 | **************
99 | Common Metrics
100 | **************
101 | Many performance metrics used for binary
102 | classification follow the same mechanics as the
103 | analysis above. The following is a non-exhaustive
104 | list of performance metrics that can be readily
105 | translated into a biased coin scenario in which we
106 | wish to determine heads / (heads + tails):
107 |
108 | * Precision: true positive / (true positive + false positive)
109 | * Recall: true positive / (true positive + false negative)
110 | * Accuracy: correctly identified / (correctly identified + incorrectly identified)
111 |
112 |
113 | ROC AUC
114 | =================
115 |
116 | `ROC AUC
117 | <https://en.wikipedia.org/wiki/Receiver_operating_characteristic>`_
118 | is an extremely useful measure for binary classification. Like many
119 | other measures of performance for binary classification, it can be
120 | expressed as a proportion of outcomes. However,
121 | unlike other measures of performance, it does not
122 | make use of a threshold. This ultimately makes it a
123 | ranking metric, as it characterizes the degree to
124 | which positive instances are scored higher than
125 | negative instances. However, like other metrics, it
126 | can be expressed as an empirical measure of a
127 | proportion. Specifically, ROC AUC is the proportion
128 | of pairs of positive and negative examples such
129 | that the positive example is scored higher than the
130 | negative one. This can be expressed as
131 |
132 | .. math::
133 |     \frac{1}{NM}\sum\limits_{n,m}^{N,M} \left[\mathrm{score}(\mathrm{Positive}_n) > \mathrm{score}(\mathrm{Negative}_m)\right]
134 |
135 | However, computing the area under the receiver
136 | operating characteristic is a more computationally
137 | efficient means of computing the same quantity.
138 | :meth:`roc_auc_preprocess` will convert a positive and negative
139 | sample count to an associated count of correctly and incorrectly
140 | ranked pairs of positive and negative instances using the ROC AUC
141 | score. This pair of numbers can be used as arguments for
142 | :meth:`prob_below` and :meth:`credible_interval`.
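
For instance (the counts and AUC value here are hypothetical):

.. code-block:: python

    from mvtk.credibility import prob_below, roc_auc_preprocess

    # 50 positives, 100 negatives, and an observed ROC AUC of 0.9.
    correct, incorrect = roc_auc_preprocess(50, 100, 0.9)
    p = prob_below(correct, incorrect, cutoff=0.85)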
143 |
144 | .. topic:: Tutorials:
145 |
146 | * :doc:`Credibility `
147 |
--------------------------------------------------------------------------------
/tests/bias_variance/estimators/test_pytorch_estimator_wrapper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import torch
3 | from torch import nn
4 |
5 | from mvtk.bias_variance.estimators import PyTorchEstimatorWrapper
6 |
7 |
8 | class ModelPyTorch(nn.Module):
9 | def __init__(self):
10 | super().__init__()
11 | self.linear1 = nn.Linear(2, 8)
12 | self.linear2 = nn.Linear(8, 1)
13 |
14 | def forward(self, x):
15 | x = self.linear1(x)
16 | x = self.linear2(x)
17 | return x
18 |
19 |
20 | def create_data():
21 | X_train = np.arange(12).reshape(6, 2)
22 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
23 | X_test = np.arange(6).reshape(3, 2)
24 | y_test = np.array([0, 1, 1])
25 |
26 | return X_train, y_train, X_test, y_test
27 |
28 |
29 | def create_model():
30 | model_pytorch = ModelPyTorch()
31 | optimizer = torch.optim.Adam(model_pytorch.parameters(), lr=0.001)
32 | loss_fn = nn.MSELoss()
33 |
34 | return model_pytorch, optimizer, loss_fn
35 |
36 |
37 | def optimizer_gen(x):
38 | return torch.optim.Adam(x.parameters(), lr=0.001)
39 |
40 |
41 | def reset_parameters(x):
42 | if hasattr(x, "reset_parameters"):
43 | x.reset_parameters()
44 |
45 |
46 | def fit(estimator, optimizer, loss_fn, X, y, epochs=10, batch_size=None):
47 | for i in range(epochs):
48 | if batch_size is None:
49 | batch_size = len(y)
50 | for j in range(0, len(y), batch_size):
51 | batch_start = j
52 | batch_end = j + batch_size
53 | X_batch = X[batch_start:batch_end]
54 | y_batch = y[batch_start:batch_end]
55 | prediction = estimator(X_batch)
56 | loss = loss_fn(prediction, y_batch)
57 |
58 | optimizer.zero_grad()
59 | loss.backward()
60 | optimizer.step()
61 |
62 |
63 | def custom_fit(self, X, y, epochs=10, batch_size=None):
64 | for i in range(epochs):
65 | if batch_size is None:
66 | batch_size = len(y)
67 | for j in range(0, len(y), batch_size):
68 | batch_start = j
69 | batch_end = j + batch_size
70 | X_batch = X[batch_start:batch_end]
71 | y_batch = y[batch_start:batch_end]
72 | prediction = self.estimator(X_batch)
73 | loss = self.loss_fn(prediction, y_batch)
74 |
75 | self.optimizer.zero_grad()
76 | loss.backward()
77 | self.optimizer.step()
78 |
79 |
80 | def predict(estimator, X, custom_test=False):
81 | if custom_test:
82 | return [1, 0, 1]
83 |
84 | prediction_list = []
85 | with torch.no_grad():
86 | for value in X:
87 | prediction = estimator(value)
88 | if len(prediction) > 1:
89 | prediction_list.append(prediction.argmax().item())
90 | else:
91 | prediction_list.append(prediction.item())
92 | return prediction_list
93 |
94 |
95 | def custom_predict(estimator, X):
96 | return [1, 0, 1]
97 |
98 |
99 | def test_pytorch_estimator_wrapper():
100 | torch.use_deterministic_algorithms(True)
101 |
102 | X_train, y_train, X_test, y_test = create_data()
103 |
104 | X_train_torch = torch.FloatTensor(X_train)
105 | X_test_torch = torch.FloatTensor(X_test)
106 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1)
107 |
108 | torch.manual_seed(123)
109 | model, optimizer, loss_fn = create_model()
110 |
111 | model.apply(reset_parameters)
112 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=100)
113 | pred = predict(model, X_test_torch)
114 |
115 | torch.manual_seed(123)
116 | model_test, optimizer_test, loss_fn_test = create_model()
117 | model_wrapped = PyTorchEstimatorWrapper(model_test, optimizer_gen, loss_fn_test)
118 |
119 | model_wrapped.fit(X_train_torch, y_train_torch)
120 | pred_wrapped = model_wrapped.predict(X_test_torch)
121 |
122 | assert np.array_equal(pred, pred_wrapped)
123 |
124 |
125 | def test_pytorch_estimator_wrapper_kwargs_fit():
126 | torch.use_deterministic_algorithms(True)
127 |
128 | X_train, y_train, X_test, y_test = create_data()
129 |
130 | X_train_torch = torch.FloatTensor(X_train)
131 | X_test_torch = torch.FloatTensor(X_test)
132 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1)
133 |
134 | torch.manual_seed(123)
135 | model, optimizer, loss_fn = create_model()
136 |
137 | model.apply(reset_parameters)
138 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=5)
139 | pred = predict(model, X_test_torch)
140 |
141 | torch.manual_seed(123)
142 | model_test, optimizer_test, loss_fn_test = create_model()
143 | model_wrapped = PyTorchEstimatorWrapper(model_test, optimizer_gen, loss_fn_test)
144 |
145 | model_wrapped.fit(X_train_torch, y_train_torch, epochs=5)
146 | pred_wrapped = model_wrapped.predict(X_test_torch)
147 |
148 | assert np.array_equal(pred, pred_wrapped)
149 |
150 |
151 | def test_pytorch_estimator_wrapper_custom_fit():
152 | torch.use_deterministic_algorithms(True)
153 |
154 | X_train, y_train, X_test, y_test = create_data()
155 |
156 | X_train_torch = torch.FloatTensor(X_train)
157 | X_test_torch = torch.FloatTensor(X_test)
158 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1)
159 |
160 | torch.manual_seed(123)
161 | model, optimizer, loss_fn = create_model()
162 |
163 | model.apply(reset_parameters)
164 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=10)
165 | pred = predict(model, X_test_torch)
166 |
167 | torch.manual_seed(123)
168 | model_test, optimizer_test, loss_fn_test = create_model()
169 | model_wrapped = PyTorchEstimatorWrapper(
170 | model_test, optimizer_gen, loss_fn_test, fit_fn=custom_fit
171 | )
172 |
173 | model_wrapped.fit(X_train_torch, y_train_torch)
174 | pred_wrapped = model_wrapped.predict(X_test_torch)
175 |
176 | assert np.array_equal(pred, pred_wrapped)
177 |
178 |
179 | def test_pytorch_estimator_wrapper_custom_predict():
180 | torch.use_deterministic_algorithms(True)
181 |
182 | X_train, y_train, X_test, y_test = create_data()
183 |
184 | X_train_torch = torch.FloatTensor(X_train)
185 | X_test_torch = torch.FloatTensor(X_test)
186 | y_train_torch = torch.FloatTensor(y_train).reshape(-1, 1)
187 |
188 | torch.manual_seed(123)
189 | model, optimizer, loss_fn = create_model()
190 |
191 | model.apply(reset_parameters)
192 | fit(model, optimizer, loss_fn, X_train_torch, y_train_torch, epochs=100)
193 | pred = predict(model, X_test_torch, custom_test=True)
194 |
195 | torch.manual_seed(123)
196 | model_test, optimizer_test, loss_fn_test = create_model()
197 | model_wrapped = PyTorchEstimatorWrapper(
198 | model_test, optimizer_gen, loss_fn_test, predict_fn=custom_predict
199 | )
200 |
201 | model_wrapped.fit(X_train_torch, y_train_torch)
202 | pred_wrapped = model_wrapped.predict(X_test_torch)
203 |
204 | assert np.array_equal(pred, pred_wrapped)
205 |
--------------------------------------------------------------------------------
/mvtk/thresholding.py:
--------------------------------------------------------------------------------
1 | import public
2 | import bisect
3 | import numpy
4 | import matplotlib.pylab as plt
5 |
6 | from functools import reduce
7 |
8 |
9 | @public.add
10 | def plot_err(scores, utility_mean, utility_err, color=None, label=None, alpha=0.5):
11 | plt.plot(scores, utility_mean, color=color)
12 | plt.fill_between(scores, *utility_err, alpha=alpha, color=color, label=label)
13 |
14 |
15 | @public.add
16 | def expected_utility(utility, data, N=4096, credibility=0.5):
17 | """Get the utility distribution over possible thresholds.
18 |
19 | Args:
20 | utility (function): utility function that ingests true/false
21 | positive/negative rates.
22 | data (list-like): iterable of list-likes of the form (ground truth,
23 | score). Feedback is null when an alert is not triggered.
24 | credibility (float): Credibility level for a credible interval. This
25 | interval will be centered about the mean and have a `credibility`
26 | chance of containing the true utility.
27 |
28 | returns:
29 | tuple of three elements:
30 | - candidate thresholds
31 | - mean expected utility
32 | - upper and lower quantile of estimate of expected utility associated
33 | with each threshold
34 | """
35 |     credibility /= 2  # split the central credibility mass into two equal tails
36 | scores, utilities = sample_utilities(utility, data, N=N)
37 | low = int(N * credibility)
38 | high = int(N * (1 - credibility))
39 | utilities = numpy.asarray(utilities)
40 | utilities.sort(axis=1)
41 | return scores, utilities.mean(1), numpy.asarray(utilities[:, [low, high]]).T
42 |
43 |
44 | @public.add
45 | def optimal_threshold(utility, data, N=4096):
46 | scores, utilities = sample_utilities(utility, data, N=N)
47 | means = utilities.mean(1)
48 | idx = means.argmax()
49 | return scores[idx], means[idx]
50 |
51 |
52 | @public.add
53 | def sample_utilities(utility, data, N=4096):
54 | """Get distribution of utilities.
55 |
56 | Args:
57 |         utility (function): utility function that ingests true/false
58 | positive/negative rates.
59 |         data (list-like): iterable of iterables of the form (ground truth, score).
60 | Feedback is null when an alert is not triggered.
61 |
62 | returns: thresholds, utilities
63 | """
64 | if not len(data):
65 | return data, numpy.asarray([])
66 | nprng = numpy.random.RandomState(0)
67 | data = numpy.asarray(data)
68 | num_positives = data[:, 0].sum()
69 | rates = [1 + num_positives, 1 + len(data) - num_positives, 1, 1]
70 | utilities = []
71 | data = data[numpy.argsort(data[:, 1])]
72 | for ground_truth, score in data:
73 | update_rates(rates, ground_truth)
74 | utilities.append(utility(*nprng.dirichlet(rates, size=N).T))
75 | return data[:, 1], numpy.asarray(utilities)
76 |
77 |
78 | @public.add
79 | def thompson_sample(utility, data, N=1024, quantile=False):
80 | scores, utilities = sample_utilities(utility, data, N)
81 | if quantile:
82 | return utilities.argmax(axis=0) / (len(utilities) - 1)
83 | return scores[utilities.argmax(axis=0)]
84 |
85 |
86 | @public.add
87 | def update_rates(rates, ground_truth):
88 | rates[0] -= ground_truth
89 | rates[1] -= not ground_truth
90 | rates[2] += not ground_truth
91 | rates[3] += ground_truth
92 |
93 |
94 | @public.add
95 | class AdaptiveThreshold:
96 | """Adaptive agent that balances exploration with exploitation with respect
97 | to setting and adjusting thresholds.
98 |
99 | When exploring, the threshold is 0, effectively letting anything
100 | through. This produces unbiased data that can then be used to set a
101 | more optimal threshold in subsequent rounds. The agent seeks to
102 | balance the opportunity cost of running an experiment with the
103 | utility gained over subsequent rounds using the information gained
104 | from this experiment.
105 | """
106 |
107 | def __init__(self, utility):
108 | """
109 | Args:
110 | utility (function): Function that takes in true/false
111 |                 positive/negative rates. Specifically (tp, fp, tn, fn) -> float
112 | representing utility."""
113 |
114 | self.utility = utility
115 | self.results = []
116 | self.unbiased_positives = 1
117 | self.unbiased_negatives = 1
118 | self.previous_threshold = 0
119 | self.nprng = numpy.random.RandomState(0)
120 |
121 | def get_best_threshold(self):
122 | # true positives, false positives, true negatives, false negatives
123 | rates = [self.unbiased_positives, self.unbiased_negatives, 1, 1]
124 | experiment_utility = self.utility(*self.nprng.dirichlet(rates))
125 | hypothetical_rates = [
126 | self.unbiased_positives - self.last_experiment_outcome,
127 | self.unbiased_negatives - (1 - self.last_experiment_outcome),
128 | 1,
129 | 1,
130 | ]
131 | best_hypothetical_utility = -numpy.inf
132 | best_utility = -numpy.inf
133 | for score, ground_truth, idx in self.results:
134 | update_rates(rates, ground_truth)
135 | utility = self.utility(*self.nprng.dirichlet(rates))
136 | if utility > best_utility:
137 | best_utility = utility
138 | best_threshold = score
139 | if idx >= self.last_experiment_idx:
140 | continue
141 | update_rates(hypothetical_rates, ground_truth)
142 | hypothetical_utility = self.utility(
143 | *self.nprng.dirichlet(hypothetical_rates)
144 | )
145 | if hypothetical_utility > best_hypothetical_utility:
146 | best_hypothetical_utility = hypothetical_utility
147 | hindsight_utility = utility
148 | return best_threshold, experiment_utility, best_utility, hindsight_utility
149 |
150 | def __call__(self, ground_truth, score):
151 | """Args are ignored if previous threshold was not 0. Otherwise, the
152 |         score is added as a potential threshold and the ground_truth is noted to help
153 | identify the optimal threshold.
154 |
155 | Args:
156 | ground_truth (bool)
157 | score (float)
158 | """
159 | idx = len(self.results)
160 | if self.previous_threshold == 0:
161 | bisect.insort(self.results, (score, ground_truth, idx))
162 | self.unbiased_positives += ground_truth
163 | self.unbiased_negatives += 1 - ground_truth
164 | self.last_experiment_idx = idx
165 | self.last_experiment_outcome = ground_truth
166 | if len(self.results) < 2:
167 | return self.previous_threshold
168 | (
169 | best_threshold,
170 | experiment_utility,
171 | best_utility,
172 | hindsight_utility,
173 | ) = self.get_best_threshold()
174 | total_utility_gained = (best_utility - hindsight_utility) * (
175 | idx - self.last_experiment_idx
176 | )
177 | opportunity_cost = hindsight_utility - experiment_utility
178 | if opportunity_cost <= total_utility_gained:
179 | self.previous_threshold = 0
180 | else:
181 | self.previous_threshold = best_threshold
182 | return self.previous_threshold
183 |
184 |
185 | @public.add
186 | def exploration_proportion(thresholds, N):
187 | exploration = thresholds == 0
188 | alpha = 1 - 1.0 / N
189 | return reduce(
190 | lambda accum, elem: accum + [accum[-1] * alpha + elem * (1 - alpha)],
191 | exploration[N:],
192 | [exploration[:N].mean()],
193 | )
194 |
--------------------------------------------------------------------------------
/docs/thresholding_user_guide.rst:
--------------------------------------------------------------------------------
1 | #######################
2 | Thresholding User Guide
3 | #######################
4 |
5 | **********
6 | Motivation
7 | **********
8 |
9 | Let's say you're monitoring some process for alerts. Maybe it's model
10 | performance. Maybe it's model drift. In any case, let's say you have a score
11 | that increases with the likelihood that something is wrong and needs to be
12 | investigated. You still need to decide whether or not to actually launch an
13 | investigation for each of these scores. This is known as thresholding.
14 | But where to put the threshold? Set it too high and you'll miss important
15 | alerts. Set it too low and you'll be flooded with noise. This module comes with
16 | tools and techniques to experimentally determine where to set your threshold
17 | given your tolerance for noise.
18 |
19 | How?
20 | ====
21 |
22 | Let's say the scores associated with good alerts look like this.
23 |
24 | .. figure:: images/thresholding_positive_scores.png
25 | :width: 500px
26 | :align: center
27 | :height: 500px
28 | :alt: alternate text
29 | :figclass: align-center
30 |
31 | Moreover, scores associated with negative alerts look like this.
32 |
33 | .. figure:: images/thresholding_negative_scores.png
34 | :width: 500px
35 | :align: center
36 | :height: 500px
37 | :alt: alternate text
38 | :figclass: align-center
39 |
40 | Clearly the likelihood of finding a good alert increases with model score, but
41 | any choice of threshold will imply a trade-off between true/false positive/negatives. In
42 | general, you need to decide on a utility function of true/false
43 | positive/negatives.
44 |
45 | .. code-block:: python
46 |
47 |     def utility(tp, fp, tn, fn):
48 |         return tp - 20 * fn - fp

49 | The utility function would increase with true positives and/or true negatives,
50 | and decrease with false positives and/or false negatives. A risk averse utility
51 | function is shown above with a 20 fold preference of avoiding false negatives
52 | to false positives. In general, we will assume the utility function is a
53 | *proportion* of true/false positive/negatives in a data set. In this sense, the
54 | utility function is a function of a categorical distribution over true/false
55 | positives/negatives.
56 |
57 | Now that we have a utility function and a sample of positive and negative alert
58 | scores, we can plot expected utility as a function of threshold.
59 |
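Here is a minimal sketch of how such a plot can be produced with this module,
assuming ``data`` is a list of ``(ground_truth, score)`` pairs you have
collected:

.. code-block:: python

    from mvtk import thresholding

    # data: e.g. [(True, 0.93), (False, 0.21), ...]
    scores, utility_mean, utility_err = thresholding.expected_utility(utility, data)
    thresholding.plot_err(scores, utility_mean, utility_err, label="expected utility")
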
60 | .. figure:: images/thresholding_expected_utility.png
61 | :width: 500px
62 | :align: center
63 | :height: 400px
64 | :alt: alternate text
65 | :figclass: align-center
66 |
67 | Expected utility as a function of threshold (solid) and 50%
68 | `credible interval
69 |    <https://en.wikipedia.org/wiki/Credible_interval>`_ (shaded
70 | region).
71 |
72 | Note that we don't actually have the true distribution of positive
73 | and negative scores in practice. Rather, we have examples. If we
74 | only had 4 positive scores and 4 negative scores, we could not be very
75 | certain of the resulting utility estimates. More on this in the `credibility user guide
76 | `__. We model the distribution of true/false
77 | positive/negatives as a `Dirichlet-multinomial distribution
78 | <https://en.wikipedia.org/wiki/Dirichlet-multinomial_distribution>`_ with
79 | a `maximum entropy prior
80 | `_.
81 |
82 | This shows a pronounced peak in utility, but only after (in this
83 | case) a few thousand example scores. In practice, we could well be starting
84 | with *no* examples and building up our knowledge as we go. To make things
85 | worse, we will only find out if an alert was good or not if we investigate it.
86 | Anything that falls below our threshold forever remains unlabeled. We developed
87 | a specific algorithm to tackle this problem that we call *adaptive
88 | thresholding*.
89 |
90 | *********************
91 | Adaptive Thresholding
92 | *********************
93 |
94 | We face a classic `exploitation/exploration dilemma
95 | `_. We can either choose
96 | to *exploit* the information we have so far about positive and negative score
97 | distributions to set a threshold or *explore* what may lie below that threshold
98 | by labeling whatever comes in next. Unfortunately, the labels obtained from
99 | scores greater than a threshold chosen at the time pose a challenge in that
100 | they yield heavily biased estimates of positive and negative score
101 | distributions (since they don't include anything below the threshold set at the
102 | time). We have not found a good way to compensate for that bias in practice.
103 | Rather, we must switch between an optimally set threshold and labeling
104 | whatever comes next. This produces a series of *unbiased labels*.
105 |
106 | Our adaptive thresholding algorithm seeks to balance the
107 | opportunity cost of labeling data against the utility gained over subsequent
108 | rounds from the resulting change in threshold. Each score with an unbiased label is a
109 | potential threshold. For each of those options, we sample a possible
110 | distribution of true/false positives/negatives (with a Dirichlet-multinomial
111 | distribution with a maximum entropy prior) using the other unbiased labels.
112 | Utilities are calculated for each sampled distribution for true/false
113 | positives/negatives. The highest utility is noted as well as the utility of
114 | setting the threshold to 0 (exploration). Next this process is repeated using
115 | all but the most recent unbiased label. We locate the optimal threshold
116 | computed using all but the most recent unbiased label, and then compute the
117 | utility of that threshold using the utilities calculated using *all* unbiased
118 | labels. The difference between this utility and the utility of the true optimal
119 | threshold is the expected utility gained from the last round of exploration.
120 | This expected utility gained per round times the number of rounds since the
121 | last round of exploration is the net utility gained since the last round of
122 | experimentation. Meanwhile the difference between the utility of the true
123 | optimal threshold and the utility of exploration is the opportunity cost of
124 | exploration. When the net utility gained exceeds the opportunity cost of
125 | exploration, exploration is chosen over exploitation.
126 |
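In code, the agent is just a callable that consumes labeled scores and returns
the threshold to use next (a minimal sketch; the ``alert_stream`` iterable of
``(ground_truth, score)`` pairs is hypothetical):

.. code-block:: python

    from mvtk.thresholding import AdaptiveThreshold

    agent = AdaptiveThreshold(utility)
    for ground_truth, score in alert_stream:
        # A returned threshold of 0 means the agent chose to explore.
        threshold = agent(ground_truth, score)
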
127 | Note that we stochastically sample utilities at the score associated with each
128 | unbiased label at each round. This is necessary to prevent deadlocks in which
129 | the optimal threshold is identical before and after experimentation, leaving
130 | the expected utility gained per round 0 forever (thus ending any possibility of
131 | subsequent rounds of exploration). Rather, exploration is chosen according to
132 | the *probability* that net utility gained has in fact caught up with the
133 | opportunity cost of the last round of exploration.
134 |
135 | However, as we gain a more accurate picture of the distribution of positive and
136 | negative scores, we make smaller changes to our best guess at the location of
137 | the optimal threshold after exploration. As a result, the expected utility
138 | gained per round of exploitation will gradually decrease over time, and we will
139 | need more and more rounds of exploitation to make up for the opportunity cost
140 | of exploration (shown below).
141 |
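A curve like the one below can be computed with :meth:`exploration_proportion`,
which smooths the indicator of exploration (a threshold of 0) with an
exponential moving average (``thresholds`` is a hypothetical numpy array of the
thresholds returned at each round, and the window of 512 is arbitrary):

.. code-block:: python

    import matplotlib.pylab as plt

    from mvtk.thresholding import exploration_proportion

    plt.plot(exploration_proportion(thresholds, 512))
    plt.xlabel("round")
    plt.ylabel("exploration proportion")
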
142 | .. figure:: images/thresholding_exploration_proportion.png
143 | :width: 500px
144 | :align: center
145 | :height: 500px
146 | :alt: alternate text
147 | :figclass: align-center
148 |
149 |     Probability of choosing exploration decreases from about 45% at the
150 | beginning to about 5% after 3600 rounds.
151 |
152 |
153 | .. topic:: Tutorials:
154 |
155 | * :doc:`Thresholding `
156 |
157 | .. bibliography:: refs.bib
158 | :cited:
159 |
--------------------------------------------------------------------------------
/mvtk/supervisor/divergence/generators.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import public
3 |
4 | from collections import defaultdict
5 | from functools import reduce
6 |
7 |
8 | @public.add
9 | def js_data_stream(
10 | nprng, batch_size, sample_distributions, categorical_columns=tuple()
11 | ):
12 | r"""Data stream generator for Jensen-Shannon divergence of N distributions.
13 | Jensen-Shannon divergence measures the information of knowing which of
14 | those N distributions a sample will be drawn from before it is drawn. So if
15 | we rolled a fair N sided die to determine which distribution we will draw a
16 | sample from, JS divergence reports how many bits of information will be
17 | revealed from the die. This scenario is ultimately simulated in this
18 | function. However, in real life, we may only have examples of samples from
19 | each distribution we wish to compare. In the most general case, each
20 | distribution we wish to compare is represented by M samples of samples
21 | (with potentially different sizes) from M similar distributions whose
22 | average is most interesting. Just as we might simulate sampling from a
23 | single distribution by randomly sampling a batch of examples with
24 | replacement, we can effectively sample from an average of distributions by
25 | randomly sampling each batch (which may be representative of a single
26 | distribution), then randomly sampling elements of the chosen batch. This
27 |     can ultimately be thought of as a more data-efficient means to the same end as
28 | downsampling large batch sizes.
29 |
30 | Args:
31 | nprng: Numpy ``RandomState`` used to generate random samples
32 | batch_size: size of batch
33 |         sample_distributions: list of lists of samples to compare.
34 | For example, ``[[batch1, batch2, batch3], [batch4, batch5],
35 | [batch6, batch7]]`` Assuming ``batch1`` came from distribution
36 |             :math:`p_1`, ``batch2`` from :math:`p_2`, etc., this function will
37 |             simulate a system in which a latent `N=3` sided die roll
38 | determines whether to draw a sample from :math:`\frac{p_1 + p_2 +
39 | p_3}{3}`, :math:`\frac{p_4 + p_5}{2}`, or :math:`\frac{p_6 +
40 | p_7}{2}`.
41 | categorical_columns (tuple): list or tuple of column indices that are
42 | considered categorical.
43 |
44 | Returns:
45 | The output of this function will be two samples of size batch_size with
46 |         samples, :math:`x`, drawn from batch_size rolls, :math:`z`, of our
47 | :math:`N` sided die. Following the example above for which :math:`N=3`,
48 | the first of these two output samples will be of the form :math:`(x,
49 | z)`, where x is the sample drawn and z is the die roll. The second of
50 | these two samples will be of the form :math:`(x, z^{\prime})` where x
51 | is the same sample as before, but :math:`z^\prime` is a new set of
52 |         otherwise unrelated rolls of the same :math:`N=3` sided die."""
53 |
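    # Usage sketch (hypothetical data; each batch is a 2D numpy array whose
    # rows are samples drawn from one distribution):
    #
    #     nprng = numpy.random.RandomState(0)
    #     stream = js_data_stream(nprng, 64, [[batch1, batch2], [batch3]])
    #     grouped_a, grouped_b = next(stream)
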
54 | def process_sample_distributions(sample_distributions):
55 | z = []
56 | out = []
57 | for idx, count in zip(
58 | *numpy.unique(
59 | nprng.randint(0, len(sample_distributions), size=batch_size),
60 | return_counts=True,
61 | )
62 | ):
63 | sample_distribution = sample_distributions[idx]
64 | out.extend(
65 | [
66 | sample_distribution[i][
67 | nprng.randint(0, len(sample_distribution[i]))
68 | ]
69 | for i in nprng.randint(0, len(sample_distribution), size=count)
70 | ]
71 | )
72 | z.extend([idx] * count)
73 | sample_distribution = numpy.asarray(out)
74 | catted1 = numpy.concatenate(
75 | (sample_distribution, numpy.asarray(z)[:, numpy.newaxis]), axis=1
76 | )
77 | z = nprng.randint(0, len(sample_distributions), size=batch_size)
78 | catted2 = numpy.concatenate((sample_distribution, z[:, numpy.newaxis]), axis=1)
79 | return numpy.asarray((catted2, catted1))
80 |
81 | while True:
82 | yield groupby(
83 | categorical_columns, *process_sample_distributions(sample_distributions)
84 | )
85 |
86 |
87 | @public.add
88 | def fdiv_data_stream(
89 | nprng, batch_size, sample_distributions, categorical_columns=tuple()
90 | ):
91 | r"""Data stream generator for f-divergence.
92 |
93 | Args:
94 | nprng: Numpy ``RandomState`` used to generate random samples
95 | batch_size: size of batch
96 | sample_distributions: list of lists of samples to compare for each
97 | partition of the data. For example, ``[[batch1, batch2, batch3],
98 | [batch4, batch5], [batch6, batch7]]``
99 | categorical_columns (tuple): list or tuple of column indices that are
100 | considered categorical.
101 |
102 | Returns:
103 | The output of this function will be ``N`` samples of size
104 |         ``batch_size``, where ``N = len(sample_distributions)``. Following the
105 |         example above, assuming ``batch1`` came from distribution :math:`p_1`,
106 |         ``batch2`` from :math:`p_2`, etc., this function will output a tuple of
107 |         ``N = 3`` samples of size ``batch_size``, where the first is sampled
108 |         from :math:`\frac{p_1 + p_2 + p_3}{3}`, the second is sampled from
109 |         :math:`\frac{p_4 + p_5}{2}`, and the third is sampled from
110 |         :math:`\frac{p_6 + p_7}{2}`."""
111 |
112 | def process_sample_distributions(sample_distributions):
113 | return numpy.asarray(
114 | [
115 | [
116 | sample_distribution[i][
117 | nprng.randint(0, len(sample_distribution[i]))
118 | ]
119 | for i in nprng.randint(0, len(sample_distribution), size=batch_size)
120 | ]
121 | for sample_distribution in sample_distributions
122 | if len(sample_distribution)
123 | ]
124 | )
125 |
126 | while True:
127 | yield groupby(
128 | categorical_columns, *process_sample_distributions(sample_distributions)
129 | )
130 |
131 |
132 | def groupby(categorical_columns, *samples):
133 | r"""Group samples by unique values found in a subset of columns
134 | Args:
135 | categorical_columns: List of indices of columns which should be
136 | treated as categorical.
137 | *samples: A set of samples drawn from distinct distributions.
138 | Each distribution is assumed to be defined on the same probability
139 | space, so it would make sense to compare a sample drawn from one
140 | distribution to a sample drawn from another.
141 |
142 | Returns:
143 | tuple of dicts that each map unique combinations of
144 | ``categorical_columns`` to a subset of samples from the
145 | ``sample_distributions`` that have these values in their
146 | ``categorical_columns``. ``categorical_columns`` are omitted from
147 | the values of these dicts."""
148 | if not categorical_columns:
149 | return [{tuple(): sample.astype("float")} for sample in samples]
150 | # the complement of categorical_columns is assumed to be numeric
151 | numerical_columns = [
152 | i for i in range(samples[0].shape[1]) if i not in categorical_columns
153 | ]
154 |
155 | def grouper(accum, element):
156 | accum[tuple(element[categorical_columns])].append(element[numerical_columns])
157 | return accum
158 |
159 | return tuple(
160 | {
161 | key: numpy.asarray(value, dtype="float")
162 | for key, value in reduce(grouper, sample, defaultdict(list)).items()
163 | }
164 | for sample in samples
165 | )
166 |
--------------------------------------------------------------------------------
/mvtk/bias_variance/bias_variance.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | import public
4 |
5 | from scipy import stats
6 | from sklearn.utils import resample
7 |
8 |
9 | @public.add
10 | def get_values(x):
11 | r"""If argument is a Pandas dataframe, return 'values' numpy array from it.
12 |
13 | Args:
14 | x (Any): pandas dataframe or anything else
15 |
16 | Returns:
17 | if pandas dataframe - return 'values' numpy array
18 | otherwise - return itself
19 |
20 | """
21 | if isinstance(x, pd.DataFrame):
22 | return x.values
23 | else:
24 | return x
25 |
26 |
27 | @public.add
28 | def train_and_predict(
29 | estimator,
30 | X_train_values,
31 | y_train_values,
32 | X_test_prepared,
33 | prepare_X=lambda x: x,
34 | prepare_y_train=lambda x: x,
35 | fit_kwargs=None,
36 | predict_kwargs=None,
37 | ):
38 | r"""Train an estimator and get predictions from it
39 |
40 | Args:
41 | estimator (EstimatorWrapper): estimator wrapped with a class extending
42 | EstimatorWrapper
43 | X_train_values: numpy array of features for training
44 | y_train_values: numpy array of ground truth labels for training
45 | X_test_prepared: feature set for testing which has been processed by
46 | prepare_X function
47 | prepare_X (function, optional): function to transform feature datasets
48 | before calling fit and predict methods
49 | prepare_y_train (function, optional): function to transform train ground
50 | truth labels before calling fit method
51 | fit_kwargs (dict, optional): kwargs to pass to the fit method
52 | predict_kwargs (dict, optional): kwargs to pass to the predict method
53 |
54 | Returns:
55 | predictions"""
56 | if predict_kwargs is None:
57 | predict_kwargs = {}
58 | if fit_kwargs is None:
59 | fit_kwargs = {}
60 |
61 | X_sample_prepared = prepare_X(X_train_values)
62 | y_sample_prepared = prepare_y_train(y_train_values)
63 |
64 | estimator = estimator.fit(X_sample_prepared, y_sample_prepared, **fit_kwargs)
65 | predictions = estimator.predict(X_test_prepared, **predict_kwargs)
66 |
67 | return predictions
68 |
69 |
70 | @public.add
71 | def bootstrap_train_and_predict(
72 | estimator,
73 | X_train_values,
74 | y_train_values,
75 | X_test_prepared,
76 | prepare_X=lambda x: x,
77 | prepare_y_train=lambda x: x,
78 | random_state=None,
79 | fit_kwargs=None,
80 | predict_kwargs=None,
81 | ):
82 | r"""Train an estimator using a bootstrap sample of the training data and get
83 | predictions from it
84 |
85 | Args:
86 | estimator (EstimatorWrapper): estimator wrapped with a class extending
87 | EstimatorWrapper
88 | X_train_values: numpy array of features for training
89 | y_train_values: numpy array of ground truth labels for training
90 | X_test_prepared: feature set for testing which has been processed by prepare_X
91 | function
92 | prepare_X (function, optional): function to transform feature datasets before
93 | calling fit and predict methods
94 | prepare_y_train (function, optional): function to transform train ground
95 | truth labels before calling fit method
96 | random_state (int, optional): random state for bootstrap sampling
97 | fit_kwargs (dict, optional): kwargs to pass to the fit method
98 | predict_kwargs (dict, optional): kwargs to pass to the predict method
99 |
100 | Returns:
101 | predictions"""
102 | X_sample, y_sample = resample(
103 | X_train_values, y_train_values, random_state=random_state
104 | )
105 |
106 | return train_and_predict(
107 | estimator,
108 | X_sample,
109 | y_sample,
110 | X_test_prepared,
111 | prepare_X,
112 | prepare_y_train,
113 | fit_kwargs,
114 | predict_kwargs,
115 | )
116 |
117 |
118 | @public.add
119 | def bias_variance_mse(predictions, y_test):
120 | r"""Compute the bias-variance decomposition the mean squared error loss function
121 |
122 | Args:
123 | predictions: numpy array of predictions over the set of iterations
124 | y_test: numpy array of ground truth labels
125 |
126 | Returns:
127 | (average loss, average bias, average variance, net variance)"""
128 | pred_by_x = np.swapaxes(predictions, 0, 1)
129 |
130 | main_predictions = np.mean(predictions, axis=0)
131 |
132 | avg_bias = np.mean((main_predictions - y_test) ** 2)
133 |
134 | arr_loss = np.zeros(pred_by_x.shape[0], dtype=np.float64)
135 | arr_var = np.zeros(pred_by_x.shape[0], dtype=np.float64)
136 | for i in range(pred_by_x.shape[0]):
137 | arr_loss[i] = np.mean((pred_by_x[i] - y_test[i]) ** 2)
138 | arr_var[i] = np.mean((pred_by_x[i] - main_predictions[i]) ** 2)
139 | avg_loss = np.mean(arr_loss)
140 | avg_var = np.mean(arr_var)
141 |
142 |     return avg_loss, avg_bias, avg_var, avg_var  # for MSE, net variance equals average variance
143 |
144 |
145 | @public.add
146 | def bias_variance_0_1_loss(predictions, y_test):
147 | r"""Compute the bias-variance decomposition using the 0-1 loss function
148 |
149 | Args:
150 | predictions: numpy array of predictions over the set of iterations
151 | y_test: numpy array of ground truth labels
152 |
153 | Returns:
154 | (average loss, average bias, average variance, net variance)"""
155 | pred_by_x = np.swapaxes(predictions, 0, 1)
156 |
157 | main_predictions = stats.mode(predictions, axis=0, keepdims=True).mode[0]
158 |
159 | avg_bias = np.mean(main_predictions != y_test)
160 |
161 | arr_loss = np.zeros(pred_by_x.shape[0], dtype=np.float64)
162 | arr_var = np.zeros(pred_by_x.shape[0], dtype=np.float64)
163 | var_b = 0.0 # biased example contribution to avg_var
164 | var_u = 0.0 # unbiased example contribution to avg_var
165 | for i in range(pred_by_x.shape[0]):
166 | pred_true = np.sum(pred_by_x[i] == y_test[i])
167 | pred_not_main = np.sum(pred_by_x[i] != main_predictions[i])
168 |
169 | arr_loss[i] = (predictions.shape[0] - pred_true) / predictions.shape[0]
170 | arr_var[i] = pred_not_main / predictions.shape[0]
171 |
172 | if main_predictions[i] != y_test[i]:
173 | prb_true_given_not_main = (
174 | pred_true / pred_not_main if pred_not_main != 0 else 0
175 | )
176 | var_b += (pred_not_main / predictions.shape[0]) * prb_true_given_not_main
177 | else:
178 | var_u += pred_not_main / predictions.shape[0]
179 |
180 | var_b /= pred_by_x.shape[0]
181 | var_u /= pred_by_x.shape[0]
182 |
183 | avg_loss = np.mean(arr_loss)
184 | avg_var = np.mean(arr_var)
185 | net_var = var_u - var_b
186 |
187 | return avg_loss, avg_bias, avg_var, net_var
188 |
189 |
190 | @public.add
191 | def bias_variance_compute(
192 | estimator,
193 | X_train,
194 | y_train,
195 | X_test,
196 | y_test,
197 | prepare_X=lambda x: x,
198 | prepare_y_train=lambda x: x,
199 | iterations=200,
200 | random_state=None,
201 | decomp_fn=bias_variance_mse,
202 | fit_kwargs=None,
203 | predict_kwargs=None,
204 | ):
205 | r"""Compute the bias-variance decomposition in serial
206 |
207 | Args:
208 | estimator (EstimatorWrapper): estimator wrapped with a class extending
209 | EstimatorWrapper
210 | X_train: features for training
211 | y_train: ground truth labels for training
212 | X_test: features for testing
213 | y_test: ground truth labels for testing
214 | prepare_X (function, optional): function to transform feature datasets before
215 | calling fit and predict methods
216 | prepare_y_train (function, optional): function to transform training ground
217 | truth labels before calling fit method
218 | iterations (int, optional): number of iterations for the training/testing
219 | random_state (int, optional): random state for bootstrap sampling
220 | decomp_fn (function, optional): bias-variance decomposition function
221 | fit_kwargs (dict, optional): kwargs to pass to the fit method
222 | predict_kwargs (dict, optional): kwargs to pass to the predict method
223 |
224 | Returns:
225 | (average loss, average bias, average variance, net variance)"""
226 | if fit_kwargs is None:
227 | fit_kwargs = {}
228 | if predict_kwargs is None:
229 | predict_kwargs = {}
230 |
231 | if isinstance(random_state, int):
232 | random_state = np.random.RandomState(seed=random_state)
233 |
234 | predictions = np.zeros((iterations, y_test.shape[0]))
235 |
236 | X_train_values = get_values(X_train)
237 | y_train_values = get_values(y_train)
238 | X_test_values = get_values(X_test)
239 | X_test_prepared = prepare_X(X_test_values)
240 |
241 | for i in range(iterations):
242 | predictions[i] = bootstrap_train_and_predict(
243 | estimator,
244 | X_train_values,
245 | y_train_values,
246 | X_test_prepared,
247 | prepare_X,
248 | prepare_y_train,
249 | random_state,
250 | fit_kwargs,
251 | predict_kwargs,
252 | )
253 |
254 | y_test_values = get_values(y_test)
255 |
256 | return decomp_fn(predictions, y_test_values)
257 |
--------------------------------------------------------------------------------
/docs/notebooks/divergence/CategoricalColumns.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "metadata": {},
6 | "source": [
7 | "# Handling Categorical Data\n",
8 | "\n",
9 | "More often than not a dataset is comprised of both **numeric**, and **categorical** data types. The supervisor divergence functions can handle both, but it needs to know which columns are categorical so that it can handle it properly. This notebook shows you how to do so when using the **supervisor** divergence package."
10 | ]
11 | },
12 | {
13 | "cell_type": "markdown",
14 | "metadata": {},
15 | "source": [
16 | "## Dataset with Mixed Data Types"
17 | ]
18 | },
19 | {
20 | "cell_type": "markdown",
21 | "metadata": {},
22 | "source": [
23 | "### Create a dataset\n",
24 | "To demonstrate, we will create a simple dataset with a mix of categorical and numeric columns. "
25 | ]
26 | },
27 | {
28 | "cell_type": "code",
29 | "execution_count": 1,
30 | "metadata": {},
31 | "outputs": [
32 | {
33 | "data": {
105 | "text/plain": [
106 | " latitude fruit temp city longitude\n",
107 | "0 239 apple 104 Filly Downs 257\n",
108 | "1 181 apple 11 Coldport 303\n",
109 | "2 246 raspberry 99 Filly Downs 60\n",
110 | "3 187 raspberry 91 Coldport 90\n",
111 | "4 97 raspberry 26 Filly Downs 108"
112 | ]
113 | },
114 | "execution_count": 1,
115 | "metadata": {},
116 | "output_type": "execute_result"
117 | }
118 | ],
119 | "source": [
120 | "import pandas as pd\n",
121 | "import numpy as np\n",
122 | "\n",
123 | "\n",
124 | "size = 100000\n",
125 | "\n",
126 | "data = pd.DataFrame()\n",
127 | "data['latitude'] =np.random.randint(0, 360, size=size)\n",
128 | "data['fruit'] = np.random.choice(a=['apple', 'orange', 'plum', 'raspberry', 'blueberry'],\n",
129 | " p=[0.1, 0.3, 0.3, 0.25, 0.05], size=size)\n",
130 | "data['temp'] =np.random.randint(-10, 120, size=size)\n",
131 | "data['city'] = np.random.choice(a=['London', 'Paris', 'Newport', 'Bradfield', 'Coldport', 'Filly Downs'],\n",
132 | " p=[0.15, 0.2, 0.1, 0.1, 0.3, 0.15], size=size)\n",
133 | "\n",
134 | "\n",
135 | "data['longitude'] = np.random.randint(0, 360, size=size)\n",
136 | "\n",
137 | "data.head(5)"
138 | ]
139 | },
140 | {
141 | "cell_type": "markdown",
142 | "metadata": {},
143 | "source": [
144 | "In the dataset, the **fruit** and **city** columns are *categorical*, while **latitude**, **temp** and **longitude** are *numeric*. "
145 | ]
146 | },
147 | {
148 | "cell_type": "markdown",
149 | "metadata": {},
150 | "source": [
151 | "### Create a comparison dataset\n",
152 | "We will create a dataset to compare by taking the original dataset and modify some of the values. In this case, we will set a couple of columns to a constant value, which would result in the new dataset being of a different distribution from the original dataset."
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 2,
158 | "metadata": {},
159 | "outputs": [],
160 | "source": [
161 | "data_shifted = data.copy()\n",
162 | "data_shifted['temp'] = 1\n",
163 | "data_shifted.fruit = 'apple'"
164 | ]
165 | },
166 | {
167 | "cell_type": "markdown",
168 | "metadata": {},
169 | "source": [
170 | "## Calculating Divergence"
171 | ]
172 | },
173 | {
174 | "cell_type": "code",
175 | "execution_count": 3,
176 | "metadata": {},
177 | "outputs": [],
178 | "source": [
179 | "import warnings\n",
180 | "with warnings.catch_warnings():\n",
181 | " warnings.simplefilter(\"ignore\")\n",
182 | " from mvtk.supervisor.divergence import calc_tv_knn"
183 | ]
184 | },
185 | {
186 | "cell_type": "markdown",
187 | "metadata": {},
188 | "source": [
189 | "The divergence functions have a parameter called **categorical_columns** which you need to use to specify which columns are not numeric. The functions will throw an error if categorical columns are passed but not specified.\n",
190 | "\n",
191 | "So, if you know which columns are categorical, then you need to pass a list of the column indexes. Both the a and b datasets should have the columns in the exact order."
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 4,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "data": {
201 | "text/plain": [
202 | "0.8506579001037404"
203 | ]
204 | },
205 | "execution_count": 4,
206 | "metadata": {},
207 | "output_type": "execute_result"
208 | }
209 | ],
210 | "source": [
211 | "calc_tv_knn(data, data_shifted, categorical_columns=[1,3])"
212 | ]
213 | },
214 | {
215 | "cell_type": "code",
216 | "execution_count": 5,
217 | "metadata": {},
218 | "outputs": [
219 | {
220 | "data": {
221 | "text/plain": [
222 | "0.2598375876037403"
223 | ]
224 | },
225 | "execution_count": 5,
226 | "metadata": {},
227 | "output_type": "execute_result"
228 | }
229 | ],
230 | "source": [
231 | "calc_tv_knn(data, data, categorical_columns=[1,3])"
232 | ]
233 | },
234 | {
235 | "cell_type": "markdown",
236 | "metadata": {},
237 | "source": [
238 | "## mvtk.supervisor.utils.column_indexes"
239 | ]
240 | },
241 | {
242 | "cell_type": "markdown",
243 | "metadata": {},
244 | "source": [
245 | "With the utility function **column_indexes** you can get a list of the ccategorical columns in the dataframe."
246 | ]
247 | },
248 | {
249 | "cell_type": "code",
250 | "execution_count": 6,
251 | "metadata": {},
252 | "outputs": [
253 | {
254 | "data": {
255 | "text/plain": [
256 | "[1, 3]"
257 | ]
258 | },
259 | "execution_count": 6,
260 | "metadata": {},
261 | "output_type": "execute_result"
262 | }
263 | ],
264 | "source": [
265 | "from mvtk.supervisor.utils import column_indexes\n",
266 | "\n",
267 | "column_indexes(data, cols=['fruit', 'city'])"
268 | ]
269 | },
270 | {
271 | "cell_type": "markdown",
272 | "metadata": {},
273 | "source": [
274 | "You can also run the **column_indexes** function inline as a function parameter."
275 | ]
276 | },
277 | {
278 | "cell_type": "code",
279 | "execution_count": 7,
280 | "metadata": {},
281 | "outputs": [
282 | {
283 | "data": {
284 | "text/plain": [
285 | "0.25967482718707363"
286 | ]
287 | },
288 | "execution_count": 7,
289 | "metadata": {},
290 | "output_type": "execute_result"
291 | }
292 | ],
293 | "source": [
294 | "calc_tv_knn(data, data, \n",
295 | " categorical_columns=column_indexes(data, cols=['fruit', 'city']))"
296 | ]
297 | }
298 | ],
299 | "metadata": {
300 | "kernelspec": {
301 | "display_name": "supervisor",
302 | "language": "python",
303 | "name": "supervisor"
304 | },
305 | "language_info": {
306 | "codemirror_mode": {
307 | "name": "ipython",
308 | "version": 3
309 | },
310 | "file_extension": ".py",
311 | "mimetype": "text/x-python",
312 | "name": "python",
313 | "nbconvert_exporter": "python",
314 | "pygments_lexer": "ipython3",
315 | "version": "3.6.8"
316 | }
317 | },
318 | "nbformat": 4,
319 | "nbformat_minor": 2
320 | }
321 |
--------------------------------------------------------------------------------
/tests/bias_variance/test_bias_variance.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 |
4 | from sklearn.tree import DecisionTreeClassifier
5 | from sklearn.linear_model import Ridge
6 |
7 | from mvtk.bias_variance import (
8 | bias_variance_compute,
9 | bias_variance_mse,
10 | bias_variance_0_1_loss,
11 | get_values,
12 | train_and_predict,
13 | bootstrap_train_and_predict,
14 | )
15 | from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper
16 |
17 |
18 | def create_data():
19 | X_train = np.arange(12).reshape(6, 2)
20 | y_train = np.concatenate((np.arange(3), np.arange(3)), axis=None)
21 | X_test = np.arange(6).reshape(3, 2)
22 | y_test = np.array([0, 1, 1])
23 |
24 | return X_train, y_train, X_test, y_test
25 |
26 |
27 | def test_get_values():
28 | a = [1, 2]
29 | b = [3, 4]
30 | c = [1, 3]
31 | d = [2, 4]
32 | df = pd.DataFrame(data={"col_a": a, "col_b": b})
33 |
34 | df_values = get_values(df)
35 | np_array = np.asarray([c, d])
36 |
37 | assert isinstance(df_values, np.ndarray)
38 | assert np.array_equal(df_values, np_array)
39 |
40 |
41 | def test_train_and_predict_default():
42 | X_train, y_train, X_test, y_test = create_data()
43 |
44 | model = Ridge(random_state=123)
45 | model_wrapped = SciKitLearnEstimatorWrapper(model)
46 |
47 | predictions = train_and_predict(model_wrapped, X_train, y_train, X_test)
48 |
49 | expected = np.array([0.4326241134751774, 0.6595744680851064, 0.8865248226950355])
50 |
51 | assert np.array_equal(
52 | np.round(predictions, decimals=12), np.round(expected, decimals=12)
53 | )
54 |
55 |
56 | def test_train_and_predict_prepare():
57 | X_train, y_train, X_test, y_test = create_data()
58 |
59 | model = Ridge(random_state=123)
60 | model_wrapped = SciKitLearnEstimatorWrapper(model)
61 |
62 | predictions = train_and_predict(
63 | model_wrapped,
64 | X_train,
65 | y_train,
66 | X_test,
67 | prepare_X=lambda x: x + 1,
68 | prepare_y_train=lambda x: x + 1,
69 | )
70 |
71 | expected = np.array([1.3191489361702131, 1.546099290780142, 1.773049645390071])
72 |
73 | assert np.array_equal(
74 | np.round(predictions, decimals=12), np.round(expected, decimals=12)
75 | )
76 |
77 |
78 | def test_train_and_predict_kwargs_fit():
79 | X_train, y_train, X_test, y_test = create_data()
80 |
81 | model = DecisionTreeClassifier(random_state=123)
82 | model_wrapped = SciKitLearnEstimatorWrapper(model)
83 |
84 | predictions = train_and_predict(
85 | model_wrapped,
86 | X_train,
87 | y_train,
88 | X_test,
89 | fit_kwargs={"sample_weight": [0, 0, 1, 0, 1, 0]},
90 | )
91 |
92 | expected = np.array([2, 2, 2])
93 |
94 | assert np.array_equal(predictions, expected)
95 |
96 |
97 | def test_train_and_predict_kwargs_predict():
98 | X_train, y_train, X_test, y_test = create_data()
99 |
100 | model = DecisionTreeClassifier(random_state=123)
101 | model_wrapped = SciKitLearnEstimatorWrapper(model)
102 |
103 | train_and_predict(model_wrapped, X_train, y_train, X_test)
104 |
105 | try:
106 | train_and_predict(
107 | model_wrapped,
108 | X_train,
109 | y_train,
110 | X_test,
111 | predict_kwargs={"check_input": False},
112 | )
113 | except ValueError as e:
114 | assert e.args[0] == "X.dtype should be np.float32, got int64"
115 | return
116 |
117 | assert False
118 |
119 |
120 | def test_bootstrap_train_and_predict_default():
121 | X_train, y_train, X_test, y_test = create_data()
122 |
123 | model = Ridge(random_state=123)
124 | model_wrapped = SciKitLearnEstimatorWrapper(model)
125 |
126 | predictions = bootstrap_train_and_predict(
127 | model_wrapped, X_train, y_train, X_test, random_state=321
128 | )
129 |
130 | expected = np.array([0.7168141592920354, 0.8584070796460177, 1.0])
131 |
132 | assert np.array_equal(predictions, expected)
133 |
134 |
135 | def test_bootstrap_train_and_predict_kwargs_fit():
136 | X_train, y_train, X_test, y_test = create_data()
137 |
138 | model = DecisionTreeClassifier(random_state=123)
139 | model_wrapped = SciKitLearnEstimatorWrapper(model)
140 |
141 | predictions = bootstrap_train_and_predict(
142 | model_wrapped,
143 | X_train,
144 | y_train,
145 | X_test,
146 | random_state=321,
147 | fit_kwargs={"sample_weight": [0, 0, 1, 0, 1, 0]},
148 | )
149 |
150 | expected = np.array([0, 0, 0])
151 |
152 | assert np.array_equal(predictions, expected)
153 |
154 |
155 | def test_bootstrap_train_and_predict_kwargs_predict():
156 | X_train, y_train, X_test, y_test = create_data()
157 |
158 | model = DecisionTreeClassifier(random_state=123)
159 | model_wrapped = SciKitLearnEstimatorWrapper(model)
160 |
161 | bootstrap_train_and_predict(
162 | model_wrapped, X_train, y_train, X_test, random_state=321
163 | )
164 |
165 | try:
166 | bootstrap_train_and_predict(
167 | model_wrapped,
168 | X_train,
169 | y_train,
170 | X_test,
171 | random_state=321,
172 | predict_kwargs={"check_input": False},
173 | )
174 | except ValueError as e:
175 | assert e.args[0] == "X.dtype should be np.float32, got int64"
176 | return
177 |
178 | assert False
179 |
180 |
181 | def test_bias_variance_compute_mse():
182 | X_train, y_train, X_test, y_test = create_data()
183 |
184 | model = Ridge(random_state=123)
185 | model_wrapped = SciKitLearnEstimatorWrapper(model)
186 |
187 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute(
188 | model_wrapped,
189 | X_train,
190 | y_train,
191 | X_test,
192 | y_test,
193 | iterations=10,
194 | random_state=123,
195 | decomp_fn=bias_variance_mse,
196 | )
197 |
198 | assert np.round(avg_loss, decimals=12) == np.round(
199 | np.float64(1.1158203908105646), decimals=12
200 | )
201 | assert np.round(avg_bias, decimals=12) == np.round(
202 | np.float64(0.1191924176014536), decimals=12
203 | )
204 | assert np.round(avg_var, decimals=12) == np.round(
205 | np.float64(0.9966279732091108), decimals=12
206 | )
207 | assert np.round(net_var, decimals=12) == np.round(
208 | np.float64(0.9966279732091108), decimals=12
209 | )
210 |
211 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
212 | assert avg_var == net_var
213 |
214 |
215 | def test_bias_variance_compute_0_1():
216 | X_train, y_train, X_test, y_test = create_data()
217 |
218 | model = DecisionTreeClassifier(random_state=123)
219 | model_wrapped = SciKitLearnEstimatorWrapper(model)
220 |
221 | avg_loss, avg_bias, avg_var, net_var = bias_variance_compute(
222 | model_wrapped,
223 | X_train,
224 | y_train,
225 | X_test,
226 | y_test,
227 | iterations=10,
228 | random_state=123,
229 | decomp_fn=bias_variance_0_1_loss,
230 | )
231 |
232 | assert avg_loss == np.float64(0.4666666666666666)
233 | assert avg_bias == np.float64(0.3333333333333333)
234 | assert avg_var == np.float64(0.3666666666666667)
235 | assert net_var == np.float64(0.1333333333333333)
236 |
237 | assert avg_loss == avg_bias + net_var
238 |
239 |
240 | def test_bias_variance_mse_no_loss():
241 | predictions = np.zeros((3, 5))
242 | y_test = np.zeros(5)
243 |
244 | avg_loss, avg_bias, avg_var, net_var = bias_variance_mse(predictions, y_test)
245 |
246 | assert avg_loss == np.float64(0.0)
247 | assert avg_bias == np.float64(0.0)
248 | assert avg_var == np.float64(0.0)
249 | assert net_var == np.float64(0.0)
250 |
251 | assert avg_loss == avg_bias + net_var
252 | assert avg_var == net_var
253 |
254 |
255 | def test_bias_variance_mse():
256 | predictions = np.zeros((3, 5))
257 | predictions[0] += 0.5
258 | y_test = np.zeros(5)
259 |
260 | avg_loss, avg_bias, avg_var, net_var = bias_variance_mse(predictions, y_test)
261 |
262 | assert avg_loss == np.float64(0.08333333333333333)
263 | assert avg_bias == np.float64(0.02777777777777778)
264 | assert avg_var == np.float64(0.05555555555555556)
265 | assert net_var == np.float64(0.05555555555555556)
266 |
267 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
268 | assert avg_var == net_var
269 |
270 |
271 | def test_bias_variance_0_1_loss_no_loss():
272 | predictions = np.zeros((3, 5))
273 | y_test = np.zeros(5)
274 |
275 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test)
276 |
277 | assert avg_loss == np.float64(0.0)
278 | assert avg_bias == np.float64(0.0)
279 | assert avg_var == np.float64(0.0)
280 | assert net_var == np.float64(0.0)
281 |
282 | assert avg_loss == avg_bias + net_var
283 |
284 |
285 | def test_bias_variance_0_1_loss_no_bias():
286 | predictions = np.zeros((3, 5))
287 | predictions[0] += 1
288 | y_test = np.zeros(5)
289 |
290 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test)
291 |
292 | assert avg_loss == np.float64(0.3333333333333333)
293 | assert avg_bias == np.float64(0.0)
294 | assert avg_var == np.float64(0.3333333333333333)
295 | assert net_var == np.float64(0.3333333333333333)
296 |
297 | assert avg_loss == avg_bias + net_var
298 |
299 |
300 | def test_bias_variance_0_1_loss_var_diff():
301 | predictions = np.zeros((3, 5))
302 | predictions[0] += 1
303 | predictions[1][0] += 1
304 | y_test = np.zeros(5)
305 | y_test[1] += 1
306 |
307 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test)
308 |
309 | assert avg_loss == np.float64(0.4666666666666666)
310 | assert avg_bias == np.float64(0.4)
311 | assert avg_var == np.float64(0.3333333333333333)
312 | assert net_var == np.float64(0.06666666666666668)
313 |
314 | assert np.round(avg_loss, decimals=12) == np.round(avg_bias + net_var, decimals=12)
315 |
316 |
317 | def test_bias_variance_0_1_loss_div_by_0():
318 | predictions = np.ones((3, 5))
319 | y_test = np.zeros(5)
320 |
321 | avg_loss, avg_bias, avg_var, net_var = bias_variance_0_1_loss(predictions, y_test)
322 |
323 | assert avg_loss == np.float64(1.0)
324 | assert avg_bias == np.float64(1.0)
325 | assert avg_var == np.float64(0.0)
326 | assert net_var == np.float64(0.0)
327 |
328 | assert avg_loss == avg_bias + net_var
329 |
--------------------------------------------------------------------------------
/docs/bias_variance_user_guide.rst:
--------------------------------------------------------------------------------
1 | ########################
2 | Bias-Variance User Guide
3 | ########################
4 |
5 | **********
6 | Motivation
7 | **********
8 |
9 | Statistical Bias vs. "Fairness"
10 | ===============================
11 |
12 | For this user guide and associated submodule, we are referring to
13 | `statistical bias <https://en.wikipedia.org/wiki/Bias_(statistics)>`_ rather
14 | than the "fairness" type of bias.
15 |
16 | Why should we care about bias and variance?
17 | ===========================================
18 |
19 | Bias and variance are two indicators of model performance and together account for
20 | two of the three components of model error (the third is irreducible "noise" error that
21 | comes from the data set itself). We can define bias and variance as follows
22 | by training a model with multiple `bootstrap sampled
23 | <https://en.wikipedia.org/wiki/Bootstrapping_(statistics)>`_ training sets, resulting in
24 | multiple instances of the model.
25 |
26 | .. topic:: Bias and variance defined over multiple training sets:
27 |
28 | * Bias represents the average difference between the prediction a model makes and the correct prediction.
29 | * Variance represents the average variability of the prediction a model makes.
30 |
31 | Typically, a model with high bias is "underfit" and a model with high variance is
32 | "overfit," but keep in mind this is not always the case and there can be many reasons
33 | why a model has high bias or high variance. An "underfit" model is oversimplified and
34 | performs poorly on the training data, whereas an "overfit" model sticks too closely to
35 | the training data and performs poorly on unseen examples. See Scikit-Learn's
36 | `Underfitting vs. Overfitting
37 | <https://scikit-learn.org/stable/auto_examples/model_selection/plot_underfitting_overfitting.html>`_
38 | for a clear example of an "underfit" model vs. an "overfit" model.
39 |
40 | There is a concept
41 | known as the `"bias-variance tradeoff"
42 | <https://en.wikipedia.org/wiki/Bias%E2%80%93variance_tradeoff>`_ that describes
43 | the relationship between high bias and high variance in a model. Our ultimate goal
44 | here is to find the ideal balance where both bias and variance are at a minimum.
45 | From a business standpoint, it is also important to decide whether the model
46 | error that we are unable to reduce should favor bias or variance.
47 |
48 | *****************************************
49 | Visualize Bias and Variance With Examples
50 | *****************************************
51 |
52 | In order to easily understand the concepts of bias and variance, we will show
53 | four different examples of models for each of the high and low bias and variance
54 | combinations. These are extreme and engineered cases for the purpose of clearly
55 | seeing the bias/variance.
56 |
57 | Before we begin, let's take a look at the distribution of the labels. Notice
58 | that the majority of label values are around 1 and 2, and much less around 5.
59 |
60 | .. figure:: images/bias_variance_label_distribution.png
61 | :align: center
62 | :alt: alternate text
63 | :figclass: align-center
64 |
65 | First we have a model with high bias and low variance. We artificially
66 | introduce bias to the model by adding 10 to every training label, but leaving
67 | the test labels as is. Given that values greater than 5 in the entire label
68 | set are considered outliers, we are fitting the model against outliers.
69 |
70 | .. figure:: images/high_bias_low_variance.png
71 | :align: center
72 | :alt: alternate text
73 | :figclass: align-center
74 |
75 | Five sets of mean squared error results from the test set from the five
76 | bootstrap sample trainings of the model. Notice the model error is very
77 | consistent among the trials and is not centered around 0.
78 |
79 | Next we have a model with low bias and high variance. We simulate this by
80 | introducing 8 random "noise" features to the data set. We also reduce the size
81 | of the training set and train a neural network over a low number of epochs.
82 |
83 | .. figure:: images/low_bias_high_variance.png
84 | :align: center
85 | :alt: alternate text
86 | :figclass: align-center
87 |
88 | Five sets of mean squared error results from the test set from the five
89 | bootstrap sample trainings of the model. Notice the model error has
90 | different distributions among the trials and centers mainly around 0.
91 |
92 | Next we have a model with high bias and high variance. We simulate this through
93 | a combination of the techniques from the high bias low variance example and
94 | the low bias high variance example and train another neural network.
95 |
96 | .. figure:: images/high_bias_high_variance.png
97 | :align: center
98 | :alt: alternate text
99 | :figclass: align-center
100 |
101 | Five sets of mean squared error results on the test set, one from each of the
102 | five bootstrap-sample trainings of the model. Notice that the model error is
103 | distributed differently across the trials and is not centered around 0.
104 |
105 | Finally we have a model with low bias and low variance. This is a simple
106 | linear regression model with no modifications to the training or test labels.
107 |
108 | .. figure:: images/low_bias_low_variance.png
109 | :align: center
110 | :alt: alternate text
111 | :figclass: align-center
112 |
113 | Five sets of mean squared error results on the test set, one from each of the
114 | five bootstrap-sample trainings of the model. Notice that the model error is
115 | very consistent across the trials and centers mainly around 0.
116 |
117 | ***************************
118 | Bias-Variance Decomposition
119 | ***************************
120 |
121 | .. currentmodule:: mvtk.bias_variance
122 |
123 | Total model error can be broken down into three parts: bias, variance, and
124 | noise. This decomposition applies to both regression loss functions (mean
125 | squared error) and classification loss functions (0-1 loss). Pedro Domingos
126 | proposed a unified decomposition that covers both types of
127 | problems :cite:`domingos2000decomp`.
128 |
129 | First, let's define :math:`y` as a single prediction, :math:`D` as the set of
130 | training sets used to train the models, :math:`Y` as the set of predictions
131 | from the models trained on :math:`D`, and a loss function :math:`L` that
132 | calculates the error between our prediction :math:`y` and the correct
133 | prediction :math:`y_*`.
134 | The main prediction :math:`y_m` is the prediction whose average loss against
135 | the set of predictions :math:`Y` is smallest. The main prediction is
136 | the mean of :math:`Y` for mean squared error and the mode of :math:`Y` for
137 | 0-1 loss :cite:`domingos2000decomp`.
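
As a small illustration (a sketch, not the toolkit's internal code), the main
prediction can be computed like this:

.. code-block:: python

    import numpy as np
    from collections import Counter

    def main_prediction(Y, loss="mse"):
        """Prediction with the smallest average loss against the predictions Y."""
        if loss == "mse":
            return np.mean(Y)  # the mean minimizes average squared loss
        # the mode minimizes average 0-1 loss
        return Counter(Y).most_common(1)[0][0]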
138 |
139 | Bias can now be defined for a single example :math:`x` over the set of models
140 | trained on :math:`D` as the loss calculated between the main prediction
141 | :math:`y_m` and the correct prediction :math:`y_*` :cite:`domingos2000decomp`.
142 |
143 | .. math::
144 | B(x) = L(y_*,y_m)
145 |
146 | Variance can now be defined for a single example :math:`x` over the set of
147 | models trained on :math:`D` as the average loss calculated between all predictions
148 | and the main prediction :math:`y_m` :cite:`domingos2000decomp`.
149 |
150 | .. math::
151 | V(x) = E_D[L(y_m, y)]
152 |
153 | We will need to take the average of the bias over all examples as
154 | :math:`E_x[B(x)]` and the average of the variance over all examples as
155 | :math:`E_x[V(x)]` :cite:`domingos2000decomp`.
156 |
157 | With :math:`N(x)` representing the irreducible error from observation noise, we
158 | can decompose the average expected loss as :cite:`domingos2000decomp`
159 |
160 | .. math::
161 | E_x[N(x)] + E_x[B(x)] + E_x[cV(x)]
162 |
163 | In other words, the average loss over all examples is equal to the noise plus the
164 | average bias plus the net variance (multiplying the variance by the :math:`c`
165 | factor before averaging is what turns the average variance into the net variance).
166 |
167 | .. note::
168 | We are generalizing the actual value of :math:`N(x)`, as the Model Validation
169 | Toolkit's implementation of bias-variance decomposition does not include noise
170 | in the average expected loss. This noise represents error in the actual data
171 | and not error related to the model itself. If you would like to dive deeper
172 | into the noise representation, please consult the `Pedro Domingos paper
173 | `_.
174 |
175 | For mean squared loss functions, :math:`c = 1`, meaning that average variance
176 | is equal to net variance.
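
For squared loss, the decomposition (without the noise term, per the note
above) can be checked numerically with a small NumPy sketch using synthetic
predictions:

.. code-block:: python

    import numpy as np

    rng = np.random.RandomState(0)
    preds = rng.normal(2.0, 0.5, size=(5, 100))   # (n_models, n_examples)
    y_true = np.full(100, 1.5)                    # correct predictions y_*

    y_m = preds.mean(axis=0)                      # main prediction per example
    avg_loss = np.mean((preds - y_true) ** 2)     # average expected loss
    avg_bias = np.mean((y_true - y_m) ** 2)       # E_x[B(x)]
    net_var = np.mean((preds - y_m) ** 2)         # E_x[V(x)]; c = 1 here

    assert np.isclose(avg_loss, avg_bias + net_var)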
177 |
178 | For zero-one loss functions, :math:`c = 1` when :math:`y_m = y_*`; otherwise
179 | :math:`c = -P_D(y = y_* \mid y \neq y_m)` :cite:`domingos2000decomp`. In other words,
180 | :math:`c` is 1 when the main prediction is the correct prediction. If the main
181 | prediction is not the correct prediction, then :math:`c` is minus the
182 | probability of a single prediction being the correct prediction, given that the
183 | single prediction is not the main prediction.
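
A per-example sketch of the net-variance term under 0-1 loss (our own
illustration of the formula above, not the toolkit's code):

.. code-block:: python

    import numpy as np

    def net_variance_term(preds, y_star, y_m):
        """c * V(x) for one example; preds holds that example's predictions."""
        V = np.mean(preds != y_m)                  # V(x) = E_D[L(y_m, y)]
        if y_m == y_star:
            c = 1.0                                # variance adds to the error
        else:
            off_main = preds[preds != y_m]
            # c = -P_D(y = y_* | y != y_m): variance can cancel bias here
            c = -np.mean(off_main == y_star) if off_main.size else 0.0
        return c * V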
184 |
185 | Usage
186 | =====
187 |
188 | :meth:`bias_variance_compute` will return the average loss, average bias, average
189 | variance, and net variance for an estimator trained and tested over a specified number
190 | of training sets. This was inspired by and modeled after Sebastian Raschka's
191 | `bias_variance_decomp
192 | `_
193 | function :cite:`mlxtenddecomp`.
194 | We use the `bootstrapping `_
195 | method to produce our sets of training data from the original training set. By default
196 | it uses mean squared error as the loss function, but it accepts either of the
197 | following functions for calculating loss.
198 |
199 | * :meth:`bias_variance_mse` for mean squared error
200 | * :meth:`bias_variance_0_1_loss` for 0-1 loss
201 |
202 | Since :meth:`bias_variance_compute` trains an estimator over multiple iterations, it also
203 | expects the estimator to be wrapped in a class that extends the
204 | :class:`estimators.EstimatorWrapper` class. The wrapper supplies the consistent fit
205 | and predict interface that not all estimator implementations conform to. The following
206 | estimator wrappers are provided.
207 |
208 | * :class:`estimators.PyTorchEstimatorWrapper` for `PyTorch `_
209 | * :class:`estimators.SciKitLearnEstimatorWrapper` for `Scikit-Learn `_
210 | * :class:`estimators.TensorFlowEstimatorWrapper` for `TensorFlow `_
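
If your estimator is not covered above, a custom wrapper can extend
:class:`estimators.EstimatorWrapper` (a hedged sketch; the exact abstract
interface may differ, and the ``train``/``infer`` calls below are hypothetical
stand-ins for your model's own methods):

.. code-block:: python

    from mvtk.bias_variance.estimators import EstimatorWrapper

    class MyEstimatorWrapper(EstimatorWrapper):
        """Adapts an arbitrary model to the fit/predict interface."""

        def __init__(self, model):
            self.model = model

        def fit(self, X, y):
            self.model.train(X, y)      # hypothetical training method
            return self

        def predict(self, X):
            return self.model.infer(X)  # hypothetical inference method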
211 |
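Putting it together, a typical call might look like this (a hedged sketch; the
keyword argument names are illustrative assumptions, so consult the tutorials
below for the exact signature):

.. code-block:: python

    from sklearn.linear_model import LinearRegression

    from mvtk.bias_variance import bias_variance_compute, bias_variance_mse
    from mvtk.bias_variance.estimators import SciKitLearnEstimatorWrapper

    model = SciKitLearnEstimatorWrapper(LinearRegression())
    # Argument names here are illustrative, not the documented signature.
    avg_loss, avg_bias, avg_var, net_var = bias_variance_compute(
        model, X_train, y_train, X_test, y_test,
        iterations=5, loss_func=bias_variance_mse)
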
212 | :meth:`bias_variance_compute` works well for smaller data sets and less complex models, but what
213 | happens when you have a very large set of data, a very complex model, or both?
214 | :meth:`bias_variance_compute_parallel` does the same calculation, but leverages `Ray
215 | `_ for parallelization of bootstrapping, training, and predicting.
216 | This allows for faster calculation over a distributed architecture.
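
Its invocation mirrors the serial version (again a sketch; the argument names
are illustrative assumptions):

.. code-block:: python

    import ray

    from mvtk.bias_variance import bias_variance_compute_parallel

    ray.init()  # start or connect to a Ray cluster first
    # Same inputs as bias_variance_compute; bootstrapping, training, and
    # prediction are farmed out to Ray workers.
    results = bias_variance_compute_parallel(
        model, X_train, y_train, X_test, y_test,
        iterations=5, loss_func=bias_variance_mse)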
217 |
218 | .. topic:: Tutorials:
219 |
220 | * :doc:`Bias-Variance Visualization `
221 | * :doc:`Bias-Variance Regression `
222 | * :doc:`Bias-Variance Classification `
223 |
224 | .. bibliography:: refs.bib
225 | :cited:
226 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------