├── divergence ├── tests │ ├── __init__.py │ ├── test_intersection.py │ ├── test_continuous.py │ └── test_discrete.py ├── base.py ├── __init__.py ├── discrete.py └── continuous.py ├── LICENSE ├── README.md ├── .gitignore ├── setup.py └── notebooks └── Divergence.ipynb /divergence/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /divergence/tests/test_intersection.py: -------------------------------------------------------------------------------- 1 | from divergence import intersection 2 | 3 | 4 | def test_non_overlapping_1(): 5 | assert intersection(1, 2, 3, 4) is None 6 | 7 | 8 | def test_non_overlapping_2(): 9 | assert intersection(3, 4, 1, 2) is None 10 | 11 | 12 | def test_sub_interval(): 13 | assert intersection(1, 4, 2, 3) == (2, 3) 14 | 15 | 16 | def test_overlap(): 17 | assert intersection(2, 4, 3, 5) == (3, 4) 18 | 19 | 20 | def test_sub_overlap_2(): 21 | assert intersection(3, 5, 2, 4) == (3, 4) 22 | -------------------------------------------------------------------------------- /divergence/base.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | import cocos.numerics as cn 4 | import numba 5 | import numpy as np 6 | 7 | 8 | def _select_vectorized_log_fun_for_base(base: float, gpu: bool = False) -> tp.Callable: 9 | if base == 2: 10 | if gpu: 11 | return cn.log2 12 | else: 13 | return np.log2 14 | if base == np.e: 15 | if gpu: 16 | return cn.log 17 | else: 18 | return np.log 19 | if base == 10: 20 | if gpu: 21 | return cn.log10 22 | else: 23 | return np.log10 24 | 25 | raise ValueError('base not supported') 26 | 27 | 28 | spec = [('base', numba.float64)] 29 | 30 | 31 | @numba.experimental.jitclass(spec) 32 | class Logarithm: 33 | def __init__(self, base): 34 | self.base = base 35 | 36 | def log(self, x): 37 | if self.base == 2: 38 | return np.log2(x) 39 | if self.base == np.e: 40 | return np.log(x) 41 | if self.base == 10: 42 | return np.log10(x) 43 | 44 | raise ValueError('base not supported') 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Michael Nowotny 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Divergence 2 | Divergence is a Python package to compute statistical measures of entropy and divergence from probability distributions and samples. 3 | 4 | The following functionality is provided: 5 | * (Information) Entropy [1], [2] 6 | * Cross Entropy: [3] 7 | * Relative Entropy or Kullback-Leibler (KL-) Divergence [4], [5] 8 | * Jensen-Shannon Divergence [6] 9 | * Joint Entropy [7] 10 | * Conditional Entropy [8] 11 | * Mutual Information [9] 12 | 13 | The units in which these entropy and divergence measures are calculated can be specified by the user. 14 | This is achieved by setting the argument `base`, to `2.0`, `10.0`, or `np.e`. 15 | 16 | In a Bayesian context, relative entropy can be used as a measure of the information gained by moving 17 | from a prior distribution `q` to a posterior distribution `p`. 18 | 19 | ## Installation 20 | 21 |
22 |     pip install divergence
23 | 
24 | 25 | ## Examples 26 | See the Jupyter notebook [Divergence](https://github.com/michaelnowotny/divergence/blob/master/notebooks/Divergence.ipynb). 27 | 28 | ## References: 29 | #### [1] https://en.wikipedia.org/wiki/Entropy_(information_theory) 30 | #### [2] Shannon, Claude Elwood (July 1948). "A Mathematical Theory of Communication". Bell System Technical Journal. 27 (3): 379–423 31 | #### [3] https://en.wikipedia.org/wiki/Cross_entropy 32 | #### [4] https://en.wikipedia.org/wiki/Kullback–Leibler_divergence 33 | #### [5] Kullback, S.; Leibler, R.A. (1951). "On information and sufficiency". Annals of Mathematical Statistics. 22 (1): 79–86 34 | #### [6] https://en.wikipedia.org/wiki/Jensen–Shannon_divergence 35 | #### [7] https://en.wikipedia.org/wiki/Joint_entropy 36 | #### [8] https://en.wikipedia.org/wiki/Conditional_entropy 37 | #### [9] https://en.wikipedia.org/wiki/Mutual_information 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pip install twine 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | 15 | # Package meta-data. 16 | NAME = 'divergence' 17 | DESCRIPTION = 'Information Theoretic Measures of Entropy and Divergence' 18 | URL = 'https://github.com/michaelnowotny/divergence' 19 | EMAIL = 'nowotnym@gmail.com' 20 | AUTHOR = 'Michael Christoph Nowotny' 21 | REQUIRES_PYTHON = '>=3.6.0' 22 | VERSION = "0.4.2" 23 | 24 | # What packages are required for this module to be executed? 25 | REQUIRED = [ 26 | "cocos", 27 | "cubature", 28 | "numba", 29 | "numpy", 30 | "pytest", 31 | "scipy", 32 | "statsmodels" 33 | ] 34 | 35 | # What packages are optional? 36 | EXTRAS = { 37 | # 'fancy feature': ['django'], 38 | } 39 | 40 | # The rest you shouldn't have to touch too much :) 41 | # ------------------------------------------------ 42 | # Except, perhaps the License and Trove Classifiers! 43 | # If you do change the License, remember to change the Trove Classifier for that! 44 | 45 | here = os.path.abspath(os.path.dirname(__file__)) 46 | 47 | # Import the README and use it as the long-description. 48 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 49 | try: 50 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 51 | long_description = '\n' + f.read() 52 | except FileNotFoundError: 53 | long_description = DESCRIPTION 54 | 55 | # Load the package's __version__.py module as a dictionary. 56 | about = {} 57 | if not VERSION: 58 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 59 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 60 | exec(f.read(), about) 61 | else: 62 | about['__version__'] = VERSION 63 | 64 | 65 | class UploadCommand(Command): 66 | """Support setup.py upload.""" 67 | 68 | description = 'Build and publish the package.' 
69 | user_options = [] 70 | 71 | @staticmethod 72 | def status(s): 73 | """Prints things in bold.""" 74 | print('\033[1m{0}\033[0m'.format(s)) 75 | 76 | def initialize_options(self): 77 | pass 78 | 79 | def finalize_options(self): 80 | pass 81 | 82 | def run(self): 83 | try: 84 | self.status('Removing previous builds…') 85 | rmtree(os.path.join(here, 'dist')) 86 | except OSError: 87 | pass 88 | 89 | self.status('Building Source and Wheel (universal) distribution…') 90 | os.system( 91 | '{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 92 | 93 | self.status('Uploading the package to PyPI via Twine…') 94 | os.system('twine upload dist/*') 95 | 96 | self.status('Pushing git tags…') 97 | os.system('git tag v{0}'.format(about['__version__'])) 98 | os.system('git push --tags') 99 | 100 | sys.exit() 101 | 102 | 103 | # Where the magic happens: 104 | setup( 105 | name=NAME, 106 | version=VERSION, 107 | # version=versioneer.get_version(), 108 | description=DESCRIPTION, 109 | long_description=long_description, 110 | long_description_content_type='text/markdown', 111 | author=AUTHOR, 112 | author_email=EMAIL, 113 | python_requires=REQUIRES_PYTHON, 114 | url=URL, 115 | packages=find_packages(exclude=('examples', )), 116 | # If your package is a single module, use this instead of 'packages': 117 | # py_modules=['mypackage'], 118 | 119 | # entry_points={ 120 | # 'console_scripts': ['mycli=mymodule:cli'], 121 | # }, 122 | install_requires=REQUIRED, 123 | extras_require=EXTRAS, 124 | include_package_data=True, 125 | license='MIT', 126 | classifiers=[ 127 | # Trove classifiers 128 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 129 | 'License :: OSI Approved :: MIT License', 130 | 'Programming Language :: Python', 131 | 'Programming Language :: Python :: 3', 132 | 'Programming Language :: Python :: 3.6', 133 | 'Programming Language :: Python :: 3.7', 134 | 'Programming Language :: Python :: 3.8', 135 | 'Programming Language :: Python :: Implementation :: CPython', 136 | ], 137 | # $ setup.py publish support. 
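    # Invoke with "python setup.py upload": the UploadCommand registered below removes old
    # builds, creates sdist/wheel distributions, uploads them via twine, and tags/pushes the release.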
138 | cmdclass={ 139 | 'upload': UploadCommand, 140 | }, 141 | ) -------------------------------------------------------------------------------- /divergence/__init__.py: -------------------------------------------------------------------------------- 1 | from .continuous import * 2 | from .discrete import ( 3 | discrete_entropy, 4 | discrete_relative_entropy, 5 | discrete_cross_entropy, 6 | discrete_jensen_shannon_divergence, 7 | discrete_mutual_information, 8 | discrete_joint_entropy, 9 | discrete_conditional_entropy_of_y_given_x 10 | ) 11 | 12 | 13 | def entropy_from_samples(sample: np.ndarray, 14 | base: float = np.e, 15 | discrete: bool = False) -> float: 16 | if discrete: 17 | return discrete_entropy(sample=sample, 18 | base=base) 19 | else: 20 | return continuous_entropy_from_sample(sample=sample, 21 | base=base) 22 | 23 | 24 | def cross_entropy_from_samples(sample_p: np.ndarray, 25 | sample_q: np.ndarray, 26 | base: float = np.e, 27 | discrete: bool = False) -> float: 28 | if discrete: 29 | return discrete_cross_entropy(sample_p=sample_p, 30 | sample_q=sample_q, 31 | base=base) 32 | else: 33 | return continuous_cross_entropy_from_sample(sample_p=sample_p, 34 | sample_q=sample_q, 35 | base=base) 36 | 37 | 38 | def relative_entropy_from_samples(sample_p: np.ndarray, 39 | sample_q: np.ndarray, 40 | base: float = np.e, 41 | discrete: bool = False) -> float: 42 | if discrete: 43 | return discrete_relative_entropy(sample_p=sample_p, 44 | sample_q=sample_q, 45 | base=base) 46 | else: 47 | return continuous_relative_entropy_from_sample(sample_p=sample_p, 48 | sample_q=sample_q, 49 | base=base) 50 | 51 | 52 | def jensen_shannon_divergence_from_samples(sample_p: np.ndarray, 53 | sample_q: np.ndarray, 54 | base: float = np.e, 55 | discrete: bool = False) -> float: 56 | if discrete: 57 | return discrete_jensen_shannon_divergence(sample_p=sample_p, 58 | sample_q=sample_q, 59 | base=base) 60 | else: 61 | return continuous_jensen_shannon_divergence_from_sample(sample_p=sample_p, 62 | sample_q=sample_q, 63 | base=base) 64 | 65 | 66 | def mutual_information_from_samples(sample_x: np.ndarray, 67 | sample_y: np.ndarray, 68 | base: float = np.e, 69 | discrete: bool = False) -> float: 70 | if discrete: 71 | return discrete_mutual_information(sample_x=sample_x, 72 | sample_y=sample_y, 73 | base=base) 74 | else: 75 | return continuous_mutual_information_from_samples(sample_x=sample_x, 76 | sample_y=sample_y, 77 | base=base) 78 | 79 | 80 | def joint_entropy_from_samples(sample_x: np.ndarray, 81 | sample_y: np.ndarray, 82 | base: float = np.e, 83 | discrete: bool = False) -> float: 84 | if discrete: 85 | return discrete_joint_entropy(sample_x=sample_x, 86 | sample_y=sample_y, 87 | base=base) 88 | else: 89 | return continuous_joint_entropy_from_samples(sample_x=sample_x, 90 | sample_y=sample_y, 91 | base=base) 92 | 93 | 94 | def conditional_entropy_from_samples(sample_x: np.ndarray, 95 | sample_y: np.ndarray, 96 | base: float = np.e, 97 | discrete: bool = False) -> float: 98 | if discrete: 99 | return discrete_conditional_entropy_of_y_given_x(sample_x=sample_x, 100 | sample_y=sample_y, 101 | base=base) 102 | else: 103 | return continuous_conditional_entropy_from_samples(sample_x=sample_x, 104 | sample_y=sample_y, 105 | base=base) 106 | -------------------------------------------------------------------------------- /divergence/tests/test_continuous.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import scipy as sp 4 | import 
statsmodels.api as sm 5 | import typing as tp 6 | 7 | from divergence import * 8 | from divergence.base import _select_vectorized_log_fun_for_base 9 | 10 | 11 | def entropy_of_normal_distribution(sigma: float, 12 | log_fun: tp.Callable = np.log) \ 13 | -> float: 14 | return 0.5 * (1.0 + log_fun(2 * np.pi * sigma**2)) 15 | 16 | 17 | def relative_entropy_between_two_normal_distributions(mu_1: float, 18 | sigma_1: float, 19 | mu_2: float, 20 | sigma_2: float, 21 | log_fun: tp.Callable = np.log) \ 22 | -> float: 23 | return ((mu_1 - mu_2)**2 + sigma_1**2 - sigma_2**2) / (2 * sigma_2**2) + \ 24 | log_fun(sigma_2/sigma_1) 25 | 26 | 27 | def cross_entropy_between_two_normal_distributions(mu_1: float, 28 | sigma_1: float, 29 | mu_2: float, 30 | sigma_2: float, 31 | log_fun: tp.Callable = np.log) \ 32 | -> float: 33 | return entropy_of_normal_distribution(sigma_1, log_fun=log_fun) + \ 34 | relative_entropy_between_two_normal_distributions(mu_1=mu_1, 35 | sigma_1=sigma_1, 36 | mu_2=mu_2, 37 | sigma_2=sigma_2, 38 | log_fun=log_fun) 39 | 40 | 41 | def mutual_information_for_bivariate_normal_distribution(rho: float) -> float: 42 | return - 0.5 * np.log(1.0 - rho**2) 43 | 44 | 45 | def generate_normal_sample(mu: float, 46 | sigma: float, 47 | n: int, 48 | antithetic: bool = False) -> np.ndarray: 49 | z = np.random.randn(n) 50 | if antithetic: 51 | z = np.hstack((z, -z)) 52 | 53 | return mu + sigma * z 54 | 55 | 56 | # fix random seed for reproducibility 57 | np.random.seed(42) 58 | 59 | # set parameters of the normal distributions p and q 60 | mu_p = 2 61 | sigma_p = 3 62 | mu_q = 1 63 | sigma_q = 2 64 | 65 | # draw samples from each normal distribution 66 | n = 10000 67 | 68 | sample_p = generate_normal_sample(mu_p, sigma_p, n=n, antithetic=True) 69 | sample_q = generate_normal_sample(mu_q, sigma_q, n=n, antithetic=True) 70 | 71 | # fit a non-parametric density estimate for both distributions 72 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 73 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 74 | kde_p.fit() 75 | kde_q.fit() 76 | 77 | # construct exact normal densities for p and q 78 | pdf_p = lambda x: sp.stats.norm.pdf(x, mu_p, sigma_p) 79 | pdf_q = lambda x: sp.stats.norm.pdf(x, mu_q, sigma_q) 80 | 81 | # compute support for kernel density estimates 82 | p_min = min(kde_p.support) 83 | p_max = max(kde_p.support) 84 | q_min = min(kde_q.support) 85 | q_max = max(kde_q.support) 86 | combined_min = min(p_min, q_min) 87 | combined_max = max(p_max, q_max) 88 | 89 | 90 | @pytest.mark.parametrize("sigma, sample", ((sigma_p, sample_p), (sigma_q, sample_q))) 91 | def test_entropy(sigma: float, sample: np.ndarray, base: float = np.e): 92 | log_fun = _select_vectorized_log_fun_for_base(base) 93 | 94 | assert np.isclose(entropy_from_samples(sample, base=base, discrete=False), 95 | entropy_of_normal_distribution(sigma, log_fun=log_fun), 96 | rtol=1e-2, 97 | atol=1e-2) 98 | 99 | 100 | def test_cross_entropy(base: float = np.e): 101 | log_fun = _select_vectorized_log_fun_for_base(base) 102 | 103 | assert np.isclose(cross_entropy_from_samples(sample_p, 104 | sample_q, 105 | base=base, 106 | discrete=False), 107 | cross_entropy_between_two_normal_distributions(mu_p, 108 | sigma_p, 109 | mu_q, 110 | sigma_q, 111 | log_fun=log_fun), 112 | rtol=1e-1, 113 | atol=1e-1) 114 | 115 | 116 | def test_relative_entropy(base: float = np.e): 117 | log_fun = _select_vectorized_log_fun_for_base(base) 118 | 119 | assert np.isclose(relative_entropy_from_samples(sample_p, 120 | sample_q, 121 | base=base, 122 | discrete=False), 
123 | relative_entropy_between_two_normal_distributions(mu_p, 124 | sigma_p, 125 | mu_q, 126 | sigma_q, 127 | log_fun=log_fun), 128 | rtol=1e-1, 129 | atol=1e-1) 130 | 131 | 132 | # set parameters of the normal distributions x and y 133 | mu_x = 2 134 | sigma_x = 3 135 | mu_y = 1 136 | sigma_y = 2 137 | rho = 0.5 138 | 139 | # draw 1000 samples from each normal distribution 140 | n = 10000 141 | z = np.random.randn(n) 142 | sample_x = mu_x + sigma_x * z 143 | sample_y = mu_y + sigma_y * (rho * z + np.sqrt(1.0 - rho**2) * np.random.randn(n)) 144 | 145 | # fit a non-parametric density estimate for both distributions 146 | kde_x = sm.nonparametric.KDEUnivariate(sample_x) 147 | kde_y = sm.nonparametric.KDEUnivariate(sample_y) 148 | kde_x.fit() # Estimate the densities 149 | kde_y.fit() # Estimate the densities 150 | kde_xy = sp.stats.gaussian_kde([sample_x, sample_y]) 151 | 152 | # construct exact normal densities for x and y 153 | pdf_x = lambda x: sp.stats.norm.pdf(x, mu_x, sigma_x) 154 | pdf_y = lambda y: sp.stats.norm.pdf(y, mu_y, sigma_y) 155 | pdf_xy = sp.stats.multivariate_normal(mean=[mu_x, mu_y], 156 | cov=[[sigma_x**2, rho * sigma_x * sigma_y], 157 | [rho * sigma_x * sigma_y, sigma_y**2]]).pdf 158 | 159 | # # compute support for kernel density estimates 160 | x_min = min(kde_x.support) 161 | x_max = max(kde_x.support) 162 | y_min = min(kde_y.support) 163 | y_max = max(kde_y.support) 164 | 165 | 166 | @pytest.fixture 167 | def mutual_information_from_bivariate_normal_samples() -> float: 168 | return continuous_mutual_information_from_samples(sample_x=sample_x, 169 | sample_y=sample_y) 170 | 171 | 172 | @pytest.fixture 173 | def joint_entropy_of_x_and_y() -> float: 174 | return joint_entropy_from_samples(sample_x, sample_y) 175 | 176 | 177 | @pytest.fixture 178 | def conditional_entropy_of_y_given_x_from_bivariate_normal_samples() -> float: 179 | return conditional_entropy_from_samples(sample_x, sample_y) 180 | 181 | 182 | @pytest.fixture 183 | def conditional_entropy_of_x_given_y_from_bivariate_normal_samples() -> float: 184 | return conditional_entropy_from_samples(sample_y, sample_x) 185 | 186 | 187 | def test_mutual_information(mutual_information_from_bivariate_normal_samples): 188 | theoretical_mutual_information = mutual_information_for_bivariate_normal_distribution(rho) 189 | 190 | assert np.isclose(theoretical_mutual_information, 191 | mutual_information_from_bivariate_normal_samples, 192 | rtol=1e-1, 193 | atol=1e-1) 194 | 195 | 196 | def test_joint_entropy_via_conditional_entropy_of_y_given_x( 197 | joint_entropy_of_x_and_y, 198 | conditional_entropy_of_y_given_x_from_bivariate_normal_samples): 199 | np.isclose(entropy_from_samples(sample_x) + 200 | conditional_entropy_of_y_given_x_from_bivariate_normal_samples, 201 | joint_entropy_of_x_and_y, 202 | rtol=1e-2, 203 | atol=1e-3) 204 | 205 | 206 | def test_joint_entropy_via_conditional_entropy_of_x_given_y( 207 | joint_entropy_of_x_and_y, 208 | conditional_entropy_of_x_given_y_from_bivariate_normal_samples): 209 | np.isclose(entropy_from_samples(sample_y) + 210 | conditional_entropy_of_x_given_y_from_bivariate_normal_samples, 211 | joint_entropy_of_x_and_y, 212 | rtol=1e-2, 213 | atol=1e-3) 214 | -------------------------------------------------------------------------------- /divergence/tests/test_discrete.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import numpy as np 3 | import pytest 4 | import scipy as sp 5 | import typing as tp 6 | 7 | from divergence.base 
import _select_vectorized_log_fun_for_base 8 | 9 | from divergence.discrete import ( 10 | discrete_entropy, 11 | _construct_frequencies_for_one_sample, 12 | _construct_frequencies_for_two_samples, 13 | discrete_relative_entropy, 14 | _construct_unique_combinations_and_counts_from_two_samples, 15 | _get_index_for_combination, 16 | _get_count_for_combination, 17 | _get_index_of_value_in_1d_array, 18 | _get_count_for_value, 19 | discrete_mutual_information, 20 | discrete_joint_entropy, 21 | discrete_conditional_entropy_of_y_given_x 22 | ) 23 | 24 | 25 | multinomial_sample_q_1 = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1]) 26 | multinomial_sample_p_1 = np.array([2, 2, 3, 2, 3]) 27 | expected_frequencies_q_1 = np.array([0.3, 0.4]) 28 | expected_frequencies_p_1 = np.array([0.6, 0.4]) 29 | 30 | multinomial_sample_q_2 = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1]) 31 | multinomial_sample_p_2 = np.array([1, 2, 3, 2, 3]) 32 | expected_frequencies_q_2 = np.array([0.3, 0.3, 0.4]) 33 | expected_frequencies_p_2 = np.array([0.2, 0.4, 0.4]) 34 | 35 | 36 | def _get_base_from_log_fun(log_fun: tp.Callable): 37 | if log_fun is np.log: 38 | base = np.e 39 | elif log_fun is np.log2: 40 | base = 2 41 | elif log_fun is np.log10: 42 | base = 10 43 | else: 44 | raise ValueError('log_fun is not supported') 45 | 46 | return base 47 | 48 | 49 | def discrete_entropy_scipy(sample: np.ndarray, log_fun: tp.Callable = np.log) -> float: 50 | base = _get_base_from_log_fun(log_fun) 51 | return sp.stats.entropy(_construct_frequencies_for_one_sample(sample), base=base) 52 | 53 | 54 | @pytest.mark.parametrize("sample", (multinomial_sample_q_1, 55 | multinomial_sample_p_1, 56 | multinomial_sample_q_2, 57 | multinomial_sample_p_2)) 58 | @pytest.mark.parametrize("base", (np.e, 2.0, 10.0)) 59 | def test_entropy(sample: np.ndarray, base: float): 60 | log_fun = _select_vectorized_log_fun_for_base(base) 61 | entropy_from_divergence = discrete_entropy(sample=sample, base=base) 62 | entropy_from_scipy = discrete_entropy_scipy(sample=sample, log_fun=log_fun) 63 | assert np.isclose(entropy_from_divergence, entropy_from_scipy) 64 | 65 | 66 | @pytest.mark.parametrize("sample_p, sample_q, expected_frequencies_p, expected_frequencies_q", 67 | [ 68 | (multinomial_sample_p_1, multinomial_sample_q_1, expected_frequencies_p_1, expected_frequencies_q_1), 69 | (multinomial_sample_p_2, multinomial_sample_q_2, expected_frequencies_p_2, expected_frequencies_q_2) 70 | ]) 71 | def test_construct_frequencies(sample_p: np.ndarray, 72 | sample_q: np.ndarray, 73 | expected_frequencies_p: np.ndarray, 74 | expected_frequencies_q: np.ndarray): 75 | combined_sample = np.hstack((sample_p, sample_q)) 76 | unique_combined = np.unique(combined_sample) 77 | 78 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 79 | frequencies_q = counts_q / len(sample_q) 80 | 81 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 82 | frequencies_p = counts_p / len(sample_p) 83 | 84 | combined_frequencies_p, combined_frequencies_q = \ 85 | _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p, 86 | sorted_q_realizations=unique_q, 87 | sorted_q_frequencies=frequencies_q, 88 | sorted_p_frequencies=frequencies_p, 89 | sorted_combined_realizations=unique_combined) 90 | 91 | assert np.allclose(combined_frequencies_p, expected_frequencies_p) 92 | assert np.allclose(combined_frequencies_q, expected_frequencies_q) 93 | 94 | 95 | def test_construct_frequencies_error_q_zero_and_p_nonzero(): 96 | sample_q = np.array([2, 2, 3, 2, 3, 3, 3, 2, 2, 2]) 97 | sample_p = 
np.array([1, 2, 3, 2, 3]) 98 | 99 | combined_sample = np.hstack((sample_p, sample_q)) 100 | unique_combined = np.unique(combined_sample) 101 | 102 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 103 | frequencies_q = counts_q / len(sample_q) 104 | 105 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 106 | frequencies_p = counts_p / len(sample_p) 107 | 108 | with pytest.raises(ValueError): 109 | combined_frequencies_p, combined_frequencies_q = \ 110 | _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p, 111 | sorted_q_realizations=unique_q, 112 | sorted_q_frequencies=frequencies_q, 113 | sorted_p_frequencies=frequencies_p, 114 | sorted_combined_realizations=unique_combined) 115 | 116 | 117 | def _discrete_relative_entropy_slow(sample_p: np.ndarray, 118 | sample_q: np.ndarray, 119 | log_fun: tp.Callable = np.log): 120 | combined_sample = np.hstack((sample_p, sample_q)) 121 | unique_combined = np.unique(combined_sample) 122 | 123 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 124 | frequencies_q = counts_q / len(sample_q) 125 | realization_to_frequency_dict_q = dict(zip(unique_q, frequencies_q)) 126 | 127 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 128 | frequencies_p = counts_p / len(sample_p) 129 | realization_to_frequency_dict_p = dict(zip(unique_p, frequencies_p)) 130 | 131 | combined_frequencies_q = np.array([realization_to_frequency_dict_q.get(realization, 0.0) 132 | for realization 133 | in unique_combined]) 134 | 135 | combined_frequencies_p = np.array([realization_to_frequency_dict_p.get(realization, 0.0) 136 | for realization 137 | in unique_combined]) 138 | 139 | base = _get_base_from_log_fun(log_fun) 140 | # if log_fun is np.log: 141 | # base = np.e 142 | # elif log_fun is np.log2: 143 | # base = 2 144 | # elif log_fun is np.log10: 145 | # base = 10 146 | # else: 147 | # raise ValueError('log_fun is not supported') 148 | 149 | return sp.stats.entropy(pk=combined_frequencies_p, qk=combined_frequencies_q, base=base) 150 | 151 | 152 | @pytest.mark.parametrize("sample_p, sample_q", ((multinomial_sample_p_1, multinomial_sample_q_1), 153 | (multinomial_sample_p_2, multinomial_sample_q_2))) 154 | @pytest.mark.parametrize("base", (np.e, 2.0, 10.0)) 155 | def test_compare_slow_and_fast_implementations_of_relative_entropy(sample_p: np.ndarray, 156 | sample_q: np.ndarray, 157 | base: float): 158 | log_fun = _select_vectorized_log_fun_for_base(base) 159 | 160 | relative_entropy_from_slow_calculation = \ 161 | _discrete_relative_entropy_slow(sample_p=sample_p, 162 | sample_q=sample_q, 163 | log_fun=log_fun) 164 | 165 | relative_entropy_from_fast_calculation = \ 166 | discrete_relative_entropy(sample_p=sample_p, 167 | sample_q=sample_q, 168 | base=base) 169 | 170 | assert np.isclose(relative_entropy_from_slow_calculation, 171 | relative_entropy_from_fast_calculation) 172 | 173 | 174 | @pytest.fixture 175 | def sample_x() -> np.ndarray: 176 | return np.array([1, 1, 3, 1, 2, 3]) 177 | 178 | 179 | @pytest.fixture 180 | def sample_y() -> np.ndarray: 181 | return np.array([1, 1, 1, 3, 2, 1]) 182 | 183 | 184 | def test_construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y): 185 | unique_combinations, counts = \ 186 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 187 | 188 | print('unique combinations:') 189 | print(unique_combinations) 190 | 191 | print('counts') 192 | print(counts) 193 | 194 | assert np.all(unique_combinations == np.array([[1, 1], [1, 3], [2, 2], 
[3, 1]])) 195 | assert np.all(counts == np.array([2, 1, 1, 2])) 196 | 197 | 198 | @pytest.mark.parametrize('combination, index', [(np.array([1, 1]), 0), 199 | (np.array([1, 3]), 1), 200 | (np.array([2, 2]), 2), 201 | (np.array([3, 1]), 3)]) 202 | def test_get_index_for_combination(combination: np.ndarray, 203 | index: int, 204 | sample_x: np.ndarray, 205 | sample_y: np.ndarray): 206 | unique_combinations, counts = \ 207 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 208 | 209 | assert index == _get_index_for_combination(combination=combination, 210 | unique_combinations=unique_combinations) 211 | 212 | 213 | @pytest.mark.parametrize('combination, count', [(np.array([1, 1]), 2), 214 | (np.array([1, 3]), 1), 215 | (np.array([2, 2]), 1), 216 | (np.array([3, 1]), 2)]) 217 | def test_get_count_for_combination(combination: np.ndarray, 218 | count: int, 219 | sample_x: np.ndarray, 220 | sample_y: np.ndarray): 221 | unique_combinations, counts = \ 222 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 223 | 224 | assert count == _get_count_for_combination(combination=combination, 225 | unique_combinations=unique_combinations, 226 | counts=counts) 227 | 228 | 229 | @pytest.mark.parametrize('value, index', [(1, 0), (2, 1), (3, 2)]) 230 | def test_get_index_for_value(value: numbers.Number, 231 | index: int, 232 | sample_x: np.ndarray): 233 | unique_values = np.unique(sample_x) 234 | 235 | assert index == _get_index_of_value_in_1d_array(value, unique_values) 236 | 237 | 238 | @pytest.mark.parametrize('value, count', [(1, 3), (2, 1), (3, 2)]) 239 | def test_get_count_for_value(value: numbers.Number, 240 | count: int, 241 | sample_x: np.ndarray): 242 | unique_values, counts = np.unique(sample_x, return_counts=True) 243 | 244 | assert count == _get_count_for_value(value, 245 | unique_values=unique_values, 246 | counts=counts) 247 | 248 | 249 | @pytest.mark.parametrize('sample', [np.array([1, 1, 3, 1, 2, 3]), 250 | np.array([1, 1, 1, 3, 2, 1]), 251 | np.array([1, 1, 1, 1, 1, 1])]) 252 | def test_compare_mutual_information_of_self_with_entropy(sample): 253 | assert discrete_entropy(sample) == discrete_mutual_information(sample, sample) 254 | 255 | 256 | @pytest.mark.parametrize('sample_x, sample_y', 257 | [(np.array([1, 1, 3, 1, 2, 3]), np.array([1, 1, 1, 3, 2, 1])), 258 | (np.array([1, 1, 1, 1, 1, 1]), np.array([2, 2, 2, 2, 2, 2]))]) 259 | def test_symmetry_of_mutual_information(sample_x, sample_y): 260 | assert discrete_mutual_information(sample_x, sample_y) == \ 261 | discrete_mutual_information(sample_y, sample_x) 262 | 263 | 264 | def test_discrete_conditional_entropy(sample_x: np.ndarray, sample_y: np.ndarray): 265 | joint_entropy = discrete_joint_entropy(sample_x=sample_x, sample_y=sample_y) 266 | entropy_x = discrete_entropy(sample_x) 267 | entropy_y = discrete_entropy(sample_y) 268 | conditional_entropy_of_y_given_x = \ 269 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_x, 270 | sample_y=sample_y) 271 | 272 | conditional_entropy_of_x_given_y = \ 273 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_y, 274 | sample_y=sample_x) 275 | 276 | assert np.isclose(entropy_x - conditional_entropy_of_x_given_y, 277 | entropy_y - conditional_entropy_of_y_given_x) 278 | 279 | assert np.isclose(joint_entropy, entropy_x + conditional_entropy_of_y_given_x) 280 | assert np.isclose(joint_entropy, entropy_y + conditional_entropy_of_x_given_y) 281 | 282 | 283 | def test_discrete_mutual_information_and_conditional_entropy(sample_x: 
np.ndarray, 284 | sample_y: np.ndarray): 285 | mutual_information = discrete_mutual_information(sample_x=sample_x, sample_y=sample_y) 286 | 287 | entropy_x = discrete_entropy(sample_x) 288 | entropy_y = discrete_entropy(sample_y) 289 | conditional_entropy_of_y_given_x = \ 290 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_x, 291 | sample_y=sample_y) 292 | 293 | conditional_entropy_of_x_given_y = \ 294 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_y, 295 | sample_y=sample_x) 296 | 297 | assert np.isclose(mutual_information, entropy_x - conditional_entropy_of_x_given_y) 298 | assert np.isclose(mutual_information, entropy_y - conditional_entropy_of_y_given_x) 299 | -------------------------------------------------------------------------------- /divergence/discrete.py: -------------------------------------------------------------------------------- 1 | import numba 2 | import numbers 3 | import numpy as np 4 | import typing as tp 5 | 6 | from divergence.base import ( 7 | _select_vectorized_log_fun_for_base, 8 | Logarithm 9 | ) 10 | 11 | 12 | def _construct_counts_for_one_sample(sample: np.ndarray) -> np.ndarray: 13 | """ 14 | Compute the count (i.e. number of occurrences) for each realization in the sample. 15 | The realizations in the argument `sample` do not need to be sorted. But the output counts will 16 | correspond to sorted realizations. 17 | 18 | Parameters 19 | ---------- 20 | sample: a sample from the discrete distribution 21 | 22 | Returns 23 | ------- 24 | Counts of realizations from a sample 25 | 26 | """ 27 | _, counts = np.unique(sample, return_counts=True) 28 | return counts 29 | 30 | 31 | def _construct_frequencies_for_one_sample(sample: np.ndarray) -> np.ndarray: 32 | """ 33 | Compute the frequency (i.e. number of occurrences) for each realization in the sample. 34 | The realizations in the argument `sample` do not need to be sorted. But the output frequencies 35 | will correspond to sorted realizations. 36 | 37 | Parameters 38 | ---------- 39 | sample: a sample from the discrete distribution 40 | 41 | Returns 42 | ------- 43 | Frequencies of realizations from a sample 44 | 45 | """ 46 | return _construct_counts_for_one_sample(sample) / len(sample) 47 | 48 | 49 | def discrete_entropy(sample: np.ndarray, 50 | base: float = np.e) -> float: 51 | """ 52 | Approximate the entropy of a discrete distribution 53 | 54 | H(p) = - E_p[log(p)] 55 | 56 | from a sample. 57 | 58 | Parameters 59 | ---------- 60 | sample: a sample from the discrete distribution 61 | base: the base of the logarithm used to control the units of measurement for the result 62 | 63 | Returns 64 | ------- 65 | An approximation of the entropy of the discrete distribution from which the sample is drawn. 66 | 67 | """ 68 | log_fun = _select_vectorized_log_fun_for_base(base) 69 | frequencies = _construct_frequencies_for_one_sample(sample) 70 | return - np.sum(frequencies * log_fun(frequencies)) 71 | 72 | 73 | @numba.njit 74 | def _construct_frequencies_for_two_samples(sorted_p_realizations: np.ndarray, 75 | sorted_p_frequencies: np.ndarray, 76 | sorted_q_realizations: np.ndarray, 77 | sorted_q_frequencies: np.ndarray, 78 | sorted_combined_realizations: np.ndarray) \ 79 | -> tp.Tuple[np.ndarray, np.ndarray]: 80 | """ 81 | Construct two NumPy arrays of frequencies for corresponding observations from sorted 82 | realizations and frequencies from two samples. 
If a realization in the sample from q is not in 83 | the sample from p or has frequency zero then it is not included in either of the output 84 | frequency arrays. 85 | 86 | Parameters 87 | ---------- 88 | sorted_p_realizations: NumPy array of unique realizations in the sample from p 89 | sorted_p_frequencies: The frequency of each realization in `sorted_p_realizations` 90 | sorted_q_realizations: NumPy array of unique realizations in the sample from q 91 | sorted_q_frequencies: The frequency of each realization in `sorted_q_realizations` 92 | sorted_combined_realizations: NumPy array of unique realizations in the samples from p and q 93 | combined 94 | 95 | Returns 96 | ------- 97 | Two NumPy arraysof the same length with frequencies for corresponding observations that have 98 | positive weight in the sample from p. 99 | 100 | """ 101 | assert len(sorted_p_realizations) == len(sorted_p_frequencies) 102 | assert len(sorted_q_realizations) == len(sorted_q_frequencies) 103 | 104 | p_source_index = 0 105 | q_source_index = 0 106 | p_target_index = 0 107 | q_target_index = 0 108 | 109 | p_frequencies = np.zeros((len(sorted_p_realizations, ))) 110 | q_frequencies = np.zeros((len(sorted_p_realizations, ))) 111 | 112 | for combined_index in range(len(sorted_combined_realizations)): 113 | realization = sorted_combined_realizations[combined_index] 114 | 115 | if sorted_p_realizations[p_source_index] != realization: 116 | if sorted_q_realizations[q_source_index] == realization: 117 | q_source_index += 1 118 | continue 119 | 120 | if sorted_p_frequencies[p_source_index] == 0.0: 121 | p_source_index += 1 122 | if sorted_q_realizations[q_source_index] == realization: 123 | q_source_index += 1 124 | continue 125 | 126 | if sorted_q_realizations[q_source_index] != realization or \ 127 | sorted_q_realizations[q_source_index] == 0.0: 128 | raise ValueError('q(x) is zero but p(x) is not') 129 | # if sorted_p_frequencies[p_source_index] != 0.0: # we know that is true 130 | # # if q(x) == 0 we must have p(x) == 0, which is not the case here 131 | # raise ValueError('q(x) is zero but p(x) is not') 132 | # else: 133 | # continue 134 | 135 | p_frequencies[p_target_index] = sorted_p_frequencies[p_source_index] 136 | q_frequencies[q_target_index] = sorted_q_frequencies[q_source_index] 137 | p_source_index += 1 138 | q_source_index += 1 139 | p_target_index += 1 140 | q_target_index += 1 141 | 142 | return p_frequencies[:p_target_index], q_frequencies[:q_target_index] 143 | 144 | 145 | def discrete_relative_entropy(sample_p: np.ndarray, 146 | sample_q: np.ndarray, 147 | base: float = np.e): 148 | """ 149 | Approximate the relative entropy of the discrete distribution q relative to the discrete 150 | distribution p 151 | 152 | D_KL(p||q) = E_p [log(p/q)] 153 | 154 | from samples of these distributions. 155 | 156 | Parameters 157 | ---------- 158 | sample_p: sample from the distribution p 159 | sample_q: sample from the distribution q 160 | base: the base of the logarithm used to control the units of measurement for the result 161 | 162 | Returns 163 | ------- 164 | The relative entropy of the distribution q relative to the distribution p. 
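    Examples
    --------
    A minimal sketch using the multinomial samples from the accompanying test suite
    (the numerical value of the result is not reproduced here):

    >>> p = np.array([2, 2, 3, 2, 3])
    >>> q = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1])
    >>> discrete_relative_entropy(p, q, base=2.0)  # doctest: +SKIP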
165 | 166 | """ 167 | log_fun = _select_vectorized_log_fun_for_base(base) 168 | combined_sample = np.hstack((sample_p, sample_q)) 169 | unique_combined = np.unique(combined_sample) 170 | 171 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 172 | frequencies_q = counts_q / len(sample_q) 173 | 174 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 175 | frequencies_p = counts_p / len(sample_p) 176 | 177 | combined_frequencies_p, combined_frequencies_q = \ 178 | _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p, 179 | sorted_q_realizations=unique_q, 180 | sorted_q_frequencies=frequencies_q, 181 | sorted_p_frequencies=frequencies_p, 182 | sorted_combined_realizations=unique_combined) 183 | 184 | return np.sum(combined_frequencies_p * log_fun(combined_frequencies_p / combined_frequencies_q)) 185 | 186 | 187 | def discrete_cross_entropy(sample_p: np.ndarray, 188 | sample_q: np.ndarray, 189 | base: float = np.e): 190 | """ 191 | Approximate the cross entropy of the discrete distribution q relative to the discrete 192 | distribution p 193 | 194 | H_q(p) = - E_p [log(q)] 195 | 196 | from samples of these distributions. 197 | 198 | Parameters 199 | ---------- 200 | sample_p: sample from the distribution p 201 | sample_q: sample from the distribution q 202 | base: the base of the logarithm used to control the units of measurement for the result 203 | 204 | Returns 205 | ------- 206 | The cross entropy of the distribution q relative to the distribution p. 207 | 208 | """ 209 | return discrete_relative_entropy(sample_p=sample_p, 210 | sample_q=sample_q, 211 | base=base) + \ 212 | discrete_entropy(sample=sample_p, 213 | base=base) 214 | 215 | 216 | def discrete_jensen_shannon_divergence(sample_p: np.ndarray, 217 | sample_q: np.ndarray, 218 | base: float = np.e): 219 | """ 220 | Approximate the Jensen-Shannon divergence between discrete distributions p and q 221 | 222 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 223 | 224 | from samples of these distributions. 225 | 226 | Parameters 227 | ---------- 228 | sample_p: sample from the distribution p 229 | sample_q: sample from the distribution q 230 | base: the base of the logarithm used to control the units of measurement for the result 231 | 232 | Returns 233 | ------- 234 | The Jensen-Shannon divergence between distributions p and q. 235 | 236 | """ 237 | m = np.hstack((sample_p, sample_q)) 238 | D_PM = discrete_relative_entropy(sample_p=sample_p, sample_q=m, base=base) 239 | D_QM = discrete_relative_entropy(sample_p=sample_q, sample_q=m, base=base) 240 | 241 | return 0.5 * D_PM + 0.5 * D_QM 242 | 243 | 244 | def _construct_unique_combinations_and_counts_from_two_samples(sample_x: np.ndarray, 245 | sample_y: np.ndarray) \ 246 | -> tp.Tuple[np.ndarray, np.ndarray]: 247 | """ 248 | Construct an array of unique co-located combinations of sample_x and sample_y as well as an 249 | array of associated counts. 
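    For example (taken from the accompanying tests), sample_x = [1, 1, 3, 1, 2, 3] and
    sample_y = [1, 1, 1, 3, 2, 1] yield the unique combinations
    [[1, 1], [1, 3], [2, 2], [3, 1]] with counts [2, 1, 1, 2].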
250 | 251 | Parameters 252 | ---------- 253 | sample_x: a NumPy array of draws of variable x 254 | sample_y: a NumPy array of draws of variable y 255 | 256 | Returns 257 | ------- 258 | a tuple of unique combinations of draws from x and y and associated counts 259 | """ 260 | assert sample_x.ndim == 1 261 | assert sample_y.ndim == 1 262 | 263 | assert sample_x.shape == sample_y.shape 264 | 265 | n = len(sample_x) 266 | 267 | sample_x = sample_x.reshape((n, 1)) 268 | sample_y = sample_y.reshape((n, 1)) 269 | 270 | sample_xy = np.concatenate((sample_x, sample_y), axis=1) 271 | 272 | unique_combinations, counts = np.unique(sample_xy, axis=0, return_counts=True) 273 | 274 | return unique_combinations, counts 275 | 276 | 277 | @numba.njit 278 | def _get_index_for_combination(combination: np.ndarray, 279 | unique_combinations: np.ndarray) -> int: 280 | """ 281 | Returns the row index of a 2 element array in a nx2 dimensional array. Returns -1 if the 282 | requested array is not in the search array. 283 | 284 | Parameters 285 | ---------- 286 | combination: an array whose position of first occurence is to be found 287 | unique_combinations: an array which is to be searched 288 | 289 | Returns 290 | ------- 291 | the row index of the combination 292 | """ 293 | for i in range(unique_combinations.shape[0]): 294 | if np.all(unique_combinations[i, :] == combination): 295 | return i 296 | 297 | return -1 298 | 299 | 300 | @numba.njit 301 | def _get_count_for_combination(combination: np.ndarray, 302 | unique_combinations: np.ndarray, 303 | counts: np.ndarray) -> int: 304 | """ 305 | Given a 2x1 combination and arrays of unique combinations and associated counts, return the 306 | count of the combination. 307 | 308 | Parameters 309 | ---------- 310 | combination: a 2 element array whose count is to be determined 311 | unique_combinations: a 2xn array of unique combinations 312 | counts: the count associated with the unique combinations 313 | 314 | Returns 315 | ------- 316 | the count of the combination 317 | """ 318 | 319 | return counts[_get_index_for_combination(combination=combination, 320 | unique_combinations=unique_combinations)] 321 | 322 | 323 | @numba.njit 324 | def _get_index_of_value_in_1d_array(value: numbers.Number, 325 | array: np.ndarray) -> int: 326 | """ 327 | Returns the index of a value in an array and returns -1 if the array does not contain the value. 328 | Parameters 329 | ---------- 330 | value: a number 331 | array: a one-dimensional NumPy array 332 | 333 | Returns 334 | ------- 335 | the index of the value in the array 336 | """ 337 | for i in range(len(array)): 338 | if value == array[i]: 339 | return i 340 | 341 | return -1 342 | 343 | 344 | @numba.njit 345 | def _get_count_for_value(value: numbers.Number, 346 | unique_values: np.ndarray, 347 | counts: np.ndarray) -> int: 348 | """ 349 | Given a value and arrays of unique values and associated counts, return the 350 | count of the value. 
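    For example, with unique_values = [1, 2, 3] and counts = [3, 1, 2] (the value counts of the
    sample [1, 1, 3, 1, 2, 3] used in the tests), the count returned for the value 2 is 1.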
351 | 352 | Parameters 353 | ---------- 354 | value: a number whose count is to be determined 355 | unique_values: a one-dimensional array of unique values 356 | counts: the count associated with each unique value 357 | 358 | Returns 359 | ------- 360 | the count of the value 361 | """ 362 | 363 | return counts[_get_index_of_value_in_1d_array(value, unique_values)] 364 | 365 | 366 | @numba.njit 367 | def _discrete_mutual_information_internal(n: int, 368 | unique_combinations_xy: np.ndarray, 369 | counts_xy: np.ndarray, 370 | unique_values_x: np.ndarray, 371 | counts_x: np.ndarray, 372 | unique_values_y: np.ndarray, 373 | counts_y: np.ndarray, 374 | base: float = np.e) -> float: 375 | """ 376 | Compute mutual information of discrete random variables x and y from 377 | 378 | Parameters 379 | ---------- 380 | n: sample size 381 | unique_combinations_xy: NumPy array with shape (number_of_combinations, 2) of unique 382 | combinations of X and Y appearing in the sample 383 | counts_xy: the number of appearances of each unique combination in the sample 384 | unique_values_x: NumPy array of the unique values of x 385 | counts_x: number of appearances of each unique value of x 386 | unique_values_y: NumPy array of the unique values of y 387 | counts_y: number of appearances of each unique value of y 388 | base: the base of the logarithm used to control the units of measurement for the result 389 | 390 | Returns 391 | ------- 392 | 393 | """ 394 | 395 | logarithm = Logarithm(base) 396 | 397 | mutual_information = 0.0 398 | for i in range(counts_xy.shape[0]): 399 | x = unique_combinations_xy[i, 0] 400 | y = unique_combinations_xy[i, 1] 401 | joint_count = counts_xy[i] 402 | 403 | x_count = _get_count_for_value(value=x, 404 | unique_values=unique_values_x, 405 | counts=counts_x) 406 | 407 | y_count = _get_count_for_value(value=y, 408 | unique_values=unique_values_y, 409 | counts=counts_y) 410 | 411 | mutual_information += (joint_count / n) * logarithm.log(n * joint_count / (x_count * y_count)) 412 | 413 | return mutual_information 414 | 415 | 416 | def _check_dimensions_of_two_variable_sample(sample_x: np.ndarray, 417 | sample_y: np.ndarray) \ 418 | -> tp.Tuple[np.ndarray, np.ndarray, int]: 419 | """ 420 | Check that sample_x and sample_y have the same number of elements and make them vectors. 421 | 422 | Parameters 423 | ---------- 424 | sample_x: a NumPy array of draws of variable x 425 | sample_y: a NumPy array of draws of variable y 426 | 427 | Returns 428 | ------- 429 | 430 | """ 431 | if sample_x.ndim > 1: 432 | raise ValueError('sample_x must be a one dimensional array') 433 | 434 | if sample_y.ndim > 1: 435 | raise ValueError('sample_y must be a one dimensional array') 436 | 437 | sample_x = sample_x.reshape((-1, )) 438 | sample_y = sample_y.reshape((-1, )) 439 | 440 | n = len(sample_x) 441 | 442 | if n != len(sample_y): 443 | raise ValueError('sample_x and sample_y must have the same length') 444 | 445 | return sample_x, sample_y, n 446 | 447 | 448 | def discrete_mutual_information(sample_x: np.ndarray, 449 | sample_y: np.ndarray, 450 | base: float = np.e) -> float: 451 | """ 452 | Approximate the mutual information of x and y 453 | 454 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 455 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 456 | 457 | from a sample of both distributions. 
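    Equivalently, I(X; Y) = H(X) - H(X|Y) = H(Y) - H(Y|X), and I(X; X) = H(X); these
    identities are checked in the accompanying tests.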
458 | 459 | Parameters 460 | ---------- 461 | sample_x: a NumPy array of draws of variable x 462 | sample_y: a NumPy array of draws of variable y 463 | base: the base of the logarithm used to control the units of measurement for the result 464 | 465 | Returns 466 | ------- 467 | The mutual information of x and y. 468 | """ 469 | sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(sample_x, sample_y) 470 | 471 | unique_combinations_xy, counts_xy = \ 472 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 473 | 474 | unique_values_x, counts_x = np.unique(sample_x, return_counts=True) 475 | unique_values_y, counts_y = np.unique(sample_y, return_counts=True) 476 | 477 | return _discrete_mutual_information_internal(n=n, 478 | unique_combinations_xy=unique_combinations_xy, 479 | counts_xy=counts_xy, 480 | unique_values_x=unique_values_x, 481 | counts_x=counts_x, 482 | unique_values_y=unique_values_y, 483 | counts_y=counts_y, 484 | base=base) 485 | 486 | 487 | def discrete_joint_entropy(sample_x: np.ndarray, 488 | sample_y: np.ndarray, 489 | base: float = np.e) -> float: 490 | """ 491 | Approximate the joint entropy of x and y 492 | 493 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 494 | 495 | from a sample of both distributions. 496 | 497 | Parameters 498 | ---------- 499 | sample_x: a NumPy array of draws of variable x 500 | sample_y: a NumPy array of draws of variable y 501 | base: the base of the logarithm used to control the units of measurement for the result 502 | 503 | Returns 504 | ------- 505 | The joint entropy between of x and y 506 | """ 507 | 508 | log_fun = _select_vectorized_log_fun_for_base(base) 509 | sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(sample_x, sample_y) 510 | 511 | unique_combinations_xy, counts_xy = \ 512 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 513 | 514 | joint_frequency = (1.0 / n) * counts_xy 515 | 516 | return - np.sum(joint_frequency * log_fun(joint_frequency)) 517 | 518 | 519 | @numba.njit 520 | def _get_conditional_frequency_of_y_given_x(n: int, 521 | x: numbers.Number, 522 | y: numbers.Number, 523 | sample_x: np.ndarray, 524 | sample_y: np.ndarray) -> float: 525 | """ 526 | Given a sample of two variables X and Y, and specific values of these variables x and y, 527 | determine the conditional frequency of Y=y given that X=x. 528 | 529 | Parameters 530 | ---------- 531 | n: sample size 532 | x: value of x 533 | y: value of y 534 | sample_x: NumPy array containing the x-variable of the sample 535 | sample_y: NumPy array containing the y-variable of the sample 536 | 537 | Returns 538 | ------- 539 | the conditional frequency of Y=y given that X=x 540 | """ 541 | count_x = 0.0 542 | count_x_and_y = 0.0 543 | for i in range(n): 544 | if sample_x[i] == x: 545 | count_x += 1 546 | if sample_y[i] == y: 547 | count_x_and_y += 1 548 | 549 | if count_x == 0: 550 | raise ValueError('x value is not present in the sample') 551 | else: 552 | return count_x_and_y / count_x 553 | 554 | 555 | @numba.njit 556 | def _discrete_conditional_entropy_of_y_given_x_internal(n: int, 557 | unique_combinations_xy: np.ndarray, 558 | counts_xy: np.ndarray, 559 | sample_x: np.ndarray, 560 | sample_y: np.ndarray, 561 | base: float = np.e) -> float: 562 | """ 563 | Compute conditional entropy of discrete random variables X and Y from NumPy arrays of samples of 564 | these random variables. 
This function relies on pre-computed unique combinations of both 565 | variables and associated counts. 566 | 567 | Parameters 568 | ---------- 569 | n: sample size 570 | unique_combinations_xy: NumPy array with shape (number_of_combinations, 2) of unique 571 | combinations of X and Y appearing in the sample 572 | counts_xy: the number of appearances of each unique combination in the sample 573 | sample_x: NumPy array containing the x-variable of the sample 574 | sample_y: NumPy array containing the y-variable of the sample 575 | base: the base of the logarithm used to control the units of measurement for the result 576 | 577 | Returns 578 | ------- 579 | The conditional entropy from a sample of discrete random variables 580 | """ 581 | 582 | logarithm = Logarithm(base) 583 | 584 | conditional_entropy = 0.0 585 | for i in range(len(counts_xy)): 586 | x = unique_combinations_xy[i, 0] 587 | y = unique_combinations_xy[i, 1] 588 | 589 | conditional_frequency_of_y_given_x = \ 590 | _get_conditional_frequency_of_y_given_x(n=n, 591 | x=x, 592 | y=y, 593 | sample_x=sample_x, 594 | sample_y=sample_y) 595 | conditional_entropy -= counts_xy[i] * logarithm.log(conditional_frequency_of_y_given_x) / n 596 | 597 | return conditional_entropy 598 | 599 | 600 | def discrete_conditional_entropy_of_y_given_x(sample_x: np.ndarray, 601 | sample_y: np.ndarray, 602 | base: float = np.e) -> float: 603 | """ 604 | Approximate the conditional entropy of y given x 605 | 606 | H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right] 607 | 608 | from a sample of both distributions. 609 | 610 | Parameters 611 | ---------- 612 | sample_x: a NumPy array of draws of variable x 613 | sample_y: a NumPy array of draws of variable y 614 | base: the base of the logarithm used to control the units of measurement for the result 615 | 616 | Returns 617 | ------- 618 | The conditional entropy between of y given x. 
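    Note that H(Y|X) = H(X, Y) - H(X), an identity verified in the accompanying tests.

    Examples
    --------
    A minimal sketch using the sample fixtures from the tests (the numerical value of the
    result is not reproduced here):

    >>> x = np.array([1, 1, 3, 1, 2, 3])
    >>> y = np.array([1, 1, 1, 3, 2, 1])
    >>> discrete_conditional_entropy_of_y_given_x(sample_x=x, sample_y=y)  # doctest: +SKIP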
619 | """ 620 | sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(sample_x, sample_y) 621 | 622 | unique_combinations_xy, counts_xy = \ 623 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 624 | 625 | return _discrete_conditional_entropy_of_y_given_x_internal( 626 | n=n, 627 | unique_combinations_xy=unique_combinations_xy, 628 | counts_xy=counts_xy, 629 | sample_x=sample_x, 630 | sample_y=sample_y, 631 | base=base) 632 | -------------------------------------------------------------------------------- /notebooks/Divergence.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Package Imports and Setup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2020-07-31T03:56:35.065578Z", 16 | "start_time": "2020-07-31T03:56:35.043177Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "%load_ext autoreload\n", 22 | "%autoreload 2" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2020-07-31T03:56:35.856507Z", 31 | "start_time": "2020-07-31T03:56:35.067510Z" 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# %matplotlib inline\n", 37 | "import collections\n", 38 | "import math\n", 39 | "import numpy as np\n", 40 | "import scipy as sp\n", 41 | "import statsmodels.api as sm" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": { 48 | "ExecuteTime": { 49 | "end_time": "2020-07-31T03:56:39.231858Z", 50 | "start_time": "2020-07-31T03:56:35.858577Z" 51 | } 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "from divergence import *" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# Distributions and Samples" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Construct Artificial Sample from two Normal Distributions" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "This example considers two different normal distributions $p$ and $q$ with\n", 77 | "$p = N(2, 9)$ and $q = N(1, 4)$." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": { 84 | "ExecuteTime": { 85 | "end_time": "2020-07-31T03:56:39.304942Z", 86 | "start_time": "2020-07-31T03:56:39.234347Z" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "# fix random seed for reproducibility\n", 92 | "np.random.seed(42)\n", 93 | "\n", 94 | "# set parameters of the normal distributions p and q\n", 95 | "mu_p = 2\n", 96 | "sigma_p = 3\n", 97 | "mu_q = 1\n", 98 | "sigma_q = 2\n", 99 | "\n", 100 | "# draw samples from each normal distribution\n", 101 | "n = 10000\n", 102 | "\n", 103 | "def draw_normal(mu, sigma, n: int, antithetic: bool = False):\n", 104 | " z = np.random.randn(n)\n", 105 | " if antithetic: \n", 106 | " z = np.hstack((z, -z))\n", 107 | " \n", 108 | " return mu + sigma * z\n", 109 | "\n", 110 | "sample_p = draw_normal(mu_p, sigma_p, n=n, antithetic=True)\n", 111 | "sample_q = draw_normal(mu_q, sigma_q, n=n, antithetic=True)\n", 112 | "\n", 113 | "# fit a non-parametric density estimate for both distributions\n", 114 | "kde_p = sm.nonparametric.KDEUnivariate(sample_p)\n", 115 | "kde_q = sm.nonparametric.KDEUnivariate(sample_q)\n", 116 | "kde_p.fit()\n", 117 | "kde_q.fit()\n", 118 | "\n", 119 | "# construct exact normal densities for p and q\n", 120 | "pdf_p = lambda x: sp.stats.norm.pdf(x, mu_p, sigma_p)\n", 121 | "pdf_q = lambda x: sp.stats.norm.pdf(x, mu_q, sigma_q)\n", 122 | "\n", 123 | "# compute support for kernel density estimates\n", 124 | "p_min = min(kde_p.support)\n", 125 | "p_max = max(kde_p.support)\n", 126 | "q_min = min(kde_q.support)\n", 127 | "q_max = max(kde_q.support)\n", 128 | "combined_min = min(p_min, q_min)\n", 129 | "combined_max = max(p_max, q_max)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Construct Sample from Multinomial Distribution" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2020-07-31T03:56:39.339814Z", 145 | "start_time": "2020-07-31T03:56:39.306432Z" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "multinomial_sample_q = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1])\n", 151 | "multinomial_sample_p = np.array([1, 2, 3, 3, 3, 3, 3, 3, 3, 3])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "# Entropy" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "The entropy of a probability distribution $p$ is defined as \n", 166 | "\n", 167 | "$H(X) = - \\mathbb{E}_p \\left[ \\log_{\\text{base}} p \\right]$, \n", 168 | "\n", 169 | "where $\\mathbb{E}_P$ denotes expectation with respect the probability distribution $p$. In information theory, the base of the logarithm is 2 and the interpretation of entropy is the average number of bits needed to optimally encode the signal represented by the distribution $p$. \n", 170 | "\n", 171 | "Divergence defaults to $\\text{base}=e$, which results in the natural logarithm i.e. $\\log_e = \\ln$. This default choice can be overridden via the argument 'base' during the entropy calculation. In particular, specifying $\\text{base}=2$ results in the classical Shannon entropy expressed in bits, whereas specifying $\\text{base}=10$ produces the entropy in decimal bits (dits or Hartleys)." 
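To make the role of the `base` argument concrete, here is a minimal sketch (not part of the package) that measures the entropy of one fixed discrete distribution in nats, bits, and Hartleys; only the logarithm changes, the distribution does not.

```python
# Minimal sketch: the same distribution expressed in different entropy units.
import numpy as np

probabilities = np.array([0.5, 0.25, 0.25])

for base, unit in [(np.e, 'nats'), (2.0, 'bits'), (10.0, 'hartleys')]:
    entropy = -np.sum(probabilities * np.log(probabilities)) / np.log(base)
    print(f'H = {entropy:.4f} {unit}')
# The three numbers differ only by constant factors such as 1 / log(2).
```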
172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Continuous Case" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Entropy from Samples (via Statsmodels KDE Objects)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "metadata": { 192 | "ExecuteTime": { 193 | "end_time": "2020-07-31T03:56:39.517682Z", 194 | "start_time": "2020-07-31T03:56:39.341601Z" 195 | } 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Entropy of p = 2.531109986651922\n", 203 | "Entropy of q = 2.123343378353565\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "print(f'Entropy of p = {entropy_from_samples(sample_p, discrete=False)}')\n", 209 | "print(f'Entropy of q = {entropy_from_samples(sample_q, discrete=False)}')" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Entropy from Statsmodels KDE Objects (via Statsmodels)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 7, 222 | "metadata": { 223 | "ExecuteTime": { 224 | "end_time": "2020-07-31T03:56:39.953586Z", 225 | "start_time": "2020-07-31T03:56:39.520153Z" 226 | } 227 | }, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "Entropy of p = 2.531114322639585\n", 234 | "Entropy of q = 2.1233454054445\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "print(f'Entropy of p = {kde_p.entropy}')\n", 240 | "print(f'Entropy of q = {kde_q.entropy}')" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "### Entropy from Statsmodels KDE Objects (via Divergence)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 8, 253 | "metadata": { 254 | "ExecuteTime": { 255 | "end_time": "2020-07-31T03:56:40.127364Z", 256 | "start_time": "2020-07-31T03:56:39.957380Z" 257 | } 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "Entropy of p = 2.531109986651922\n", 265 | "Entropy of q = 2.123343378353565\n" 266 | ] 267 | } 268 | ], 269 | "source": [ 270 | "print(f'Entropy of p = {entropy_from_kde(kde_p)}')\n", 271 | "print(f'Entropy of q = {entropy_from_kde(kde_q)}')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "### Entropy from Normal Probability Density Functions" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 9, 284 | "metadata": { 285 | "ExecuteTime": { 286 | "end_time": "2020-07-31T03:56:40.190449Z", 287 | "start_time": "2020-07-31T03:56:40.131586Z" 288 | } 289 | }, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | "Entropy of p = 2.517390423126535\n", 296 | "Entropy of q = 2.1120728496363306\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "print(f'Entropy of p = {entropy_from_density_with_support(pdf_p, p_min, p_max)}')\n", 302 | "print(f'Entropy of q = {entropy_from_density_with_support(pdf_q, q_min, q_max)}')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### Theoretical Entropy of a Normal Distribution" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 10, 315 | "metadata": { 316 | "ExecuteTime": { 317 | "end_time": "2020-07-31T03:56:40.228574Z", 318 | "start_time": 
"2020-07-31T03:56:40.191930Z" 319 | } 320 | }, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "Entropy of p = 2.5175508218727822\n", 327 | "Entropy of q = 2.112085713764618\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "def theoretical_entropy_of_normal_distribution(mu: float, sigma: float, log_fun: tp.Callable = np.log) -> float:\n", 333 | " return 0.5 * (1.0 + log_fun(2 * np.pi * sigma**2))\n", 334 | "\n", 335 | "print(f'Entropy of p = {theoretical_entropy_of_normal_distribution(mu_p, sigma_p)}')\n", 336 | "print(f'Entropy of q = {theoretical_entropy_of_normal_distribution(mu_q, sigma_q)}')" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "## Discrete Case" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 11, 349 | "metadata": { 350 | "ExecuteTime": { 351 | "end_time": "2020-07-31T03:56:40.268189Z", 352 | "start_time": "2020-07-31T03:56:40.230965Z" 353 | } 354 | }, 355 | "outputs": [ 356 | { 357 | "name": "stdout", 358 | "output_type": "stream", 359 | "text": [ 360 | "Entropy of p = 0.639031859650177\n", 361 | "Entropy of q = 1.0888999753452238\n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "print(f'Entropy of p = {discrete_entropy(multinomial_sample_p)}')\n", 367 | "print(f'Entropy of q = {discrete_entropy(multinomial_sample_q)}')" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "# Cross Entropy" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "The cross entropy of a distribution $q$ relative to a distribution $p$ is defined as \n", 382 | "\n", 383 | "$H_q(p) = - \\mathbb{E}_p \\left[ \\log_{\\text{base}} q \\right]$.\n", 384 | "\n", 385 | "With a base of 2, the cross-entropy of $q$ relative to $p$ is the average number of bits required to encode the signal in $p$ using a code optimized for the signal in $q$." 
386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "## Continuous Case" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### Cross Entropy from Samples (via Statsmodels KDE Objects)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 12, 405 | "metadata": { 406 | "ExecuteTime": { 407 | "end_time": "2020-07-31T03:56:40.738979Z", 408 | "start_time": "2020-07-31T03:56:40.269946Z" 409 | } 410 | }, 411 | "outputs": [ 412 | { 413 | "name": "stdout", 414 | "output_type": "stream", 415 | "text": [ 416 | "Cross Entropy of p relative to q = 2.9007913519550272\n", 417 | "Cross Entropy of q relative to p = 2.306094354037839\n" 418 | ] 419 | } 420 | ], 421 | "source": [ 422 | "print(f'Cross Entropy of p relative to q = {cross_entropy_from_samples(sample_p, sample_q, discrete=False)}')\n", 423 | "print(f'Cross Entropy of q relative to p = {cross_entropy_from_samples(sample_q, sample_p, discrete=False)}')" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "### Cross Entropy from Statsmodels KDE Objects" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 13, 436 | "metadata": { 437 | "ExecuteTime": { 438 | "end_time": "2020-07-31T03:56:43.995286Z", 439 | "start_time": "2020-07-31T03:56:40.740765Z" 440 | } 441 | }, 442 | "outputs": [ 443 | { 444 | "name": "stdout", 445 | "output_type": "stream", 446 | "text": [ 447 | "403 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 448 | ] 449 | } 450 | ], 451 | "source": [ 452 | "%timeit cross_entropy_from_kde(kde_p, kde_q), cross_entropy_from_kde(kde_q, kde_p)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 14, 458 | "metadata": { 459 | "ExecuteTime": { 460 | "end_time": "2020-07-31T03:56:44.448376Z", 461 | "start_time": "2020-07-31T03:56:43.997577Z" 462 | } 463 | }, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "Cross Entropy of p relative to q = 2.9007913519550272\n", 470 | "Cross Entropy of q relative to p = 2.306094354037839\n" 471 | ] 472 | } 473 | ], 474 | "source": [ 475 | "print(f'Cross Entropy of p relative to q = {cross_entropy_from_kde(kde_p, kde_q)}')\n", 476 | "print(f'Cross Entropy of q relative to p = {cross_entropy_from_kde(kde_q, kde_p)}')" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "### Cross Entropy from Normal Probability Density Functions" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 15, 489 | "metadata": { 490 | "ExecuteTime": { 491 | "end_time": "2020-07-31T03:56:44.518454Z", 492 | "start_time": "2020-07-31T03:56:44.450634Z" 493 | } 494 | }, 495 | "outputs": [ 496 | { 497 | "name": "stdout", 498 | "output_type": "stream", 499 | "text": [ 500 | "Cross Entropy of p relative to q = 2.86176079907269\n", 501 | "Cross Entropy of q relative to p = 2.295328590629144\n" 502 | ] 503 | } 504 | ], 505 | "source": [ 506 | "print(f'Cross Entropy of p relative to q = {cross_entropy_from_densities_with_support(pdf_p, pdf_q, combined_min, combined_max)}')\n", 507 | "print(f'Cross Entropy of q relative to p = {cross_entropy_from_densities_with_support(pdf_q, pdf_p, combined_min, combined_max)}')" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "## Discrete Case" 515 | ] 516 | }, 517 | { 518 | "cell_type": 
"code", 519 | "execution_count": 16, 520 | "metadata": { 521 | "ExecuteTime": { 522 | "end_time": "2020-07-31T03:56:44.914306Z", 523 | "start_time": "2020-07-31T03:56:44.520198Z" 524 | } 525 | }, 526 | "outputs": [ 527 | { 528 | "name": "stdout", 529 | "output_type": "stream", 530 | "text": [ 531 | "Cross Entropy of p relative to q = 0.9738271463645112\n", 532 | "Cross Entropy of q relative to p = 1.4708084763221114\n" 533 | ] 534 | } 535 | ], 536 | "source": [ 537 | "print(f'Cross Entropy of p relative to q = {discrete_cross_entropy(multinomial_sample_p, multinomial_sample_q)}')\n", 538 | "print(f'Cross Entropy of q relative to p = {discrete_cross_entropy(multinomial_sample_q, multinomial_sample_p)}')" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "# Relative Entropy (Kullback-Leibler Divergence)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "Relative entropy or Kullback-Leibler divergence measures the dispersion of two probability distributions $P$ and $Q$. It is defined as the difference between the cross entropy of $q$ relative to $p$ and the entropy of $p$\n", 553 | "\n", 554 | "$D_{KL} (P||Q) = \\mathbb{E}_p \\left[ \\log_{\\text{base}} \\left( \\frac{p}{q} \\right) \\right] = H_q(p) - H(p)$.\n", 555 | "\n", 556 | "With a base of 2, it can be interpreted as the average number of additional bits required to encode the signal in $p$ using a code optimized for the signal in $q$ over and above the number of bits required by the optimal code for $p$." 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "## Continuous Case" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "### Relative Entropy from Samples (via Statsmodels KDE Objects)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 17, 576 | "metadata": { 577 | "ExecuteTime": { 578 | "end_time": "2020-07-31T03:56:45.504577Z", 579 | "start_time": "2020-07-31T03:56:44.916068Z" 580 | } 581 | }, 582 | "outputs": [ 583 | { 584 | "name": "stdout", 585 | "output_type": "stream", 586 | "text": [ 587 | "Relative Entropy of p relative to q = 0.3696813653031077\n", 588 | "Relative Entropy of q relative to p = 0.18274894857179375\n" 589 | ] 590 | } 591 | ], 592 | "source": [ 593 | "print(f'Relative Entropy of p relative to q = {relative_entropy_from_samples(sample_p, sample_q, discrete=False)}')\n", 594 | "print(f'Relative Entropy of q relative to p = {relative_entropy_from_samples(sample_q, sample_p, discrete=False)}')" 595 | ] 596 | }, 597 | { 598 | "cell_type": "markdown", 599 | "metadata": {}, 600 | "source": [ 601 | "### Relative Entropy from Statsmodels KDE Objects" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 18, 607 | "metadata": { 608 | "ExecuteTime": { 609 | "end_time": "2020-07-31T03:56:46.081252Z", 610 | "start_time": "2020-07-31T03:56:45.506134Z" 611 | } 612 | }, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "Relative Entropy of p relative to q = 0.3696813653031077\n", 619 | "Relative Entropy of q relative to p = 0.18274894857179375\n" 620 | ] 621 | } 622 | ], 623 | "source": [ 624 | "print(f'Relative Entropy of p relative to q = {relative_entropy_from_kde(kde_p, kde_q)}')\n", 625 | "print(f'Relative Entropy of q relative to p = {relative_entropy_from_kde(kde_q, kde_p)}')" 626 | ] 627 | }, 628 | { 629 | 
"cell_type": "markdown", 630 | "metadata": {}, 631 | "source": [ 632 | "### Relative Entropy from Normal Probability Density Functions" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": 19, 638 | "metadata": { 639 | "ExecuteTime": { 640 | "end_time": "2020-07-31T03:56:46.142660Z", 641 | "start_time": "2020-07-31T03:56:46.082739Z" 642 | } 643 | }, 644 | "outputs": [ 645 | { 646 | "name": "stdout", 647 | "output_type": "stream", 648 | "text": [ 649 | "Relative Entropy from p to q = 0.34437037594615566\n", 650 | "Relative Entropy from q to p = 0.1832428925442867\n" 651 | ] 652 | } 653 | ], 654 | "source": [ 655 | "print(f'Relative Entropy from p to q = {relative_entropy_from_densities_with_support(pdf_p, pdf_q, combined_min, combined_max)}')\n", 656 | "print(f'Relative Entropy from q to p = {relative_entropy_from_densities_with_support(pdf_q, pdf_p, combined_min, combined_max)}')" 657 | ] 658 | }, 659 | { 660 | "cell_type": "markdown", 661 | "metadata": {}, 662 | "source": [ 663 | "### Theoretical Relative Entropy for Normal Distributions" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 20, 669 | "metadata": { 670 | "ExecuteTime": { 671 | "end_time": "2020-07-31T03:56:46.177322Z", 672 | "start_time": "2020-07-31T03:56:46.144357Z" 673 | } 674 | }, 675 | "outputs": [ 676 | { 677 | "name": "stdout", 678 | "output_type": "stream", 679 | "text": [ 680 | "Relative Entropy from p to q = 0.34453489189183556\n", 681 | "Relative Entropy from q to p = 0.18324288588594217\n" 682 | ] 683 | } 684 | ], 685 | "source": [ 686 | "def relative_entropy_between_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, log_fun: tp.Callable = np.log):\n", 687 | " return ((mu_1 - mu_2)**2 + sigma_1**2 - sigma_2**2 ) / (2 * sigma_2**2) + log_fun(sigma_2/sigma_1)\n", 688 | "\n", 689 | "print(f'Relative Entropy from p to q = {relative_entropy_between_normal_distributions(mu_p, sigma_p, mu_q, sigma_q)}')\n", 690 | "print(f'Relative Entropy from q to p = {relative_entropy_between_normal_distributions(mu_q, sigma_q, mu_p, sigma_p)}')" 691 | ] 692 | }, 693 | { 694 | "cell_type": "markdown", 695 | "metadata": {}, 696 | "source": [ 697 | "## Discrete Case" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": 21, 703 | "metadata": { 704 | "ExecuteTime": { 705 | "end_time": "2020-07-31T03:56:46.216253Z", 706 | "start_time": "2020-07-31T03:56:46.178950Z" 707 | } 708 | }, 709 | "outputs": [ 710 | { 711 | "name": "stdout", 712 | "output_type": "stream", 713 | "text": [ 714 | "Relative Entropy of p relative to q = 0.3347952867143343\n", 715 | "Relative Entropy of q relative to p = 0.3819085009768876\n" 716 | ] 717 | } 718 | ], 719 | "source": [ 720 | "print(f'Relative Entropy of p relative to q = {discrete_relative_entropy(multinomial_sample_p, multinomial_sample_q)}')\n", 721 | "print(f'Relative Entropy of q relative to p = {discrete_relative_entropy(multinomial_sample_q, multinomial_sample_p)}')" 722 | ] 723 | }, 724 | { 725 | "cell_type": "markdown", 726 | "metadata": {}, 727 | "source": [ 728 | "# Jensen-Shannon Divergence" 729 | ] 730 | }, 731 | { 732 | "cell_type": "markdown", 733 | "metadata": {}, 734 | "source": [ 735 | "The Jensen-Shannon divergence, a symmetric measure of the divergence of probability distributions, is defined as\n", 736 | "\n", 737 | "$JSD(p||q) = \\frac{1}{2} D_{KL} (p||m) + \\frac{1}{2} D_{KL} (q||m)$, \n", 738 | "\n", 739 | "where $m = \\frac{1}{2} \\left( p + q \\right)$.\n", 740 | "\n", 741 | "For base 2, the JSD is bounded 
between 0 and 1. For base $e$, it is bounded between $0$ and $\\ln(2)$." 742 | ] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": {}, 747 | "source": [ 748 | "## Continuous Case" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "### Jensen-Shannon Divergence from Samples (via Statsmodels KDE Objects)" 756 | ] 757 | }, 758 | { 759 | "cell_type": "code", 760 | "execution_count": 22, 761 | "metadata": { 762 | "ExecuteTime": { 763 | "end_time": "2020-07-31T03:56:48.168485Z", 764 | "start_time": "2020-07-31T03:56:46.218072Z" 765 | } 766 | }, 767 | "outputs": [ 768 | { 769 | "name": "stdout", 770 | "output_type": "stream", 771 | "text": [ 772 | "Jensen-Shannon Divergence between p and q = 0.052550634833070334\n", 773 | "Jensen-Shannon Divergence between q and p = 0.052550634833070334\n" 774 | ] 775 | } 776 | ], 777 | "source": [ 778 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_samples(sample_p, sample_q, discrete=False)}')\n", 779 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_samples(sample_q, sample_p, discrete=False)}')" 780 | ] 781 | }, 782 | { 783 | "cell_type": "markdown", 784 | "metadata": {}, 785 | "source": [ 786 | "### Jensen-Shannon Divergence from Statsmodels KDE Objects" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 23, 792 | "metadata": { 793 | "ExecuteTime": { 794 | "end_time": "2020-07-31T03:56:50.119943Z", 795 | "start_time": "2020-07-31T03:56:48.176238Z" 796 | } 797 | }, 798 | "outputs": [ 799 | { 800 | "name": "stdout", 801 | "output_type": "stream", 802 | "text": [ 803 | "Jensen-Shannon Divergence between p and q = 0.052550634833070334\n", 804 | "Jensen-Shannon Divergence between q and p = 0.052550634833070334\n" 805 | ] 806 | } 807 | ], 808 | "source": [ 809 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_kde(kde_p, kde_q)}')\n", 810 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_kde(kde_q, kde_p)}')" 811 | ] 812 | }, 813 | { 814 | "cell_type": "markdown", 815 | "metadata": {}, 816 | "source": [ 817 | "### Jensen-Shannon Divergence from Normal Probability Density Functions" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": 24, 823 | "metadata": { 824 | "ExecuteTime": { 825 | "end_time": "2020-07-31T03:56:50.286626Z", 826 | "start_time": "2020-07-31T03:56:50.123680Z" 827 | } 828 | }, 829 | "outputs": [ 830 | { 831 | "name": "stdout", 832 | "output_type": "stream", 833 | "text": [ 834 | "Jensen-Shannon Divergence between p and q = 0.05290044224944191\n", 835 | "Jensen-Shannon Divergence between q and p = 0.05290044224944191\n" 836 | ] 837 | } 838 | ], 839 | "source": [ 840 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_densities_with_support(pdf_p, pdf_q, combined_min, combined_max)}')\n", 841 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_densities_with_support(pdf_q, pdf_p, combined_min, combined_max)}')" 842 | ] 843 | }, 844 | { 845 | "cell_type": "markdown", 846 | "metadata": {}, 847 | "source": [ 848 | "### Jensen-Shannon Divergence from Statsmodels KDE Objects in Bits" 849 | ] 850 | }, 851 | { 852 | "cell_type": "code", 853 | "execution_count": 25, 854 | "metadata": { 855 | "ExecuteTime": { 856 | "end_time": "2020-07-31T03:56:52.790953Z", 857 | "start_time": "2020-07-31T03:56:50.288603Z" 858 | } 859 | }, 860 
| "outputs": [ 861 | { 862 | "name": "stdout", 863 | "output_type": "stream", 864 | "text": [ 865 | "Jensen-Shannon Divergence between p and q = 0.07581454026923815\n", 866 | "Jensen-Shannon Divergence between q and p = 0.07581454026923815\n" 867 | ] 868 | } 869 | ], 870 | "source": [ 871 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_kde(kde_p, kde_q, base=2.0)}')\n", 872 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_kde(kde_q, kde_p, base=2.0)}')" 873 | ] 874 | }, 875 | { 876 | "cell_type": "markdown", 877 | "metadata": {}, 878 | "source": [ 879 | "## Discrete Case" 880 | ] 881 | }, 882 | { 883 | "cell_type": "markdown", 884 | "metadata": {}, 885 | "source": [ 886 | "### Calculation Function Specific to Discrete Distributions" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": 26, 892 | "metadata": { 893 | "ExecuteTime": { 894 | "end_time": "2020-07-31T03:56:52.833513Z", 895 | "start_time": "2020-07-31T03:56:52.792870Z" 896 | } 897 | }, 898 | "outputs": [ 899 | { 900 | "name": "stdout", 901 | "output_type": "stream", 902 | "text": [ 903 | "Jensen-Shannon Divergence between p and q = 0.0863046217355343\n", 904 | "Jensen-Shannon Divergence between q and p = 0.0863046217355343\n" 905 | ] 906 | } 907 | ], 908 | "source": [ 909 | "print(f'Jensen-Shannon Divergence between p and q = {discrete_jensen_shannon_divergence(multinomial_sample_p, multinomial_sample_q)}')\n", 910 | "print(f'Jensen-Shannon Divergence between q and p = {discrete_jensen_shannon_divergence(multinomial_sample_q, multinomial_sample_p)}')" 911 | ] 912 | }, 913 | { 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "### Generic calculation functionality covering samples from continuous as well as discrete distributions" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": 27, 923 | "metadata": { 924 | "ExecuteTime": { 925 | "end_time": "2020-07-31T03:56:52.877694Z", 926 | "start_time": "2020-07-31T03:56:52.835345Z" 927 | } 928 | }, 929 | "outputs": [ 930 | { 931 | "name": "stdout", 932 | "output_type": "stream", 933 | "text": [ 934 | "Jensen-Shannon Divergence between p and q = 0.0863046217355343\n", 935 | "Jensen-Shannon Divergence between q and p = 0.0863046217355343\n" 936 | ] 937 | } 938 | ], 939 | "source": [ 940 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_samples(multinomial_sample_p, multinomial_sample_q, discrete=True)}')\n", 941 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_samples(multinomial_sample_q, multinomial_sample_p, discrete=True)}')" 942 | ] 943 | }, 944 | { 945 | "cell_type": "markdown", 946 | "metadata": {}, 947 | "source": [ 948 | "# Mutual Information" 949 | ] 950 | }, 951 | { 952 | "cell_type": "markdown", 953 | "metadata": {}, 954 | "source": [ 955 | "Mutual information is a measure of measure of mutual dependence of random variables that goes beyond linear dependence measured by correlation. It is defined as the KL-divergence between the joint density of two random variables $x$ and $y$ and the product of their marginal densities, i.e. \n", 956 | "\n", 957 | "$I(X; Y) = D_KL(p_{x, y}|| p_x \\otimes p_y) = E_{p_{x, y}} \\left[ \\log_{\\text{base}} \\left( \\frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \\right) \\right]$. 
\n", 958 | "\n" 959 | ] 960 | }, 961 | { 962 | "cell_type": "markdown", 963 | "metadata": {}, 964 | "source": [ 965 | "## Continuous Case" 966 | ] 967 | }, 968 | { 969 | "cell_type": "markdown", 970 | "metadata": {}, 971 | "source": [ 972 | "### Construct Artificial Data from a Bi-Variate Normal Distribution" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 28, 978 | "metadata": { 979 | "ExecuteTime": { 980 | "end_time": "2020-07-31T03:56:52.940599Z", 981 | "start_time": "2020-07-31T03:56:52.879443Z" 982 | } 983 | }, 984 | "outputs": [], 985 | "source": [ 986 | "# set parameters of the normal distributions x and y\n", 987 | "mu_x = 2\n", 988 | "sigma_x = 3\n", 989 | "mu_y = 1\n", 990 | "sigma_y = 2\n", 991 | "rho = 0.5\n", 992 | "\n", 993 | "# draw 1000 samples from each normal distribution\n", 994 | "n = 10000\n", 995 | "z = np.random.randn(n)\n", 996 | "sample_x = mu_x + sigma_x * z\n", 997 | "sample_y = mu_y + sigma_y * (rho * z + np.sqrt(1.0 - rho**2) * np.random.randn(n))\n", 998 | "\n", 999 | "# fit a non-parametric density estimate for both distributions\n", 1000 | "kde_x = sm.nonparametric.KDEUnivariate(sample_x)\n", 1001 | "kde_y = sm.nonparametric.KDEUnivariate(sample_y)\n", 1002 | "kde_x.fit() # Estimate the densities\n", 1003 | "kde_y.fit() # Estimate the densities\n", 1004 | "kde_xy = sp.stats.gaussian_kde([sample_x, sample_y])\n", 1005 | "\n", 1006 | "# construct exact normal densities for x and y\n", 1007 | "pdf_x = lambda x: sp.stats.norm.pdf(x, mu_x, sigma_x)\n", 1008 | "pdf_y = lambda y: sp.stats.norm.pdf(y, mu_y, sigma_y)\n", 1009 | "pdf_xy = sp.stats.multivariate_normal(mean=[mu_x, mu_y], cov=[[sigma_x**2, rho * sigma_x * sigma_y], [rho * sigma_x * sigma_y, sigma_y**2]]).pdf\n", 1010 | "\n", 1011 | "# # compute support for kernel density estimates\n", 1012 | "x_min = min(kde_x.support)\n", 1013 | "x_max = max(kde_x.support)\n", 1014 | "y_min = min(kde_y.support)\n", 1015 | "y_max = max(kde_y.support)" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "markdown", 1020 | "metadata": {}, 1021 | "source": [ 1022 | "### Mutual Information from Samples (via Statsmodels KDE Objects)" 1023 | ] 1024 | }, 1025 | { 1026 | "cell_type": "code", 1027 | "execution_count": 29, 1028 | "metadata": { 1029 | "ExecuteTime": { 1030 | "end_time": "2020-07-31T03:57:26.465180Z", 1031 | "start_time": "2020-07-31T03:56:52.942483Z" 1032 | } 1033 | }, 1034 | "outputs": [ 1035 | { 1036 | "name": "stdout", 1037 | "output_type": "stream", 1038 | "text": [ 1039 | "Mutual Information of x and y = 0.14540631373336696\n" 1040 | ] 1041 | } 1042 | ], 1043 | "source": [ 1044 | "print(f'Mutual Information of x and y = {mutual_information_from_samples(sample_x, sample_y)}')" 1045 | ] 1046 | }, 1047 | { 1048 | "cell_type": "markdown", 1049 | "metadata": {}, 1050 | "source": [ 1051 | "### Mutual Information from Statsmodels KDE Objects" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 30, 1057 | "metadata": { 1058 | "ExecuteTime": { 1059 | "end_time": "2020-07-31T03:57:57.222125Z", 1060 | "start_time": "2020-07-31T03:57:26.466817Z" 1061 | } 1062 | }, 1063 | "outputs": [ 1064 | { 1065 | "name": "stdout", 1066 | "output_type": "stream", 1067 | "text": [ 1068 | "Mutual Information of x and y = 0.14540631373336696\n" 1069 | ] 1070 | } 1071 | ], 1072 | "source": [ 1073 | "print(f'Mutual Information of x and y = {mutual_information_from_kde(kde_x, kde_y, kde_xy)}')" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "markdown", 1078 | "metadata": {}, 1079 | 
"source": [ 1080 | "### Mutual Information from Normal Probability Density Functions" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "code", 1085 | "execution_count": 31, 1086 | "metadata": { 1087 | "ExecuteTime": { 1088 | "end_time": "2020-07-31T03:57:59.131119Z", 1089 | "start_time": "2020-07-31T03:57:57.223582Z" 1090 | } 1091 | }, 1092 | "outputs": [ 1093 | { 1094 | "name": "stdout", 1095 | "output_type": "stream", 1096 | "text": [ 1097 | "Mutual Information of x and y = 0.14384103152628203\n" 1098 | ] 1099 | } 1100 | ], 1101 | "source": [ 1102 | "print(f'Mutual Information of x and y = {mutual_information_from_densities_with_support(pdf_x, pdf_y, pdf_xy, x_min=-20, x_max=20, y_min=-20, y_max=20)}')" 1103 | ] 1104 | }, 1105 | { 1106 | "cell_type": "markdown", 1107 | "metadata": {}, 1108 | "source": [ 1109 | "### Theoretical Mutual Information of Bi-Variate Normal Distributions" 1110 | ] 1111 | }, 1112 | { 1113 | "cell_type": "code", 1114 | "execution_count": 32, 1115 | "metadata": { 1116 | "ExecuteTime": { 1117 | "end_time": "2020-07-31T03:57:59.163352Z", 1118 | "start_time": "2020-07-31T03:57:59.132515Z" 1119 | } 1120 | }, 1121 | "outputs": [ 1122 | { 1123 | "name": "stdout", 1124 | "output_type": "stream", 1125 | "text": [ 1126 | "Mutual Information of x and y = 0.14384103622589045\n" 1127 | ] 1128 | } 1129 | ], 1130 | "source": [ 1131 | "def mutual_information_for_bivariate_normal_distribution(rho: float, \n", 1132 | " log_fun: tp.Callable = np.log) -> float:\n", 1133 | " return - 0.5 * np.log(1.0 - rho**2)\n", 1134 | "\n", 1135 | "print(f'Mutual Information of x and y = {mutual_information_for_bivariate_normal_distribution(rho)}')" 1136 | ] 1137 | }, 1138 | { 1139 | "cell_type": "markdown", 1140 | "metadata": {}, 1141 | "source": [ 1142 | "## Discrete Case" 1143 | ] 1144 | }, 1145 | { 1146 | "cell_type": "markdown", 1147 | "metadata": {}, 1148 | "source": [ 1149 | "Construct two discrete samples" 1150 | ] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "execution_count": 33, 1155 | "metadata": { 1156 | "ExecuteTime": { 1157 | "end_time": "2020-07-31T03:57:59.197908Z", 1158 | "start_time": "2020-07-31T03:57:59.165032Z" 1159 | } 1160 | }, 1161 | "outputs": [], 1162 | "source": [ 1163 | "discrete_sample_x = np.array([1, 1, 3, 1, 2, 3])\n", 1164 | "discrete_sample_y = np.array([1, 1, 1, 3, 2, 1])" 1165 | ] 1166 | }, 1167 | { 1168 | "cell_type": "code", 1169 | "execution_count": 34, 1170 | "metadata": { 1171 | "ExecuteTime": { 1172 | "end_time": "2020-07-31T03:57:59.549553Z", 1173 | "start_time": "2020-07-31T03:57:59.199548Z" 1174 | }, 1175 | "scrolled": false 1176 | }, 1177 | "outputs": [ 1178 | { 1179 | "name": "stdout", 1180 | "output_type": "stream", 1181 | "text": [ 1182 | "The mutual information of x and y is 0.5493061443340548\n" 1183 | ] 1184 | } 1185 | ], 1186 | "source": [ 1187 | "print(f'The mutual information of x and y is {discrete_mutual_information(discrete_sample_x, discrete_sample_y, base=np.e)}')" 1188 | ] 1189 | }, 1190 | { 1191 | "cell_type": "markdown", 1192 | "metadata": {}, 1193 | "source": [ 1194 | "Mutual Information is symmetric" 1195 | ] 1196 | }, 1197 | { 1198 | "cell_type": "code", 1199 | "execution_count": 35, 1200 | "metadata": { 1201 | "ExecuteTime": { 1202 | "end_time": "2020-07-31T03:57:59.583343Z", 1203 | "start_time": "2020-07-31T03:57:59.551049Z" 1204 | } 1205 | }, 1206 | "outputs": [ 1207 | { 1208 | "name": "stdout", 1209 | "output_type": "stream", 1210 | "text": [ 1211 | "The mutual information of y and x is 0.5493061443340548\n" 1212 | ] 
1213 | } 1214 | ], 1215 | "source": [ 1216 | "print(f'The mutual information of y and x is {discrete_mutual_information(discrete_sample_y, discrete_sample_x, base=np.e)}')" 1217 | ] 1218 | }, 1219 | { 1220 | "cell_type": "markdown", 1221 | "metadata": {}, 1222 | "source": [ 1223 | "# Joint Entropy" 1224 | ] 1225 | }, 1226 | { 1227 | "cell_type": "markdown", 1228 | "metadata": {}, 1229 | "source": [ 1230 | "The joint entropy of the random variables x and y with joint density $p_{x, y}$ is defined as \n", 1231 | "\n", 1232 | "$H(X, Y) = - E_{p_{x, y}} \\left[ \\log_{\\text{base}} p_{x, y} (x, y) \\right]$.\n", 1233 | "\n", 1234 | "Joint entropy is symmetric, i.e. \n", 1235 | "\n", 1236 | "$H(X, Y) = H(Y, X)$." 1237 | ] 1238 | }, 1239 | { 1240 | "cell_type": "markdown", 1241 | "metadata": {}, 1242 | "source": [ 1243 | "## Continuous Case" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "execution_count": 36, 1249 | "metadata": { 1250 | "ExecuteTime": { 1251 | "end_time": "2020-07-31T03:58:03.608382Z", 1252 | "start_time": "2020-07-31T03:57:59.584775Z" 1253 | } 1254 | }, 1255 | "outputs": [ 1256 | { 1257 | "name": "stdout", 1258 | "output_type": "stream", 1259 | "text": [ 1260 | "Joint entropy of x and y = 4.475745990640665\n" 1261 | ] 1262 | } 1263 | ], 1264 | "source": [ 1265 | "joint_entropy_of_x_and_y = joint_entropy_from_samples(sample_x, sample_y)\n", 1266 | "print(f'Joint entropy of x and y = {joint_entropy_of_x_and_y}')" 1267 | ] 1268 | }, 1269 | { 1270 | "cell_type": "markdown", 1271 | "metadata": {}, 1272 | "source": [ 1273 | "## Discrete Case" 1274 | ] 1275 | }, 1276 | { 1277 | "cell_type": "code", 1278 | "execution_count": 37, 1279 | "metadata": { 1280 | "ExecuteTime": { 1281 | "end_time": "2020-07-31T03:58:03.640898Z", 1282 | "start_time": "2020-07-31T03:58:03.610576Z" 1283 | } 1284 | }, 1285 | "outputs": [ 1286 | { 1287 | "name": "stdout", 1288 | "output_type": "stream", 1289 | "text": [ 1290 | "The joint entropy of x and y is 1.3296613488547582\n" 1291 | ] 1292 | } 1293 | ], 1294 | "source": [ 1295 | "print(f'The joint entropy of x and y is {discrete_joint_entropy(discrete_sample_x, discrete_sample_y, base=np.e)}')" 1296 | ] 1297 | }, 1298 | { 1299 | "cell_type": "markdown", 1300 | "metadata": {}, 1301 | "source": [ 1302 | "# Conditional Entropy" 1303 | ] 1304 | }, 1305 | { 1306 | "cell_type": "markdown", 1307 | "metadata": {}, 1308 | "source": [ 1309 | "The conditional entropy of the random variable y given x with joint density $p_{x, y}$ and marginal density $p_x$ of $x$ is defined as \n", 1310 | "\n", 1311 | "$H(Y|X) = - E_{p_{x, y}} \\left[ \\log \\frac{p_{x, y} (x, y)}{p_x(x)} \\right]$. \n", 1312 | "\n", 1313 | "From this definition follows the change rule for conditional entropy\n", 1314 | "\n", 1315 | "\n", 1316 | "$H(X, Y) = H(X) + H(Y|X)$.\n", 1317 | "\n", 1318 | "Switching the roles of $x$ and $y$ and using the symmetry of joint entropy, we obtain \n", 1319 | "\n", 1320 | "$H(X, Y) = H(Y) + H(X|Y)$.\n", 1321 | "\n", 1322 | "Substracting second equation for joint entropyfrom the first and rearranging yields \n", 1323 | "\n", 1324 | "$H(Y) - H(Y|X) = H(X) - H(X|Y)$." 
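Both sides of the last identity equal the mutual information $I(X; Y)$. A minimal sketch verifying the chain rule and this symmetry for a small joint probability table (hypothetical numbers, natural logarithm), separate from the sample-based functions in the package:

```python
# Minimal sketch: chain rule and symmetry of conditional entropy for a
# discrete joint distribution p_xy[i, j] (rows index x, columns index y).
import numpy as np

p_xy = np.array([[0.2, 0.2],
                 [0.1, 0.5]])
p_x = p_xy.sum(axis=1)   # marginal p(x)
p_y = p_xy.sum(axis=0)   # marginal p(y)


def entropy(p: np.ndarray) -> float:
    p = p[p > 0.0]
    return -np.sum(p * np.log(p))


# H(Y|X) = - sum_{x, y} p(x, y) * log( p(x, y) / p(x) ), computed directly
h_y_given_x = -np.sum(p_xy * np.log(p_xy / p_x[:, np.newaxis]))
h_x_given_y = -np.sum(p_xy * np.log(p_xy / p_y[np.newaxis, :]))
h_xy = entropy(p_xy.ravel())

assert np.isclose(h_xy, entropy(p_x) + h_y_given_x)   # H(X, Y) = H(X) + H(Y|X)
assert np.isclose(h_xy, entropy(p_y) + h_x_given_y)   # H(X, Y) = H(Y) + H(X|Y)
assert np.isclose(entropy(p_y) - h_y_given_x,
                  entropy(p_x) - h_x_given_y)          # both equal I(X; Y)
```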
1325 | ] 1326 | }, 1327 | { 1328 | "cell_type": "markdown", 1329 | "metadata": {}, 1330 | "source": [ 1331 | "## Continuous Case" 1332 | ] 1333 | }, 1334 | { 1335 | "cell_type": "code", 1336 | "execution_count": 38, 1337 | "metadata": { 1338 | "ExecuteTime": { 1339 | "end_time": "2020-07-31T03:58:14.694029Z", 1340 | "start_time": "2020-07-31T03:58:03.642453Z" 1341 | } 1342 | }, 1343 | "outputs": [ 1344 | { 1345 | "name": "stdout", 1346 | "output_type": "stream", 1347 | "text": [ 1348 | "Conditional entropy of y given x = 1.9912929526616132\n" 1349 | ] 1350 | } 1351 | ], 1352 | "source": [ 1353 | "conditional_entropy_of_y_given_x = conditional_entropy_from_samples(sample_x, sample_y)\n", 1354 | "print(f'Conditional entropy of y given x = {conditional_entropy_of_y_given_x}')" 1355 | ] 1356 | }, 1357 | { 1358 | "cell_type": "code", 1359 | "execution_count": 39, 1360 | "metadata": { 1361 | "ExecuteTime": { 1362 | "end_time": "2020-07-31T03:58:24.841970Z", 1363 | "start_time": "2020-07-31T03:58:14.696527Z" 1364 | } 1365 | }, 1366 | "outputs": [ 1367 | { 1368 | "name": "stdout", 1369 | "output_type": "stream", 1370 | "text": [ 1371 | "Conditional entropy of x given y = 2.3857520195720445\n" 1372 | ] 1373 | } 1374 | ], 1375 | "source": [ 1376 | "conditional_entropy_of_x_given_y = conditional_entropy_from_samples(sample_y, sample_x)\n", 1377 | "print(f'Conditional entropy of x given y = {conditional_entropy_of_x_given_y}')" 1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "markdown", 1382 | "metadata": {}, 1383 | "source": [ 1384 | "Check whether the chain rule of conditional entropy is satisfied" 1385 | ] 1386 | }, 1387 | { 1388 | "cell_type": "code", 1389 | "execution_count": 40, 1390 | "metadata": { 1391 | "ExecuteTime": { 1392 | "end_time": "2020-07-31T03:58:24.923287Z", 1393 | "start_time": "2020-07-31T03:58:24.843722Z" 1394 | } 1395 | }, 1396 | "outputs": [ 1397 | { 1398 | "data": { 1399 | "text/plain": [ 1400 | "True" 1401 | ] 1402 | }, 1403 | "execution_count": 40, 1404 | "metadata": {}, 1405 | "output_type": "execute_result" 1406 | } 1407 | ], 1408 | "source": [ 1409 | "np.isclose(entropy_from_samples(sample_x) + conditional_entropy_of_y_given_x, joint_entropy_of_x_and_y, rtol=1e-2, atol=1e-3)" 1410 | ] 1411 | }, 1412 | { 1413 | "cell_type": "code", 1414 | "execution_count": 41, 1415 | "metadata": { 1416 | "ExecuteTime": { 1417 | "end_time": "2020-07-31T03:58:24.995415Z", 1418 | "start_time": "2020-07-31T03:58:24.925091Z" 1419 | }, 1420 | "scrolled": true 1421 | }, 1422 | "outputs": [ 1423 | { 1424 | "data": { 1425 | "text/plain": [ 1426 | "True" 1427 | ] 1428 | }, 1429 | "execution_count": 41, 1430 | "metadata": {}, 1431 | "output_type": "execute_result" 1432 | } 1433 | ], 1434 | "source": [ 1435 | "np.isclose(entropy_from_samples(sample_y) + conditional_entropy_of_x_given_y, joint_entropy_of_x_and_y, rtol=1e-2, atol=1e-3)" 1436 | ] 1437 | }, 1438 | { 1439 | "cell_type": "code", 1440 | "execution_count": 42, 1441 | "metadata": { 1442 | "ExecuteTime": { 1443 | "end_time": "2020-07-31T03:58:32.421300Z", 1444 | "start_time": "2020-07-31T03:58:24.996920Z" 1445 | }, 1446 | "scrolled": false 1447 | }, 1448 | "outputs": [ 1449 | { 1450 | "name": "stdout", 1451 | "output_type": "stream", 1452 | "text": [ 1453 | "Conditional entropy of y given x (on gpu) = 1.9912435966457076\n" 1454 | ] 1455 | } 1456 | ], 1457 | "source": [ 1458 | "conditional_entropy_of_y_given_x_gpu = \\\n", 1459 | " continuous_conditional_entropy_from_samples_gpu(\n", 1460 | " sample_x, \n", 1461 | " sample_y, \n", 1462 | " 
maximum_number_of_elements_per_batch=-1)\n", 1463 | "print(f'Conditional entropy of y given x (on gpu) = {conditional_entropy_of_y_given_x_gpu}')" 1464 | ] 1465 | }, 1466 | { 1467 | "cell_type": "code", 1468 | "execution_count": 43, 1469 | "metadata": { 1470 | "ExecuteTime": { 1471 | "end_time": "2020-07-31T03:58:37.384352Z", 1472 | "start_time": "2020-07-31T03:58:32.423384Z" 1473 | } 1474 | }, 1475 | "outputs": [ 1476 | { 1477 | "name": "stdout", 1478 | "output_type": "stream", 1479 | "text": [ 1480 | "Conditional entropy of x given y (on gpu) = 2.385699213557811\n" 1481 | ] 1482 | } 1483 | ], 1484 | "source": [ 1485 | "conditional_entropy_of_x_given_y_gpu = continuous_conditional_entropy_from_samples_gpu(sample_y, sample_x)\n", 1486 | "print(f'Conditional entropy of x given y (on gpu) = {conditional_entropy_of_x_given_y_gpu}')" 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "markdown", 1491 | "metadata": {}, 1492 | "source": [ 1493 | "## Discrete Case" 1494 | ] 1495 | }, 1496 | { 1497 | "cell_type": "code", 1498 | "execution_count": 44, 1499 | "metadata": { 1500 | "ExecuteTime": { 1501 | "end_time": "2020-07-31T03:58:37.677112Z", 1502 | "start_time": "2020-07-31T03:58:37.386846Z" 1503 | } 1504 | }, 1505 | "outputs": [ 1506 | { 1507 | "name": "stdout", 1508 | "output_type": "stream", 1509 | "text": [ 1510 | "The conditional entropy of y given x is 0.31825708414740644\n" 1511 | ] 1512 | } 1513 | ], 1514 | "source": [ 1515 | "print(f'The conditional entropy of y given x is {discrete_conditional_entropy_of_y_given_x(discrete_sample_x, discrete_sample_y, base=np.e)}')" 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "markdown", 1520 | "metadata": {}, 1521 | "source": [ 1522 | "We can verify the chain rule for conditional entropy:" 1523 | ] 1524 | }, 1525 | { 1526 | "cell_type": "code", 1527 | "execution_count": 45, 1528 | "metadata": { 1529 | "ExecuteTime": { 1530 | "end_time": "2020-07-31T03:58:37.712640Z", 1531 | "start_time": "2020-07-31T03:58:37.679034Z" 1532 | } 1533 | }, 1534 | "outputs": [ 1535 | { 1536 | "data": { 1537 | "text/plain": [ 1538 | "True" 1539 | ] 1540 | }, 1541 | "execution_count": 45, 1542 | "metadata": {}, 1543 | "output_type": "execute_result" 1544 | } 1545 | ], 1546 | "source": [ 1547 | "np.isclose(discrete_entropy(discrete_sample_y) + discrete_conditional_entropy_of_y_given_x(discrete_sample_y, discrete_sample_x), discrete_joint_entropy(discrete_sample_x, discrete_sample_y))" 1548 | ] 1549 | }, 1550 | { 1551 | "cell_type": "code", 1552 | "execution_count": 46, 1553 | "metadata": { 1554 | "ExecuteTime": { 1555 | "end_time": "2020-07-31T03:58:37.750939Z", 1556 | "start_time": "2020-07-31T03:58:37.714443Z" 1557 | } 1558 | }, 1559 | "outputs": [ 1560 | { 1561 | "data": { 1562 | "text/plain": [ 1563 | "True" 1564 | ] 1565 | }, 1566 | "execution_count": 46, 1567 | "metadata": {}, 1568 | "output_type": "execute_result" 1569 | } 1570 | ], 1571 | "source": [ 1572 | "np.isclose(discrete_entropy(discrete_sample_x) + discrete_conditional_entropy_of_y_given_x(discrete_sample_x, discrete_sample_y), discrete_joint_entropy(discrete_sample_y, discrete_sample_x))" 1573 | ] 1574 | }, 1575 | { 1576 | "cell_type": "code", 1577 | "execution_count": null, 1578 | "metadata": {}, 1579 | "outputs": [], 1580 | "source": [] 1581 | } 1582 | ], 1583 | "metadata": { 1584 | "kernelspec": { 1585 | "display_name": "Python 3", 1586 | "language": "python", 1587 | "name": "python3" 1588 | }, 1589 | "language_info": { 1590 | "codemirror_mode": { 1591 | "name": "ipython", 1592 | "version": 3 1593 | }, 
1594 | "file_extension": ".py", 1595 | "mimetype": "text/x-python", 1596 | "name": "python", 1597 | "nbconvert_exporter": "python", 1598 | "pygments_lexer": "ipython3", 1599 | "version": "3.8.3" 1600 | }, 1601 | "toc": { 1602 | "base_numbering": 1, 1603 | "nav_menu": {}, 1604 | "number_sections": true, 1605 | "sideBar": true, 1606 | "skip_h1_title": false, 1607 | "title_cell": "Table of Contents", 1608 | "title_sidebar": "Contents", 1609 | "toc_cell": false, 1610 | "toc_position": { 1611 | "height": "calc(100% - 180px)", 1612 | "left": "10px", 1613 | "top": "150px", 1614 | "width": "295.4755554199219px" 1615 | }, 1616 | "toc_section_display": true, 1617 | "toc_window_display": true 1618 | } 1619 | }, 1620 | "nbformat": 4, 1621 | "nbformat_minor": 4 1622 | } 1623 | -------------------------------------------------------------------------------- /divergence/continuous.py: -------------------------------------------------------------------------------- 1 | from cocos.numerics.data_types import NumericArray 2 | from cocos.scientific.kde import ( 3 | gaussian_kde as cocos_gaussian_kde, 4 | evaluate_gaussian_kde_in_batches 5 | ) 6 | 7 | from cubature import cubature 8 | import numpy as np 9 | import scipy as sp 10 | import statsmodels.api as sm 11 | import typing as tp 12 | 13 | from divergence.base import _select_vectorized_log_fun_for_base 14 | 15 | 16 | def _get_min_and_max_support_for_scotts_bw_rule(x: np.ndarray, 17 | cut: float = 3) \ 18 | -> tp.Tuple[float, float]: 19 | bw = sm.nonparametric.bandwidths.bw_scott(x) 20 | a = np.min(x) - cut * bw 21 | b = np.max(x) + cut * bw 22 | 23 | return a, b 24 | 25 | 26 | def _get_min_and_max_support_for_silverman_bw_rule(x: np.ndarray, 27 | cut: float = 3) \ 28 | -> tp.Tuple[float, float]: 29 | bw = sm.nonparametric.bandwidths.bw_silverman(x) 30 | a = np.min(x) - cut * bw 31 | b = np.max(x) + cut * bw 32 | 33 | return a, b 34 | 35 | 36 | def intersection(a0: float, 37 | b0: float, 38 | a1: float, 39 | b1: float) \ 40 | -> tp.Optional[tp.Tuple[float, float]]: 41 | """ 42 | Calculate the intersection of two intervals [a0, b0] and [a1, b1]. If the intervals do not 43 | overlap the function returns None. The parameters must satisfy a0 <= b0 and a1 <= b1. 44 | 45 | Parameters 46 | ---------- 47 | a0: beginning of the first interval 48 | b0: end of the first interval 49 | a1: beginning of the second interval 50 | b1: end of the second interval 51 | 52 | Returns 53 | ------- 54 | 55 | """ 56 | assert a0 <= b0 57 | assert a1 <= b1 58 | 59 | if a0 >= b1: 60 | return None 61 | 62 | if b0 < a1: 63 | return None 64 | 65 | return max(a0, a1), min(b0, b1) 66 | 67 | 68 | ################################################################################ 69 | # Entropy 70 | ################################################################################ 71 | def entropy_from_density_with_support(pdf: tp.Callable, 72 | a: float, 73 | b: float, 74 | base: float = np.e, 75 | eps_abs: float = 1.49e-08, 76 | eps_rel: float = 1.49e-08) \ 77 | -> float: 78 | """ 79 | Compute the entropy 80 | 81 | H(p) = - E_p[log(p)] 82 | 83 | of the density given in pdf via numerical integration from a to b. 84 | The argument base can be used to specify the units in which the entropy is measured. 85 | The default choice is the natural logarithm. 
86 | 87 | Parameters 88 | ---------- 89 | pdf: a function of a scalar parameter which computes the probability density at that point 90 | a: lower bound of the integration region 91 | b: upper bound of the integration region 92 | base: the base of the logarithm used to control the units of measurement for the result 93 | eps_abs: absolute error tolerance for numerical integration 94 | eps_rel: relative error tolerance for numerical integration 95 | 96 | Returns 97 | ------- 98 | The entropy of the density given by pdf 99 | """ 100 | log_fun = _select_vectorized_log_fun_for_base(base) 101 | 102 | def entropy_integrand_vectorized_fast(x: np.ndarray): 103 | p = pdf(x) 104 | return - np.where(p > 0.0, p * log_fun(p), 0.0) 105 | 106 | return cubature(func=entropy_integrand_vectorized_fast, 107 | ndim=1, 108 | fdim=1, 109 | xmin=np.array([a]), 110 | xmax=np.array([b]), 111 | vectorized=False, 112 | adaptive='p', 113 | abserr=eps_abs, 114 | relerr=eps_rel)[0].item() 115 | 116 | 117 | def entropy_from_kde(kde: sm.nonparametric.KDEUnivariate, 118 | base: float = np.e, 119 | eps_abs: float = 1.49e-08, 120 | eps_rel: float = 1.49e-08) -> float: 121 | """ 122 | Compute the entropy 123 | 124 | H(p) = - E_p[log(p)] 125 | 126 | of the density given by the statsmodels kde object via numerical integration. 127 | The argument base can be used to specify the units in which the entropy is measured. 128 | The default choice is the natural logarithm. 129 | 130 | Parameters 131 | ---------- 132 | kde: statsmodels kde object representing an approximation of the density 133 | base: the base of the logarithm used to control the units of measurement for the result 134 | eps_abs: absolute error tolerance for numerical integration 135 | eps_rel: relative error tolerance for numerical integration 136 | 137 | Returns 138 | ------- 139 | The entropy of the density approximated by the kde 140 | """ 141 | a = min(kde.support) 142 | b = max(kde.support) 143 | return entropy_from_density_with_support(pdf=kde.evaluate, 144 | a=a, 145 | b=b, 146 | base=base, 147 | eps_abs=eps_abs, 148 | eps_rel=eps_rel) 149 | 150 | 151 | def continuous_entropy_from_sample(sample: np.ndarray, 152 | base: float = np.e, 153 | eps_abs: float = 1.49e-08, 154 | eps_rel: float = 1.49e-08) -> float: 155 | """ 156 | Compute the entropy 157 | 158 | H(p) = - E_p[log(p)] 159 | 160 | of a sample via approximation by a kernel density estimate and numerical integration. 161 | The argument base can be used to specify the units in which the entropy is measured. 162 | The default choice is the natural logarithm. 
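`entropy_from_density_with_support` evaluates $-\int_a^b p(x) \log p(x) \, dx$ with the adaptive `cubature` routine. As an independent sanity check (not part of the package), the same one-dimensional integral can be computed with `scipy.integrate.quad`; for $N(2, 9)$ it recovers the closed-form differential entropy reported in the notebook above.

```python
# Minimal sketch: cross-check the entropy integral with scipy.integrate.quad.
import numpy as np
from scipy.integrate import quad
from scipy.stats import norm

mu, sigma = 2.0, 3.0


def negative_p_log_p(x: float) -> float:
    p = norm.pdf(x, mu, sigma)
    return -p * np.log(p) if p > 0.0 else 0.0


entropy_estimate, _ = quad(negative_p_log_p, mu - 10.0 * sigma, mu + 10.0 * sigma)
print(entropy_estimate)                               # approximately 2.5176
print(0.5 * (1.0 + np.log(2.0 * np.pi * sigma**2)))   # closed form, same value
```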
163 | 164 | Parameters 165 | ---------- 166 | sample: a sample of draws from the density represented as a 1-dimensional NumPy array 167 | base: the base of the logarithm used to control the units of measurement for the result 168 | eps_abs: absolute error tolerance for numerical integration 169 | eps_rel: relative error tolerance for numerical integration 170 | 171 | Returns 172 | ------- 173 | The entropy of the density approximated by the sample 174 | """ 175 | kde = sm.nonparametric.KDEUnivariate(sample) 176 | kde.fit() 177 | return entropy_from_kde(kde=kde, 178 | base=base, 179 | eps_abs=eps_abs, 180 | eps_rel=eps_rel) 181 | 182 | 183 | ################################################################################ 184 | # Cross Entropy 185 | ################################################################################ 186 | def _cross_entropy_integrand(p: tp.Callable, 187 | q: tp.Callable, 188 | x: float, 189 | log_fun: tp.Callable) -> float: 190 | """ 191 | Compute the integrand p(x) * log(q(x)) at a given point x for the calculation of cross entropy. 192 | 193 | Parameters 194 | ---------- 195 | p: probability density function of the distribution p 196 | q: probability density function of the distribution q 197 | x: the point at which to evaluate the integrand 198 | base: the base of the logarithm used to control the units of measurement for the result 199 | 200 | Returns 201 | ------- 202 | Integrand for the cross entropy calculation 203 | """ 204 | # return p(x) * log_fun(q(x) + 1e-12) 205 | qx = q(x) 206 | px = p(x) 207 | if qx == 0.0: 208 | if px == 0.0: 209 | return 0.0 210 | else: 211 | raise ValueError(f'q(x) is zero at x={x} but p(x) is not') 212 | elif px == 0.0: 213 | return 0.0 214 | else: 215 | return px * log_fun(qx) 216 | 217 | 218 | def _vectorized_cross_entropy_integrand(p: tp.Callable, 219 | q: tp.Callable, 220 | x: np.ndarray, 221 | log_fun: tp.Callable) -> np.ndarray: 222 | """ 223 | Compute the integrand p(x) * log(q(x)) vectorized at given points x for the calculation of cross 224 | entropy. 225 | 226 | Parameters 227 | ---------- 228 | p: probability density function of the distribution p 229 | q: probability density function of the distribution q 230 | x: the point at which to evaluate the integrand 231 | base: the base of the logarithm used to control the units of measurement for the result 232 | 233 | Returns 234 | ------- 235 | Integrand for the cross entropy calculation 236 | """ 237 | # return p(x) * log_fun(q(x) + 1e-12) 238 | qx = q(x) 239 | px = p(x) 240 | 241 | q_positive_index = qx > 0.0 242 | p_positive_index = px > 0.0 243 | 244 | q_zero_but_p_positive_index = ~q_positive_index & p_positive_index 245 | if np.any(q_zero_but_p_positive_index): 246 | raise ValueError(f'q(x) is zero at x={x[q_zero_but_p_positive_index]} but p(x) is not') 247 | 248 | return - np.where(p_positive_index, px * log_fun(qx), 0.0) 249 | 250 | 251 | def cross_entropy_from_densities_with_support(p: tp.Callable, 252 | q: tp.Callable, 253 | a: float, 254 | b: float, 255 | base: float = np.e, 256 | eps_abs: float = 1.49e-08, 257 | eps_rel: float = 1.49e-08) -> float: 258 | """ 259 | Compute the cross entropy of the distribution q relative to the distribution p 260 | 261 | H_q(p) = - E_p [log(q)] 262 | 263 | via numerical integration from a to b. 264 | The argument base can be used to specify the units in which the entropy is measured. 265 | The default choice is the natural logarithm. 
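The scalar and vectorized integrands above encode the usual conventions: a point where $p(x) = 0$ contributes nothing (so $0 \cdot \log 0 := 0$), while $q(x) = 0$ with $p(x) > 0$ means the cross entropy is infinite, which the module reports as a `ValueError`. A stripped-down sketch of the same convention for arrays of density values (not the module's code):

```python
# Minimal sketch of the zero-handling convention used by the integrands:
# contributions vanish where p == 0, and q == 0 with p > 0 is an error.
import numpy as np


def cross_entropy_contributions(px: np.ndarray, qx: np.ndarray) -> np.ndarray:
    p_positive = px > 0.0
    if np.any(p_positive & (qx <= 0.0)):
        raise ValueError('q(x) is zero at a point where p(x) is not')
    # - p(x) * log(q(x)) where p(x) > 0, exactly 0.0 elsewhere
    safe_qx = np.where(p_positive, qx, 1.0)
    return -np.where(p_positive, px * np.log(safe_qx), 0.0)


print(cross_entropy_contributions(np.array([0.0, 0.2, 0.5]),
                                  np.array([0.0, 0.1, 0.6])))
```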
266 | 267 | Parameters 268 | ---------- 269 | p: probability density function of the distribution p 270 | q: probability density function of the distribution q 271 | a: lower bound of the integration region 272 | b: upper bound of the integration region 273 | base: the base of the logarithm used to control the units of measurement for the result 274 | eps_abs: absolute error tolerance for numerical integration 275 | eps_rel: relative error tolerance for numerical integration 276 | 277 | Returns 278 | ------- 279 | The cross entropy of the distribution q relative to the distribution p. 280 | """ 281 | log_fun = _select_vectorized_log_fun_for_base(base) 282 | 283 | return - cubature(func=lambda x: _cross_entropy_integrand(p=p, q=q, x=x, log_fun=log_fun), 284 | ndim=1, 285 | fdim=1, 286 | xmin=np.array([a]), 287 | xmax=np.array([b]), 288 | vectorized=False, 289 | adaptive='p', 290 | abserr=eps_abs, 291 | relerr=eps_rel)[0].item() 292 | 293 | 294 | def _does_support_overlap(p: sm.nonparametric.KDEUnivariate, 295 | q: sm.nonparametric.KDEUnivariate) -> bool: 296 | """ 297 | Determine whether the support of distributions of kernel density estimates p and q overlap. 298 | 299 | Parameters 300 | ---------- 301 | p: statsmodels kde object representing an approximation of the distribution p 302 | q: statsmodels kde object representing an approximation of the distribution q 303 | 304 | Returns 305 | ------- 306 | whether the support of distributions of kernel density estimates p and q overlap 307 | """ 308 | return intersection(min(p.support), max(p.support), min(q.support), max(q.support)) is not None 309 | 310 | 311 | def cross_entropy_from_kde(p: sm.nonparametric.KDEUnivariate, 312 | q: sm.nonparametric.KDEUnivariate, 313 | base: float = np.e, 314 | eps_abs: float = 1.49e-08, 315 | eps_rel: float = 1.49e-08) -> float: 316 | """ 317 | Compute the cross entropy of the distribution q relative to the distribution p 318 | 319 | H_q(p) = - E_p [log(q)] 320 | 321 | given by the statsmodels kde objects via numerical integration. 322 | The argument base can be used to specify the units in which the entropy is measured. 323 | The default choice is the natural logarithm. 324 | 325 | Parameters 326 | ---------- 327 | p: statsmodels kde object approximating the probability density function of the distribution p 328 | q: statsmodels kde object approximating the probability density function of the distribution q 329 | base: the base of the logarithm used to control the units of measurement for the result 330 | eps_abs: absolute error tolerance for numerical integration 331 | eps_rel: relative error tolerance for numerical integration 332 | 333 | Returns 334 | ------- 335 | The cross entropy of the distribution q relative to the distribution p. 
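Putting the pieces together, the KDE-based path first checks that the two estimated supports overlap (via `intersection`) and then integrates over the union of the two supports. A minimal usage sketch, assuming fresh samples rather than the notebook's antithetic ones, so the numerical result will differ slightly:

```python
# Minimal usage sketch for the KDE-based cross entropy path.
import numpy as np
import statsmodels.api as sm

from divergence import cross_entropy_from_kde

np.random.seed(42)
sample_p = 2.0 + 3.0 * np.random.randn(10_000)
sample_q = 1.0 + 2.0 * np.random.randn(10_000)

kde_p = sm.nonparametric.KDEUnivariate(sample_p)
kde_q = sm.nonparametric.KDEUnivariate(sample_q)
kde_p.fit()
kde_q.fit()

# raises ValueError if the supports of the two estimates do not overlap
print(cross_entropy_from_kde(kde_p, kde_q))   # compare with the closed-form value of about 2.86
```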
336 | """ 337 | if not _does_support_overlap(p, q): 338 | raise ValueError('The support of p and q does not overlap.') 339 | 340 | a = min(min(p.support), min(q.support)) 341 | b = max(max(p.support), max(q.support)) 342 | 343 | return cross_entropy_from_densities_with_support(p=p.evaluate, 344 | q=q.evaluate, 345 | a=a, 346 | b=b, 347 | base=base, 348 | eps_abs=eps_abs, 349 | eps_rel=eps_rel) 350 | 351 | 352 | def continuous_cross_entropy_from_sample(sample_p: np.ndarray, 353 | sample_q: np.ndarray, 354 | base: float = np.e, 355 | eps_abs: float = 1.49e-08, 356 | eps_rel: float = 1.49e-08) -> float: 357 | """ 358 | Compute the cross entropy of the distribution q relative to the distribution p 359 | 360 | H_q(p) = - E_p [log(q)] 361 | 362 | from samples of the two distributions via approximation by a kernel density estimate and 363 | numerical integration. 364 | The argument base can be used to specify the units in which the entropy is measured. 365 | The default choice is the natural logarithm. 366 | 367 | Parameters 368 | ---------- 369 | sample_p: sample from the distribution p 370 | sample_q: sample from the distribution q 371 | base: the base of the logarithm used to control the units of measurement for the result 372 | eps_abs: absolute error tolerance for numerical integration 373 | eps_rel: relative error tolerance for numerical integration 374 | 375 | Returns 376 | ------- 377 | The cross entropy of the distribution q relative to the distribution p. 378 | 379 | """ 380 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 381 | kde_p.fit() 382 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 383 | kde_q.fit() 384 | 385 | return cross_entropy_from_kde(kde_p, kde_q, base=base, eps_abs=eps_abs, eps_rel=eps_rel) 386 | 387 | 388 | ################################################################################ 389 | # Relative Entropy (KL Divergence) 390 | ################################################################################ 391 | def _relative_entropy_integrand(p: tp.Callable, 392 | q: tp.Callable, 393 | x: float, 394 | log_fun: tp.Callable = np.log) -> float: 395 | """ 396 | Compute the integrand p(x) * log(p(x) / q(x)) at a given point x for the calculation of relative 397 | entropy. 398 | 399 | Parameters 400 | ---------- 401 | p: probability density function of the distribution p 402 | q: probability density function of the distribution q 403 | x: the point at which to evaluate the integrand 404 | base: the base of the logarithm used to control the units of measurement for the result 405 | 406 | Returns 407 | ------- 408 | Integrand for the relative entropy calculation 409 | """ 410 | qx = q(x) 411 | px = p(x) 412 | if qx == 0.0: 413 | if px == 0.0: 414 | return 0.0 415 | else: 416 | raise ValueError(f'q(x) is zero at x={x} but p(x) is not') 417 | elif px == 0.0: 418 | return 0.0 419 | else: 420 | return px * log_fun(px / qx) 421 | 422 | 423 | def _vectorized_relative_entropy_integrand(p: tp.Callable, 424 | q: tp.Callable, 425 | x: np.ndarray, 426 | log_fun: tp.Callable = np.log) -> np.ndarray: 427 | """ 428 | Compute the integrand p(x) * log(p(x) / q(x)) vectorized at given points x for the calculation 429 | of relative entropy. 
430 | 
431 |     Parameters
432 |     ----------
433 |     p: probability density function of the distribution p
434 |     q: probability density function of the distribution q
435 |     x: the points at which to evaluate the integrand
436 |     log_fun: the vectorized logarithm function corresponding to the chosen base
437 | 
438 |     Returns
439 |     -------
440 |     Integrand for the relative entropy calculation
441 |     """
442 |     # q(x) may be zero only where p(x) is also zero; such points contribute nothing to the integral
443 |     qx = q(x)
444 |     px = p(x)
445 | 
446 |     q_positive_index = qx > 0.0
447 |     p_positive_index = px > 0.0
448 | 
449 |     q_zero_but_p_positive_index = ~q_positive_index & p_positive_index
450 |     if np.any(q_zero_but_p_positive_index):
451 |         raise ValueError(f'q(x) is zero at x={x[q_zero_but_p_positive_index]} but p(x) is not')
452 | 
453 |     return np.where(p_positive_index, px * log_fun(px / qx), 0.0)
454 | 
455 | 
456 | def relative_entropy_from_densities_with_support(p: tp.Callable,
457 |                                                  q: tp.Callable,
458 |                                                  a: float,
459 |                                                  b: float,
460 |                                                  base: float = np.e,
461 |                                                  eps_abs: float = 1.49e-08,
462 |                                                  eps_rel: float = 1.49e-08
463 |                                                  ) -> float:
464 |     """
465 |     Compute the relative entropy of the distribution q relative to the distribution p
466 | 
467 |     D_KL(p||q) = E_p [log(p/q)]
468 | 
469 |     via numerical integration from a to b.
470 |     The argument base can be used to specify the units in which the entropy is measured.
471 |     The default choice is the natural logarithm.
472 | 
473 |     Parameters
474 |     ----------
475 |     p: probability density function of the distribution p
476 |     q: probability density function of the distribution q
477 |     a: lower bound of the integration region
478 |     b: upper bound of the integration region
479 |     base: the base of the logarithm used to control the units of measurement for the result
480 |     eps_abs: absolute error tolerance for numerical integration
481 |     eps_rel: relative error tolerance for numerical integration
482 | 
483 |     Returns
484 |     -------
485 |     The relative entropy of the distribution q relative to the distribution p.
486 |     """
487 |     log_fun = _select_vectorized_log_fun_for_base(base)
488 | 
489 |     def integrand(x: float):
490 |         return _relative_entropy_integrand(p=p, q=q, x=x, log_fun=log_fun)
491 | 
492 |     return cubature(func=integrand,
493 |                     ndim=1,
494 |                     fdim=1,
495 |                     xmin=np.array([a]),
496 |                     xmax=np.array([b]),
497 |                     vectorized=False,
498 |                     adaptive='p',
499 |                     abserr=eps_abs,
500 |                     relerr=eps_rel)[0].item()
501 | 
502 | 
503 | def relative_entropy_from_kde(p: sm.nonparametric.KDEUnivariate,
504 |                               q: sm.nonparametric.KDEUnivariate,
505 |                               base: float = np.e,
506 |                               eps_abs: float = 1.49e-08,
507 |                               eps_rel: float = 1.49e-08) -> float:
508 |     """
509 |     Compute the relative entropy of the distribution q relative to the distribution p
510 | 
511 |     D_KL(p||q) = E_p [log(p/q)]
512 | 
513 |     given by the statsmodels kde objects via numerical integration.
514 |     The argument base can be used to specify the units in which the entropy is measured.
515 |     The default choice is the natural logarithm.
516 | 517 | Parameters 518 | ---------- 519 | p: statsmodels kde object approximating the probability density function of the distribution p 520 | q: statsmodels kde object approximating the probability density function of the distribution q 521 | base: the base of the logarithm used to control the units of measurement for the result 522 | eps_abs: absolute error tolerance for numerical integration 523 | eps_rel: relative error tolerance for numerical integration 524 | 525 | Returns 526 | ------- 527 | The relative entropy of the distribution q relative to the distribution p. 528 | """ 529 | if not _does_support_overlap(p, q): 530 | raise ValueError('The support of p and q does not overlap.') 531 | 532 | a = min(min(p.support), min(q.support)) 533 | b = max(max(p.support), max(q.support)) 534 | return relative_entropy_from_densities_with_support(p=p.evaluate, 535 | q=q.evaluate, 536 | a=a, 537 | b=b, 538 | base=base, 539 | eps_abs=eps_abs, 540 | eps_rel=eps_rel) 541 | 542 | 543 | def continuous_relative_entropy_from_sample(sample_p: np.ndarray, 544 | sample_q: np.ndarray, 545 | base: float = np.e, 546 | eps_abs: float = 1.49e-08, 547 | eps_rel: float = 1.49e-08) -> float: 548 | """ 549 | Compute the relative entropy of the distribution q relative to the distribution p 550 | 551 | D_KL(p||q) = E_p [log(p/q)] 552 | 553 | from samples of the two distributions via approximation by a kernel density estimate and 554 | numerical integration. 555 | The argument base can be used to specify the units in which the entropy is measured. 556 | The default choice is the natural logarithm. 557 | 558 | Parameters 559 | ---------- 560 | sample_p: sample from the distribution p 561 | sample_q: sample from the distribution q 562 | base: the base of the logarithm used to control the units of measurement for the result 563 | eps_abs: absolute error tolerance for numerical integration 564 | eps_rel: relative error tolerance for numerical integration 565 | 566 | Returns 567 | ------- 568 | The relative entropy of the distribution q relative to the distribution p. 569 | """ 570 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 571 | kde_p.fit() 572 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 573 | kde_q.fit() 574 | 575 | return relative_entropy_from_kde(p=kde_p, 576 | q=kde_q, 577 | base=base, 578 | eps_abs=eps_abs, 579 | eps_rel=eps_rel) 580 | 581 | 582 | ################################################################################ 583 | # Jensen-Shannon Divergence 584 | ############################################################################### 585 | def _relative_entropy_from_densities_with_support_for_shannon_divergence( 586 | p: tp.Callable, 587 | q: tp.Callable, 588 | a: float, 589 | b: float, 590 | log_fun: tp.Callable = np.log, 591 | eps_abs: float = 1.49e-08, 592 | eps_rel: float = 1.49e-08) -> float: 593 | """ 594 | Compute the relative entropy of the distribution q relative to the distribution p 595 | 596 | D_KL(p||q) = E_p [log(p/q)] 597 | 598 | via numerical integration from a to b. 599 | The argument base can be used to specify the units in which the entropy is measured. 600 | The default choice is the natural logarithm. 
601 | 602 | Parameters 603 | ---------- 604 | p: probability density function of the distribution p 605 | q: probability density function of the distribution q 606 | a: lower bound of the integration region 607 | b: upper bound of the integration region 608 | base: the base of the logarithm used to control the units of measurement for the result 609 | eps_abs: absolute error tolerance for numerical integration 610 | eps_rel: relative error tolerance for numerical integration 611 | 612 | Returns 613 | ------- 614 | The relative entropy of the distribution q relative to the distribution p. 615 | 616 | """ 617 | def integrand(x): 618 | return p(x) * log_fun(p(x) / q(x)) if p(x) > 0.0 else 0.0 619 | 620 | return cubature(func=integrand, 621 | ndim=1, 622 | fdim=1, 623 | xmin=np.array([a]), 624 | xmax=np.array([b]), 625 | vectorized=False, 626 | adaptive='p', 627 | abserr=eps_abs, 628 | relerr=eps_rel)[0].item() 629 | 630 | 631 | def jensen_shannon_divergence_from_densities_with_support(p: tp.Callable, 632 | q: tp.Callable, 633 | a: float, 634 | b: float, 635 | base: float = np.e, 636 | eps_abs: float = 1.49e-08, 637 | eps_rel: float = 1.49e-08) \ 638 | -> float: 639 | """ 640 | Compute the Jensen-Shannon divergence between distributions p and q 641 | 642 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 643 | 644 | via numerical integration from a to b. 645 | The argument base can be used to specify the units in which the entropy is measured. 646 | The default choice is the natural logarithm. 647 | 648 | Parameters 649 | ---------- 650 | p: probability density function of the distribution p 651 | q: probability density function of the distribution q 652 | a: lower bound of the integration region 653 | b: upper bound of the integration region 654 | base: the base of the logarithm used to control the units of measurement for the result 655 | eps_abs: absolute error tolerance for numerical integration 656 | eps_rel: relative error tolerance for numerical integration 657 | 658 | Returns 659 | ------- 660 | The Jensen-Shannon divergence between distributions p and q. 661 | 662 | """ 663 | log_fun = _select_vectorized_log_fun_for_base(base) 664 | 665 | m = lambda x: 0.5 * (p(x) + q(x)) 666 | D_PM = _relative_entropy_from_densities_with_support_for_shannon_divergence( 667 | p=p, 668 | q=m, 669 | a=a, 670 | b=b, 671 | log_fun=log_fun, 672 | eps_abs=eps_abs, 673 | eps_rel=eps_rel) 674 | 675 | D_QM = _relative_entropy_from_densities_with_support_for_shannon_divergence( 676 | p=q, 677 | q=m, 678 | a=a, 679 | b=b, 680 | log_fun=log_fun, 681 | eps_abs=eps_abs, 682 | eps_rel=eps_rel) 683 | 684 | return 0.5 * D_PM + 0.5 * D_QM 685 | 686 | 687 | def jensen_shannon_divergence_from_kde(p: sm.nonparametric.KDEUnivariate, 688 | q: sm.nonparametric.KDEUnivariate, 689 | base: float = np.e, 690 | eps_abs: float = 1.49e-08, 691 | eps_rel: float = 1.49e-08) \ 692 | -> float: 693 | """ 694 | Compute the Jensen-Shannon divergence between distributions p and q 695 | 696 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 697 | 698 | given by the statsmodels kde objects via numerical integration. 699 | The argument base can be used to specify the units in which the entropy is measured. 700 | The default choice is the natural logarithm. 
701 | 702 | Parameters 703 | ---------- 704 | p: statsmodels kde object approximating the probability density function of the distribution p 705 | q: statsmodels kde object approximating the probability density function of the distribution q 706 | base: the base of the logarithm used to control the units of measurement for the result 707 | eps_abs: absolute error tolerance for numerical integration 708 | eps_rel: relative error tolerance for numerical integration 709 | 710 | Returns 711 | ------- 712 | The Jensen-Shannon divergence between distributions p and q. 713 | 714 | """ 715 | a = min(min(p.support), min(q.support)) 716 | b = max(max(p.support), max(q.support)) 717 | return jensen_shannon_divergence_from_densities_with_support(p=p.evaluate, 718 | q=q.evaluate, 719 | a=a, 720 | b=b, 721 | base=base, 722 | eps_abs=eps_abs, 723 | eps_rel=eps_rel) 724 | 725 | 726 | def continuous_jensen_shannon_divergence_from_sample(sample_p: np.ndarray, 727 | sample_q: np.ndarray, 728 | base: float = np.e, 729 | eps_abs: float = 1.49e-08, 730 | eps_rel: float = 1.49e-08) -> float: 731 | """ 732 | Compute the Jensen-Shannon divergence between distributions p and q 733 | 734 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 735 | 736 | from samples of the two distributions via approximation by a kernel density estimate and 737 | numerical integration. 738 | The argument base can be used to specify the units in which the entropy is measured. 739 | The default choice is the natural logarithm. 740 | 741 | Parameters 742 | ---------- 743 | sample_p: sample from the distribution p 744 | sample_q: sample from the distribution q 745 | base: the base of the logarithm used to control the units of measurement for the result 746 | eps_abs: absolute error tolerance for numerical integration 747 | eps_rel: relative error tolerance for numerical integration 748 | 749 | Returns 750 | ------- 751 | The Jensen-Shannon divergence between distributions p and q. 752 | 753 | """ 754 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 755 | kde_p.fit() 756 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 757 | kde_q.fit() 758 | 759 | return jensen_shannon_divergence_from_kde(kde_p, 760 | kde_q, 761 | base=base, 762 | eps_abs=eps_abs, 763 | eps_rel=eps_rel) 764 | 765 | 766 | ################################################################################ 767 | # Mutual Information 768 | ############################################################################### 769 | def mutual_information_from_densities_with_support(pdf_x: tp.Callable, 770 | pdf_y: tp.Callable, 771 | pdf_xy: tp.Callable, 772 | x_min: float, 773 | x_max: float, 774 | y_min: float, 775 | y_max: float, 776 | base: float = np.e, 777 | eps_abs: float = 1.49e-08, 778 | eps_rel: float = 1.49e-08 779 | ) -> float: 780 | """ 781 | Compute mutual information of the random variables x and y with joint density p_{x, y} and 782 | marginal densities p_x and p_y defined as the KL divergence between the product of marginal 783 | densities and the joint density, i.e. 784 | 785 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 786 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 787 | 788 | via numerical integration on a rectangular domain aligned with the axes. 789 | The argument base can be used to specify the units in which the entropy is measured. 790 | The default choice is the natural logarithm. 
791 | 792 | Parameters 793 | ---------- 794 | pdf_x: probability density function of the random variable x 795 | pdf_y: probability density function of the random variable y 796 | pdf_xy: joint probability density function of the random variables x and y 797 | x_min: lower bound of the integration domain for x 798 | x_max: upper bound of the integration domain for x 799 | y_min: lower bound of the integration domain for y 800 | y_max: upper bound of the integration domain for y 801 | base: the base of the logarithm used to control the units of measurement for the result 802 | eps_abs: absolute error tolerance for numerical integration 803 | eps_rel: relative error tolerance for numerical integration 804 | 805 | Returns 806 | ------- 807 | The mutual information of the random variables x and y 808 | """ 809 | log_fun = _select_vectorized_log_fun_for_base(base) 810 | 811 | def mutual_information_integrand(arg: np.ndarray): 812 | if arg.ndim == 1: 813 | x, y = arg 814 | pxy = pdf_xy((x, y)) 815 | elif arg.ndim == 2: 816 | x = arg[:, 0] 817 | y = arg[:, 1] 818 | pxy = pdf_xy(arg.T) 819 | else: 820 | raise ValueError('arg must be a numpy array with one or two axes') 821 | 822 | px = pdf_x(x) 823 | py = pdf_y(y) 824 | 825 | return pxy * log_fun(pxy / (px * py)) 826 | 827 | return cubature(func=mutual_information_integrand, 828 | ndim=2, 829 | fdim=1, 830 | xmin=np.array([x_min, y_min]), 831 | xmax=np.array([x_max, y_max]), 832 | adaptive='p', 833 | vectorized=False, 834 | abserr=eps_abs, 835 | relerr=eps_rel)[0].item() 836 | 837 | 838 | def mutual_information_from_kde(kde_x: sm.nonparametric.KDEUnivariate, 839 | kde_y: sm.nonparametric.KDEUnivariate, 840 | kde_xy: sp.stats.kde.gaussian_kde, 841 | base: float = np.e, 842 | eps_abs: float = 1.49e-08, 843 | eps_rel: float = 1.49e-08) -> float: 844 | """ 845 | Compute mutual information of the random variables x and y with joint density p_{x, y} and 846 | marginal densities p_x and p_y defined as the KL divergence between the product of marginal 847 | densities and the joint density, i.e. 848 | 849 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 850 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 851 | 852 | given by the statsmodels kde objects for the marginal densities and a SciPy gaussian_kde object 853 | for the joint density via numerical integration. 854 | The argument base can be used to specify the units in which the entropy is measured. 855 | The default choice is the natural logarithm. 
856 | 857 | Parameters 858 | ---------- 859 | kde_x: statsmodels kde object approximating the marginal density of x 860 | kde_y: statsmodels kde object approximating the marginal density of y 861 | kde_xy: SciPy gaussian_kde object approximating the joint density of x and y 862 | base: the base of the logarithm used to control the units of measurement for the result 863 | eps_abs: absolute error tolerance for numerical integration 864 | eps_rel: relative error tolerance for numerical integration 865 | 866 | Returns 867 | ------- 868 | The mutual information of the random variables x and y 869 | """ 870 | x_min = min(kde_x.support) 871 | x_max = max(kde_x.support) 872 | y_min = min(kde_y.support) 873 | y_max = max(kde_y.support) 874 | 875 | return mutual_information_from_densities_with_support(pdf_x=kde_x.evaluate, 876 | pdf_y=kde_y.evaluate, 877 | pdf_xy=kde_xy.pdf, 878 | x_min=x_min, 879 | x_max=x_max, 880 | y_min=y_min, 881 | y_max=y_max, 882 | base=base, 883 | eps_abs=eps_abs, 884 | eps_rel=eps_rel) 885 | 886 | 887 | def continuous_mutual_information_from_samples(sample_x: np.ndarray, 888 | sample_y: np.ndarray, 889 | base: float = np.e, 890 | eps_abs: float = 1.49e-08, 891 | eps_rel: float = 1.49e-08) -> float: 892 | """ 893 | Compute mutual information of the random variables x and y with joint density p_{x, y} and 894 | marginal densities p_x and p_y defined as the KL divergence between the product of marginal 895 | densities and the joint density, i.e. 896 | 897 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 898 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 899 | 900 | from samples of the two distributions via approximation by kernel density estimates and 901 | numerical integration. 902 | The argument base can be used to specify the units in which the entropy is measured. 903 | The default choice is the natural logarithm. 904 | 905 | Parameters 906 | ---------- 907 | sample_x: x-component of the sample from the joint density p_{x, y} 908 | sample_y: y-component of the sample from the joint density p_{x, y} 909 | base: the base of the logarithm used to control the units of measurement for the result 910 | eps_abs: absolute error tolerance for numerical integration 911 | eps_rel: relative error tolerance for numerical integration 912 | 913 | Returns 914 | ------- 915 | The mutual information of the random variables x and y 916 | """ 917 | kde_x = sm.nonparametric.KDEUnivariate(sample_x) 918 | kde_x.fit() 919 | kde_y = sm.nonparametric.KDEUnivariate(sample_y) 920 | kde_y.fit() 921 | 922 | kde_xy = sp.stats.gaussian_kde([sample_x, sample_y]) 923 | 924 | return mutual_information_from_kde(kde_x=kde_x, 925 | kde_y=kde_y, 926 | kde_xy=kde_xy, 927 | base=base, 928 | eps_abs=eps_abs, 929 | eps_rel=eps_rel) 930 | 931 | 932 | ################################################################################ 933 | # Joint Entropy 934 | ############################################################################### 935 | def joint_entropy_from_densities_with_support(pdf_xy: tp.Callable, 936 | x_min: float, 937 | x_max: float, 938 | y_min: float, 939 | y_max: float, 940 | base: float = np.e, 941 | eps_abs: float = 1.49e-08, 942 | eps_rel: float = 1.49e-08) -> float: 943 | """ 944 | Compute joint entropy of the random variables x and y with joint density p_{x, y} defined as 945 | 946 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 947 | 948 | via numerical integration on a rectangular domain aligned with the axes. 
949 | The argument base can be used to specify the units in which the entropy is measured. 950 | The default choice is the natural logarithm. 951 | 952 | Parameters 953 | ---------- 954 | pdf_xy: joint probability density function of the random variables x and y 955 | x_min: lower bound of the integration domain for x 956 | x_max: upper bound of the integration domain for x 957 | y_min: lower bound of the integration domain for y 958 | y_max: upper bound of the integration domain for y 959 | base: the base of the logarithm used to control the units of measurement for the result 960 | eps_abs: absolute error tolerance for numerical integration 961 | eps_rel: relative error tolerance for numerical integration 962 | 963 | Returns 964 | ------- 965 | The joint entropy of the random variables x and y 966 | """ 967 | log_fun = _select_vectorized_log_fun_for_base(base) 968 | 969 | def joint_entropy_integrand(arg: np.ndarray): 970 | x, y = arg 971 | pxy = pdf_xy((x, y)) 972 | 973 | return pxy * log_fun(pxy) 974 | 975 | return - cubature(func=joint_entropy_integrand, 976 | ndim=2, 977 | fdim=1, 978 | xmin=np.array([x_min, y_min]), 979 | xmax=np.array([x_max, y_max]), 980 | adaptive='p', 981 | vectorized=False, 982 | abserr=eps_abs, 983 | relerr=eps_rel)[0].item() 984 | 985 | 986 | def joint_entropy_from_kde(kde_xy: sp.stats.kde.gaussian_kde, 987 | x_min: float, 988 | x_max: float, 989 | y_min: float, 990 | y_max: float, 991 | base: float = np.e, 992 | eps_abs: float = 1.49e-08, 993 | eps_rel: float = 1.49e-08) -> float: 994 | """ 995 | Compute joint entropy of the random variables x and y with joint density p_{x, y} defined as 996 | 997 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 998 | 999 | via numerical integration, where the joint density is given by a SciPy gaussian_kde object. 1000 | The argument base can be used to specify the units in which the entropy is measured. 1001 | The default choice is the natural logarithm. 1002 | 1003 | Parameters 1004 | ---------- 1005 | kde_xy: SciPy gaussian_kde object approximating the joint density of x and y 1006 | x_min: lower bound of the integration domain for x 1007 | x_max: upper bound of the integration domain for x 1008 | y_min: lower bound of the integration domain for y 1009 | y_max: upper bound of the integration domain for y 1010 | base: the base of the logarithm used to control the units of measurement for the result 1011 | eps_abs: absolute error tolerance for numerical integration 1012 | eps_rel: relative error tolerance for numerical integration 1013 | 1014 | Returns 1015 | ------- 1016 | The joint entropy of the random variables x and y 1017 | """ 1018 | 1019 | return joint_entropy_from_densities_with_support(pdf_xy=kde_xy.pdf, 1020 | x_min=x_min, 1021 | x_max=x_max, 1022 | y_min=y_min, 1023 | y_max=y_max, 1024 | base=base, 1025 | eps_abs=eps_abs, 1026 | eps_rel=eps_rel) 1027 | 1028 | 1029 | def continuous_joint_entropy_from_samples(sample_x: np.ndarray, 1030 | sample_y: np.ndarray, 1031 | base: float = np.e, 1032 | eps_abs: float = 1.49e-08, 1033 | eps_rel: float = 1.49e-08) -> float: 1034 | """ 1035 | Compute joint entropy of the random variables x and y with joint density p_{x, y} defined as 1036 | 1037 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 1038 | 1039 | from samples of the two distributions via approximation by kernel density estimates and 1040 | numerical integration. 1041 | The argument base can be used to specify the units in which the entropy is measured. 
1042 |     The default choice is the natural logarithm.
1043 | 
1044 |     Parameters
1045 |     ----------
1046 |     sample_x: x-component of the sample from the joint density p_{x, y}
1047 |     sample_y: y-component of the sample from the joint density p_{x, y}
1048 |     base: the base of the logarithm used to control the units of measurement for the result
1049 |     eps_abs: absolute error tolerance for numerical integration
1050 |     eps_rel: relative error tolerance for numerical integration
1051 | 
1052 |     Returns
1053 |     -------
1054 |     The joint entropy of the random variables x and y
1055 |     """
1056 |     # kde_x = sm.nonparametric.KDEUnivariate(sample_x)
1057 |     # kde_x.fit()
1058 |     # kde_y = sm.nonparametric.KDEUnivariate(sample_y)
1059 |     # kde_y.fit()
1060 |     # x_min = min(kde_x.support)
1061 |     # x_max = max(kde_x.support)
1062 |     # y_min = min(kde_y.support)
1063 |     # y_max = max(kde_y.support)
1064 | 
1065 |     kde_xy = sp.stats.gaussian_kde([sample_x, sample_y])
1066 | 
1067 |     x_min, x_max = _get_min_and_max_support_for_silverman_bw_rule(sample_x)
1068 |     y_min, y_max = _get_min_and_max_support_for_silverman_bw_rule(sample_y)
1069 | 
1070 |     return joint_entropy_from_kde(kde_xy=kde_xy,
1071 |                                   x_min=x_min,
1072 |                                   x_max=x_max,
1073 |                                   y_min=y_min,
1074 |                                   y_max=y_max,
1075 |                                   base=base,
1076 |                                   eps_abs=eps_abs,
1077 |                                   eps_rel=eps_rel)
1078 | 
1079 | 
1080 | ################################################################################
1081 | # Conditional Entropy
1082 | ###############################################################################
1083 | def conditional_entropy_from_densities_with_support(pdf_x: tp.Callable,
1084 |                                                     pdf_xy: tp.Callable,
1085 |                                                     x_min: float,
1086 |                                                     x_max: float,
1087 |                                                     y_min: float,
1088 |                                                     y_max: float,
1089 |                                                     base: float = np.e,
1090 |                                                     eps_abs: float = 1.49e-08,
1091 |                                                     eps_rel: float = 1.49e-08,
1092 |                                                     gpu: bool = False
1093 |                                                     ) -> float:
1094 |     """
1095 |     Compute conditional entropy of the random variables x and y with joint density p_{x, y} and
1096 |     marginal density p_x defined as
1097 | 
1098 |     H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right]
1099 | 
1100 |     via numerical integration on a rectangular domain aligned with the axes.
1101 |     The argument base can be used to specify the units in which the entropy is measured.
1102 |     The default choice is the natural logarithm.
1103 | 1104 | Parameters 1105 | ---------- 1106 | pdf_x: probability density function of the random variable x 1107 | pdf_xy: joint probability density function of the random variables x and y 1108 | x_min: lower bound of the integration domain for x 1109 | x_max: upper bound of the integration domain for x 1110 | y_min: lower bound of the integration domain for y 1111 | y_max: upper bound of the integration domain for y 1112 | base: the base of the logarithm used to control the units of measurement for the result 1113 | eps_abs: absolute error tolerance for numerical integration 1114 | eps_rel: relative error tolerance for numerical integration 1115 | gpu: whether to use the gpu for evaluation of the kernel density estimate 1116 | 1117 | Returns 1118 | ------- 1119 | The conditional entropy of the random variables x and y 1120 | """ 1121 | log_fun = _select_vectorized_log_fun_for_base(base, gpu=gpu) 1122 | 1123 | def conditional_entropy_integrand(arg: np.ndarray): 1124 | if arg.ndim == 1: 1125 | x, y = arg 1126 | pxy = pdf_xy((x, y)) 1127 | px = pdf_x(x) 1128 | elif arg.ndim == 2: 1129 | x = arg[:, 0] 1130 | pxy = pdf_xy(arg.T) 1131 | px = pdf_x(x) 1132 | else: 1133 | raise ValueError('the number of axes in arg must be either 1 or 2') 1134 | 1135 | return pxy * log_fun(pxy / px) 1136 | 1137 | return - cubature(func=conditional_entropy_integrand, 1138 | ndim=2, 1139 | fdim=1, 1140 | xmin=np.array([x_min, y_min]), 1141 | xmax=np.array([x_max, y_max]), 1142 | adaptive='p', 1143 | vectorized=True, 1144 | abserr=eps_abs, 1145 | relerr=eps_rel)[0].item() 1146 | 1147 | 1148 | def conditional_entropy_from_kde(kde_x: sm.nonparametric.KDEUnivariate, 1149 | kde_xy: sp.stats.kde.gaussian_kde, 1150 | y_min: float, 1151 | y_max: float, 1152 | base: float = np.e, 1153 | eps_abs: float = 1.49e-08, 1154 | eps_rel: float = 1.49e-08) -> float: 1155 | """ 1156 | Compute conditional entropy of the random variables x and y with joint density p_{x, y} and 1157 | marginal density p_x defined as 1158 | 1159 | H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right] 1160 | 1161 | via numerical integration, where the marginal density of x is given by a statsmodels kde object 1162 | and the joint density by a SciPy gaussian_kde object. 1163 | The argument base can be used to specify the units in which the entropy is measured. 1164 | The default choice is the natural logarithm. 
1165 | 
1166 |     Parameters
1167 |     ----------
1168 |     kde_x: statsmodels kde object approximating the marginal density of x
1170 |     kde_xy: SciPy gaussian_kde object approximating the joint density of x and y
1171 |     y_min: lower bound of the integration domain for y
1172 |     y_max: upper bound of the integration domain for y
1173 |     base: the base of the logarithm used to control the units of measurement for the result
1174 |     eps_abs: absolute error tolerance for numerical integration
1175 |     eps_rel: relative error tolerance for numerical integration
1176 | 
1177 |     Returns
1178 |     -------
1179 |     The conditional entropy of the random variables x and y
1180 |     """
1181 |     x_min = min(kde_x.support)
1182 |     x_max = max(kde_x.support)
1183 | 
1184 |     return conditional_entropy_from_densities_with_support(pdf_x=kde_x.evaluate,
1185 |                                                            pdf_xy=kde_xy.pdf,
1186 |                                                            x_min=x_min,
1187 |                                                            x_max=x_max,
1188 |                                                            y_min=y_min,
1189 |                                                            y_max=y_max,
1190 |                                                            base=base,
1191 |                                                            eps_abs=eps_abs,
1192 |                                                            eps_rel=eps_rel)
1193 | 
1194 | 
1195 | def continuous_conditional_entropy_from_samples(sample_x: np.ndarray,
1196 |                                                 sample_y: np.ndarray,
1197 |                                                 base: float = np.e,
1198 |                                                 eps_abs: float = 1.49e-08,
1199 |                                                 eps_rel: float = 1.49e-08) -> float:
1200 |     """
1201 |     Compute conditional entropy of the random variables x and y with joint density p_{x, y} and
1202 |     marginal density p_x defined as
1203 | 
1204 |     H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right]
1205 | 
1206 |     from samples of the two distributions via approximation by kernel density estimates and
1207 |     numerical integration.
1208 |     The argument base can be used to specify the units in which the entropy is measured.
1209 |     The default choice is the natural logarithm.
1210 | 
1211 |     Parameters
1212 |     ----------
1213 |     sample_x: x-component of the sample from the joint density p_{x, y}
1214 |     sample_y: y-component of the sample from the joint density p_{x, y}
1215 |     base: the base of the logarithm used to control the units of measurement for the result
1216 |     eps_abs: absolute error tolerance for numerical integration
1217 |     eps_rel: relative error tolerance for numerical integration
1218 | 
1219 |     Returns
1220 |     -------
1221 |     The conditional entropy of the random variables x and y
1222 |     """
1223 |     kde_x = sm.nonparametric.KDEUnivariate(sample_x)
1224 |     kde_x.fit()
1225 | 
1226 |     kde_xy = sp.stats.gaussian_kde([sample_x, sample_y])
1227 |     y_min, y_max = _get_min_and_max_support_for_silverman_bw_rule(sample_y)
1228 | 
1229 |     return conditional_entropy_from_kde(kde_x=kde_x,
1230 |                                         kde_xy=kde_xy,
1231 |                                         y_min=y_min,
1232 |                                         y_max=y_max,
1233 |                                         base=base,
1234 |                                         eps_abs=eps_abs,
1235 |                                         eps_rel=eps_rel)
1236 | 
1237 | 
1238 | def continuous_conditional_entropy_from_samples_gpu(
1239 |         sample_x: np.ndarray,
1240 |         sample_y: np.ndarray,
1241 |         base: float = np.e,
1242 |         eps_abs: float = 1.49e-08,
1243 |         eps_rel: float = 1.49e-08,
1244 |         maximum_number_of_elements_per_batch: int = -1) -> float:
1245 |     """
1246 |     Compute conditional entropy of the random variables x and y with joint density p_{x, y} and
1247 |     marginal density p_x defined as
1248 | 
1249 |     H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right]
1250 | 
1251 |     from samples of the two distributions via approximation by kernel density estimates and
1252 |     numerical integration.
1253 |     The argument base can be used to specify the units in which the entropy is measured.
1254 | The default choice is the natural logarithm. 1255 | 1256 | Parameters 1257 | ---------- 1258 | sample_x: x-component of the sample from the joint density p_{x, y} 1259 | sample_y: y-component of the sample from the joint density p_{x, y} 1260 | base: the base of the logarithm used to control the units of measurement for the result 1261 | eps_abs: absolute error tolerance for numerical integration 1262 | eps_rel: relative error tolerance for numerical integration 1263 | maximum_number_of_elements_per_batch: 1264 | maximum number of data points times evaluation points to process in a single batch 1265 | 1266 | Returns 1267 | ------- 1268 | The conditional entropy of the random variables x and y 1269 | """ 1270 | kde_x = cocos_gaussian_kde(sample_x, gpu=True) 1271 | kde_xy = cocos_gaussian_kde(np.vstack((sample_x.reshape((1, -1)), 1272 | sample_y.reshape((1, -1)))), 1273 | gpu=True) 1274 | 1275 | x_min, x_max = _get_min_and_max_support_for_silverman_bw_rule(sample_x) 1276 | y_min, y_max = _get_min_and_max_support_for_silverman_bw_rule(sample_y) 1277 | 1278 | log_fun = _select_vectorized_log_fun_for_base(base, gpu=True) 1279 | 1280 | def conditional_entropy_integrand(arg: NumericArray): 1281 | # print(f'arg.shape={arg.shape}') 1282 | if arg.ndim == 1: 1283 | x, y = arg 1284 | pxy = kde_xy.evaluate((x, y)) 1285 | elif arg.ndim == 2: 1286 | x = arg[:, 0] 1287 | if maximum_number_of_elements_per_batch == -1: 1288 | pxy = kde_xy.evaluate(arg.T) 1289 | else: 1290 | pxy = evaluate_gaussian_kde_in_batches(kde_xy, 1291 | arg.T, 1292 | maximum_number_of_elements_per_batch 1293 | =maximum_number_of_elements_per_batch) 1294 | else: 1295 | raise ValueError('the number of axes in arg must be either 1 or 2') 1296 | 1297 | px = kde_x.evaluate(x) 1298 | # print(f'x.shape={x.shape}') 1299 | # print(f'px.shape={px.shape}') 1300 | # print(f'pxy.shape={pxy.shape}') 1301 | integrand = np.array(pxy * log_fun(pxy / px)) 1302 | return integrand 1303 | 1304 | return - cubature(func=conditional_entropy_integrand, 1305 | ndim=2, 1306 | fdim=1, 1307 | xmin=np.array([x_min, y_min]), 1308 | xmax=np.array([x_max, y_max]), 1309 | adaptive='p', 1310 | vectorized=True, 1311 | abserr=eps_abs, 1312 | relerr=eps_rel)[0].item() 1313 | --------------------------------------------------------------------------------
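
To make the sample-based univariate API in `divergence/continuous.py` concrete, the following is a minimal usage sketch. It assumes the module is importable as `divergence.continuous` (consistent with the package layout above); the distributions, sample sizes, and seed are arbitrary illustrations, not part of the library.

```python
import numpy as np

# assumption: these functions are importable from divergence.continuous as defined above
from divergence.continuous import (
    continuous_cross_entropy_from_sample,
    continuous_relative_entropy_from_sample,
)

rng = np.random.default_rng(42)
sample_p = rng.normal(loc=0.0, scale=1.0, size=10_000)  # sample from p = N(0, 1)
sample_q = rng.normal(loc=0.5, scale=1.5, size=10_000)  # sample from q = N(0.5, 1.5**2)

# KL divergence D_KL(p||q) estimated via KDE and numerical integration, in nats
kl_nats = continuous_relative_entropy_from_sample(sample_p, sample_q, base=np.e)

# cross entropy H_q(p) = H(p) + D_KL(p||q), here measured in bits
cross_entropy_bits = continuous_cross_entropy_from_sample(sample_p, sample_q, base=2.0)

print(f'D_KL(p||q) ≈ {kl_nats:.4f} nats, H_q(p) ≈ {cross_entropy_bits:.4f} bits')
```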
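
The Jensen-Shannon divergence estimator can be exercised in the same way; unlike relative entropy it is symmetric in its arguments and, measured in bits, bounded by 1. A small sketch under the same import assumption:

```python
import numpy as np

from divergence.continuous import continuous_jensen_shannon_divergence_from_sample

rng = np.random.default_rng(0)
sample_p = rng.normal(loc=-1.0, scale=1.0, size=5_000)
sample_q = rng.normal(loc=1.0, scale=1.0, size=5_000)

jsd_pq = continuous_jensen_shannon_divergence_from_sample(sample_p, sample_q, base=2.0)
jsd_qp = continuous_jensen_shannon_divergence_from_sample(sample_q, sample_p, base=2.0)

# the two estimates should agree up to KDE and integration error
print(jsd_pq, jsd_qp)
```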
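
For the bivariate estimators (mutual information, joint entropy, conditional entropy), a correlated Gaussian sample makes a convenient sanity check, since a bivariate normal with correlation rho has I(X; Y) = -0.5 * ln(1 - rho^2) nats. Again a sketch under the same import assumption; the two-dimensional cubature can take a while for large samples.

```python
import numpy as np

from divergence.continuous import (
    continuous_conditional_entropy_from_samples,
    continuous_joint_entropy_from_samples,
    continuous_mutual_information_from_samples,
)

rng = np.random.default_rng(7)
rho = 0.6
cov = np.array([[1.0, rho],
                [rho, 1.0]])
xy = rng.multivariate_normal(mean=[0.0, 0.0], cov=cov, size=2_000)
sample_x, sample_y = xy[:, 0], xy[:, 1]

mi = continuous_mutual_information_from_samples(sample_x, sample_y)             # nats
h_xy = continuous_joint_entropy_from_samples(sample_x, sample_y)                # nats
h_y_given_x = continuous_conditional_entropy_from_samples(sample_x, sample_y)   # nats

# for rho = 0.6 the exact mutual information is -0.5 * ln(1 - 0.36) ≈ 0.223 nats
print(mi, h_xy, h_y_given_x)
```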