├── divergence ├── tests │ ├── __init__.py │ ├── test_intersection.py │ ├── test_continuous.py │ └── test_discrete.py ├── base.py ├── __init__.py ├── discrete.py └── continuous.py ├── LICENSE ├── README.md ├── .gitignore ├── setup.py └── notebooks └── Divergence.ipynb /divergence/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /divergence/tests/test_intersection.py: -------------------------------------------------------------------------------- 1 | from divergence import intersection 2 | 3 | 4 | def test_non_overlapping_1(): 5 | assert intersection(1, 2, 3, 4) is None 6 | 7 | 8 | def test_non_overlapping_2(): 9 | assert intersection(3, 4, 1, 2) is None 10 | 11 | 12 | def test_sub_interval(): 13 | assert intersection(1, 4, 2, 3) == (2, 3) 14 | 15 | 16 | def test_overlap(): 17 | assert intersection(2, 4, 3, 5) == (3, 4) 18 | 19 | 20 | def test_sub_overlap_2(): 21 | assert intersection(3, 5, 2, 4) == (3, 4) 22 | -------------------------------------------------------------------------------- /divergence/base.py: -------------------------------------------------------------------------------- 1 | import typing as tp 2 | 3 | import cocos.numerics as cn 4 | import numba 5 | import numpy as np 6 | 7 | 8 | def _select_vectorized_log_fun_for_base(base: float, gpu: bool = False) -> tp.Callable: 9 | if base == 2: 10 | if gpu: 11 | return cn.log2 12 | else: 13 | return np.log2 14 | if base == np.e: 15 | if gpu: 16 | return cn.log 17 | else: 18 | return np.log 19 | if base == 10: 20 | if gpu: 21 | return cn.log10 22 | else: 23 | return np.log10 24 | 25 | raise ValueError('base not supported') 26 | 27 | 28 | spec = [('base', numba.float64)] 29 | 30 | 31 | @numba.experimental.jitclass(spec) 32 | class Logarithm: 33 | def __init__(self, base): 34 | self.base = base 35 | 36 | def log(self, x): 37 | if self.base == 2: 38 | return np.log2(x) 39 | if self.base == np.e: 40 | return np.log(x) 41 | if self.base == 10: 42 | return np.log10(x) 43 | 44 | raise ValueError('base not supported') 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Michael Nowotny 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Divergence 2 | Divergence is a Python package to compute statistical measures of entropy and divergence from probability distributions and samples. 3 | 4 | The following functionality is provided: 5 | * (Information) Entropy [1], [2] 6 | * Cross Entropy: [3] 7 | * Relative Entropy or Kullback-Leibler (KL-) Divergence [4], [5] 8 | * Jensen-Shannon Divergence [6] 9 | * Joint Entropy [7] 10 | * Conditional Entropy [8] 11 | * Mutual Information [9] 12 | 13 | The units in which these entropy and divergence measures are calculated can be specified by the user. 14 | This is achieved by setting the argument `base`, to `2.0`, `10.0`, or `np.e`. 15 | 16 | In a Bayesian context, relative entropy can be used as a measure of the information gained by moving 17 | from a prior distribution `q` to a posterior distribution `p`. 18 | 19 | ## Installation 20 | 21 |
22 |     pip install divergence
23 | 
24 | 25 | ## Examples 26 | See the Jupyter notebook [Divergence](https://github.com/michaelnowotny/divergence/blob/master/notebooks/Divergence.ipynb). 27 | 28 | ## References: 29 | #### [1] https://en.wikipedia.org/wiki/Entropy_(information_theory) 30 | #### [2] Shannon, Claude Elwood (July 1948). "A Mathematical Theory of Communication". Bell System Technical Journal. 27 (3): 379–423 31 | #### [3] https://en.wikipedia.org/wiki/Cross_entropy 32 | #### [4] https://en.wikipedia.org/wiki/Kullback–Leibler_divergence 33 | #### [5] Kullback, S.; Leibler, R.A. (1951). "On information and sufficiency". Annals of Mathematical Statistics. 22 (1): 79–86 34 | #### [6] https://en.wikipedia.org/wiki/Jensen–Shannon_divergence 35 | #### [7] https://en.wikipedia.org/wiki/Joint_entropy 36 | #### [8] https://en.wikipedia.org/wiki/Conditional_entropy 37 | #### [9] https://en.wikipedia.org/wiki/Mutual_information 38 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | pip-wheel-metadata/ 25 | share/python-wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .nox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | *.py,cover 52 | .hypothesis/ 53 | .pytest_cache/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | .python-version 87 | 88 | # pipenv 89 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 90 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 91 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 92 | # install all needed dependencies. 93 | #Pipfile.lock 94 | 95 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 96 | __pypackages__/ 97 | 98 | # Celery stuff 99 | celerybeat-schedule 100 | celerybeat.pid 101 | 102 | # SageMath parsed files 103 | *.sage.py 104 | 105 | # Environments 106 | .env 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | # Note: To use the 'upload' functionality of this file, you must: 5 | # $ pip install twine 6 | 7 | import io 8 | import os 9 | import sys 10 | from shutil import rmtree 11 | 12 | from setuptools import find_packages, setup, Command 13 | 14 | 15 | # Package meta-data. 16 | NAME = 'divergence' 17 | DESCRIPTION = 'Information Theoretic Measures of Entropy and Divergence' 18 | URL = 'https://github.com/michaelnowotny/divergence' 19 | EMAIL = 'nowotnym@gmail.com' 20 | AUTHOR = 'Michael Christoph Nowotny' 21 | REQUIRES_PYTHON = '>=3.6.0' 22 | VERSION = "0.4.2" 23 | 24 | # What packages are required for this module to be executed? 25 | REQUIRED = [ 26 | "cocos", 27 | "cubature", 28 | "numba", 29 | "numpy", 30 | "pytest", 31 | "scipy", 32 | "statsmodels" 33 | ] 34 | 35 | # What packages are optional? 36 | EXTRAS = { 37 | # 'fancy feature': ['django'], 38 | } 39 | 40 | # The rest you shouldn't have to touch too much :) 41 | # ------------------------------------------------ 42 | # Except, perhaps the License and Trove Classifiers! 43 | # If you do change the License, remember to change the Trove Classifier for that! 44 | 45 | here = os.path.abspath(os.path.dirname(__file__)) 46 | 47 | # Import the README and use it as the long-description. 48 | # Note: this will only work if 'README.md' is present in your MANIFEST.in file! 49 | try: 50 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 51 | long_description = '\n' + f.read() 52 | except FileNotFoundError: 53 | long_description = DESCRIPTION 54 | 55 | # Load the package's __version__.py module as a dictionary. 56 | about = {} 57 | if not VERSION: 58 | project_slug = NAME.lower().replace("-", "_").replace(" ", "_") 59 | with open(os.path.join(here, project_slug, '__version__.py')) as f: 60 | exec(f.read(), about) 61 | else: 62 | about['__version__'] = VERSION 63 | 64 | 65 | class UploadCommand(Command): 66 | """Support setup.py upload.""" 67 | 68 | description = 'Build and publish the package.' 
69 | user_options = [] 70 | 71 | @staticmethod 72 | def status(s): 73 | """Prints things in bold.""" 74 | print('\033[1m{0}\033[0m'.format(s)) 75 | 76 | def initialize_options(self): 77 | pass 78 | 79 | def finalize_options(self): 80 | pass 81 | 82 | def run(self): 83 | try: 84 | self.status('Removing previous builds…') 85 | rmtree(os.path.join(here, 'dist')) 86 | except OSError: 87 | pass 88 | 89 | self.status('Building Source and Wheel (universal) distribution…') 90 | os.system( 91 | '{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) 92 | 93 | self.status('Uploading the package to PyPI via Twine…') 94 | os.system('twine upload dist/*') 95 | 96 | self.status('Pushing git tags…') 97 | os.system('git tag v{0}'.format(about['__version__'])) 98 | os.system('git push --tags') 99 | 100 | sys.exit() 101 | 102 | 103 | # Where the magic happens: 104 | setup( 105 | name=NAME, 106 | version=VERSION, 107 | # version=versioneer.get_version(), 108 | description=DESCRIPTION, 109 | long_description=long_description, 110 | long_description_content_type='text/markdown', 111 | author=AUTHOR, 112 | author_email=EMAIL, 113 | python_requires=REQUIRES_PYTHON, 114 | url=URL, 115 | packages=find_packages(exclude=('examples', )), 116 | # If your package is a single module, use this instead of 'packages': 117 | # py_modules=['mypackage'], 118 | 119 | # entry_points={ 120 | # 'console_scripts': ['mycli=mymodule:cli'], 121 | # }, 122 | install_requires=REQUIRED, 123 | extras_require=EXTRAS, 124 | include_package_data=True, 125 | license='MIT', 126 | classifiers=[ 127 | # Trove classifiers 128 | # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers 129 | 'License :: OSI Approved :: MIT License', 130 | 'Programming Language :: Python', 131 | 'Programming Language :: Python :: 3', 132 | 'Programming Language :: Python :: 3.6', 133 | 'Programming Language :: Python :: 3.7', 134 | 'Programming Language :: Python :: 3.8', 135 | 'Programming Language :: Python :: Implementation :: CPython', 136 | ], 137 | # $ setup.py publish support. 
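    # Invoke with "python setup.py upload": the UploadCommand registered below removes old
    # builds, creates sdist/wheel distributions, uploads them via twine, and tags/pushes the release.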
138 | cmdclass={ 139 | 'upload': UploadCommand, 140 | }, 141 | ) -------------------------------------------------------------------------------- /divergence/__init__.py: -------------------------------------------------------------------------------- 1 | from .continuous import * 2 | from .discrete import ( 3 | discrete_entropy, 4 | discrete_relative_entropy, 5 | discrete_cross_entropy, 6 | discrete_jensen_shannon_divergence, 7 | discrete_mutual_information, 8 | discrete_joint_entropy, 9 | discrete_conditional_entropy_of_y_given_x 10 | ) 11 | 12 | 13 | def entropy_from_samples(sample: np.ndarray, 14 | base: float = np.e, 15 | discrete: bool = False) -> float: 16 | if discrete: 17 | return discrete_entropy(sample=sample, 18 | base=base) 19 | else: 20 | return continuous_entropy_from_sample(sample=sample, 21 | base=base) 22 | 23 | 24 | def cross_entropy_from_samples(sample_p: np.ndarray, 25 | sample_q: np.ndarray, 26 | base: float = np.e, 27 | discrete: bool = False) -> float: 28 | if discrete: 29 | return discrete_cross_entropy(sample_p=sample_p, 30 | sample_q=sample_q, 31 | base=base) 32 | else: 33 | return continuous_cross_entropy_from_sample(sample_p=sample_p, 34 | sample_q=sample_q, 35 | base=base) 36 | 37 | 38 | def relative_entropy_from_samples(sample_p: np.ndarray, 39 | sample_q: np.ndarray, 40 | base: float = np.e, 41 | discrete: bool = False) -> float: 42 | if discrete: 43 | return discrete_relative_entropy(sample_p=sample_p, 44 | sample_q=sample_q, 45 | base=base) 46 | else: 47 | return continuous_relative_entropy_from_sample(sample_p=sample_p, 48 | sample_q=sample_q, 49 | base=base) 50 | 51 | 52 | def jensen_shannon_divergence_from_samples(sample_p: np.ndarray, 53 | sample_q: np.ndarray, 54 | base: float = np.e, 55 | discrete: bool = False) -> float: 56 | if discrete: 57 | return discrete_jensen_shannon_divergence(sample_p=sample_p, 58 | sample_q=sample_q, 59 | base=base) 60 | else: 61 | return continuous_jensen_shannon_divergence_from_sample(sample_p=sample_p, 62 | sample_q=sample_q, 63 | base=base) 64 | 65 | 66 | def mutual_information_from_samples(sample_x: np.ndarray, 67 | sample_y: np.ndarray, 68 | base: float = np.e, 69 | discrete: bool = False) -> float: 70 | if discrete: 71 | return discrete_mutual_information(sample_x=sample_x, 72 | sample_y=sample_y, 73 | base=base) 74 | else: 75 | return continuous_mutual_information_from_samples(sample_x=sample_x, 76 | sample_y=sample_y, 77 | base=base) 78 | 79 | 80 | def joint_entropy_from_samples(sample_x: np.ndarray, 81 | sample_y: np.ndarray, 82 | base: float = np.e, 83 | discrete: bool = False) -> float: 84 | if discrete: 85 | return discrete_joint_entropy(sample_x=sample_x, 86 | sample_y=sample_y, 87 | base=base) 88 | else: 89 | return continuous_joint_entropy_from_samples(sample_x=sample_x, 90 | sample_y=sample_y, 91 | base=base) 92 | 93 | 94 | def conditional_entropy_from_samples(sample_x: np.ndarray, 95 | sample_y: np.ndarray, 96 | base: float = np.e, 97 | discrete: bool = False) -> float: 98 | if discrete: 99 | return discrete_conditional_entropy_of_y_given_x(sample_x=sample_x, 100 | sample_y=sample_y, 101 | base=base) 102 | else: 103 | return continuous_conditional_entropy_from_samples(sample_x=sample_x, 104 | sample_y=sample_y, 105 | base=base) 106 | -------------------------------------------------------------------------------- /divergence/tests/test_continuous.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pytest 3 | import scipy as sp 4 | import 
statsmodels.api as sm 5 | import typing as tp 6 | 7 | from divergence import * 8 | from divergence.base import _select_vectorized_log_fun_for_base 9 | 10 | 11 | def entropy_of_normal_distribution(sigma: float, 12 | log_fun: tp.Callable = np.log) \ 13 | -> float: 14 | return 0.5 * (1.0 + log_fun(2 * np.pi * sigma**2)) 15 | 16 | 17 | def relative_entropy_between_two_normal_distributions(mu_1: float, 18 | sigma_1: float, 19 | mu_2: float, 20 | sigma_2: float, 21 | log_fun: tp.Callable = np.log) \ 22 | -> float: 23 | return ((mu_1 - mu_2)**2 + sigma_1**2 - sigma_2**2) / (2 * sigma_2**2) + \ 24 | log_fun(sigma_2/sigma_1) 25 | 26 | 27 | def cross_entropy_between_two_normal_distributions(mu_1: float, 28 | sigma_1: float, 29 | mu_2: float, 30 | sigma_2: float, 31 | log_fun: tp.Callable = np.log) \ 32 | -> float: 33 | return entropy_of_normal_distribution(sigma_1, log_fun=log_fun) + \ 34 | relative_entropy_between_two_normal_distributions(mu_1=mu_1, 35 | sigma_1=sigma_1, 36 | mu_2=mu_2, 37 | sigma_2=sigma_2, 38 | log_fun=log_fun) 39 | 40 | 41 | def mutual_information_for_bivariate_normal_distribution(rho: float) -> float: 42 | return - 0.5 * np.log(1.0 - rho**2) 43 | 44 | 45 | def generate_normal_sample(mu: float, 46 | sigma: float, 47 | n: int, 48 | antithetic: bool = False) -> np.ndarray: 49 | z = np.random.randn(n) 50 | if antithetic: 51 | z = np.hstack((z, -z)) 52 | 53 | return mu + sigma * z 54 | 55 | 56 | # fix random seed for reproducibility 57 | np.random.seed(42) 58 | 59 | # set parameters of the normal distributions p and q 60 | mu_p = 2 61 | sigma_p = 3 62 | mu_q = 1 63 | sigma_q = 2 64 | 65 | # draw samples from each normal distribution 66 | n = 10000 67 | 68 | sample_p = generate_normal_sample(mu_p, sigma_p, n=n, antithetic=True) 69 | sample_q = generate_normal_sample(mu_q, sigma_q, n=n, antithetic=True) 70 | 71 | # fit a non-parametric density estimate for both distributions 72 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 73 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 74 | kde_p.fit() 75 | kde_q.fit() 76 | 77 | # construct exact normal densities for p and q 78 | pdf_p = lambda x: sp.stats.norm.pdf(x, mu_p, sigma_p) 79 | pdf_q = lambda x: sp.stats.norm.pdf(x, mu_q, sigma_q) 80 | 81 | # compute support for kernel density estimates 82 | p_min = min(kde_p.support) 83 | p_max = max(kde_p.support) 84 | q_min = min(kde_q.support) 85 | q_max = max(kde_q.support) 86 | combined_min = min(p_min, q_min) 87 | combined_max = max(p_max, q_max) 88 | 89 | 90 | @pytest.mark.parametrize("sigma, sample", ((sigma_p, sample_p), (sigma_q, sample_q))) 91 | def test_entropy(sigma: float, sample: np.ndarray, base: float = np.e): 92 | log_fun = _select_vectorized_log_fun_for_base(base) 93 | 94 | assert np.isclose(entropy_from_samples(sample, base=base, discrete=False), 95 | entropy_of_normal_distribution(sigma, log_fun=log_fun), 96 | rtol=1e-2, 97 | atol=1e-2) 98 | 99 | 100 | def test_cross_entropy(base: float = np.e): 101 | log_fun = _select_vectorized_log_fun_for_base(base) 102 | 103 | assert np.isclose(cross_entropy_from_samples(sample_p, 104 | sample_q, 105 | base=base, 106 | discrete=False), 107 | cross_entropy_between_two_normal_distributions(mu_p, 108 | sigma_p, 109 | mu_q, 110 | sigma_q, 111 | log_fun=log_fun), 112 | rtol=1e-1, 113 | atol=1e-1) 114 | 115 | 116 | def test_relative_entropy(base: float = np.e): 117 | log_fun = _select_vectorized_log_fun_for_base(base) 118 | 119 | assert np.isclose(relative_entropy_from_samples(sample_p, 120 | sample_q, 121 | base=base, 122 | discrete=False), 
123 | relative_entropy_between_two_normal_distributions(mu_p, 124 | sigma_p, 125 | mu_q, 126 | sigma_q, 127 | log_fun=log_fun), 128 | rtol=1e-1, 129 | atol=1e-1) 130 | 131 | 132 | # set parameters of the normal distributions x and y 133 | mu_x = 2 134 | sigma_x = 3 135 | mu_y = 1 136 | sigma_y = 2 137 | rho = 0.5 138 | 139 | # draw 1000 samples from each normal distribution 140 | n = 10000 141 | z = np.random.randn(n) 142 | sample_x = mu_x + sigma_x * z 143 | sample_y = mu_y + sigma_y * (rho * z + np.sqrt(1.0 - rho**2) * np.random.randn(n)) 144 | 145 | # fit a non-parametric density estimate for both distributions 146 | kde_x = sm.nonparametric.KDEUnivariate(sample_x) 147 | kde_y = sm.nonparametric.KDEUnivariate(sample_y) 148 | kde_x.fit() # Estimate the densities 149 | kde_y.fit() # Estimate the densities 150 | kde_xy = sp.stats.gaussian_kde([sample_x, sample_y]) 151 | 152 | # construct exact normal densities for x and y 153 | pdf_x = lambda x: sp.stats.norm.pdf(x, mu_x, sigma_x) 154 | pdf_y = lambda y: sp.stats.norm.pdf(y, mu_y, sigma_y) 155 | pdf_xy = sp.stats.multivariate_normal(mean=[mu_x, mu_y], 156 | cov=[[sigma_x**2, rho * sigma_x * sigma_y], 157 | [rho * sigma_x * sigma_y, sigma_y**2]]).pdf 158 | 159 | # # compute support for kernel density estimates 160 | x_min = min(kde_x.support) 161 | x_max = max(kde_x.support) 162 | y_min = min(kde_y.support) 163 | y_max = max(kde_y.support) 164 | 165 | 166 | @pytest.fixture 167 | def mutual_information_from_bivariate_normal_samples() -> float: 168 | return continuous_mutual_information_from_samples(sample_x=sample_x, 169 | sample_y=sample_y) 170 | 171 | 172 | @pytest.fixture 173 | def joint_entropy_of_x_and_y() -> float: 174 | return joint_entropy_from_samples(sample_x, sample_y) 175 | 176 | 177 | @pytest.fixture 178 | def conditional_entropy_of_y_given_x_from_bivariate_normal_samples() -> float: 179 | return conditional_entropy_from_samples(sample_x, sample_y) 180 | 181 | 182 | @pytest.fixture 183 | def conditional_entropy_of_x_given_y_from_bivariate_normal_samples() -> float: 184 | return conditional_entropy_from_samples(sample_y, sample_x) 185 | 186 | 187 | def test_mutual_information(mutual_information_from_bivariate_normal_samples): 188 | theoretical_mutual_information = mutual_information_for_bivariate_normal_distribution(rho) 189 | 190 | assert np.isclose(theoretical_mutual_information, 191 | mutual_information_from_bivariate_normal_samples, 192 | rtol=1e-1, 193 | atol=1e-1) 194 | 195 | 196 | def test_joint_entropy_via_conditional_entropy_of_y_given_x( 197 | joint_entropy_of_x_and_y, 198 | conditional_entropy_of_y_given_x_from_bivariate_normal_samples): 199 | np.isclose(entropy_from_samples(sample_x) + 200 | conditional_entropy_of_y_given_x_from_bivariate_normal_samples, 201 | joint_entropy_of_x_and_y, 202 | rtol=1e-2, 203 | atol=1e-3) 204 | 205 | 206 | def test_joint_entropy_via_conditional_entropy_of_x_given_y( 207 | joint_entropy_of_x_and_y, 208 | conditional_entropy_of_x_given_y_from_bivariate_normal_samples): 209 | np.isclose(entropy_from_samples(sample_y) + 210 | conditional_entropy_of_x_given_y_from_bivariate_normal_samples, 211 | joint_entropy_of_x_and_y, 212 | rtol=1e-2, 213 | atol=1e-3) 214 | -------------------------------------------------------------------------------- /divergence/tests/test_discrete.py: -------------------------------------------------------------------------------- 1 | import numbers 2 | import numpy as np 3 | import pytest 4 | import scipy as sp 5 | import typing as tp 6 | 7 | from divergence.base 
import _select_vectorized_log_fun_for_base 8 | 9 | from divergence.discrete import ( 10 | discrete_entropy, 11 | _construct_frequencies_for_one_sample, 12 | _construct_frequencies_for_two_samples, 13 | discrete_relative_entropy, 14 | _construct_unique_combinations_and_counts_from_two_samples, 15 | _get_index_for_combination, 16 | _get_count_for_combination, 17 | _get_index_of_value_in_1d_array, 18 | _get_count_for_value, 19 | discrete_mutual_information, 20 | discrete_joint_entropy, 21 | discrete_conditional_entropy_of_y_given_x 22 | ) 23 | 24 | 25 | multinomial_sample_q_1 = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1]) 26 | multinomial_sample_p_1 = np.array([2, 2, 3, 2, 3]) 27 | expected_frequencies_q_1 = np.array([0.3, 0.4]) 28 | expected_frequencies_p_1 = np.array([0.6, 0.4]) 29 | 30 | multinomial_sample_q_2 = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1]) 31 | multinomial_sample_p_2 = np.array([1, 2, 3, 2, 3]) 32 | expected_frequencies_q_2 = np.array([0.3, 0.3, 0.4]) 33 | expected_frequencies_p_2 = np.array([0.2, 0.4, 0.4]) 34 | 35 | 36 | def _get_base_from_log_fun(log_fun: tp.Callable): 37 | if log_fun is np.log: 38 | base = np.e 39 | elif log_fun is np.log2: 40 | base = 2 41 | elif log_fun is np.log10: 42 | base = 10 43 | else: 44 | raise ValueError('log_fun is not supported') 45 | 46 | return base 47 | 48 | 49 | def discrete_entropy_scipy(sample: np.ndarray, log_fun: tp.Callable = np.log) -> float: 50 | base = _get_base_from_log_fun(log_fun) 51 | return sp.stats.entropy(_construct_frequencies_for_one_sample(sample), base=base) 52 | 53 | 54 | @pytest.mark.parametrize("sample", (multinomial_sample_q_1, 55 | multinomial_sample_p_1, 56 | multinomial_sample_q_2, 57 | multinomial_sample_p_2)) 58 | @pytest.mark.parametrize("base", (np.e, 2.0, 10.0)) 59 | def test_entropy(sample: np.ndarray, base: float): 60 | log_fun = _select_vectorized_log_fun_for_base(base) 61 | entropy_from_divergence = discrete_entropy(sample=sample, base=base) 62 | entropy_from_scipy = discrete_entropy_scipy(sample=sample, log_fun=log_fun) 63 | assert np.isclose(entropy_from_divergence, entropy_from_scipy) 64 | 65 | 66 | @pytest.mark.parametrize("sample_p, sample_q, expected_frequencies_p, expected_frequencies_q", 67 | [ 68 | (multinomial_sample_p_1, multinomial_sample_q_1, expected_frequencies_p_1, expected_frequencies_q_1), 69 | (multinomial_sample_p_2, multinomial_sample_q_2, expected_frequencies_p_2, expected_frequencies_q_2) 70 | ]) 71 | def test_construct_frequencies(sample_p: np.ndarray, 72 | sample_q: np.ndarray, 73 | expected_frequencies_p: np.ndarray, 74 | expected_frequencies_q: np.ndarray): 75 | combined_sample = np.hstack((sample_p, sample_q)) 76 | unique_combined = np.unique(combined_sample) 77 | 78 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 79 | frequencies_q = counts_q / len(sample_q) 80 | 81 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 82 | frequencies_p = counts_p / len(sample_p) 83 | 84 | combined_frequencies_p, combined_frequencies_q = \ 85 | _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p, 86 | sorted_q_realizations=unique_q, 87 | sorted_q_frequencies=frequencies_q, 88 | sorted_p_frequencies=frequencies_p, 89 | sorted_combined_realizations=unique_combined) 90 | 91 | assert np.allclose(combined_frequencies_p, expected_frequencies_p) 92 | assert np.allclose(combined_frequencies_q, expected_frequencies_q) 93 | 94 | 95 | def test_construct_frequencies_error_q_zero_and_p_nonzero(): 96 | sample_q = np.array([2, 2, 3, 2, 3, 3, 3, 2, 2, 2]) 97 | sample_p = 
np.array([1, 2, 3, 2, 3]) 98 | 99 | combined_sample = np.hstack((sample_p, sample_q)) 100 | unique_combined = np.unique(combined_sample) 101 | 102 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 103 | frequencies_q = counts_q / len(sample_q) 104 | 105 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 106 | frequencies_p = counts_p / len(sample_p) 107 | 108 | with pytest.raises(ValueError): 109 | combined_frequencies_p, combined_frequencies_q = \ 110 | _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p, 111 | sorted_q_realizations=unique_q, 112 | sorted_q_frequencies=frequencies_q, 113 | sorted_p_frequencies=frequencies_p, 114 | sorted_combined_realizations=unique_combined) 115 | 116 | 117 | def _discrete_relative_entropy_slow(sample_p: np.ndarray, 118 | sample_q: np.ndarray, 119 | log_fun: tp.Callable = np.log): 120 | combined_sample = np.hstack((sample_p, sample_q)) 121 | unique_combined = np.unique(combined_sample) 122 | 123 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 124 | frequencies_q = counts_q / len(sample_q) 125 | realization_to_frequency_dict_q = dict(zip(unique_q, frequencies_q)) 126 | 127 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 128 | frequencies_p = counts_p / len(sample_p) 129 | realization_to_frequency_dict_p = dict(zip(unique_p, frequencies_p)) 130 | 131 | combined_frequencies_q = np.array([realization_to_frequency_dict_q.get(realization, 0.0) 132 | for realization 133 | in unique_combined]) 134 | 135 | combined_frequencies_p = np.array([realization_to_frequency_dict_p.get(realization, 0.0) 136 | for realization 137 | in unique_combined]) 138 | 139 | base = _get_base_from_log_fun(log_fun) 140 | # if log_fun is np.log: 141 | # base = np.e 142 | # elif log_fun is np.log2: 143 | # base = 2 144 | # elif log_fun is np.log10: 145 | # base = 10 146 | # else: 147 | # raise ValueError('log_fun is not supported') 148 | 149 | return sp.stats.entropy(pk=combined_frequencies_p, qk=combined_frequencies_q, base=base) 150 | 151 | 152 | @pytest.mark.parametrize("sample_p, sample_q", ((multinomial_sample_p_1, multinomial_sample_q_1), 153 | (multinomial_sample_p_2, multinomial_sample_q_2))) 154 | @pytest.mark.parametrize("base", (np.e, 2.0, 10.0)) 155 | def test_compare_slow_and_fast_implementations_of_relative_entropy(sample_p: np.ndarray, 156 | sample_q: np.ndarray, 157 | base: float): 158 | log_fun = _select_vectorized_log_fun_for_base(base) 159 | 160 | relative_entropy_from_slow_calculation = \ 161 | _discrete_relative_entropy_slow(sample_p=sample_p, 162 | sample_q=sample_q, 163 | log_fun=log_fun) 164 | 165 | relative_entropy_from_fast_calculation = \ 166 | discrete_relative_entropy(sample_p=sample_p, 167 | sample_q=sample_q, 168 | base=base) 169 | 170 | assert np.isclose(relative_entropy_from_slow_calculation, 171 | relative_entropy_from_fast_calculation) 172 | 173 | 174 | @pytest.fixture 175 | def sample_x() -> np.ndarray: 176 | return np.array([1, 1, 3, 1, 2, 3]) 177 | 178 | 179 | @pytest.fixture 180 | def sample_y() -> np.ndarray: 181 | return np.array([1, 1, 1, 3, 2, 1]) 182 | 183 | 184 | def test_construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y): 185 | unique_combinations, counts = \ 186 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 187 | 188 | print('unique combinations:') 189 | print(unique_combinations) 190 | 191 | print('counts') 192 | print(counts) 193 | 194 | assert np.all(unique_combinations == np.array([[1, 1], [1, 3], [2, 2], 
[3, 1]])) 195 | assert np.all(counts == np.array([2, 1, 1, 2])) 196 | 197 | 198 | @pytest.mark.parametrize('combination, index', [(np.array([1, 1]), 0), 199 | (np.array([1, 3]), 1), 200 | (np.array([2, 2]), 2), 201 | (np.array([3, 1]), 3)]) 202 | def test_get_index_for_combination(combination: np.ndarray, 203 | index: int, 204 | sample_x: np.ndarray, 205 | sample_y: np.ndarray): 206 | unique_combinations, counts = \ 207 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 208 | 209 | assert index == _get_index_for_combination(combination=combination, 210 | unique_combinations=unique_combinations) 211 | 212 | 213 | @pytest.mark.parametrize('combination, count', [(np.array([1, 1]), 2), 214 | (np.array([1, 3]), 1), 215 | (np.array([2, 2]), 1), 216 | (np.array([3, 1]), 2)]) 217 | def test_get_count_for_combination(combination: np.ndarray, 218 | count: int, 219 | sample_x: np.ndarray, 220 | sample_y: np.ndarray): 221 | unique_combinations, counts = \ 222 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 223 | 224 | assert count == _get_count_for_combination(combination=combination, 225 | unique_combinations=unique_combinations, 226 | counts=counts) 227 | 228 | 229 | @pytest.mark.parametrize('value, index', [(1, 0), (2, 1), (3, 2)]) 230 | def test_get_index_for_value(value: numbers.Number, 231 | index: int, 232 | sample_x: np.ndarray): 233 | unique_values = np.unique(sample_x) 234 | 235 | assert index == _get_index_of_value_in_1d_array(value, unique_values) 236 | 237 | 238 | @pytest.mark.parametrize('value, count', [(1, 3), (2, 1), (3, 2)]) 239 | def test_get_count_for_value(value: numbers.Number, 240 | count: int, 241 | sample_x: np.ndarray): 242 | unique_values, counts = np.unique(sample_x, return_counts=True) 243 | 244 | assert count == _get_count_for_value(value, 245 | unique_values=unique_values, 246 | counts=counts) 247 | 248 | 249 | @pytest.mark.parametrize('sample', [np.array([1, 1, 3, 1, 2, 3]), 250 | np.array([1, 1, 1, 3, 2, 1]), 251 | np.array([1, 1, 1, 1, 1, 1])]) 252 | def test_compare_mutual_information_of_self_with_entropy(sample): 253 | assert discrete_entropy(sample) == discrete_mutual_information(sample, sample) 254 | 255 | 256 | @pytest.mark.parametrize('sample_x, sample_y', 257 | [(np.array([1, 1, 3, 1, 2, 3]), np.array([1, 1, 1, 3, 2, 1])), 258 | (np.array([1, 1, 1, 1, 1, 1]), np.array([2, 2, 2, 2, 2, 2]))]) 259 | def test_symmetry_of_mutual_information(sample_x, sample_y): 260 | assert discrete_mutual_information(sample_x, sample_y) == \ 261 | discrete_mutual_information(sample_y, sample_x) 262 | 263 | 264 | def test_discrete_conditional_entropy(sample_x: np.ndarray, sample_y: np.ndarray): 265 | joint_entropy = discrete_joint_entropy(sample_x=sample_x, sample_y=sample_y) 266 | entropy_x = discrete_entropy(sample_x) 267 | entropy_y = discrete_entropy(sample_y) 268 | conditional_entropy_of_y_given_x = \ 269 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_x, 270 | sample_y=sample_y) 271 | 272 | conditional_entropy_of_x_given_y = \ 273 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_y, 274 | sample_y=sample_x) 275 | 276 | assert np.isclose(entropy_x - conditional_entropy_of_x_given_y, 277 | entropy_y - conditional_entropy_of_y_given_x) 278 | 279 | assert np.isclose(joint_entropy, entropy_x + conditional_entropy_of_y_given_x) 280 | assert np.isclose(joint_entropy, entropy_y + conditional_entropy_of_x_given_y) 281 | 282 | 283 | def test_discrete_mutual_information_and_conditional_entropy(sample_x: 
np.ndarray, 284 | sample_y: np.ndarray): 285 | mutual_information = discrete_mutual_information(sample_x=sample_x, sample_y=sample_y) 286 | 287 | entropy_x = discrete_entropy(sample_x) 288 | entropy_y = discrete_entropy(sample_y) 289 | conditional_entropy_of_y_given_x = \ 290 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_x, 291 | sample_y=sample_y) 292 | 293 | conditional_entropy_of_x_given_y = \ 294 | discrete_conditional_entropy_of_y_given_x(sample_x=sample_y, 295 | sample_y=sample_x) 296 | 297 | assert np.isclose(mutual_information, entropy_x - conditional_entropy_of_x_given_y) 298 | assert np.isclose(mutual_information, entropy_y - conditional_entropy_of_y_given_x) 299 | -------------------------------------------------------------------------------- /divergence/discrete.py: -------------------------------------------------------------------------------- 1 | import numba 2 | import numbers 3 | import numpy as np 4 | import typing as tp 5 | 6 | from divergence.base import ( 7 | _select_vectorized_log_fun_for_base, 8 | Logarithm 9 | ) 10 | 11 | 12 | def _construct_counts_for_one_sample(sample: np.ndarray) -> np.ndarray: 13 | """ 14 | Compute the count (i.e. number of occurrences) for each realization in the sample. 15 | The realizations in the argument `sample` do not need to be sorted. But the output counts will 16 | correspond to sorted realizations. 17 | 18 | Parameters 19 | ---------- 20 | sample: a sample from the discrete distribution 21 | 22 | Returns 23 | ------- 24 | Counts of realizations from a sample 25 | 26 | """ 27 | _, counts = np.unique(sample, return_counts=True) 28 | return counts 29 | 30 | 31 | def _construct_frequencies_for_one_sample(sample: np.ndarray) -> np.ndarray: 32 | """ 33 | Compute the frequency (i.e. number of occurrences) for each realization in the sample. 34 | The realizations in the argument `sample` do not need to be sorted. But the output frequencies 35 | will correspond to sorted realizations. 36 | 37 | Parameters 38 | ---------- 39 | sample: a sample from the discrete distribution 40 | 41 | Returns 42 | ------- 43 | Frequencies of realizations from a sample 44 | 45 | """ 46 | return _construct_counts_for_one_sample(sample) / len(sample) 47 | 48 | 49 | def discrete_entropy(sample: np.ndarray, 50 | base: float = np.e) -> float: 51 | """ 52 | Approximate the entropy of a discrete distribution 53 | 54 | H(p) = - E_p[log(p)] 55 | 56 | from a sample. 57 | 58 | Parameters 59 | ---------- 60 | sample: a sample from the discrete distribution 61 | base: the base of the logarithm used to control the units of measurement for the result 62 | 63 | Returns 64 | ------- 65 | An approximation of the entropy of the discrete distribution from which the sample is drawn. 66 | 67 | """ 68 | log_fun = _select_vectorized_log_fun_for_base(base) 69 | frequencies = _construct_frequencies_for_one_sample(sample) 70 | return - np.sum(frequencies * log_fun(frequencies)) 71 | 72 | 73 | @numba.njit 74 | def _construct_frequencies_for_two_samples(sorted_p_realizations: np.ndarray, 75 | sorted_p_frequencies: np.ndarray, 76 | sorted_q_realizations: np.ndarray, 77 | sorted_q_frequencies: np.ndarray, 78 | sorted_combined_realizations: np.ndarray) \ 79 | -> tp.Tuple[np.ndarray, np.ndarray]: 80 | """ 81 | Construct two NumPy arrays of frequencies for corresponding observations from sorted 82 | realizations and frequencies from two samples. 
If a realization in the sample from q is not in 83 | the sample from p or has frequency zero then it is not included in either of the output 84 | frequency arrays. 85 | 86 | Parameters 87 | ---------- 88 | sorted_p_realizations: NumPy array of unique realizations in the sample from p 89 | sorted_p_frequencies: The frequency of each realization in `sorted_p_realizations` 90 | sorted_q_realizations: NumPy array of unique realizations in the sample from q 91 | sorted_q_frequencies: The frequency of each realization in `sorted_q_realizations` 92 | sorted_combined_realizations: NumPy array of unique realizations in the samples from p and q 93 | combined 94 | 95 | Returns 96 | ------- 97 | Two NumPy arraysof the same length with frequencies for corresponding observations that have 98 | positive weight in the sample from p. 99 | 100 | """ 101 | assert len(sorted_p_realizations) == len(sorted_p_frequencies) 102 | assert len(sorted_q_realizations) == len(sorted_q_frequencies) 103 | 104 | p_source_index = 0 105 | q_source_index = 0 106 | p_target_index = 0 107 | q_target_index = 0 108 | 109 | p_frequencies = np.zeros((len(sorted_p_realizations, ))) 110 | q_frequencies = np.zeros((len(sorted_p_realizations, ))) 111 | 112 | for combined_index in range(len(sorted_combined_realizations)): 113 | realization = sorted_combined_realizations[combined_index] 114 | 115 | if sorted_p_realizations[p_source_index] != realization: 116 | if sorted_q_realizations[q_source_index] == realization: 117 | q_source_index += 1 118 | continue 119 | 120 | if sorted_p_frequencies[p_source_index] == 0.0: 121 | p_source_index += 1 122 | if sorted_q_realizations[q_source_index] == realization: 123 | q_source_index += 1 124 | continue 125 | 126 | if sorted_q_realizations[q_source_index] != realization or \ 127 | sorted_q_realizations[q_source_index] == 0.0: 128 | raise ValueError('q(x) is zero but p(x) is not') 129 | # if sorted_p_frequencies[p_source_index] != 0.0: # we know that is true 130 | # # if q(x) == 0 we must have p(x) == 0, which is not the case here 131 | # raise ValueError('q(x) is zero but p(x) is not') 132 | # else: 133 | # continue 134 | 135 | p_frequencies[p_target_index] = sorted_p_frequencies[p_source_index] 136 | q_frequencies[q_target_index] = sorted_q_frequencies[q_source_index] 137 | p_source_index += 1 138 | q_source_index += 1 139 | p_target_index += 1 140 | q_target_index += 1 141 | 142 | return p_frequencies[:p_target_index], q_frequencies[:q_target_index] 143 | 144 | 145 | def discrete_relative_entropy(sample_p: np.ndarray, 146 | sample_q: np.ndarray, 147 | base: float = np.e): 148 | """ 149 | Approximate the relative entropy of the discrete distribution q relative to the discrete 150 | distribution p 151 | 152 | D_KL(p||q) = E_p [log(p/q)] 153 | 154 | from samples of these distributions. 155 | 156 | Parameters 157 | ---------- 158 | sample_p: sample from the distribution p 159 | sample_q: sample from the distribution q 160 | base: the base of the logarithm used to control the units of measurement for the result 161 | 162 | Returns 163 | ------- 164 | The relative entropy of the distribution q relative to the distribution p. 
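    Examples
    --------
    A minimal sketch using the multinomial samples from the accompanying test suite
    (the numerical value of the result is not reproduced here):

    >>> p = np.array([2, 2, 3, 2, 3])
    >>> q = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1])
    >>> discrete_relative_entropy(p, q, base=2.0)  # doctest: +SKIP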
165 | 166 | """ 167 | log_fun = _select_vectorized_log_fun_for_base(base) 168 | combined_sample = np.hstack((sample_p, sample_q)) 169 | unique_combined = np.unique(combined_sample) 170 | 171 | unique_q, counts_q = np.unique(sample_q, return_counts=True) 172 | frequencies_q = counts_q / len(sample_q) 173 | 174 | unique_p, counts_p = np.unique(sample_p, return_counts=True) 175 | frequencies_p = counts_p / len(sample_p) 176 | 177 | combined_frequencies_p, combined_frequencies_q = \ 178 | _construct_frequencies_for_two_samples(sorted_p_realizations=unique_p, 179 | sorted_q_realizations=unique_q, 180 | sorted_q_frequencies=frequencies_q, 181 | sorted_p_frequencies=frequencies_p, 182 | sorted_combined_realizations=unique_combined) 183 | 184 | return np.sum(combined_frequencies_p * log_fun(combined_frequencies_p / combined_frequencies_q)) 185 | 186 | 187 | def discrete_cross_entropy(sample_p: np.ndarray, 188 | sample_q: np.ndarray, 189 | base: float = np.e): 190 | """ 191 | Approximate the cross entropy of the discrete distribution q relative to the discrete 192 | distribution p 193 | 194 | H_q(p) = - E_p [log(q)] 195 | 196 | from samples of these distributions. 197 | 198 | Parameters 199 | ---------- 200 | sample_p: sample from the distribution p 201 | sample_q: sample from the distribution q 202 | base: the base of the logarithm used to control the units of measurement for the result 203 | 204 | Returns 205 | ------- 206 | The cross entropy of the distribution q relative to the distribution p. 207 | 208 | """ 209 | return discrete_relative_entropy(sample_p=sample_p, 210 | sample_q=sample_q, 211 | base=base) + \ 212 | discrete_entropy(sample=sample_p, 213 | base=base) 214 | 215 | 216 | def discrete_jensen_shannon_divergence(sample_p: np.ndarray, 217 | sample_q: np.ndarray, 218 | base: float = np.e): 219 | """ 220 | Approximate the Jensen-Shannon divergence between discrete distributions p and q 221 | 222 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 223 | 224 | from samples of these distributions. 225 | 226 | Parameters 227 | ---------- 228 | sample_p: sample from the distribution p 229 | sample_q: sample from the distribution q 230 | base: the base of the logarithm used to control the units of measurement for the result 231 | 232 | Returns 233 | ------- 234 | The Jensen-Shannon divergence between distributions p and q. 235 | 236 | """ 237 | m = np.hstack((sample_p, sample_q)) 238 | D_PM = discrete_relative_entropy(sample_p=sample_p, sample_q=m, base=base) 239 | D_QM = discrete_relative_entropy(sample_p=sample_q, sample_q=m, base=base) 240 | 241 | return 0.5 * D_PM + 0.5 * D_QM 242 | 243 | 244 | def _construct_unique_combinations_and_counts_from_two_samples(sample_x: np.ndarray, 245 | sample_y: np.ndarray) \ 246 | -> tp.Tuple[np.ndarray, np.ndarray]: 247 | """ 248 | Construct an array of unique co-located combinations of sample_x and sample_y as well as an 249 | array of associated counts. 
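    For example (taken from the accompanying tests), sample_x = [1, 1, 3, 1, 2, 3] and
    sample_y = [1, 1, 1, 3, 2, 1] yield the unique combinations
    [[1, 1], [1, 3], [2, 2], [3, 1]] with counts [2, 1, 1, 2].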
250 | 251 | Parameters 252 | ---------- 253 | sample_x: a NumPy array of draws of variable x 254 | sample_y: a NumPy array of draws of variable y 255 | 256 | Returns 257 | ------- 258 | a tuple of unique combinations of draws from x and y and associated counts 259 | """ 260 | assert sample_x.ndim == 1 261 | assert sample_y.ndim == 1 262 | 263 | assert sample_x.shape == sample_y.shape 264 | 265 | n = len(sample_x) 266 | 267 | sample_x = sample_x.reshape((n, 1)) 268 | sample_y = sample_y.reshape((n, 1)) 269 | 270 | sample_xy = np.concatenate((sample_x, sample_y), axis=1) 271 | 272 | unique_combinations, counts = np.unique(sample_xy, axis=0, return_counts=True) 273 | 274 | return unique_combinations, counts 275 | 276 | 277 | @numba.njit 278 | def _get_index_for_combination(combination: np.ndarray, 279 | unique_combinations: np.ndarray) -> int: 280 | """ 281 | Returns the row index of a 2 element array in a nx2 dimensional array. Returns -1 if the 282 | requested array is not in the search array. 283 | 284 | Parameters 285 | ---------- 286 | combination: an array whose position of first occurence is to be found 287 | unique_combinations: an array which is to be searched 288 | 289 | Returns 290 | ------- 291 | the row index of the combination 292 | """ 293 | for i in range(unique_combinations.shape[0]): 294 | if np.all(unique_combinations[i, :] == combination): 295 | return i 296 | 297 | return -1 298 | 299 | 300 | @numba.njit 301 | def _get_count_for_combination(combination: np.ndarray, 302 | unique_combinations: np.ndarray, 303 | counts: np.ndarray) -> int: 304 | """ 305 | Given a 2x1 combination and arrays of unique combinations and associated counts, return the 306 | count of the combination. 307 | 308 | Parameters 309 | ---------- 310 | combination: a 2 element array whose count is to be determined 311 | unique_combinations: a 2xn array of unique combinations 312 | counts: the count associated with the unique combinations 313 | 314 | Returns 315 | ------- 316 | the count of the combination 317 | """ 318 | 319 | return counts[_get_index_for_combination(combination=combination, 320 | unique_combinations=unique_combinations)] 321 | 322 | 323 | @numba.njit 324 | def _get_index_of_value_in_1d_array(value: numbers.Number, 325 | array: np.ndarray) -> int: 326 | """ 327 | Returns the index of a value in an array and returns -1 if the array does not contain the value. 328 | Parameters 329 | ---------- 330 | value: a number 331 | array: a one-dimensional NumPy array 332 | 333 | Returns 334 | ------- 335 | the index of the value in the array 336 | """ 337 | for i in range(len(array)): 338 | if value == array[i]: 339 | return i 340 | 341 | return -1 342 | 343 | 344 | @numba.njit 345 | def _get_count_for_value(value: numbers.Number, 346 | unique_values: np.ndarray, 347 | counts: np.ndarray) -> int: 348 | """ 349 | Given a value and arrays of unique values and associated counts, return the 350 | count of the value. 
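    For example, with unique_values = [1, 2, 3] and counts = [3, 1, 2] (the value counts of the
    sample [1, 1, 3, 1, 2, 3] used in the tests), the count returned for the value 2 is 1.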
351 | 352 | Parameters 353 | ---------- 354 | value: a number whose count is to be determined 355 | unique_values: a one-dimensional array of unique values 356 | counts: the count associated with each unique value 357 | 358 | Returns 359 | ------- 360 | the count of the value 361 | """ 362 | 363 | return counts[_get_index_of_value_in_1d_array(value, unique_values)] 364 | 365 | 366 | @numba.njit 367 | def _discrete_mutual_information_internal(n: int, 368 | unique_combinations_xy: np.ndarray, 369 | counts_xy: np.ndarray, 370 | unique_values_x: np.ndarray, 371 | counts_x: np.ndarray, 372 | unique_values_y: np.ndarray, 373 | counts_y: np.ndarray, 374 | base: float = np.e) -> float: 375 | """ 376 | Compute mutual information of discrete random variables x and y from 377 | 378 | Parameters 379 | ---------- 380 | n: sample size 381 | unique_combinations_xy: NumPy array with shape (number_of_combinations, 2) of unique 382 | combinations of X and Y appearing in the sample 383 | counts_xy: the number of appearances of each unique combination in the sample 384 | unique_values_x: NumPy array of the unique values of x 385 | counts_x: number of appearances of each unique value of x 386 | unique_values_y: NumPy array of the unique values of y 387 | counts_y: number of appearances of each unique value of y 388 | base: the base of the logarithm used to control the units of measurement for the result 389 | 390 | Returns 391 | ------- 392 | 393 | """ 394 | 395 | logarithm = Logarithm(base) 396 | 397 | mutual_information = 0.0 398 | for i in range(counts_xy.shape[0]): 399 | x = unique_combinations_xy[i, 0] 400 | y = unique_combinations_xy[i, 1] 401 | joint_count = counts_xy[i] 402 | 403 | x_count = _get_count_for_value(value=x, 404 | unique_values=unique_values_x, 405 | counts=counts_x) 406 | 407 | y_count = _get_count_for_value(value=y, 408 | unique_values=unique_values_y, 409 | counts=counts_y) 410 | 411 | mutual_information += (joint_count / n) * logarithm.log(n * joint_count / (x_count * y_count)) 412 | 413 | return mutual_information 414 | 415 | 416 | def _check_dimensions_of_two_variable_sample(sample_x: np.ndarray, 417 | sample_y: np.ndarray) \ 418 | -> tp.Tuple[np.ndarray, np.ndarray, int]: 419 | """ 420 | Check that sample_x and sample_y have the same number of elements and make them vectors. 421 | 422 | Parameters 423 | ---------- 424 | sample_x: a NumPy array of draws of variable x 425 | sample_y: a NumPy array of draws of variable y 426 | 427 | Returns 428 | ------- 429 | 430 | """ 431 | if sample_x.ndim > 1: 432 | raise ValueError('sample_x must be a one dimensional array') 433 | 434 | if sample_y.ndim > 1: 435 | raise ValueError('sample_y must be a one dimensional array') 436 | 437 | sample_x = sample_x.reshape((-1, )) 438 | sample_y = sample_y.reshape((-1, )) 439 | 440 | n = len(sample_x) 441 | 442 | if n != len(sample_y): 443 | raise ValueError('sample_x and sample_y must have the same length') 444 | 445 | return sample_x, sample_y, n 446 | 447 | 448 | def discrete_mutual_information(sample_x: np.ndarray, 449 | sample_y: np.ndarray, 450 | base: float = np.e) -> float: 451 | """ 452 | Approximate the mutual information of x and y 453 | 454 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 455 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 456 | 457 | from a sample of both distributions. 
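    Equivalently, I(X; Y) = H(X) - H(X|Y) = H(Y) - H(Y|X), and I(X; X) = H(X); these
    identities are checked in the accompanying tests.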
458 | 459 | Parameters 460 | ---------- 461 | sample_x: a NumPy array of draws of variable x 462 | sample_y: a NumPy array of draws of variable y 463 | base: the base of the logarithm used to control the units of measurement for the result 464 | 465 | Returns 466 | ------- 467 | The mutual information of x and y. 468 | """ 469 | sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(sample_x, sample_y) 470 | 471 | unique_combinations_xy, counts_xy = \ 472 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 473 | 474 | unique_values_x, counts_x = np.unique(sample_x, return_counts=True) 475 | unique_values_y, counts_y = np.unique(sample_y, return_counts=True) 476 | 477 | return _discrete_mutual_information_internal(n=n, 478 | unique_combinations_xy=unique_combinations_xy, 479 | counts_xy=counts_xy, 480 | unique_values_x=unique_values_x, 481 | counts_x=counts_x, 482 | unique_values_y=unique_values_y, 483 | counts_y=counts_y, 484 | base=base) 485 | 486 | 487 | def discrete_joint_entropy(sample_x: np.ndarray, 488 | sample_y: np.ndarray, 489 | base: float = np.e) -> float: 490 | """ 491 | Approximate the joint entropy of x and y 492 | 493 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 494 | 495 | from a sample of both distributions. 496 | 497 | Parameters 498 | ---------- 499 | sample_x: a NumPy array of draws of variable x 500 | sample_y: a NumPy array of draws of variable y 501 | base: the base of the logarithm used to control the units of measurement for the result 502 | 503 | Returns 504 | ------- 505 | The joint entropy between of x and y 506 | """ 507 | 508 | log_fun = _select_vectorized_log_fun_for_base(base) 509 | sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(sample_x, sample_y) 510 | 511 | unique_combinations_xy, counts_xy = \ 512 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 513 | 514 | joint_frequency = (1.0 / n) * counts_xy 515 | 516 | return - np.sum(joint_frequency * log_fun(joint_frequency)) 517 | 518 | 519 | @numba.njit 520 | def _get_conditional_frequency_of_y_given_x(n: int, 521 | x: numbers.Number, 522 | y: numbers.Number, 523 | sample_x: np.ndarray, 524 | sample_y: np.ndarray) -> float: 525 | """ 526 | Given a sample of two variables X and Y, and specific values of these variables x and y, 527 | determine the conditional frequency of Y=y given that X=x. 528 | 529 | Parameters 530 | ---------- 531 | n: sample size 532 | x: value of x 533 | y: value of y 534 | sample_x: NumPy array containing the x-variable of the sample 535 | sample_y: NumPy array containing the y-variable of the sample 536 | 537 | Returns 538 | ------- 539 | the conditional frequency of Y=y given that X=x 540 | """ 541 | count_x = 0.0 542 | count_x_and_y = 0.0 543 | for i in range(n): 544 | if sample_x[i] == x: 545 | count_x += 1 546 | if sample_y[i] == y: 547 | count_x_and_y += 1 548 | 549 | if count_x == 0: 550 | raise ValueError('x value is not present in the sample') 551 | else: 552 | return count_x_and_y / count_x 553 | 554 | 555 | @numba.njit 556 | def _discrete_conditional_entropy_of_y_given_x_internal(n: int, 557 | unique_combinations_xy: np.ndarray, 558 | counts_xy: np.ndarray, 559 | sample_x: np.ndarray, 560 | sample_y: np.ndarray, 561 | base: float = np.e) -> float: 562 | """ 563 | Compute conditional entropy of discrete random variables X and Y from NumPy arrays of samples of 564 | these random variables. 
This function relies on pre-computed unique combinations of both 565 | variables and associated counts. 566 | 567 | Parameters 568 | ---------- 569 | n: sample size 570 | unique_combinations_xy: NumPy array with shape (number_of_combinations, 2) of unique 571 | combinations of X and Y appearing in the sample 572 | counts_xy: the number of appearances of each unique combination in the sample 573 | sample_x: NumPy array containing the x-variable of the sample 574 | sample_y: NumPy array containing the y-variable of the sample 575 | base: the base of the logarithm used to control the units of measurement for the result 576 | 577 | Returns 578 | ------- 579 | The conditional entropy from a sample of discrete random variables 580 | """ 581 | 582 | logarithm = Logarithm(base) 583 | 584 | conditional_entropy = 0.0 585 | for i in range(len(counts_xy)): 586 | x = unique_combinations_xy[i, 0] 587 | y = unique_combinations_xy[i, 1] 588 | 589 | conditional_frequency_of_y_given_x = \ 590 | _get_conditional_frequency_of_y_given_x(n=n, 591 | x=x, 592 | y=y, 593 | sample_x=sample_x, 594 | sample_y=sample_y) 595 | conditional_entropy -= counts_xy[i] * logarithm.log(conditional_frequency_of_y_given_x) / n 596 | 597 | return conditional_entropy 598 | 599 | 600 | def discrete_conditional_entropy_of_y_given_x(sample_x: np.ndarray, 601 | sample_y: np.ndarray, 602 | base: float = np.e) -> float: 603 | """ 604 | Approximate the conditional entropy of y given x 605 | 606 | H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right] 607 | 608 | from a sample of both distributions. 609 | 610 | Parameters 611 | ---------- 612 | sample_x: a NumPy array of draws of variable x 613 | sample_y: a NumPy array of draws of variable y 614 | base: the base of the logarithm used to control the units of measurement for the result 615 | 616 | Returns 617 | ------- 618 | The conditional entropy between of y given x. 
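    Note that H(Y|X) = H(X, Y) - H(X), an identity verified in the accompanying tests.

    Examples
    --------
    A minimal sketch using the sample fixtures from the tests (the numerical value of the
    result is not reproduced here):

    >>> x = np.array([1, 1, 3, 1, 2, 3])
    >>> y = np.array([1, 1, 1, 3, 2, 1])
    >>> discrete_conditional_entropy_of_y_given_x(sample_x=x, sample_y=y)  # doctest: +SKIP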
619 | """ 620 | sample_x, sample_y, n = _check_dimensions_of_two_variable_sample(sample_x, sample_y) 621 | 622 | unique_combinations_xy, counts_xy = \ 623 | _construct_unique_combinations_and_counts_from_two_samples(sample_x, sample_y) 624 | 625 | return _discrete_conditional_entropy_of_y_given_x_internal( 626 | n=n, 627 | unique_combinations_xy=unique_combinations_xy, 628 | counts_xy=counts_xy, 629 | sample_x=sample_x, 630 | sample_y=sample_y, 631 | base=base) 632 | -------------------------------------------------------------------------------- /notebooks/Divergence.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Package Imports and Setup" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": { 14 | "ExecuteTime": { 15 | "end_time": "2020-07-31T03:56:35.065578Z", 16 | "start_time": "2020-07-31T03:56:35.043177Z" 17 | } 18 | }, 19 | "outputs": [], 20 | "source": [ 21 | "%load_ext autoreload\n", 22 | "%autoreload 2" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 2, 28 | "metadata": { 29 | "ExecuteTime": { 30 | "end_time": "2020-07-31T03:56:35.856507Z", 31 | "start_time": "2020-07-31T03:56:35.067510Z" 32 | } 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "# %matplotlib inline\n", 37 | "import collections\n", 38 | "import math\n", 39 | "import numpy as np\n", 40 | "import scipy as sp\n", 41 | "import statsmodels.api as sm" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": 3, 47 | "metadata": { 48 | "ExecuteTime": { 49 | "end_time": "2020-07-31T03:56:39.231858Z", 50 | "start_time": "2020-07-31T03:56:35.858577Z" 51 | } 52 | }, 53 | "outputs": [], 54 | "source": [ 55 | "from divergence import *" 56 | ] 57 | }, 58 | { 59 | "cell_type": "markdown", 60 | "metadata": {}, 61 | "source": [ 62 | "# Distributions and Samples" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": {}, 68 | "source": [ 69 | "## Construct Artificial Sample from two Normal Distributions" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "metadata": {}, 75 | "source": [ 76 | "This example considers two different normal distributions $p$ and $q$ with\n", 77 | "$p = N(2, 9)$ and $q = N(1, 4)$." 
78 | ] 79 | }, 80 | { 81 | "cell_type": "code", 82 | "execution_count": 4, 83 | "metadata": { 84 | "ExecuteTime": { 85 | "end_time": "2020-07-31T03:56:39.304942Z", 86 | "start_time": "2020-07-31T03:56:39.234347Z" 87 | } 88 | }, 89 | "outputs": [], 90 | "source": [ 91 | "# fix random seed for reproducibility\n", 92 | "np.random.seed(42)\n", 93 | "\n", 94 | "# set parameters of the normal distributions p and q\n", 95 | "mu_p = 2\n", 96 | "sigma_p = 3\n", 97 | "mu_q = 1\n", 98 | "sigma_q = 2\n", 99 | "\n", 100 | "# draw samples from each normal distribution\n", 101 | "n = 10000\n", 102 | "\n", 103 | "def draw_normal(mu, sigma, n: int, antithetic: bool = False):\n", 104 | " z = np.random.randn(n)\n", 105 | " if antithetic: \n", 106 | " z = np.hstack((z, -z))\n", 107 | " \n", 108 | " return mu + sigma * z\n", 109 | "\n", 110 | "sample_p = draw_normal(mu_p, sigma_p, n=n, antithetic=True)\n", 111 | "sample_q = draw_normal(mu_q, sigma_q, n=n, antithetic=True)\n", 112 | "\n", 113 | "# fit a non-parametric density estimate for both distributions\n", 114 | "kde_p = sm.nonparametric.KDEUnivariate(sample_p)\n", 115 | "kde_q = sm.nonparametric.KDEUnivariate(sample_q)\n", 116 | "kde_p.fit()\n", 117 | "kde_q.fit()\n", 118 | "\n", 119 | "# construct exact normal densities for p and q\n", 120 | "pdf_p = lambda x: sp.stats.norm.pdf(x, mu_p, sigma_p)\n", 121 | "pdf_q = lambda x: sp.stats.norm.pdf(x, mu_q, sigma_q)\n", 122 | "\n", 123 | "# compute support for kernel density estimates\n", 124 | "p_min = min(kde_p.support)\n", 125 | "p_max = max(kde_p.support)\n", 126 | "q_min = min(kde_q.support)\n", 127 | "q_max = max(kde_q.support)\n", 128 | "combined_min = min(p_min, q_min)\n", 129 | "combined_max = max(p_max, q_max)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "metadata": {}, 135 | "source": [ 136 | "## Construct Sample from Multinomial Distribution" 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": 5, 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2020-07-31T03:56:39.339814Z", 145 | "start_time": "2020-07-31T03:56:39.306432Z" 146 | } 147 | }, 148 | "outputs": [], 149 | "source": [ 150 | "multinomial_sample_q = np.array([1, 2, 3, 2, 3, 3, 3, 2, 1, 1])\n", 151 | "multinomial_sample_p = np.array([1, 2, 3, 3, 3, 3, 3, 3, 3, 3])" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "# Entropy" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "metadata": {}, 164 | "source": [ 165 | "The entropy of a probability distribution $p$ is defined as \n", 166 | "\n", 167 | "$H(X) = - \\mathbb{E}_p \\left[ \\log_{\\text{base}} p \\right]$, \n", 168 | "\n", 169 | "where $\\mathbb{E}_P$ denotes expectation with respect the probability distribution $p$. In information theory, the base of the logarithm is 2 and the interpretation of entropy is the average number of bits needed to optimally encode the signal represented by the distribution $p$. \n", 170 | "\n", 171 | "Divergence defaults to $\\text{base}=e$, which results in the natural logarithm i.e. $\\log_e = \\ln$. This default choice can be overridden via the argument 'base' during the entropy calculation. In particular, specifying $\\text{base}=2$ results in the classical Shannon entropy expressed in bits, whereas specifying $\\text{base}=10$ produces the entropy in decimal bits (dits or Hartleys)." 
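To make the role of the `base` argument concrete, here is a minimal sketch (not part of the package) that measures the entropy of one fixed discrete distribution in nats, bits, and Hartleys; only the logarithm changes, the distribution does not.

```python
# Minimal sketch: the same distribution expressed in different entropy units.
import numpy as np

probabilities = np.array([0.5, 0.25, 0.25])

for base, unit in [(np.e, 'nats'), (2.0, 'bits'), (10.0, 'hartleys')]:
    entropy = -np.sum(probabilities * np.log(probabilities)) / np.log(base)
    print(f'H = {entropy:.4f} {unit}')
# The three numbers differ only by constant factors such as 1 / log(2).
```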
172 | ] 173 | }, 174 | { 175 | "cell_type": "markdown", 176 | "metadata": {}, 177 | "source": [ 178 | "## Continuous Case" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Entropy from Samples (via Statsmodels KDE Objects)" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 6, 191 | "metadata": { 192 | "ExecuteTime": { 193 | "end_time": "2020-07-31T03:56:39.517682Z", 194 | "start_time": "2020-07-31T03:56:39.341601Z" 195 | } 196 | }, 197 | "outputs": [ 198 | { 199 | "name": "stdout", 200 | "output_type": "stream", 201 | "text": [ 202 | "Entropy of p = 2.531109986651922\n", 203 | "Entropy of q = 2.123343378353565\n" 204 | ] 205 | } 206 | ], 207 | "source": [ 208 | "print(f'Entropy of p = {entropy_from_samples(sample_p, discrete=False)}')\n", 209 | "print(f'Entropy of q = {entropy_from_samples(sample_q, discrete=False)}')" 210 | ] 211 | }, 212 | { 213 | "cell_type": "markdown", 214 | "metadata": {}, 215 | "source": [ 216 | "### Entropy from Statsmodels KDE Objects (via Statsmodels)" 217 | ] 218 | }, 219 | { 220 | "cell_type": "code", 221 | "execution_count": 7, 222 | "metadata": { 223 | "ExecuteTime": { 224 | "end_time": "2020-07-31T03:56:39.953586Z", 225 | "start_time": "2020-07-31T03:56:39.520153Z" 226 | } 227 | }, 228 | "outputs": [ 229 | { 230 | "name": "stdout", 231 | "output_type": "stream", 232 | "text": [ 233 | "Entropy of p = 2.531114322639585\n", 234 | "Entropy of q = 2.1233454054445\n" 235 | ] 236 | } 237 | ], 238 | "source": [ 239 | "print(f'Entropy of p = {kde_p.entropy}')\n", 240 | "print(f'Entropy of q = {kde_q.entropy}')" 241 | ] 242 | }, 243 | { 244 | "cell_type": "markdown", 245 | "metadata": {}, 246 | "source": [ 247 | "### Entropy from Statsmodels KDE Objects (via Divergence)" 248 | ] 249 | }, 250 | { 251 | "cell_type": "code", 252 | "execution_count": 8, 253 | "metadata": { 254 | "ExecuteTime": { 255 | "end_time": "2020-07-31T03:56:40.127364Z", 256 | "start_time": "2020-07-31T03:56:39.957380Z" 257 | } 258 | }, 259 | "outputs": [ 260 | { 261 | "name": "stdout", 262 | "output_type": "stream", 263 | "text": [ 264 | "Entropy of p = 2.531109986651922\n", 265 | "Entropy of q = 2.123343378353565\n" 266 | ] 267 | } 268 | ], 269 | "source": [ 270 | "print(f'Entropy of p = {entropy_from_kde(kde_p)}')\n", 271 | "print(f'Entropy of q = {entropy_from_kde(kde_q)}')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "metadata": {}, 277 | "source": [ 278 | "### Entropy from Normal Probability Density Functions" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 9, 284 | "metadata": { 285 | "ExecuteTime": { 286 | "end_time": "2020-07-31T03:56:40.190449Z", 287 | "start_time": "2020-07-31T03:56:40.131586Z" 288 | } 289 | }, 290 | "outputs": [ 291 | { 292 | "name": "stdout", 293 | "output_type": "stream", 294 | "text": [ 295 | "Entropy of p = 2.517390423126535\n", 296 | "Entropy of q = 2.1120728496363306\n" 297 | ] 298 | } 299 | ], 300 | "source": [ 301 | "print(f'Entropy of p = {entropy_from_density_with_support(pdf_p, p_min, p_max)}')\n", 302 | "print(f'Entropy of q = {entropy_from_density_with_support(pdf_q, q_min, q_max)}')" 303 | ] 304 | }, 305 | { 306 | "cell_type": "markdown", 307 | "metadata": {}, 308 | "source": [ 309 | "### Theoretical Entropy of a Normal Distribution" 310 | ] 311 | }, 312 | { 313 | "cell_type": "code", 314 | "execution_count": 10, 315 | "metadata": { 316 | "ExecuteTime": { 317 | "end_time": "2020-07-31T03:56:40.228574Z", 318 | "start_time": 
"2020-07-31T03:56:40.191930Z" 319 | } 320 | }, 321 | "outputs": [ 322 | { 323 | "name": "stdout", 324 | "output_type": "stream", 325 | "text": [ 326 | "Entropy of p = 2.5175508218727822\n", 327 | "Entropy of q = 2.112085713764618\n" 328 | ] 329 | } 330 | ], 331 | "source": [ 332 | "def theoretical_entropy_of_normal_distribution(mu: float, sigma: float, log_fun: tp.Callable = np.log) -> float:\n", 333 | " return 0.5 * (1.0 + log_fun(2 * np.pi * sigma**2))\n", 334 | "\n", 335 | "print(f'Entropy of p = {theoretical_entropy_of_normal_distribution(mu_p, sigma_p)}')\n", 336 | "print(f'Entropy of q = {theoretical_entropy_of_normal_distribution(mu_q, sigma_q)}')" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "## Discrete Case" 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": 11, 349 | "metadata": { 350 | "ExecuteTime": { 351 | "end_time": "2020-07-31T03:56:40.268189Z", 352 | "start_time": "2020-07-31T03:56:40.230965Z" 353 | } 354 | }, 355 | "outputs": [ 356 | { 357 | "name": "stdout", 358 | "output_type": "stream", 359 | "text": [ 360 | "Entropy of p = 0.639031859650177\n", 361 | "Entropy of q = 1.0888999753452238\n" 362 | ] 363 | } 364 | ], 365 | "source": [ 366 | "print(f'Entropy of p = {discrete_entropy(multinomial_sample_p)}')\n", 367 | "print(f'Entropy of q = {discrete_entropy(multinomial_sample_q)}')" 368 | ] 369 | }, 370 | { 371 | "cell_type": "markdown", 372 | "metadata": {}, 373 | "source": [ 374 | "# Cross Entropy" 375 | ] 376 | }, 377 | { 378 | "cell_type": "markdown", 379 | "metadata": {}, 380 | "source": [ 381 | "The cross entropy of a distribution $q$ relative to a distribution $p$ is defined as \n", 382 | "\n", 383 | "$H_q(p) = - \\mathbb{E}_p \\left[ \\log_{\\text{base}} q \\right]$.\n", 384 | "\n", 385 | "With a base of 2, the cross-entropy of $q$ relative to $p$ is the average number of bits required to encode the signal in $p$ using a code optimized for the signal in $q$." 
386 | ] 387 | }, 388 | { 389 | "cell_type": "markdown", 390 | "metadata": {}, 391 | "source": [ 392 | "## Continuous Case" 393 | ] 394 | }, 395 | { 396 | "cell_type": "markdown", 397 | "metadata": {}, 398 | "source": [ 399 | "### Cross Entropy from Samples (via Statsmodels KDE Objects)" 400 | ] 401 | }, 402 | { 403 | "cell_type": "code", 404 | "execution_count": 12, 405 | "metadata": { 406 | "ExecuteTime": { 407 | "end_time": "2020-07-31T03:56:40.738979Z", 408 | "start_time": "2020-07-31T03:56:40.269946Z" 409 | } 410 | }, 411 | "outputs": [ 412 | { 413 | "name": "stdout", 414 | "output_type": "stream", 415 | "text": [ 416 | "Cross Entropy of p relative to q = 2.9007913519550272\n", 417 | "Cross Entropy of q relative to p = 2.306094354037839\n" 418 | ] 419 | } 420 | ], 421 | "source": [ 422 | "print(f'Cross Entropy of p relative to q = {cross_entropy_from_samples(sample_p, sample_q, discrete=False)}')\n", 423 | "print(f'Cross Entropy of q relative to p = {cross_entropy_from_samples(sample_q, sample_p, discrete=False)}')" 424 | ] 425 | }, 426 | { 427 | "cell_type": "markdown", 428 | "metadata": {}, 429 | "source": [ 430 | "### Cross Entropy from Statsmodels KDE Objects" 431 | ] 432 | }, 433 | { 434 | "cell_type": "code", 435 | "execution_count": 13, 436 | "metadata": { 437 | "ExecuteTime": { 438 | "end_time": "2020-07-31T03:56:43.995286Z", 439 | "start_time": "2020-07-31T03:56:40.740765Z" 440 | } 441 | }, 442 | "outputs": [ 443 | { 444 | "name": "stdout", 445 | "output_type": "stream", 446 | "text": [ 447 | "403 ms ± 12.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 448 | ] 449 | } 450 | ], 451 | "source": [ 452 | "%timeit cross_entropy_from_kde(kde_p, kde_q), cross_entropy_from_kde(kde_q, kde_p)" 453 | ] 454 | }, 455 | { 456 | "cell_type": "code", 457 | "execution_count": 14, 458 | "metadata": { 459 | "ExecuteTime": { 460 | "end_time": "2020-07-31T03:56:44.448376Z", 461 | "start_time": "2020-07-31T03:56:43.997577Z" 462 | } 463 | }, 464 | "outputs": [ 465 | { 466 | "name": "stdout", 467 | "output_type": "stream", 468 | "text": [ 469 | "Cross Entropy of p relative to q = 2.9007913519550272\n", 470 | "Cross Entropy of q relative to p = 2.306094354037839\n" 471 | ] 472 | } 473 | ], 474 | "source": [ 475 | "print(f'Cross Entropy of p relative to q = {cross_entropy_from_kde(kde_p, kde_q)}')\n", 476 | "print(f'Cross Entropy of q relative to p = {cross_entropy_from_kde(kde_q, kde_p)}')" 477 | ] 478 | }, 479 | { 480 | "cell_type": "markdown", 481 | "metadata": {}, 482 | "source": [ 483 | "### Cross Entropy from Normal Probability Density Functions" 484 | ] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": 15, 489 | "metadata": { 490 | "ExecuteTime": { 491 | "end_time": "2020-07-31T03:56:44.518454Z", 492 | "start_time": "2020-07-31T03:56:44.450634Z" 493 | } 494 | }, 495 | "outputs": [ 496 | { 497 | "name": "stdout", 498 | "output_type": "stream", 499 | "text": [ 500 | "Cross Entropy of p relative to q = 2.86176079907269\n", 501 | "Cross Entropy of q relative to p = 2.295328590629144\n" 502 | ] 503 | } 504 | ], 505 | "source": [ 506 | "print(f'Cross Entropy of p relative to q = {cross_entropy_from_densities_with_support(pdf_p, pdf_q, combined_min, combined_max)}')\n", 507 | "print(f'Cross Entropy of q relative to p = {cross_entropy_from_densities_with_support(pdf_q, pdf_p, combined_min, combined_max)}')" 508 | ] 509 | }, 510 | { 511 | "cell_type": "markdown", 512 | "metadata": {}, 513 | "source": [ 514 | "## Discrete Case" 515 | ] 516 | }, 517 | { 518 | "cell_type": 
"code", 519 | "execution_count": 16, 520 | "metadata": { 521 | "ExecuteTime": { 522 | "end_time": "2020-07-31T03:56:44.914306Z", 523 | "start_time": "2020-07-31T03:56:44.520198Z" 524 | } 525 | }, 526 | "outputs": [ 527 | { 528 | "name": "stdout", 529 | "output_type": "stream", 530 | "text": [ 531 | "Cross Entropy of p relative to q = 0.9738271463645112\n", 532 | "Cross Entropy of q relative to p = 1.4708084763221114\n" 533 | ] 534 | } 535 | ], 536 | "source": [ 537 | "print(f'Cross Entropy of p relative to q = {discrete_cross_entropy(multinomial_sample_p, multinomial_sample_q)}')\n", 538 | "print(f'Cross Entropy of q relative to p = {discrete_cross_entropy(multinomial_sample_q, multinomial_sample_p)}')" 539 | ] 540 | }, 541 | { 542 | "cell_type": "markdown", 543 | "metadata": {}, 544 | "source": [ 545 | "# Relative Entropy (Kullback-Leibler Divergence)" 546 | ] 547 | }, 548 | { 549 | "cell_type": "markdown", 550 | "metadata": {}, 551 | "source": [ 552 | "Relative entropy or Kullback-Leibler divergence measures the dispersion of two probability distributions $P$ and $Q$. It is defined as the difference between the cross entropy of $q$ relative to $p$ and the entropy of $p$\n", 553 | "\n", 554 | "$D_{KL} (P||Q) = \\mathbb{E}_p \\left[ \\log_{\\text{base}} \\left( \\frac{p}{q} \\right) \\right] = H_q(p) - H(p)$.\n", 555 | "\n", 556 | "With a base of 2, it can be interpreted as the average number of additional bits required to encode the signal in $p$ using a code optimized for the signal in $q$ over and above the number of bits required by the optimal code for $p$." 557 | ] 558 | }, 559 | { 560 | "cell_type": "markdown", 561 | "metadata": {}, 562 | "source": [ 563 | "## Continuous Case" 564 | ] 565 | }, 566 | { 567 | "cell_type": "markdown", 568 | "metadata": {}, 569 | "source": [ 570 | "### Relative Entropy from Samples (via Statsmodels KDE Objects)" 571 | ] 572 | }, 573 | { 574 | "cell_type": "code", 575 | "execution_count": 17, 576 | "metadata": { 577 | "ExecuteTime": { 578 | "end_time": "2020-07-31T03:56:45.504577Z", 579 | "start_time": "2020-07-31T03:56:44.916068Z" 580 | } 581 | }, 582 | "outputs": [ 583 | { 584 | "name": "stdout", 585 | "output_type": "stream", 586 | "text": [ 587 | "Relative Entropy of p relative to q = 0.3696813653031077\n", 588 | "Relative Entropy of q relative to p = 0.18274894857179375\n" 589 | ] 590 | } 591 | ], 592 | "source": [ 593 | "print(f'Relative Entropy of p relative to q = {relative_entropy_from_samples(sample_p, sample_q, discrete=False)}')\n", 594 | "print(f'Relative Entropy of q relative to p = {relative_entropy_from_samples(sample_q, sample_p, discrete=False)}')" 595 | ] 596 | }, 597 | { 598 | "cell_type": "markdown", 599 | "metadata": {}, 600 | "source": [ 601 | "### Relative Entropy from Statsmodels KDE Objects" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 18, 607 | "metadata": { 608 | "ExecuteTime": { 609 | "end_time": "2020-07-31T03:56:46.081252Z", 610 | "start_time": "2020-07-31T03:56:45.506134Z" 611 | } 612 | }, 613 | "outputs": [ 614 | { 615 | "name": "stdout", 616 | "output_type": "stream", 617 | "text": [ 618 | "Relative Entropy of p relative to q = 0.3696813653031077\n", 619 | "Relative Entropy of q relative to p = 0.18274894857179375\n" 620 | ] 621 | } 622 | ], 623 | "source": [ 624 | "print(f'Relative Entropy of p relative to q = {relative_entropy_from_kde(kde_p, kde_q)}')\n", 625 | "print(f'Relative Entropy of q relative to p = {relative_entropy_from_kde(kde_q, kde_p)}')" 626 | ] 627 | }, 628 | { 629 | 
"cell_type": "markdown", 630 | "metadata": {}, 631 | "source": [ 632 | "### Relative Entropy from Normal Probability Density Functions" 633 | ] 634 | }, 635 | { 636 | "cell_type": "code", 637 | "execution_count": 19, 638 | "metadata": { 639 | "ExecuteTime": { 640 | "end_time": "2020-07-31T03:56:46.142660Z", 641 | "start_time": "2020-07-31T03:56:46.082739Z" 642 | } 643 | }, 644 | "outputs": [ 645 | { 646 | "name": "stdout", 647 | "output_type": "stream", 648 | "text": [ 649 | "Relative Entropy from p to q = 0.34437037594615566\n", 650 | "Relative Entropy from q to p = 0.1832428925442867\n" 651 | ] 652 | } 653 | ], 654 | "source": [ 655 | "print(f'Relative Entropy from p to q = {relative_entropy_from_densities_with_support(pdf_p, pdf_q, combined_min, combined_max)}')\n", 656 | "print(f'Relative Entropy from q to p = {relative_entropy_from_densities_with_support(pdf_q, pdf_p, combined_min, combined_max)}')" 657 | ] 658 | }, 659 | { 660 | "cell_type": "markdown", 661 | "metadata": {}, 662 | "source": [ 663 | "### Theoretical Relative Entropy for Normal Distributions" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 20, 669 | "metadata": { 670 | "ExecuteTime": { 671 | "end_time": "2020-07-31T03:56:46.177322Z", 672 | "start_time": "2020-07-31T03:56:46.144357Z" 673 | } 674 | }, 675 | "outputs": [ 676 | { 677 | "name": "stdout", 678 | "output_type": "stream", 679 | "text": [ 680 | "Relative Entropy from p to q = 0.34453489189183556\n", 681 | "Relative Entropy from q to p = 0.18324288588594217\n" 682 | ] 683 | } 684 | ], 685 | "source": [ 686 | "def relative_entropy_between_normal_distributions(mu_1, sigma_1, mu_2, sigma_2, log_fun: tp.Callable = np.log):\n", 687 | " return ((mu_1 - mu_2)**2 + sigma_1**2 - sigma_2**2 ) / (2 * sigma_2**2) + log_fun(sigma_2/sigma_1)\n", 688 | "\n", 689 | "print(f'Relative Entropy from p to q = {relative_entropy_between_normal_distributions(mu_p, sigma_p, mu_q, sigma_q)}')\n", 690 | "print(f'Relative Entropy from q to p = {relative_entropy_between_normal_distributions(mu_q, sigma_q, mu_p, sigma_p)}')" 691 | ] 692 | }, 693 | { 694 | "cell_type": "markdown", 695 | "metadata": {}, 696 | "source": [ 697 | "## Discrete Case" 698 | ] 699 | }, 700 | { 701 | "cell_type": "code", 702 | "execution_count": 21, 703 | "metadata": { 704 | "ExecuteTime": { 705 | "end_time": "2020-07-31T03:56:46.216253Z", 706 | "start_time": "2020-07-31T03:56:46.178950Z" 707 | } 708 | }, 709 | "outputs": [ 710 | { 711 | "name": "stdout", 712 | "output_type": "stream", 713 | "text": [ 714 | "Relative Entropy of p relative to q = 0.3347952867143343\n", 715 | "Relative Entropy of q relative to p = 0.3819085009768876\n" 716 | ] 717 | } 718 | ], 719 | "source": [ 720 | "print(f'Relative Entropy of p relative to q = {discrete_relative_entropy(multinomial_sample_p, multinomial_sample_q)}')\n", 721 | "print(f'Relative Entropy of q relative to p = {discrete_relative_entropy(multinomial_sample_q, multinomial_sample_p)}')" 722 | ] 723 | }, 724 | { 725 | "cell_type": "markdown", 726 | "metadata": {}, 727 | "source": [ 728 | "# Jensen-Shannon Divergence" 729 | ] 730 | }, 731 | { 732 | "cell_type": "markdown", 733 | "metadata": {}, 734 | "source": [ 735 | "The Jensen-Shannon divergence, a symmetric measure of the divergence of probability distributions, is defined as\n", 736 | "\n", 737 | "$JSD(p||q) = \\frac{1}{2} D_{KL} (p||m) + \\frac{1}{2} D_{KL} (q||m)$, \n", 738 | "\n", 739 | "where $m = \\frac{1}{2} \\left( p + q \\right)$.\n", 740 | "\n", 741 | "For base 2, the JSD is bounded 
between 0 and 1. For base $e$, it is bounded between $0$ and $\\ln(2)$." 742 | ] 743 | }, 744 | { 745 | "cell_type": "markdown", 746 | "metadata": {}, 747 | "source": [ 748 | "## Continuous Case" 749 | ] 750 | }, 751 | { 752 | "cell_type": "markdown", 753 | "metadata": {}, 754 | "source": [ 755 | "### Jensen-Shannon Divergence from Samples (via Statsmodels KDE Objects)" 756 | ] 757 | }, 758 | { 759 | "cell_type": "code", 760 | "execution_count": 22, 761 | "metadata": { 762 | "ExecuteTime": { 763 | "end_time": "2020-07-31T03:56:48.168485Z", 764 | "start_time": "2020-07-31T03:56:46.218072Z" 765 | } 766 | }, 767 | "outputs": [ 768 | { 769 | "name": "stdout", 770 | "output_type": "stream", 771 | "text": [ 772 | "Jensen-Shannon Divergence between p and q = 0.052550634833070334\n", 773 | "Jensen-Shannon Divergence between q and p = 0.052550634833070334\n" 774 | ] 775 | } 776 | ], 777 | "source": [ 778 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_samples(sample_p, sample_q, discrete=False)}')\n", 779 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_samples(sample_q, sample_p, discrete=False)}')" 780 | ] 781 | }, 782 | { 783 | "cell_type": "markdown", 784 | "metadata": {}, 785 | "source": [ 786 | "### Jensen-Shannon Divergence from Statsmodels KDE Objects" 787 | ] 788 | }, 789 | { 790 | "cell_type": "code", 791 | "execution_count": 23, 792 | "metadata": { 793 | "ExecuteTime": { 794 | "end_time": "2020-07-31T03:56:50.119943Z", 795 | "start_time": "2020-07-31T03:56:48.176238Z" 796 | } 797 | }, 798 | "outputs": [ 799 | { 800 | "name": "stdout", 801 | "output_type": "stream", 802 | "text": [ 803 | "Jensen-Shannon Divergence between p and q = 0.052550634833070334\n", 804 | "Jensen-Shannon Divergence between q and p = 0.052550634833070334\n" 805 | ] 806 | } 807 | ], 808 | "source": [ 809 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_kde(kde_p, kde_q)}')\n", 810 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_kde(kde_q, kde_p)}')" 811 | ] 812 | }, 813 | { 814 | "cell_type": "markdown", 815 | "metadata": {}, 816 | "source": [ 817 | "### Jensen-Shannon Divergence from Normal Probability Density Functions" 818 | ] 819 | }, 820 | { 821 | "cell_type": "code", 822 | "execution_count": 24, 823 | "metadata": { 824 | "ExecuteTime": { 825 | "end_time": "2020-07-31T03:56:50.286626Z", 826 | "start_time": "2020-07-31T03:56:50.123680Z" 827 | } 828 | }, 829 | "outputs": [ 830 | { 831 | "name": "stdout", 832 | "output_type": "stream", 833 | "text": [ 834 | "Jensen-Shannon Divergence between p and q = 0.05290044224944191\n", 835 | "Jensen-Shannon Divergence between q and p = 0.05290044224944191\n" 836 | ] 837 | } 838 | ], 839 | "source": [ 840 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_densities_with_support(pdf_p, pdf_q, combined_min, combined_max)}')\n", 841 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_densities_with_support(pdf_q, pdf_p, combined_min, combined_max)}')" 842 | ] 843 | }, 844 | { 845 | "cell_type": "markdown", 846 | "metadata": {}, 847 | "source": [ 848 | "### Jensen-Shannon Divergence from Statsmodels KDE Objects in Bits" 849 | ] 850 | }, 851 | { 852 | "cell_type": "code", 853 | "execution_count": 25, 854 | "metadata": { 855 | "ExecuteTime": { 856 | "end_time": "2020-07-31T03:56:52.790953Z", 857 | "start_time": "2020-07-31T03:56:50.288603Z" 858 | } 859 | }, 860 
| "outputs": [ 861 | { 862 | "name": "stdout", 863 | "output_type": "stream", 864 | "text": [ 865 | "Jensen-Shannon Divergence between p and q = 0.07581454026923815\n", 866 | "Jensen-Shannon Divergence between q and p = 0.07581454026923815\n" 867 | ] 868 | } 869 | ], 870 | "source": [ 871 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_kde(kde_p, kde_q, base=2.0)}')\n", 872 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_kde(kde_q, kde_p, base=2.0)}')" 873 | ] 874 | }, 875 | { 876 | "cell_type": "markdown", 877 | "metadata": {}, 878 | "source": [ 879 | "## Discrete Case" 880 | ] 881 | }, 882 | { 883 | "cell_type": "markdown", 884 | "metadata": {}, 885 | "source": [ 886 | "### Calculation Function Specific to Discrete Distributions" 887 | ] 888 | }, 889 | { 890 | "cell_type": "code", 891 | "execution_count": 26, 892 | "metadata": { 893 | "ExecuteTime": { 894 | "end_time": "2020-07-31T03:56:52.833513Z", 895 | "start_time": "2020-07-31T03:56:52.792870Z" 896 | } 897 | }, 898 | "outputs": [ 899 | { 900 | "name": "stdout", 901 | "output_type": "stream", 902 | "text": [ 903 | "Jensen-Shannon Divergence between p and q = 0.0863046217355343\n", 904 | "Jensen-Shannon Divergence between q and p = 0.0863046217355343\n" 905 | ] 906 | } 907 | ], 908 | "source": [ 909 | "print(f'Jensen-Shannon Divergence between p and q = {discrete_jensen_shannon_divergence(multinomial_sample_p, multinomial_sample_q)}')\n", 910 | "print(f'Jensen-Shannon Divergence between q and p = {discrete_jensen_shannon_divergence(multinomial_sample_q, multinomial_sample_p)}')" 911 | ] 912 | }, 913 | { 914 | "cell_type": "markdown", 915 | "metadata": {}, 916 | "source": [ 917 | "### Generic calculation functionality covering samples from continuous as well as discrete distributions" 918 | ] 919 | }, 920 | { 921 | "cell_type": "code", 922 | "execution_count": 27, 923 | "metadata": { 924 | "ExecuteTime": { 925 | "end_time": "2020-07-31T03:56:52.877694Z", 926 | "start_time": "2020-07-31T03:56:52.835345Z" 927 | } 928 | }, 929 | "outputs": [ 930 | { 931 | "name": "stdout", 932 | "output_type": "stream", 933 | "text": [ 934 | "Jensen-Shannon Divergence between p and q = 0.0863046217355343\n", 935 | "Jensen-Shannon Divergence between q and p = 0.0863046217355343\n" 936 | ] 937 | } 938 | ], 939 | "source": [ 940 | "print(f'Jensen-Shannon Divergence between p and q = {jensen_shannon_divergence_from_samples(multinomial_sample_p, multinomial_sample_q, discrete=True)}')\n", 941 | "print(f'Jensen-Shannon Divergence between q and p = {jensen_shannon_divergence_from_samples(multinomial_sample_q, multinomial_sample_p, discrete=True)}')" 942 | ] 943 | }, 944 | { 945 | "cell_type": "markdown", 946 | "metadata": {}, 947 | "source": [ 948 | "# Mutual Information" 949 | ] 950 | }, 951 | { 952 | "cell_type": "markdown", 953 | "metadata": {}, 954 | "source": [ 955 | "Mutual information is a measure of measure of mutual dependence of random variables that goes beyond linear dependence measured by correlation. It is defined as the KL-divergence between the joint density of two random variables $x$ and $y$ and the product of their marginal densities, i.e. \n", 956 | "\n", 957 | "$I(X; Y) = D_KL(p_{x, y}|| p_x \\otimes p_y) = E_{p_{x, y}} \\left[ \\log_{\\text{base}} \\left( \\frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \\right) \\right]$. 
\n", 958 | "\n" 959 | ] 960 | }, 961 | { 962 | "cell_type": "markdown", 963 | "metadata": {}, 964 | "source": [ 965 | "## Continuous Case" 966 | ] 967 | }, 968 | { 969 | "cell_type": "markdown", 970 | "metadata": {}, 971 | "source": [ 972 | "### Construct Artificial Data from a Bi-Variate Normal Distribution" 973 | ] 974 | }, 975 | { 976 | "cell_type": "code", 977 | "execution_count": 28, 978 | "metadata": { 979 | "ExecuteTime": { 980 | "end_time": "2020-07-31T03:56:52.940599Z", 981 | "start_time": "2020-07-31T03:56:52.879443Z" 982 | } 983 | }, 984 | "outputs": [], 985 | "source": [ 986 | "# set parameters of the normal distributions x and y\n", 987 | "mu_x = 2\n", 988 | "sigma_x = 3\n", 989 | "mu_y = 1\n", 990 | "sigma_y = 2\n", 991 | "rho = 0.5\n", 992 | "\n", 993 | "# draw 1000 samples from each normal distribution\n", 994 | "n = 10000\n", 995 | "z = np.random.randn(n)\n", 996 | "sample_x = mu_x + sigma_x * z\n", 997 | "sample_y = mu_y + sigma_y * (rho * z + np.sqrt(1.0 - rho**2) * np.random.randn(n))\n", 998 | "\n", 999 | "# fit a non-parametric density estimate for both distributions\n", 1000 | "kde_x = sm.nonparametric.KDEUnivariate(sample_x)\n", 1001 | "kde_y = sm.nonparametric.KDEUnivariate(sample_y)\n", 1002 | "kde_x.fit() # Estimate the densities\n", 1003 | "kde_y.fit() # Estimate the densities\n", 1004 | "kde_xy = sp.stats.gaussian_kde([sample_x, sample_y])\n", 1005 | "\n", 1006 | "# construct exact normal densities for x and y\n", 1007 | "pdf_x = lambda x: sp.stats.norm.pdf(x, mu_x, sigma_x)\n", 1008 | "pdf_y = lambda y: sp.stats.norm.pdf(y, mu_y, sigma_y)\n", 1009 | "pdf_xy = sp.stats.multivariate_normal(mean=[mu_x, mu_y], cov=[[sigma_x**2, rho * sigma_x * sigma_y], [rho * sigma_x * sigma_y, sigma_y**2]]).pdf\n", 1010 | "\n", 1011 | "# # compute support for kernel density estimates\n", 1012 | "x_min = min(kde_x.support)\n", 1013 | "x_max = max(kde_x.support)\n", 1014 | "y_min = min(kde_y.support)\n", 1015 | "y_max = max(kde_y.support)" 1016 | ] 1017 | }, 1018 | { 1019 | "cell_type": "markdown", 1020 | "metadata": {}, 1021 | "source": [ 1022 | "### Mutual Information from Samples (via Statsmodels KDE Objects)" 1023 | ] 1024 | }, 1025 | { 1026 | "cell_type": "code", 1027 | "execution_count": 29, 1028 | "metadata": { 1029 | "ExecuteTime": { 1030 | "end_time": "2020-07-31T03:57:26.465180Z", 1031 | "start_time": "2020-07-31T03:56:52.942483Z" 1032 | } 1033 | }, 1034 | "outputs": [ 1035 | { 1036 | "name": "stdout", 1037 | "output_type": "stream", 1038 | "text": [ 1039 | "Mutual Information of x and y = 0.14540631373336696\n" 1040 | ] 1041 | } 1042 | ], 1043 | "source": [ 1044 | "print(f'Mutual Information of x and y = {mutual_information_from_samples(sample_x, sample_y)}')" 1045 | ] 1046 | }, 1047 | { 1048 | "cell_type": "markdown", 1049 | "metadata": {}, 1050 | "source": [ 1051 | "### Mutual Information from Statsmodels KDE Objects" 1052 | ] 1053 | }, 1054 | { 1055 | "cell_type": "code", 1056 | "execution_count": 30, 1057 | "metadata": { 1058 | "ExecuteTime": { 1059 | "end_time": "2020-07-31T03:57:57.222125Z", 1060 | "start_time": "2020-07-31T03:57:26.466817Z" 1061 | } 1062 | }, 1063 | "outputs": [ 1064 | { 1065 | "name": "stdout", 1066 | "output_type": "stream", 1067 | "text": [ 1068 | "Mutual Information of x and y = 0.14540631373336696\n" 1069 | ] 1070 | } 1071 | ], 1072 | "source": [ 1073 | "print(f'Mutual Information of x and y = {mutual_information_from_kde(kde_x, kde_y, kde_xy)}')" 1074 | ] 1075 | }, 1076 | { 1077 | "cell_type": "markdown", 1078 | "metadata": {}, 1079 | 
"source": [ 1080 | "### Mutual Information from Normal Probability Density Functions" 1081 | ] 1082 | }, 1083 | { 1084 | "cell_type": "code", 1085 | "execution_count": 31, 1086 | "metadata": { 1087 | "ExecuteTime": { 1088 | "end_time": "2020-07-31T03:57:59.131119Z", 1089 | "start_time": "2020-07-31T03:57:57.223582Z" 1090 | } 1091 | }, 1092 | "outputs": [ 1093 | { 1094 | "name": "stdout", 1095 | "output_type": "stream", 1096 | "text": [ 1097 | "Mutual Information of x and y = 0.14384103152628203\n" 1098 | ] 1099 | } 1100 | ], 1101 | "source": [ 1102 | "print(f'Mutual Information of x and y = {mutual_information_from_densities_with_support(pdf_x, pdf_y, pdf_xy, x_min=-20, x_max=20, y_min=-20, y_max=20)}')" 1103 | ] 1104 | }, 1105 | { 1106 | "cell_type": "markdown", 1107 | "metadata": {}, 1108 | "source": [ 1109 | "### Theoretical Mutual Information of Bi-Variate Normal Distributions" 1110 | ] 1111 | }, 1112 | { 1113 | "cell_type": "code", 1114 | "execution_count": 32, 1115 | "metadata": { 1116 | "ExecuteTime": { 1117 | "end_time": "2020-07-31T03:57:59.163352Z", 1118 | "start_time": "2020-07-31T03:57:59.132515Z" 1119 | } 1120 | }, 1121 | "outputs": [ 1122 | { 1123 | "name": "stdout", 1124 | "output_type": "stream", 1125 | "text": [ 1126 | "Mutual Information of x and y = 0.14384103622589045\n" 1127 | ] 1128 | } 1129 | ], 1130 | "source": [ 1131 | "def mutual_information_for_bivariate_normal_distribution(rho: float, \n", 1132 | " log_fun: tp.Callable = np.log) -> float:\n", 1133 | " return - 0.5 * np.log(1.0 - rho**2)\n", 1134 | "\n", 1135 | "print(f'Mutual Information of x and y = {mutual_information_for_bivariate_normal_distribution(rho)}')" 1136 | ] 1137 | }, 1138 | { 1139 | "cell_type": "markdown", 1140 | "metadata": {}, 1141 | "source": [ 1142 | "## Discrete Case" 1143 | ] 1144 | }, 1145 | { 1146 | "cell_type": "markdown", 1147 | "metadata": {}, 1148 | "source": [ 1149 | "Construct two discrete samples" 1150 | ] 1151 | }, 1152 | { 1153 | "cell_type": "code", 1154 | "execution_count": 33, 1155 | "metadata": { 1156 | "ExecuteTime": { 1157 | "end_time": "2020-07-31T03:57:59.197908Z", 1158 | "start_time": "2020-07-31T03:57:59.165032Z" 1159 | } 1160 | }, 1161 | "outputs": [], 1162 | "source": [ 1163 | "discrete_sample_x = np.array([1, 1, 3, 1, 2, 3])\n", 1164 | "discrete_sample_y = np.array([1, 1, 1, 3, 2, 1])" 1165 | ] 1166 | }, 1167 | { 1168 | "cell_type": "code", 1169 | "execution_count": 34, 1170 | "metadata": { 1171 | "ExecuteTime": { 1172 | "end_time": "2020-07-31T03:57:59.549553Z", 1173 | "start_time": "2020-07-31T03:57:59.199548Z" 1174 | }, 1175 | "scrolled": false 1176 | }, 1177 | "outputs": [ 1178 | { 1179 | "name": "stdout", 1180 | "output_type": "stream", 1181 | "text": [ 1182 | "The mutual information of x and y is 0.5493061443340548\n" 1183 | ] 1184 | } 1185 | ], 1186 | "source": [ 1187 | "print(f'The mutual information of x and y is {discrete_mutual_information(discrete_sample_x, discrete_sample_y, base=np.e)}')" 1188 | ] 1189 | }, 1190 | { 1191 | "cell_type": "markdown", 1192 | "metadata": {}, 1193 | "source": [ 1194 | "Mutual Information is symmetric" 1195 | ] 1196 | }, 1197 | { 1198 | "cell_type": "code", 1199 | "execution_count": 35, 1200 | "metadata": { 1201 | "ExecuteTime": { 1202 | "end_time": "2020-07-31T03:57:59.583343Z", 1203 | "start_time": "2020-07-31T03:57:59.551049Z" 1204 | } 1205 | }, 1206 | "outputs": [ 1207 | { 1208 | "name": "stdout", 1209 | "output_type": "stream", 1210 | "text": [ 1211 | "The mutual information of y and x is 0.5493061443340548\n" 1212 | ] 
1213 | } 1214 | ], 1215 | "source": [ 1216 | "print(f'The mutual information of y and x is {discrete_mutual_information(discrete_sample_y, discrete_sample_x, base=np.e)}')" 1217 | ] 1218 | }, 1219 | { 1220 | "cell_type": "markdown", 1221 | "metadata": {}, 1222 | "source": [ 1223 | "# Joint Entropy" 1224 | ] 1225 | }, 1226 | { 1227 | "cell_type": "markdown", 1228 | "metadata": {}, 1229 | "source": [ 1230 | "The joint entropy of the random variables x and y with joint density $p_{x, y}$ is defined as \n", 1231 | "\n", 1232 | "$H(X, Y) = - E_{p_{x, y}} \\left[ \\log_{\\text{base}} p_{x, y} (x, y) \\right]$.\n", 1233 | "\n", 1234 | "Joint entropy is symmetric, i.e. \n", 1235 | "\n", 1236 | "$H(X, Y) = H(Y, X)$." 1237 | ] 1238 | }, 1239 | { 1240 | "cell_type": "markdown", 1241 | "metadata": {}, 1242 | "source": [ 1243 | "## Continuous Case" 1244 | ] 1245 | }, 1246 | { 1247 | "cell_type": "code", 1248 | "execution_count": 36, 1249 | "metadata": { 1250 | "ExecuteTime": { 1251 | "end_time": "2020-07-31T03:58:03.608382Z", 1252 | "start_time": "2020-07-31T03:57:59.584775Z" 1253 | } 1254 | }, 1255 | "outputs": [ 1256 | { 1257 | "name": "stdout", 1258 | "output_type": "stream", 1259 | "text": [ 1260 | "Joint entropy of x and y = 4.475745990640665\n" 1261 | ] 1262 | } 1263 | ], 1264 | "source": [ 1265 | "joint_entropy_of_x_and_y = joint_entropy_from_samples(sample_x, sample_y)\n", 1266 | "print(f'Joint entropy of x and y = {joint_entropy_of_x_and_y}')" 1267 | ] 1268 | }, 1269 | { 1270 | "cell_type": "markdown", 1271 | "metadata": {}, 1272 | "source": [ 1273 | "## Discrete Case" 1274 | ] 1275 | }, 1276 | { 1277 | "cell_type": "code", 1278 | "execution_count": 37, 1279 | "metadata": { 1280 | "ExecuteTime": { 1281 | "end_time": "2020-07-31T03:58:03.640898Z", 1282 | "start_time": "2020-07-31T03:58:03.610576Z" 1283 | } 1284 | }, 1285 | "outputs": [ 1286 | { 1287 | "name": "stdout", 1288 | "output_type": "stream", 1289 | "text": [ 1290 | "The joint entropy of x and y is 1.3296613488547582\n" 1291 | ] 1292 | } 1293 | ], 1294 | "source": [ 1295 | "print(f'The joint entropy of x and y is {discrete_joint_entropy(discrete_sample_x, discrete_sample_y, base=np.e)}')" 1296 | ] 1297 | }, 1298 | { 1299 | "cell_type": "markdown", 1300 | "metadata": {}, 1301 | "source": [ 1302 | "# Conditional Entropy" 1303 | ] 1304 | }, 1305 | { 1306 | "cell_type": "markdown", 1307 | "metadata": {}, 1308 | "source": [ 1309 | "The conditional entropy of the random variable y given x with joint density $p_{x, y}$ and marginal density $p_x$ of $x$ is defined as \n", 1310 | "\n", 1311 | "$H(Y|X) = - E_{p_{x, y}} \\left[ \\log \\frac{p_{x, y} (x, y)}{p_x(x)} \\right]$. \n", 1312 | "\n", 1313 | "From this definition follows the change rule for conditional entropy\n", 1314 | "\n", 1315 | "\n", 1316 | "$H(X, Y) = H(X) + H(Y|X)$.\n", 1317 | "\n", 1318 | "Switching the roles of $x$ and $y$ and using the symmetry of joint entropy, we obtain \n", 1319 | "\n", 1320 | "$H(X, Y) = H(Y) + H(X|Y)$.\n", 1321 | "\n", 1322 | "Substracting second equation for joint entropyfrom the first and rearranging yields \n", 1323 | "\n", 1324 | "$H(Y) - H(Y|X) = H(X) - H(X|Y)$." 
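Both sides of the last identity equal the mutual information $I(X; Y)$. A minimal sketch verifying the chain rule and this symmetry for a small joint probability table (hypothetical numbers, natural logarithm), separate from the sample-based functions in the package:

```python
# Minimal sketch: chain rule and symmetry of conditional entropy for a
# discrete joint distribution p_xy[i, j] (rows index x, columns index y).
import numpy as np

p_xy = np.array([[0.2, 0.2],
                 [0.1, 0.5]])
p_x = p_xy.sum(axis=1)   # marginal p(x)
p_y = p_xy.sum(axis=0)   # marginal p(y)


def entropy(p: np.ndarray) -> float:
    p = p[p > 0.0]
    return -np.sum(p * np.log(p))


# H(Y|X) = - sum_{x, y} p(x, y) * log( p(x, y) / p(x) ), computed directly
h_y_given_x = -np.sum(p_xy * np.log(p_xy / p_x[:, np.newaxis]))
h_x_given_y = -np.sum(p_xy * np.log(p_xy / p_y[np.newaxis, :]))
h_xy = entropy(p_xy.ravel())

assert np.isclose(h_xy, entropy(p_x) + h_y_given_x)   # H(X, Y) = H(X) + H(Y|X)
assert np.isclose(h_xy, entropy(p_y) + h_x_given_y)   # H(X, Y) = H(Y) + H(X|Y)
assert np.isclose(entropy(p_y) - h_y_given_x,
                  entropy(p_x) - h_x_given_y)          # both equal I(X; Y)
```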
1325 | ] 1326 | }, 1327 | { 1328 | "cell_type": "markdown", 1329 | "metadata": {}, 1330 | "source": [ 1331 | "## Continuous Case" 1332 | ] 1333 | }, 1334 | { 1335 | "cell_type": "code", 1336 | "execution_count": 38, 1337 | "metadata": { 1338 | "ExecuteTime": { 1339 | "end_time": "2020-07-31T03:58:14.694029Z", 1340 | "start_time": "2020-07-31T03:58:03.642453Z" 1341 | } 1342 | }, 1343 | "outputs": [ 1344 | { 1345 | "name": "stdout", 1346 | "output_type": "stream", 1347 | "text": [ 1348 | "Conditional entropy of y given x = 1.9912929526616132\n" 1349 | ] 1350 | } 1351 | ], 1352 | "source": [ 1353 | "conditional_entropy_of_y_given_x = conditional_entropy_from_samples(sample_x, sample_y)\n", 1354 | "print(f'Conditional entropy of y given x = {conditional_entropy_of_y_given_x}')" 1355 | ] 1356 | }, 1357 | { 1358 | "cell_type": "code", 1359 | "execution_count": 39, 1360 | "metadata": { 1361 | "ExecuteTime": { 1362 | "end_time": "2020-07-31T03:58:24.841970Z", 1363 | "start_time": "2020-07-31T03:58:14.696527Z" 1364 | } 1365 | }, 1366 | "outputs": [ 1367 | { 1368 | "name": "stdout", 1369 | "output_type": "stream", 1370 | "text": [ 1371 | "Conditional entropy of x given y = 2.3857520195720445\n" 1372 | ] 1373 | } 1374 | ], 1375 | "source": [ 1376 | "conditional_entropy_of_x_given_y = conditional_entropy_from_samples(sample_y, sample_x)\n", 1377 | "print(f'Conditional entropy of x given y = {conditional_entropy_of_x_given_y}')" 1378 | ] 1379 | }, 1380 | { 1381 | "cell_type": "markdown", 1382 | "metadata": {}, 1383 | "source": [ 1384 | "Check whether the chain rule of conditional entropy is satisfied" 1385 | ] 1386 | }, 1387 | { 1388 | "cell_type": "code", 1389 | "execution_count": 40, 1390 | "metadata": { 1391 | "ExecuteTime": { 1392 | "end_time": "2020-07-31T03:58:24.923287Z", 1393 | "start_time": "2020-07-31T03:58:24.843722Z" 1394 | } 1395 | }, 1396 | "outputs": [ 1397 | { 1398 | "data": { 1399 | "text/plain": [ 1400 | "True" 1401 | ] 1402 | }, 1403 | "execution_count": 40, 1404 | "metadata": {}, 1405 | "output_type": "execute_result" 1406 | } 1407 | ], 1408 | "source": [ 1409 | "np.isclose(entropy_from_samples(sample_x) + conditional_entropy_of_y_given_x, joint_entropy_of_x_and_y, rtol=1e-2, atol=1e-3)" 1410 | ] 1411 | }, 1412 | { 1413 | "cell_type": "code", 1414 | "execution_count": 41, 1415 | "metadata": { 1416 | "ExecuteTime": { 1417 | "end_time": "2020-07-31T03:58:24.995415Z", 1418 | "start_time": "2020-07-31T03:58:24.925091Z" 1419 | }, 1420 | "scrolled": true 1421 | }, 1422 | "outputs": [ 1423 | { 1424 | "data": { 1425 | "text/plain": [ 1426 | "True" 1427 | ] 1428 | }, 1429 | "execution_count": 41, 1430 | "metadata": {}, 1431 | "output_type": "execute_result" 1432 | } 1433 | ], 1434 | "source": [ 1435 | "np.isclose(entropy_from_samples(sample_y) + conditional_entropy_of_x_given_y, joint_entropy_of_x_and_y, rtol=1e-2, atol=1e-3)" 1436 | ] 1437 | }, 1438 | { 1439 | "cell_type": "code", 1440 | "execution_count": 42, 1441 | "metadata": { 1442 | "ExecuteTime": { 1443 | "end_time": "2020-07-31T03:58:32.421300Z", 1444 | "start_time": "2020-07-31T03:58:24.996920Z" 1445 | }, 1446 | "scrolled": false 1447 | }, 1448 | "outputs": [ 1449 | { 1450 | "name": "stdout", 1451 | "output_type": "stream", 1452 | "text": [ 1453 | "Conditional entropy of y given x (on gpu) = 1.9912435966457076\n" 1454 | ] 1455 | } 1456 | ], 1457 | "source": [ 1458 | "conditional_entropy_of_y_given_x_gpu = \\\n", 1459 | " continuous_conditional_entropy_from_samples_gpu(\n", 1460 | " sample_x, \n", 1461 | " sample_y, \n", 1462 | " 
maximum_number_of_elements_per_batch=-1)\n", 1463 | "print(f'Conditional entropy of y given x (on gpu) = {conditional_entropy_of_y_given_x_gpu}')" 1464 | ] 1465 | }, 1466 | { 1467 | "cell_type": "code", 1468 | "execution_count": 43, 1469 | "metadata": { 1470 | "ExecuteTime": { 1471 | "end_time": "2020-07-31T03:58:37.384352Z", 1472 | "start_time": "2020-07-31T03:58:32.423384Z" 1473 | } 1474 | }, 1475 | "outputs": [ 1476 | { 1477 | "name": "stdout", 1478 | "output_type": "stream", 1479 | "text": [ 1480 | "Conditional entropy of x given y (on gpu) = 2.385699213557811\n" 1481 | ] 1482 | } 1483 | ], 1484 | "source": [ 1485 | "conditional_entropy_of_x_given_y_gpu = continuous_conditional_entropy_from_samples_gpu(sample_y, sample_x)\n", 1486 | "print(f'Conditional entropy of x given y (on gpu) = {conditional_entropy_of_x_given_y_gpu}')" 1487 | ] 1488 | }, 1489 | { 1490 | "cell_type": "markdown", 1491 | "metadata": {}, 1492 | "source": [ 1493 | "## Discrete Case" 1494 | ] 1495 | }, 1496 | { 1497 | "cell_type": "code", 1498 | "execution_count": 44, 1499 | "metadata": { 1500 | "ExecuteTime": { 1501 | "end_time": "2020-07-31T03:58:37.677112Z", 1502 | "start_time": "2020-07-31T03:58:37.386846Z" 1503 | } 1504 | }, 1505 | "outputs": [ 1506 | { 1507 | "name": "stdout", 1508 | "output_type": "stream", 1509 | "text": [ 1510 | "The conditional entropy of y given x is 0.31825708414740644\n" 1511 | ] 1512 | } 1513 | ], 1514 | "source": [ 1515 | "print(f'The conditional entropy of y given x is {discrete_conditional_entropy_of_y_given_x(discrete_sample_x, discrete_sample_y, base=np.e)}')" 1516 | ] 1517 | }, 1518 | { 1519 | "cell_type": "markdown", 1520 | "metadata": {}, 1521 | "source": [ 1522 | "We can verify the chain rule for conditional entropy:" 1523 | ] 1524 | }, 1525 | { 1526 | "cell_type": "code", 1527 | "execution_count": 45, 1528 | "metadata": { 1529 | "ExecuteTime": { 1530 | "end_time": "2020-07-31T03:58:37.712640Z", 1531 | "start_time": "2020-07-31T03:58:37.679034Z" 1532 | } 1533 | }, 1534 | "outputs": [ 1535 | { 1536 | "data": { 1537 | "text/plain": [ 1538 | "True" 1539 | ] 1540 | }, 1541 | "execution_count": 45, 1542 | "metadata": {}, 1543 | "output_type": "execute_result" 1544 | } 1545 | ], 1546 | "source": [ 1547 | "np.isclose(discrete_entropy(discrete_sample_y) + discrete_conditional_entropy_of_y_given_x(discrete_sample_y, discrete_sample_x), discrete_joint_entropy(discrete_sample_x, discrete_sample_y))" 1548 | ] 1549 | }, 1550 | { 1551 | "cell_type": "code", 1552 | "execution_count": 46, 1553 | "metadata": { 1554 | "ExecuteTime": { 1555 | "end_time": "2020-07-31T03:58:37.750939Z", 1556 | "start_time": "2020-07-31T03:58:37.714443Z" 1557 | } 1558 | }, 1559 | "outputs": [ 1560 | { 1561 | "data": { 1562 | "text/plain": [ 1563 | "True" 1564 | ] 1565 | }, 1566 | "execution_count": 46, 1567 | "metadata": {}, 1568 | "output_type": "execute_result" 1569 | } 1570 | ], 1571 | "source": [ 1572 | "np.isclose(discrete_entropy(discrete_sample_x) + discrete_conditional_entropy_of_y_given_x(discrete_sample_x, discrete_sample_y), discrete_joint_entropy(discrete_sample_y, discrete_sample_x))" 1573 | ] 1574 | }, 1575 | { 1576 | "cell_type": "code", 1577 | "execution_count": null, 1578 | "metadata": {}, 1579 | "outputs": [], 1580 | "source": [] 1581 | } 1582 | ], 1583 | "metadata": { 1584 | "kernelspec": { 1585 | "display_name": "Python 3", 1586 | "language": "python", 1587 | "name": "python3" 1588 | }, 1589 | "language_info": { 1590 | "codemirror_mode": { 1591 | "name": "ipython", 1592 | "version": 3 1593 | }, 
1594 | "file_extension": ".py", 1595 | "mimetype": "text/x-python", 1596 | "name": "python", 1597 | "nbconvert_exporter": "python", 1598 | "pygments_lexer": "ipython3", 1599 | "version": "3.8.3" 1600 | }, 1601 | "toc": { 1602 | "base_numbering": 1, 1603 | "nav_menu": {}, 1604 | "number_sections": true, 1605 | "sideBar": true, 1606 | "skip_h1_title": false, 1607 | "title_cell": "Table of Contents", 1608 | "title_sidebar": "Contents", 1609 | "toc_cell": false, 1610 | "toc_position": { 1611 | "height": "calc(100% - 180px)", 1612 | "left": "10px", 1613 | "top": "150px", 1614 | "width": "295.4755554199219px" 1615 | }, 1616 | "toc_section_display": true, 1617 | "toc_window_display": true 1618 | } 1619 | }, 1620 | "nbformat": 4, 1621 | "nbformat_minor": 4 1622 | } 1623 | -------------------------------------------------------------------------------- /divergence/continuous.py: -------------------------------------------------------------------------------- 1 | from cocos.numerics.data_types import NumericArray 2 | from cocos.scientific.kde import ( 3 | gaussian_kde as cocos_gaussian_kde, 4 | evaluate_gaussian_kde_in_batches 5 | ) 6 | 7 | from cubature import cubature 8 | import numpy as np 9 | import scipy as sp 10 | import statsmodels.api as sm 11 | import typing as tp 12 | 13 | from divergence.base import _select_vectorized_log_fun_for_base 14 | 15 | 16 | def _get_min_and_max_support_for_scotts_bw_rule(x: np.ndarray, 17 | cut: float = 3) \ 18 | -> tp.Tuple[float, float]: 19 | bw = sm.nonparametric.bandwidths.bw_scott(x) 20 | a = np.min(x) - cut * bw 21 | b = np.max(x) + cut * bw 22 | 23 | return a, b 24 | 25 | 26 | def _get_min_and_max_support_for_silverman_bw_rule(x: np.ndarray, 27 | cut: float = 3) \ 28 | -> tp.Tuple[float, float]: 29 | bw = sm.nonparametric.bandwidths.bw_silverman(x) 30 | a = np.min(x) - cut * bw 31 | b = np.max(x) + cut * bw 32 | 33 | return a, b 34 | 35 | 36 | def intersection(a0: float, 37 | b0: float, 38 | a1: float, 39 | b1: float) \ 40 | -> tp.Optional[tp.Tuple[float, float]]: 41 | """ 42 | Calculate the intersection of two intervals [a0, b0] and [a1, b1]. If the intervals do not 43 | overlap the function returns None. The parameters must satisfy a0 <= b0 and a1 <= b1. 44 | 45 | Parameters 46 | ---------- 47 | a0: beginning of the first interval 48 | b0: end of the first interval 49 | a1: beginning of the second interval 50 | b1: end of the second interval 51 | 52 | Returns 53 | ------- 54 | 55 | """ 56 | assert a0 <= b0 57 | assert a1 <= b1 58 | 59 | if a0 >= b1: 60 | return None 61 | 62 | if b0 < a1: 63 | return None 64 | 65 | return max(a0, a1), min(b0, b1) 66 | 67 | 68 | ################################################################################ 69 | # Entropy 70 | ################################################################################ 71 | def entropy_from_density_with_support(pdf: tp.Callable, 72 | a: float, 73 | b: float, 74 | base: float = np.e, 75 | eps_abs: float = 1.49e-08, 76 | eps_rel: float = 1.49e-08) \ 77 | -> float: 78 | """ 79 | Compute the entropy 80 | 81 | H(p) = - E_p[log(p)] 82 | 83 | of the density given in pdf via numerical integration from a to b. 84 | The argument base can be used to specify the units in which the entropy is measured. 85 | The default choice is the natural logarithm. 
86 | 87 | Parameters 88 | ---------- 89 | pdf: a function of a scalar parameter which computes the probability density at that point 90 | a: lower bound of the integration region 91 | b: upper bound of the integration region 92 | base: the base of the logarithm used to control the units of measurement for the result 93 | eps_abs: absolute error tolerance for numerical integration 94 | eps_rel: relative error tolerance for numerical integration 95 | 96 | Returns 97 | ------- 98 | The entropy of the density given by pdf 99 | """ 100 | log_fun = _select_vectorized_log_fun_for_base(base) 101 | 102 | def entropy_integrand_vectorized_fast(x: np.ndarray): 103 | p = pdf(x) 104 | return - np.where(p > 0.0, p * log_fun(p), 0.0) 105 | 106 | return cubature(func=entropy_integrand_vectorized_fast, 107 | ndim=1, 108 | fdim=1, 109 | xmin=np.array([a]), 110 | xmax=np.array([b]), 111 | vectorized=False, 112 | adaptive='p', 113 | abserr=eps_abs, 114 | relerr=eps_rel)[0].item() 115 | 116 | 117 | def entropy_from_kde(kde: sm.nonparametric.KDEUnivariate, 118 | base: float = np.e, 119 | eps_abs: float = 1.49e-08, 120 | eps_rel: float = 1.49e-08) -> float: 121 | """ 122 | Compute the entropy 123 | 124 | H(p) = - E_p[log(p)] 125 | 126 | of the density given by the statsmodels kde object via numerical integration. 127 | The argument base can be used to specify the units in which the entropy is measured. 128 | The default choice is the natural logarithm. 129 | 130 | Parameters 131 | ---------- 132 | kde: statsmodels kde object representing an approximation of the density 133 | base: the base of the logarithm used to control the units of measurement for the result 134 | eps_abs: absolute error tolerance for numerical integration 135 | eps_rel: relative error tolerance for numerical integration 136 | 137 | Returns 138 | ------- 139 | The entropy of the density approximated by the kde 140 | """ 141 | a = min(kde.support) 142 | b = max(kde.support) 143 | return entropy_from_density_with_support(pdf=kde.evaluate, 144 | a=a, 145 | b=b, 146 | base=base, 147 | eps_abs=eps_abs, 148 | eps_rel=eps_rel) 149 | 150 | 151 | def continuous_entropy_from_sample(sample: np.ndarray, 152 | base: float = np.e, 153 | eps_abs: float = 1.49e-08, 154 | eps_rel: float = 1.49e-08) -> float: 155 | """ 156 | Compute the entropy 157 | 158 | H(p) = - E_p[log(p)] 159 | 160 | of a sample via approximation by a kernel density estimate and numerical integration. 161 | The argument base can be used to specify the units in which the entropy is measured. 162 | The default choice is the natural logarithm. 
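`entropy_from_density_with_support` evaluates $-\int_a^b p(x) \log p(x) \, dx$ with the adaptive `cubature` routine. As an independent sanity check (not part of the package), the same one-dimensional integral can be computed with `scipy.integrate.quad`; for $N(2, 9)$ it recovers the closed-form differential entropy reported in the notebook above.

```python
# Minimal sketch: cross-check the entropy integral with scipy.integrate.quad.
import numpy as np
from scipy.integrate import quad
from scipy.stats import norm

mu, sigma = 2.0, 3.0


def negative_p_log_p(x: float) -> float:
    p = norm.pdf(x, mu, sigma)
    return -p * np.log(p) if p > 0.0 else 0.0


entropy_estimate, _ = quad(negative_p_log_p, mu - 10.0 * sigma, mu + 10.0 * sigma)
print(entropy_estimate)                               # approximately 2.5176
print(0.5 * (1.0 + np.log(2.0 * np.pi * sigma**2)))   # closed form, same value
```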
163 | 164 | Parameters 165 | ---------- 166 | sample: a sample of draws from the density represented as a 1-dimensional NumPy array 167 | base: the base of the logarithm used to control the units of measurement for the result 168 | eps_abs: absolute error tolerance for numerical integration 169 | eps_rel: relative error tolerance for numerical integration 170 | 171 | Returns 172 | ------- 173 | The entropy of the density approximated by the sample 174 | """ 175 | kde = sm.nonparametric.KDEUnivariate(sample) 176 | kde.fit() 177 | return entropy_from_kde(kde=kde, 178 | base=base, 179 | eps_abs=eps_abs, 180 | eps_rel=eps_rel) 181 | 182 | 183 | ################################################################################ 184 | # Cross Entropy 185 | ################################################################################ 186 | def _cross_entropy_integrand(p: tp.Callable, 187 | q: tp.Callable, 188 | x: float, 189 | log_fun: tp.Callable) -> float: 190 | """ 191 | Compute the integrand p(x) * log(q(x)) at a given point x for the calculation of cross entropy. 192 | 193 | Parameters 194 | ---------- 195 | p: probability density function of the distribution p 196 | q: probability density function of the distribution q 197 | x: the point at which to evaluate the integrand 198 | base: the base of the logarithm used to control the units of measurement for the result 199 | 200 | Returns 201 | ------- 202 | Integrand for the cross entropy calculation 203 | """ 204 | # return p(x) * log_fun(q(x) + 1e-12) 205 | qx = q(x) 206 | px = p(x) 207 | if qx == 0.0: 208 | if px == 0.0: 209 | return 0.0 210 | else: 211 | raise ValueError(f'q(x) is zero at x={x} but p(x) is not') 212 | elif px == 0.0: 213 | return 0.0 214 | else: 215 | return px * log_fun(qx) 216 | 217 | 218 | def _vectorized_cross_entropy_integrand(p: tp.Callable, 219 | q: tp.Callable, 220 | x: np.ndarray, 221 | log_fun: tp.Callable) -> np.ndarray: 222 | """ 223 | Compute the integrand p(x) * log(q(x)) vectorized at given points x for the calculation of cross 224 | entropy. 225 | 226 | Parameters 227 | ---------- 228 | p: probability density function of the distribution p 229 | q: probability density function of the distribution q 230 | x: the point at which to evaluate the integrand 231 | base: the base of the logarithm used to control the units of measurement for the result 232 | 233 | Returns 234 | ------- 235 | Integrand for the cross entropy calculation 236 | """ 237 | # return p(x) * log_fun(q(x) + 1e-12) 238 | qx = q(x) 239 | px = p(x) 240 | 241 | q_positive_index = qx > 0.0 242 | p_positive_index = px > 0.0 243 | 244 | q_zero_but_p_positive_index = ~q_positive_index & p_positive_index 245 | if np.any(q_zero_but_p_positive_index): 246 | raise ValueError(f'q(x) is zero at x={x[q_zero_but_p_positive_index]} but p(x) is not') 247 | 248 | return - np.where(p_positive_index, px * log_fun(qx), 0.0) 249 | 250 | 251 | def cross_entropy_from_densities_with_support(p: tp.Callable, 252 | q: tp.Callable, 253 | a: float, 254 | b: float, 255 | base: float = np.e, 256 | eps_abs: float = 1.49e-08, 257 | eps_rel: float = 1.49e-08) -> float: 258 | """ 259 | Compute the cross entropy of the distribution q relative to the distribution p 260 | 261 | H_q(p) = - E_p [log(q)] 262 | 263 | via numerical integration from a to b. 264 | The argument base can be used to specify the units in which the entropy is measured. 265 | The default choice is the natural logarithm. 
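The scalar and vectorized integrands above encode the usual conventions: a point where $p(x) = 0$ contributes nothing (so $0 \cdot \log 0 := 0$), while $q(x) = 0$ with $p(x) > 0$ means the cross entropy is infinite, which the module reports as a `ValueError`. A stripped-down sketch of the same convention for arrays of density values (not the module's code):

```python
# Minimal sketch of the zero-handling convention used by the integrands:
# contributions vanish where p == 0, and q == 0 with p > 0 is an error.
import numpy as np


def cross_entropy_contributions(px: np.ndarray, qx: np.ndarray) -> np.ndarray:
    p_positive = px > 0.0
    if np.any(p_positive & (qx <= 0.0)):
        raise ValueError('q(x) is zero at a point where p(x) is not')
    # - p(x) * log(q(x)) where p(x) > 0, exactly 0.0 elsewhere
    safe_qx = np.where(p_positive, qx, 1.0)
    return -np.where(p_positive, px * np.log(safe_qx), 0.0)


print(cross_entropy_contributions(np.array([0.0, 0.2, 0.5]),
                                  np.array([0.0, 0.1, 0.6])))
```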
266 | 267 | Parameters 268 | ---------- 269 | p: probability density function of the distribution p 270 | q: probability density function of the distribution q 271 | a: lower bound of the integration region 272 | b: upper bound of the integration region 273 | base: the base of the logarithm used to control the units of measurement for the result 274 | eps_abs: absolute error tolerance for numerical integration 275 | eps_rel: relative error tolerance for numerical integration 276 | 277 | Returns 278 | ------- 279 | The cross entropy of the distribution q relative to the distribution p. 280 | """ 281 | log_fun = _select_vectorized_log_fun_for_base(base) 282 | 283 | return - cubature(func=lambda x: _cross_entropy_integrand(p=p, q=q, x=x, log_fun=log_fun), 284 | ndim=1, 285 | fdim=1, 286 | xmin=np.array([a]), 287 | xmax=np.array([b]), 288 | vectorized=False, 289 | adaptive='p', 290 | abserr=eps_abs, 291 | relerr=eps_rel)[0].item() 292 | 293 | 294 | def _does_support_overlap(p: sm.nonparametric.KDEUnivariate, 295 | q: sm.nonparametric.KDEUnivariate) -> bool: 296 | """ 297 | Determine whether the support of distributions of kernel density estimates p and q overlap. 298 | 299 | Parameters 300 | ---------- 301 | p: statsmodels kde object representing an approximation of the distribution p 302 | q: statsmodels kde object representing an approximation of the distribution q 303 | 304 | Returns 305 | ------- 306 | whether the support of distributions of kernel density estimates p and q overlap 307 | """ 308 | return intersection(min(p.support), max(p.support), min(q.support), max(q.support)) is not None 309 | 310 | 311 | def cross_entropy_from_kde(p: sm.nonparametric.KDEUnivariate, 312 | q: sm.nonparametric.KDEUnivariate, 313 | base: float = np.e, 314 | eps_abs: float = 1.49e-08, 315 | eps_rel: float = 1.49e-08) -> float: 316 | """ 317 | Compute the cross entropy of the distribution q relative to the distribution p 318 | 319 | H_q(p) = - E_p [log(q)] 320 | 321 | given by the statsmodels kde objects via numerical integration. 322 | The argument base can be used to specify the units in which the entropy is measured. 323 | The default choice is the natural logarithm. 324 | 325 | Parameters 326 | ---------- 327 | p: statsmodels kde object approximating the probability density function of the distribution p 328 | q: statsmodels kde object approximating the probability density function of the distribution q 329 | base: the base of the logarithm used to control the units of measurement for the result 330 | eps_abs: absolute error tolerance for numerical integration 331 | eps_rel: relative error tolerance for numerical integration 332 | 333 | Returns 334 | ------- 335 | The cross entropy of the distribution q relative to the distribution p. 
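Putting the pieces together, the KDE-based path first checks that the two estimated supports overlap (via `intersection`) and then integrates over the union of the two supports. A minimal usage sketch, assuming fresh samples rather than the notebook's antithetic ones, so the numerical result will differ slightly:

```python
# Minimal usage sketch for the KDE-based cross entropy path.
import numpy as np
import statsmodels.api as sm

from divergence import cross_entropy_from_kde

np.random.seed(42)
sample_p = 2.0 + 3.0 * np.random.randn(10_000)
sample_q = 1.0 + 2.0 * np.random.randn(10_000)

kde_p = sm.nonparametric.KDEUnivariate(sample_p)
kde_q = sm.nonparametric.KDEUnivariate(sample_q)
kde_p.fit()
kde_q.fit()

# raises ValueError if the supports of the two estimates do not overlap
print(cross_entropy_from_kde(kde_p, kde_q))   # compare with the closed-form value of about 2.86
```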
336 | """ 337 | if not _does_support_overlap(p, q): 338 | raise ValueError('The support of p and q does not overlap.') 339 | 340 | a = min(min(p.support), min(q.support)) 341 | b = max(max(p.support), max(q.support)) 342 | 343 | return cross_entropy_from_densities_with_support(p=p.evaluate, 344 | q=q.evaluate, 345 | a=a, 346 | b=b, 347 | base=base, 348 | eps_abs=eps_abs, 349 | eps_rel=eps_rel) 350 | 351 | 352 | def continuous_cross_entropy_from_sample(sample_p: np.ndarray, 353 | sample_q: np.ndarray, 354 | base: float = np.e, 355 | eps_abs: float = 1.49e-08, 356 | eps_rel: float = 1.49e-08) -> float: 357 | """ 358 | Compute the cross entropy of the distribution q relative to the distribution p 359 | 360 | H_q(p) = - E_p [log(q)] 361 | 362 | from samples of the two distributions via approximation by a kernel density estimate and 363 | numerical integration. 364 | The argument base can be used to specify the units in which the entropy is measured. 365 | The default choice is the natural logarithm. 366 | 367 | Parameters 368 | ---------- 369 | sample_p: sample from the distribution p 370 | sample_q: sample from the distribution q 371 | base: the base of the logarithm used to control the units of measurement for the result 372 | eps_abs: absolute error tolerance for numerical integration 373 | eps_rel: relative error tolerance for numerical integration 374 | 375 | Returns 376 | ------- 377 | The cross entropy of the distribution q relative to the distribution p. 378 | 379 | """ 380 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 381 | kde_p.fit() 382 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 383 | kde_q.fit() 384 | 385 | return cross_entropy_from_kde(kde_p, kde_q, base=base, eps_abs=eps_abs, eps_rel=eps_rel) 386 | 387 | 388 | ################################################################################ 389 | # Relative Entropy (KL Divergence) 390 | ################################################################################ 391 | def _relative_entropy_integrand(p: tp.Callable, 392 | q: tp.Callable, 393 | x: float, 394 | log_fun: tp.Callable = np.log) -> float: 395 | """ 396 | Compute the integrand p(x) * log(p(x) / q(x)) at a given point x for the calculation of relative 397 | entropy. 398 | 399 | Parameters 400 | ---------- 401 | p: probability density function of the distribution p 402 | q: probability density function of the distribution q 403 | x: the point at which to evaluate the integrand 404 | base: the base of the logarithm used to control the units of measurement for the result 405 | 406 | Returns 407 | ------- 408 | Integrand for the relative entropy calculation 409 | """ 410 | qx = q(x) 411 | px = p(x) 412 | if qx == 0.0: 413 | if px == 0.0: 414 | return 0.0 415 | else: 416 | raise ValueError(f'q(x) is zero at x={x} but p(x) is not') 417 | elif px == 0.0: 418 | return 0.0 419 | else: 420 | return px * log_fun(px / qx) 421 | 422 | 423 | def _vectorized_relative_entropy_integrand(p: tp.Callable, 424 | q: tp.Callable, 425 | x: np.ndarray, 426 | log_fun: tp.Callable = np.log) -> np.ndarray: 427 | """ 428 | Compute the integrand p(x) * log(p(x) / q(x)) vectorized at given points x for the calculation 429 | of relative entropy. 
430 | 
431 |     Parameters
432 |     ----------
433 |     p: probability density function of the distribution p
434 |     q: probability density function of the distribution q
435 |     x: the points at which to evaluate the integrand
436 |     log_fun: the vectorized logarithm function corresponding to the chosen base
437 | 
438 |     Returns
439 |     -------
440 |     Integrand for the relative entropy calculation
441 |     """
442 |     # q(x) may be zero only where p(x) is also zero; such points contribute nothing to the integral
443 |     qx = q(x)
444 |     px = p(x)
445 | 
446 |     q_positive_index = qx > 0.0
447 |     p_positive_index = px > 0.0
448 | 
449 |     q_zero_but_p_positive_index = ~q_positive_index & p_positive_index
450 |     if np.any(q_zero_but_p_positive_index):
451 |         raise ValueError(f'q(x) is zero at x={x[q_zero_but_p_positive_index]} but p(x) is not')
452 | 
453 |     return np.where(p_positive_index, px * log_fun(px / qx), 0.0)
454 | 
455 | 
456 | def relative_entropy_from_densities_with_support(p: tp.Callable,
457 |                                                  q: tp.Callable,
458 |                                                  a: float,
459 |                                                  b: float,
460 |                                                  base: float = np.e,
461 |                                                  eps_abs: float = 1.49e-08,
462 |                                                  eps_rel: float = 1.49e-08
463 |                                                  ) -> float:
464 |     """
465 |     Compute the relative entropy of the distribution q relative to the distribution p
466 | 
467 |     D_KL(p||q) = E_p [log(p/q)]
468 | 
469 |     via numerical integration from a to b.
470 |     The argument base can be used to specify the units in which the entropy is measured.
471 |     The default choice is the natural logarithm.
472 | 
473 |     Parameters
474 |     ----------
475 |     p: probability density function of the distribution p
476 |     q: probability density function of the distribution q
477 |     a: lower bound of the integration region
478 |     b: upper bound of the integration region
479 |     base: the base of the logarithm used to control the units of measurement for the result
480 |     eps_abs: absolute error tolerance for numerical integration
481 |     eps_rel: relative error tolerance for numerical integration
482 | 
483 |     Returns
484 |     -------
485 |     The relative entropy of the distribution q relative to the distribution p.
486 |     """
487 |     log_fun = _select_vectorized_log_fun_for_base(base)
488 | 
489 |     def integrand(x: float):
490 |         return _relative_entropy_integrand(p=p, q=q, x=x, log_fun=log_fun)
491 | 
492 |     return cubature(func=integrand,
493 |                     ndim=1,
494 |                     fdim=1,
495 |                     xmin=np.array([a]),
496 |                     xmax=np.array([b]),
497 |                     vectorized=False,
498 |                     adaptive='p',
499 |                     abserr=eps_abs,
500 |                     relerr=eps_rel)[0].item()
501 | 
502 | 
503 | def relative_entropy_from_kde(p: sm.nonparametric.KDEUnivariate,
504 |                               q: sm.nonparametric.KDEUnivariate,
505 |                               base: float = np.e,
506 |                               eps_abs: float = 1.49e-08,
507 |                               eps_rel: float = 1.49e-08) -> float:
508 |     """
509 |     Compute the relative entropy of the distribution q relative to the distribution p
510 | 
511 |     D_KL(p||q) = E_p [log(p/q)]
512 | 
513 |     given by the statsmodels kde objects via numerical integration.
514 |     The argument base can be used to specify the units in which the entropy is measured.
515 |     The default choice is the natural logarithm.
516 | 517 | Parameters 518 | ---------- 519 | p: statsmodels kde object approximating the probability density function of the distribution p 520 | q: statsmodels kde object approximating the probability density function of the distribution q 521 | base: the base of the logarithm used to control the units of measurement for the result 522 | eps_abs: absolute error tolerance for numerical integration 523 | eps_rel: relative error tolerance for numerical integration 524 | 525 | Returns 526 | ------- 527 | The relative entropy of the distribution q relative to the distribution p. 528 | """ 529 | if not _does_support_overlap(p, q): 530 | raise ValueError('The support of p and q does not overlap.') 531 | 532 | a = min(min(p.support), min(q.support)) 533 | b = max(max(p.support), max(q.support)) 534 | return relative_entropy_from_densities_with_support(p=p.evaluate, 535 | q=q.evaluate, 536 | a=a, 537 | b=b, 538 | base=base, 539 | eps_abs=eps_abs, 540 | eps_rel=eps_rel) 541 | 542 | 543 | def continuous_relative_entropy_from_sample(sample_p: np.ndarray, 544 | sample_q: np.ndarray, 545 | base: float = np.e, 546 | eps_abs: float = 1.49e-08, 547 | eps_rel: float = 1.49e-08) -> float: 548 | """ 549 | Compute the relative entropy of the distribution q relative to the distribution p 550 | 551 | D_KL(p||q) = E_p [log(p/q)] 552 | 553 | from samples of the two distributions via approximation by a kernel density estimate and 554 | numerical integration. 555 | The argument base can be used to specify the units in which the entropy is measured. 556 | The default choice is the natural logarithm. 557 | 558 | Parameters 559 | ---------- 560 | sample_p: sample from the distribution p 561 | sample_q: sample from the distribution q 562 | base: the base of the logarithm used to control the units of measurement for the result 563 | eps_abs: absolute error tolerance for numerical integration 564 | eps_rel: relative error tolerance for numerical integration 565 | 566 | Returns 567 | ------- 568 | The relative entropy of the distribution q relative to the distribution p. 569 | """ 570 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 571 | kde_p.fit() 572 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 573 | kde_q.fit() 574 | 575 | return relative_entropy_from_kde(p=kde_p, 576 | q=kde_q, 577 | base=base, 578 | eps_abs=eps_abs, 579 | eps_rel=eps_rel) 580 | 581 | 582 | ################################################################################ 583 | # Jensen-Shannon Divergence 584 | ############################################################################### 585 | def _relative_entropy_from_densities_with_support_for_shannon_divergence( 586 | p: tp.Callable, 587 | q: tp.Callable, 588 | a: float, 589 | b: float, 590 | log_fun: tp.Callable = np.log, 591 | eps_abs: float = 1.49e-08, 592 | eps_rel: float = 1.49e-08) -> float: 593 | """ 594 | Compute the relative entropy of the distribution q relative to the distribution p 595 | 596 | D_KL(p||q) = E_p [log(p/q)] 597 | 598 | via numerical integration from a to b. 599 | The argument base can be used to specify the units in which the entropy is measured. 600 | The default choice is the natural logarithm. 
601 | 602 | Parameters 603 | ---------- 604 | p: probability density function of the distribution p 605 | q: probability density function of the distribution q 606 | a: lower bound of the integration region 607 | b: upper bound of the integration region 608 | base: the base of the logarithm used to control the units of measurement for the result 609 | eps_abs: absolute error tolerance for numerical integration 610 | eps_rel: relative error tolerance for numerical integration 611 | 612 | Returns 613 | ------- 614 | The relative entropy of the distribution q relative to the distribution p. 615 | 616 | """ 617 | def integrand(x): 618 | return p(x) * log_fun(p(x) / q(x)) if p(x) > 0.0 else 0.0 619 | 620 | return cubature(func=integrand, 621 | ndim=1, 622 | fdim=1, 623 | xmin=np.array([a]), 624 | xmax=np.array([b]), 625 | vectorized=False, 626 | adaptive='p', 627 | abserr=eps_abs, 628 | relerr=eps_rel)[0].item() 629 | 630 | 631 | def jensen_shannon_divergence_from_densities_with_support(p: tp.Callable, 632 | q: tp.Callable, 633 | a: float, 634 | b: float, 635 | base: float = np.e, 636 | eps_abs: float = 1.49e-08, 637 | eps_rel: float = 1.49e-08) \ 638 | -> float: 639 | """ 640 | Compute the Jensen-Shannon divergence between distributions p and q 641 | 642 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 643 | 644 | via numerical integration from a to b. 645 | The argument base can be used to specify the units in which the entropy is measured. 646 | The default choice is the natural logarithm. 647 | 648 | Parameters 649 | ---------- 650 | p: probability density function of the distribution p 651 | q: probability density function of the distribution q 652 | a: lower bound of the integration region 653 | b: upper bound of the integration region 654 | base: the base of the logarithm used to control the units of measurement for the result 655 | eps_abs: absolute error tolerance for numerical integration 656 | eps_rel: relative error tolerance for numerical integration 657 | 658 | Returns 659 | ------- 660 | The Jensen-Shannon divergence between distributions p and q. 661 | 662 | """ 663 | log_fun = _select_vectorized_log_fun_for_base(base) 664 | 665 | m = lambda x: 0.5 * (p(x) + q(x)) 666 | D_PM = _relative_entropy_from_densities_with_support_for_shannon_divergence( 667 | p=p, 668 | q=m, 669 | a=a, 670 | b=b, 671 | log_fun=log_fun, 672 | eps_abs=eps_abs, 673 | eps_rel=eps_rel) 674 | 675 | D_QM = _relative_entropy_from_densities_with_support_for_shannon_divergence( 676 | p=q, 677 | q=m, 678 | a=a, 679 | b=b, 680 | log_fun=log_fun, 681 | eps_abs=eps_abs, 682 | eps_rel=eps_rel) 683 | 684 | return 0.5 * D_PM + 0.5 * D_QM 685 | 686 | 687 | def jensen_shannon_divergence_from_kde(p: sm.nonparametric.KDEUnivariate, 688 | q: sm.nonparametric.KDEUnivariate, 689 | base: float = np.e, 690 | eps_abs: float = 1.49e-08, 691 | eps_rel: float = 1.49e-08) \ 692 | -> float: 693 | """ 694 | Compute the Jensen-Shannon divergence between distributions p and q 695 | 696 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 697 | 698 | given by the statsmodels kde objects via numerical integration. 699 | The argument base can be used to specify the units in which the entropy is measured. 700 | The default choice is the natural logarithm. 
701 | 702 | Parameters 703 | ---------- 704 | p: statsmodels kde object approximating the probability density function of the distribution p 705 | q: statsmodels kde object approximating the probability density function of the distribution q 706 | base: the base of the logarithm used to control the units of measurement for the result 707 | eps_abs: absolute error tolerance for numerical integration 708 | eps_rel: relative error tolerance for numerical integration 709 | 710 | Returns 711 | ------- 712 | The Jensen-Shannon divergence between distributions p and q. 713 | 714 | """ 715 | a = min(min(p.support), min(q.support)) 716 | b = max(max(p.support), max(q.support)) 717 | return jensen_shannon_divergence_from_densities_with_support(p=p.evaluate, 718 | q=q.evaluate, 719 | a=a, 720 | b=b, 721 | base=base, 722 | eps_abs=eps_abs, 723 | eps_rel=eps_rel) 724 | 725 | 726 | def continuous_jensen_shannon_divergence_from_sample(sample_p: np.ndarray, 727 | sample_q: np.ndarray, 728 | base: float = np.e, 729 | eps_abs: float = 1.49e-08, 730 | eps_rel: float = 1.49e-08) -> float: 731 | """ 732 | Compute the Jensen-Shannon divergence between distributions p and q 733 | 734 | JSD(p||q) = 0.5 * (D_KL(p||m) + D_KL(q||m)), with m = 0.5 * (p + q) 735 | 736 | from samples of the two distributions via approximation by a kernel density estimate and 737 | numerical integration. 738 | The argument base can be used to specify the units in which the entropy is measured. 739 | The default choice is the natural logarithm. 740 | 741 | Parameters 742 | ---------- 743 | sample_p: sample from the distribution p 744 | sample_q: sample from the distribution q 745 | base: the base of the logarithm used to control the units of measurement for the result 746 | eps_abs: absolute error tolerance for numerical integration 747 | eps_rel: relative error tolerance for numerical integration 748 | 749 | Returns 750 | ------- 751 | The Jensen-Shannon divergence between distributions p and q. 752 | 753 | """ 754 | kde_p = sm.nonparametric.KDEUnivariate(sample_p) 755 | kde_p.fit() 756 | kde_q = sm.nonparametric.KDEUnivariate(sample_q) 757 | kde_q.fit() 758 | 759 | return jensen_shannon_divergence_from_kde(kde_p, 760 | kde_q, 761 | base=base, 762 | eps_abs=eps_abs, 763 | eps_rel=eps_rel) 764 | 765 | 766 | ################################################################################ 767 | # Mutual Information 768 | ############################################################################### 769 | def mutual_information_from_densities_with_support(pdf_x: tp.Callable, 770 | pdf_y: tp.Callable, 771 | pdf_xy: tp.Callable, 772 | x_min: float, 773 | x_max: float, 774 | y_min: float, 775 | y_max: float, 776 | base: float = np.e, 777 | eps_abs: float = 1.49e-08, 778 | eps_rel: float = 1.49e-08 779 | ) -> float: 780 | """ 781 | Compute mutual information of the random variables x and y with joint density p_{x, y} and 782 | marginal densities p_x and p_y defined as the KL divergence between the product of marginal 783 | densities and the joint density, i.e. 784 | 785 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 786 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 787 | 788 | via numerical integration on a rectangular domain aligned with the axes. 789 | The argument base can be used to specify the units in which the entropy is measured. 790 | The default choice is the natural logarithm. 
791 | 792 | Parameters 793 | ---------- 794 | pdf_x: probability density function of the random variable x 795 | pdf_y: probability density function of the random variable y 796 | pdf_xy: joint probability density function of the random variables x and y 797 | x_min: lower bound of the integration domain for x 798 | x_max: upper bound of the integration domain for x 799 | y_min: lower bound of the integration domain for y 800 | y_max: upper bound of the integration domain for y 801 | base: the base of the logarithm used to control the units of measurement for the result 802 | eps_abs: absolute error tolerance for numerical integration 803 | eps_rel: relative error tolerance for numerical integration 804 | 805 | Returns 806 | ------- 807 | The mutual information of the random variables x and y 808 | """ 809 | log_fun = _select_vectorized_log_fun_for_base(base) 810 | 811 | def mutual_information_integrand(arg: np.ndarray): 812 | if arg.ndim == 1: 813 | x, y = arg 814 | pxy = pdf_xy((x, y)) 815 | elif arg.ndim == 2: 816 | x = arg[:, 0] 817 | y = arg[:, 1] 818 | pxy = pdf_xy(arg.T) 819 | else: 820 | raise ValueError('arg must be a numpy array with one or two axes') 821 | 822 | px = pdf_x(x) 823 | py = pdf_y(y) 824 | 825 | return pxy * log_fun(pxy / (px * py)) 826 | 827 | return cubature(func=mutual_information_integrand, 828 | ndim=2, 829 | fdim=1, 830 | xmin=np.array([x_min, y_min]), 831 | xmax=np.array([x_max, y_max]), 832 | adaptive='p', 833 | vectorized=False, 834 | abserr=eps_abs, 835 | relerr=eps_rel)[0].item() 836 | 837 | 838 | def mutual_information_from_kde(kde_x: sm.nonparametric.KDEUnivariate, 839 | kde_y: sm.nonparametric.KDEUnivariate, 840 | kde_xy: sp.stats.kde.gaussian_kde, 841 | base: float = np.e, 842 | eps_abs: float = 1.49e-08, 843 | eps_rel: float = 1.49e-08) -> float: 844 | """ 845 | Compute mutual information of the random variables x and y with joint density p_{x, y} and 846 | marginal densities p_x and p_y defined as the KL divergence between the product of marginal 847 | densities and the joint density, i.e. 848 | 849 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 850 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 851 | 852 | given by the statsmodels kde objects for the marginal densities and a SciPy gaussian_kde object 853 | for the joint density via numerical integration. 854 | The argument base can be used to specify the units in which the entropy is measured. 855 | The default choice is the natural logarithm. 
856 | 857 | Parameters 858 | ---------- 859 | kde_x: statsmodels kde object approximating the marginal density of x 860 | kde_y: statsmodels kde object approximating the marginal density of y 861 | kde_xy: SciPy gaussian_kde object approximating the joint density of x and y 862 | base: the base of the logarithm used to control the units of measurement for the result 863 | eps_abs: absolute error tolerance for numerical integration 864 | eps_rel: relative error tolerance for numerical integration 865 | 866 | Returns 867 | ------- 868 | The mutual information of the random variables x and y 869 | """ 870 | x_min = min(kde_x.support) 871 | x_max = max(kde_x.support) 872 | y_min = min(kde_y.support) 873 | y_max = max(kde_y.support) 874 | 875 | return mutual_information_from_densities_with_support(pdf_x=kde_x.evaluate, 876 | pdf_y=kde_y.evaluate, 877 | pdf_xy=kde_xy.pdf, 878 | x_min=x_min, 879 | x_max=x_max, 880 | y_min=y_min, 881 | y_max=y_max, 882 | base=base, 883 | eps_abs=eps_abs, 884 | eps_rel=eps_rel) 885 | 886 | 887 | def continuous_mutual_information_from_samples(sample_x: np.ndarray, 888 | sample_y: np.ndarray, 889 | base: float = np.e, 890 | eps_abs: float = 1.49e-08, 891 | eps_rel: float = 1.49e-08) -> float: 892 | """ 893 | Compute mutual information of the random variables x and y with joint density p_{x, y} and 894 | marginal densities p_x and p_y defined as the KL divergence between the product of marginal 895 | densities and the joint density, i.e. 896 | 897 | I(X; Y) = D_KL(p_{x, y}|| p_x \otimes p_y) = 898 | E_{p_{x, y}} \left[ \log \left( \frac{p_{x, y} (x, y)}{p_x(x) p_y(y)} \right) \right] 899 | 900 | from samples of the two distributions via approximation by kernel density estimates and 901 | numerical integration. 902 | The argument base can be used to specify the units in which the entropy is measured. 903 | The default choice is the natural logarithm. 904 | 905 | Parameters 906 | ---------- 907 | sample_x: x-component of the sample from the joint density p_{x, y} 908 | sample_y: y-component of the sample from the joint density p_{x, y} 909 | base: the base of the logarithm used to control the units of measurement for the result 910 | eps_abs: absolute error tolerance for numerical integration 911 | eps_rel: relative error tolerance for numerical integration 912 | 913 | Returns 914 | ------- 915 | The mutual information of the random variables x and y 916 | """ 917 | kde_x = sm.nonparametric.KDEUnivariate(sample_x) 918 | kde_x.fit() 919 | kde_y = sm.nonparametric.KDEUnivariate(sample_y) 920 | kde_y.fit() 921 | 922 | kde_xy = sp.stats.gaussian_kde([sample_x, sample_y]) 923 | 924 | return mutual_information_from_kde(kde_x=kde_x, 925 | kde_y=kde_y, 926 | kde_xy=kde_xy, 927 | base=base, 928 | eps_abs=eps_abs, 929 | eps_rel=eps_rel) 930 | 931 | 932 | ################################################################################ 933 | # Joint Entropy 934 | ############################################################################### 935 | def joint_entropy_from_densities_with_support(pdf_xy: tp.Callable, 936 | x_min: float, 937 | x_max: float, 938 | y_min: float, 939 | y_max: float, 940 | base: float = np.e, 941 | eps_abs: float = 1.49e-08, 942 | eps_rel: float = 1.49e-08) -> float: 943 | """ 944 | Compute joint entropy of the random variables x and y with joint density p_{x, y} defined as 945 | 946 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 947 | 948 | via numerical integration on a rectangular domain aligned with the axes. 
949 | The argument base can be used to specify the units in which the entropy is measured. 950 | The default choice is the natural logarithm. 951 | 952 | Parameters 953 | ---------- 954 | pdf_xy: joint probability density function of the random variables x and y 955 | x_min: lower bound of the integration domain for x 956 | x_max: upper bound of the integration domain for x 957 | y_min: lower bound of the integration domain for y 958 | y_max: upper bound of the integration domain for y 959 | base: the base of the logarithm used to control the units of measurement for the result 960 | eps_abs: absolute error tolerance for numerical integration 961 | eps_rel: relative error tolerance for numerical integration 962 | 963 | Returns 964 | ------- 965 | The joint entropy of the random variables x and y 966 | """ 967 | log_fun = _select_vectorized_log_fun_for_base(base) 968 | 969 | def joint_entropy_integrand(arg: np.ndarray): 970 | x, y = arg 971 | pxy = pdf_xy((x, y)) 972 | 973 | return pxy * log_fun(pxy) 974 | 975 | return - cubature(func=joint_entropy_integrand, 976 | ndim=2, 977 | fdim=1, 978 | xmin=np.array([x_min, y_min]), 979 | xmax=np.array([x_max, y_max]), 980 | adaptive='p', 981 | vectorized=False, 982 | abserr=eps_abs, 983 | relerr=eps_rel)[0].item() 984 | 985 | 986 | def joint_entropy_from_kde(kde_xy: sp.stats.kde.gaussian_kde, 987 | x_min: float, 988 | x_max: float, 989 | y_min: float, 990 | y_max: float, 991 | base: float = np.e, 992 | eps_abs: float = 1.49e-08, 993 | eps_rel: float = 1.49e-08) -> float: 994 | """ 995 | Compute joint entropy of the random variables x and y with joint density p_{x, y} defined as 996 | 997 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 998 | 999 | via numerical integration, where the joint density is given by a SciPy gaussian_kde object. 1000 | The argument base can be used to specify the units in which the entropy is measured. 1001 | The default choice is the natural logarithm. 1002 | 1003 | Parameters 1004 | ---------- 1005 | kde_xy: SciPy gaussian_kde object approximating the joint density of x and y 1006 | x_min: lower bound of the integration domain for x 1007 | x_max: upper bound of the integration domain for x 1008 | y_min: lower bound of the integration domain for y 1009 | y_max: upper bound of the integration domain for y 1010 | base: the base of the logarithm used to control the units of measurement for the result 1011 | eps_abs: absolute error tolerance for numerical integration 1012 | eps_rel: relative error tolerance for numerical integration 1013 | 1014 | Returns 1015 | ------- 1016 | The joint entropy of the random variables x and y 1017 | """ 1018 | 1019 | return joint_entropy_from_densities_with_support(pdf_xy=kde_xy.pdf, 1020 | x_min=x_min, 1021 | x_max=x_max, 1022 | y_min=y_min, 1023 | y_max=y_max, 1024 | base=base, 1025 | eps_abs=eps_abs, 1026 | eps_rel=eps_rel) 1027 | 1028 | 1029 | def continuous_joint_entropy_from_samples(sample_x: np.ndarray, 1030 | sample_y: np.ndarray, 1031 | base: float = np.e, 1032 | eps_abs: float = 1.49e-08, 1033 | eps_rel: float = 1.49e-08) -> float: 1034 | """ 1035 | Compute joint entropy of the random variables x and y with joint density p_{x, y} defined as 1036 | 1037 | H(X, Y) = - E_{p_{x, y}} \left[ \log p_{x, y} (x, y) \right] 1038 | 1039 | from samples of the two distributions via approximation by kernel density estimates and 1040 | numerical integration. 1041 | The argument base can be used to specify the units in which the entropy is measured. 
1042 |     The default choice is the natural logarithm.
1043 | 
1044 |     Parameters
1045 |     ----------
1046 |     sample_x: x-component of the sample from the joint density p_{x, y}
1047 |     sample_y: y-component of the sample from the joint density p_{x, y}
1048 |     base: the base of the logarithm used to control the units of measurement for the result
1049 |     eps_abs: absolute error tolerance for numerical integration
1050 |     eps_rel: relative error tolerance for numerical integration
1051 | 
1052 |     Returns
1053 |     -------
1054 |     The joint entropy of the random variables x and y
1055 |     """
1056 |     # kde_x = sm.nonparametric.KDEUnivariate(sample_x)
1057 |     # kde_x.fit()
1058 |     # kde_y = sm.nonparametric.KDEUnivariate(sample_y)
1059 |     # kde_y.fit()
1060 |     # x_min = min(kde_x.support)
1061 |     # x_max = max(kde_x.support)
1062 |     # y_min = min(kde_y.support)
1063 |     # y_max = max(kde_y.support)
1064 | 
1065 |     kde_xy = sp.stats.gaussian_kde([sample_x, sample_y])
1066 | 
1067 |     x_min, x_max = _get_min_and_max_support_for_silverman_bw_rule(sample_x)
1068 |     y_min, y_max = _get_min_and_max_support_for_silverman_bw_rule(sample_y)
1069 | 
1070 |     return joint_entropy_from_kde(kde_xy=kde_xy,
1071 |                                   x_min=x_min,
1072 |                                   x_max=x_max,
1073 |                                   y_min=y_min,
1074 |                                   y_max=y_max,
1075 |                                   base=base,
1076 |                                   eps_abs=eps_abs,
1077 |                                   eps_rel=eps_rel)
1078 | 
1079 | 
1080 | ################################################################################
1081 | # Conditional Entropy
1082 | ###############################################################################
1083 | def conditional_entropy_from_densities_with_support(pdf_x: tp.Callable,
1084 |                                                     pdf_xy: tp.Callable,
1085 |                                                     x_min: float,
1086 |                                                     x_max: float,
1087 |                                                     y_min: float,
1088 |                                                     y_max: float,
1089 |                                                     base: float = np.e,
1090 |                                                     eps_abs: float = 1.49e-08,
1091 |                                                     eps_rel: float = 1.49e-08,
1092 |                                                     gpu: bool = False
1093 |                                                     ) -> float:
1094 |     """
1095 |     Compute conditional entropy of the random variables x and y with joint density p_{x, y} and
1096 |     marginal density p_x defined as
1097 | 
1098 |     H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right]
1099 | 
1100 |     via numerical integration on a rectangular domain aligned with the axes.
1101 |     The argument base can be used to specify the units in which the entropy is measured.
1102 |     The default choice is the natural logarithm.
1103 | 1104 | Parameters 1105 | ---------- 1106 | pdf_x: probability density function of the random variable x 1107 | pdf_xy: joint probability density function of the random variables x and y 1108 | x_min: lower bound of the integration domain for x 1109 | x_max: upper bound of the integration domain for x 1110 | y_min: lower bound of the integration domain for y 1111 | y_max: upper bound of the integration domain for y 1112 | base: the base of the logarithm used to control the units of measurement for the result 1113 | eps_abs: absolute error tolerance for numerical integration 1114 | eps_rel: relative error tolerance for numerical integration 1115 | gpu: whether to use the gpu for evaluation of the kernel density estimate 1116 | 1117 | Returns 1118 | ------- 1119 | The conditional entropy of the random variables x and y 1120 | """ 1121 | log_fun = _select_vectorized_log_fun_for_base(base, gpu=gpu) 1122 | 1123 | def conditional_entropy_integrand(arg: np.ndarray): 1124 | if arg.ndim == 1: 1125 | x, y = arg 1126 | pxy = pdf_xy((x, y)) 1127 | px = pdf_x(x) 1128 | elif arg.ndim == 2: 1129 | x = arg[:, 0] 1130 | pxy = pdf_xy(arg.T) 1131 | px = pdf_x(x) 1132 | else: 1133 | raise ValueError('the number of axes in arg must be either 1 or 2') 1134 | 1135 | return pxy * log_fun(pxy / px) 1136 | 1137 | return - cubature(func=conditional_entropy_integrand, 1138 | ndim=2, 1139 | fdim=1, 1140 | xmin=np.array([x_min, y_min]), 1141 | xmax=np.array([x_max, y_max]), 1142 | adaptive='p', 1143 | vectorized=True, 1144 | abserr=eps_abs, 1145 | relerr=eps_rel)[0].item() 1146 | 1147 | 1148 | def conditional_entropy_from_kde(kde_x: sm.nonparametric.KDEUnivariate, 1149 | kde_xy: sp.stats.kde.gaussian_kde, 1150 | y_min: float, 1151 | y_max: float, 1152 | base: float = np.e, 1153 | eps_abs: float = 1.49e-08, 1154 | eps_rel: float = 1.49e-08) -> float: 1155 | """ 1156 | Compute conditional entropy of the random variables x and y with joint density p_{x, y} and 1157 | marginal density p_x defined as 1158 | 1159 | H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right] 1160 | 1161 | via numerical integration, where the marginal density of x is given by a statsmodels kde object 1162 | and the joint density by a SciPy gaussian_kde object. 1163 | The argument base can be used to specify the units in which the entropy is measured. 1164 | The default choice is the natural logarithm. 
1165 | 
1166 |     Parameters
1167 |     ----------
1168 |     kde_x: statsmodels kde object approximating the marginal density of x
1170 |     kde_xy: SciPy gaussian_kde object approximating the joint density of x and y
1171 |     y_min: lower bound of the integration domain for y
1172 |     y_max: upper bound of the integration domain for y
1173 |     base: the base of the logarithm used to control the units of measurement for the result
1174 |     eps_abs: absolute error tolerance for numerical integration
1175 |     eps_rel: relative error tolerance for numerical integration
1176 | 
1177 |     Returns
1178 |     -------
1179 |     The conditional entropy of the random variables x and y
1180 |     """
1181 |     x_min = min(kde_x.support)
1182 |     x_max = max(kde_x.support)
1183 | 
1184 |     return conditional_entropy_from_densities_with_support(pdf_x=kde_x.evaluate,
1185 |                                                            pdf_xy=kde_xy.pdf,
1186 |                                                            x_min=x_min,
1187 |                                                            x_max=x_max,
1188 |                                                            y_min=y_min,
1189 |                                                            y_max=y_max,
1190 |                                                            base=base,
1191 |                                                            eps_abs=eps_abs,
1192 |                                                            eps_rel=eps_rel)
1193 | 
1194 | 
1195 | def continuous_conditional_entropy_from_samples(sample_x: np.ndarray,
1196 |                                                 sample_y: np.ndarray,
1197 |                                                 base: float = np.e,
1198 |                                                 eps_abs: float = 1.49e-08,
1199 |                                                 eps_rel: float = 1.49e-08) -> float:
1200 |     """
1201 |     Compute conditional entropy of the random variables x and y with joint density p_{x, y} and
1202 |     marginal density p_x defined as
1203 | 
1204 |     H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right]
1205 | 
1206 |     from samples of the two distributions via approximation by kernel density estimates and
1207 |     numerical integration.
1208 |     The argument base can be used to specify the units in which the entropy is measured.
1209 |     The default choice is the natural logarithm.
1210 | 
1211 |     Parameters
1212 |     ----------
1213 |     sample_x: x-component of the sample from the joint density p_{x, y}
1214 |     sample_y: y-component of the sample from the joint density p_{x, y}
1215 |     base: the base of the logarithm used to control the units of measurement for the result
1216 |     eps_abs: absolute error tolerance for numerical integration
1217 |     eps_rel: relative error tolerance for numerical integration
1218 | 
1219 |     Returns
1220 |     -------
1221 |     The conditional entropy of the random variables x and y
1222 |     """
1223 |     kde_x = sm.nonparametric.KDEUnivariate(sample_x)
1224 |     kde_x.fit()
1225 | 
1226 |     kde_xy = sp.stats.gaussian_kde([sample_x, sample_y])
1227 |     y_min, y_max = _get_min_and_max_support_for_silverman_bw_rule(sample_y)
1228 | 
1229 |     return conditional_entropy_from_kde(kde_x=kde_x,
1230 |                                         kde_xy=kde_xy,
1231 |                                         y_min=y_min,
1232 |                                         y_max=y_max,
1233 |                                         base=base,
1234 |                                         eps_abs=eps_abs,
1235 |                                         eps_rel=eps_rel)
1236 | 
1237 | 
1238 | def continuous_conditional_entropy_from_samples_gpu(
1239 |         sample_x: np.ndarray,
1240 |         sample_y: np.ndarray,
1241 |         base: float = np.e,
1242 |         eps_abs: float = 1.49e-08,
1243 |         eps_rel: float = 1.49e-08,
1244 |         maximum_number_of_elements_per_batch: int = -1) -> float:
1245 |     """
1246 |     Compute conditional entropy of the random variables x and y with joint density p_{x, y} and
1247 |     marginal density p_x defined as
1248 | 
1249 |     H(Y|X) = - E_{p_{x, y}} \left[ \log \frac{p_{x, y} (x, y)}{p_x(x)} \right]
1250 | 
1251 |     from samples of the two distributions via approximation by kernel density estimates and
1252 |     numerical integration.
1253 |     The argument base can be used to specify the units in which the entropy is measured.
1254 | The default choice is the natural logarithm. 1255 | 1256 | Parameters 1257 | ---------- 1258 | sample_x: x-component of the sample from the joint density p_{x, y} 1259 | sample_y: y-component of the sample from the joint density p_{x, y} 1260 | base: the base of the logarithm used to control the units of measurement for the result 1261 | eps_abs: absolute error tolerance for numerical integration 1262 | eps_rel: relative error tolerance for numerical integration 1263 | maximum_number_of_elements_per_batch: 1264 | maximum number of data points times evaluation points to process in a single batch 1265 | 1266 | Returns 1267 | ------- 1268 | The conditional entropy of the random variables x and y 1269 | """ 1270 | kde_x = cocos_gaussian_kde(sample_x, gpu=True) 1271 | kde_xy = cocos_gaussian_kde(np.vstack((sample_x.reshape((1, -1)), 1272 | sample_y.reshape((1, -1)))), 1273 | gpu=True) 1274 | 1275 | x_min, x_max = _get_min_and_max_support_for_silverman_bw_rule(sample_x) 1276 | y_min, y_max = _get_min_and_max_support_for_silverman_bw_rule(sample_y) 1277 | 1278 | log_fun = _select_vectorized_log_fun_for_base(base, gpu=True) 1279 | 1280 | def conditional_entropy_integrand(arg: NumericArray): 1281 | # print(f'arg.shape={arg.shape}') 1282 | if arg.ndim == 1: 1283 | x, y = arg 1284 | pxy = kde_xy.evaluate((x, y)) 1285 | elif arg.ndim == 2: 1286 | x = arg[:, 0] 1287 | if maximum_number_of_elements_per_batch == -1: 1288 | pxy = kde_xy.evaluate(arg.T) 1289 | else: 1290 | pxy = evaluate_gaussian_kde_in_batches(kde_xy, 1291 | arg.T, 1292 | maximum_number_of_elements_per_batch 1293 | =maximum_number_of_elements_per_batch) 1294 | else: 1295 | raise ValueError('the number of axes in arg must be either 1 or 2') 1296 | 1297 | px = kde_x.evaluate(x) 1298 | # print(f'x.shape={x.shape}') 1299 | # print(f'px.shape={px.shape}') 1300 | # print(f'pxy.shape={pxy.shape}') 1301 | integrand = np.array(pxy * log_fun(pxy / px)) 1302 | return integrand 1303 | 1304 | return - cubature(func=conditional_entropy_integrand, 1305 | ndim=2, 1306 | fdim=1, 1307 | xmin=np.array([x_min, y_min]), 1308 | xmax=np.array([x_max, y_max]), 1309 | adaptive='p', 1310 | vectorized=True, 1311 | abserr=eps_abs, 1312 | relerr=eps_rel)[0].item() 1313 | --------------------------------------------------------------------------------
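
To make the sample-based univariate API in `divergence/continuous.py` concrete, the following is a minimal usage sketch. It assumes the module is importable as `divergence.continuous` (consistent with the package layout above); the distributions, sample sizes, and seed are arbitrary illustrations, not part of the library.

```python
import numpy as np

# assumption: these functions are importable from divergence.continuous as defined above
from divergence.continuous import (
    continuous_cross_entropy_from_sample,
    continuous_relative_entropy_from_sample,
)

rng = np.random.default_rng(42)
sample_p = rng.normal(loc=0.0, scale=1.0, size=10_000)  # sample from p = N(0, 1)
sample_q = rng.normal(loc=0.5, scale=1.5, size=10_000)  # sample from q = N(0.5, 1.5**2)

# KL divergence D_KL(p||q) estimated via KDE and numerical integration, in nats
kl_nats = continuous_relative_entropy_from_sample(sample_p, sample_q, base=np.e)

# cross entropy H_q(p) = H(p) + D_KL(p||q), here measured in bits
cross_entropy_bits = continuous_cross_entropy_from_sample(sample_p, sample_q, base=2.0)

print(f'D_KL(p||q) ≈ {kl_nats:.4f} nats, H_q(p) ≈ {cross_entropy_bits:.4f} bits')
```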
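
The Jensen-Shannon divergence estimator can be exercised in the same way; unlike relative entropy it is symmetric in its arguments and, measured in bits, bounded by 1. A small sketch under the same import assumption:

```python
import numpy as np

from divergence.continuous import continuous_jensen_shannon_divergence_from_sample

rng = np.random.default_rng(0)
sample_p = rng.normal(loc=-1.0, scale=1.0, size=5_000)
sample_q = rng.normal(loc=1.0, scale=1.0, size=5_000)

jsd_pq = continuous_jensen_shannon_divergence_from_sample(sample_p, sample_q, base=2.0)
jsd_qp = continuous_jensen_shannon_divergence_from_sample(sample_q, sample_p, base=2.0)

# the two estimates should agree up to KDE and integration error
print(jsd_pq, jsd_qp)
```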
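
For the bivariate estimators (mutual information, joint entropy, conditional entropy), a correlated Gaussian sample makes a convenient sanity check, since a bivariate normal with correlation rho has I(X; Y) = -0.5 * ln(1 - rho^2) nats. Again a sketch under the same import assumption; the two-dimensional cubature can take a while for large samples.

```python
import numpy as np

from divergence.continuous import (
    continuous_conditional_entropy_from_samples,
    continuous_joint_entropy_from_samples,
    continuous_mutual_information_from_samples,
)

rng = np.random.default_rng(7)
rho = 0.6
cov = np.array([[1.0, rho],
                [rho, 1.0]])
xy = rng.multivariate_normal(mean=[0.0, 0.0], cov=cov, size=2_000)
sample_x, sample_y = xy[:, 0], xy[:, 1]

mi = continuous_mutual_information_from_samples(sample_x, sample_y)             # nats
h_xy = continuous_joint_entropy_from_samples(sample_x, sample_y)                # nats
h_y_given_x = continuous_conditional_entropy_from_samples(sample_x, sample_y)   # nats

# for rho = 0.6 the exact mutual information is -0.5 * ln(1 - 0.36) ≈ 0.223 nats
print(mi, h_xy, h_y_given_x)
```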