├── .github ├── ISSUE_TEMPLATE │ └── bug_report.md └── workflows │ └── python-package.yml ├── .gitignore ├── .readthedocs.yaml ├── LICENSE ├── README.md ├── benchmarks ├── Benchmark_1_Distributions.ipynb ├── Benchmark_2_General_Mixture_Models.ipynb ├── Benchmark_3_KMeans.ipynb ├── Benchmark_4_Bayes_Classifier.ipynb └── Benchmark_5_Hidden_Markov_Model.ipynb ├── docs ├── CODE_OF_CONDUCT.rst ├── Makefile ├── _static │ └── custom.css ├── _templates │ └── class.rst ├── api.rst ├── conf.py ├── faq.rst ├── index.rst ├── install.rst ├── logo │ ├── pomegranate-logo.png │ └── pomegranate_comparison.png ├── requirements.txt ├── tutorials │ ├── B_Model_Tutorial_1_Distributions.ipynb │ ├── B_Model_Tutorial_2_General_Mixture_Models.ipynb │ ├── B_Model_Tutorial_3_Bayes_Classifier.ipynb │ ├── B_Model_Tutorial_4_Hidden_Markov_Models.ipynb │ ├── B_Model_Tutorial_5_Markov_Chains.ipynb │ ├── B_Model_Tutorial_6_Bayesian_Networks.ipynb │ ├── B_Model_Tutorial_7_Factor_Graphs.ipynb │ ├── C_Feature_Tutorial_1_GPU_Usage.ipynb │ ├── C_Feature_Tutorial_2_Mixed_Precision_and_DataTypes.ipynb │ ├── C_Feature_Tutorial_3_Out_Of_Core_Learning.ipynb │ └── C_Feature_Tutorial_4_Priors_and_Semi-supervised_Learning.ipynb └── whats_new.rst ├── examples └── Bayesian_Network_Monty_Hall.ipynb ├── pomegranate ├── __init__.py ├── _bayes.py ├── _utils.py ├── bayes_classifier.py ├── bayesian_network.py ├── distributions │ ├── __init__.py │ ├── _distribution.py │ ├── bernoulli.py │ ├── categorical.py │ ├── conditional_categorical.py │ ├── dirac_delta.py │ ├── exponential.py │ ├── gamma.py │ ├── halfnormal.py │ ├── independent_components.py │ ├── joint_categorical.py │ ├── lognormal.py │ ├── normal.py │ ├── poisson.py │ ├── student_t.py │ ├── uniform.py │ └── zero_inflated.py ├── factor_graph.py ├── gmm.py ├── hmm │ ├── __init__.py │ ├── _base.py │ ├── dense_hmm.py │ └── sparse_hmm.py ├── kmeans.py └── markov_chain.py ├── requirements.txt ├── setup.py ├── slides ├── pomegranate ODSC East 2019.pdf ├── pomegranate ODSC Europe 2020.pdf ├── pomegranate ODSC West 2017.pdf ├── pomegranate ODSC West 2018.pdf ├── pomegranate ODSC West 2019.pdf ├── pomegranate PyData NYC 2017.pdf ├── pomegranate data intelligence 2017.pdf ├── pomegranate odsc east 2017 turorial.pdf ├── pomegranate pydata seattle 2017.pdf └── pomegranate scipy 2017.pdf └── tests ├── __init__.py ├── distributions ├── __init__.py ├── _utils.py ├── test_bernoulli.py ├── test_categorical.py ├── test_conditional_categorical.py ├── test_dirac_delta.py ├── test_exponential.py ├── test_gamma.py ├── test_independent_component.py ├── test_joint_categorical.py ├── test_normal_diagonal.py ├── test_normal_full.py ├── test_poisson.py ├── test_student_t.py └── test_uniform.py ├── hmm ├── __init__.py ├── test_dense_hmm.py └── test_sparse_hmm.py ├── test_bayes_classifier.py ├── test_bayesian_network.py ├── test_bayesian_network_structure_learning.py ├── test_factor_graph.py ├── test_gmm.py ├── test_kmeans.py ├── test_markov_chain.py ├── test_semisupervised.py ├── test_utils.py └── tools.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[BUG]" 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is, including what you were expecting to happen and what actually happened. Please report the version of pomegranate that you are using and the operating system. 
Also, please make sure that you have upgraded to the latest version of pomegranate before submitting the bug report. 12 | 13 | **To Reproduce** 14 | Please provide a snippet of code that can reproduce this error. It is much easier for us to track down bugs and fix them if we have an example script that fails until we're successful. 15 | 16 | **Response time** 17 | Although I will likely respond during weekdays if I am not on vacation, I am not likely to be able to merge PRs or write code until the weekend. 18 | -------------------------------------------------------------------------------- /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | 5 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 6 | # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 7 | 8 | name: build 9 | 10 | on: 11 | push: 12 | branches: [ master ] 13 | pull_request: 14 | branches: [ master ] 15 | 16 | jobs: 17 | build: 18 | name: ${{ matrix.os }} Python ${{ matrix.python-version }} 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | os: [ubuntu-latest, macOS-latest] 23 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12'] 24 | 25 | steps: 26 | - uses: actions/checkout@v3 27 | - name: Set up Python ${{ matrix.os }} ${{ matrix.python-version }} 28 | uses: actions/setup-python@v3 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | - name: Install dependencies 32 | run: | 33 | python -m pip install --upgrade pip 34 | python -m pip install flake8 pytest 35 | pip install -r requirements.txt 36 | - name: Lint with flake8 37 | run: | 38 | # stop the build if there are Python syntax errors or undefined names 39 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 40 | # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide 41 | flake8 . 
--count --exit-zero --max-complexity=10 --max-line-length=127 --statistics 42 | - name: Test with pytest 43 | run: | 44 | pytest -m "not sample" -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.c 3 | .ipynb_checkpoints 4 | *~ 5 | .DS_Store 6 | build 7 | *.so 8 | .idea/ 9 | .vscode/ 10 | dist/ 11 | .eggs/ 12 | *.egg-info/ 13 | *.pyd 14 | .python-version 15 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # .readthedocs.yaml 2 | # Read the Docs configuration file 3 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 4 | 5 | # Required 6 | version: 2 7 | 8 | # Set the version of Python and other tools you might need 9 | build: 10 | os: ubuntu-22.04 11 | tools: 12 | python: "3.10" 13 | 14 | 15 | # Build documentation in the docs/ directory with Sphinx 16 | sphinx: 17 | configuration: docs/conf.py 18 | 19 | 20 | # Optionally declare the Python requirements required to build your docs 21 | python: 22 | install: 23 | - requirements: docs/requirements.txt 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Jacob Schreiber 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /benchmarks/Benchmark_1_Distributions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "6bc2e9e8", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "numpy : 1.23.4\n", 14 | "scipy : 1.9.3\n", 15 | "torch : 1.12.1\n", 16 | "pomegranate: 0.14.8\n", 17 | "\n", 18 | "Compiler : GCC 11.2.0\n", 19 | "OS : Linux\n", 20 | "Release : 4.15.0-197-generic\n", 21 | "Machine : x86_64\n", 22 | "Processor : x86_64\n", 23 | "CPU cores : 8\n", 24 | "Architecture: 64bit\n", 25 | "\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import numpy\n", 31 | "import scipy\n", 32 | "import torch\n", 33 | "\n", 34 | "from torchegranate.distributions import *\n", 35 | "\n", 36 | "numpy.random.seed(0)\n", 37 | "numpy.set_printoptions(suppress=True)\n", 38 | "\n", 39 | "%load_ext watermark\n", 40 | "%watermark -m -n -p numpy,scipy,torch,pomegranate" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "7dd56360", 46 | "metadata": {}, 47 | "source": [ 48 | "### Normal w/ Diagonal Covariance Distributions" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 2, 54 | "id": "5efcc291", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "n, d = 100000, 500\n", 59 | "\n", 60 | "X = torch.randn(n, d)\n", 61 | "Xn = X.numpy()\n", 62 | "\n", 63 | "mus = torch.randn(d)\n", 64 | "covs = torch.abs(torch.randn(d))\n", 65 | "stds = torch.sqrt(covs)" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 3, 71 | "id": "1c3325d9", 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "143 ms ± 12.5 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 79 | "227 ms ± 14.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 80 | "1.12 s ± 18.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "%timeit Normal(mus, covs, covariance_type='diag').log_probability(X)\n", 86 | "%timeit torch.distributions.Normal(mus, stds).log_prob(X).sum(dim=-1)\n", 87 | "%timeit scipy.stats.norm.logpdf(Xn, mus, stds).sum(axis=1)" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "id": "bd46b4b0", 93 | "metadata": {}, 94 | "source": [ 95 | "### Normal w/ Full Covariance Distribution" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 4, 101 | "id": "07fab284", 102 | "metadata": {}, 103 | "outputs": [], 104 | "source": [ 105 | "d0 = Normal().fit(X)\n", 106 | "\n", 107 | "mu, cov = d0.means, d0.covs" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 5, 113 | "id": "194d7679", 114 | "metadata": {}, 115 | "outputs": [ 116 | { 117 | "name": "stdout", 118 | "output_type": "stream", 119 | "text": [ 120 | "211 ms ± 19.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 121 | "205 ms ± 22.3 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 122 | "765 ms ± 36.1 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 123 | ] 124 | } 125 | ], 126 | "source": [ 127 | "%timeit Normal(mu, cov).log_probability(X)\n", 128 | "%timeit torch.distributions.MultivariateNormal(mu, cov).log_prob(X).sum(dim=-1)\n", 129 | "%timeit scipy.stats.multivariate_normal.logpdf(Xn, mu, cov).sum(axis=-1)" 130 | ] 131 | }, 132 | { 133 | "cell_type": "markdown", 134 | "id": "1a5adc6a", 135 | "metadata": {}, 136 | "source": [ 137 | "### Exponential Distribution" 138 | ] 139 | }, 140 | { 141 | "cell_type": "code", 142 | "execution_count": 6, 143 | "id": "f70bb98d", 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "X = torch.abs(torch.randn(n, d))\n", 148 | "Xn = X.numpy()\n", 149 | "\n", 150 | "means = torch.abs(torch.randn(d))" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 7, 156 | "id": "ab3d0af3", 157 | "metadata": {}, 158 | "outputs": [ 159 | { 160 | "name": "stdout", 161 | "output_type": "stream", 162 | "text": [ 163 | "150 ms ± 1.31 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 164 | "89 ms ± 3.47 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 165 | "1.36 s ± 86.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 166 | ] 167 | } 168 | ], 169 | "source": [ 170 | "%timeit Exponential(means).log_probability(X)\n", 171 | "%timeit torch.distributions.Exponential(means).log_prob(X)\n", 172 | "%timeit scipy.stats.expon.logpdf(X, means)" 173 | ] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "id": "5108fce5", 178 | "metadata": {}, 179 | "source": [ 180 | "### Gamma Distribution" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": 8, 186 | "id": "06865521", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [ 190 | "shapes = torch.abs(torch.randn(d))\n", 191 | "rates = torch.abs(torch.randn(d))" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": 9, 197 | "id": "2459f3f0", 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "name": "stdout", 202 | "output_type": "stream", 203 | "text": [ 204 | "270 ms ± 9.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 205 | "250 ms ± 30.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 206 | "2.67 s ± 75.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 207 | ] 208 | } 209 | ], 210 | "source": [ 211 | "%timeit Gamma(shapes, rates).log_probability(X)\n", 212 | "%timeit torch.distributions.Gamma(shapes, rates).log_prob(X)\n", 213 | "%timeit scipy.stats.gamma.logpdf(X, shapes, rates)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "c81f8f06", 219 | "metadata": {}, 220 | "source": [ 221 | "### Bernoulli Distribution" 222 | ] 223 | }, 224 | { 225 | "cell_type": "code", 226 | "execution_count": 10, 227 | "id": "7cee5e63", 228 | "metadata": {}, 229 | "outputs": [], 230 | "source": [ 231 | "X = torch.tensor(numpy.random.choice(2, size=(n, d)), dtype=torch.float32)\n", 232 | "probs = torch.mean(X, dim=0)" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 11, 238 | "id": "0f697993", 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stdout", 243 | "output_type": "stream", 244 | "text": [ 245 | "181 ms ± 8.04 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n", 246 | "419 ms ± 20.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 247 | "3.78 s ± 66.8 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "%timeit Bernoulli(probs).log_probability(X)\n", 253 | "%timeit torch.distributions.Bernoulli(probs).log_prob(X)\n", 254 | "%timeit scipy.stats.bernoulli.logpmf(X, probs)" 255 | ] 256 | } 257 | ], 258 | "metadata": { 259 | "kernelspec": { 260 | "display_name": "Python 3 (ipykernel)", 261 | "language": "python", 262 | "name": "python3" 263 | }, 264 | "language_info": { 265 | "codemirror_mode": { 266 | "name": "ipython", 267 | "version": 3 268 | }, 269 | "file_extension": ".py", 270 | "mimetype": "text/x-python", 271 | "name": "python", 272 | "nbconvert_exporter": "python", 273 | "pygments_lexer": "ipython3", 274 | "version": "3.9.13" 275 | } 276 | }, 277 | "nbformat": 4, 278 | "nbformat_minor": 5 279 | } 280 | -------------------------------------------------------------------------------- /benchmarks/Benchmark_4_Bayes_Classifier.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "752ca88f", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "numpy : 1.23.4\n", 14 | "scipy : 1.9.3\n", 15 | "torch : 1.12.1\n", 16 | "pomegranate: 0.14.8\n", 17 | "\n", 18 | "Compiler : GCC 11.2.0\n", 19 | "OS : Linux\n", 20 | "Release : 4.15.0-197-generic\n", 21 | "Machine : x86_64\n", 22 | "Processor : x86_64\n", 23 | "CPU cores : 8\n", 24 | "Architecture: 64bit\n", 25 | "\n" 26 | ] 27 | } 28 | ], 29 | "source": [ 30 | "import numpy\n", 31 | "import scipy\n", 32 | "import torch\n", 33 | "\n", 34 | "from sklearn.datasets import make_blobs\n", 35 | "\n", 36 | "from torchegranate.distributions import *\n", 37 | "from torchegranate.bayes_classifier import BayesClassifier\n", 38 | "\n", 39 | "from sklearn.naive_bayes import GaussianNB, BernoulliNB\n", 40 | "\n", 41 | "import matplotlib.pyplot as plt\n", 42 | "import seaborn; seaborn.set_style('whitegrid')\n", 43 | "\n", 44 | "numpy.random.seed(0)\n", 45 | "numpy.set_printoptions(suppress=True)\n", 46 | "\n", 47 | "%load_ext watermark\n", 48 | "%watermark -m -n -p numpy,scipy,torch,pomegranate" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "id": "1d323e83", 54 | "metadata": {}, 55 | "source": [ 56 | "### Gaussian Naive Bayes" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "id": "db6dc2d8", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "n, d, k = 200000, 500, 50\n", 67 | "\n", 68 | "X, y = make_blobs(n_samples=n, n_features=d, centers=k, cluster_std=0.75, random_state=0)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 3, 74 | "id": "4a980a8a", 75 | "metadata": {}, 76 | "outputs": [ 77 | { 78 | "name": "stdout", 79 | "output_type": "stream", 80 | "text": [ 81 | "787 ms ± 22.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 82 | "872 ms ± 16.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "%timeit model_sklearn = GaussianNB().fit(X, y)\n", 88 | "%timeit model_pom = BayesClassifier([Normal(covariance_type='diag') for i in range(k)]).fit(X, y)" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 4, 94 | "id": "24f702bc", 95 | "metadata": {}, 96 | "outputs": [ 97 | { 98 | "name": "stdout", 99 | "output_type": "stream", 100 | "text": [ 101 | "20.9 s ± 24.2 ms per loop (mean ± std. dev. 
of 7 runs, 1 loop each)\n", 102 | "15.6 s ± 152 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 103 | ] 104 | } 105 | ], 106 | "source": [ 107 | "model_sklearn = GaussianNB().fit(X, y)\n", 108 | "model_pom = BayesClassifier([Normal(covariance_type='diag') for i in range(k)]).fit(X, y)\n", 109 | "\n", 110 | "%timeit model_sklearn.predict(X)\n", 111 | "%timeit model_pom.predict(X)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "f0f5959c", 117 | "metadata": {}, 118 | "source": [ 119 | "### Bernoulli Naive Bayes" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "id": "1a73281c", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "n, d, k = 200000, 200, 25\n", 130 | "\n", 131 | "X = numpy.random.choice(2, size=(n, d))\n", 132 | "y = numpy.random.choice(k, size=(n,))" 133 | ] 134 | }, 135 | { 136 | "cell_type": "code", 137 | "execution_count": 6, 138 | "id": "f711516d", 139 | "metadata": {}, 140 | "outputs": [ 141 | { 142 | "name": "stdout", 143 | "output_type": "stream", 144 | "text": [ 145 | "14 s ± 242 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 146 | "359 ms ± 905 µs per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 147 | ] 148 | } 149 | ], 150 | "source": [ 151 | "%timeit model_sklearn = BernoulliNB().fit(X, y)\n", 152 | "%timeit model_pom = BayesClassifier([Bernoulli() for i in range(k)]).fit(X, y)" 153 | ] 154 | }, 155 | { 156 | "cell_type": "code", 157 | "execution_count": 7, 158 | "id": "bab540b7", 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "name": "stdout", 163 | "output_type": "stream", 164 | "text": [ 165 | "628 ms ± 12.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n", 166 | "3.01 s ± 35.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n" 167 | ] 168 | } 169 | ], 170 | "source": [ 171 | "model_sklearn = BernoulliNB().fit(X, y)\n", 172 | "model_pom = BayesClassifier([Bernoulli() for i in range(k)]).fit(X, y)\n", 173 | "\n", 174 | "%timeit model_sklearn.predict(X)\n", 175 | "%timeit model_pom.predict(X)" 176 | ] 177 | } 178 | ], 179 | "metadata": { 180 | "kernelspec": { 181 | "display_name": "Python 3 (ipykernel)", 182 | "language": "python", 183 | "name": "python3" 184 | }, 185 | "language_info": { 186 | "codemirror_mode": { 187 | "name": "ipython", 188 | "version": 3 189 | }, 190 | "file_extension": ".py", 191 | "mimetype": "text/x-python", 192 | "name": "python", 193 | "nbconvert_exporter": "python", 194 | "pygments_lexer": "ipython3", 195 | "version": "3.9.13" 196 | } 197 | }, 198 | "nbformat": 4, 199 | "nbformat_minor": 5 200 | } 201 | -------------------------------------------------------------------------------- /docs/CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | Code of Conduct 3 | =============== 4 | 5 | Our Pledge 6 | ---------- 7 | 8 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
9 | 10 | Our Standards 11 | ------------- 12 | 13 | Examples of behavior that contributes to creating a positive environment include: 14 | 15 | * Using welcoming and inclusive language 16 | * Being respectful of differing viewpoints and experiences 17 | * Gracefully accepting constructive criticism 18 | * Focusing on what is best for the community 19 | * Showing empathy towards other community members 20 | 21 | Examples of unacceptable behavior by participants include: 22 | 23 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 24 | * Trolling, insulting/derogatory comments, and personal or political attacks 25 | * Public or private harassment 26 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 27 | * Other conduct which could reasonably be considered inappropriate in a professional setting 28 | 29 | Our Responsibilities 30 | -------------------- 31 | 32 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 33 | 34 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 35 | 36 | Scope 37 | ----- 38 | 39 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 40 | 41 | Enforcement 42 | ----------- 43 | 44 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at jmschreiber91@gmail.com. Because the project team currently consists of only one member, that member shall investigate within one week whether a violation of the code of conduct occurred and what the appropriate response is. That member shall then contact the original reporter and any other affected parties to explain the response and note feedback for the record. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Should you wish to file a report anonymously you should fill out a report at https://goo.gl/forms/aQtlDdrhZf4Y8flk2. If your report involves any members of the project team, if you feel uncomfortable making a report to the project team for any reason, or you feel that the issue has not been adequately handled, you are encouraged to send `your report `_ to conduct@numfocus.org where it will be independently reviewed by the `NumFOCUS team `_. 45 | 46 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 47 | 48 | Attribution 49 | ----------- 50 | 51 | This Code of Conduct is adapted from the `Contributor Covenant homepage `_, `version 1\.4 `_. 52 | 53 | For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq. 
54 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = _build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 
77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/pomegranate.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/pomegranate.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/pomegranate" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/pomegranate" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 
172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/_static/custom.css: -------------------------------------------------------------------------------- 1 | /* Sidebar header (and top-bar for mobile) */ 2 | .wy-side-nav-search, .wy-nav-top { 3 | background: #A91D47; 4 | } 5 | 6 | .wy-menu > .caption > span.caption-text { 7 | color: #A91D47; 8 | } 9 | 10 | code.literal { 11 | color: #A91D47 !important; 12 | background-color: #fbfbfb !important; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /docs/_templates/class.rst: -------------------------------------------------------------------------------- 1 | {{ fullname }} 2 | {{ underline }} 3 | 4 | .. currentmodule:: {{ module }} 5 | 6 | .. autoclass:: {{ objname }} 7 | 8 | {% block methods %} 9 | .. automethod:: __init__ 10 | {% endblock %} 11 | -------------------------------------------------------------------------------- /docs/api.rst: -------------------------------------------------------------------------------- 1 | ======= 2 | The API 3 | ======= 4 | 5 | pomegranate has a minimal core API that is made possible because all models are treated as probability distributions regardless of their complexity. This point is repeated throughout the documentation because it has important consequences for how the package is designed and also for how one should think about designing probabilistic models. Although each model documentation page has an API reference showing the full set of methods and parameters for each model, each models has the following methods:. 6 | 7 | .. code-block:: python 8 | 9 | >>> model.probability(X) 10 | 11 | This method takes in a set of examples (either 2D or 3D depending on the model) and returns a vector of probabilities. 12 | 13 | .. 
code-block:: python 14 | 15 | >>> model.log_probability(X) 16 | 17 | This method takes in a set of examples (either 2D or 3D depending on the model) and returns a vector of log probabilities. Log probabilities are more numerically stable and, in fact, calls to `model.probability` just exponentiate the value returned from this call. 18 | 19 | .. code-block:: python 20 | 21 | >>> model.fit(X, sample_weight=None) 22 | 23 | This method will fit the model to the given data that is optionally weighted. If the model is a simple probability distribution, a Bayes classifier, or a Bayesian network with fully observed features, the method will use maximum likelihood estimates. For other models and settings, the method will use expectation-maximization to fit the model parameters. When a structure is not provided for hidden Markov models or Bayesian networks, this method will jointly learn the structure and the parameters of the model. The shape of data should be (n, d) or (n, l, d) depending on whether there is a length dimension, where n is the number of samples, l is the length of the data, and d is the dimensionality. Sample weights should either be a vector of non-negative numbers of size (n,) or a matrix of size (n, d). 24 | 25 | .. code-block:: python 26 | 27 | >>> model.summarize(X, sample_weight=None) 28 | 29 | This method is the first step of the two-step out-of-core learning API. The method will take in a batch of data and optional weights, extract the sufficient statistics that allow for an exact update, and add them to the cached values. Because these sufficient statistics are additive, one can derive an exact update from multiple calls to this method without having to store an entire data set in memory. 30 | 31 | .. code-block:: python 32 | 33 | >>> model.from_summaries() 34 | 35 | This method is the second step in the out-of-core learning API. The method uses the extracted and aggregated sufficient statistics to derive exact parameter updates for the model. After the parameters are updated, the stored sufficient statistics will be zeroed out. 36 | 37 | 38 | Compositional Methods 39 | --------------------- 40 | 41 | For models that are composed of other models/distributions, e.g. mixture models, hidden Markov models, and Bayesian networks, there are additional methods that relate to inferring how the data relates to each of these distributions. For example, instead of just calculating the log probability of an example under an entire mixture model, one might want to calculate the posterior probability that the data was generated by each of the distributions. These posterior probabilities are found by applying Bayes' rule, which connects prior probabilities and likelihoods to posterior probabilities. 42 | 43 | .. code-block:: python 44 | 45 | >>> model.predict(X) 46 | 47 | This method will return the most likely inferred value for each example in the data. In the case of Bayesian networks operating on incomplete data, this inferred value is the most likely value that each variable takes given the structure of the model and the observed data. For all other models, this is the most likely component that explains the data, P(M|D). 48 | 49 | .. code-block:: python 50 | 51 | >>> model.predict_proba(X) 52 | 53 | This returns the matrix of posterior probabilities P(M|D) directly. The predict method simply runs an argmax over this matrix. 54 | 55 | .. code-block:: python 56 | 57 | >>> model.predict_log_proba(X) 58 | 59 | This returns the matrix of log posterior probabilities for numerical stability.
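To make the core and compositional methods concrete, here is a minimal sketch of how they fit together for a mixture model; the data and the choice of two normal components are illustrative placeholders rather than part of the documented API.

.. code-block:: python

    >>> import torch
    >>> from pomegranate.distributions import Normal
    >>> from pomegranate.gmm import GeneralMixtureModel
    >>>
    >>> X = torch.randn(1000, 5)                        # (n, d) data
    >>> model = GeneralMixtureModel([Normal(), Normal()])
    >>> model.fit(X)                                    # EM, built on summarize/from_summaries
    >>> y_hat = model.predict(X)                        # most likely component for each example
    >>> posteriors = model.predict_proba(X)             # P(M|D), one row per example
    >>> log_posteriors = model.predict_log_proba(X)     # log P(M|D), more numerically stable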
60 | 61 | 62 | API Reference 63 | ------------- 64 | 65 | Distributions 66 | ============= 67 | 68 | .. automodule:: pomegranate.distributions 69 | :members: Bernoulli, Categorical, ConditionalCategorical, JointCategorical, DiracDelta, Exponential, Gamma, Normal, Poisson, StudentT, Uniform, ZeroInflated 70 | 71 | Models 72 | ====== 73 | 74 | .. autoclass:: pomegranate.bayes_classifier.BayesClassifier 75 | 76 | .. autoclass:: pomegranate.gmm.GeneralMixtureModel 77 | 78 | .. autoclass:: pomegranate.hmm.DenseHMM 79 | 80 | .. autoclass:: pomegranate.hmm.SparseHMM 81 | 82 | .. autoclass:: pomegranate.markov_chain.MarkovChain 83 | 84 | .. autoclass:: pomegranate.bayesian_network.BayesianNetwork 85 | 86 | .. autoclass:: pomegranate.factor_graph.FactorGraph -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | 9 | project = 'pomegranate' 10 | copyright = '2023, Jacob Schreiber' 11 | author = 'Jacob Schreiber' 12 | release = '1.0.0' 13 | 14 | # -- General configuration --------------------------------------------------- 15 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 16 | 17 | extensions = ['sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'nbsphinx'] 18 | 19 | templates_path = ['_templates'] 20 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 21 | 22 | root_doc = 'index' 23 | master_doc = 'index' 24 | 25 | # -- Options for HTML output ------------------------------------------------- 26 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 27 | 28 | html_theme = 'sphinx_rtd_theme' 29 | html_static_path = ['_static'] 30 | html_css_files = ['custom.css'] 31 | -------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | .. _faq: 2 | 3 | FAQ 4 | === 5 | 6 | **Can I create a usable model if I already know the parameters and just want to do inference** 7 | 8 | Yes! Each model allows you to either pass in the parameters, or to leave it uninitialized and fit it directly to data. If you pass in your own parameters you can do inference by calling methods like ``log_probability`` and ``predict``. 9 | 10 | **If I have an initial/pretrained model, can I fine-tune it using pomegranate?** 11 | 12 | Yes! In the same way that you could just do inference after giving it parameters, you can fine-tune those parameters using the built-in fitting functions. You may want to modify the inertia or freeze some of the parameters for fine-tuning. 13 | 14 | **If I have an initial/pretrained model, can I freeze some parameters and fine-tune the remainder?** 15 | 16 | Yes! Do the same as above, but pass in ``frozen=True`` for the model components that you would like to remain frozen. 
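As a minimal sketch (the parameter values below stand in for a pretrained component and are purely illustrative), freezing one component of a mixture while fine-tuning the other might look like:

.. code-block:: python

    >>> import torch
    >>> from pomegranate.distributions import Normal
    >>> from pomegranate.gmm import GeneralMixtureModel
    >>>
    >>> X = torch.randn(1000, 2)
    >>> d1 = Normal([0.0, 0.0], [1.0, 1.0], covariance_type='diag', frozen=True)  # stays fixed
    >>> d2 = Normal([1.0, 1.0], [1.0, 1.0], covariance_type='diag')               # gets fine-tuned
    >>> model = GeneralMixtureModel([d1, d2]).fit(X)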
17 | 18 | **How do I learn a model directly from data?** 19 | 20 | pomegranate v1.0.0 follows the scikit-learn API in the sense that you pass all hyperparameters into the initialization and then fit the parameters using the ``fit`` function. All models allow you to use a signature similar to ``NormalDistribution().fit(X)``. Some models allow you to leave the initialization blank, but most models require at least one parameter, e.g. mixture models require specifying the distributions and Markov chains require specifying the order. Other optional hyperparameters can be provided to alter the fitting process. 21 | 22 | **My data set has missing values. Can I use pomegranate?** 23 | 24 | Yes! Almost all algorithms in pomegranate can operate on incomplete data sets. All you need to do is pass in a ``torch.masked.MaskedTensor``, where the missing values are masked out (have a value of ``False``), in place of a normal tensor. 25 | 26 | **How can I use out-of-core learning in pomegranate?** 27 | 28 | Once a model has been initialized, the ``summarize`` method can be used on arbitrarily sized chunks of the data to reduce them into their sufficient statistics. These sufficient statistics are additive, meaning that if they are calculated for all chunks of a dataset and then added together they can yield exact updates. Once all chunks have been summarized, ``from_summaries`` is called to update the parameters of the model based on these added sufficient statistics. Out-of-core computing is supported by allowing the user to load chunks of data into memory, summarize them, discard them, and move on to the next chunk. 29 | 30 | **Does pomegranate support parallelization?** 31 | 32 | Yes! Because pomegranate v1.0.0 is written in PyTorch, which is natively multithreaded, all algorithms will use the available threads. See the PyTorch documentation for controlling the number of threads to use. 33 | 34 | **Does pomegranate support GPUs?** 35 | 36 | Yes! Again, because pomegranate v1.0.0 is written in PyTorch, every algorithm has GPU support. The speed increase scales with the complexity of the algorithm, with simple probability distributions having approximately a ~2-3x speedup, whereas the forward-backward algorithm for hidden Markov models can be up to ~5-10x faster by using a GPU. 37 | 38 | **Does pomegranate support distributed computing?** 39 | 40 | Currently, pomegranate is not set up for a distributed environment, though the pieces are there to make this possible. 41 | 42 | **How can I cite pomegranate?** 43 | 44 | The research paper that presents pomegranate is: 45 | 46 | *Schreiber, J. (2018). Pomegranate: fast and flexible probabilistic modeling in python. Journal of Machine Learning Research, 18(164), 1-6.* 47 | 48 | which can be downloaded from `JMLR`_ or from `arXiv`_. 49 | 50 | .. _jmlr: http://www.jmlr.org/papers/volume18/17-636/17-636.pdf 51 | ..
_arxiv: https://arxiv.org/abs/1711.00137 52 | 53 | The paper can be cited as: 54 | :: 55 | 56 | @article{schreiber2018pomegranate, 57 | title={Pomegranate: fast and flexible probabilistic modeling in python}, 58 | author={Schreiber, Jacob}, 59 | journal={Journal of Machine Learning Research}, 60 | volume={18}, 61 | number={164}, 62 | pages={1--6}, 63 | year={2018} 64 | } 65 | 66 | Alternatively, the GitHub repository can be cited as: 67 | :: 68 | 69 | @misc{Schreiber2016, 70 | author = {Jacob Schreiber}, 71 | title = {pomegranate}, 72 | year = {2016}, 73 | publisher = {GitHub}, 74 | journal = {GitHub repository}, 75 | howpublished = {\url{https://github.com/jmschrei/pomegranate}}, 76 | commit = {enter commit that you used} 77 | } 78 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. Introduction documentation master file, created by 2 | sphinx-quickstart on Sun Oct 30 18:10:26 2016. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | 7 | .. image:: logo/pomegranate-logo.png 8 | :width: 300px 9 | 10 | | 11 | 12 | .. image:: https://readthedocs.org/projects/pomegranate/badge/?version=latest 13 | :target: http://pomegranate.readthedocs.io/en/latest/?badge=latest 14 | 15 | | 16 | 17 | 18 | Home 19 | ==== 20 | 21 | pomegranate is a Python package that implements fast and flexible probabilistic models ranging from individual probability distributions to compositional models such as Bayesian networks and hidden Markov models. The core philosophy behind pomegranate is that all probabilistic models can be viewed as a probability distribution in that they all yield probability estimates for samples and can be updated given samples and their associated weights. The primary consequence of this view is that the components that are implemented in pomegranate can be stacked more flexibly than other packages. For example, one can build a Gaussian mixture model just as easily as building an exponential or log normal mixture model. But that's not all! One can create a Bayes classifier that uses different types of distributions on each feature, perhaps modeling time-associated features using an exponential distribution and counts using a Poisson distribution. Lastly, since these compositional models themselves can be viewed as probability distributions, one can build a mixture of Bayesian networks or a hidden Markov model Bayes' classifier that makes predictions over sequences. 22 | 23 | In addition to a variety of probability distributions and models, pomegranate has a variety of built-in features that are implemented for all of the models. These include different training strategies such as semi-supervised learning, learning with missing values, and mini-batch learning. It also includes support for massive data sets with out-of-core learning, multi-threaded parallelism, and GPU support. 24 | 25 | 26 | Thank You 27 | ========= 28 | 29 | No good project is done alone, and so I'd like to thank all the previous contributors to YAHMM, all the current contributors to pomegranate, and the many graduate students whom I have pestered with ideas and questions. 30 | 31 | Contributions 32 | ============= 33 | 34 | Contributions are eagerly accepted! If you would like to contribute a feature then fork the master branch and be sure to run the tests before changing any code.
Let us know what you want to do on the issue tracker just in case we're already working on an implementation of something similar. Also, please don't forget to add tests for any new functions. Please review the `Code of Conduct `_ before contributing. 35 | 36 | .. toctree:: 37 | :maxdepth: 1 38 | :hidden: 39 | :caption: Getting Started 40 | 41 | self 42 | install.rst 43 | api.rst 44 | CODE_OF_CONDUCT.rst 45 | faq.rst 46 | whats_new.rst 47 | 48 | .. toctree:: 49 | :maxdepth: 1 50 | :hidden: 51 | :caption: Features 52 | 53 | tutorials/C_Feature_Tutorial_1_GPU_Usage.ipynb 54 | tutorials/C_Feature_Tutorial_2_Mixed_Precision_and_DataTypes.ipynb 55 | tutorials/C_Feature_Tutorial_3_Out_Of_Core_Learning.ipynb 56 | tutorials/C_Feature_Tutorial_4_Priors_and_Semi-supervised_Learning.ipynb 57 | 58 | .. toctree:: 59 | :maxdepth: 1 60 | :hidden: 61 | :caption: Models 62 | 63 | tutorials/B_Model_Tutorial_1_Distributions.ipynb 64 | tutorials/B_Model_Tutorial_2_General_Mixture_Models.ipynb 65 | tutorials/B_Model_Tutorial_3_Bayes_Classifier.ipynb 66 | tutorials/B_Model_Tutorial_4_Hidden_Markov_Models.ipynb 67 | tutorials/B_Model_Tutorial_5_Markov_Chains.ipynb 68 | tutorials/B_Model_Tutorial_6_Bayesian_Networks.ipynb 69 | tutorials/B_Model_Tutorial_7_Factor_Graphs.ipynb 70 | -------------------------------------------------------------------------------- /docs/install.rst: -------------------------------------------------------------------------------- 1 | .. _install: 2 | 3 | Installation 4 | ============ 5 | 6 | The easiest way to get pomegranate is through pip using the command 7 | 8 | .. code-block:: bash 9 | 10 | pip install pomegranate 11 | 12 | This should install all the dependencies in addition to the package. 13 | 14 | You can also get the bleeding edge from GitHub using the following commands: 15 | 16 | .. code-block:: bash 17 | 18 | git clone https://github.com/jmschrei/pomegranate 19 | cd pomegranate 20 | python setup.py install 21 | 22 | Because pomegranate recently moved to a PyTorch backend, the most complicated installation step now is likely installing that and its CUDA dependencies. Please see the PyTorch documentation for help installing those. 
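After installing, a quick sanity check is to import both packages and see whether a CUDA-capable GPU is visible to PyTorch. This is only a sketch and assumes a recent release that exposes ``__version__``:

.. code-block:: python

    >>> import torch
    >>> import pomegranate
    >>> print(pomegranate.__version__)
    >>> print(torch.cuda.is_available())   # True only if a CUDA-enabled PyTorch build is installed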
23 | -------------------------------------------------------------------------------- /docs/logo/pomegranate-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/docs/logo/pomegranate-logo.png -------------------------------------------------------------------------------- /docs/logo/pomegranate_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/docs/logo/pomegranate_comparison.png -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.22.2 2 | scipy >= 1.6.2 3 | scikit-learn >= 1.0.2 4 | torch >= 1.9.0 5 | apricot-select >= 0.6.1 6 | networkx >= 2.8.4 7 | pomegranate >= 1.0.0 8 | sphinx-rtd-theme 9 | pandoc 10 | nbsphinx 11 | jinja2==3.1.4 12 | -------------------------------------------------------------------------------- /docs/tutorials/C_Feature_Tutorial_3_Out_Of_Core_Learning.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "692646c3", 6 | "metadata": {}, 7 | "source": [ 8 | "## Out-of-Core Learning" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "id": "79d62d75", 14 | "metadata": {}, 15 | "source": [ 16 | "author: Jacob Schreiber
\n", 17 | "contact: jmschreiber91@gmail.com\n", 18 | "\n", 19 | "Out-of-core learning refers to the process of training a model on an amount of data that cannot fit in memory. There are several approaches that can be described as out-of-core, but here we refer to the ability to derive exact updates to a model from a massive data set, despite not being able to fit the entire thing in memory.\n", 20 | "\n", 21 | "This out-of-core learning approach is implemented for all of pomegranate's models using two methods. The first is a summarize method that will take in a batch of data and reduce it down to additive sufficient statistics. Because these summaries are additive, after the first call, these summaries are added to the previously stored summaries. Once the entire data set has been seen, the stored sufficient statistics will be identical to those that would have been derived if the entire data set had been seen at once. The second method is the from_summaries method, which uses the stored sufficient statistics to derive parameter updates for the model.\n", 22 | "\n", 23 | "A common solution to having too much data is to randomly select an amount of data that does fit in memory to use in the place of the full data set. While simple to implement, this approach is likely to yield lower performance models because it is exposed to less data. However, by using out-of-core learning, on can train their models on a massive amount of data without being limited by the amount of memory their computer has." 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "id": "732d90aa", 30 | "metadata": {}, 31 | "outputs": [ 32 | { 33 | "name": "stdout", 34 | "output_type": "stream", 35 | "text": [ 36 | "Populating the interactive namespace from numpy and matplotlib\n", 37 | "torch : 1.13.0\n", 38 | "pomegranate: 1.0.0\n", 39 | "\n", 40 | "Compiler : GCC 11.2.0\n", 41 | "OS : Linux\n", 42 | "Release : 4.15.0-208-generic\n", 43 | "Machine : x86_64\n", 44 | "Processor : x86_64\n", 45 | "CPU cores : 8\n", 46 | "Architecture: 64bit\n", 47 | "\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "%pylab inline\n", 53 | "import torch\n", 54 | "\n", 55 | "numpy.random.seed(0)\n", 56 | "numpy.set_printoptions(suppress=True)\n", 57 | "\n", 58 | "%load_ext watermark\n", 59 | "%watermark -m -n -p torch,pomegranate" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "id": "e77be408", 65 | "metadata": {}, 66 | "source": [ 67 | "### `summarize ` and `from_summaries`\n", 68 | "\n", 69 | "Let's start off simple with training a normal distribution in an out-of-core manner. First, we'll generate some random data." 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 2, 75 | "id": "40c81d88", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "X = torch.randn(1000, 5)" 80 | ] 81 | }, 82 | { 83 | "cell_type": "markdown", 84 | "id": "68782e46", 85 | "metadata": {}, 86 | "source": [ 87 | "Then, we can initialize a distribution." 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 3, 93 | "id": "fec969dc", 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "from pomegranate.distributions import Normal\n", 98 | "\n", 99 | "dist = Normal()" 100 | ] 101 | }, 102 | { 103 | "cell_type": "markdown", 104 | "id": "e18b3d50", 105 | "metadata": {}, 106 | "source": [ 107 | "Now let's summarize through a few batches of data using the `summarize` method." 
108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 4, 113 | "id": "8d181be6", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "dist.summarize(X[:200])\n", 118 | "dist.summarize(X[200:])" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "6df91e38", 124 | "metadata": {}, 125 | "source": [ 126 | "Importantly, summarizing data doesn't update parameters by itself. Rather, it extracts additive sufficient statistics from the data. Each time `summarize` is called, these statistics are added to the previously aggregated statistics.\n", 127 | "\n", 128 | "In order to update the parameters of the model, you need to call the `from_summaries` method. This method updates the parameters of the model given the stored sufficient statistics." 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 5, 134 | "id": "9cbbe4dc", 135 | "metadata": {}, 136 | "outputs": [ 137 | { 138 | "data": { 139 | "text/plain": [ 140 | "(Parameter containing:\n", 141 | " tensor([ 0.0175, 0.0096, 0.0228, 0.0592, -0.0089]),\n", 142 | " Parameter containing:\n", 143 | " tensor([[ 0.9786, -0.0106, 0.0344, 0.0571, 0.0330],\n", 144 | " [-0.0106, 0.9970, 0.0165, -0.0330, 0.0021],\n", 145 | " [ 0.0344, 0.0165, 0.9405, -0.0075, -0.0374],\n", 146 | " [ 0.0571, -0.0330, -0.0075, 1.0399, 0.0333],\n", 147 | " [ 0.0330, 0.0021, -0.0374, 0.0333, 0.9978]]))" 148 | ] 149 | }, 150 | "execution_count": 5, 151 | "metadata": {}, 152 | "output_type": "execute_result" 153 | } 154 | ], 155 | "source": [ 156 | "dist.from_summaries()\n", 157 | "dist.means, dist.covs" 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "id": "2ae90d8d", 163 | "metadata": {}, 164 | "source": [ 165 | "This update is exactly the same as one would get if they had trained on the entire data set." 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 6, 171 | "id": "c33e1a42", 172 | "metadata": {}, 173 | "outputs": [ 174 | { 175 | "data": { 176 | "text/plain": [ 177 | "(Parameter containing:\n", 178 | " tensor([ 0.0175, 0.0096, 0.0228, 0.0592, -0.0089]),\n", 179 | " Parameter containing:\n", 180 | " tensor([[ 0.9786, -0.0106, 0.0344, 0.0571, 0.0330],\n", 181 | " [-0.0106, 0.9970, 0.0165, -0.0330, 0.0021],\n", 182 | " [ 0.0344, 0.0165, 0.9405, -0.0075, -0.0374],\n", 183 | " [ 0.0571, -0.0330, -0.0075, 1.0399, 0.0333],\n", 184 | " [ 0.0330, 0.0021, -0.0374, 0.0333, 0.9978]]))" 185 | ] 186 | }, 187 | "execution_count": 6, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "dist = Normal()\n", 194 | "dist.summarize(X)\n", 195 | "dist.from_summaries()\n", 196 | "dist.means, dist.covs" 197 | ] 198 | }, 199 | { 200 | "cell_type": "markdown", 201 | "id": "9b217107", 202 | "metadata": {}, 203 | "source": [ 204 | "### Batched Training\n", 205 | "\n", 206 | "Sometimes your data is so large that it cannot fit in memory (either CPU or GPU). In these cases, we can use the out-of-core API to train on batches at a time. This is similar to how neural networks are trained except that, rather than updating after each batch (or aggregating gradients over a small number of batches), we can summarize over a much larger number of batches -- potentially even the entire data set to get an exact update. Let's see an example of how that might work." 
207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 7, 212 | "id": "a6232d3c", 213 | "metadata": {}, 214 | "outputs": [], 215 | "source": [ 216 | "dist = Normal()\n", 217 | "\n", 218 | "for i in range(10):\n", 219 | " X_batch = torch.randn(1000, 20) # This is meant to mimic loading a batch of data\n", 220 | " dist.summarize(X_batch)\n", 221 | " del X_batch # Now we can discard the batch \n", 222 | " \n", 223 | "dist.from_summaries()" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "id": "7c30c9f4", 229 | "metadata": {}, 230 | "source": [ 231 | "Batched training is easy to implement for simple probability distributions but it can also be done with more complicated models if you want to code your own expectation-maximization. For instance, let's try training a mixture model using a modified version of the training code." 232 | ] 233 | }, 234 | { 235 | "cell_type": "code", 236 | "execution_count": 8, 237 | "id": "14012265", 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "[1] Improvement: 1945.53125, Time: 0.01443s\n", 245 | "[2] Improvement: 99.875, Time: 0.01562s\n", 246 | "[3] Improvement: 34.1875, Time: 0.01019s\n", 247 | "[4] Improvement: 17.65625, Time: 0.00994s\n" 248 | ] 249 | } 250 | ], 251 | "source": [ 252 | "from pomegranate.gmm import GeneralMixtureModel\n", 253 | "\n", 254 | "X = torch.randn(10000, 20)\n", 255 | "\n", 256 | "model = GeneralMixtureModel([Normal(), Normal()])\n", 257 | "\n", 258 | "logp = None\n", 259 | "for i in range(5):\n", 260 | " start_time = time.time()\n", 261 | "\n", 262 | " last_logp = logp\n", 263 | " \n", 264 | " logp = 0\n", 265 | " for j in range(0, X.shape[0], 1000): # Train on batches of size 1000\n", 266 | " logp += model.summarize(X[j:j+1000])\n", 267 | "\n", 268 | " if i > 0:\n", 269 | " improvement = logp - last_logp\n", 270 | " duration = time.time() - start_time\n", 271 | " print(\"[{}] Improvement: {}, Time: {:4.4}s\".format(i, improvement, duration))\n", 272 | "\n", 273 | " model.from_summaries()" 274 | ] 275 | } 276 | ], 277 | "metadata": { 278 | "kernelspec": { 279 | "display_name": "Python 3 (ipykernel)", 280 | "language": "python", 281 | "name": "python3" 282 | }, 283 | "language_info": { 284 | "codemirror_mode": { 285 | "name": "ipython", 286 | "version": 3 287 | }, 288 | "file_extension": ".py", 289 | "mimetype": "text/x-python", 290 | "name": "python", 291 | "nbconvert_exporter": "python", 292 | "pygments_lexer": "ipython3", 293 | "version": "3.9.13" 294 | } 295 | }, 296 | "nbformat": 4, 297 | "nbformat_minor": 5 298 | } 299 | -------------------------------------------------------------------------------- /examples/Bayesian_Network_Monty_Hall.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "473b0cb3", 6 | "metadata": {}, 7 | "source": [ 8 | "## The Monty Hall Problem\n", 9 | "\n", 10 | "The Monty Hall problem arose from the gameshow Let's Make a Deal, where a guest had to choose which one of three doors had a prize behind it. The twist was that after the guest chose, the host, originally Monty Hall, would then open one of the doors the guest did not pick that also did not have the prize behind it. Afterwards, Monty would ask if the guest wanted to switch which door they had picked. 
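The batched mixture-model loop shown above runs for a fixed number of iterations; a small variation, sketched here with an arbitrary tolerance, stops once the log-likelihood improvement becomes negligible.

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.gmm import GeneralMixtureModel

X = torch.randn(10000, 20)
model = GeneralMixtureModel([Normal(), Normal()])

tol, last_logp = 0.1, None
while True:
    logp = 0
    for j in range(0, X.shape[0], 1000):       # summarize one batch at a time
        logp += model.summarize(X[j:j+1000])
    model.from_summaries()

    if last_logp is not None and logp - last_logp < tol:
        break
    last_logp = logp
```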
Initial inspection may lead you to believe that if there are only two doors left there is a 50-50 chance of you picking the right one, and so there is no advantage one way or the other. However, it has been proven both through simulations and analytically that there is in fact a 66% chance of getting the prize if the guest switches their door after Monty opens one, regardless of the door they initially went with." 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 1, 16 | "id": "76bedfe3", 17 | "metadata": {}, 18 | "outputs": [ 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Populating the interactive namespace from numpy and matplotlib\n", 24 | "torch : 1.13.0\n", 25 | "torchegranate: 0.4.0\n", 26 | "\n", 27 | "Compiler : GCC 11.2.0\n", 28 | "OS : Linux\n", 29 | "Release : 4.15.0-206-generic\n", 30 | "Machine : x86_64\n", 31 | "Processor : x86_64\n", 32 | "CPU cores : 8\n", 33 | "Architecture: 64bit\n", 34 | "\n" 35 | ] 36 | } 37 | ], 38 | "source": [ 39 | "%pylab inline\n", 40 | "import seaborn; seaborn.set_style('whitegrid')\n", 41 | "import torch\n", 42 | "\n", 43 | "%load_ext watermark\n", 44 | "%watermark -m -n -p torch,torchegranate" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "id": "8dccea1d", 50 | "metadata": {}, 51 | "source": [ 52 | "We can reproduce this result in pomegranate using Bayesian networks with three nodes, one for the guest, one for the prize, and one for the door Monty chooses to open. The door the guest initially chooses and the door the prize is behind are completely random processes across the three doors, but the door which Monty opens is dependent on both the door the guest chooses (it cannot be the door the guest chooses), and the door the prize is behind (it cannot be the door with the prize behind it).\n", 53 | "\n", 54 | "To create the Bayesian network in pomegranate, we first create the distributions which live in each node in the graph. For a categorical bayesian network we use Categorical distributions for the root nodes and ConditionalCategorical distributions for the inner and leaf nodes. \n", 55 | "\n", 56 | "First, we can create our \"prize\" and \"guest\" distributions. These are each Categorical distributions because they do not depend on anything, and they are uniform distributions because they are equally likely to be any of the three doors." 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": 2, 62 | "id": "ce8a68c4", 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "from torchegranate.distributions import Categorical\n", 67 | "\n", 68 | "guest = Categorical([[1./3, 1./3, 1./3]])\n", 69 | "prize = Categorical([[1./3, 1./3, 1./3]])" 70 | ] 71 | }, 72 | { 73 | "cell_type": "markdown", 74 | "id": "369339f5", 75 | "metadata": {}, 76 | "source": [ 77 | "You may notice that there is an additional dimension added to the probabilities. This is because all distributions in pomegranate have the potential to be multivariate even when being applied to univariate problems.\n", 78 | "\n", 79 | "Next, we need to create the conditional distribution describing the door that Monty will open. Because Monty can only open a door that is not selected by the contestant and also does not have the prize, sometimes this leaves Monty with only one door that can be opened. Overall, the distribution is a 3x3x3 tensor, with three possibilities from the guest, three independent possibilities from the prize, and three possible doors to open." 
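A quick Monte Carlo check of the two-thirds claim above, independent of the Bayesian network (plain numpy, nothing model-specific):

```python
import numpy

rng = numpy.random.default_rng(0)
n = 100_000

prize = rng.integers(0, 3, size=n)
guest = rng.integers(0, 3, size=n)

# Staying wins only if the first guess was already right; switching wins
# exactly when the first guess was wrong, because Monty removes the other
# losing door.
print("stay  :", (guest == prize).mean())    # roughly 1/3
print("switch:", (guest != prize).mean())    # roughly 2/3
```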
80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 3, 85 | "id": "2b303ce6", 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from torchegranate.distributions import ConditionalCategorical\n", 90 | "\n", 91 | "probs = numpy.array([[\n", 92 | " [[0.0, 0.5, 0.5], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]], \n", 93 | " [[0.0, 0.0, 1.0], [0.5, 0.0, 0.5], [1.0, 0.0, 0.0]],\n", 94 | " [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.5, 0.5, 0.0]]\n", 95 | "]])\n", 96 | "\n", 97 | "monty = ConditionalCategorical(probs) " 98 | ] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "f693f22a", 103 | "metadata": {}, 104 | "source": [ 105 | "Next, we can create the Bayesian network object in just one line by passing in the distribution objects and edges in the form of (parent, child) tuples. Previous versions of pomegranate required that you create State or Node objects and add them in using `add_edge` and `add_node` methods. State and Node objects no longer exist, and while those methods still exist if you would prefer to use them you no longer need to. The `bake` method has also been removed and is no longer required." 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": 6, 111 | "id": "cdd24ba0", 112 | "metadata": {}, 113 | "outputs": [], 114 | "source": [ 115 | "from torchegranate.bayesian_network import BayesianNetwork\n", 116 | "\n", 117 | "model = BayesianNetwork([guest, prize, monty], [(guest, monty), (prize, monty)])" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "id": "ec59baca", 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "data": { 128 | "text/plain": [ 129 | "tensor([[0, 1, 2],\n", 130 | " [0, 2, 1],\n", 131 | " [2, 1, 0]])" 132 | ] 133 | }, 134 | "execution_count": 9, 135 | "metadata": {}, 136 | "output_type": "execute_result" 137 | } 138 | ], 139 | "source": [ 140 | "X = torch.tensor([[0, 1, -1],\n", 141 | " [0, 2, -1],\n", 142 | " [2, 1, -1]])\n", 143 | "\n", 144 | "X_masked = torch.masked.MaskedTensor(X, mask=X >= 0)\n", 145 | "\n", 146 | "\n", 147 | "model.predict(X_masked)" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 16, 153 | "id": "dac5d471", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "data": { 158 | "text/plain": [ 159 | "Parameter containing:\n", 160 | "tensor([1.6111])" 161 | ] 162 | }, 163 | "execution_count": 16, 164 | "metadata": {}, 165 | "output_type": "execute_result" 166 | } 167 | ], 168 | "source": [ 169 | "from torchegranate.distributions import Exponential\n", 170 | "\n", 171 | "X = torch.exp(torch.randn(100, 1))\n", 172 | "mask = torch.ones(100, 1, dtype=bool)\n", 173 | "mask[75:] = False\n", 174 | "X_masked = torch.masked.MaskedTensor(X, mask=mask)\n", 175 | "\n", 176 | "Exponential().fit(X[:75]).scales" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 17, 182 | "id": "6169a52f", 183 | "metadata": {}, 184 | "outputs": [ 185 | { 186 | "data": { 187 | "text/plain": [ 188 | "Parameter containing:\n", 189 | "tensor([1.6111])" 190 | ] 191 | }, 192 | "execution_count": 17, 193 | "metadata": {}, 194 | "output_type": "execute_result" 195 | } 196 | ], 197 | "source": [ 198 | "Exponential().fit(X_masked).scales" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "id": "1ea963c8", 205 | "metadata": {}, 206 | "outputs": [], 207 | "source": [] 208 | } 209 | ], 210 | "metadata": { 211 | "kernelspec": { 212 | "display_name": "Python 3 (ipykernel)", 213 | "language": "python", 
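Beyond the hard assignments returned by `predict`, the posterior itself shows the two-thirds advantage. The sketch below rebuilds the same network using the current `pomegranate` module paths (the cells above still import the older `torchegranate` name) and assumes that `predict_proba` accepts the same masked input and returns one posterior tensor per node.

```python
import numpy
import torch

from pomegranate.distributions import Categorical, ConditionalCategorical
from pomegranate.bayesian_network import BayesianNetwork

guest = Categorical([[1./3, 1./3, 1./3]])
prize = Categorical([[1./3, 1./3, 1./3]])
monty = ConditionalCategorical(numpy.array([[
    [[0.0, 0.5, 0.5], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]],
    [[0.0, 0.0, 1.0], [0.5, 0.0, 0.5], [1.0, 0.0, 0.0]],
    [[0.0, 1.0, 0.0], [1.0, 0.0, 0.0], [0.5, 0.5, 0.0]]
]]))

model = BayesianNetwork([guest, prize, monty], [(guest, monty), (prize, monty)])

# Guest picked door 0 and Monty opened door 2; the prize location is unobserved.
X = torch.tensor([[0, -1, 2]])
X_masked = torch.masked.MaskedTensor(X, mask=X >= 0)

print(model.predict_proba(X_masked)[1])   # posterior over the prize: ~[1/3, 2/3, 0]
```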
214 | "name": "python3" 215 | }, 216 | "language_info": { 217 | "codemirror_mode": { 218 | "name": "ipython", 219 | "version": 3 220 | }, 221 | "file_extension": ".py", 222 | "mimetype": "text/x-python", 223 | "name": "python", 224 | "nbconvert_exporter": "python", 225 | "pygments_lexer": "ipython3", 226 | "version": "3.9.13" 227 | } 228 | }, 229 | "nbformat": 4, 230 | "nbformat_minor": 5 231 | } 232 | -------------------------------------------------------------------------------- /pomegranate/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.1.2" 2 | -------------------------------------------------------------------------------- /pomegranate/_bayes.py: -------------------------------------------------------------------------------- 1 | # _bayes.py 2 | # Author: Jacob Schreiber 3 | 4 | 5 | import torch 6 | 7 | from ._utils import _cast_as_tensor 8 | from ._utils import _update_parameter 9 | from ._utils import _check_parameter 10 | from ._utils import _reshape_weights 11 | 12 | from .distributions._distribution import Distribution 13 | 14 | 15 | class BayesMixin(torch.nn.Module): 16 | def _reset_cache(self): 17 | """Reset the internally stored statistics. 18 | 19 | This method is meant to only be called internally. It resets the 20 | stored statistics used to update the model parameters as well as 21 | recalculates the cached values meant to speed up log probability 22 | calculations. 23 | """ 24 | 25 | if self._initialized == False: 26 | return 27 | 28 | self.register_buffer("_w_sum", torch.zeros(self.k, device=self.device)) 29 | self.register_buffer("_log_priors", torch.log(self.priors)) 30 | 31 | def _emission_matrix(self, X, priors=None): 32 | """Return the emission/responsibility matrix. 33 | 34 | This method returns the log probability of each example under each 35 | distribution contained in the model with the log prior probability 36 | of each component added. 37 | 38 | 39 | Parameters 40 | ---------- 41 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 42 | A set of examples to evaluate. 43 | 44 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 45 | Prior probabilities of assigning each symbol to each node. If not 46 | provided, do not include in the calculations (conceptually 47 | equivalent to a uniform probability, but without scaling the 48 | probabilities). This can be used to assign labels to observatons 49 | by setting one of the probabilities for an observation to 1.0. 50 | Note that this can be used to assign hard labels, but does not 51 | have the same semantics for soft labels, in that it only 52 | influences the initial estimate of an observation being generated 53 | by a component, not gives a target. Default is None. 54 | 55 | 56 | Returns 57 | ------- 58 | e: torch.Tensor, shape=(-1, self.k) 59 | A set of log probabilities for each example under each distribution. 
60 | """ 61 | 62 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 63 | shape=(-1, self.d), check_parameter=self.check_data) 64 | 65 | priors = _check_parameter(_cast_as_tensor(priors), "priors", 66 | ndim=2, shape=(X.shape[0], self.k), min_value=0.0, max_value=1.0, 67 | value_sum=1.0, value_sum_dim=-1, check_parameter=self.check_data) 68 | 69 | d = X.shape[0] 70 | e = torch.empty(d, self.k, device=self.device, dtype=self.dtype) 71 | for i, d in enumerate(self.distributions): 72 | e[:, i] = d.log_probability(X) 73 | 74 | if priors is not None: 75 | e += torch.log(priors) 76 | 77 | return e + self._log_priors 78 | 79 | def probability(self, X, priors=None): 80 | """Calculate the probability of each example. 81 | 82 | This method calculates the probability of each example given the 83 | parameters of the distribution. The examples must be given in a 2D 84 | format. 85 | 86 | Note: This differs from some other probability calculation 87 | functions, like those in torch.distributions, because it is not 88 | returning the probability of each feature independently, but rather 89 | the total probability of the entire example. 90 | 91 | 92 | Parameters 93 | ---------- 94 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 95 | A set of examples to evaluate. 96 | 97 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 98 | Prior probabilities of assigning each symbol to each node. If not 99 | provided, do not include in the calculations (conceptually 100 | equivalent to a uniform probability, but without scaling the 101 | probabilities). This can be used to assign labels to observatons 102 | by setting one of the probabilities for an observation to 1.0. 103 | Note that this can be used to assign hard labels, but does not 104 | have the same semantics for soft labels, in that it only 105 | influences the initial estimate of an observation being generated 106 | by a component, not gives a target. Default is None. 107 | 108 | 109 | Returns 110 | ------- 111 | prob: torch.Tensor, shape=(-1,) 112 | The probability of each example. 113 | """ 114 | 115 | return torch.exp(self.log_probability(X, priors=priors)) 116 | 117 | def log_probability(self, X, priors=None): 118 | """Calculate the log probability of each example. 119 | 120 | This method calculates the log probability of each example given the 121 | parameters of the distribution. The examples must be given in a 2D 122 | format. For a Bernoulli distribution, each entry in the data must 123 | be either 0 or 1. 124 | 125 | Note: This differs from some other log probability calculation 126 | functions, like those in torch.distributions, because it is not 127 | returning the log probability of each feature independently, but rather 128 | the total log probability of the entire example. 129 | 130 | 131 | Parameters 132 | ---------- 133 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 134 | A set of examples to evaluate. 135 | 136 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 137 | Prior probabilities of assigning each symbol to each node. If not 138 | provided, do not include in the calculations (conceptually 139 | equivalent to a uniform probability, but without scaling the 140 | probabilities). This can be used to assign labels to observatons 141 | by setting one of the probabilities for an observation to 1.0. 
142 | Note that this can be used to assign hard labels, but does not 143 | have the same semantics for soft labels, in that it only 144 | influences the initial estimate of an observation being generated 145 | by a component, not gives a target. Default is None. 146 | 147 | 148 | Returns 149 | ------- 150 | logp: torch.Tensor, shape=(-1,) 151 | The log probability of each example. 152 | """ 153 | 154 | e = self._emission_matrix(X, priors=priors) 155 | return torch.logsumexp(e, dim=1) 156 | 157 | def predict(self, X, priors=None): 158 | """Calculate the label assignment for each example. 159 | 160 | This method calculates the label for each example as the most likely 161 | component after factoring in the prior probability. 162 | 163 | 164 | Parameters 165 | ---------- 166 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 167 | A set of examples to summarize. 168 | 169 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 170 | Prior probabilities of assigning each symbol to each node. If not 171 | provided, do not include in the calculations (conceptually 172 | equivalent to a uniform probability, but without scaling the 173 | probabilities). This can be used to assign labels to observatons 174 | by setting one of the probabilities for an observation to 1.0. 175 | Note that this can be used to assign hard labels, but does not 176 | have the same semantics for soft labels, in that it only 177 | influences the initial estimate of an observation being generated 178 | by a component, not gives a target. Default is None. 179 | 180 | 181 | Returns 182 | ------- 183 | y: torch.Tensor, shape=(-1,) 184 | The predicted label for each example. 185 | """ 186 | 187 | e = self._emission_matrix(X, priors=priors) 188 | return torch.argmax(e, dim=1) 189 | 190 | def predict_proba(self, X, priors=None): 191 | """Calculate the posterior probabilities for each example. 192 | 193 | This method calculates the posterior probabilities for each example 194 | under each component of the model after factoring in the prior 195 | probability and normalizing across all the components. 196 | 197 | 198 | Parameters 199 | ---------- 200 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 201 | A set of examples to summarize. 202 | 203 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 204 | Prior probabilities of assigning each symbol to each node. If not 205 | provided, do not include in the calculations (conceptually 206 | equivalent to a uniform probability, but without scaling the 207 | probabilities). This can be used to assign labels to observatons 208 | by setting one of the probabilities for an observation to 1.0. 209 | Note that this can be used to assign hard labels, but does not 210 | have the same semantics for soft labels, in that it only 211 | influences the initial estimate of an observation being generated 212 | by a component, not gives a target. Default is None. 213 | 214 | 215 | Returns 216 | ------- 217 | y: torch.Tensor, shape=(-1, self.k) 218 | The posterior probabilities for each example under each component. 219 | """ 220 | 221 | e = self._emission_matrix(X, priors=priors) 222 | return torch.exp(e - torch.logsumexp(e, dim=1, keepdims=True)) 223 | 224 | def predict_log_proba(self, X, priors=None): 225 | """Calculate the log posterior probabilities for each example. 
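A sketch showing that the prediction helpers above are all views of the same emission matrix, so they agree with one another (illustrative data only).

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.bayes_classifier import BayesClassifier

X = torch.randn(200, 2)
y = (X.sum(dim=1) > 0).long()

model = BayesClassifier([Normal(), Normal()]).fit(X, y)
proba = model.predict_proba(X)

assert torch.allclose(proba.sum(dim=1), torch.ones(len(X)))        # rows are normalized
assert torch.all(model.predict(X) == proba.argmax(dim=1))          # predict is the argmax
assert torch.allclose(torch.exp(model.predict_log_proba(X)), proba)
```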
226 | 227 | This method calculates the log posterior probabilities for each example 228 | under each component of the model after factoring in the prior 229 | probability and normalizing across all the components. 230 | 231 | 232 | Parameters 233 | ---------- 234 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 235 | A set of examples to summarize. 236 | 237 | priors: list, numpy.ndarray, torch.Tensor, shape=(-1, self.k) 238 | Prior probabilities of assigning each symbol to each node. If not 239 | provided, do not include in the calculations (conceptually 240 | equivalent to a uniform probability, but without scaling the 241 | probabilities). This can be used to assign labels to observatons 242 | by setting one of the probabilities for an observation to 1.0. 243 | Note that this can be used to assign hard labels, but does not 244 | have the same semantics for soft labels, in that it only 245 | influences the initial estimate of an observation being generated 246 | by a component, not gives a target. Default is None. 247 | 248 | 249 | Returns 250 | ------- 251 | y: torch.Tensor, shape=(-1, self.k) 252 | The log posterior probabilities for each example under each 253 | component. 254 | """ 255 | 256 | e = self._emission_matrix(X, priors=priors) 257 | return e - torch.logsumexp(e, dim=1, keepdims=True) 258 | 259 | def from_summaries(self): 260 | """Update the model parameters given the extracted statistics. 261 | 262 | This method uses calculated statistics from calls to the `summarize` 263 | method to update the distribution parameters. Hyperparameters for the 264 | update are passed in at initialization time. 265 | 266 | Note: Internally, a call to `fit` is just a successive call to the 267 | `summarize` method followed by the `from_summaries` method. 268 | """ 269 | 270 | for d in self.distributions: 271 | d.from_summaries() 272 | 273 | if self.frozen == True: 274 | return 275 | 276 | priors = self._w_sum / torch.sum(self._w_sum) 277 | 278 | _update_parameter(self.priors, priors, self.inertia) 279 | self._reset_cache() 280 | -------------------------------------------------------------------------------- /pomegranate/bayes_classifier.py: -------------------------------------------------------------------------------- 1 | # BayesClassifier.py 2 | # Author: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | 7 | from ._utils import _cast_as_tensor 8 | from ._utils import _cast_as_parameter 9 | from ._utils import _update_parameter 10 | from ._utils import _check_parameter 11 | from ._utils import _reshape_weights 12 | 13 | from ._bayes import BayesMixin 14 | 15 | from .distributions._distribution import Distribution 16 | 17 | 18 | class BayesClassifier(BayesMixin, Distribution): 19 | """A Bayes classifier object. 20 | 21 | A simple way to produce a classifier using probabilistic models is to plug 22 | them into Bayes' rule. Basically, inference is the same as the 'E' step in 23 | EM for mixture models. However, fitting can be significantly faster because 24 | instead of having to iteratively infer labels and learn parameters, you can 25 | just learn the parameters given the known labels. Because the learning step 26 | for most models are simple MLE estimates, this can be done extremely 27 | quickly. 28 | 29 | Although the most common distribution to use is a Gaussian with a diagonal 30 | covariance matrix, termed the Gaussian naive Bayes model, any probability 31 | distribution can be used. 
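A sketch of the `from_summaries` update above applied to the class priors: with unit sample weights they reduce to the observed class frequencies. The Exponential components and the 800/200 split are arbitrary choices for illustration.

```python
import torch
from pomegranate.distributions import Exponential
from pomegranate.bayes_classifier import BayesClassifier

X = torch.exp(torch.randn(1000, 1))                        # positive-valued data
y = torch.cat([torch.zeros(800), torch.ones(200)]).long()

model = BayesClassifier([Exponential(), Exponential()]).fit(X, y)
print(model.priors)                                        # -> [0.8, 0.2]
```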
Here, you can just drop any distributions or 32 | probabilistic model in as long as it has the `log_probability`, `summarize`, 33 | and `from_samples` methods implemented. 34 | 35 | Further, the probabilistic models do not even need to be simple 36 | distributions. The distributions can be mixture models or hidden Markov 37 | models or Bayesian networks. 38 | 39 | 40 | Parameters 41 | ---------- 42 | distributions: tuple or list 43 | A set of distribution objects. These objects do not need to be 44 | initialized, i.e., can be "Normal()". 45 | 46 | priors: tuple, numpy.ndarray, torch.Tensor, or None. shape=(k,), optional 47 | The prior probabilities over the given distributions. Default is None. 48 | 49 | inertia: float, [0, 1], optional 50 | Indicates the proportion of the update to apply to the parameters 51 | during training. When the inertia is 0.0, the update is applied in 52 | its entirety and the previous parameters are ignored. When the 53 | inertia is 1.0, the update is entirely ignored and the previous 54 | parameters are kept, equivalently to if the parameters were frozen. 55 | 56 | frozen: bool, optional 57 | Whether all the parameters associated with this distribution are frozen. 58 | If you want to freeze individual pameters, or individual values in those 59 | parameters, you must modify the `frozen` attribute of the tensor or 60 | parameter directly. Default is False. 61 | 62 | check_data: bool, optional 63 | Whether to check properties of the data and potentially recast it to 64 | torch.tensors. This does not prevent checking of parameters but can 65 | slightly speed up computation when you know that your inputs are valid. 66 | Setting this to False is also necessary for compiling. Default is True. 67 | """ 68 | 69 | def __init__(self, distributions, priors=None, inertia=0.0, frozen=False, 70 | check_data=True): 71 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 72 | self.name = "BayesClassifier" 73 | 74 | _check_parameter(distributions, "distributions", dtypes=(list, tuple, 75 | numpy.array, torch.nn.ModuleList)) 76 | self.distributions = torch.nn.ModuleList(distributions) 77 | 78 | self.priors = _check_parameter(_cast_as_parameter(priors), "priors", 79 | min_value=0, max_value=1, ndim=1, value_sum=1.0, 80 | shape=(len(distributions),)) 81 | 82 | self.k = len(distributions) 83 | 84 | if all(d._initialized for d in distributions): 85 | self._initialized = True 86 | self.d = distributions[0].d 87 | if self.priors is None: 88 | self.priors = _cast_as_parameter(torch.ones(self.k) / self.k) 89 | 90 | else: 91 | self._initialized = False 92 | self.d = None 93 | 94 | self._reset_cache() 95 | 96 | def _initialize(self, d): 97 | """Initialize the probability distribution. 98 | 99 | This method is meant to only be called internally. It initializes the 100 | parameters of the distribution and stores its dimensionality. For more 101 | complex methods, this function will do more. 102 | 103 | 104 | Parameters 105 | ---------- 106 | d: int 107 | The dimensionality the distribution is being initialized to. 108 | """ 109 | 110 | self.priors = _cast_as_parameter(torch.ones(self.k, dtype=self.dtype, 111 | device=self.device) / self.k) 112 | 113 | self._initialized = True 114 | super()._initialize(d) 115 | 116 | def fit(self, X, y, sample_weight=None): 117 | """Fit the model to optionally weighted examples. 118 | 119 | This method implements the core of the learning process. 
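As the docstring notes, the per-class models do not have to be simple distributions. A sketch where one class is modeled by a two-component Gaussian mixture; this should work because each component only needs `summarize`, `from_summaries`, and `log_probability`, though note that a single `fit` call applies only one EM pass to the mixture.

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.gmm import GeneralMixtureModel
from pomegranate.bayes_classifier import BayesClassifier

X0 = torch.randn(500, 2)                                            # class 0: one blob
X1 = torch.cat([torch.randn(250, 2) - 3, torch.randn(250, 2) + 3])  # class 1: two blobs

X = torch.cat([X0, X1])
y = torch.cat([torch.zeros(500), torch.ones(500)]).long()

model = BayesClassifier([Normal(), GeneralMixtureModel([Normal(), Normal()])])
model.fit(X, y)

print((model.predict(X) == y).float().mean())                       # training accuracy
```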
For a 120 | general Bayes model, this involves fitting each component of the model 121 | using the labels that are provided. 122 | 123 | This method is largely a wrapper around the `summarize` and 124 | `from_summaries` methods. Its primary contribution is providing a 125 | single call that performs both steps on the labeled data. 126 | 127 | 128 | Parameters 129 | ---------- 130 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 131 | A set of examples to evaluate. 132 | 133 | y: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1,) 134 | A set of labels, one per example. 135 | 136 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 137 | A set of weights for the examples. This can be either of shape 138 | (-1, self.d) or a vector of shape (-1,). Default is ones. 139 | 140 | 141 | Returns 142 | ------- 143 | self 144 | """ 145 | 146 | self.summarize(X, y, sample_weight=sample_weight) 147 | self.from_summaries() 148 | return self 149 | 150 | def summarize(self, X, y, sample_weight=None): 151 | """Extract the sufficient statistics from a batch of data. 152 | 153 | This method calculates the sufficient statistics from optionally 154 | weighted data and adds them to the stored cache. The examples must be 155 | given in a 2D format. Sample weights can either be provided as one 156 | value per example or as a 2D matrix of weights for each feature in 157 | each example. 158 | 159 | For a Bayes' classifier, this step involves partitioning the data 160 | according to the labels and then training each component using MLE 161 | estimates. 162 | 163 | 164 | Parameters 165 | ---------- 166 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 167 | A set of examples to summarize. 168 | 169 | y: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1,) 170 | A set of labels, one per example. 171 | 172 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 173 | A set of weights for the examples. This can be either of shape 174 | (-1, self.d) or a vector of shape (-1,). Default is ones.
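A sketch of the partitioning described above: fitting the classifier is the same as fitting each component on the subset of rows with the matching label.

```python
import torch
from pomegranate.distributions import Normal
from pomegranate.bayes_classifier import BayesClassifier

X = torch.randn(300, 4)
y = (torch.rand(300) > 0.5).long()

clf = BayesClassifier([Normal(), Normal()]).fit(X, y)
by_hand = [Normal().fit(X[y == 0]), Normal().fit(X[y == 1])]

assert torch.allclose(clf.distributions[0].means, by_hand[0].means)
assert torch.allclose(clf.distributions[1].covs, by_hand[1].covs)
```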
175 | """ 176 | 177 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 178 | y = _check_parameter(_cast_as_tensor(y), "y", min_value=0, 179 | max_value=self.k-1, ndim=1, shape=(len(X),), 180 | check_parameter=self.check_data) 181 | sample_weight = _check_parameter(sample_weight, "sample_weight", 182 | min_value=0, shape=(-1, self.d), check_parameter=self.check_data) 183 | 184 | for j, d in enumerate(self.distributions): 185 | idx = y == j 186 | d.summarize(X[idx], sample_weight[idx]) 187 | 188 | if self.frozen == False: 189 | self._w_sum[j] = self._w_sum[j] + sample_weight[idx].mean( 190 | dim=-1).sum() 191 | -------------------------------------------------------------------------------- /pomegranate/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | from .bernoulli import Bernoulli 2 | from .categorical import Categorical 3 | from .conditional_categorical import ConditionalCategorical 4 | from .dirac_delta import DiracDelta 5 | from .exponential import Exponential 6 | from .gamma import Gamma 7 | from .independent_components import IndependentComponents 8 | from .joint_categorical import JointCategorical 9 | from .normal import Normal 10 | from .poisson import Poisson 11 | from .student_t import StudentT 12 | from .uniform import Uniform 13 | from .zero_inflated import ZeroInflated 14 | from .lognormal import LogNormal 15 | from .halfnormal import HalfNormal -------------------------------------------------------------------------------- /pomegranate/distributions/_distribution.py: -------------------------------------------------------------------------------- 1 | # _distribution.py 2 | # Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _update_parameter 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _reshape_weights 11 | 12 | 13 | class Distribution(torch.nn.Module): 14 | """A base distribution object. 15 | 16 | This distribution is inherited by all the other distributions. 
17 | """ 18 | 19 | def __init__(self, inertia, frozen, check_data): 20 | super(Distribution, self).__init__() 21 | self._device = _cast_as_parameter([0.0]) 22 | 23 | _check_parameter(inertia, "inertia", min_value=0, max_value=1, ndim=0) 24 | _check_parameter(frozen, "frozen", value_set=[True, False], ndim=0) 25 | _check_parameter(check_data, "check_data", value_set=[True, False], 26 | ndim=0) 27 | 28 | self.register_buffer("inertia", _cast_as_tensor(inertia)) 29 | self.register_buffer("frozen", _cast_as_tensor(frozen)) 30 | self.register_buffer("check_data", _cast_as_tensor(check_data)) 31 | 32 | self._initialized = False 33 | 34 | @property 35 | def device(self): 36 | try: 37 | return next(self.parameters()).device 38 | except: 39 | return 'cpu' 40 | 41 | @property 42 | def dtype(self): 43 | return next(self.parameters()).dtype 44 | 45 | def freeze(self): 46 | self.register_buffer("frozen", _cast_as_tensor(True)) 47 | return self 48 | 49 | def unfreeze(self): 50 | self.register_buffer("frozen", _cast_as_tensor(False)) 51 | return self 52 | 53 | def forward(self, X): 54 | self.summarize(X) 55 | return self.log_probability(X) 56 | 57 | def backward(self, X): 58 | self.from_summaries() 59 | return X 60 | 61 | def _initialize(self, d): 62 | self.d = d 63 | self._reset_cache() 64 | 65 | def _reset_cache(self): 66 | raise NotImplementedError 67 | 68 | def probability(self, X): 69 | return torch.exp(self.log_probability(X)) 70 | 71 | def log_probability(self, X): 72 | raise NotImplementedError 73 | 74 | def fit(self, X, sample_weight=None): 75 | self.summarize(X, sample_weight=sample_weight) 76 | self.from_summaries() 77 | return self 78 | 79 | def summarize(self, X, sample_weight=None): 80 | if not self._initialized: 81 | self._initialize(len(X[0])) 82 | 83 | X = _cast_as_tensor(X) 84 | _check_parameter(X, "X", ndim=2, shape=(-1, self.d), 85 | check_parameter=self.check_data) 86 | 87 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight), 88 | device=self.device) 89 | 90 | return X, sample_weight 91 | 92 | def from_summaries(self): 93 | raise NotImplementedError 94 | 95 | 96 | class ConditionalDistribution(Distribution): 97 | def __init__(self, inertia, frozen, check_data): 98 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 99 | 100 | def marginal(self, dim): 101 | raise NotImplementedError -------------------------------------------------------------------------------- /pomegranate/distributions/bernoulli.py: -------------------------------------------------------------------------------- 1 | # bernoulli.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import eps 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class Bernoulli(Distribution): 16 | """A Bernoulli distribution object. 17 | 18 | A Bernoulli distribution models the probability of a binary variable 19 | occurring. rates of discrete events, and has a probability parameter 20 | describing this value. This distribution assumes that each feature is 21 | independent of the others. 22 | 23 | There are two ways to initialize this object. The first is to pass in 24 | the tensor of probability parameters, at which point they can immediately be 25 | used. 
The second is to not pass in the probability parameters and then call 26 | either `fit` or `summarize` + `from_summaries`, at which point the probability 27 | parameter will be learned from data. 28 | 29 | 30 | Parameters 31 | ---------- 32 | probs: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 33 | The probability parameters for each feature. Default is None. 34 | 35 | inertia: float, [0, 1], optional 36 | Indicates the proportion of the update to apply to the parameters 37 | during training. When the inertia is 0.0, the update is applied in 38 | its entirety and the previous parameters are ignored. When the 39 | inertia is 1.0, the update is entirely ignored and the previous 40 | parameters are kept, equivalently to if the parameters were frozen. 41 | 42 | frozen: bool, optional 43 | Whether all the parameters associated with this distribution are frozen. 44 | If you want to freeze individual parameters, or individual values in those 45 | parameters, you must modify the `frozen` attribute of the tensor or 46 | parameter directly. Default is False. 47 | 48 | check_data: bool, optional 49 | Whether to check properties of the data and potentially recast it to 50 | torch.tensors. This does not prevent checking of parameters but can 51 | slightly speed up computation when you know that your inputs are valid. 52 | Setting this to False is also necessary for compiling. 53 | """ 54 | 55 | def __init__(self, probs=None, inertia=0.0, frozen=False, check_data=True): 56 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 57 | self.name = "Bernoulli" 58 | 59 | self.probs = _check_parameter(_cast_as_parameter(probs), "probs", 60 | min_value=eps, max_value=1-eps, ndim=1) 61 | 62 | self._initialized = self.probs is not None 63 | self.d = self.probs.shape[-1] if self._initialized else None 64 | self._reset_cache() 65 | 66 | def _initialize(self, d): 67 | """Initialize the probability distribution. 68 | 69 | This method is meant to only be called internally. It initializes the 70 | parameters of the distribution and stores its dimensionality. For more 71 | complex methods, this function will do more. 72 | 73 | 74 | Parameters 75 | ---------- 76 | d: int 77 | The dimensionality the distribution is being initialized to. 78 | """ 79 | 80 | self.probs = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 81 | device=self.device)) 82 | 83 | self._initialized = True 84 | super()._initialize(d) 85 | 86 | def _reset_cache(self): 87 | """Reset the internally stored statistics. 88 | 89 | This method is meant to only be called internally. It resets the 90 | stored statistics used to update the model parameters as well as 91 | recalculates the cached values meant to speed up log probability 92 | calculations. 93 | """ 94 | 95 | if self._initialized == False: 96 | return 97 | 98 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 99 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 100 | 101 | self.register_buffer("_log_probs", torch.log(self.probs)) 102 | self.register_buffer("_log_inv_probs", torch.log(-(self.probs-1))) 103 | 104 | def sample(self, n): 105 | """Sample from the probability distribution. 106 | 107 | This method will return `n` samples generated from the underlying 108 | probability distribution. 109 | 110 | 111 | Parameters 112 | ---------- 113 | n: int 114 | The number of samples to generate. 115 | 116 | 117 | Returns 118 | ------- 119 | X: torch.tensor, shape=(n, self.d) 120 | Randomly generated samples.
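A minimal sketch of the two initialization routes described in the docstring: passing the probabilities up front versus learning them from data.

```python
import torch
from pomegranate.distributions import Bernoulli

# 1) Parameters given directly.
d1 = Bernoulli([0.2, 0.5, 0.9])
print(d1.probs)

# 2) Parameters learned from binary data with fit (= summarize + from_summaries).
X = (torch.rand(1000, 3) < torch.tensor([0.2, 0.5, 0.9])).float()
d2 = Bernoulli().fit(X)
print(d2.probs)        # close to [0.2, 0.5, 0.9]
```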
121 | """ 122 | 123 | return torch.distributions.Bernoulli(self.probs).sample([n]) 124 | 125 | def log_probability(self, X): 126 | """Calculate the log probability of each example. 127 | 128 | This method calculates the log probability of each example given the 129 | parameters of the distribution. The examples must be given in a 2D 130 | format. For a Bernoulli distribution, each entry in the data must 131 | be either 0 or 1. 132 | 133 | Note: This differs from some other log probability calculation 134 | functions, like those in torch.distributions, because it is not 135 | returning the log probability of each feature independently, but rather 136 | the total log probability of the entire example. 137 | 138 | 139 | Parameters 140 | ---------- 141 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 142 | A set of examples to evaluate. 143 | 144 | 145 | Returns 146 | ------- 147 | logp: torch.Tensor, shape=(-1,) 148 | The log probability of each example. 149 | """ 150 | 151 | X = _check_parameter(_cast_as_tensor(X, dtype=self.probs.dtype), "X", 152 | value_set=(0, 1), ndim=2, shape=(-1, self.d), 153 | check_parameter=self.check_data) 154 | 155 | return X.matmul(self._log_probs) + (1-X).matmul(self._log_inv_probs) 156 | 157 | def summarize(self, X, sample_weight=None): 158 | """Extract the sufficient statistics from a batch of data. 159 | 160 | This method calculates the sufficient statistics from optionally 161 | weighted data and adds them to the stored cache. The examples must be 162 | given in a 2D format. Sample weights can either be provided as one 163 | value per example or as a 2D matrix of weights for each feature in 164 | each example. 165 | 166 | 167 | Parameters 168 | ---------- 169 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 170 | A set of examples to summarize. 171 | 172 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 173 | A set of weights for the examples. This can be either of shape 174 | (-1, self.d) or a vector of shape (-1,). Default is ones. 175 | """ 176 | 177 | if self.frozen == True: 178 | return 179 | 180 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 181 | _check_parameter(X, "X", value_set=(0, 1), 182 | check_parameter=self.check_data) 183 | 184 | self._w_sum += torch.sum(sample_weight, dim=0) 185 | self._xw_sum += torch.sum(X * sample_weight, dim=0) 186 | 187 | def from_summaries(self): 188 | """Update the model parameters given the extracted statistics. 189 | 190 | This method uses calculated statistics from calls to the `summarize` 191 | method to update the distribution parameters. Hyperparameters for the 192 | update are passed in at initialization time. 193 | 194 | Note: Internally, a call to `fit` is just a successive call to the 195 | `summarize` method followed by the `from_summaries` method. 
196 | """ 197 | 198 | if self.frozen == True: 199 | return 200 | 201 | probs = self._xw_sum / self._w_sum 202 | _update_parameter(self.probs, probs, self.inertia) 203 | self._reset_cache() 204 | -------------------------------------------------------------------------------- /pomegranate/distributions/categorical.py: -------------------------------------------------------------------------------- 1 | # categorical.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _inplace_add 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | from .._utils import _reshape_weights 12 | 13 | from ._distribution import Distribution 14 | 15 | 16 | class Categorical(Distribution): 17 | """A categorical distribution object. 18 | 19 | A categorical distribution models the probability of a set of distinct 20 | values happening. It is an extension of the Bernoulli distribution to 21 | multiple values. Sometimes it is referred to as a discrete distribution, 22 | but this distribution does not enforce that the numeric values used for the 23 | keys have any relationship based on their identity. Permuting the keys will 24 | have no effect on the calculation. This distribution assumes that the 25 | features are independent from each other. 26 | 27 | The keys must be contiguous non-negative integers that begin at zero. 28 | Because the probabilities are represented as a single tensor, each feature 29 | must have values for all keys up to the maximum key of any one distribution. 30 | Specifically, if one feature has 10 keys and a second feature has only 4, 31 | the tensor must go out to 10 for each feature but encode probabilities of 32 | zero for the second feature. 33 | 34 | 35 | Parameters 36 | ---------- 37 | probs: list, numpy.ndarray, torch.tensor or None, shape=(k, d), optional 38 | Probabilities for each key for each feature, where k is the largest 39 | number of keys across all features. Default is None 40 | 41 | n_categories: list, numpy.ndarray, torch.tensor or None, optional 42 | The number of categories for each feature in the data. Only needs to 43 | be provided when the parameters will be learned directly from data and 44 | you want to make sure that right number of keys are included in each 45 | dimension. Default is None. 46 | 47 | pseudocount: float, optional 48 | A value to add to the observed counts of each feature when training. 49 | Setting this to a positive value ensures that no probabilities are 50 | truly zero. Default is 0. 51 | 52 | inertia: float, (0, 1), optional 53 | Indicates the proportion of the update to apply to the parameters 54 | during training. When the inertia is 0.0, the update is applied in 55 | its entirety and the previous parameters are ignored. When the 56 | inertia is 1.0, the update is entirely ignored and the previous 57 | parameters are kept, equivalently to if the parameters were frozen. 58 | 59 | frozen: bool, optional 60 | Whether all the parameters associated with this distribution are frozen. 61 | If you want to freeze individual pameters, or individual values in those 62 | parameters, you must modify the `frozen` attribute of the tensor or 63 | parameter directly. Default is False. 64 | 65 | check_data: bool, optional 66 | Whether to check properties of the data and potentially recast it to 67 | torch.tensors. 
This does not prevent checking of parameters but can 68 | slightly speed up computation when you know that your inputs are valid. 69 | Setting this to False is also necessary for compiling. 70 | """ 71 | 72 | def __init__(self, probs=None, n_categories=None, pseudocount=0.0, 73 | inertia=0.0, frozen=False, check_data=True): 74 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 75 | self.name = "Categorical" 76 | 77 | self.probs = _check_parameter(_cast_as_parameter(probs), "probs", 78 | min_value=0, max_value=1, ndim=2) 79 | 80 | self.pseudocount = pseudocount 81 | 82 | self._initialized = probs is not None 83 | self.d = self.probs.shape[-2] if self._initialized else None 84 | 85 | if n_categories is not None: 86 | self.n_keys = n_categories 87 | else: 88 | self.n_keys = self.probs.shape[-1] if self._initialized else None 89 | 90 | self._reset_cache() 91 | 92 | def _initialize(self, d, n_keys): 93 | """Initialize the probability distribution. 94 | 95 | This method is meant to only be called internally. It initializes the 96 | parameters of the distribution and stores its dimensionality. For more 97 | complex methods, this function will do more. 98 | 99 | 100 | Parameters 101 | ---------- 102 | d: int 103 | The dimensionality the distribution is being initialized to. 104 | 105 | n_keys: int 106 | The number of keys the distribution is being initialized with. 107 | """ 108 | 109 | self.probs = _cast_as_parameter(torch.zeros(d, n_keys, 110 | dtype=self.dtype, device=self.device)) 111 | 112 | self.n_keys = n_keys 113 | self._initialized = True 114 | super()._initialize(d) 115 | 116 | def _reset_cache(self): 117 | """Reset the internally stored statistics. 118 | 119 | This method is meant to only be called internally. It resets the 120 | stored statistics used to update the model parameters as well as 121 | recalculates the cached values meant to speed up log probability 122 | calculations. 123 | """ 124 | 125 | if self._initialized == False: 126 | return 127 | 128 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 129 | self.register_buffer("_xw_sum", torch.zeros(self.d, self.n_keys, 130 | device=self.device)) 131 | 132 | self.register_buffer("_log_probs", torch.log(self.probs)) 133 | 134 | def sample(self, n): 135 | """Sample from the probability distribution. 136 | 137 | This method will return `n` samples generated from the underlying 138 | probability distribution. 139 | 140 | 141 | Parameters 142 | ---------- 143 | n: int 144 | The number of samples to generate. 145 | 146 | 147 | Returns 148 | ------- 149 | X: torch.tensor, shape=(n, self.d) 150 | Randomly generated samples. 151 | """ 152 | 153 | return torch.distributions.Categorical(self.probs).sample([n]) 154 | 155 | def log_probability(self, X): 156 | """Calculate the log probability of each example. 157 | 158 | This method calculates the log probability of each example given the 159 | parameters of the distribution. The examples must be given in a 2D 160 | format. For a categorical distribution, each entry in the data must 161 | be an integer in the range [0, n_keys). 162 | 163 | Note: This differs from some other log probability calculation 164 | functions, like those in torch.distributions, because it is not 165 | returning the log probability of each feature independently, but rather 166 | the total log probability of the entire example. 
167 | 168 | 169 | Parameters 170 | ---------- 171 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 172 | A set of examples to evaluate. 173 | 174 | 175 | Returns 176 | ------- 177 | logp: torch.Tensor, shape=(-1,) 178 | The log probability of each example. 179 | """ 180 | 181 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 182 | max_value=self.n_keys-1, ndim=2, shape=(-1, self.d), 183 | check_parameter=self.check_data) 184 | 185 | logps = torch.zeros(X.shape[0], dtype=self.probs.dtype, 186 | device=self.device) 187 | 188 | for i in range(self.d): 189 | if isinstance(X, torch.masked.MaskedTensor): 190 | logp_ = self._log_probs[i][X[:, i]._masked_data] 191 | logp_[~X[:, i]._masked_mask] = 0 192 | logps += logp_ 193 | else: 194 | logps += self._log_probs[i][X[:, i]] 195 | 196 | return logps 197 | 198 | def summarize(self, X, sample_weight=None): 199 | """Extract the sufficient statistics from a batch of data. 200 | 201 | This method calculates the sufficient statistics from optionally 202 | weighted data and adds them to the stored cache. The examples must be 203 | given in a 2D format. Sample weights can either be provided as one 204 | value per example or as a 2D matrix of weights for each feature in 205 | each example. 206 | 207 | 208 | Parameters 209 | ---------- 210 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 211 | A set of examples to summarize. 212 | 213 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 214 | A set of weights for the examples. This can be either of shape 215 | (-1, self.d) or a vector of shape (-1,). Default is ones. 216 | """ 217 | 218 | if self.frozen == True: 219 | return 220 | 221 | X = _cast_as_tensor(X) 222 | if not self._initialized: 223 | if self.n_keys is not None: 224 | n_keys = self.n_keys 225 | elif isinstance(X, torch.masked.MaskedTensor): 226 | n_keys = int(torch.max(X._masked_data)) + 1 227 | else: 228 | n_keys = int(torch.max(X)) + 1 229 | 230 | self._initialize(X.shape[1], n_keys) 231 | 232 | X = _check_parameter(X, "X", min_value=0, max_value=self.n_keys-1, 233 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 234 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight)) 235 | 236 | _inplace_add(self._w_sum, torch.sum(sample_weight, dim=0)) 237 | for i in range(self.n_keys): 238 | _inplace_add(self._xw_sum[:, i], torch.sum((X == i) * sample_weight, 239 | dim=0)) 240 | 241 | def from_summaries(self): 242 | """Update the model parameters given the extracted statistics. 243 | 244 | This method uses calculated statistics from calls to the `summarize` 245 | method to update the distribution parameters. Hyperparameters for the 246 | update are passed in at initialization time. 247 | 248 | Note: Internally, a call to `fit` is just a successive call to the 249 | `summarize` method followed by the `from_summaries` method. 
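A sketch of the masked branch in the loop above: features hidden by the mask contribute zero to the log probability, so partially observed rows can still be scored (here the fill value under the mask stays within the valid key range).

```python
import torch
from pomegranate.distributions import Categorical

d = Categorical([[0.5, 0.5],
                 [0.1, 0.9]])

X = torch.tensor([[1, 0]])
mask = torch.tensor([[True, False]])        # second feature is unobserved
X_masked = torch.masked.MaskedTensor(X, mask=mask)

print(d.log_probability(X_masked))          # log(0.5), from the first feature only
```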
250 | """ 251 | 252 | if self.frozen == True: 253 | return 254 | 255 | probs = (self._xw_sum + self.pseudocount) / (self._w_sum + 256 | self.pseudocount * self.n_keys).unsqueeze(1) 257 | 258 | _update_parameter(self.probs, probs, self.inertia) 259 | self._reset_cache() 260 | -------------------------------------------------------------------------------- /pomegranate/distributions/conditional_categorical.py: -------------------------------------------------------------------------------- 1 | # conditional_categorical.py 2 | # Contact: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | import itertools 7 | 8 | from .._utils import _cast_as_tensor 9 | from .._utils import _cast_as_parameter 10 | from .._utils import _update_parameter 11 | from .._utils import _check_parameter 12 | from .._utils import _reshape_weights 13 | 14 | from .._utils import BufferList 15 | 16 | from ._distribution import ConditionalDistribution 17 | from .categorical import Categorical 18 | 19 | 20 | class ConditionalCategorical(ConditionalDistribution): 21 | """A conditional categorical distribution. 22 | 23 | This is a categorical distribution that is conditioned on previous 24 | emissions, meaning that the probability of each character depends on the 25 | observed character earlier in the sequence. Each feature is conditioned 26 | independently of the others like a `Categorical` distribution. 27 | 28 | This conditioning makes the shape of the distribution a bit more 29 | complicated than the `JointCategorical` distribution. Specifically, a 30 | `JointCategorical` distribution is multivariate by definition but a 31 | `ConditionalCategorical` does not have to be. Although both may appear 32 | similar in that they both take in a vector of characters and return 33 | probabilities, the vector fed into the JointCategorical are all observed 34 | together without some notion of time, whereas the ConditionalCategorical 35 | explicitly requires a notion of timing, where the probability of later 36 | characters depend on the composition of characters seen before. 37 | 38 | 39 | Parameters 40 | ---------- 41 | probs: list of numpy.ndarray, torch.tensor or None, shape=(k, k), optional 42 | A list of conditional probabilities with one tensor for each feature 43 | in the data being modeled. Each tensor should have `k+1` dimensions 44 | where `k` is the number of timesteps to condition on. Each dimension 45 | should span the number of keys in that dimension. For example, if 46 | specifying a univariate conditional categorical distribution where 47 | k=2, a valid tensor shape would be [(2, 3, 4)]. Default is None. 48 | 49 | n_categories: list, numpy.ndarray, torch.tensor or None, optional 50 | The number of categories for each feature in the data. Only needs to 51 | be provided when the parameters will be learned directly from data and 52 | you want to make sure that right number of keys are included in each 53 | dimension. Unlike the `Categorical` distribution, this needs to be 54 | a list of shapes with one shape for each feature and the shape matches 55 | that specified in `probs`. Default is None. 56 | 57 | pseudocount: float, optional 58 | A value to add to the observed counts of each feature when training. 59 | Setting this to a positive value ensures that no probabilities are 60 | truly zero. Default is 0. 61 | 62 | inertia: float, (0, 1), optional 63 | Indicates the proportion of the update to apply to the parameters 64 | during training. 
When the inertia is 0.0, the update is applied in 65 | its entirety and the previous parameters are ignored. When the 66 | inertia is 1.0, the update is entirely ignored and the previous 67 | parameters are kept, equivalently to if the parameters were frozen. 68 | 69 | frozen: bool, optional 70 | Whether all the parameters associated with this distribution are frozen. 71 | If you want to freeze individual pameters, or individual values in those 72 | parameters, you must modify the `frozen` attribute of the tensor or 73 | parameter directly. Default is False. 74 | 75 | check_data: bool, optional 76 | Whether to check properties of the data and potentially recast it to 77 | torch.tensors. This does not prevent checking of parameters but can 78 | slightly speed up computation when you know that your inputs are valid. 79 | Setting this to False is also necessary for compiling. 80 | """ 81 | 82 | def __init__(self, probs=None, n_categories=None, pseudocount=0, 83 | inertia=0.0, frozen=False, check_data=True): 84 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 85 | self.name = "ConditionalCategorical" 86 | 87 | if probs is not None: 88 | self.n_categories = [] 89 | self.probs = torch.nn.ParameterList([]) 90 | 91 | for prob in probs: 92 | prob = _check_parameter(_cast_as_parameter(prob), "probs", 93 | min_value=0, max_value=1) 94 | 95 | self.probs.append(prob) 96 | self.n_categories.append(tuple(prob.shape)) 97 | 98 | else: 99 | self.probs = None 100 | self.n_categories = n_categories 101 | 102 | self.pseudocount = _check_parameter(pseudocount, "pseudocount") 103 | 104 | self._initialized = probs is not None 105 | self.d = len(self.probs) if self._initialized else None 106 | self.n_parents = len(self.probs[0].shape) if self._initialized else None 107 | self._reset_cache() 108 | 109 | def _initialize(self, d, n_categories): 110 | """Initialize the probability distribution. 111 | 112 | This method is meant to only be called internally. It initializes the 113 | parameters of the distribution and stores its dimensionality. For more 114 | complex methods, this function will do more. 115 | 116 | 117 | Parameters 118 | ---------- 119 | d: int 120 | The dimensionality the distribution is being initialized to. 121 | 122 | n_categories: list of tuples 123 | The shape of each conditional distribution, one per feature. 124 | """ 125 | 126 | self.n_categories = [] 127 | for n_cat in n_categories: 128 | if isinstance(n_cat, (list, tuple)): 129 | self.n_categories.append(tuple(n_cat)) 130 | elif isinstance(n_cat, (numpy.ndarray, torch.Tensor)): 131 | self.n_categories.append(tuple(n_cat.tolist())) 132 | 133 | self.n_parents = len(self.n_categories[0]) 134 | self.probs = torch.nn.ParameterList([_cast_as_parameter(torch.zeros( 135 | *cats, dtype=self.dtype, device=self.device, requires_grad=False)) 136 | for cats in self.n_categories]) 137 | 138 | self._initialized = True 139 | super()._initialize(d) 140 | 141 | def _reset_cache(self): 142 | """Reset the internally stored statistics. 143 | 144 | This method is meant to only be called internally. It resets the 145 | stored statistics used to update the model parameters as well as 146 | recalculates the cached values meant to speed up log probability 147 | calculations. 
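A minimal sketch matching the shape example in the docstring above: a single feature conditioned on two earlier observations with 2 and 3 keys, emitting one of 4 keys. The uniform table and the example values are arbitrary.

```python
import torch
from pomegranate.distributions import ConditionalCategorical

probs = torch.full((2, 3, 4), 0.25)       # every conditional row is uniform over 4 keys
d = ConditionalCategorical([probs])       # one feature, so a list with one tensor

# log_probability expects shape (n, n_parents, d); the last entry along the
# middle axis is the emitted value itself.
X = torch.tensor([[[1], [2], [3]]])
print(d.log_probability(X))               # log(0.25)
```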
148 | """ 149 | 150 | if self._initialized == False: 151 | return 152 | 153 | _w_sum = [] 154 | _xw_sum = [] 155 | 156 | for n_categories in self.n_categories: 157 | _w_sum.append(torch.zeros(*n_categories[:-1], 158 | dtype=self.probs[0].dtype, device=self.device)) 159 | _xw_sum.append(torch.zeros(*n_categories, 160 | dtype=self.probs[0].dtype, device=self.device)) 161 | 162 | self._w_sum = BufferList(_w_sum) 163 | self._xw_sum = BufferList(_xw_sum) 164 | 165 | self._log_probs = BufferList([torch.log(prob) for prob in self.probs]) 166 | 167 | def sample(self, n, X): 168 | """Sample from the probability distribution. 169 | 170 | This method will return `n` samples generated from the underlying 171 | probability distribution. For a mixture model, this involves first 172 | sampling the component using the prior probabilities, and then sampling 173 | from the chosen distribution. 174 | 175 | 176 | Parameters 177 | ---------- 178 | n: int 179 | The number of samples to generate. 180 | 181 | X: list, numpy.ndarray, torch.tensor, shape=(n, d, *self.probs.shape-1) 182 | The values to be conditioned on when generating the samples. 183 | 184 | Returns 185 | ------- 186 | X: torch.tensor, shape=(n, self.d) 187 | Randomly generated samples. 188 | """ 189 | 190 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 191 | shape=(-1, self.n_parents-1, self.d)) 192 | 193 | y = [] 194 | for i in range(n): 195 | y.append([]) 196 | 197 | for j in range(self.d): 198 | idx = tuple(X[i, :, j]) 199 | if len(idx) == 1: 200 | idx = idx[0].item() 201 | 202 | probs = self.probs[j][idx] 203 | 204 | y_ = torch.multinomial(probs, 1).item() 205 | y[-1].append(y_) 206 | 207 | return torch.tensor(y) 208 | 209 | def log_probability(self, X): 210 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 211 | shape=(-1, self.n_parents, self.d), check_parameter=self.check_data) 212 | 213 | logps = torch.zeros(len(X), dtype=self.probs[0].dtype, device=X.device, 214 | requires_grad=False) 215 | 216 | for i in range(len(X)): 217 | for j in range(self.d): 218 | logps[i] += self._log_probs[j][tuple(X[i, :, j])] 219 | 220 | return logps 221 | 222 | def summarize(self, X, sample_weight=None): 223 | if self.frozen == True: 224 | return 225 | 226 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 227 | dtypes=(torch.int32, torch.int64), check_parameter=self.check_data) 228 | 229 | if not self._initialized: 230 | self._initialize(len(X[0][0]), torch.max(X, dim=0)[0].T+1) 231 | 232 | X = _check_parameter(X, "X", shape=(-1, self.n_parents, self.d), 233 | check_parameter=self.check_data) 234 | sample_weight = _check_parameter(_cast_as_tensor(sample_weight, 235 | dtype=torch.float32), "sample_weight", min_value=0, ndim=(1, 2)) 236 | 237 | if sample_weight is None: 238 | sample_weight = torch.ones(X[:, 0].shape[0], X[:, 0].shape[-1], 239 | dtype=self.probs[0].dtype) 240 | elif len(sample_weight.shape) == 1: 241 | sample_weight = sample_weight.reshape(-1, 1).expand(-1, X.shape[2]) 242 | elif sample_weight.shape[1] == 1 and self.d > 1: 243 | sample_weight = sample_weight.expand(-1, X.shape[2]) 244 | 245 | _check_parameter(sample_weight, "sample_weight", 246 | min_value=0, ndim=2, shape=(X.shape[0], X.shape[2])) 247 | 248 | for j in range(self.d): 249 | strides = torch.tensor(self._xw_sum[j].stride(), device=X.device) 250 | X_ = torch.sum(X[:, :, j] * strides, dim=-1) 251 | 252 | self._xw_sum[j].view(-1).scatter_add_(0, X_, sample_weight[:,j]) 253 | self._w_sum[j][:] = self._xw_sum[j].sum(dim=-1) 254 | 255 | def from_summaries(self): 256 
| if self.frozen == True: 257 | return 258 | 259 | for i in range(self.d): 260 | probs = self._xw_sum[i] / self._w_sum[i].unsqueeze(-1) 261 | probs = torch.nan_to_num(probs, 1. / probs.shape[-1]) 262 | 263 | _update_parameter(self.probs[i], probs, self.inertia) 264 | 265 | self._reset_cache() 266 | 267 | -------------------------------------------------------------------------------- /pomegranate/distributions/dirac_delta.py: -------------------------------------------------------------------------------- 1 | # diracdelta.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | 11 | from ._distribution import Distribution 12 | 13 | 14 | class DiracDelta(Distribution): 15 | """A dirac delta distribution object. 16 | 17 | A dirac delta distribution is a probability distribution that has its entire 18 | density at zero. This distribution assumes that each feature is independent 19 | of the others. This means that, in practice, it will assign a zero 20 | probability if any value in an example is non-zero. 21 | 22 | There are two ways to initialize this object. The first is to pass in 23 | the tensor of alpha values representing the probability to return given a 24 | zero value, at which point they can immediately be 25 | used. The second is to not pass in the rate parameters and then call 26 | either `fit` or `summary` + `from_summaries`, at which point the probability 27 | parameter will be learned from data. 28 | 29 | 30 | Parameters 31 | ---------- 32 | alphas: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 33 | The probability parameters for each feature. Default is None. 34 | 35 | inertia: float, [0, 1], optional 36 | Indicates the proportion of the update to apply to the parameters 37 | during training. When the inertia is 0.0, the update is applied in 38 | its entirety and the previous parameters are ignored. When the 39 | inertia is 1.0, the update is entirely ignored and the previous 40 | parameters are kept, equivalently to if the parameters were frozen. 41 | 42 | frozen: bool, optional 43 | Whether all the parameters associated with this distribution are frozen. 44 | If you want to freeze individual pameters, or individual values in those 45 | parameters, you must modify the `frozen` attribute of the tensor or 46 | parameter directly. Default is False. 47 | 48 | check_data: bool, optional 49 | Whether to check properties of the data and potentially recast it to 50 | torch.tensors. This does not prevent checking of parameters but can 51 | slightly speed up computation when you know that your inputs are valid. 52 | Setting this to False is also necessary for compiling. 53 | """ 54 | 55 | def __init__(self, alphas=None, inertia=0.0, frozen=False, check_data=True): 56 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 57 | self.name = "DiracDelta" 58 | 59 | self.alphas = _check_parameter(_cast_as_parameter(alphas), "alphas", 60 | min_value=0.0, ndim=1) 61 | 62 | self._initialized = alphas is not None 63 | self.d = len(self.alphas) if self._initialized else None 64 | self._reset_cache() 65 | 66 | def _initialize(self, d): 67 | """Initialize the probability distribution. 68 | 69 | This method is meant to only be called internally. It initializes the 70 | parameters of the distribution and stores its dimensionality. For more 71 | complex methods, this function will do more. 
72 | 73 | 74 | Parameters 75 | ---------- 76 | d: int 77 | The dimensionality the distribution is being initialized to. 78 | """ 79 | 80 | self.alphas = _cast_as_parameter(torch.ones(d, device=self.device)) 81 | 82 | self._initialized = True 83 | super()._initialize(d) 84 | 85 | def _reset_cache(self): 86 | """Reset the internally stored statistics. 87 | 88 | This method is meant to only be called internally. It resets the 89 | stored statistics used to update the model parameters as well as 90 | recalculates the cached values meant to speed up log probability 91 | calculations. 92 | """ 93 | 94 | if self._initialized == False: 95 | return 96 | 97 | self.register_buffer("_log_alphas", torch.log(self.alphas)) 98 | 99 | def log_probability(self, X): 100 | """Calculate the log probability of each example. 101 | 102 | This method calculates the log probability of each example given the 103 | parameters of the distribution. The examples must be given in a 2D 104 | format. 105 | 106 | Note: This differs from some other log probability calculation 107 | functions, like those in torch.distributions, because it is not 108 | returning the log probability of each feature independently, but rather 109 | the total log probability of the entire example. 110 | 111 | 112 | Parameters 113 | ---------- 114 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 115 | A set of examples to evaluate. 116 | 117 | 118 | Returns 119 | ------- 120 | logp: torch.Tensor, shape=(-1,) 121 | The log probability of each example. 122 | """ 123 | 124 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 125 | shape=(-1, self.d), check_parameter=self.check_data) 126 | 127 | return torch.sum(torch.where(X == 0.0, self._log_alphas, float("-inf")), 128 | dim=-1) 129 | 130 | def summarize(self, X, sample_weight=None): 131 | """Extract the sufficient statistics from a batch of data. 132 | 133 | This method calculates the sufficient statistics from optionally 134 | weighted data and adds them to the stored cache. The examples must be 135 | given in a 2D format. Sample weights can either be provided as one 136 | value per example or as a 2D matrix of weights for each feature in 137 | each example. 138 | 139 | For a dirac delta distribution, there are no statistics to extract. 140 | 141 | 142 | Parameters 143 | ---------- 144 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 145 | A set of examples to summarize. 146 | 147 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 148 | A set of weights for the examples. This can be either of shape 149 | (-1, self.d) or a vector of shape (-1,). Default is ones. 150 | """ 151 | 152 | if self.frozen == True: 153 | return 154 | 155 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 156 | 157 | def from_summaries(self): 158 | """Update the model parameters given the extracted statistics. 159 | 160 | This method uses calculated statistics from calls to the `summarize` 161 | method to update the distribution parameters. Hyperparameters for the 162 | update are passed in at initialization time. 163 | 164 | For a dirac delta distribution, there are no updates. 165 | 166 | Note: Internally, a call to `fit` is just a successive call to the 167 | `summarize` method followed by the `from_summaries` method. 
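To make the zero-density behaviour described above concrete, here is a small illustrative sketch (not part of the repository) showing that any non-zero entry drives an example's log probability to negative infinity; it assumes DiracDelta is importable from pomegranate.distributions.

import torch
from pomegranate.distributions import DiracDelta

d = DiracDelta(alphas=[1.0, 0.5])

X = torch.tensor([[0.0, 0.0],
                  [0.0, 1.0]])
print(d.log_probability(X))    # [log(1.0) + log(0.5), -inf]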
168 | """ 169 | 170 | return 171 | -------------------------------------------------------------------------------- /pomegranate/distributions/exponential.py: -------------------------------------------------------------------------------- 1 | # exponential.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | from torch.distributions import Exponential as tExponential 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class Exponential(Distribution): 16 | """An exponential distribution object. 17 | 18 | An exponential distribution models scales of discrete events, and has a 19 | rate parameter describing the average time between event occurrences. 20 | This distribution assumes that each feature is independent of the others. 21 | Although the object is meant to operate on discrete counts, it can be used 22 | on any non-negative continuous data. 23 | 24 | There are two ways to initialize this object. The first is to pass in 25 | the tensor of rate parameters, at which point they can immediately be 26 | used. The second is to not pass in the rate parameters and then call 27 | either `fit` or `summary` + `from_summaries`, at which point the rate 28 | parameter will be learned from data. 29 | 30 | 31 | Parameters 32 | ---------- 33 | scales: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 34 | The rate parameters for each feature. Default is None. 35 | 36 | inertia: float, (0, 1), optional 37 | Indicates the proportion of the update to apply to the parameters 38 | during training. When the inertia is 0.0, the update is applied in 39 | its entirety and the previous parameters are ignored. When the 40 | inertia is 1.0, the update is entirely ignored and the previous 41 | parameters are kept, equivalently to if the parameters were frozen. 42 | 43 | frozen: bool, optional 44 | Whether all the parameters associated with this distribution are 45 | frozen. If you want to freeze individual pameters, or individual values 46 | in those parameters, you must modify the `frozen` attribute of the 47 | tensor or parameter directly. Default is False. 48 | 49 | check_data: bool, optional 50 | Whether to check properties of the data and potentially recast it to 51 | torch.tensors. This does not prevent checking of parameters but can 52 | slightly speed up computation when you know that your inputs are valid. 53 | Setting this to False is also necessary for compiling. 54 | """ 55 | 56 | def __init__(self, scales=None, inertia=0.0, frozen=False, check_data=True): 57 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 58 | self.name = "Exponential" 59 | 60 | self.scales = _check_parameter(_cast_as_parameter(scales), "scales", 61 | min_value=0, ndim=1) 62 | 63 | self._initialized = scales is not None 64 | self.d = self.scales.shape[-1] if self._initialized else None 65 | self._reset_cache() 66 | 67 | def _initialize(self, d): 68 | """Initialize the probability distribution. 69 | 70 | This method is meant to only be called internally. It initializes the 71 | parameters of the distribution and stores its dimensionality. For more 72 | complex methods, this function will do more. 73 | 74 | 75 | Parameters 76 | ---------- 77 | d: int 78 | The dimensionality the distribution is being initialized to. 
79 | """ 80 | 81 | self.scales = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 82 | device=self.device)) 83 | 84 | self._initialized = True 85 | super()._initialize(d) 86 | 87 | def _reset_cache(self): 88 | """Reset the internally stored statistics. 89 | 90 | This method is meant to only be called internally. It resets the 91 | stored statistics used to update the model parameters as well as 92 | recalculates the cached values meant to speed up log probability 93 | calculations. 94 | """ 95 | 96 | if self._initialized == False: 97 | return 98 | 99 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 100 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 101 | 102 | self.register_buffer("_log_scales", torch.log(self.scales)) 103 | 104 | def sample(self, n): 105 | """Sample from the probability distribution. 106 | 107 | This method will return `n` samples generated from the underlying 108 | probability distribution. 109 | 110 | 111 | Parameters 112 | ---------- 113 | n: int 114 | The number of samples to generate. 115 | 116 | 117 | Returns 118 | ------- 119 | X: torch.tensor, shape=(n, self.d) 120 | Randomly generated samples. 121 | """ 122 | 123 | return tExponential(1. / self.scales).sample([n]) 124 | 125 | def log_probability(self, X): 126 | """Calculate the log probability of each example. 127 | 128 | This method calculates the log probability of each example given the 129 | parameters of the distribution. The examples must be given in a 2D 130 | format. For an exponential distribution, the data must be non-negative. 131 | 132 | Note: This differs from some other log probability calculation 133 | functions, like those in torch.distributions, because it is not 134 | returning the log probability of each feature independently, but rather 135 | the total log probability of the entire example. 136 | 137 | 138 | Parameters 139 | ---------- 140 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 141 | A set of examples to evaluate. 142 | 143 | 144 | Returns 145 | ------- 146 | logp: torch.Tensor, shape=(-1,) 147 | The log probability of each example. 148 | """ 149 | 150 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 151 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 152 | 153 | return torch.sum(-self._log_scales - (1. / self.scales) * X, dim=1) 154 | 155 | def summarize(self, X, sample_weight=None): 156 | """Extract the sufficient statistics from a batch of data. 157 | 158 | This method calculates the sufficient statistics from optionally 159 | weighted data and adds them to the stored cache. The examples must be 160 | given in a 2D format. Sample weights can either be provided as one 161 | value per example or as a 2D matrix of weights for each feature in 162 | each example. 163 | 164 | 165 | Parameters 166 | ---------- 167 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 168 | A set of examples to summarize. 169 | 170 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 171 | A set of weights for the examples. This can be either of shape 172 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
173 | """ 174 | 175 | if self.frozen == True: 176 | return 177 | 178 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 179 | _check_parameter(X, "X", min_value=0, check_parameter=self.check_data) 180 | 181 | self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) 182 | self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) 183 | 184 | def from_summaries(self): 185 | """Update the model parameters given the extracted statistics. 186 | 187 | This method uses calculated statistics from calls to the `summarize` 188 | method to update the distribution parameters. Hyperparameters for the 189 | update are passed in at initialization time. 190 | 191 | Note: Internally, a call to `fit` is just a successive call to the 192 | `summarize` method followed by the `from_summaries` method. 193 | """ 194 | 195 | if self.frozen == True: 196 | return 197 | 198 | scales = self._xw_sum / self._w_sum 199 | _update_parameter(self.scales, scales, self.inertia) 200 | self._reset_cache() 201 | -------------------------------------------------------------------------------- /pomegranate/distributions/gamma.py: -------------------------------------------------------------------------------- 1 | # gamma.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class Gamma(Distribution): 16 | """A gamma distribution object. 17 | 18 | A gamma distribution is the sum of exponential distributions, and has shape 19 | and rate parameters. This distribution assumes that each feature is 20 | independent of the others. 21 | 22 | There are two ways to initialize this objecct. The first is to pass in 23 | the tensor of rate and shae parameters, at which point they can immediately 24 | be used. The second is to not pass in the rate parameters and then call 25 | either `fit` or `summary` + `from_summaries`, at which point the rate 26 | and shape parameters will be learned from data. 27 | 28 | 29 | Parameters 30 | ---------- 31 | shapes: torch.tensor or None, shape=(d,), optional 32 | The shape parameter for each feature. Default is None 33 | 34 | rates: torch.tensor or None, shape=(d,), optional 35 | The rate parameters for each feature. Default is None. 36 | 37 | inertia: float, (0, 1), optional 38 | Indicates the proportion of the update to apply to the parameters 39 | during training. When the inertia is 0.0, the update is applied in 40 | its entirety and the previous parameters are ignored. When the 41 | inertia is 1.0, the update is entirely ignored and the previous 42 | parameters are kept, equivalently to if the parameters were frozen. 43 | 44 | tol: float, [0, inf), optional 45 | The threshold at which to stop fitting the parameters of the 46 | distribution. Default is 1e-4. 47 | 48 | max_iter: int, [0, inf), optional 49 | The maximum number of iterations to run EM when fitting the parameters 50 | of the distribution. Default is 20. 51 | 52 | frozen: bool, optional 53 | Whether all the parameters associated with this distribution are frozen. 54 | If you want to freeze individual pameters, or individual values in those 55 | parameters, you must modify the `frozen` attribute of the tensor or 56 | parameter directly. Default is False. 
57 | 58 | check_data: bool, optional 59 | Whether to check properties of the data and potentially recast it to 60 | torch.tensors. This does not prevent checking of parameters but can 61 | slightly speed up computation when you know that your inputs are valid. 62 | Setting this to False is also necessary for compiling. 63 | """ 64 | 65 | def __init__(self, shapes=None, rates=None, inertia=0.0, tol=1e-4, 66 | max_iter=20, frozen=False, check_data=True): 67 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 68 | self.name = "Gamma" 69 | 70 | self.shapes = _check_parameter(_cast_as_parameter(shapes), "shapes", 71 | min_value=0, ndim=1) 72 | self.rates = _check_parameter(_cast_as_parameter(rates), "rates", 73 | min_value=0, ndim=1) 74 | 75 | _check_shapes([self.shapes, self.rates], ["shapes", "rates"]) 76 | 77 | self.tol = _check_parameter(tol, "tol", min_value=0, ndim=0) 78 | self.max_iter = _check_parameter(max_iter, "max_iter", min_value=1, 79 | ndim=0) 80 | 81 | self._initialized = (shapes is not None) and (rates is not None) 82 | self.d = self.shapes.shape[-1] if self._initialized else None 83 | self._reset_cache() 84 | 85 | def _initialize(self, d): 86 | """Initialize the probability distribution. 87 | 88 | This method is meant to only be called internally. It initializes the 89 | parameters of the distribution and stores its dimensionality. For more 90 | complex methods, this function will do more. 91 | 92 | 93 | Parameters 94 | ---------- 95 | d: int 96 | The dimensionality the distribution is being initialized to. 97 | """ 98 | 99 | self.shapes = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 100 | device=self.device)) 101 | self.rates = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 102 | device=self.device)) 103 | 104 | self._initialized = True 105 | super()._initialize(d) 106 | 107 | def _reset_cache(self): 108 | """Reset the internally stored statistics. 109 | 110 | This method is meant to only be called internally. It resets the 111 | stored statistics used to update the model parameters as well as 112 | recalculates the cached values meant to speed up log probability 113 | calculations. 114 | """ 115 | 116 | if self._initialized == False: 117 | return 118 | 119 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 120 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 121 | self.register_buffer("_logx_w_sum", torch.zeros(self.d, 122 | device=self.device)) 123 | 124 | self.register_buffer("_log_rates", torch.log(self.rates)) 125 | self.register_buffer("_lgamma_shapes", torch.lgamma(self.shapes)) 126 | self.register_buffer("_thetas", self._log_rates * self.shapes - 127 | self._lgamma_shapes) 128 | 129 | def sample(self, n): 130 | """Sample from the probability distribution. 131 | 132 | This method will return `n` samples generated from the underlying 133 | probability distribution. 134 | 135 | 136 | Parameters 137 | ---------- 138 | n: int 139 | The number of samples to generate. 140 | 141 | 142 | Returns 143 | ------- 144 | X: torch.tensor, shape=(n, self.d) 145 | Randomly generated samples. 146 | """ 147 | 148 | return torch.distributions.Gamma(self.shapes, self.rates).sample([n]) 149 | 150 | def log_probability(self, X): 151 | """Calculate the log probability of each example. 152 | 153 | This method calculates the log probability of each example given the 154 | parameters of the distribution. The examples must be given in a 2D 155 | format. For a gamma distribution, the data must be non-negative. 
156 | 157 | Note: This differs from some other log probability calculation 158 | functions, like those in torch.distributions, because it is not 159 | returning the log probability of each feature independently, but rather 160 | the total log probability of the entire example. 161 | 162 | 163 | Parameters 164 | ---------- 165 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 166 | A set of examples to evaluate. 167 | 168 | 169 | Returns 170 | ------- 171 | logp: torch.Tensor, shape=(-1,) 172 | The log probability of each example. 173 | """ 174 | 175 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 176 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 177 | 178 | return torch.sum(self._thetas + torch.log(X) * (self.shapes - 1) - 179 | self.rates * X, dim=-1) 180 | 181 | def summarize(self, X, sample_weight=None): 182 | """Extract the sufficient statistics from a batch of data. 183 | 184 | This method calculates the sufficient statistics from optionally 185 | weighted data and adds them to the stored cache. The examples must be 186 | given in a 2D format. Sample weights can either be provided as one 187 | value per example or as a 2D matrix of weights for each feature in 188 | each example. 189 | 190 | 191 | Parameters 192 | ---------- 193 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 194 | A set of examples to summarize. 195 | 196 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 197 | A set of weights for the examples. This can be either of shape 198 | (-1, self.d) or a vector of shape (-1,). Default is ones. 199 | """ 200 | 201 | if self.frozen == True: 202 | return 203 | 204 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 205 | _check_parameter(X, "X", min_value=0, check_parameter=self.check_data) 206 | 207 | self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) 208 | self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) 209 | self._logx_w_sum[:] = self._logx_w_sum + torch.sum(torch.log(X) * 210 | sample_weight, dim=0) 211 | 212 | def from_summaries(self): 213 | """Update the model parameters given the extracted statistics. 214 | 215 | This method uses calculated statistics from calls to the `summarize` 216 | method to update the distribution parameters. Hyperparameters for the 217 | update are passed in at initialization time. 218 | 219 | Note: Internally, a call to `fit` is just a successive call to the 220 | `summarize` method followed by the `from_summaries` method. 
221 | """ 222 | 223 | if self.frozen == True: 224 | return 225 | 226 | thetas = torch.log(self._xw_sum / self._w_sum) - \ 227 | self._logx_w_sum / self._w_sum 228 | 229 | numerator = (3 - thetas + torch.sqrt((thetas - 3) ** 2 + 24 * thetas)) 230 | denominator = (12 * thetas) 231 | 232 | new_shapes = numerator / denominator 233 | shapes = new_shapes + self.tol 234 | 235 | for iteration in range(self.max_iter): 236 | mask = torch.abs(shapes - new_shapes) < self.tol 237 | if torch.all(mask): 238 | break 239 | 240 | shapes = new_shapes 241 | new_shapes = (shapes - (torch.log(shapes) - torch.polygamma(0, 242 | shapes) - thetas) / (1.0 / shapes - torch.polygamma(1, shapes))) 243 | 244 | shapes = new_shapes 245 | rates = 1.0 / (1.0 / (shapes * self._w_sum) * self._xw_sum) 246 | 247 | _update_parameter(self.shapes, shapes, self.inertia) 248 | _update_parameter(self.rates, rates, self.inertia) 249 | self._reset_cache() 250 | -------------------------------------------------------------------------------- /pomegranate/distributions/halfnormal.py: -------------------------------------------------------------------------------- 1 | # normal.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from ._distribution import Distribution 13 | from .normal import Normal 14 | 15 | 16 | # Define some useful constants 17 | LOG_2 = 0.6931471805599453 18 | 19 | 20 | class HalfNormal(Normal): 21 | """A half-normal distribution object. 22 | 23 | A half-normal distribution is a distribution over positive real numbers that 24 | is zero for negative numbers. It is defined by a single parameter, sigma, 25 | which is the standard deviation of the distribution. The mean of the 26 | distribution is sqrt(2/pi) * sigma, and the variance is (1 - 2/pi) * sigma^2. 27 | 28 | This distribution can assume that features are independent of the others if 29 | the covariance type is 'diag' or 'sphere', but if the type is 'full' then 30 | the features are not independent. 31 | 32 | There are two ways to initialize this object. The first is to pass in 33 | the tensor of probablity parameters, at which point they can immediately be 34 | used. The second is to not pass in the rate parameters and then call 35 | either `fit` or `summarize` + `from_summaries`, at which point the probability 36 | parameter will be learned from data. 37 | 38 | 39 | Parameters 40 | ---------- 41 | covs: list, numpy.ndarray, torch.Tensor, or None, optional 42 | The variances and covariances of the distribution. If covariance_type 43 | is 'full', the shape should be (self.d, self.d); if 'diag', the shape 44 | should be (self.d,); if 'sphere', it should be (1,). Note that this is 45 | the variances or covariances in all settings, and not the standard 46 | deviation, as may be more common for diagonal covariance matrices. 47 | Default is None. 48 | 49 | covariance_type: str, optional 50 | The type of covariance matrix. Must be one of 'full', 'diag', or 51 | 'sphere'. Default is 'full'. 52 | 53 | min_cov: float or None, optional 54 | The minimum variance or covariance. 55 | 56 | inertia: float, [0, 1], optional 57 | Indicates the proportion of the update to apply to the parameters 58 | during training. When the inertia is 0.0, the update is applied in 59 | its entirety and the previous parameters are ignored. 
When the 60 | inertia is 1.0, the update is entirely ignored and the previous 61 | parameters are kept, equivalently to if the parameters were frozen. 62 | 63 | frozen: bool, optional 64 | Whether all the parameters associated with this distribution are frozen. 65 | If you want to freeze individual pameters, or individual values in those 66 | parameters, you must modify the `frozen` attribute of the tensor or 67 | parameter directly. Default is False. 68 | """ 69 | 70 | def __init__( 71 | self, 72 | covs=None, 73 | covariance_type="full", 74 | min_cov=None, 75 | inertia=0.0, 76 | frozen=False, 77 | check_data=True, 78 | ): 79 | self.name = "HalfNormal" 80 | super().__init__( 81 | means=None, 82 | covs=covs, 83 | min_cov=min_cov, 84 | covariance_type=covariance_type, 85 | inertia=inertia, 86 | frozen=frozen, 87 | check_data=check_data, 88 | ) 89 | 90 | def _initialize(self, d): 91 | """Initialize the probability distribution. 92 | 93 | This method is meant to only be called internally. It initializes the 94 | parameters of the distribution and stores its dimensionality. For more 95 | complex methods, this function will do more. 96 | 97 | 98 | Parameters 99 | ---------- 100 | d: int 101 | The dimensionality the distribution is being initialized to. 102 | """ 103 | super()._initialize(d) 104 | 105 | def _reset_cache(self): 106 | """Reset the internally stored statistics. 107 | 108 | This method is meant to only be called internally. It resets the 109 | stored statistics used to update the model parameters as well as 110 | recalculates the cached values meant to speed up log probability 111 | calculations. 112 | """ 113 | super()._reset_cache() 114 | 115 | def sample(self, n): 116 | """Sample from the probability distribution. 117 | 118 | This method will return `n` samples generated from the underlying 119 | probability distribution. 120 | 121 | 122 | Parameters 123 | ---------- 124 | n: int 125 | The number of samples to generate. 126 | 127 | 128 | Returns 129 | ------- 130 | X: torch.tensor, shape=(n, self.d) 131 | Randomly generated samples. 132 | """ 133 | if self.covariance_type in ["diag", "full"]: 134 | return torch.distributions.HalfNormal(self.covs).sample([n]) 135 | 136 | def log_probability(self, X): 137 | """Calculate the log probability of each example. 138 | 139 | This method calculates the log probability of each example given the 140 | parameters of the distribution. The examples must be given in a 2D 141 | format. 142 | 143 | Note: This differs from some other log probability calculation 144 | functions, like those in torch.distributions, because it is not 145 | returning the log probability of each feature independently, but rather 146 | the total log probability of the entire example. 147 | 148 | 149 | Parameters 150 | ---------- 151 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 152 | A set of examples to evaluate. 153 | 154 | 155 | Returns 156 | ------- 157 | logp: torch.Tensor, shape=(-1,) 158 | The log probability of each example. 159 | """ 160 | 161 | X = _check_parameter( 162 | _cast_as_tensor(X, dtype=self.covs.dtype), 163 | "X", 164 | ndim=2, 165 | shape=(-1, self.d), 166 | check_parameter=self.check_data, 167 | ) 168 | return super().log_probability(X) + LOG_2 169 | 170 | def summarize(self, X, sample_weight=None): 171 | """Extract the sufficient statistics from a batch of data. 172 | 173 | This method calculates the sufficient statistics from optionally 174 | weighted data and adds them to the stored cache. 
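Since the half-normal density is the zero-mean normal density folded onto the non-negative half-line, its log-density is the normal log-density plus log 2, which is what the `+ LOG_2` term above implements. A minimal, illustrative fitting sketch follows; it assumes HalfNormal is importable from pomegranate.distributions and that fitting an uninitialized distribution works as for the other distributions, and the data are synthetic.

import torch
from pomegranate.distributions import HalfNormal

torch.manual_seed(0)
X = torch.randn(5000, 2).abs()        # non-negative, half-normal-shaped data

d = HalfNormal(covariance_type='diag')
d.fit(X)                              # means stay pinned at zero; only covs update
print(d.covs)
print(d.log_probability(X[:3]))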
The examples must be 175 | given in a 2D format. Sample weights can either be provided as one 176 | value per example or as a 2D matrix of weights for each feature in 177 | each example. 178 | 179 | 180 | Parameters 181 | ---------- 182 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 183 | A set of examples to summarize. 184 | 185 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 186 | A set of weights for the examples. This can be either of shape 187 | (-1, self.d) or a vector of shape (-1,). Default is ones. 188 | """ 189 | 190 | super().summarize(X, sample_weight=sample_weight) 191 | 192 | def from_summaries(self): 193 | """Update the model parameters given the extracted statistics. 194 | 195 | This method uses calculated statistics from calls to the `summarize` 196 | method to update the distribution parameters. Hyperparameters for the 197 | update are passed in at initialization time. 198 | 199 | Note: Internally, a call to `fit` is just a successive call to the 200 | `summarize` method followed by the `from_summaries` method. 201 | """ 202 | 203 | if self.frozen == True: 204 | return 205 | 206 | # the means are always zero for a half normal distribution 207 | means = torch.zeros(self.d, dtype=self.covs.dtype) 208 | 209 | if self.covariance_type == "full": 210 | v = self._xw_sum.unsqueeze(0) * self._xw_sum.unsqueeze(1) 211 | covs = self._xxw_sum / self._w_sum - v / self._w_sum**2.0 212 | 213 | elif self.covariance_type in ["diag", "sphere"]: 214 | covs = self._xxw_sum / self._w_sum - self._xw_sum**2.0 / self._w_sum**2.0 215 | if self.covariance_type == "sphere": 216 | covs = covs.mean(dim=-1) 217 | 218 | _update_parameter(self.covs, covs, self.inertia) 219 | _update_parameter(self.means, means, self.inertia) 220 | self._reset_cache() 221 | -------------------------------------------------------------------------------- /pomegranate/distributions/independent_components.py: -------------------------------------------------------------------------------- 1 | # independent_components.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _reshape_weights 11 | 12 | from ._distribution import Distribution 13 | 14 | 15 | class IndependentComponents(Distribution): 16 | """An independent components distribution object. 17 | 18 | A distribution made up of independent, univariate, distributions that each 19 | model a single feature in the data. This means that instead of using a 20 | single type of distribution to model all of the features in your data, you 21 | use one distribution per feature. Note that this will likely be slower 22 | than using a single distribution because the amount of batching possible 23 | will go down significantly. 24 | 25 | There are two ways to initialize this object. The first is to pass in a 26 | set of distributions that are all initialized with parameters, at which 27 | point this distribution can be immediately used for inference. The second 28 | is to pass in a set of distributions that are not initialized with 29 | parameters, and then call either `fit` or `summary` + `from_summaries` to 30 | learn the parameters of all the distributions. 
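For instance, a two-column data set could pair a Normal with an Exponential, one distribution per feature. The sketch below is illustrative only and assumes both component classes are importable from pomegranate.distributions.

import torch
from pomegranate.distributions import IndependentComponents, Normal, Exponential

torch.manual_seed(0)
X = torch.randn(500, 2).abs() + 0.1   # non-negative so the Exponential column is valid

d = IndependentComponents([Normal(), Exponential()])
d.fit(X)                              # each component is fit to its own column
print(d.log_probability(X[:5]))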
31 | 32 | 33 | Parameters 34 | ---------- 35 | distributions: list, tuple, numpy.ndarray, torch.Tensor, shape=(d,) 36 | An ordered iterable containing all of the distributions, one per 37 | feature, that will be used. 38 | 39 | check_data: bool, optional 40 | Whether to check properties of the data and potentially recast it to 41 | torch.tensors. This does not prevent checking of parameters but can 42 | slightly speed up computation when you know that your inputs are valid. 43 | Setting this to False is also necessary for compiling. 44 | """ 45 | 46 | def __init__(self, distributions, check_data=False): 47 | super().__init__(inertia=0.0, frozen=False, check_data=check_data) 48 | self.name = "IndependentComponents" 49 | 50 | if len(distributions) <= 1: 51 | raise ValueError("Must pass in at least 2 distributions.") 52 | for distribution in distributions: 53 | if not isinstance(distribution, Distribution): 54 | raise ValueError("All passed in distributions must " + 55 | "inherit from the Distribution object.") 56 | 57 | self.distributions = distributions 58 | self._initialized = all(d._initialized for d in distributions) 59 | self.d = len(distributions) 60 | self._reset_cache() 61 | 62 | 63 | def _initialize(self, d): 64 | """Initialize the probability distribution. 65 | 66 | This method is meant to only be called internally. It initializes the 67 | parameters of the distribution and stores its dimensionality. For more 68 | complex methods, this function will do more. 69 | 70 | 71 | Parameters 72 | ---------- 73 | d: int 74 | The dimensionality the distribution is being initialized to. 75 | """ 76 | 77 | for distribution in self.distributions: 78 | distribution._initialize(d) 79 | 80 | self._initialized = True 81 | 82 | 83 | def _reset_cache(self): 84 | """Reset the internally stored statistics. 85 | 86 | This method is meant to only be called internally. It resets the 87 | stored statistics used to update the model parameters as well as 88 | recalculates the cached values meant to speed up log probability 89 | calculations. 90 | """ 91 | 92 | if self._initialized == False: 93 | return 94 | 95 | for distribution in self.distributions: 96 | distribution._reset_cache() 97 | 98 | 99 | def sample(self, n): 100 | """Sample from the probability distribution. 101 | 102 | This method will return `n` samples generated from the underlying 103 | probability distribution. 104 | 105 | 106 | Parameters 107 | ---------- 108 | n: int 109 | The number of samples to generate. 110 | 111 | 112 | Returns 113 | ------- 114 | X: torch.tensor, shape=(n, self.d) 115 | Randomly generated samples. 116 | """ 117 | 118 | return torch.hstack([d.sample(n) for d in self.distributions]) 119 | 120 | 121 | def log_probability(self, X): 122 | """Calculate the log probability of each example. 123 | 124 | This method calculates the log probability of each example given the 125 | parameters of the distribution. The examples must be given in a 2D 126 | format. 127 | 128 | Note: This differs from some other log probability calculation 129 | functions, like those in torch.distributions, because it is not 130 | returning the log probability of each feature independently, but rather 131 | the total log probability of the entire example. 132 | 133 | 134 | Parameters 135 | ---------- 136 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 137 | A set of examples to evaluate. 138 | 139 | 140 | Returns 141 | ------- 142 | logp: torch.Tensor, shape=(-1,) 143 | The log probability of each example. 
144 | """ 145 | 146 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 147 | shape=(-1, self.d)) 148 | 149 | logp = torch.zeros(X.shape[0]) 150 | for i, d in enumerate(self.distributions): 151 | if isinstance(X, torch.masked.MaskedTensor): 152 | logp.add_(d.log_probability(X[:, i:i+1])._masked_data) 153 | else: 154 | logp.add_(d.log_probability(X[:, i:i+1])) 155 | 156 | return logp 157 | 158 | 159 | def summarize(self, X, sample_weight=None): 160 | """Extract the sufficient statistics from a batch of data. 161 | 162 | This method calculates the sufficient statistics from optionally 163 | weighted data and adds them to the stored cache. The examples must be 164 | given in a 2D format. Sample weights can either be provided as one 165 | value per example or as a 2D matrix of weights for each feature in 166 | each example. 167 | 168 | 169 | Parameters 170 | ---------- 171 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 172 | A set of examples to summarize. 173 | 174 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 175 | A set of weights for the examples. This can be either of shape 176 | (-1, self.d) or a vector of shape (-1,). Default is ones. 177 | """ 178 | 179 | if self.frozen == True: 180 | return 181 | 182 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 183 | shape=(-1, self.d)) 184 | 185 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight, 186 | dtype=torch.float32), device=self.device) 187 | 188 | for i, d in enumerate(self.distributions): 189 | d.summarize(X[:, i:i+1], sample_weight=sample_weight[:, i:i+1]) 190 | 191 | 192 | def from_summaries(self): 193 | """Update the model parameters given the extracted statistics. 194 | 195 | This method uses calculated statistics from calls to the `summarize` 196 | method to update the distribution parameters. Hyperparameters for the 197 | update are passed in at initialization time. 198 | 199 | Note: Internally, a call to `fit` is just a successive call to the 200 | `summarize` method followed by the `from_summaries` method. 201 | """ 202 | 203 | if self.frozen == True: 204 | return 205 | 206 | for distribution in self.distributions: 207 | distribution.from_summaries() 208 | -------------------------------------------------------------------------------- /pomegranate/distributions/joint_categorical.py: -------------------------------------------------------------------------------- 1 | # joint_categorical.py 2 | # Contact: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | from .._utils import _reshape_weights 12 | 13 | from ._distribution import Distribution 14 | from .categorical import Categorical 15 | 16 | 17 | class JointCategorical(Distribution): 18 | """A joint categorical distribution. 19 | 20 | A joint categorical distribution models the probability of a vector of 21 | categorical values occurring without assuming that the dimensions are 22 | independent from each other. Essentially, it is a Categorical distribution 23 | without the assumption that the dimensions are independent of each other. 24 | 25 | There are two ways to initialize this object. The first is to pass in 26 | the tensor of probability parameters, at which point they can immediately be 27 | used. 
The second is to not pass in the rate parameters and then call 28 | either `fit` or `summary` + `from_summaries`, at which point the 29 | probability parameters will be learned from data. 30 | 31 | 32 | Parameters 33 | ---------- 34 | probs: list, numpy.ndarray, torch.tensor, or None, shape=*n_categories 35 | A tensor where each dimension corresponds to one column in the data 36 | set being modeled and the size of each dimension is the number of 37 | categories in that column, e.g., if the data being modeled is binary 38 | and has shape (5, 4), this will be a tensor with shape (2, 2, 2, 2). 39 | Default is None. 40 | 41 | n_categories: list, numpy.ndarray, torch.tensor, or None, shape=(d,) 42 | A vector with the maximum number of categories that each column 43 | can have. If not given, this will be inferred from the data. Default 44 | is None. 45 | 46 | inertia: float, [0, 1], optional 47 | Indicates the proportion of the update to apply to the parameters 48 | during training. When the inertia is 0.0, the update is applied in 49 | its entirety and the previous parameters are ignored. When the 50 | inertia is 1.0, the update is entirely ignored and the previous 51 | parameters are kept, equivalently to if the parameters were frozen. 52 | 53 | pseudocount: float, optional 54 | A number of observations to add to each entry in the probability 55 | distribution during training. A higher value will smooth the 56 | distributions more. Default is 0. 57 | 58 | inertia: float, [0, 1], optional 59 | Indicates the proportion of the update to apply to the parameters 60 | during training. When the inertia is 0.0, the update is applied in 61 | its entirety and the previous parameters are ignored. When the 62 | inertia is 1.0, the update is entirely ignored and the previous 63 | parameters are kept, equivalently to if the parameters were frozen. 64 | 65 | frozen: bool, optional 66 | Whether all the parameters associated with this distribution are frozen. 67 | If you want to freeze individual pameters, or individual values in those 68 | parameters, you must modify the `frozen` attribute of the tensor or 69 | parameter directly. Default is False. 70 | 71 | check_data: bool, optional 72 | Whether to check properties of the data and potentially recast it to 73 | torch.tensors. This does not prevent checking of parameters but can 74 | slightly speed up computation when you know that your inputs are valid. 75 | Setting this to False is also necessary for compiling. 
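As a concrete illustration of the `probs` layout (one tensor axis per column of the data), here is a hedged sketch assuming JointCategorical is importable from pomegranate.distributions; the probabilities are arbitrary but sum to one.

import torch
from pomegranate.distributions import JointCategorical

# Two binary features modeled jointly: probs[i, j] = P(X0 = i, X1 = j).
probs = torch.tensor([[0.125, 0.125],
                      [0.250, 0.500]])
d = JointCategorical(probs=probs)

X = torch.tensor([[0, 1],
                  [1, 1]])
print(d.log_probability(X))   # log(0.125), log(0.5)
print(d.sample(5))            # five rows drawn from the joint table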
76 | """ 77 | 78 | def __init__(self, probs=None, n_categories=None, pseudocount=0, 79 | inertia=0.0, frozen=False, check_data=True): 80 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 81 | self.name = "JointCategorical" 82 | 83 | self.probs = _check_parameter(_cast_as_parameter(probs), "probs", 84 | min_value=0, max_value=1, value_sum=1) 85 | 86 | self.n_categories = _check_parameter(n_categories, "n_categories", 87 | min_value=2) 88 | self.pseudocount = _check_parameter(pseudocount, "pseudocount") 89 | 90 | self._initialized = probs is not None 91 | self.d = len(self.probs.shape) if self._initialized else None 92 | 93 | if self._initialized: 94 | if n_categories is None: 95 | self.n_categories = tuple(self.probs.shape) 96 | elif isinstance(n_categories, int): 97 | self.n_categories = (n_categories for i in range(n_categories)) 98 | else: 99 | self.n_categories = tuple(n_categories) 100 | else: 101 | self.n_categories = None 102 | 103 | self._reset_cache() 104 | 105 | def _initialize(self, d, n_categories): 106 | """Initialize the probability distribution. 107 | 108 | This method is meant to only be called internally. It initializes the 109 | parameters of the distribution and stores its dimensionality. For more 110 | complex methods, this function will do more. 111 | 112 | 113 | Parameters 114 | ---------- 115 | d: int 116 | The dimensionality the distribution is being initialized to. 117 | 118 | n_categories: list, numpy.ndarray, torch.tensor, or None, shape=(d,) 119 | A vector with the maximum number of categories that each column 120 | can have. If not given, this will be inferred from the data. 121 | Default is None. 122 | """ 123 | 124 | self.probs = _cast_as_parameter(torch.zeros(*n_categories, 125 | dtype=self.dtype, device=self.device)) 126 | 127 | self.n_categories = n_categories 128 | self._initialized = True 129 | super()._initialize(d) 130 | 131 | def _reset_cache(self): 132 | """Reset the internally stored statistics. 133 | 134 | This method is meant to only be called internally. It resets the 135 | stored statistics used to update the model parameters as well as 136 | recalculates the cached values meant to speed up log probability 137 | calculations. 138 | """ 139 | 140 | if self._initialized == False: 141 | return 142 | 143 | self._w_sum = torch.zeros(self.d, dtype=self.probs.dtype) 144 | self._xw_sum = torch.zeros(*self.n_categories, dtype=self.probs.dtype) 145 | 146 | self._log_probs = torch.log(self.probs) 147 | 148 | def sample(self, n): 149 | """Sample from the probability distribution. 150 | 151 | This method will return `n` samples generated from the underlying 152 | probability distribution. For a mixture model, this involves first 153 | sampling the component using the prior probabilities, and then sampling 154 | from the chosen distribution. 155 | 156 | 157 | Parameters 158 | ---------- 159 | n: int 160 | The number of samples to generate. 161 | 162 | 163 | Returns 164 | ------- 165 | X: torch.tensor, shape=(n, self.d) 166 | Randomly generated samples. 167 | """ 168 | 169 | idxs = torch.multinomial(self.probs.flatten(), num_samples=n, 170 | replacement=True) 171 | 172 | X = numpy.unravel_index(idxs.numpy(), self.n_categories) 173 | X = numpy.stack(X).T 174 | return torch.from_numpy(X) 175 | 176 | def log_probability(self, X): 177 | """Calculate the log probability of each example. 178 | 179 | This method calculates the log probability of each example given the 180 | parameters of the distribution. 
The examples must be given in a 2D 181 | format. For a joint categorical distribution, each value must be an 182 | integer category that is smaller than the maximum number of categories 183 | for each feature. 184 | 185 | Note: This differs from some other log probability calculation 186 | functions, like those in torch.distributions, because it is not 187 | returning the log probability of each feature independently, but rather 188 | the total log probability of the entire example. 189 | 190 | 191 | Parameters 192 | ---------- 193 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 194 | A set of examples to evaluate. 195 | 196 | 197 | Returns 198 | ------- 199 | logp: torch.Tensor, shape=(-1,) 200 | The log probability of each example. 201 | """ 202 | 203 | X = _check_parameter(_cast_as_tensor(X), "X", 204 | value_set=tuple(range(max(self.n_categories)+1)), ndim=2, 205 | shape=(-1, self.d), check_parameter=self.check_data) 206 | 207 | logps = torch.zeros(len(X), dtype=self.probs.dtype) 208 | for i in range(len(X)): 209 | logps[i] = self._log_probs[tuple(X[i])] 210 | 211 | return logps 212 | 213 | 214 | def summarize(self, X, sample_weight=None): 215 | """Extract the sufficient statistics from a batch of data. 216 | 217 | This method calculates the sufficient statistics from optionally 218 | weighted data and adds them to the stored cache. The examples must be 219 | given in a 2D format. Sample weights can either be provided as one 220 | value per example or as a 2D matrix of weights for each feature in 221 | each example. 222 | 223 | 224 | Parameters 225 | ---------- 226 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 227 | A set of examples to summarize. 228 | 229 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 230 | A set of weights for the examples. This can be either of shape 231 | (-1, self.d) or a vector of shape (-1,). Default is ones. 232 | """ 233 | 234 | if self.frozen == True: 235 | return 236 | 237 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 238 | dtypes=(torch.int32, torch.int64), check_parameter=self.check_data) 239 | 240 | if not self._initialized: 241 | self._initialize(len(X[0]), torch.max(X, dim=0)[0]+1) 242 | 243 | X = _check_parameter(X, "X", shape=(-1, self.d), 244 | value_set=tuple(range(max(self.n_categories)+1)), 245 | check_parameter=self.check_data) 246 | 247 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight, 248 | dtype=torch.float32))[:,0] 249 | 250 | self._w_sum += torch.sum(sample_weight, dim=0) 251 | for i in range(len(X)): 252 | self._xw_sum[tuple(X[i])] += sample_weight[i] 253 | 254 | def from_summaries(self): 255 | """Update the model parameters given the extracted statistics. 256 | 257 | This method uses calculated statistics from calls to the `summarize` 258 | method to update the distribution parameters. Hyperparameters for the 259 | update are passed in at initialization time. 260 | 261 | Note: Internally, a call to `fit` is just a successive call to the 262 | `summarize` method followed by the `from_summaries` method. 
263 | """ 264 | 265 | if self.frozen == True: 266 | return 267 | 268 | probs = self._xw_sum / self._w_sum[0] 269 | 270 | _update_parameter(self.probs, probs, self.inertia) 271 | self._reset_cache() 272 | -------------------------------------------------------------------------------- /pomegranate/distributions/lognormal.py: -------------------------------------------------------------------------------- 1 | # normal.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from .normal import Normal 13 | 14 | 15 | class LogNormal(Normal): 16 | """A lognormal object. 17 | 18 | The parameters are the mu and sigma of the normal distribution, which 19 | is the the exponential of the log normal distribution. This 20 | distribution can assume that features are independent of the others if 21 | the covariance type is 'diag' or 'sphere', but if the type is 'full' then 22 | the features are not independent. 23 | 24 | There are two ways to initialize this object. The first is to pass in 25 | the tensor of probablity parameters, at which point they can immediately be 26 | used. The second is to not pass in the rate parameters and then call 27 | either `fit` or `summarize` + `from_summaries`, at which point the probability 28 | parameter will be learned from data. 29 | 30 | 31 | Parameters 32 | ---------- 33 | means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 34 | The mean values of the normal distributions. Default is None. 35 | 36 | covs: list, numpy.ndarray, torch.Tensor, or None, optional 37 | The variances and covariances of the distribution. If covariance_type 38 | is 'full', the shape should be (self.d, self.d); if 'diag', the shape 39 | should be (self.d,); if 'sphere', it should be (1,). Note that this is 40 | the variances or covariances in all settings, and not the standard 41 | deviation, as may be more common for diagonal covariance matrices. 42 | Default is None. 43 | 44 | covariance_type: str, optional 45 | The type of covariance matrix. Must be one of 'full', 'diag', or 46 | 'sphere'. Default is 'full'. 47 | 48 | min_cov: float or None, optional 49 | The minimum variance or covariance. 50 | 51 | inertia: float, [0, 1], optional 52 | Indicates the proportion of the update to apply to the parameters 53 | during training. When the inertia is 0.0, the update is applied in 54 | its entirety and the previous parameters are ignored. When the 55 | inertia is 1.0, the update is entirely ignored and the previous 56 | parameters are kept, equivalently to if the parameters were frozen. 57 | 58 | frozen: bool, optional 59 | Whether all the parameters associated with this distribution are frozen. 60 | If you want to freeze individual pameters, or individual values in those 61 | parameters, you must modify the `frozen` attribute of the tensor or 62 | parameter directly. Default is False. 
63 | """ 64 | 65 | def __init__( 66 | self, 67 | means=None, 68 | covs=None, 69 | covariance_type="full", 70 | min_cov=None, 71 | inertia=0.0, 72 | frozen=False, 73 | check_data=True, 74 | ): 75 | self.name = "LogNormal" 76 | super().__init__( 77 | means=means, 78 | covs=covs, 79 | covariance_type=covariance_type, 80 | min_cov=min_cov, 81 | inertia=inertia, 82 | frozen=frozen, 83 | check_data=check_data, 84 | ) 85 | 86 | def sample(self, n): 87 | """Sample from the probability distribution. 88 | 89 | This method will return `n` samples generated from the underlying 90 | probability distribution. 91 | 92 | 93 | Parameters 94 | ---------- 95 | n: int 96 | The number of samples to generate. 97 | 98 | 99 | Returns 100 | ------- 101 | X: torch.tensor, shape=(n, self.d) 102 | Randomly generated samples. 103 | """ 104 | 105 | if self.covariance_type == "diag": 106 | return torch.distributions.Normal(self.means, self.covs).sample([n]).exp() 107 | elif self.covariance_type == "full": 108 | return ( 109 | torch.distributions.MultivariateNormal(self.means, self.covs) 110 | .sample([n]) 111 | .exp() 112 | ) 113 | 114 | def log_probability(self, X): 115 | """Calculate the log probability of each example. 116 | 117 | This method calculates the log probability of each example given the 118 | parameters of the distribution. The examples must be given in a 2D 119 | format. 120 | 121 | Note: This differs from some other log probability calculation 122 | functions, like those in torch.distributions, because it is not 123 | returning the log probability of each feature independently, but rather 124 | the total log probability of the entire example. 125 | 126 | 127 | Parameters 128 | ---------- 129 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 130 | A set of examples to evaluate. 131 | 132 | 133 | Returns 134 | ------- 135 | logp: torch.Tensor, shape=(-1,) 136 | The log probability of each example. 137 | """ 138 | 139 | X = _check_parameter( 140 | _cast_as_tensor(X, dtype=self.means.dtype), 141 | "X", 142 | ndim=2, 143 | shape=(-1, self.d), 144 | check_parameter=self.check_data, 145 | ) 146 | 147 | # take the log of X 148 | x_log = X.log() 149 | 150 | return super().log_probability(x_log) 151 | 152 | def summarize(self, X, sample_weight=None): 153 | """Extract the sufficient statistics from a batch of data. 154 | 155 | This method calculates the sufficient statistics from optionally 156 | weighted data and adds them to the stored cache. The examples must be 157 | given in a 2D format. Sample weights can either be provided as one 158 | value per example or as a 2D matrix of weights for each feature in 159 | each example. 160 | 161 | 162 | Parameters 163 | ---------- 164 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 165 | A set of examples to summarize. 166 | 167 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 168 | A set of weights for the examples. This can be either of shape 169 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
170 | """ 171 | 172 | if self.frozen is True: 173 | return 174 | X = _cast_as_tensor(X, dtype=self.means.dtype) 175 | super().summarize(X.log(), sample_weight=sample_weight) 176 | -------------------------------------------------------------------------------- /pomegranate/distributions/poisson.py: -------------------------------------------------------------------------------- 1 | # poisson.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | 11 | from ._distribution import Distribution 12 | 13 | 14 | class Poisson(Distribution): 15 | """An poisson distribution object. 16 | 17 | A poisson distribution models the number of occurrences of events that 18 | happen in a fixed time span, assuming that the occurrence of each event 19 | is independent. This distribution also assumes that each feature is 20 | independent of the others. 21 | 22 | There are two ways to initialize this objecct. The first is to pass in 23 | the tensor of lambda parameters, at which point they can immediately be 24 | used. The second is to not pass in the lambda parameters and then call 25 | either `fit` or `summary` + `from_summaries`, at which point the lambda 26 | parameter will be learned from data. 27 | 28 | 29 | Parameters 30 | ---------- 31 | lambdas: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 32 | The lambda parameters for each feature. Default is None. 33 | 34 | inertia: float, (0, 1), optional 35 | Indicates the proportion of the update to apply to the parameters 36 | during training. When the inertia is 0.0, the update is applied in 37 | its entirety and the previous parameters are ignored. When the 38 | inertia is 1.0, the update is entirely ignored and the previous 39 | parameters are kept, equivalently to if the parameters were frozen. 40 | 41 | frozen: bool, optional 42 | Whether all the parameters associated with this distribution are frozen. 43 | If you want to freeze individual pameters, or individual values in those 44 | parameters, you must modify the `frozen` attribute of the tensor or 45 | parameter directly. Default is False. 46 | 47 | check_data: bool, optional 48 | Whether to check properties of the data and potentially recast it to 49 | torch.tensors. This does not prevent checking of parameters but can 50 | slightly speed up computation when you know that your inputs are valid. 51 | Setting this to False is also necessary for compiling. 52 | """ 53 | 54 | 55 | def __init__(self, lambdas=None, inertia=0.0, frozen=False, 56 | check_data=True): 57 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 58 | self.name = "Poisson" 59 | 60 | self.lambdas = _check_parameter(_cast_as_parameter(lambdas), "lambdas", 61 | min_value=0, ndim=1) 62 | 63 | self._initialized = lambdas is not None 64 | self.d = self.lambdas.shape[-1] if self._initialized else None 65 | self._reset_cache() 66 | 67 | def _initialize(self, d): 68 | """Initialize the probability distribution. 69 | 70 | This method is meant to only be called internally. It initializes the 71 | parameters of the distribution and stores its dimensionality. For more 72 | complex methods, this function will do more. 73 | 74 | 75 | Parameters 76 | ---------- 77 | d: int 78 | The dimensionality the distribution is being initialized to. 
79 | """ 80 | 81 | self.lambdas = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 82 | device=self.device)) 83 | 84 | self._initialized = True 85 | super()._initialize(d) 86 | 87 | def _reset_cache(self): 88 | """Reset the internally stored statistics. 89 | 90 | This method is meant to only be called internally. It resets the 91 | stored statistics used to update the model parameters as well as 92 | recalculates the cached values meant to speed up log probability 93 | calculations. 94 | """ 95 | 96 | if self._initialized == False: 97 | return 98 | 99 | self.register_buffer("_w_sum", torch.zeros(self.d, device=self.device)) 100 | self.register_buffer("_xw_sum", torch.zeros(self.d, device=self.device)) 101 | 102 | self.register_buffer("_log_lambdas", torch.log(self.lambdas)) 103 | 104 | def sample(self, n): 105 | """Sample from the probability distribution. 106 | 107 | This method will return `n` samples generated from the underlying 108 | probability distribution. 109 | 110 | 111 | Parameters 112 | ---------- 113 | n: int 114 | The number of samples to generate. 115 | 116 | 117 | Returns 118 | ------- 119 | X: torch.tensor, shape=(n, self.d) 120 | Randomly generated samples. 121 | """ 122 | 123 | return torch.distributions.Poisson(self.lambdas).sample([n]) 124 | 125 | def log_probability(self, X): 126 | """Calculate the log probability of each example. 127 | 128 | This method calculates the log probability of each example given the 129 | parameters of the distribution. The examples must be given in a 2D 130 | format. For a Poisson distribution, each entry in the data must 131 | be non-negative. 132 | 133 | Note: This differs from some other log probability calculation 134 | functions, like those in torch.distributions, because it is not 135 | returning the log probability of each feature independently, but rather 136 | the total log probability of the entire example. 137 | 138 | 139 | Parameters 140 | ---------- 141 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 142 | A set of examples to evaluate. 143 | 144 | 145 | Returns 146 | ------- 147 | logp: torch.Tensor, shape=(-1,) 148 | The log probability of each example. 149 | """ 150 | 151 | X = _check_parameter(_cast_as_tensor(X), "X", min_value=0.0, 152 | ndim=2, shape=(-1, self.d), check_parameter=self.check_data) 153 | 154 | return torch.sum(X * self._log_lambdas - self.lambdas - 155 | torch.lgamma(X+1), dim=-1) 156 | 157 | def summarize(self, X, sample_weight=None): 158 | """Extract the sufficient statistics from a batch of data. 159 | 160 | This method calculates the sufficient statistics from optionally 161 | weighted data and adds them to the stored cache. The examples must be 162 | given in a 2D format. Sample weights can either be provided as one 163 | value per example or as a 2D matrix of weights for each feature in 164 | each example. 165 | 166 | 167 | Parameters 168 | ---------- 169 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 170 | A set of examples to summarize. 171 | 172 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 173 | A set of weights for the examples. This can be either of shape 174 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
175 | """ 176 | 177 | if self.frozen == True: 178 | return 179 | 180 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 181 | _check_parameter(X, "X", min_value=0, check_parameter=self.check_data) 182 | 183 | self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) 184 | self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) 185 | 186 | def from_summaries(self): 187 | """Update the model parameters given the extracted statistics. 188 | 189 | This method uses calculated statistics from calls to the `summarize` 190 | method to update the distribution parameters. Hyperparameters for the 191 | update are passed in at initialization time. 192 | 193 | Note: Internally, a call to `fit` is just a successive call to the 194 | `summarize` method followed by the `from_summaries` method. 195 | """ 196 | 197 | if self.frozen == True: 198 | return 199 | 200 | lambdas = self._xw_sum / self._w_sum 201 | _update_parameter(self.lambdas, lambdas, self.inertia) 202 | self._reset_cache() 203 | -------------------------------------------------------------------------------- /pomegranate/distributions/student_t.py: -------------------------------------------------------------------------------- 1 | # student_t.py 2 | # Contact: Jacob Schreiber 3 | 4 | import math 5 | import torch 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | 12 | from .normal import Normal 13 | 14 | 15 | class StudentT(Normal): 16 | """A Student T distribution. 17 | 18 | A Student T distribution models the probability of a variable occurring under 19 | a bell-shaped curve with heavy tails. Basically, this is a version of the 20 | normal distribution that is less resistant to outliers. It is described by 21 | a vector of mean values and a vector of variance values. This 22 | distribution can assume that features are independent of the others if 23 | the covariance type is 'diag' or 'sphere', but if the type is 'full' then 24 | the features are not independent. 25 | 26 | There are two ways to initialize this object. The first is to pass in 27 | the tensor of probability parameters, at which point they can immediately be 28 | used. The second is to not pass in the rate parameters and then call 29 | either `fit` or `summary` + `from_summaries`, at which point the probability 30 | parameter will be learned from data. 31 | 32 | 33 | Parameters 34 | ---------- 35 | means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 36 | The mean values of the distributions. Default is None. 37 | 38 | covs: list, numpy.ndarray, torch.Tensor, or None, optional 39 | The variances and covariances of the distribution. If covariance_type 40 | is 'full', the shape should be (self.d, self.d); if 'diag', the shape 41 | should be (self.d,); if 'sphere', it should be (1,). Note that this is 42 | the variances or covariances in all settings, and not the standard 43 | deviation, as may be more common for diagonal covariance matrices. 44 | Default is None. 45 | 46 | covariance_type: str, optional 47 | The type of covariance matrix. Must be one of 'full', 'diag', or 48 | 'sphere'. Default is 'full'. 49 | 50 | min_cov: float or None, optional 51 | The minimum variance or covariance. 52 | 53 | inertia: float, [0, 1], optional 54 | Indicates the proportion of the update to apply to the parameters 55 | during training. 
When the inertia is 0.0, the update is applied in 56 | its entirety and the previous parameters are ignored. When the 57 | inertia is 1.0, the update is entirely ignored and the previous 58 | parameters are kept, equivalently to if the parameters were frozen. 59 | 60 | frozen: bool, optional 61 | Whether all the parameters associated with this distribution are frozen. 62 | If you want to freeze individual pameters, or individual values in those 63 | parameters, you must modify the `frozen` attribute of the tensor or 64 | parameter directly. Default is False. 65 | 66 | check_data: bool, optional 67 | Whether to check properties of the data and potentially recast it to 68 | torch.tensors. This does not prevent checking of parameters but can 69 | slightly speed up computation when you know that your inputs are valid. 70 | Setting this to False is also necessary for compiling. 71 | """ 72 | 73 | def __init__(self, dofs, means=None, covs=None, covariance_type='diag', 74 | min_cov=None, inertia=0.0, frozen=False, check_data=True): 75 | dofs = _check_parameter(_cast_as_tensor(dofs), "dofs", min_value=1, 76 | ndim=0, dtypes=(torch.int32, torch.int64)) 77 | self.dofs = dofs 78 | 79 | super().__init__(means=means, covs=covs, min_cov=min_cov, 80 | covariance_type=covariance_type, inertia=inertia, frozen=frozen, 81 | check_data=check_data) 82 | 83 | self.name = "StudentT" 84 | 85 | del self.dofs 86 | 87 | self.register_buffer("dofs", _cast_as_tensor(dofs)) 88 | self.register_buffer("_lgamma_dofsp1", torch.lgamma((dofs + 1) / 2.0)) 89 | self.register_buffer("_lgamma_dofs", torch.lgamma(dofs / 2.0)) 90 | 91 | def _reset_cache(self): 92 | """Reset the internally stored statistics. 93 | 94 | This method is meant to only be called internally. It resets the 95 | stored statistics used to update the model parameters as well as 96 | recalculates the cached values meant to speed up log probability 97 | calculations. 98 | """ 99 | 100 | super()._reset_cache() 101 | if self._initialized == False: 102 | return 103 | 104 | self.register_buffer("_log_sqrt_dofs_pi_cov", torch.log(torch.sqrt( 105 | self.dofs * math.pi * self.covs))) 106 | 107 | def sample(self, n): 108 | """Sample from the probability distribution. 109 | 110 | This method will return `n` samples generated from the underlying 111 | probability distribution. 112 | 113 | 114 | Parameters 115 | ---------- 116 | n: int 117 | The number of samples to generate. 118 | 119 | 120 | Returns 121 | ------- 122 | X: torch.tensor, shape=(n, self.d) 123 | Randomly generated samples. 124 | """ 125 | 126 | return torch.distributions.StudentT(self.means, self.covs).sample([n]) 127 | 128 | def log_probability(self, X): 129 | """Calculate the log probability of each example. 130 | 131 | This method calculates the log probability of each example given the 132 | parameters of the distribution. The examples must be given in a 2D 133 | format. 134 | 135 | Note: This differs from some other log probability calculation 136 | functions, like those in torch.distributions, because it is not 137 | returning the log probability of each feature independently, but rather 138 | the total log probability of the entire example. 139 | 140 | 141 | Parameters 142 | ---------- 143 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 144 | A set of examples to evaluate. 145 | 146 | 147 | Returns 148 | ------- 149 | logp: torch.Tensor, shape=(-1,) 150 | The log probability of each example. 
151 | """ 152 | 153 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 154 | shape=(-1, self.d), check_parameter=self.check_data) 155 | 156 | t = (X - self.means) ** 2 / self.covs 157 | return torch.sum(self._lgamma_dofsp1 - self._lgamma_dofs - \ 158 | self._log_sqrt_dofs_pi_cov -((self.dofs + 1) / 2.0) * 159 | torch.log(1 + t / self.dofs), dim=-1) 160 | 161 | -------------------------------------------------------------------------------- /pomegranate/distributions/uniform.py: -------------------------------------------------------------------------------- 1 | # uniform.py 2 | # Contact: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from .._utils import _cast_as_tensor 7 | from .._utils import _cast_as_parameter 8 | from .._utils import _update_parameter 9 | from .._utils import _check_parameter 10 | from .._utils import _check_shapes 11 | 12 | from ._distribution import Distribution 13 | 14 | inf = float("inf") 15 | 16 | 17 | class Uniform(Distribution): 18 | """A uniform distribution. 19 | 20 | A uniform distribution models the probability of a variable occurring given 21 | a range that has the same probability within it and no probability outside 22 | it. It is described by a vector of minimum and maximum values for this 23 | range. This distribution assumes that the features are independent of 24 | each other. 25 | 26 | There are two ways to initialize this object. The first is to pass in 27 | the tensor of probability parameters, at which point they can immediately be 28 | used. The second is to not pass in the rate parameters and then call 29 | either `fit` or `summary` + `from_summaries`, at which point the probability 30 | parameter will be learned from data. 31 | 32 | 33 | Parameters 34 | ---------- 35 | mins: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional 36 | The minimum values of the range. 37 | 38 | maxs: list, numpy.ndarray, torch.Tensor, or None, optional 39 | The maximum values of the range. 40 | 41 | inertia: float, [0, 1], optional 42 | Indicates the proportion of the update to apply to the parameters 43 | during training. When the inertia is 0.0, the update is applied in 44 | its entirety and the previous parameters are ignored. When the 45 | inertia is 1.0, the update is entirely ignored and the previous 46 | parameters are kept, equivalently to if the parameters were frozen. 47 | 48 | frozen: bool, optional 49 | Whether all the parameters associated with this distribution are frozen. 50 | If you want to freeze individual pameters, or individual values in those 51 | parameters, you must modify the `frozen` attribute of the tensor or 52 | parameter directly. Default is False. 53 | 54 | check_data: bool, optional 55 | Whether to check properties of the data and potentially recast it to 56 | torch.tensors. This does not prevent checking of parameters but can 57 | slightly speed up computation when you know that your inputs are valid. 58 | Setting this to False is also necessary for compiling. 
59 | """ 60 | 61 | def __init__(self, mins=None, maxs=None, inertia=0.0, frozen=False, 62 | check_data=True): 63 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 64 | self.name = "Uniform" 65 | 66 | self.mins = _check_parameter(_cast_as_parameter(mins), "mins", ndim=1) 67 | self.maxs = _check_parameter(_cast_as_parameter(maxs), "maxs", ndim=1) 68 | 69 | _check_shapes([self.mins, self.maxs], ["mins", "maxs"]) 70 | 71 | self._initialized = (mins is not None) and (maxs is not None) 72 | self.d = self.mins.shape[-1] if self._initialized else None 73 | self._reset_cache() 74 | 75 | def _initialize(self, d): 76 | """Initialize the probability distribution. 77 | 78 | This method is meant to only be called internally. It initializes the 79 | parameters of the distribution and stores its dimensionality. For more 80 | complex methods, this function will do more. 81 | 82 | 83 | Parameters 84 | ---------- 85 | d: int 86 | The dimensionality the distribution is being initialized to. 87 | """ 88 | 89 | self.mins = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 90 | device=self.device)) 91 | self.maxs = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, 92 | device=self.device)) 93 | 94 | self._initialized = True 95 | super()._initialize(d) 96 | 97 | def _reset_cache(self): 98 | """Reset the internally stored statistics. 99 | 100 | This method is meant to only be called internally. It resets the 101 | stored statistics used to update the model parameters as well as 102 | recalculates the cached values meant to speed up log probability 103 | calculations. 104 | """ 105 | 106 | if self._initialized == False: 107 | return 108 | 109 | self.register_buffer("_x_mins", torch.full((self.d,), inf, 110 | device=self.device)) 111 | self.register_buffer("_x_maxs", torch.full((self.d,), -inf, 112 | device=self.device)) 113 | self.register_buffer("_logps", -torch.log(self.maxs - self.mins)) 114 | 115 | def sample(self, n): 116 | """Sample from the probability distribution. 117 | 118 | This method will return `n` samples generated from the underlying 119 | probability distribution. 120 | 121 | 122 | Parameters 123 | ---------- 124 | n: int 125 | The number of samples to generate. 126 | 127 | 128 | Returns 129 | ------- 130 | X: torch.tensor, shape=(n, self.d) 131 | Randomly generated samples. 132 | """ 133 | 134 | return torch.distributions.Uniform(self.mins, self.maxs).sample([n]) 135 | 136 | def log_probability(self, X): 137 | """Calculate the log probability of each example. 138 | 139 | This method calculates the log probability of each example given the 140 | parameters of the distribution. The examples must be given in a 2D 141 | format. For a Bernoulli distribution, each entry in the data must 142 | be either 0 or 1. 143 | 144 | Note: This differs from some other log probability calculation 145 | functions, like those in torch.distributions, because it is not 146 | returning the log probability of each feature independently, but rather 147 | the total log probability of the entire example. 148 | 149 | 150 | Parameters 151 | ---------- 152 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 153 | A set of examples to evaluate. 154 | 155 | 156 | Returns 157 | ------- 158 | logp: torch.Tensor, shape=(-1,) 159 | The log probability of each example. 
160 | """ 161 | 162 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 163 | shape=(-1, self.d), check_parameter=self.check_data) 164 | 165 | return torch.where((X >= self.mins) & (X <= self.maxs), self._logps, 166 | float("-inf")).sum(dim=1) 167 | 168 | def summarize(self, X, sample_weight=None): 169 | """Extract the sufficient statistics from a batch of data. 170 | 171 | This method calculates the sufficient statistics from optionally 172 | weighted data and adds them to the stored cache. The examples must be 173 | given in a 2D format. Sample weights can either be provided as one 174 | value per example or as a 2D matrix of weights for each feature in 175 | each example. 176 | 177 | 178 | Parameters 179 | ---------- 180 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 181 | A set of examples to summarize. 182 | 183 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 184 | A set of weights for the examples. This can be either of shape 185 | (-1, self.d) or a vector of shape (-1,). Default is ones. 186 | """ 187 | 188 | if self.frozen == True: 189 | return 190 | 191 | X, sample_weight = super().summarize(X, sample_weight=sample_weight) 192 | 193 | self._x_mins = torch.minimum(self._x_mins, X.min(dim=0).values) 194 | self._x_maxs = torch.maximum(self._x_maxs, X.max(dim=0).values) 195 | 196 | def from_summaries(self): 197 | """Update the model parameters given the extracted statistics. 198 | 199 | This method uses calculated statistics from calls to the `summarize` 200 | method to update the distribution parameters. Hyperparameters for the 201 | update are passed in at initialization time. 202 | 203 | Note: Internally, a call to `fit` is just a successive call to the 204 | `summarize` method followed by the `from_summaries` method. 205 | """ 206 | 207 | if self.frozen == True: 208 | return 209 | 210 | _update_parameter(self.mins, self._x_mins, self.inertia) 211 | _update_parameter(self.maxs, self._x_maxs, self.inertia) 212 | self._reset_cache() 213 | -------------------------------------------------------------------------------- /pomegranate/distributions/zero_inflated.py: -------------------------------------------------------------------------------- 1 | # zero_inflated.py 2 | # Contact: Jacob Schreiber 3 | 4 | import time 5 | import torch 6 | 7 | from .._utils import _cast_as_tensor 8 | from .._utils import _cast_as_parameter 9 | from .._utils import _update_parameter 10 | from .._utils import _check_parameter 11 | from .._utils import _reshape_weights 12 | 13 | from ._distribution import Distribution 14 | 15 | 16 | class ZeroInflated(Distribution): 17 | """A wrapper for a zero-inflated distribution. 18 | 19 | Some discrete distributions, e.g. Poisson or negative binomial, are used 20 | to model data that has many more zeroes in it than one would expect from 21 | the true signal itself. Potentially, this is because data collection devices 22 | fail or other gaps exist in the data. A zero-inflated distribution is 23 | essentially a mixture of these zero values and the real underlying 24 | distribution. 25 | 26 | Accordingly, this class serves as a wrapper that can be dropped in for 27 | other probability distributions and makes them "zero-inflated". It is 28 | similar to a mixture model between the distribution passed in and a dirac 29 | delta distribution, except that the mixture happens independently for each 30 | distribution as well as for each example. 
31 | 32 | 33 | Parameters 34 | ---------- 35 | distribution: pomegranate.distributions.Distribution 36 | A pomegranate distribution object. It should probably be a discrete 37 | distribution, but does not technically have to be. 38 | 39 | priors: tuple, numpy.ndarray, torch.Tensor, or None. shape=(2,), optional 40 | The prior probabilities over the given distribution and the dirac 41 | delta component. Default is None. 42 | 43 | max_iter: int, optional 44 | The number of iterations to do in the EM step of fitting the 45 | distribution. Default is 10. 46 | 47 | tol: float, optional 48 | The threshold at which to stop during fitting when the improvement 49 | goes under. Default is 0.1. 50 | 51 | inertia: float, [0, 1], optional 52 | Indicates the proportion of the update to apply to the parameters 53 | during training. When the inertia is 0.0, the update is applied in 54 | its entirety and the previous parameters are ignored. When the 55 | inertia is 1.0, the update is entirely ignored and the previous 56 | parameters are kept, equivalently to if the parameters were frozen. 57 | 58 | frozen: bool, optional 59 | Whether all the parameters associated with this distribution are frozen. 60 | If you want to freeze individual pameters, or individual values in those 61 | parameters, you must modify the `frozen` attribute of the tensor or 62 | parameter directly. Default is False. 63 | 64 | verbose: bool, optional 65 | Whether to print the improvement and timings during training. 66 | """ 67 | 68 | def __init__(self, distribution, priors=None, max_iter=10, 69 | tol=0.1, inertia=0.0, frozen=False, check_data=False, verbose=False): 70 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 71 | self.name = "ZeroInflated" 72 | 73 | self.distribution = distribution 74 | self.priors = _check_parameter(_cast_as_parameter(priors), "priors", 75 | min_value=0, max_value=1, ndim=1, value_sum=1.0) 76 | 77 | self.verbose = verbose 78 | self._initialized = distribution._initialized is True 79 | self.d = distribution.d if self._initialized else None 80 | 81 | self.max_iter = max_iter 82 | self.tol = tol 83 | 84 | if self.priors is None and self.d is not None: 85 | self.priors = _cast_as_parameter(torch.ones(self.d, 86 | device=self.device) / 2) 87 | 88 | self._reset_cache() 89 | 90 | def _initialize(self, X): 91 | """Initialize the probability distribution. 92 | 93 | This method is meant to only be called internally. It initializes the 94 | parameters of the distribution and stores its dimensionality. For more 95 | complex methods, this function will do more. 96 | 97 | 98 | Parameters 99 | ---------- 100 | X: list, numpy.ndarray, torch.Tensor, shape=(1, self.d) 101 | The data to use to initialize the model. 102 | """ 103 | 104 | self.distribution._initialize(X.shape[1]) 105 | self.distribution.fit(X) 106 | 107 | self.priors = _cast_as_parameter(torch.ones(X.shape[1], 108 | device=self.device) / 2) 109 | self._initialized = True 110 | super()._initialize(X.shape[1]) 111 | 112 | def _reset_cache(self): 113 | """Reset the internally stored statistics. 114 | 115 | This method is meant to only be called internally. It resets the 116 | stored statistics used to update the model parameters as well as 117 | recalculates the cached values meant to speed up log probability 118 | calculations. 
119 | """ 120 | 121 | if self._initialized == False: 122 | return 123 | 124 | self.register_buffer("_w_sum", torch.zeros(self.d, 2, 125 | device=self.device)) 126 | self.register_buffer("_log_priors", torch.log(self.priors)) 127 | 128 | def _emission_matrix(self, X): 129 | """Return the emission/responsibility matrix. 130 | 131 | This method returns the log probability of each example under each 132 | distribution contained in the model with the log prior probability 133 | of each component added. 134 | 135 | 136 | Parameters 137 | ---------- 138 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 139 | A set of examples to evaluate. 140 | 141 | 142 | Returns 143 | ------- 144 | e: torch.Tensor, shape=(-1, self.k) 145 | A set of log probabilities for each example under each distribution. 146 | """ 147 | 148 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=2, 149 | shape=(-1, self.d)) 150 | 151 | e = torch.empty(X.shape[0], self.d, 2, device=self.device) 152 | e[:, :, 0] = self._log_priors.unsqueeze(0) 153 | e[:, :, 0] += self.distribution.log_probability(X).unsqueeze(1) 154 | 155 | e[:, :, 1] = torch.log(1 - self.priors).unsqueeze(0) 156 | e[:, :, 1] += torch.where(X == 0, 0, float("-inf")) 157 | return e 158 | 159 | def fit(self, X, sample_weight=None): 160 | """Fit the model to optionally weighted examples. 161 | 162 | This method implements the core of the learning process. For a 163 | zero-inflated distribution, this involves performing EM until the 164 | distribution being fit converges. 165 | 166 | This method is largely a wrapper around the `summarize` and 167 | `from_summaries` methods. It's primary contribution is serving as a 168 | loop around these functions and to monitor convergence. 169 | 170 | 171 | Parameters 172 | ---------- 173 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 174 | A set of examples to evaluate. 175 | 176 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 177 | A set of weights for the examples. This can be either of shape 178 | (-1, self.d) or a vector of shape (-1,). Default is ones. 179 | 180 | 181 | Returns 182 | ------- 183 | self 184 | """ 185 | 186 | logp = None 187 | for i in range(self.max_iter): 188 | start_time = time.time() 189 | 190 | last_logp = logp 191 | logp = self.summarize(X, sample_weight=sample_weight) 192 | 193 | if i > 0: 194 | improvement = logp - last_logp 195 | duration = time.time() - start_time 196 | 197 | if self.verbose: 198 | print("[{}] Improvement: {}, Time: {:4.4}s".format(i, 199 | improvement, duration)) 200 | 201 | if improvement < self.tol: 202 | break 203 | 204 | self.from_summaries() 205 | 206 | self._reset_cache() 207 | return self 208 | 209 | def summarize(self, X, sample_weight=None): 210 | """Extract the sufficient statistics from a batch of data. 211 | 212 | This method calculates the sufficient statistics from optionally 213 | weighted data and adds them to the stored cache. The examples must be 214 | given in a 2D format. Sample weights can either be provided as one 215 | value per example or as a 2D matrix of weights for each feature in 216 | each example. 217 | 218 | 219 | Parameters 220 | ---------- 221 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) 222 | A set of examples to summarize. 223 | 224 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 225 | A set of weights for the examples. This can be either of shape 226 | (-1, self.d) or a vector of shape (-1,). Default is ones. 
227 | """ 228 | 229 | X = _cast_as_tensor(X) 230 | if not self._initialized: 231 | self._initialize(X) 232 | 233 | _check_parameter(X, "X", ndim=2, shape=(-1, self.d)) 234 | sample_weight = _reshape_weights(X, _cast_as_tensor(sample_weight, 235 | dtype=torch.float32), device=self.device) 236 | 237 | e = self._emission_matrix(X) 238 | logp = torch.logsumexp(e, dim=2, keepdims=True) 239 | y = torch.exp(e - logp) 240 | 241 | self.distribution.summarize(X, y[:, :, 0] * sample_weight) 242 | 243 | if not self.frozen: 244 | self._w_sum += torch.sum(y * sample_weight.unsqueeze(-1), dim=(0, 1)) 245 | 246 | return torch.sum(logp) 247 | 248 | def from_summaries(self): 249 | """Update the model parameters given the extracted statistics. 250 | 251 | This method uses calculated statistics from calls to the `summarize` 252 | method to update the distribution parameters. Hyperparameters for the 253 | update are passed in at initialization time. 254 | 255 | Note: Internally, a call to `fit` is just a successive call to the 256 | `summarize` method followed by the `from_summaries` method. 257 | """ 258 | 259 | self.distribution.from_summaries() 260 | 261 | if self.frozen == True: 262 | return 263 | 264 | priors = self._w_sum[:,0] / torch.sum(self._w_sum, dim=-1) 265 | 266 | _update_parameter(self.priors, priors, self.inertia) 267 | self._reset_cache() 268 | -------------------------------------------------------------------------------- /pomegranate/hmm/__init__.py: -------------------------------------------------------------------------------- 1 | # __init__.py 2 | # Author: Jacob Schreiber 3 | 4 | from .dense_hmm import DenseHMM 5 | from .sparse_hmm import SparseHMM 6 | -------------------------------------------------------------------------------- /pomegranate/markov_chain.py: -------------------------------------------------------------------------------- 1 | # markov_chain.py 2 | # Author: Jacob Schreiber 3 | 4 | import torch 5 | 6 | from ._utils import _cast_as_tensor 7 | from ._utils import _update_parameter 8 | from ._utils import _check_parameter 9 | from ._utils import _reshape_weights 10 | 11 | from .distributions._distribution import Distribution 12 | from .distributions import Categorical 13 | from .distributions import ConditionalCategorical 14 | 15 | 16 | class MarkovChain(Distribution): 17 | """A Markov chain. 18 | 19 | A Markov chain is the simplest sequential model which factorizes the 20 | joint probability distribution P(X_{0} ... X_{t}) along a chain into the 21 | product of a marginal distribution P(X_{0}) P(X_{1} | X_{0}) ... with 22 | k conditional probability distributions for a k-th order Markov chain. 23 | 24 | Despite sometimes being thought of as an independent model, Markov chains 25 | are probability distributions over sequences just like hidden Markov 26 | models. Because a Markov chain has the same theoretical properties as a 27 | probability distribution, it can be used in any situation that a simpler 28 | distribution could, such as an emission distribution for a HMM or a 29 | component of a Bayes classifier. 30 | 31 | 32 | Parameters 33 | ---------- 34 | distributions: tuple or list or None 35 | A set of distribution objects. These objects do not need to be 36 | initialized, i.e., can be "Categorical()". 37 | 38 | k: int or None 39 | The number of conditional distributions to include in the chain, also 40 | the number of steps back to model in the sequence. This must be passed 41 | in if the distributions are not passed in. 
42 | 43 | n_categories: list, tuple, or None 44 | A list or tuple containing the number of categories that each feature 45 | has. 46 | 47 | inertia: float, [0, 1], optional 48 | Indicates the proportion of the update to apply to the parameters 49 | during training. When the inertia is 0.0, the update is applied in 50 | its entirety and the previous parameters are ignored. When the 51 | inertia is 1.0, the update is entirely ignored and the previous 52 | parameters are kept, equivalently to if the parameters were frozen. 53 | 54 | frozen: bool, optional 55 | Whether all the parameters associated with this distribution are frozen. 56 | If you want to freeze individual pameters, or individual values in those 57 | parameters, you must modify the `frozen` attribute of the tensor or 58 | parameter directly. Default is False. 59 | 60 | check_data: bool, optional 61 | Whether to check properties of the data and potentially recast it to 62 | torch.tensors. This does not prevent checking of parameters but can 63 | slightly speed up computation when you know that your inputs are valid. 64 | Setting this to False is also necessary for compiling. 65 | """ 66 | 67 | def __init__(self, distributions=None, k=None, n_categories=None, 68 | inertia=0.0, frozen=False, check_data=True): 69 | super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) 70 | self.name = "MarkovChain" 71 | 72 | self.distributions = _check_parameter(distributions, "distributions", 73 | dtypes=(list, tuple)) 74 | self.k = _check_parameter(_cast_as_tensor(k, dtype=torch.int32), "k", 75 | ndim=0) 76 | self.n_categories = _check_parameter(n_categories, "n_categories", 77 | dtypes=(list, tuple)) 78 | 79 | if distributions is None and k is None: 80 | raise ValueError("Must provide one of 'distributions', or 'k'.") 81 | 82 | if distributions is not None: 83 | self.k = len(distributions) - 1 84 | 85 | self.d = None 86 | self._initialized = distributions is not None and distributions[0]._initialized 87 | self._reset_cache() 88 | 89 | def _initialize(self, d, n_categories): 90 | """Initialize the probability distribution. 91 | 92 | This method is meant to only be called internally. It initializes the 93 | parameters of the distribution and stores its dimensionality. For more 94 | complex methods, this function will do more. 95 | 96 | 97 | Parameters 98 | ---------- 99 | d: int 100 | The dimensionality the distribution is being initialized to. 101 | 102 | n_categories: int 103 | The maximum number of categories to model. This single number is 104 | used as the maximum across all features and all timesteps. 105 | """ 106 | 107 | if self.distributions is None: 108 | self.distributions = [Categorical()] 109 | self.distributions[0]._initialize(d, max(n_categories)) 110 | 111 | for i in range(self.k): 112 | distribution = ConditionalCategorical() 113 | distribution._initialize(d, [[n_categories[j]]*(i+2) 114 | for j in range(d)]) 115 | 116 | self.distributions.append(distribution) 117 | 118 | self.n_categories = n_categories 119 | self._initialized = True 120 | super()._initialize(d) 121 | 122 | def _reset_cache(self): 123 | """Reset the internally stored statistics. 124 | 125 | This method is meant to only be called internally. It resets the 126 | stored statistics used to update the model parameters as well as 127 | recalculates the cached values meant to speed up log probability 128 | calculations. 
129 | """ 130 | 131 | if self._initialized: 132 | for distribution in self.distributions: 133 | distribution._reset_cache() 134 | 135 | def sample(self, n): 136 | """Sample from the probability distribution. 137 | 138 | This method will return `n` samples generated from the underlying 139 | probability distribution. For a mixture model, this involves first 140 | sampling the component using the prior probabilities, and then sampling 141 | from the chosen distribution. 142 | 143 | 144 | Parameters 145 | ---------- 146 | n: int 147 | The number of samples to generate. 148 | 149 | 150 | Returns 151 | ------- 152 | X: torch.tensor, shape=(n, self.d) 153 | Randomly generated samples. 154 | """ 155 | 156 | X = [self.distributions[0].sample(n)] 157 | 158 | for distribution in self.distributions[1:]: 159 | X_ = torch.stack(X).permute(1, 0, 2) 160 | samples = distribution.sample(n, X_[:, -self.k-1:]) 161 | X.append(samples) 162 | 163 | return torch.stack(X).permute(1, 0, 2) 164 | 165 | def log_probability(self, X): 166 | """Calculate the log probability of each example. 167 | 168 | This method calculates the log probability of each example given the 169 | parameters of the distribution. The examples must be given in a 3D 170 | format. 171 | 172 | 173 | Parameters 174 | ---------- 175 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, length, self.d) 176 | A set of examples to evaluate. 177 | 178 | Returns 179 | ------- 180 | logp: torch.Tensor, shape=(-1,) 181 | The log probability of each example. 182 | """ 183 | 184 | 185 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 186 | check_parameter=self.check_data) 187 | self.d = X.shape[1] 188 | 189 | logps = self.distributions[0].log_probability(X[:, 0]) 190 | for i, distribution in enumerate(self.distributions[1:-1]): 191 | logps += distribution.log_probability(X[:, :i+2]) 192 | 193 | for i in range(X.shape[1] - self.k): 194 | j = i + self.k + 1 195 | logps += self.distributions[-1].log_probability(X[:, i:j]) 196 | 197 | return logps 198 | 199 | def fit(self, X, sample_weight=None): 200 | """Fit the model to optionally weighted examples. 201 | 202 | This method will fit the provided distributions given the data and 203 | their weights. If only `k` has been provided, the relevant set of 204 | distributions will be initialized. 205 | 206 | 207 | Parameters 208 | ---------- 209 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, length, self.d) 210 | A set of examples to evaluate. 211 | 212 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 213 | A set of weights for the examples. This can be either of shape 214 | (-1, self.d) or a vector of shape (-1,). Default is ones. 215 | 216 | 217 | Returns 218 | ------- 219 | self 220 | """ 221 | 222 | self.summarize(X, sample_weight=sample_weight) 223 | self.from_summaries() 224 | return self 225 | 226 | def summarize(self, X, sample_weight=None): 227 | """Extract the sufficient statistics from a batch of data. 228 | 229 | This method calculates the sufficient statistics from optionally 230 | weighted data and adds them to the stored cache for each distribution 231 | in the network. Sample weights can either be provided as one 232 | value per example or as a 2D matrix of weights for each feature in 233 | each example. 234 | 235 | 236 | Parameters 237 | ---------- 238 | X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, length, self.d) 239 | A set of examples to summarize. 
240 | 241 | sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional 242 | A set of weights for the examples. This can be either of shape 243 | (-1, self.d) or a vector of shape (-1,). Default is ones. 244 | 245 | 246 | Returns 247 | ------- 248 | logp: torch.Tensor, shape=(-1,) 249 | The log probability of each example. 250 | """ 251 | 252 | if self.frozen: 253 | return 254 | 255 | X = _check_parameter(_cast_as_tensor(X), "X", ndim=3, 256 | check_parameter=self.check_data) 257 | sample_weight = _check_parameter(_cast_as_tensor(sample_weight), 258 | "sample_weight", min_value=0, ndim=(1, 2), 259 | check_parameter=self.check_data) 260 | 261 | if not self._initialized: 262 | if self.n_categories is not None: 263 | n_keys = self.n_categories 264 | elif isinstance(X, torch.masked.MaskedTensor): 265 | n_keys = (torch.max(torch.max(X._masked_data, dim=0)[0], 266 | dim=0)[0] + 1).type(torch.int32) 267 | else: 268 | n_keys = (torch.max(torch.max(X, dim=0)[0], dim=0)[0] + 1).type( 269 | torch.int32) 270 | 271 | self._initialize(len(X[0][0]), n_keys) 272 | 273 | if sample_weight is None: 274 | sample_weight = torch.ones_like(X[:, 0]) 275 | elif len(sample_weight.shape) == 1: 276 | sample_weight = sample_weight.reshape(-1, 1).expand(-1, X.shape[2]) 277 | elif sample_weight.shape[1] == 1: 278 | sample_weight = sample_weight.expand(-1, X.shape[2]) 279 | 280 | _check_parameter(_cast_as_tensor(sample_weight), "sample_weight", 281 | min_value=0, ndim=2, shape=(X.shape[0], X.shape[2]), 282 | check_parameter=self.check_data) 283 | 284 | self.distributions[0].summarize(X[:, 0], sample_weight=sample_weight) 285 | for i, distribution in enumerate(self.distributions[1:-1]): 286 | distribution.summarize(X[:, :i+2], sample_weight=sample_weight) 287 | 288 | distribution = self.distributions[-1] 289 | for i in range(X.shape[1] - self.k): 290 | j = i + self.k + 1 291 | distribution.summarize(X[:, i:j], sample_weight=sample_weight) 292 | 293 | def from_summaries(self): 294 | """Update the model parameters given the extracted statistics. 295 | 296 | This method uses calculated statistics from calls to the `summarize` 297 | method to update the distribution parameters. Hyperparameters for the 298 | update are passed in at initialization time. 299 | 300 | Note: Internally, a call to `fit` is just a successive call to the 301 | `summarize` method followed by the `from_summaries` method. 
302 | """ 303 | 304 | if self.frozen: 305 | return 306 | 307 | for distribution in self.distributions: 308 | distribution.from_summaries() 309 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy >= 1.22.2 2 | scipy >= 1.6.2 3 | scikit-learn >= 1.0.2 4 | torch >= 1.9.0 5 | apricot-select >= 0.6.1 6 | networkx >= 2.8.4 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | setup( 4 | name='pomegranate', 5 | version='1.1.2', 6 | author='Jacob Schreiber', 7 | author_email='jmschreiber91@gmail.com', 8 | packages=['pomegranate', 'pomegranate.distributions', 'pomegranate.hmm'], 9 | url='https://github.com/jmschrei/torchegranate', 10 | license='MIT', 11 | description='A PyTorch implementation of probabilistic models.', 12 | install_requires=[ 13 | 'numpy >= 1.22.2', 14 | 'scipy >= 1.6.2', 15 | 'scikit-learn >= 1.0.2', 16 | 'torch >= 1.9.0', 17 | 'apricot-select >= 0.6.1', 18 | 'networkx >= 2.8.4' 19 | ] 20 | ) -------------------------------------------------------------------------------- /slides/pomegranate ODSC East 2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC East 2019.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC Europe 2020.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC Europe 2020.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC West 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC West 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC West 2018.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC West 2018.pdf -------------------------------------------------------------------------------- /slides/pomegranate ODSC West 2019.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate ODSC West 2019.pdf -------------------------------------------------------------------------------- /slides/pomegranate PyData NYC 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate PyData NYC 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate data intelligence 2017.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate data intelligence 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate odsc east 2017 turorial.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate odsc east 2017 turorial.pdf -------------------------------------------------------------------------------- /slides/pomegranate pydata seattle 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate pydata seattle 2017.pdf -------------------------------------------------------------------------------- /slides/pomegranate scipy 2017.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/slides/pomegranate scipy 2017.pdf -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/tests/__init__.py -------------------------------------------------------------------------------- /tests/distributions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/tests/distributions/__init__.py -------------------------------------------------------------------------------- /tests/distributions/_utils.py: -------------------------------------------------------------------------------- 1 | # _utils.py 2 | # Contact: Jacob Schreiber 3 | 4 | import numpy 5 | import torch 6 | 7 | from ..tools import assert_raises 8 | from numpy.testing import assert_array_almost_equal 9 | 10 | 11 | def _test_initialization(d, x, name, inertia, frozen, dtype): 12 | assert d.inertia == inertia 13 | assert d.frozen == frozen 14 | param = getattr(d, name) 15 | 16 | if x is not None: 17 | assert param.shape[0] == len(x) 18 | assert param.dtype == dtype 19 | assert_array_almost_equal(param, x) 20 | else: 21 | assert param == x 22 | 23 | 24 | def _test_initialization_raises_one_parameter(distribution, valid_value, 25 | min_value=None, max_value=None): 26 | assert_raises(ValueError, distribution, valid_value) 27 | assert_raises(ValueError, distribution, [valid_value], inertia=-0.4) 28 | assert_raises(ValueError, distribution, [valid_value], inertia=1.2) 29 | assert_raises(ValueError, distribution, [valid_value], inertia=1.2, 30 | frozen="true") 31 | assert_raises(ValueError, distribution, [valid_value], inertia=1.2, 32 | frozen=3) 33 | 34 | assert_raises(ValueError, distribution, inertia=-0.4) 35 | assert_raises(ValueError, distribution, inertia=1.2) 36 | assert_raises(ValueError, distribution, inertia=1.2, frozen="true") 37 | assert_raises(ValueError, distribution, inertia=1.2, frozen=3) 38 | 39 | if min_value is not None: 40 | assert_raises(ValueError, distribution, [valid_value, min_value-0.1]) 41 | 42 | if max_value is not None: 43 | assert_raises(ValueError, distribution, [valid_value, max_value+0.1]) 44 | 45 | 46 | def 
_test_initialization_raises_two_parameters(distribution, valid_value1, 47 | valid_value2, min_value1=None, min_value2=None, max_value1=None, 48 | max_value2=None): 49 | 50 | assert_raises(ValueError, distribution, valid_value1) 51 | assert_raises(ValueError, distribution, None, valid_value2) 52 | assert_raises(ValueError, distribution, valid_value1, valid_value2) 53 | assert_raises(ValueError, distribution, [valid_value1], 54 | [valid_value2, valid_value2]) 55 | assert_raises(ValueError, distribution, [valid_value1, valid_value1], 56 | [valid_value2]) 57 | 58 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 59 | inertia=-0.4) 60 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 61 | inertia=1.2) 62 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 63 | inertia=1.2, frozen="true") 64 | assert_raises(ValueError, distribution, [valid_value1, valid_value2], 65 | inertia=1.2, frozen=3) 66 | 67 | assert_raises(ValueError, distribution, inertia=-0.4) 68 | assert_raises(ValueError, distribution, inertia=1.2) 69 | assert_raises(ValueError, distribution, inertia=1.2, frozen="true") 70 | assert_raises(ValueError, distribution, inertia=1.2, frozen=3) 71 | 72 | if min_value1 is not None: 73 | assert_raises(ValueError, distribution, [valid_value1, min_value1-0.1], 74 | [valid_value2, valid_value2]) 75 | 76 | if min_value2 is not None: 77 | assert_raises(ValueError, distribution, [valid_value1, valid_value1], 78 | [valid_value2, min_value2-0.1]) 79 | 80 | if max_value1 is not None: 81 | assert_raises(ValueError, distribution, [valid_value1, max_value1+0.1], 82 | [valid_value2, valid_value2]) 83 | 84 | if max_value2 is not None: 85 | assert_raises(ValueError, distribution, [valid_value1, valid_value1], 86 | [valid_value2, max_value2+0.1]) 87 | 88 | 89 | def _test_predictions(x, y, y_hat, dtype): 90 | assert isinstance(y_hat, torch.Tensor) 91 | assert y_hat.dtype == dtype 92 | assert y_hat.shape == (len(x),) 93 | assert_array_almost_equal(y, y_hat) 94 | 95 | 96 | def _test_raises(d, name, X, w=None, min_value=None, max_value=None): 97 | f = getattr(d, name) 98 | 99 | assert_raises(ValueError, f, [X]) 100 | assert_raises(ValueError, f, X[0]) 101 | assert_raises((ValueError, TypeError, RuntimeError), f, X[0][0]) 102 | 103 | if d._initialized == True: 104 | assert_raises(ValueError, f, [x[:-1] for x in X]) 105 | 106 | if min_value is not None: 107 | assert_raises(ValueError, f, [[min_value-0.1 for i in range(d.d)]]) 108 | 109 | if max_value is not None: 110 | assert_raises(ValueError, f, [[max_value+0.1 for i in range(d.d)]]) 111 | else: 112 | if min_value is not None: 113 | assert_raises(ValueError, f, [[min_value-0.1 for i in range(3)]]) 114 | 115 | if max_value is not None: 116 | assert_raises(ValueError, f, [[max_value+0.1 for i in range(3)]]) 117 | 118 | 119 | if w is not None: 120 | assert_raises(ValueError, f, [X], w) 121 | assert_raises(ValueError, f, X, [w]) 122 | assert_raises(ValueError, f, [X], [w]) 123 | assert_raises(ValueError, f, X, w[:len(w)-1]) 124 | assert_raises(ValueError, f, X[:len(X)-1], w) 125 | 126 | 127 | def _test_efd_from_summaries(d, name1, name2, values): 128 | assert_array_almost_equal(getattr(d, name1), values) 129 | assert_array_almost_equal(getattr(d, name2), numpy.log(values)) 130 | assert_array_almost_equal(d._w_sum, numpy.zeros(d.d)) 131 | assert_array_almost_equal(d._xw_sum, numpy.zeros(d.d)) 132 | -------------------------------------------------------------------------------- /tests/hmm/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/jmschrei/pomegranate/e9162731f4f109b7b17ecffde768734cacdb839b/tests/hmm/__init__.py -------------------------------------------------------------------------------- /tests/tools.py: -------------------------------------------------------------------------------- 1 | # tools.py 2 | # Author: Jacob Schreiber 3 | 4 | ''' 5 | Tools taken from nose since it can no longer be installed after Py3.12. 6 | ''' 7 | 8 | 9 | import re 10 | import unittest 11 | 12 | 13 | __all__ = ['ok_', 'eq_'] 14 | 15 | # Use the same flag as unittest itself to prevent descent into these functions: 16 | __unittest = 1 17 | 18 | 19 | def ok_(expr, msg=None): 20 | """Shorthand for assert. Saves 3 whole characters! 21 | """ 22 | if not expr: 23 | raise AssertionError(msg) 24 | 25 | 26 | def eq_(a, b, msg=None): 27 | """Shorthand for 'assert a == b, "%r != %r" % (a, b) 28 | """ 29 | if not a == b: 30 | raise AssertionError(msg or "%r != %r" % (a, b)) 31 | 32 | 33 | # 34 | # Expose assert* from unittest.TestCase 35 | # - give them pep8 style names 36 | # 37 | caps = re.compile('([A-Z])') 38 | 39 | def pep8(name): 40 | return caps.sub(lambda m: '_' + m.groups()[0].lower(), name) 41 | 42 | class Dummy(unittest.TestCase): 43 | def nop(): 44 | pass 45 | _t = Dummy('nop') 46 | 47 | for at in [ at for at in dir(_t) 48 | if at.startswith('assert') and not '_' in at ]: 49 | pepd = pep8(at) 50 | vars()[pepd] = getattr(_t, at) 51 | __all__.append(pepd) 52 | 53 | del Dummy 54 | del _t 55 | del pep8 56 | --------------------------------------------------------------------------------
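
A note on the helpers defined in tests/tools.py above: the loop at the bottom of that file copies every `assert*` method from `unittest.TestCase` onto the module under a pep8-style name (so `assertRaises` becomes `assert_raises`, and so on), which is why tests/distributions/_utils.py can write `from ..tools import assert_raises` even though no such function is defined explicitly. The snippet below is an illustrative sketch only, not part of the repository; the `tests.tools` import path assumes it is run from the repository root with the test packages importable.

# illustrative_tools_usage.py -- sketch only, not part of the repository
from tests.tools import assert_raises, eq_, ok_

def divide(a, b):
    return a / b

# assertRaises -> assert_raises via the caps-to-underscore renaming in tools.py;
# the call signature is the same as unittest.TestCase.assertRaises(exc, callable, *args)
assert_raises(ZeroDivisionError, divide, 1, 0)

eq_(divide(6, 3), 2.0)    # shorthand for `assert a == b` with a formatted message
ok_(divide(4, 2) == 2.0)  # shorthand for a bare `assert`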